Commit dc01e7d3, authored by Or Gerlitz, committed by David S. Miller

net: Add GRO support for vxlan traffic

Add GRO handlers for vxlan, by using the UDP GRO infrastructure.

For a single TCP session that goes through vxlan tunneling, I got a nice
improvement from 6.8 Gb/s to 11.5 Gb/s.

--> UDP/VXLAN GRO disabled
$ netperf  -H 192.168.52.147 -c -C

$ netperf -t TCP_STREAM -H 192.168.52.147 -c -C
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.52.147 () port 0 AF_INET
Recv   Send    Send                          Utilization       Service Demand
Socket Socket  Message  Elapsed              Send     Recv     Send    Recv
Size   Size    Size     Time     Throughput  local    remote   local   remote
bytes  bytes   bytes    secs.    10^6bits/s  % S      % S      us/KB   us/KB

 87380  65536  65536    10.00      6799.75   12.54    24.79    0.604   1.195

--> UDP/VXLAN GRO enabled

$ netperf -t TCP_STREAM -H 192.168.52.147 -c -C
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.52.147 () port 0 AF_INET
Recv   Send    Send                          Utilization       Service Demand
Socket Socket  Message  Elapsed              Send     Recv     Send    Recv
Size   Size    Size     Time     Throughput  local    remote   local   remote
bytes  bytes   bytes    secs.    10^6bits/s  % S      % S      us/KB   us/KB

 87380  65536  65536    10.00      11562.72   24.90    20.34    0.706   0.577
Signed-off-by: Shlomo Pongratz <shlomop@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent e27a2f83
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
#include <net/net_namespace.h> #include <net/net_namespace.h>
#include <net/netns/generic.h> #include <net/netns/generic.h>
#include <net/vxlan.h> #include <net/vxlan.h>
#include <net/protocol.h>
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h> #include <net/ipv6.h>
#include <net/addrconf.h> #include <net/addrconf.h>
...@@ -554,13 +555,106 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, ...@@ -554,13 +555,106 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
return 1; return 1;
} }
/*
 * GRO receive callback for VXLAN-encapsulated traffic.
 *
 * @head: list of packets currently held as GRO merge candidates
 * @skb:  newly received packet, GRO offset positioned at the vxlan header
 *
 * Locates the vxlan header and the inner Ethernet header of @skb, clears
 * same_flow on every candidate in @head whose VNI or inner Ethernet header
 * differs from @skb, then hands the packet to the gro_receive callback
 * registered for the inner Ethernet protocol.
 *
 * Returns whatever the inner gro_receive callback returns, or NULL when a
 * header could not be obtained or no offload is registered for the inner
 * protocol. In those NULL cases the flush bit of @skb is set.
 */
static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
	struct sk_buff *p, **pp = NULL;
	struct vxlanhdr *vh, *vh2;
	struct ethhdr *eh, *eh2;
	unsigned int hlen, off_vx, off_eth;
	const struct packet_offload *ptype;
	__be16 type;
	int flush = 1;

	/* Get at the vxlan header; fall back to the slow helper if needed. */
	off_vx = skb_gro_offset(skb);
	hlen = off_vx + sizeof(*vh);
	vh = skb_gro_header_fast(skb, off_vx);
	if (skb_gro_header_hard(skb, hlen)) {
		vh = skb_gro_header_slow(skb, hlen, off_vx);
		if (unlikely(!vh))
			goto out;
	}
	skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */

	/* Same again for the inner Ethernet header that follows it. */
	off_eth = skb_gro_offset(skb);
	hlen = off_eth + sizeof(*eh);
	eh = skb_gro_header_fast(skb, off_eth);
	if (skb_gro_header_hard(skb, hlen)) {
		eh = skb_gro_header_slow(skb, hlen, off_eth);
		if (unlikely(!eh))
			goto out;
	}

	flush = 0;

	/*
	 * Visit every candidate, not just up to the first match: any
	 * candidate left unchecked would keep same_flow set and could be
	 * treated as the same flow by the inner protocol's GRO handler even
	 * though its VNI or inner Ethernet header differs.
	 */
	for (p = *head; p; p = p->next) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		vh2 = (struct vxlanhdr *)(p->data + off_vx);
		eh2 = (struct ethhdr *)(p->data + off_eth);
		if (vh->vx_vni != vh2->vx_vni || compare_ether_header(eh, eh2))
			NAPI_GRO_CB(p)->same_flow = 0;
	}

	type = eh->h_proto;

	rcu_read_lock();
	ptype = gro_find_receive_by_type(type);
	if (ptype == NULL) {
		/* No GRO handler for the inner protocol: do not aggregate. */
		flush = 1;
		goto out_unlock;
	}

	skb_gro_pull(skb, sizeof(*eh)); /* pull inner eth header */
	pp = ptype->callbacks.gro_receive(head, skb);

out_unlock:
	rcu_read_unlock();
out:
	NAPI_GRO_CB(skb)->flush |= flush;

	return pp;
}
/*
 * GRO complete callback for VXLAN-encapsulated traffic.
 *
 * @skb:   the aggregated packet
 * @nhoff: offset of the vxlan header from skb->data
 *
 * Reads the protocol of the inner Ethernet header that follows the vxlan
 * header and forwards completion to the gro_complete callback registered
 * for that protocol, passing the offset just past the inner Ethernet header.
 *
 * Returns the inner callback's result, or -ENOSYS when no offload is
 * registered for the inner protocol.
 */
static int vxlan_gro_complete(struct sk_buff *skb, int nhoff)
{
	const int encap_len = sizeof(struct vxlanhdr) + sizeof(struct ethhdr);
	struct packet_offload *inner_offload;
	struct ethhdr *inner_eth;
	__be16 inner_proto;
	int ret;

	/* The inner Ethernet header sits right behind the vxlan header. */
	inner_eth = (struct ethhdr *)(skb->data + nhoff + sizeof(struct vxlanhdr));
	inner_proto = inner_eth->h_proto;

	rcu_read_lock();
	inner_offload = gro_find_complete_by_type(inner_proto);
	if (inner_offload == NULL)
		ret = -ENOSYS;
	else
		ret = inner_offload->callbacks.gro_complete(skb, nhoff + encap_len);
	rcu_read_unlock();

	return ret;
}
/* Notify netdevs that UDP port started listening */ /* Notify netdevs that UDP port started listening */
static void vxlan_notify_add_rx_port(struct sock *sk) static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
{ {
struct net_device *dev; struct net_device *dev;
struct sock *sk = vs->sock->sk;
struct net *net = sock_net(sk); struct net *net = sock_net(sk);
sa_family_t sa_family = sk->sk_family; sa_family_t sa_family = sk->sk_family;
__be16 port = inet_sk(sk)->inet_sport; __be16 port = inet_sk(sk)->inet_sport;
int err;
if (sa_family == AF_INET) {
err = udp_add_offload(&vs->udp_offloads);
if (err)
pr_warn("vxlan: udp_add_offload failed with status %d\n", err);
}
rcu_read_lock(); rcu_read_lock();
for_each_netdev_rcu(net, dev) { for_each_netdev_rcu(net, dev) {
...@@ -572,9 +666,10 @@ static void vxlan_notify_add_rx_port(struct sock *sk) ...@@ -572,9 +666,10 @@ static void vxlan_notify_add_rx_port(struct sock *sk)
} }
/* Notify netdevs that UDP port is no more listening */ /* Notify netdevs that UDP port is no more listening */
static void vxlan_notify_del_rx_port(struct sock *sk) static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
{ {
struct net_device *dev; struct net_device *dev;
struct sock *sk = vs->sock->sk;
struct net *net = sock_net(sk); struct net *net = sock_net(sk);
sa_family_t sa_family = sk->sk_family; sa_family_t sa_family = sk->sk_family;
__be16 port = inet_sk(sk)->inet_sport; __be16 port = inet_sk(sk)->inet_sport;
...@@ -586,6 +681,9 @@ static void vxlan_notify_del_rx_port(struct sock *sk) ...@@ -586,6 +681,9 @@ static void vxlan_notify_del_rx_port(struct sock *sk)
port); port);
} }
rcu_read_unlock(); rcu_read_unlock();
if (sa_family == AF_INET)
udp_del_offload(&vs->udp_offloads);
} }
/* Add new entry to forwarding table -- assumes lock held */ /* Add new entry to forwarding table -- assumes lock held */
...@@ -963,7 +1061,7 @@ void vxlan_sock_release(struct vxlan_sock *vs) ...@@ -963,7 +1061,7 @@ void vxlan_sock_release(struct vxlan_sock *vs)
spin_lock(&vn->sock_lock); spin_lock(&vn->sock_lock);
hlist_del_rcu(&vs->hlist); hlist_del_rcu(&vs->hlist);
rcu_assign_sk_user_data(vs->sock->sk, NULL); rcu_assign_sk_user_data(vs->sock->sk, NULL);
vxlan_notify_del_rx_port(sk); vxlan_notify_del_rx_port(vs);
spin_unlock(&vn->sock_lock); spin_unlock(&vn->sock_lock);
queue_work(vxlan_wq, &vs->del_work); queue_work(vxlan_wq, &vs->del_work);
...@@ -1124,8 +1222,8 @@ static void vxlan_rcv(struct vxlan_sock *vs, ...@@ -1124,8 +1222,8 @@ static void vxlan_rcv(struct vxlan_sock *vs,
* leave the CHECKSUM_UNNECESSARY, the device checksummed it * leave the CHECKSUM_UNNECESSARY, the device checksummed it
* for us. Otherwise force the upper layers to verify it. * for us. Otherwise force the upper layers to verify it.
*/ */
if (skb->ip_summed != CHECKSUM_UNNECESSARY || !skb->encapsulation || if ((skb->ip_summed != CHECKSUM_UNNECESSARY && skb->ip_summed != CHECKSUM_PARTIAL) ||
!(vxlan->dev->features & NETIF_F_RXCSUM)) !skb->encapsulation || !(vxlan->dev->features & NETIF_F_RXCSUM))
skb->ip_summed = CHECKSUM_NONE; skb->ip_summed = CHECKSUM_NONE;
skb->encapsulation = 0; skb->encapsulation = 0;
...@@ -2303,7 +2401,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, ...@@ -2303,7 +2401,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
struct sock *sk; struct sock *sk;
unsigned int h; unsigned int h;
vs = kmalloc(sizeof(*vs), GFP_KERNEL); vs = kzalloc(sizeof(*vs), GFP_KERNEL);
if (!vs) if (!vs)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
...@@ -2328,9 +2426,14 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, ...@@ -2328,9 +2426,14 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
vs->data = data; vs->data = data;
rcu_assign_sk_user_data(vs->sock->sk, vs); rcu_assign_sk_user_data(vs->sock->sk, vs);
/* Initialize the vxlan udp offloads structure */
vs->udp_offloads.port = port;
vs->udp_offloads.callbacks.gro_receive = vxlan_gro_receive;
vs->udp_offloads.callbacks.gro_complete = vxlan_gro_complete;
spin_lock(&vn->sock_lock); spin_lock(&vn->sock_lock);
hlist_add_head_rcu(&vs->hlist, vs_head(net, port)); hlist_add_head_rcu(&vs->hlist, vs_head(net, port));
vxlan_notify_add_rx_port(sk); vxlan_notify_add_rx_port(vs);
spin_unlock(&vn->sock_lock); spin_unlock(&vn->sock_lock);
/* Mark socket as an encapsulation socket. */ /* Mark socket as an encapsulation socket. */
......
...@@ -21,6 +21,7 @@ struct vxlan_sock { ...@@ -21,6 +21,7 @@ struct vxlan_sock {
struct rcu_head rcu; struct rcu_head rcu;
struct hlist_head vni_list[VNI_HASH_SIZE]; struct hlist_head vni_list[VNI_HASH_SIZE];
atomic_t refcnt; atomic_t refcnt;
struct udp_offload udp_offloads;
}; };
struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment