Commit 8b393f83 authored by David S. Miller

Merge branch 'geneve-vxlan-outer-checksum'

Alexander Duyck says:

====================
GENEVE/VXLAN: Enable outer Tx checksum by default

This patch series makes it so that we enable the outer Tx checksum for IPv4
tunnels by default.  This makes the behavior consistent with how we were
handling this for IPv6.  In addition I have updated the internal flags for
these tunnels so that we use a ZERO_CSUM_TX flag for IPv4 which should
match up well with the ZERO_CSUM6_TX flag which was already in use for
IPv6.

For most network devices this should be a net gain in terms of performance
as having the outer header checksum present allows for devices to report
CHECKSUM_UNNECESSARY which we can then convert to CHECKSUM_COMPLETE in order
to determine if the inner header checksum is valid.

Below is some data I collected with ixgbe with an X540 that demonstrates
this.  I located two PFs connected back to back in two different
namespaces and then set up a pair of tunnels on each, one with checksum
enabled and one without.

Recv   Send    Send                          Utilization
Socket Socket  Message  Elapsed              Send
Size   Size    Size     Time     Throughput  local
bytes  bytes   bytes    secs.    10^6bits/s  % S

noudpcsum:
 87380  16384  16384    30.00      8898.67   12.80
udpcsum:
 87380  16384  16384    30.00      9088.47   5.69

The one spot where this may cause a performance regression is if the
environment contains devices that can parse the inner headers and a device
supports NETIF_F_GSO_UDP_TUNNEL but not NETIF_F_GSO_UDP_TUNNEL_CSUM.  In
the case of such a device we have to fall back to using GSO to segment the
tunnel instead of TSO and as a result we may take a performance hit as seen
below with i40e.

Recv   Send    Send                          Utilization
Socket Socket  Message  Elapsed              Send
Size   Size    Size     Time     Throughput  local
bytes  bytes   bytes    secs.    10^6bits/s  % S

noudpcsum:
 87380  16384  16384    30.00      9085.21   3.32
udpcsum:
 87380  16384  16384    30.00      9089.23   5.54

In addition it will be necessary to update iproute2 so that we don't
provide the checksum attribute unless specified.  This way on older kernels
which don't have local checksum offload we will default to disabling the
outer checksum, and on newer kernels that have LCO we can default to
enabling it.

I also haven't investigated the effect this will have on OVS.  However I
suspect the impact should be minimal as the worst case scenario should be
that Tx checksumming will become enabled by default which should be
consistent with the existing behavior for IPv6.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 417b7ca4 6ceb31ca
...@@ -76,7 +76,7 @@ struct geneve_dev { ...@@ -76,7 +76,7 @@ struct geneve_dev {
}; };
/* Geneve device flags */ /* Geneve device flags */
#define GENEVE_F_UDP_CSUM BIT(0) #define GENEVE_F_UDP_ZERO_CSUM_TX BIT(0)
#define GENEVE_F_UDP_ZERO_CSUM6_TX BIT(1) #define GENEVE_F_UDP_ZERO_CSUM6_TX BIT(1)
#define GENEVE_F_UDP_ZERO_CSUM6_RX BIT(2) #define GENEVE_F_UDP_ZERO_CSUM6_RX BIT(2)
...@@ -703,7 +703,7 @@ static int geneve_build_skb(struct rtable *rt, struct sk_buff *skb, ...@@ -703,7 +703,7 @@ static int geneve_build_skb(struct rtable *rt, struct sk_buff *skb,
struct genevehdr *gnvh; struct genevehdr *gnvh;
int min_headroom; int min_headroom;
int err; int err;
bool udp_sum = !!(flags & GENEVE_F_UDP_CSUM); bool udp_sum = !(flags & GENEVE_F_UDP_ZERO_CSUM_TX);
skb_scrub_packet(skb, xnet); skb_scrub_packet(skb, xnet);
...@@ -944,9 +944,9 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, ...@@ -944,9 +944,9 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
opts = ip_tunnel_info_opts(info); opts = ip_tunnel_info_opts(info);
if (key->tun_flags & TUNNEL_CSUM) if (key->tun_flags & TUNNEL_CSUM)
flags |= GENEVE_F_UDP_CSUM; flags &= ~GENEVE_F_UDP_ZERO_CSUM_TX;
else else
flags &= ~GENEVE_F_UDP_CSUM; flags |= GENEVE_F_UDP_ZERO_CSUM_TX;
err = geneve_build_skb(rt, skb, key->tun_flags, vni, err = geneve_build_skb(rt, skb, key->tun_flags, vni,
info->options_len, opts, flags, xnet); info->options_len, opts, flags, xnet);
...@@ -972,7 +972,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, ...@@ -972,7 +972,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr, udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr,
tos, ttl, df, sport, geneve->dst_port, tos, ttl, df, sport, geneve->dst_port,
!net_eq(geneve->net, dev_net(geneve->dev)), !net_eq(geneve->net, dev_net(geneve->dev)),
!(flags & GENEVE_F_UDP_CSUM)); !!(flags & GENEVE_F_UDP_ZERO_CSUM_TX));
return NETDEV_TX_OK; return NETDEV_TX_OK;
...@@ -1383,8 +1383,8 @@ static int geneve_newlink(struct net *net, struct net_device *dev, ...@@ -1383,8 +1383,8 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
metadata = true; metadata = true;
if (data[IFLA_GENEVE_UDP_CSUM] && if (data[IFLA_GENEVE_UDP_CSUM] &&
nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) !nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
flags |= GENEVE_F_UDP_CSUM; flags |= GENEVE_F_UDP_ZERO_CSUM_TX;
if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] && if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] &&
nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX])) nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
...@@ -1454,7 +1454,7 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) ...@@ -1454,7 +1454,7 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
} }
if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM, if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM,
!!(geneve->flags & GENEVE_F_UDP_CSUM)) || !(geneve->flags & GENEVE_F_UDP_ZERO_CSUM_TX)) ||
nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
!!(geneve->flags & GENEVE_F_UDP_ZERO_CSUM6_TX)) || !!(geneve->flags & GENEVE_F_UDP_ZERO_CSUM6_TX)) ||
nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
......
...@@ -1957,13 +1957,6 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -1957,13 +1957,6 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto drop; goto drop;
sk = vxlan->vn4_sock->sock->sk; sk = vxlan->vn4_sock->sock->sk;
if (info) {
if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
df = htons(IP_DF);
} else {
udp_sum = !!(flags & VXLAN_F_UDP_CSUM);
}
rt = vxlan_get_route(vxlan, skb, rt = vxlan_get_route(vxlan, skb,
rdst ? rdst->remote_ifindex : 0, tos, rdst ? rdst->remote_ifindex : 0, tos,
dst->sin.sin_addr.s_addr, &saddr, dst->sin.sin_addr.s_addr, &saddr,
...@@ -1997,6 +1990,11 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -1997,6 +1990,11 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
return; return;
} }
if (!info)
udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
df = htons(IP_DF);
tos = ip_tunnel_ecn_encap(tos, old_iph, skb); tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
err = vxlan_build_skb(skb, &rt->dst, sizeof(struct iphdr), err = vxlan_build_skb(skb, &rt->dst, sizeof(struct iphdr),
...@@ -2898,8 +2896,9 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, ...@@ -2898,8 +2896,9 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
if (data[IFLA_VXLAN_PORT]) if (data[IFLA_VXLAN_PORT])
conf.dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]); conf.dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
if (data[IFLA_VXLAN_UDP_CSUM] && nla_get_u8(data[IFLA_VXLAN_UDP_CSUM])) if (data[IFLA_VXLAN_UDP_CSUM] &&
conf.flags |= VXLAN_F_UDP_CSUM; !nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
conf.flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] && if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] &&
nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX])) nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
...@@ -3043,7 +3042,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) ...@@ -3043,7 +3042,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) || nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) ||
nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) || nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) ||
nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM, nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
!!(vxlan->flags & VXLAN_F_UDP_CSUM)) || !(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) ||
nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX, nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
!!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) || !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
......
...@@ -197,7 +197,7 @@ struct vxlan_dev { ...@@ -197,7 +197,7 @@ struct vxlan_dev {
#define VXLAN_F_L2MISS 0x08 #define VXLAN_F_L2MISS 0x08
#define VXLAN_F_L3MISS 0x10 #define VXLAN_F_L3MISS 0x10
#define VXLAN_F_IPV6 0x20 #define VXLAN_F_IPV6 0x20
#define VXLAN_F_UDP_CSUM 0x40 #define VXLAN_F_UDP_ZERO_CSUM_TX 0x40
#define VXLAN_F_UDP_ZERO_CSUM6_TX 0x80 #define VXLAN_F_UDP_ZERO_CSUM6_TX 0x80
#define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100 #define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100
#define VXLAN_F_REMCSUM_TX 0x200 #define VXLAN_F_REMCSUM_TX 0x200
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment