Commit ac979929 authored by David S. Miller

Merge branch 'ipv6-gre-offloads'

Alexander Duyck says:

====================
Add support for offloads with IPv6 GRE tunnels

This patch series enables the use of segmentation and checksum offloads
with IPv6 based GRE tunnels.

In order to enable this series I had to make a change to
iptunnel_handle_offloads so that it would no longer free the skb.  This was
necessary as there were multiple paths in the IPv6 GRE code that required
the skb to still be present so it could be freed.  As it turned out I
believe this actually fixes a bug that was present in FOU/GUE based tunnels
anyway.

Below is a quick breakdown of the performance gains seen with a simple
netperf test passing traffic through an ip6gretap tunnel and then an i40e
interface:

Throughput Throughput  Local Local   Result
           Units       CPU   Service Tag
                       Util  Demand
                       %
3544.93    10^6bits/s  6.30  4.656   "before"
13081.75   10^6bits/s  3.75  0.752   "after"
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents ec9dcd35 3a80e1fa
......@@ -696,16 +696,12 @@ static int geneve_build_skb(struct rtable *rt, struct sk_buff *skb,
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr);
err = skb_cow_head(skb, min_headroom);
if (unlikely(err)) {
kfree_skb(skb);
if (unlikely(err))
goto free_rt;
}
skb = udp_tunnel_handle_offloads(skb, udp_sum);
if (IS_ERR(skb)) {
err = PTR_ERR(skb);
err = udp_tunnel_handle_offloads(skb, udp_sum);
if (err)
goto free_rt;
}
gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
......@@ -733,16 +729,12 @@ static int geneve6_build_skb(struct dst_entry *dst, struct sk_buff *skb,
min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
+ GENEVE_BASE_HLEN + opt_len + sizeof(struct ipv6hdr);
err = skb_cow_head(skb, min_headroom);
if (unlikely(err)) {
kfree_skb(skb);
if (unlikely(err))
goto free_dst;
}
skb = udp_tunnel_handle_offloads(skb, udp_sum);
if (IS_ERR(skb)) {
err = PTR_ERR(skb);
err = udp_tunnel_handle_offloads(skb, udp_sum);
if (IS_ERR(skb))
goto free_dst;
}
gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
......@@ -937,7 +929,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
err = geneve_build_skb(rt, skb, key->tun_flags, vni,
info->options_len, opts, flags, xnet);
if (unlikely(err))
goto err;
goto tx_error;
tos = ip_tunnel_ecn_encap(key->tos, iip, skb);
ttl = key->ttl;
......@@ -946,7 +938,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
err = geneve_build_skb(rt, skb, 0, geneve->vni,
0, NULL, flags, xnet);
if (unlikely(err))
goto err;
goto tx_error;
tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, iip, skb);
ttl = geneve->ttl;
......@@ -964,7 +956,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
tx_error:
dev_kfree_skb(skb);
err:
if (err == -ELOOP)
dev->stats.collisions++;
else if (err == -ENETUNREACH)
......@@ -1026,7 +1018,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
info->options_len, opts,
flags, xnet);
if (unlikely(err))
goto err;
goto tx_error;
prio = ip_tunnel_ecn_encap(key->tos, iip, skb);
ttl = key->ttl;
......@@ -1035,7 +1027,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
err = geneve6_build_skb(dst, skb, 0, geneve->vni,
0, NULL, flags, xnet);
if (unlikely(err))
goto err;
goto tx_error;
prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel),
iip, skb);
......@@ -1054,7 +1046,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
tx_error:
dev_kfree_skb(skb);
err:
if (err == -ELOOP)
dev->stats.collisions++;
else if (err == -ENETUNREACH)
......
......@@ -1797,9 +1797,9 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
if (WARN_ON(!skb))
return -ENOMEM;
skb = iptunnel_handle_offloads(skb, type);
if (IS_ERR(skb))
return PTR_ERR(skb);
err = iptunnel_handle_offloads(skb, type);
if (err)
goto out_free;
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = VXLAN_HF_VNI;
......
......@@ -309,7 +309,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
gfp_t flags);
struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask);
int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask);
static inline int iptunnel_pull_offloads(struct sk_buff *skb)
{
......
......@@ -105,8 +105,7 @@ struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family,
__be16 flags, __be64 tunnel_id,
int md_size);
static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,
bool udp_csum)
static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum)
{
int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
......
......@@ -802,11 +802,11 @@ int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
SKB_GSO_UDP_TUNNEL;
__be16 sport;
int err;
skb = iptunnel_handle_offloads(skb, type);
if (IS_ERR(skb))
return PTR_ERR(skb);
err = iptunnel_handle_offloads(skb, type);
if (err)
return err;
sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
skb, 0, 0, false);
......@@ -826,6 +826,7 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
__be16 sport;
void *data;
bool need_priv = false;
int err;
if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) &&
skb->ip_summed == CHECKSUM_PARTIAL) {
......@@ -836,10 +837,9 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
optlen += need_priv ? GUE_LEN_PRIV : 0;
skb = iptunnel_handle_offloads(skb, type);
if (IS_ERR(skb))
return PTR_ERR(skb);
err = iptunnel_handle_offloads(skb, type);
if (err)
return err;
/* Get source port (based on flow hash) before skb_push */
sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
......
......@@ -292,6 +292,18 @@ static const struct net_offload gre_offload = {
static int __init gre_offload_init(void)
{
return inet_add_offload(&gre_offload, IPPROTO_GRE);
int err;
err = inet_add_offload(&gre_offload, IPPROTO_GRE);
#if IS_ENABLED(CONFIG_IPV6)
if (err)
return err;
err = inet6_add_offload(&gre_offload, IPPROTO_GRE);
if (err)
inet_del_offload(&gre_offload, IPPROTO_GRE);
#endif
return err;
}
device_initcall(gre_offload_init);
......@@ -500,8 +500,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
bool csum)
static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
......@@ -568,11 +567,8 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
}
/* Push Tunnel header. */
skb = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM));
if (IS_ERR(skb)) {
skb = NULL;
if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
goto err_free_rt;
}
flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB),
......@@ -640,16 +636,14 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
tnl_params = &tunnel->parms.iph;
}
skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
if (IS_ERR(skb))
goto out;
if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
goto free_skb;
__gre_xmit(skb, dev, tnl_params, skb->protocol);
return NETDEV_TX_OK;
free_skb:
kfree_skb(skb);
out:
dev->stats.tx_dropped++;
return NETDEV_TX_OK;
}
......@@ -664,9 +658,8 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
return NETDEV_TX_OK;
}
skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
if (IS_ERR(skb))
goto out;
if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
goto free_skb;
if (skb_cow_head(skb, dev->needed_headroom))
goto free_skb;
......@@ -676,7 +669,6 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
free_skb:
kfree_skb(skb);
out:
dev->stats.tx_dropped++;
return NETDEV_TX_OK;
}
......
......@@ -146,8 +146,8 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
}
EXPORT_SYMBOL_GPL(iptunnel_metadata_reply);
struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
int gso_type_mask)
int iptunnel_handle_offloads(struct sk_buff *skb,
int gso_type_mask)
{
int err;
......@@ -159,9 +159,9 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
if (skb_is_gso(skb)) {
err = skb_unclone(skb, GFP_ATOMIC);
if (unlikely(err))
goto error;
return err;
skb_shinfo(skb)->gso_type |= gso_type_mask;
return skb;
return 0;
}
if (skb->ip_summed != CHECKSUM_PARTIAL) {
......@@ -174,10 +174,7 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
skb->encapsulation = 0;
}
return skb;
error:
kfree_skb(skb);
return ERR_PTR(err);
return 0;
}
EXPORT_SYMBOL_GPL(iptunnel_handle_offloads);
......
......@@ -219,9 +219,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (unlikely(skb->protocol != htons(ETH_P_IP)))
goto tx_error;
skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP);
if (IS_ERR(skb))
goto out;
if (iptunnel_handle_offloads(skb, SKB_GSO_IPIP))
goto tx_error;
skb_set_inner_ipproto(skb, IPPROTO_IPIP);
......@@ -230,7 +229,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
tx_error:
kfree_skb(skb);
out:
dev->stats.tx_errors++;
return NETDEV_TX_OK;
}
......
......@@ -598,6 +598,18 @@ static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
opt->ops.opt_nflen = 8;
}
/* Compute the folded 16-bit checksum that ip6gre_xmit2() stores in the GRE
 * header when GRE_CSUM is set and the skb is not being GRE-segmented.
 *
 * @skb: packet being transmitted.
 *
 * Returns the result of csum_fold() over the selected partial checksum.
 */
static __sum16 gre6_checksum(struct sk_buff *skb)
{
__wsum csum;
/* With CHECKSUM_PARTIAL, defer to lco_csum() rather than summing the
 * data here (presumably the local-checksum-offload helper, which relies
 * on the inner checksum being completed later -- confirm).
 */
if (skb->ip_summed == CHECKSUM_PARTIAL)
csum = lco_csum(skb);
else
/* Otherwise sum everything in the skb after the first
 * sizeof(struct ipv6hdr) bytes, starting from a zero seed.
 */
csum = skb_checksum(skb, sizeof(struct ipv6hdr),
skb->len - sizeof(struct ipv6hdr), 0);
return csum_fold(csum);
}
static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
struct net_device *dev,
__u8 dsfield,
......@@ -609,7 +621,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
struct net *net = tunnel->net;
struct net_device *tdev; /* Device to other host */
struct ipv6hdr *ipv6h; /* Our new IP header */
unsigned int max_headroom = 0; /* The extra header space needed */
unsigned int min_headroom = 0; /* The extra header space needed */
int gre_hlen;
struct ipv6_tel_txoption opt;
int mtu;
......@@ -617,7 +629,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
struct net_device_stats *stats = &tunnel->dev->stats;
int err = -1;
u8 proto;
struct sk_buff *new_skb;
__be16 protocol;
if (dev->type == ARPHRD_ETHER)
......@@ -660,14 +671,14 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
mtu = dst_mtu(dst) - sizeof(*ipv6h);
if (encap_limit >= 0) {
max_headroom += 8;
min_headroom += 8;
mtu -= 8;
}
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
if (skb_dst(skb))
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
if (skb->len > mtu) {
if (skb->len > mtu && !skb_is_gso(skb)) {
*pmtu = mtu;
err = -EMSGSIZE;
goto tx_err_dst_release;
......@@ -685,20 +696,19 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
min_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
(skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
new_skb = skb_realloc_headroom(skb, max_headroom);
if (max_headroom > dev->needed_headroom)
dev->needed_headroom = max_headroom;
if (!new_skb)
goto tx_err_dst_release;
if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
int head_delta = SKB_DATA_ALIGN(min_headroom -
skb_headroom(skb) +
16);
if (skb->sk)
skb_set_owner_w(new_skb, skb->sk);
consume_skb(skb);
skb = new_skb;
err = pskb_expand_head(skb, max_t(int, head_delta, 0),
0, GFP_ATOMIC);
if (min_headroom > dev->needed_headroom)
dev->needed_headroom = min_headroom;
if (unlikely(err))
goto tx_err_dst_release;
}
if (!fl6->flowi6_mark && ndst)
......@@ -711,10 +721,11 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
}
if (likely(!skb->encapsulation)) {
skb_reset_inner_headers(skb);
skb->encapsulation = 1;
}
err = iptunnel_handle_offloads(skb,
(tunnel->parms.o_flags & GRE_CSUM) ?
SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
if (err)
goto tx_err_dst_release;
skb_push(skb, gre_hlen);
skb_reset_network_header(skb);
......@@ -748,10 +759,11 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
*ptr = tunnel->parms.o_key;
ptr--;
}
if (tunnel->parms.o_flags&GRE_CSUM) {
if ((tunnel->parms.o_flags & GRE_CSUM) &&
!(skb_shinfo(skb)->gso_type &
(SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
*ptr = 0;
*(__sum16 *)ptr = ip_compute_csum((void *)(ipv6h+1),
skb->len - sizeof(struct ipv6hdr));
*(__sum16 *)ptr = gre6_checksum(skb);
}
}
......@@ -987,6 +999,8 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
dev->mtu = rt->dst.dev->mtu - addend;
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
if (dev->type == ARPHRD_ETHER)
dev->mtu -= ETH_HLEN;
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
......@@ -1505,6 +1519,11 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
.ndo_get_iflink = ip6_tnl_get_iflink,
};
/* Feature flags OR-ed into both dev->features and dev->hw_features when an
 * ip6gre link is created: scatter/gather, frag lists, high-memory DMA and
 * hardware checksum (meanings per the NETIF_F_* names -- confirm against
 * netdev_features.h).
 */
#define GRE6_FEATURES (NETIF_F_SG | \
NETIF_F_FRAGLIST | \
NETIF_F_HIGHDMA | \
NETIF_F_HW_CSUM)
static void ip6gre_tap_setup(struct net_device *dev)
{
......@@ -1538,9 +1557,21 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
nt->net = dev_net(dev);
ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
/* Can use a lockless transmit, unless we generate output sequences */
if (!(nt->parms.o_flags & GRE_SEQ))
dev->features |= GRE6_FEATURES;
dev->hw_features |= GRE6_FEATURES;
if (!(nt->parms.o_flags & GRE_SEQ)) {
/* TCP segmentation offload is not supported when we
* generate output sequences.
*/
dev->features |= NETIF_F_GSO_SOFTWARE;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
/* Can use a lockless transmit, unless we generate
* output sequences
*/
dev->features |= NETIF_F_LLTX;
}
err = register_netdevice(dev);
if (err)
......
......@@ -913,10 +913,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
goto tx_error;
}
skb = iptunnel_handle_offloads(skb, SKB_GSO_SIT);
if (IS_ERR(skb)) {
if (iptunnel_handle_offloads(skb, SKB_GSO_SIT)) {
ip_rt_put(rt);
goto out;
goto tx_error;
}
if (df) {
......@@ -992,7 +991,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
dst_link_failure(skb);
tx_error:
kfree_skb(skb);
out:
dev->stats.tx_errors++;
return NETDEV_TX_OK;
}
......@@ -1002,15 +1000,15 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tiph = &tunnel->parms.iph;
skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP);
if (IS_ERR(skb))
goto out;
if (iptunnel_handle_offloads(skb, SKB_GSO_IPIP))
goto tx_error;
skb_set_inner_ipproto(skb, IPPROTO_IPIP);
ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
return NETDEV_TX_OK;
out:
tx_error:
kfree_skb(skb);
dev->stats.tx_errors++;
return NETDEV_TX_OK;
}
......
......@@ -1013,8 +1013,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
if (IS_ERR(skb))
goto tx_error;
skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af));
if (IS_ERR(skb))
if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af)))
goto tx_error;
skb->transport_header = skb->network_header;
......@@ -1105,8 +1104,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
if (IS_ERR(skb))
goto tx_error;
skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af));
if (IS_ERR(skb))
if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af)))
goto tx_error;
skb->transport_header = skb->network_header;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment