Commit a4b82994 authored by David S. Miller

Merge branch 'ip_tunnel-next'

wenxu says:

====================
ip_tunnel: Refactor ip_gre collect metadata xmit to ip_md_tunnel_xmit

This patchset adds the tunnel_dst_cache and tnl_update_pmtu features to
ip_md_tunnel_xmit, along with a bugfix. It then refactors collect-metadata
mode tunnel xmit to use ip_md_tunnel_xmit.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 662a14d0 962924fa
...@@ -267,7 +267,7 @@ void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id, ...@@ -267,7 +267,7 @@ void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id,
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, const u8 protocol); const struct iphdr *tnl_params, const u8 protocol);
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const u8 proto); const u8 proto, int tunnel_hlen);
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
......
...@@ -458,81 +458,14 @@ static int gre_handle_offloads(struct sk_buff *skb, bool csum) ...@@ -458,81 +458,14 @@ static int gre_handle_offloads(struct sk_buff *skb, bool csum)
return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
} }
static struct rtable *gre_get_rt(struct sk_buff *skb,
struct net_device *dev,
struct flowi4 *fl,
const struct ip_tunnel_key *key)
{
struct net *net = dev_net(dev);
memset(fl, 0, sizeof(*fl));
fl->daddr = key->u.ipv4.dst;
fl->saddr = key->u.ipv4.src;
fl->flowi4_tos = RT_TOS(key->tos);
fl->flowi4_mark = skb->mark;
fl->flowi4_proto = IPPROTO_GRE;
return ip_route_output_key(net, fl);
}
static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
struct net_device *dev,
struct flowi4 *fl,
int tunnel_hlen)
{
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct rtable *rt = NULL;
int min_headroom;
bool use_cache;
int err;
tun_info = skb_tunnel_info(skb);
key = &tun_info->key;
use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
if (use_cache)
rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
if (!rt) {
rt = gre_get_rt(skb, dev, fl, key);
if (IS_ERR(rt))
goto err_free_skb;
if (use_cache)
dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
fl->saddr);
}
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ tunnel_hlen + sizeof(struct iphdr);
if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
int head_delta = SKB_DATA_ALIGN(min_headroom -
skb_headroom(skb) +
16);
err = pskb_expand_head(skb, max_t(int, head_delta, 0),
0, GFP_ATOMIC);
if (unlikely(err))
goto err_free_rt;
}
return rt;
err_free_rt:
ip_rt_put(rt);
err_free_skb:
kfree_skb(skb);
dev->stats.tx_dropped++;
return NULL;
}
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto) __be16 proto)
{ {
struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel *tunnel = netdev_priv(dev);
struct ip_tunnel_info *tun_info; struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key; const struct ip_tunnel_key *key;
struct rtable *rt = NULL;
struct flowi4 fl;
int tunnel_hlen; int tunnel_hlen;
__be16 df, flags; __be16 flags;
tun_info = skb_tunnel_info(skb); tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
...@@ -542,13 +475,12 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, ...@@ -542,13 +475,12 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
key = &tun_info->key; key = &tun_info->key;
tunnel_hlen = gre_calc_hlen(key->tun_flags); tunnel_hlen = gre_calc_hlen(key->tun_flags);
rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen); if (skb_cow_head(skb, dev->needed_headroom))
if (!rt) goto err_free_skb;
return;
/* Push Tunnel header. */ /* Push Tunnel header. */
if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM))) if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
goto err_free_rt; goto err_free_skb;
flags = tun_info->key.tun_flags & flags = tun_info->key.tun_flags &
(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
...@@ -556,14 +488,10 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, ...@@ -556,14 +488,10 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
tunnel_id_to_key32(tun_info->key.tun_id), tunnel_id_to_key32(tun_info->key.tun_id),
(flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
key->tos, key->ttl, df, false);
return; return;
err_free_rt:
ip_rt_put(rt);
err_free_skb: err_free_skb:
kfree_skb(skb); kfree_skb(skb);
dev->stats.tx_dropped++; dev->stats.tx_dropped++;
...@@ -575,10 +503,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -575,10 +503,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
struct ip_tunnel_info *tun_info; struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key; const struct ip_tunnel_key *key;
struct erspan_metadata *md; struct erspan_metadata *md;
struct rtable *rt = NULL;
bool truncate = false; bool truncate = false;
__be16 df, proto; __be16 proto;
struct flowi4 fl;
int tunnel_hlen; int tunnel_hlen;
int version; int version;
int nhoff; int nhoff;
...@@ -591,21 +517,20 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -591,21 +517,20 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
key = &tun_info->key; key = &tun_info->key;
if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
goto err_free_rt; goto err_free_skb;
md = ip_tunnel_info_opts(tun_info); md = ip_tunnel_info_opts(tun_info);
if (!md) if (!md)
goto err_free_rt; goto err_free_skb;
/* ERSPAN has fixed 8 byte GRE header */ /* ERSPAN has fixed 8 byte GRE header */
version = md->version; version = md->version;
tunnel_hlen = 8 + erspan_hdr_len(version); tunnel_hlen = 8 + erspan_hdr_len(version);
rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen); if (skb_cow_head(skb, dev->needed_headroom))
if (!rt) goto err_free_skb;
return;
if (gre_handle_offloads(skb, false)) if (gre_handle_offloads(skb, false))
goto err_free_rt; goto err_free_skb;
if (skb->len > dev->mtu + dev->hard_header_len) { if (skb->len > dev->mtu + dev->hard_header_len) {
pskb_trim(skb, dev->mtu + dev->hard_header_len); pskb_trim(skb, dev->mtu + dev->hard_header_len);
...@@ -634,20 +559,16 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -634,20 +559,16 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
truncate, true); truncate, true);
proto = htons(ETH_P_ERSPAN2); proto = htons(ETH_P_ERSPAN2);
} else { } else {
goto err_free_rt; goto err_free_skb;
} }
gre_build_header(skb, 8, TUNNEL_SEQ, gre_build_header(skb, 8, TUNNEL_SEQ,
proto, 0, htonl(tunnel->o_seqno++)); proto, 0, htonl(tunnel->o_seqno++));
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
key->tos, key->ttl, df, false);
return; return;
err_free_rt:
ip_rt_put(rt);
err_free_skb: err_free_skb:
kfree_skb(skb); kfree_skb(skb);
dev->stats.tx_dropped++; dev->stats.tx_dropped++;
...@@ -656,13 +577,18 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -656,13 +577,18 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{ {
struct ip_tunnel_info *info = skb_tunnel_info(skb); struct ip_tunnel_info *info = skb_tunnel_info(skb);
const struct ip_tunnel_key *key;
struct rtable *rt; struct rtable *rt;
struct flowi4 fl4; struct flowi4 fl4;
if (ip_tunnel_info_af(info) != AF_INET) if (ip_tunnel_info_af(info) != AF_INET)
return -EINVAL; return -EINVAL;
rt = gre_get_rt(skb, dev, &fl4, &info->key); key = &info->key;
ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
tunnel_id_to_key32(key->tun_id), key->tos, 0,
skb->mark);
rt = ip_route_output_key(dev_net(dev), &fl4);
if (IS_ERR(rt)) if (IS_ERR(rt))
return PTR_ERR(rt); return PTR_ERR(rt);
......
...@@ -501,15 +501,19 @@ EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup); ...@@ -501,15 +501,19 @@ EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
struct rtable *rt, __be16 df, struct rtable *rt, __be16 df,
const struct iphdr *inner_iph) const struct iphdr *inner_iph,
int tunnel_hlen, __be32 dst, bool md)
{ {
struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel *tunnel = netdev_priv(dev);
int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; int pkt_size;
int mtu; int mtu;
tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
pkt_size = skb->len - tunnel_hlen - dev->hard_header_len;
if (df) if (df)
mtu = dst_mtu(&rt->dst) - dev->hard_header_len mtu = dst_mtu(&rt->dst) - dev->hard_header_len
- sizeof(struct iphdr) - tunnel->hlen; - sizeof(struct iphdr) - tunnel_hlen;
else else
mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
...@@ -527,11 +531,13 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, ...@@ -527,11 +531,13 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
else if (skb->protocol == htons(ETH_P_IPV6)) { else if (skb->protocol == htons(ETH_P_IPV6)) {
struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
__be32 daddr;
daddr = md ? dst : tunnel->parms.iph.daddr;
if (rt6 && mtu < dst_mtu(skb_dst(skb)) && if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
mtu >= IPV6_MIN_MTU) { mtu >= IPV6_MIN_MTU) {
if ((tunnel->parms.iph.daddr && if ((daddr && !ipv4_is_multicast(daddr)) ||
!ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
rt6->rt6i_dst.plen == 128) { rt6->rt6i_dst.plen == 128) {
rt6->rt6i_flags |= RTF_MODIFIED; rt6->rt6i_flags |= RTF_MODIFIED;
dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
...@@ -548,17 +554,19 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, ...@@ -548,17 +554,19 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
return 0; return 0;
} }
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto) void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
u8 proto, int tunnel_hlen)
{ {
struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel *tunnel = netdev_priv(dev);
u32 headroom = sizeof(struct iphdr); u32 headroom = sizeof(struct iphdr);
struct ip_tunnel_info *tun_info; struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key; const struct ip_tunnel_key *key;
const struct iphdr *inner_iph; const struct iphdr *inner_iph;
struct rtable *rt; struct rtable *rt = NULL;
struct flowi4 fl4; struct flowi4 fl4;
__be16 df = 0; __be16 df = 0;
u8 tos, ttl; u8 tos, ttl;
bool use_cache;
tun_info = skb_tunnel_info(skb); tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
...@@ -574,20 +582,39 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto) ...@@ -574,20 +582,39 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
else if (skb->protocol == htons(ETH_P_IPV6)) else if (skb->protocol == htons(ETH_P_IPV6))
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
} }
ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0, ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
RT_TOS(tos), tunnel->parms.link, tunnel->fwmark); tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
0, skb->mark);
if (tunnel->encap.type != TUNNEL_ENCAP_NONE) if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
goto tx_error; goto tx_error;
rt = ip_route_output_key(tunnel->net, &fl4);
if (IS_ERR(rt)) { use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
dev->stats.tx_carrier_errors++; if (use_cache)
goto tx_error; rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
if (!rt) {
rt = ip_route_output_key(tunnel->net, &fl4);
if (IS_ERR(rt)) {
dev->stats.tx_carrier_errors++;
goto tx_error;
}
if (use_cache)
dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
fl4.saddr);
} }
if (rt->dst.dev == dev) { if (rt->dst.dev == dev) {
ip_rt_put(rt); ip_rt_put(rt);
dev->stats.collisions++; dev->stats.collisions++;
goto tx_error; goto tx_error;
} }
if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
df = htons(IP_DF);
if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
key->u.ipv4.dst, true)) {
ip_rt_put(rt);
goto tx_error;
}
tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
ttl = key->ttl; ttl = key->ttl;
if (ttl == 0) { if (ttl == 0) {
...@@ -598,10 +625,10 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto) ...@@ -598,10 +625,10 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
else else
ttl = ip4_dst_hoplimit(&rt->dst); ttl = ip4_dst_hoplimit(&rt->dst);
} }
if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
df = htons(IP_DF); if (!df && skb->protocol == htons(ETH_P_IP))
else if (skb->protocol == htons(ETH_P_IP))
df = inner_iph->frag_off & htons(IP_DF); df = inner_iph->frag_off & htons(IP_DF);
headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
if (headroom > dev->needed_headroom) if (headroom > dev->needed_headroom)
dev->needed_headroom = headroom; dev->needed_headroom = headroom;
...@@ -731,7 +758,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ...@@ -731,7 +758,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error; goto tx_error;
} }
if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) { if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph,
0, 0, false)) {
ip_rt_put(rt); ip_rt_put(rt);
goto tx_error; goto tx_error;
} }
......
...@@ -302,7 +302,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, ...@@ -302,7 +302,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
skb_set_inner_ipproto(skb, ipproto); skb_set_inner_ipproto(skb, ipproto);
if (tunnel->collect_md) if (tunnel->collect_md)
ip_md_tunnel_xmit(skb, dev, ipproto); ip_md_tunnel_xmit(skb, dev, ipproto, 0);
else else
ip_tunnel_xmit(skb, dev, tiph, ipproto); ip_tunnel_xmit(skb, dev, tiph, ipproto);
return NETDEV_TX_OK; return NETDEV_TX_OK;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment