Commit ede372dc authored by David S. Miller

Merge branch 'net-improve-the-process-of-redirect-and-toobig-for-ipv6-tunnels'

Xin Long says:

====================
net: improve the process of redirect and toobig for ipv6 tunnels

There are three kinds of ICMP packets for tunnels to process: toobig
(needfrag), redirect, and others. They should be handled as follows:

 - toobig(needfrag)
   update the lower dst's pmtu via the route cache, and also update the sk
   dst's pmtu if possible; otherwise it is fine for the sk dst's pmtu to
   get updated on the tx path.

 - redirect
   update the lower dst's gw via the route cache and return; there is no
   need to send this redirect packet to the user sk.

 - others
   send the packet to the user's sk; alternatively, it is also fine to
   count it with err_count and report the failed link on the tx path.

All ipv4 tunnels basically follow this, while some ipv6 tunnels behave
differently: ip6gre and ip6_tunnels update the tnl dev's mtu instead of
updating the lower dst's pmtu, and they have no redirect processing in
their err_handlers, which doesn't make any sense and even causes
performance problems.

This patchset improves the processing of redirect and toobig for ip6gre,
ip4ip6 and ip6ip6 tunnels, as is done in ipv4 tunnels.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 0d728b84 77552cfa
...@@ -369,6 +369,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) ...@@ -369,6 +369,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info) u8 type, u8 code, int offset, __be32 info)
{ {
struct net *net = dev_net(skb->dev);
const struct gre_base_hdr *greh; const struct gre_base_hdr *greh;
const struct ipv6hdr *ipv6h; const struct ipv6hdr *ipv6h;
int grehlen = sizeof(*greh); int grehlen = sizeof(*greh);
...@@ -402,9 +403,8 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ...@@ -402,9 +403,8 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return; return;
switch (type) { switch (type) {
__u32 teli;
struct ipv6_tlv_tnl_enc_lim *tel; struct ipv6_tlv_tnl_enc_lim *tel;
__u32 mtu; __u32 teli;
case ICMPV6_DEST_UNREACH: case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name); t->parms.name);
...@@ -435,12 +435,11 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ...@@ -435,12 +435,11 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
} }
return; return;
case ICMPV6_PKT_TOOBIG: case ICMPV6_PKT_TOOBIG:
mtu = be32_to_cpu(info) - offset - t->tun_hlen; ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
if (t->dev->type == ARPHRD_ETHER) return;
mtu -= ETH_HLEN; case NDISC_REDIRECT:
if (mtu < IPV6_MIN_MTU) ip6_redirect(skb, net, skb->dev->ifindex, 0,
mtu = IPV6_MIN_MTU; sock_net_uid(net, NULL));
t->dev->mtu = mtu;
return; return;
} }
...@@ -503,7 +502,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, ...@@ -503,7 +502,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
__u32 *pmtu, __be16 proto) __u32 *pmtu, __be16 proto)
{ {
struct ip6_tnl *tunnel = netdev_priv(dev); struct ip6_tnl *tunnel = netdev_priv(dev);
struct dst_entry *dst = skb_dst(skb);
__be16 protocol; __be16 protocol;
if (dev->type == ARPHRD_ETHER) if (dev->type == ARPHRD_ETHER)
...@@ -522,10 +520,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, ...@@ -522,10 +520,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
/* TooBig packet may have updated dst->dev's mtu */
if (dst && dst_mtu(dst) > dst->dev->mtu)
dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
NEXTHDR_GRE); NEXTHDR_GRE);
} }
......
...@@ -471,15 +471,16 @@ static int ...@@ -471,15 +471,16 @@ static int
ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
u8 *type, u8 *code, int *msg, __u32 *info, int offset) u8 *type, u8 *code, int *msg, __u32 *info, int offset)
{ {
const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data; const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
struct ip6_tnl *t; struct net *net = dev_net(skb->dev);
int rel_msg = 0;
u8 rel_type = ICMPV6_DEST_UNREACH; u8 rel_type = ICMPV6_DEST_UNREACH;
u8 rel_code = ICMPV6_ADDR_UNREACH; u8 rel_code = ICMPV6_ADDR_UNREACH;
u8 tproto;
__u32 rel_info = 0; __u32 rel_info = 0;
__u16 len; struct ip6_tnl *t;
int err = -ENOENT; int err = -ENOENT;
int rel_msg = 0;
u8 tproto;
__u16 len;
/* If the packet doesn't contain the original IPv6 header we are /* If the packet doesn't contain the original IPv6 header we are
in trouble since we might need the source address for further in trouble since we might need the source address for further
...@@ -497,9 +498,8 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, ...@@ -497,9 +498,8 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
err = 0; err = 0;
switch (*type) { switch (*type) {
__u32 teli;
struct ipv6_tlv_tnl_enc_lim *tel; struct ipv6_tlv_tnl_enc_lim *tel;
__u32 mtu; __u32 mtu, teli;
case ICMPV6_DEST_UNREACH: case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name); t->parms.name);
...@@ -530,11 +530,11 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, ...@@ -530,11 +530,11 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
} }
break; break;
case ICMPV6_PKT_TOOBIG: case ICMPV6_PKT_TOOBIG:
ip6_update_pmtu(skb, net, htonl(*info), 0, 0,
sock_net_uid(net, NULL));
mtu = *info - offset; mtu = *info - offset;
if (mtu < IPV6_MIN_MTU) if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU; mtu = IPV6_MIN_MTU;
t->dev->mtu = mtu;
len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len); len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len);
if (len > mtu) { if (len > mtu) {
rel_type = ICMPV6_PKT_TOOBIG; rel_type = ICMPV6_PKT_TOOBIG;
...@@ -543,6 +543,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, ...@@ -543,6 +543,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
rel_msg = 1; rel_msg = 1;
} }
break; break;
case NDISC_REDIRECT:
ip6_redirect(skb, net, skb->dev->ifindex, 0,
sock_net_uid(net, NULL));
break;
} }
*type = rel_type; *type = rel_type;
...@@ -559,13 +563,12 @@ static int ...@@ -559,13 +563,12 @@ static int
ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info) u8 type, u8 code, int offset, __be32 info)
{ {
int rel_msg = 0;
u8 rel_type = type;
u8 rel_code = code;
__u32 rel_info = ntohl(info); __u32 rel_info = ntohl(info);
int err;
struct sk_buff *skb2;
const struct iphdr *eiph; const struct iphdr *eiph;
struct sk_buff *skb2;
int err, rel_msg = 0;
u8 rel_type = type;
u8 rel_code = code;
struct rtable *rt; struct rtable *rt;
struct flowi4 fl4; struct flowi4 fl4;
...@@ -590,10 +593,6 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ...@@ -590,10 +593,6 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
rel_type = ICMP_DEST_UNREACH; rel_type = ICMP_DEST_UNREACH;
rel_code = ICMP_FRAG_NEEDED; rel_code = ICMP_FRAG_NEEDED;
break; break;
case NDISC_REDIRECT:
rel_type = ICMP_REDIRECT;
rel_code = ICMP_REDIR_HOST;
/* fall through */
default: default:
return 0; return 0;
} }
...@@ -612,33 +611,26 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ...@@ -612,33 +611,26 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
eiph = ip_hdr(skb2); eiph = ip_hdr(skb2);
/* Try to guess incoming interface */ /* Try to guess incoming interface */
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
eiph->saddr, 0, 0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
0, 0,
IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
if (IS_ERR(rt)) if (IS_ERR(rt))
goto out; goto out;
skb2->dev = rt->dst.dev; skb2->dev = rt->dst.dev;
ip_rt_put(rt);
/* route "incoming" packet */ /* route "incoming" packet */
if (rt->rt_flags & RTCF_LOCAL) { if (rt->rt_flags & RTCF_LOCAL) {
ip_rt_put(rt);
rt = NULL;
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
eiph->daddr, eiph->saddr, eiph->daddr, eiph->saddr, 0, 0,
0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
IPPROTO_IPIP, if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) {
RT_TOS(eiph->tos), 0);
if (IS_ERR(rt) ||
rt->dst.dev->type != ARPHRD_TUNNEL) {
if (!IS_ERR(rt)) if (!IS_ERR(rt))
ip_rt_put(rt); ip_rt_put(rt);
goto out; goto out;
} }
skb_dst_set(skb2, &rt->dst); skb_dst_set(skb2, &rt->dst);
} else { } else {
ip_rt_put(rt);
if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
skb2->dev) || skb2->dev) ||
skb_dst(skb2)->dev->type != ARPHRD_TUNNEL) skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
...@@ -650,10 +642,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ...@@ -650,10 +642,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (rel_info > dst_mtu(skb_dst(skb2))) if (rel_info > dst_mtu(skb_dst(skb2)))
goto out; goto out;
skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info); skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2,
rel_info);
} }
if (rel_type == ICMP_REDIRECT)
skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
...@@ -666,11 +657,10 @@ static int ...@@ -666,11 +657,10 @@ static int
ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info) u8 type, u8 code, int offset, __be32 info)
{ {
int rel_msg = 0; __u32 rel_info = ntohl(info);
int err, rel_msg = 0;
u8 rel_type = type; u8 rel_type = type;
u8 rel_code = code; u8 rel_code = code;
__u32 rel_info = ntohl(info);
int err;
err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code, err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
&rel_msg, &rel_info, offset); &rel_msg, &rel_info, offset);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment