Commit 41e76c56, authored by Shirley Ma, committed by Hideaki Yoshifuji

[IPV6]: Fix bugs in PMTU handling.

- Fix a crash caused by a redundant dst_release().
- Set the expiry timeout on the newly created route instead of on the original route.
- Select the correct MTU when the device MTU was changed while the device was down.
- Make the PMTU discovery timeout work on cloned routes.
- Behave more reasonably when the device MTU is increased administratively.
- Ported to 2.5.44 by Alexey N. Kuznetsov.
parent 6ddc851b
...@@ -1273,9 +1273,8 @@ static void addrconf_sit_config(struct net_device *dev) ...@@ -1273,9 +1273,8 @@ static void addrconf_sit_config(struct net_device *dev)
int addrconf_notify(struct notifier_block *this, unsigned long event, int addrconf_notify(struct notifier_block *this, unsigned long event,
void * data) void * data)
{ {
struct net_device *dev; struct net_device *dev = (struct net_device *) data;
struct inet6_dev *idev = __in6_dev_get(dev);
dev = (struct net_device *) data;
switch(event) { switch(event) {
case NETDEV_UP: case NETDEV_UP:
...@@ -1292,16 +1291,27 @@ int addrconf_notify(struct notifier_block *this, unsigned long event, ...@@ -1292,16 +1291,27 @@ int addrconf_notify(struct notifier_block *this, unsigned long event,
addrconf_dev_config(dev); addrconf_dev_config(dev);
break; break;
}; };
if (idev) {
/* If the MTU changed during the interface down, when the
interface up, the changed MTU must be reflected in the
idev as well as routers.
*/
if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) {
rt6_mtu_change(dev, dev->mtu);
idev->cnf.mtu6 = dev->mtu;
}
/* If the changed mtu during down is lower than IPV6_MIN_MTU
stop IPv6 on this interface.
*/
if (dev->mtu < IPV6_MIN_MTU)
addrconf_ifdown(dev, event != NETDEV_DOWN);
}
break; break;
case NETDEV_CHANGEMTU: case NETDEV_CHANGEMTU:
if (dev->mtu >= IPV6_MIN_MTU) { if ( idev && dev->mtu >= IPV6_MIN_MTU) {
struct inet6_dev *idev;
if ((idev = __in6_dev_get(dev)) == NULL)
break;
idev->cnf.mtu6 = dev->mtu;
rt6_mtu_change(dev, dev->mtu); rt6_mtu_change(dev, dev->mtu);
idev->cnf.mtu6 = dev->mtu;
break; break;
} }
......
...@@ -1168,22 +1168,12 @@ static int fib6_age(struct rt6_info *rt, void *arg) ...@@ -1168,22 +1168,12 @@ static int fib6_age(struct rt6_info *rt, void *arg)
{ {
unsigned long now = jiffies; unsigned long now = jiffies;
/* Age clones. Note, that clones are aged out
only if they are not in use now.
*/
if (rt->rt6i_flags & RTF_CACHE) {
if (atomic_read(&rt->u.dst.__refcnt) == 0 &&
(long)(now - rt->u.dst.lastuse) >= gc_args.timeout) {
RT6_TRACE("aging clone %p\n", rt);
return -1;
}
gc_args.more++;
}
/* /*
* check addrconf expiration here. * check addrconf expiration here.
* They are expired even if they are in use. * Routes are expired even if they are in use.
*
* Also age clones. Note, that clones are aged out
* only if they are not in use now.
*/ */
if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) { if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) {
...@@ -1192,6 +1182,13 @@ static int fib6_age(struct rt6_info *rt, void *arg) ...@@ -1192,6 +1182,13 @@ static int fib6_age(struct rt6_info *rt, void *arg)
return -1; return -1;
} }
gc_args.more++; gc_args.more++;
} else if (rt->rt6i_flags & RTF_CACHE) {
if (atomic_read(&rt->u.dst.__refcnt) == 0 &&
(long)(now - rt->u.dst.lastuse) >= gc_args.timeout) {
RT6_TRACE("aging clone %p\n", rt);
return -1;
}
gc_args.more++;
} }
return 0; return 0;
......
...@@ -1239,12 +1239,8 @@ int ndisc_rcv(struct sk_buff *skb) ...@@ -1239,12 +1239,8 @@ int ndisc_rcv(struct sk_buff *skb)
*/ */
struct rt6_info *rt; struct rt6_info *rt;
rt = rt6_get_dflt_router(saddr, skb->dev); rt = rt6_get_dflt_router(saddr, skb->dev);
if (rt) { if (rt)
/* It is safe only because
we aer in BH */
dst_release(&rt->u.dst);
ip6_del_rt(rt); ip6_del_rt(rt);
}
} }
} else { } else {
if (msg->icmph.icmp6_router) if (msg->icmph.icmp6_router)
......
...@@ -944,7 +944,11 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, ...@@ -944,7 +944,11 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
if (net_ratelimit()) if (net_ratelimit())
printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n", printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
pmtu); pmtu);
return; /* According to RFC1981, the PMTU is set to the IPv6 minimum
link MTU if the node receives a Packet Too Big message
reporting next-hop MTU that is less than the IPv6 minimum MTU.
*/
pmtu = IPV6_MIN_MTU;
} }
rt = rt6_lookup(daddr, saddr, dev->ifindex, 0); rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
...@@ -982,7 +986,13 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, ...@@ -982,7 +986,13 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
nrt = rt6_cow(rt, daddr, saddr); nrt = rt6_cow(rt, daddr, saddr);
if (!nrt->u.dst.error) { if (!nrt->u.dst.error) {
nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires); /* According to RFC 1981, detecting PMTU increase shouldn't be
happened within 5 mins, the recommended timer is 10 mins.
Here this route expiration time is set to ip6_rt_mtu_expires
which is 10 mins. After 10 mins the decreased pmtu is expired
and detecting PMTU increase will be automatically happened.
*/
dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
dst_release(&nrt->u.dst); dst_release(&nrt->u.dst);
} }
...@@ -994,7 +1004,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, ...@@ -994,7 +1004,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
nrt->rt6i_dst.plen = 128; nrt->rt6i_dst.plen = 128;
nrt->u.dst.flags |= DST_HOST; nrt->u.dst.flags |= DST_HOST;
nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop); nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires); dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES; nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
rt6_ins(nrt); rt6_ins(nrt);
...@@ -1233,15 +1243,34 @@ struct rt6_mtu_change_arg ...@@ -1233,15 +1243,34 @@ struct rt6_mtu_change_arg
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{ {
struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
struct inet6_dev *idev;
/* In IPv6 pmtu discovery is not optional, /* In IPv6 pmtu discovery is not optional,
so that RTAX_MTU lock cannot disable it. so that RTAX_MTU lock cannot disable it.
We still use this lock to block changes We still use this lock to block changes
caused by addrconf/ndisc. caused by addrconf/ndisc.
*/ */
idev = __in6_dev_get(arg->dev);
/* For administrative MTU increase, there is no way to discover
IPv6 PMTU increase, so PMTU increase should be updated here.
Since RFC 1981 doesn't include administrative MTU increase
update PMTU increase is a MUST. (i.e. jumbo frame)
*/
/*
If new MTU is less than route PMTU, this new MTU will be the
lowest MTU in the path, update the route PMTU to refect PMTU
decreases; if new MTU is greater than route PMTU, and the
old MTU is the lowest MTU in the path, update the route PMTU
to refect the increase. In this case if the other nodes' MTU
also have the lowest MTU, TOO BIG MESSAGE will be lead to
PMTU discouvery.
*/
if (rt->rt6i_dev == arg->dev && if (rt->rt6i_dev == arg->dev &&
rt->u.dst.metrics[RTAX_MTU-1] > arg->mtu && !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
!dst_metric_locked(&rt->u.dst, RTAX_MTU)) (dst_pmtu(&rt->u.dst) > arg->mtu ||
(dst_pmtu(&rt->u.dst) < arg->mtu &&
dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, arg->mtu - 60, ip6_rt_min_advmss); rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, arg->mtu - 60, ip6_rt_min_advmss);
if (rt->u.dst.metrics[RTAX_ADVMSS-1] > 65535-20) if (rt->u.dst.metrics[RTAX_ADVMSS-1] > 65535-20)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment