Commit 8b123d00 authored by Hideaki Yoshifuji

[IPV6] Always add a fragment header after receiving TOO BIG w/ pmtu < 1280.

According to RFC2460, when a node receives a Too Big message
reporting a PMTU less than the IPv6 Minimum Link MTU (1280),
the PMTU is set to the IPv6 Minimum Link MTU (1280) and a
fragment header should always be included.
Signed-off-by: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
parent 3b480426
...@@ -152,6 +152,7 @@ struct inet_sock { ...@@ -152,6 +152,7 @@ struct inet_sock {
}; };
#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ #define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */
#define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */
static inline struct inet_sock *inet_sk(const struct sock *sk) static inline struct inet_sock *inet_sk(const struct sock *sk)
{ {
......
...@@ -346,6 +346,7 @@ enum ...@@ -346,6 +346,7 @@ enum
#define RTAX_FEATURE_ECN 0x00000001 #define RTAX_FEATURE_ECN 0x00000001
#define RTAX_FEATURE_SACK 0x00000002 #define RTAX_FEATURE_SACK 0x00000002
#define RTAX_FEATURE_TIMESTAMP 0x00000004 #define RTAX_FEATURE_TIMESTAMP 0x00000004
#define RTAX_FEATURE_ALLFRAG 0x00000008
struct rta_session struct rta_session
{ {
......
...@@ -124,6 +124,15 @@ dst_pmtu(const struct dst_entry *dst) ...@@ -124,6 +124,15 @@ dst_pmtu(const struct dst_entry *dst)
return mtu; return mtu;
} }
/*
 * dst_allfrag - report whether the "always fragment" feature is set on a path.
 * @dst: destination cache entry whose path metrics are inspected.
 *
 * Reads the RTAX_FEATURES path metric of @dst and masks it with
 * RTAX_FEATURE_ALLFRAG.  Returns the flag bit (non-zero) when it is set
 * and 0 otherwise, so the result can be used directly as a truth value.
 */
static inline u32
dst_allfrag(const struct dst_entry *dst)
{
	/* Hold the masked bit in the same unsigned type the function returns. */
	u32 ret = dst_path_metric(dst, RTAX_FEATURES) & RTAX_FEATURE_ALLFRAG;
	/* Yes, _exactly_. This is paranoia. */
	barrier();
	return ret;
}
static inline int static inline int
dst_metric_locked(struct dst_entry *dst, int metric) dst_metric_locked(struct dst_entry *dst, int metric)
{ {
......
...@@ -147,7 +147,7 @@ static int ip6_output2(struct sk_buff *skb) ...@@ -147,7 +147,7 @@ static int ip6_output2(struct sk_buff *skb)
int ip6_output(struct sk_buff *skb) int ip6_output(struct sk_buff *skb)
{ {
if (skb->len > dst_pmtu(skb->dst)) if (skb->len > dst_pmtu(skb->dst) || dst_allfrag(skb->dst))
return ip6_fragment(skb, ip6_output2); return ip6_fragment(skb, ip6_output2);
else else
return ip6_output2(skb); return ip6_output2(skb);
...@@ -848,6 +848,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse ...@@ -848,6 +848,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse
inet->cork.fl = *fl; inet->cork.fl = *fl;
np->cork.hop_limit = hlimit; np->cork.hop_limit = hlimit;
inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst); inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
if (dst_allfrag(&rt->u.dst))
inet->cork.flags |= IPCORK_ALLFRAG;
inet->cork.length = 0; inet->cork.length = 0;
sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_page = NULL;
sk->sk_sndmsg_off = 0; sk->sk_sndmsg_off = 0;
...@@ -899,7 +901,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse ...@@ -899,7 +901,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse
while (length > 0) { while (length > 0) {
/* Check if the remaining data fits into current packet. */ /* Check if the remaining data fits into current packet. */
copy = mtu - skb->len; copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
if (copy < length) if (copy < length)
copy = maxfraglen - skb->len; copy = maxfraglen - skb->len;
...@@ -924,7 +926,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse ...@@ -924,7 +926,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse
* we know we need more fragment(s). * we know we need more fragment(s).
*/ */
datalen = length + fraggap; datalen = length + fraggap;
if (datalen > mtu - fragheaderlen) if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
datalen = maxfraglen - fragheaderlen; datalen = maxfraglen - fragheaderlen;
fraglen = datalen + fragheaderlen; fraglen = datalen + fragheaderlen;
...@@ -1158,6 +1160,7 @@ int ip6_push_pending_frames(struct sock *sk) ...@@ -1158,6 +1160,7 @@ int ip6_push_pending_frames(struct sock *sk)
if (np->cork.rt) { if (np->cork.rt) {
dst_release(&np->cork.rt->u.dst); dst_release(&np->cork.rt->u.dst);
np->cork.rt = NULL; np->cork.rt = NULL;
inet->cork.flags &= ~IPCORK_ALLFRAG;
} }
memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
return err; return err;
...@@ -1185,6 +1188,7 @@ void ip6_flush_pending_frames(struct sock *sk) ...@@ -1185,6 +1188,7 @@ void ip6_flush_pending_frames(struct sock *sk)
if (np->cork.rt) { if (np->cork.rt) {
dst_release(&np->cork.rt->u.dst); dst_release(&np->cork.rt->u.dst);
np->cork.rt = NULL; np->cork.rt = NULL;
inet->cork.flags &= ~IPCORK_ALLFRAG;
} }
memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
} }
...@@ -628,8 +628,10 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) ...@@ -628,8 +628,10 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) { if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
rt6->rt6i_flags |= RTF_MODIFIED; rt6->rt6i_flags |= RTF_MODIFIED;
if (mtu < IPV6_MIN_MTU) if (mtu < IPV6_MIN_MTU) {
mtu = IPV6_MIN_MTU; mtu = IPV6_MIN_MTU;
dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
}
dst->metrics[RTAX_MTU-1] = mtu; dst->metrics[RTAX_MTU-1] = mtu;
} }
} }
...@@ -1164,26 +1166,26 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, ...@@ -1164,26 +1166,26 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
struct net_device *dev, u32 pmtu) struct net_device *dev, u32 pmtu)
{ {
struct rt6_info *rt, *nrt; struct rt6_info *rt, *nrt;
int allfrag = 0;
if (pmtu < IPV6_MIN_MTU) {
if (net_ratelimit())
printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
pmtu);
/* According to RFC1981, the PMTU is set to the IPv6 minimum
link MTU if the node receives a Packet Too Big message
reporting next-hop MTU that is less than the IPv6 minimum MTU.
*/
pmtu = IPV6_MIN_MTU;
}
rt = rt6_lookup(daddr, saddr, dev->ifindex, 0); rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
if (rt == NULL) if (rt == NULL)
return; return;
if (pmtu >= dst_pmtu(&rt->u.dst)) if (pmtu >= dst_pmtu(&rt->u.dst))
goto out; goto out;
if (pmtu < IPV6_MIN_MTU) {
/*
* According to RFC2460, PMTU is set to the IPv6 Minimum Link
* MTU (1280) and a fragment header should always be included
* after a node receiving Too Big message reporting PMTU is
* less than the IPv6 Minimum Link MTU.
*/
pmtu = IPV6_MIN_MTU;
allfrag = 1;
}
/* New mtu received -> path was valid. /* New mtu received -> path was valid.
They are sent only in response to data packets, They are sent only in response to data packets,
so that this nexthop apparently is reachable. --ANK so that this nexthop apparently is reachable. --ANK
...@@ -1197,6 +1199,8 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, ...@@ -1197,6 +1199,8 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
*/ */
if (rt->rt6i_flags & RTF_CACHE) { if (rt->rt6i_flags & RTF_CACHE) {
rt->u.dst.metrics[RTAX_MTU-1] = pmtu; rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
if (allfrag)
rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires); dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
goto out; goto out;
...@@ -1211,6 +1215,8 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, ...@@ -1211,6 +1215,8 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
nrt = rt6_cow(rt, daddr, saddr); nrt = rt6_cow(rt, daddr, saddr);
if (!nrt->u.dst.error) { if (!nrt->u.dst.error) {
nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
if (allfrag)
nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
/* According to RFC 1981, detecting PMTU increase shouldn't be /* According to RFC 1981, detecting PMTU increase shouldn't be
happened within 5 mins, the recommended timer is 10 mins. happened within 5 mins, the recommended timer is 10 mins.
Here this route expiration time is set to ip6_rt_mtu_expires Here this route expiration time is set to ip6_rt_mtu_expires
...@@ -1232,6 +1238,8 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, ...@@ -1232,6 +1238,8 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES; nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
if (allfrag)
nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
ip6_ins_rt(nrt, NULL, NULL); ip6_ins_rt(nrt, NULL, NULL);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment