Commit 628a5c56, authored by John Heffner, committed by David S. Miller

[INET]: Add IP(V6)_PMTUDISC_PROBE

Add IP(V6)_PMTUDISC_PROBE value for IP(V6)_MTU_DISCOVER.  This option forces
us not to fragment, but does not make use of the kernel path MTU discovery.
That is, it allows for user-mode MTU probing (or, packetization-layer path
MTU discovery).  This is particularly useful for diagnostic utilities, like
traceroute/tracepath.
Signed-off-by: John Heffner <jheffner@psc.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent b881ef76
...@@ -83,6 +83,7 @@ struct in_addr { ...@@ -83,6 +83,7 @@ struct in_addr {
#define IP_PMTUDISC_DONT 0 /* Never send DF frames */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */
#define IP_PMTUDISC_WANT 1 /* Use per route hints */ #define IP_PMTUDISC_WANT 1 /* Use per route hints */
#define IP_PMTUDISC_DO 2 /* Always DF */ #define IP_PMTUDISC_DO 2 /* Always DF */
#define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */
#define IP_MULTICAST_IF 32 #define IP_MULTICAST_IF 32
#define IP_MULTICAST_TTL 33 #define IP_MULTICAST_TTL 33
......
...@@ -179,6 +179,7 @@ struct in6_flowlabel_req ...@@ -179,6 +179,7 @@ struct in6_flowlabel_req
#define IPV6_PMTUDISC_DONT 0 #define IPV6_PMTUDISC_DONT 0
#define IPV6_PMTUDISC_WANT 1 #define IPV6_PMTUDISC_WANT 1
#define IPV6_PMTUDISC_DO 2 #define IPV6_PMTUDISC_DO 2
#define IPV6_PMTUDISC_PROBE 3
/* Flowlabel */ /* Flowlabel */
#define IPV6_FLOWLABEL_MGR 32 #define IPV6_FLOWLABEL_MGR 32
......
...@@ -189,6 +189,14 @@ static inline int ip_finish_output2(struct sk_buff *skb) ...@@ -189,6 +189,14 @@ static inline int ip_finish_output2(struct sk_buff *skb)
return -EINVAL; return -EINVAL;
} }
/*
 * Select the MTU that an outgoing skb is checked against.
 *
 * If the skb has a socket attached and that socket's pmtudisc setting is
 * IP_PMTUDISC_PROBE, the MTU of the dst's device is returned. In every
 * other case (no socket, or any other pmtudisc value) the value comes
 * from dst_mtu() on the skb's dst.
 */
static inline int ip_skb_dst_mtu(struct sk_buff *skb)
{
	struct inet_sock *isk;

	/* No socket attached: nothing to consult, use the dst MTU. */
	if (!skb->sk)
		return dst_mtu(skb->dst);

	isk = inet_sk(skb->sk);
	if (isk && isk->pmtudisc == IP_PMTUDISC_PROBE)
		return skb->dst->dev->mtu;

	return dst_mtu(skb->dst);
}
static inline int ip_finish_output(struct sk_buff *skb) static inline int ip_finish_output(struct sk_buff *skb)
{ {
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
...@@ -198,7 +206,7 @@ static inline int ip_finish_output(struct sk_buff *skb) ...@@ -198,7 +206,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
return dst_output(skb); return dst_output(skb);
} }
#endif #endif
if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
return ip_fragment(skb, ip_finish_output2); return ip_fragment(skb, ip_finish_output2);
else else
return ip_finish_output2(skb); return ip_finish_output2(skb);
...@@ -422,7 +430,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) ...@@ -422,7 +430,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(dst_mtu(&rt->u.dst))); htonl(ip_skb_dst_mtu(skb)));
kfree_skb(skb); kfree_skb(skb);
return -EMSGSIZE; return -EMSGSIZE;
} }
...@@ -787,7 +795,9 @@ int ip_append_data(struct sock *sk, ...@@ -787,7 +795,9 @@ int ip_append_data(struct sock *sk,
inet->cork.addr = ipc->addr; inet->cork.addr = ipc->addr;
} }
dst_hold(&rt->u.dst); dst_hold(&rt->u.dst);
inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path); inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
rt->u.dst.dev->mtu :
dst_mtu(rt->u.dst.path);
inet->cork.rt = rt; inet->cork.rt = rt;
inet->cork.length = 0; inet->cork.length = 0;
sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_page = NULL;
...@@ -1203,13 +1213,13 @@ int ip_push_pending_frames(struct sock *sk) ...@@ -1203,13 +1213,13 @@ int ip_push_pending_frames(struct sock *sk)
* to fragment the frame generated here. No matter, what transforms * to fragment the frame generated here. No matter, what transforms
* how transforms change size of the packet, it will come out. * how transforms change size of the packet, it will come out.
*/ */
if (inet->pmtudisc != IP_PMTUDISC_DO) if (inet->pmtudisc < IP_PMTUDISC_DO)
skb->local_df = 1; skb->local_df = 1;
/* DF bit is set when we want to see DF on outgoing frames. /* DF bit is set when we want to see DF on outgoing frames.
* If local_df is set too, we still allow to fragment this frame * If local_df is set too, we still allow to fragment this frame
* locally. */ * locally. */
if (inet->pmtudisc == IP_PMTUDISC_DO || if (inet->pmtudisc >= IP_PMTUDISC_DO ||
(skb->len <= dst_mtu(&rt->u.dst) && (skb->len <= dst_mtu(&rt->u.dst) &&
ip_dont_fragment(sk, &rt->u.dst))) ip_dont_fragment(sk, &rt->u.dst)))
df = htons(IP_DF); df = htons(IP_DF);
......
...@@ -542,7 +542,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, ...@@ -542,7 +542,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
inet->hdrincl = val ? 1 : 0; inet->hdrincl = val ? 1 : 0;
break; break;
case IP_MTU_DISCOVER: case IP_MTU_DISCOVER:
if (val<0 || val>2) if (val<0 || val>3)
goto e_inval; goto e_inval;
inet->pmtudisc = val; inet->pmtudisc = val;
break; break;
......
...@@ -137,9 +137,17 @@ static int ip6_output2(struct sk_buff *skb) ...@@ -137,9 +137,17 @@ static int ip6_output2(struct sk_buff *skb)
return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish); return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
} }
/*
 * Select the MTU that an outgoing IPv6 skb is checked against.
 *
 * If the skb has a socket attached and that socket's pmtudisc setting is
 * IPV6_PMTUDISC_PROBE, the MTU of the dst's device is returned. In every
 * other case (no socket, or any other pmtudisc value) the value comes
 * from dst_mtu() on the skb's dst.
 */
static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *pinfo;

	/* No socket attached: nothing to consult, use the dst MTU. */
	if (!skb->sk)
		return dst_mtu(skb->dst);

	pinfo = inet6_sk(skb->sk);
	if (pinfo && pinfo->pmtudisc == IPV6_PMTUDISC_PROBE)
		return skb->dst->dev->mtu;

	return dst_mtu(skb->dst);
}
int ip6_output(struct sk_buff *skb) int ip6_output(struct sk_buff *skb)
{ {
if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) || if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
dst_allfrag(skb->dst)) dst_allfrag(skb->dst))
return ip6_fragment(skb, ip6_output2); return ip6_fragment(skb, ip6_output2);
else else
...@@ -566,7 +574,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) ...@@ -566,7 +574,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
hlen = ip6_find_1stfragopt(skb, &prevhdr); hlen = ip6_find_1stfragopt(skb, &prevhdr);
nexthdr = *prevhdr; nexthdr = *prevhdr;
mtu = dst_mtu(&rt->u.dst); mtu = ip6_skb_dst_mtu(skb);
/* We must not fragment if the socket is set to force MTU discovery /* We must not fragment if the socket is set to force MTU discovery
* or if the skb it not generated by a local socket. (This last * or if the skb it not generated by a local socket. (This last
...@@ -1063,7 +1071,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, ...@@ -1063,7 +1071,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
inet->cork.fl = *fl; inet->cork.fl = *fl;
np->cork.hop_limit = hlimit; np->cork.hop_limit = hlimit;
np->cork.tclass = tclass; np->cork.tclass = tclass;
mtu = dst_mtu(rt->u.dst.path); mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
if (np->frag_size < mtu) { if (np->frag_size < mtu) {
if (np->frag_size) if (np->frag_size)
mtu = np->frag_size; mtu = np->frag_size;
......
...@@ -694,7 +694,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, ...@@ -694,7 +694,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
retv = ip6_ra_control(sk, val, NULL); retv = ip6_ra_control(sk, val, NULL);
break; break;
case IPV6_MTU_DISCOVER: case IPV6_MTU_DISCOVER:
if (val<0 || val>2) if (val<0 || val>3)
goto e_inval; goto e_inval;
np->pmtudisc = val; np->pmtudisc = val;
retv = 0; retv = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment