Commit c04b79b6, authored by Josh Hunt, committed by David S. Miller

tcp: add new tcp_mtu_probe_floor sysctl

The current implementation of TCP MTU probing can considerably
underestimate the MTU on lossy connections, allowing the MSS to get down to
48. We have found that in almost all of these cases on our networks, these
paths can handle much larger MTUs, meaning the connections are being
artificially limited. Even though TCP MTU probing can raise the MSS back up,
we have seen this not to be the case, causing connections to be "stuck" with
an MSS of 48 when heavy loss is present.

Prior to pushing out this change we could not keep TCP MTU probing enabled
because of the above reasons. Now with a reasonable floor set we've had it
enabled for the past 6 months.

The new sysctl will still default to TCP_MIN_SND_MSS (48), but gives
administrators the ability to control the floor of MSS probing.
Signed-off-by: Josh Hunt <johunt@akamai.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 3a5e5234
...@@ -256,6 +256,12 @@ tcp_base_mss - INTEGER ...@@ -256,6 +256,12 @@ tcp_base_mss - INTEGER
Path MTU discovery (MTU probing). If MTU probing is enabled, Path MTU discovery (MTU probing). If MTU probing is enabled,
this is the initial MSS used by the connection. this is the initial MSS used by the connection.
tcp_mtu_probe_floor - INTEGER
If MTU probing is enabled this caps the minimum MSS used for search_low
for the connection.
Default : 48
tcp_min_snd_mss - INTEGER tcp_min_snd_mss - INTEGER
TCP SYN and SYNACK messages usually advertise an ADVMSS option, TCP SYN and SYNACK messages usually advertise an ADVMSS option,
as described in RFC 1122 and RFC 6691. as described in RFC 1122 and RFC 6691.
......
...@@ -116,6 +116,7 @@ struct netns_ipv4 { ...@@ -116,6 +116,7 @@ struct netns_ipv4 {
int sysctl_tcp_l3mdev_accept; int sysctl_tcp_l3mdev_accept;
#endif #endif
int sysctl_tcp_mtu_probing; int sysctl_tcp_mtu_probing;
int sysctl_tcp_mtu_probe_floor;
int sysctl_tcp_base_mss; int sysctl_tcp_base_mss;
int sysctl_tcp_min_snd_mss; int sysctl_tcp_min_snd_mss;
int sysctl_tcp_probe_threshold; int sysctl_tcp_probe_threshold;
......
...@@ -819,6 +819,15 @@ static struct ctl_table ipv4_net_table[] = { ...@@ -819,6 +819,15 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = &tcp_min_snd_mss_min, .extra1 = &tcp_min_snd_mss_min,
.extra2 = &tcp_min_snd_mss_max, .extra2 = &tcp_min_snd_mss_max,
}, },
{
.procname = "tcp_mtu_probe_floor",
.data = &init_net.ipv4.sysctl_tcp_mtu_probe_floor,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &tcp_min_snd_mss_min,
.extra2 = &tcp_min_snd_mss_max,
},
{ {
.procname = "tcp_probe_threshold", .procname = "tcp_probe_threshold",
.data = &init_net.ipv4.sysctl_tcp_probe_threshold, .data = &init_net.ipv4.sysctl_tcp_probe_threshold,
......
...@@ -2637,6 +2637,7 @@ static int __net_init tcp_sk_init(struct net *net) ...@@ -2637,6 +2637,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS; net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
net->ipv4.sysctl_tcp_mtu_probe_floor = TCP_MIN_SND_MSS;
net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
......
...@@ -154,7 +154,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) ...@@ -154,7 +154,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
} else { } else {
mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
mss = min(net->ipv4.sysctl_tcp_base_mss, mss); mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len); mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);
mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss); mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment