Commit 15d99e02 authored by Rick Jones, committed by David S. Miller

[TCP]: sysctl to allow TCP window > 32767 sans wscale

Back in the dark ages, we had to be conservative and only allow 15-bit
window fields if the window scale option was not negotiated.  Some
ancient stacks used a signed 16-bit quantity for the window field of
the TCP header and would get confused.

Those days are long gone, so we can use the full 16 bits by default
now.

There is a sysctl added so that we can still interact with such old
stacks.
Signed-off-by: Rick Jones <rick.jones2@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent c1b1bce8
...@@ -355,6 +355,13 @@ somaxconn - INTEGER ...@@ -355,6 +355,13 @@ somaxconn - INTEGER
Defaults to 128. See also tcp_max_syn_backlog for additional tuning Defaults to 128. See also tcp_max_syn_backlog for additional tuning
for TCP sockets. for TCP sockets.
tcp_workaround_signed_windows - BOOLEAN
If set, assume no receipt of a window scaling option means the
remote TCP is broken and treats the window as a signed quantity.
If unset, assume the remote TCP is not broken even if we do
not receive a window scaling option from them.
Default: 0
IP Variables: IP Variables:
ip_local_port_range - 2 INTEGERS ip_local_port_range - 2 INTEGERS
......
...@@ -402,6 +402,7 @@ enum ...@@ -402,6 +402,7 @@ enum
NET_IPV4_IPFRAG_MAX_DIST=112, NET_IPV4_IPFRAG_MAX_DIST=112,
NET_TCP_MTU_PROBING=113, NET_TCP_MTU_PROBING=113,
NET_TCP_BASE_MSS=114, NET_TCP_BASE_MSS=114,
NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=115,
}; };
enum { enum {
......
...@@ -224,6 +224,7 @@ extern int sysctl_tcp_tso_win_divisor; ...@@ -224,6 +224,7 @@ extern int sysctl_tcp_tso_win_divisor;
extern int sysctl_tcp_abc; extern int sysctl_tcp_abc;
extern int sysctl_tcp_mtu_probing; extern int sysctl_tcp_mtu_probing;
extern int sysctl_tcp_base_mss; extern int sysctl_tcp_base_mss;
extern int sysctl_tcp_workaround_signed_windows;
extern atomic_t tcp_memory_allocated; extern atomic_t tcp_memory_allocated;
extern atomic_t tcp_sockets_allocated; extern atomic_t tcp_sockets_allocated;
......
...@@ -680,7 +680,14 @@ ctl_table ipv4_table[] = { ...@@ -680,7 +680,14 @@ ctl_table ipv4_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = &proc_dointvec, .proc_handler = &proc_dointvec,
}, },
{
.ctl_name = NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS,
.procname = "tcp_workaround_signed_windows",
.data = &sysctl_tcp_workaround_signed_windows,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{ .ctl_name = 0 } { .ctl_name = 0 }
}; };
......
...@@ -45,6 +45,11 @@ ...@@ -45,6 +45,11 @@
/* People can turn this off for buggy TCP's found in printers etc. */ /* People can turn this off for buggy TCP's found in printers etc. */
int sysctl_tcp_retrans_collapse = 1; int sysctl_tcp_retrans_collapse = 1;
/* People can turn this on to work with those rare, broken TCPs that
* interpret the window field as a signed quantity.
*/
int sysctl_tcp_workaround_signed_windows = 0;
/* This limits the percentage of the congestion window which we /* This limits the percentage of the congestion window which we
* will allow a single TSO frame to consume. Building TSO frames * will allow a single TSO frame to consume. Building TSO frames
* which are too large can cause TCP streams to be bursty. * which are too large can cause TCP streams to be bursty.
...@@ -177,12 +182,18 @@ void tcp_select_initial_window(int __space, __u32 mss, ...@@ -177,12 +182,18 @@ void tcp_select_initial_window(int __space, __u32 mss,
space = (space / mss) * mss; space = (space / mss) * mss;
/* NOTE: offering an initial window larger than 32767 /* NOTE: offering an initial window larger than 32767
* will break some buggy TCP stacks. We try to be nice. * will break some buggy TCP stacks. If the admin tells us
* If we are not window scaling, then this truncates * it is likely we could be speaking with such a buggy stack
* our initial window offering to 32k. There should also * we will truncate our initial window offering to 32K-1
* be a sysctl option to stop being nice. * unless the remote has sent us a window scaling option,
* which we interpret as a sign the remote TCP is not
* misinterpreting the window field as a signed quantity.
*/ */
(*rcv_wnd) = min(space, MAX_TCP_WINDOW); if (sysctl_tcp_workaround_signed_windows)
(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
else
(*rcv_wnd) = space;
(*rcv_wscale) = 0; (*rcv_wscale) = 0;
if (wscale_ok) { if (wscale_ok) {
/* Set window scaling on max possible window /* Set window scaling on max possible window
...@@ -241,7 +252,7 @@ static u16 tcp_select_window(struct sock *sk) ...@@ -241,7 +252,7 @@ static u16 tcp_select_window(struct sock *sk)
/* Make sure we do not exceed the maximum possible /* Make sure we do not exceed the maximum possible
* scaled window. * scaled window.
*/ */
if (!tp->rx_opt.rcv_wscale) if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
new_win = min(new_win, MAX_TCP_WINDOW); new_win = min(new_win, MAX_TCP_WINDOW);
else else
new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment