Commit 4fddbf8a authored by David S. Miller

Merge branch 'tcp-fixes'

Eric Dumazet says:

====================
tcp: make sack processing more robust

Jonathan Looney brought to our attention multiple problems
in the TCP stack at the sender side.

SACK processing can be abused by malicious peers to either
cause overflows or increase memory usage.

First two patches fix the immediate problems.

Since the malicious peers abuse senders by advertising a very
small MSS in their SYN or SYNACK packet, the last two
patches add a new sysctl so that admins can choose a higher
limit for MSS clamping.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 6be8e297 967c05ae
...@@ -255,6 +255,14 @@ tcp_base_mss - INTEGER ...@@ -255,6 +255,14 @@ tcp_base_mss - INTEGER
Path MTU discovery (MTU probing). If MTU probing is enabled, Path MTU discovery (MTU probing). If MTU probing is enabled,
this is the initial MSS used by the connection. this is the initial MSS used by the connection.
tcp_min_snd_mss - INTEGER
TCP SYN and SYNACK messages usually advertise an ADVMSS option,
as described in RFC 1122 and RFC 6691.
If this ADVMSS option is smaller than tcp_min_snd_mss,
it is silently capped to tcp_min_snd_mss.
Default : 48 (at least 8 bytes of payload per segment)
tcp_congestion_control - STRING tcp_congestion_control - STRING
Set the congestion control algorithm to be used for new Set the congestion control algorithm to be used for new
connections. The algorithm "reno" is always available, but connections. The algorithm "reno" is always available, but
......
...@@ -484,4 +484,8 @@ static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss) ...@@ -484,4 +484,8 @@ static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss)
return (user_mss && user_mss < mss) ? user_mss : mss; return (user_mss && user_mss < mss) ? user_mss : mss;
} }
int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
int shiftlen);
#endif /* _LINUX_TCP_H */ #endif /* _LINUX_TCP_H */
...@@ -117,6 +117,7 @@ struct netns_ipv4 { ...@@ -117,6 +117,7 @@ struct netns_ipv4 {
#endif #endif
int sysctl_tcp_mtu_probing; int sysctl_tcp_mtu_probing;
int sysctl_tcp_base_mss; int sysctl_tcp_base_mss;
int sysctl_tcp_min_snd_mss;
int sysctl_tcp_probe_threshold; int sysctl_tcp_probe_threshold;
u32 sysctl_tcp_probe_interval; u32 sysctl_tcp_probe_interval;
......
...@@ -51,6 +51,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); ...@@ -51,6 +51,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define MAX_TCP_HEADER (128 + MAX_HEADER) #define MAX_TCP_HEADER (128 + MAX_HEADER)
#define MAX_TCP_OPTION_SPACE 40 #define MAX_TCP_OPTION_SPACE 40
/* Lower bound for the sending MSS; used as both the default and the minimum
 * permitted value of the tcp_min_snd_mss sysctl.
 */
#define TCP_MIN_SND_MSS 48
/* Payload bytes left in a TCP_MIN_SND_MSS segment once a full set of TCP
 * options (MAX_TCP_OPTION_SPACE) is reserved: 48 - 40 = 8.
 */
#define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)
/* /*
* Never offer a window over 32767 without using window scaling. Some * Never offer a window over 32767 without using window scaling. Some
......
...@@ -283,6 +283,7 @@ enum ...@@ -283,6 +283,7 @@ enum
LINUX_MIB_TCPACKCOMPRESSED, /* TCPAckCompressed */ LINUX_MIB_TCPACKCOMPRESSED, /* TCPAckCompressed */
LINUX_MIB_TCPZEROWINDOWDROP, /* TCPZeroWindowDrop */ LINUX_MIB_TCPZEROWINDOWDROP, /* TCPZeroWindowDrop */
LINUX_MIB_TCPRCVQDROP, /* TCPRcvQDrop */ LINUX_MIB_TCPRCVQDROP, /* TCPRcvQDrop */
LINUX_MIB_TCPWQUEUETOOBIG, /* TCPWqueueTooBig */
__LINUX_MIB_MAX __LINUX_MIB_MAX
}; };
......
...@@ -287,6 +287,7 @@ static const struct snmp_mib snmp4_net_list[] = { ...@@ -287,6 +287,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED), SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP), SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP), SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG),
SNMP_MIB_SENTINEL SNMP_MIB_SENTINEL
}; };
......
...@@ -39,6 +39,8 @@ static int ip_local_port_range_min[] = { 1, 1 }; ...@@ -39,6 +39,8 @@ static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 }; static int ip_local_port_range_max[] = { 65535, 65535 };
static int tcp_adv_win_scale_min = -31; static int tcp_adv_win_scale_min = -31;
static int tcp_adv_win_scale_max = 31; static int tcp_adv_win_scale_max = 31;
/* Allowed range for the tcp_min_snd_mss sysctl; handed to
 * proc_dointvec_minmax through the table entry's extra1/extra2.
 */
static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
static int tcp_min_snd_mss_max = 65535;
static int ip_privileged_port_min; static int ip_privileged_port_min;
static int ip_privileged_port_max = 65535; static int ip_privileged_port_max = 65535;
static int ip_ttl_min = 1; static int ip_ttl_min = 1;
...@@ -769,6 +771,15 @@ static struct ctl_table ipv4_net_table[] = { ...@@ -769,6 +771,15 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec, .proc_handler = proc_dointvec,
}, },
{
.procname = "tcp_min_snd_mss",
.data = &init_net.ipv4.sysctl_tcp_min_snd_mss,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &tcp_min_snd_mss_min,
.extra2 = &tcp_min_snd_mss_max,
},
{ {
.procname = "tcp_probe_threshold", .procname = "tcp_probe_threshold",
.data = &init_net.ipv4.sysctl_tcp_probe_threshold, .data = &init_net.ipv4.sysctl_tcp_probe_threshold,
......
...@@ -3873,6 +3873,7 @@ void __init tcp_init(void) ...@@ -3873,6 +3873,7 @@ void __init tcp_init(void)
unsigned long limit; unsigned long limit;
unsigned int i; unsigned int i;
BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
FIELD_SIZEOF(struct sk_buff, cb)); FIELD_SIZEOF(struct sk_buff, cb));
......
...@@ -1302,7 +1302,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, ...@@ -1302,7 +1302,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
TCP_SKB_CB(skb)->seq += shifted; TCP_SKB_CB(skb)->seq += shifted;
tcp_skb_pcount_add(prev, pcount); tcp_skb_pcount_add(prev, pcount);
BUG_ON(tcp_skb_pcount(skb) < pcount); WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
tcp_skb_pcount_add(skb, -pcount); tcp_skb_pcount_add(skb, -pcount);
/* When we're adding to gso_segs == 1, gso_size will be zero, /* When we're adding to gso_segs == 1, gso_size will be zero,
...@@ -1368,6 +1368,21 @@ static int skb_can_shift(const struct sk_buff *skb) ...@@ -1368,6 +1368,21 @@ static int skb_can_shift(const struct sk_buff *skb)
return !skb_headlen(skb) && skb_is_nonlinear(skb); return !skb_headlen(skb) && skb_is_nonlinear(skb);
} }
/* Guarded wrapper around skb_shift() for TCP.
 *
 * @to:       skb receiving the data
 * @from:     skb giving up the data
 * @pcount:   number of segments the shifted data accounts for
 * @shiftlen: number of bytes to move from @from to @to
 *
 * TCP_SKB_CB(skb)->tcp_gso_segs is only 16 bits wide and the smallest TCP
 * gso_size is TCP_MIN_GSO_SIZE (8 bytes), so an skb must stay below
 * 65535 * 8 bytes regardless of how large the current MSS is, and its
 * segment count must not exceed 65535.
 *
 * Returns 0 without shifting when either bound would be violated,
 * otherwise whatever skb_shift() returns.
 */
int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
		  int pcount, int shiftlen)
{
	if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE) ||
	    unlikely(tcp_skb_pcount(to) + pcount > 65535))
		return 0;

	return skb_shift(to, from, shiftlen);
}
/* Try collapsing SACK blocks spanning across multiple skbs to a single /* Try collapsing SACK blocks spanning across multiple skbs to a single
* skb. * skb.
*/ */
...@@ -1473,7 +1488,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, ...@@ -1473,7 +1488,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una)) if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
goto fallback; goto fallback;
if (!skb_shift(prev, skb, len)) if (!tcp_skb_shift(prev, skb, pcount, len))
goto fallback; goto fallback;
if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack)) if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
goto out; goto out;
...@@ -1491,11 +1506,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, ...@@ -1491,11 +1506,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
goto out; goto out;
len = skb->len; len = skb->len;
if (skb_shift(prev, skb, len)) { pcount = tcp_skb_pcount(skb);
pcount += tcp_skb_pcount(skb); if (tcp_skb_shift(prev, skb, pcount, len))
tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb), tcp_shifted_skb(sk, prev, skb, state, pcount,
len, mss, 0); len, mss, 0);
}
out: out:
return prev; return prev;
......
...@@ -2628,6 +2628,7 @@ static int __net_init tcp_sk_init(struct net *net) ...@@ -2628,6 +2628,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_ecn_fallback = 1; net->ipv4.sysctl_tcp_ecn_fallback = 1;
net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
......
...@@ -1296,6 +1296,11 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, ...@@ -1296,6 +1296,11 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
if (nsize < 0) if (nsize < 0)
nsize = 0; nsize = 0;
if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
return -ENOMEM;
}
if (skb_unclone(skb, gfp)) if (skb_unclone(skb, gfp))
return -ENOMEM; return -ENOMEM;
...@@ -1454,8 +1459,7 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) ...@@ -1454,8 +1459,7 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
mss_now -= icsk->icsk_ext_hdr_len; mss_now -= icsk->icsk_ext_hdr_len;
/* Then reserve room for full set of TCP options and 8 bytes of data */ /* Then reserve room for full set of TCP options and 8 bytes of data */
if (mss_now < 48) mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
mss_now = 48;
return mss_now; return mss_now;
} }
...@@ -2747,7 +2751,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) ...@@ -2747,7 +2751,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
if (next_skb_size <= skb_availroom(skb)) if (next_skb_size <= skb_availroom(skb))
skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size), skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
next_skb_size); next_skb_size);
else if (!skb_shift(skb, next_skb, next_skb_size)) else if (!tcp_skb_shift(skb, next_skb, 1, next_skb_size))
return false; return false;
} }
tcp_highest_sack_replace(sk, next_skb, skb); tcp_highest_sack_replace(sk, next_skb, skb);
......
...@@ -155,6 +155,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) ...@@ -155,6 +155,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
mss = min(net->ipv4.sysctl_tcp_base_mss, mss); mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len); mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len);
mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
} }
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment