Commit 1a4f14ba authored by David S. Miller

Merge branch 'tcp-robust-ooo'

Eric Dumazet says:

====================
Juha-Matti Tilli reported that malicious peers could inject tiny
packets in out_of_order_queue, forcing very expensive calls
to tcp_collapse_ofo_queue() and tcp_prune_ofo_queue() for
every incoming packet.

With tcp_rmem[2] default of 6MB, the ooo queue could
contain ~7000 nodes.

This patch series makes sure we cut cpu cycles enough to
render the attack not critical.

We might in the future go further, like disconnecting
or black-holing proven malicious flows.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 3dd1c9a1 58152ecb
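
A rough sense of where the ~7000 figure comes from (a back-of-the-envelope sketch, not part of the patch, assuming each 1-byte out-of-order segment is charged about 896 bytes of truesize against the receive buffer; the exact per-skb cost varies with kernel version and architecture):

/* Sketch only: estimate how many tiny skbs fit in the default
 * tcp_rmem[2] budget. The 896-byte per-skb truesize is an assumed
 * figure for a 1-byte payload segment.
 */
#include <stdio.h>

int main(void)
{
        const unsigned long tcp_rmem2 = 6UL * 1024 * 1024;     /* default tcp_rmem[2]: 6 MB */
        const unsigned long tiny_truesize = 896;                /* assumed truesize of a tiny ooo skb */

        printf("max ooo nodes ~= %lu\n", tcp_rmem2 / tiny_truesize);   /* prints ~7021 */
        return 0;
}

Keeping the queue near that size means the pre-patch collapse/prune paths had to walk thousands of rbtree nodes for every incoming segment, which is the cost this series attacks.
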
@@ -4358,6 +4358,23 @@ static bool tcp_try_coalesce(struct sock *sk,
         return true;
 }
 
+static bool tcp_ooo_try_coalesce(struct sock *sk,
+                                 struct sk_buff *to,
+                                 struct sk_buff *from,
+                                 bool *fragstolen)
+{
+        bool res = tcp_try_coalesce(sk, to, from, fragstolen);
+
+        /* In case tcp_drop() is called later, update to->gso_segs */
+        if (res) {
+                u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
+                               max_t(u16, 1, skb_shinfo(from)->gso_segs);
+
+                skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+        }
+        return res;
+}
+
 static void tcp_drop(struct sock *sk, struct sk_buff *skb)
 {
         sk_drops_add(sk, skb);
@@ -4481,8 +4498,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
         /* In the typical case, we are adding an skb to the end of the list.
          * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
          */
-        if (tcp_try_coalesce(sk, tp->ooo_last_skb,
-                             skb, &fragstolen)) {
+        if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
+                                 skb, &fragstolen)) {
 coalesce_done:
                 tcp_grow_window(sk, skb);
                 kfree_skb_partial(skb, fragstolen);
@@ -4510,7 +4527,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
                                 /* All the bits are present. Drop. */
                                 NET_INC_STATS(sock_net(sk),
                                               LINUX_MIB_TCPOFOMERGE);
-                                __kfree_skb(skb);
+                                tcp_drop(sk, skb);
                                 skb = NULL;
                                 tcp_dsack_set(sk, seq, end_seq);
                                 goto add_sack;
@@ -4529,11 +4546,11 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
                                                  TCP_SKB_CB(skb1)->end_seq);
                                 NET_INC_STATS(sock_net(sk),
                                               LINUX_MIB_TCPOFOMERGE);
-                                __kfree_skb(skb1);
+                                tcp_drop(sk, skb1);
                                 goto merge_right;
                         }
-                } else if (tcp_try_coalesce(sk, skb1,
-                                            skb, &fragstolen)) {
+                } else if (tcp_ooo_try_coalesce(sk, skb1,
+                                                skb, &fragstolen)) {
                         goto coalesce_done;
                 }
                 p = &parent->rb_right;
@@ -4902,6 +4919,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
 static void tcp_collapse_ofo_queue(struct sock *sk)
 {
         struct tcp_sock *tp = tcp_sk(sk);
+        u32 range_truesize, sum_tiny = 0;
         struct sk_buff *skb, *head;
         u32 start, end;
 
@@ -4913,6 +4931,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
         }
         start = TCP_SKB_CB(skb)->seq;
         end = TCP_SKB_CB(skb)->end_seq;
+        range_truesize = skb->truesize;
         for (head = skb;;) {
                 skb = skb_rb_next(skb);
 
@@ -4923,11 +4942,20 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
                 if (!skb ||
                     after(TCP_SKB_CB(skb)->seq, end) ||
                     before(TCP_SKB_CB(skb)->end_seq, start)) {
-                        tcp_collapse(sk, NULL, &tp->out_of_order_queue,
-                                     head, skb, start, end);
+                        /* Do not attempt collapsing tiny skbs */
+                        if (range_truesize != head->truesize ||
+                            end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
+                                tcp_collapse(sk, NULL, &tp->out_of_order_queue,
+                                             head, skb, start, end);
+                        } else {
+                                sum_tiny += range_truesize;
+                                if (sum_tiny > sk->sk_rcvbuf >> 3)
+                                        return;
+                        }
                         goto new_range;
                 }
 
+                range_truesize += skb->truesize;
                 if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
                         start = TCP_SKB_CB(skb)->seq;
                 if (after(TCP_SKB_CB(skb)->end_seq, end))
@@ -4942,6 +4970,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
  * 2) not add too big latencies if thousands of packets sit there.
  *    (But if application shrinks SO_RCVBUF, we could still end up
  *    freeing whole queue here)
+ * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
  *
  * Return true if queue has shrunk.
  */
@@ -4949,20 +4978,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 {
         struct tcp_sock *tp = tcp_sk(sk);
         struct rb_node *node, *prev;
+        int goal;
 
         if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
                 return false;
 
         NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+        goal = sk->sk_rcvbuf >> 3;
         node = &tp->ooo_last_skb->rbnode;
         do {
                 prev = rb_prev(node);
                 rb_erase(node, &tp->out_of_order_queue);
+                goal -= rb_to_skb(node)->truesize;
                 tcp_drop(sk, rb_to_skb(node));
-                sk_mem_reclaim(sk);
-                if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
-                    !tcp_under_memory_pressure(sk))
-                        break;
+                if (!prev || goal <= 0) {
+                        sk_mem_reclaim(sk);
+                        if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+                            !tcp_under_memory_pressure(sk))
+                                break;
+                        goal = sk->sk_rcvbuf >> 3;
+                }
                 node = prev;
         } while (node);
         tp->ooo_last_skb = rb_to_skb(prev);
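
For scale (a back-of-the-envelope check, not from the patch): goal = sk->sk_rcvbuf >> 3 is 12.5 % of the receive buffer, so with sk_rcvbuf grown to the 6 MB tcp_rmem[2] ceiling each batch inside tcp_prune_ofo_queue() must release at least 6,291,456 / 8 = 786,432 bytes (768 KiB) of truesize, or empty the queue, before the early break is even considered; the goal is then re-armed for the next batch.
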
@@ -4997,6 +5032,9 @@ static int tcp_prune_queue(struct sock *sk)
         else if (tcp_under_memory_pressure(sk))
                 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
+        if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+                return 0;
+
         tcp_collapse_ofo_queue(sk);
         if (!skb_queue_empty(&sk->sk_receive_queue))
                 tcp_collapse(sk, &sk->sk_receive_queue, NULL,
...