Commit 329033f6 authored by Eric Dumazet, committed by David S. Miller

tcp: makes tcp_try_coalesce aware of skb->head_frag

TCP coalesce can check if skb to be merged has its skb->head mapped to a
page fragment, instead of a kmalloc() area.

We had to disable coalescing in this case, for performance reasons.

We 'upgrade' skb->head as a fragment in itself.

This reduces the number of cache misses when the user makes its copies,
since fewer sk_buffs are fetched.

This makes the receive and ofo queues shorter and thus reduces cache line
misses in the TCP stack.

This is a followup of patch "net: allow skb->head to be a page fragment"

Tested with tg3 nic, with GRO on or off. We can see "TCPRcvCoalesce"
counter being incremented.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Maciej Żenczykowski <maze@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Cc: Matt Carlson <mcarlson@broadcom.com>
Cc: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent d7e8883c
...@@ -4464,10 +4464,12 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) ...@@ -4464,10 +4464,12 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
*/ */
static bool tcp_try_coalesce(struct sock *sk, static bool tcp_try_coalesce(struct sock *sk,
struct sk_buff *to, struct sk_buff *to,
struct sk_buff *from) struct sk_buff *from,
bool *fragstolen)
{ {
int len = from->len; int delta, len = from->len;
*fragstolen = false;
if (tcp_hdr(from)->fin) if (tcp_hdr(from)->fin)
return false; return false;
if (len <= skb_tailroom(to)) { if (len <= skb_tailroom(to)) {
...@@ -4478,15 +4480,19 @@ static bool tcp_try_coalesce(struct sock *sk, ...@@ -4478,15 +4480,19 @@ static bool tcp_try_coalesce(struct sock *sk,
TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
return true; return true;
} }
if (skb_has_frag_list(to) || skb_has_frag_list(from))
return false;
if (skb_headlen(from) == 0 && if (skb_headlen(from) == 0 &&
!skb_has_frag_list(to) &&
!skb_has_frag_list(from) &&
(skb_shinfo(to)->nr_frags + (skb_shinfo(to)->nr_frags +
skb_shinfo(from)->nr_frags <= MAX_SKB_FRAGS)) { skb_shinfo(from)->nr_frags <= MAX_SKB_FRAGS)) {
int delta = from->truesize - ksize(from->head) - WARN_ON_ONCE(from->head_frag);
SKB_DATA_ALIGN(sizeof(struct sk_buff)); delta = from->truesize - ksize(from->head) -
SKB_DATA_ALIGN(sizeof(struct sk_buff));
WARN_ON_ONCE(delta < len); WARN_ON_ONCE(delta < len);
copyfrags:
memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags, memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags,
skb_shinfo(from)->frags, skb_shinfo(from)->frags,
skb_shinfo(from)->nr_frags * sizeof(skb_frag_t)); skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
...@@ -4499,6 +4505,20 @@ static bool tcp_try_coalesce(struct sock *sk, ...@@ -4499,6 +4505,20 @@ static bool tcp_try_coalesce(struct sock *sk,
to->data_len += len; to->data_len += len;
goto merge; goto merge;
} }
if (from->head_frag) {
struct page *page;
unsigned int offset;
if (skb_shinfo(to)->nr_frags + skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
return false;
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
skb_fill_page_desc(to, skb_shinfo(to)->nr_frags,
page, offset, skb_headlen(from));
*fragstolen = true;
delta = len; /* we dont know real truesize... */
goto copyfrags;
}
return false; return false;
} }
...@@ -4540,10 +4560,15 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) ...@@ -4540,10 +4560,15 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
end_seq = TCP_SKB_CB(skb)->end_seq; end_seq = TCP_SKB_CB(skb)->end_seq;
if (seq == TCP_SKB_CB(skb1)->end_seq) { if (seq == TCP_SKB_CB(skb1)->end_seq) {
if (!tcp_try_coalesce(sk, skb1, skb)) { bool fragstolen;
if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
__skb_queue_after(&tp->out_of_order_queue, skb1, skb); __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
} else { } else {
__kfree_skb(skb); if (fragstolen)
kmem_cache_free(skbuff_head_cache, skb);
else
__kfree_skb(skb);
skb = NULL; skb = NULL;
} }
...@@ -4626,6 +4651,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) ...@@ -4626,6 +4651,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
const struct tcphdr *th = tcp_hdr(skb); const struct tcphdr *th = tcp_hdr(skb);
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
int eaten = -1; int eaten = -1;
bool fragstolen = false;
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
goto drop; goto drop;
...@@ -4672,7 +4698,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) ...@@ -4672,7 +4698,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
goto drop; goto drop;
tail = skb_peek_tail(&sk->sk_receive_queue); tail = skb_peek_tail(&sk->sk_receive_queue);
eaten = (tail && tcp_try_coalesce(sk, tail, skb)) ? 1 : 0; eaten = (tail &&
tcp_try_coalesce(sk, tail, skb,
&fragstolen)) ? 1 : 0;
if (eaten <= 0) { if (eaten <= 0) {
skb_set_owner_r(skb, sk); skb_set_owner_r(skb, sk);
__skb_queue_tail(&sk->sk_receive_queue, skb); __skb_queue_tail(&sk->sk_receive_queue, skb);
...@@ -4699,9 +4727,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) ...@@ -4699,9 +4727,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
tcp_fast_path_check(sk); tcp_fast_path_check(sk);
if (eaten > 0) if (eaten > 0) {
__kfree_skb(skb); if (fragstolen)
else if (!sock_flag(sk, SOCK_DEAD)) kmem_cache_free(skbuff_head_cache, skb);
else
__kfree_skb(skb);
} else if (!sock_flag(sk, SOCK_DEAD))
sk->sk_data_ready(sk, 0); sk->sk_data_ready(sk, 0);
return; return;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment