Commit 73f49f8c authored by David S. Miller

Merge branch 'tcp-tx-headless'

Eric Dumazet says:

====================
tcp: tx path fully headless

This series completes the transition of the TCP stack tx path
to headless packets: all payload now resides in page frags,
never in skb->head.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents f2f069da 5882efff
...@@ -333,6 +333,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, ...@@ -333,6 +333,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags); size_t size, int flags);
int tcp_send_mss(struct sock *sk, int *size_goal, int flags); int tcp_send_mss(struct sock *sk, int *size_goal, int flags);
int tcp_wmem_schedule(struct sock *sk, int copy);
void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle, void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,
int size_goal); int size_goal);
void tcp_release_cb(struct sock *sk); void tcp_release_cb(struct sock *sk);
...@@ -349,7 +350,7 @@ void tcp_twsk_purge(struct list_head *net_exit_list, int family); ...@@ -349,7 +350,7 @@ void tcp_twsk_purge(struct list_head *net_exit_list, int family);
ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len, struct pipe_inode_info *pipe, size_t len,
unsigned int flags); unsigned int flags);
struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp,
bool force_schedule); bool force_schedule);
void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks); void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks);
......
...@@ -858,12 +858,12 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ...@@ -858,12 +858,12 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
} }
EXPORT_SYMBOL(tcp_splice_read); EXPORT_SYMBOL(tcp_splice_read);
struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp,
bool force_schedule) bool force_schedule)
{ {
struct sk_buff *skb; struct sk_buff *skb;
skb = alloc_skb_fclone(size + MAX_TCP_HEADER, gfp); skb = alloc_skb_fclone(MAX_TCP_HEADER, gfp);
if (likely(skb)) { if (likely(skb)) {
bool mem_scheduled; bool mem_scheduled;
...@@ -957,7 +957,7 @@ static int tcp_downgrade_zcopy_pure(struct sock *sk, struct sk_buff *skb) ...@@ -957,7 +957,7 @@ static int tcp_downgrade_zcopy_pure(struct sock *sk, struct sk_buff *skb)
} }
static int tcp_wmem_schedule(struct sock *sk, int copy) int tcp_wmem_schedule(struct sock *sk, int copy)
{ {
int left; int left;
...@@ -1178,7 +1178,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) ...@@ -1178,7 +1178,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
goto restart; goto restart;
} }
first_skb = tcp_rtx_and_write_queues_empty(sk); first_skb = tcp_rtx_and_write_queues_empty(sk);
skb = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation, skb = tcp_stream_alloc_skb(sk, sk->sk_allocation,
first_skb); first_skb);
if (!skb) if (!skb)
goto wait_for_space; goto wait_for_space;
......
...@@ -1530,7 +1530,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, ...@@ -1530,7 +1530,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff; struct sk_buff *buff;
int nsize, old_factor; int old_factor;
long limit; long limit;
int nlen; int nlen;
u8 flags; u8 flags;
...@@ -1538,9 +1538,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, ...@@ -1538,9 +1538,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
if (WARN_ON(len > skb->len)) if (WARN_ON(len > skb->len))
return -EINVAL; return -EINVAL;
nsize = skb_headlen(skb) - len; DEBUG_NET_WARN_ON_ONCE(skb_headlen(skb));
if (nsize < 0)
nsize = 0;
/* tcp_sendmsg() can overshoot sk_wmem_queued by one full size skb. /* tcp_sendmsg() can overshoot sk_wmem_queued by one full size skb.
* We need some allowance to not penalize applications setting small * We need some allowance to not penalize applications setting small
...@@ -1560,7 +1558,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, ...@@ -1560,7 +1558,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
return -ENOMEM; return -ENOMEM;
/* Get a new skb... force flag on. */ /* Get a new skb... force flag on. */
buff = tcp_stream_alloc_skb(sk, nsize, gfp, true); buff = tcp_stream_alloc_skb(sk, gfp, true);
if (!buff) if (!buff)
return -ENOMEM; /* We'll just try again later. */ return -ENOMEM; /* We'll just try again later. */
skb_copy_decrypted(buff, skb); skb_copy_decrypted(buff, skb);
...@@ -1568,7 +1566,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, ...@@ -1568,7 +1566,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
sk_wmem_queued_add(sk, buff->truesize); sk_wmem_queued_add(sk, buff->truesize);
sk_mem_charge(sk, buff->truesize); sk_mem_charge(sk, buff->truesize);
nlen = skb->len - len - nsize; nlen = skb->len - len;
buff->truesize += nlen; buff->truesize += nlen;
skb->truesize -= nlen; skb->truesize -= nlen;
...@@ -1626,13 +1624,7 @@ static int __pskb_trim_head(struct sk_buff *skb, int len) ...@@ -1626,13 +1624,7 @@ static int __pskb_trim_head(struct sk_buff *skb, int len)
struct skb_shared_info *shinfo; struct skb_shared_info *shinfo;
int i, k, eat; int i, k, eat;
eat = min_t(int, len, skb_headlen(skb)); DEBUG_NET_WARN_ON_ONCE(skb_headlen(skb));
if (eat) {
__skb_pull(skb, eat);
len -= eat;
if (!len)
return 0;
}
eat = len; eat = len;
k = 0; k = 0;
shinfo = skb_shinfo(skb); shinfo = skb_shinfo(skb);
...@@ -1671,12 +1663,10 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) ...@@ -1671,12 +1663,10 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
TCP_SKB_CB(skb)->seq += len; TCP_SKB_CB(skb)->seq += len;
if (delta_truesize) { skb->truesize -= delta_truesize;
skb->truesize -= delta_truesize; sk_wmem_queued_add(sk, -delta_truesize);
sk_wmem_queued_add(sk, -delta_truesize); if (!skb_zcopy_pure(skb))
if (!skb_zcopy_pure(skb)) sk_mem_uncharge(sk, delta_truesize);
sk_mem_uncharge(sk, delta_truesize);
}
/* Any change of skb->len requires recalculation of tso factor. */ /* Any change of skb->len requires recalculation of tso factor. */
if (tcp_skb_pcount(skb) > 1) if (tcp_skb_pcount(skb) > 1)
...@@ -2126,11 +2116,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, ...@@ -2126,11 +2116,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
u8 flags; u8 flags;
/* All of a TSO frame must be composed of paged data. */ /* All of a TSO frame must be composed of paged data. */
if (skb->len != skb->data_len) DEBUG_NET_WARN_ON_ONCE(skb->len != skb->data_len);
return tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
skb, len, mss_now, gfp);
buff = tcp_stream_alloc_skb(sk, 0, gfp, true); buff = tcp_stream_alloc_skb(sk, gfp, true);
if (unlikely(!buff)) if (unlikely(!buff))
return -ENOMEM; return -ENOMEM;
skb_copy_decrypted(buff, skb); skb_copy_decrypted(buff, skb);
...@@ -2446,7 +2434,7 @@ static int tcp_mtu_probe(struct sock *sk) ...@@ -2446,7 +2434,7 @@ static int tcp_mtu_probe(struct sock *sk)
return -1; return -1;
/* We're allowed to probe. Build it now. */ /* We're allowed to probe. Build it now. */
nskb = tcp_stream_alloc_skb(sk, 0, GFP_ATOMIC, false); nskb = tcp_stream_alloc_skb(sk, GFP_ATOMIC, false);
if (!nskb) if (!nskb)
return -1; return -1;
...@@ -2487,12 +2475,8 @@ static int tcp_mtu_probe(struct sock *sk) ...@@ -2487,12 +2475,8 @@ static int tcp_mtu_probe(struct sock *sk)
} else { } else {
TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags & TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
~(TCPHDR_FIN|TCPHDR_PSH); ~(TCPHDR_FIN|TCPHDR_PSH);
if (!skb_shinfo(skb)->nr_frags) { __pskb_trim_head(skb, copy);
skb_pull(skb, copy); tcp_set_skb_tso_segs(skb, mss_now);
} else {
__pskb_trim_head(skb, copy);
tcp_set_skb_tso_segs(skb, mss_now);
}
TCP_SKB_CB(skb)->seq += copy; TCP_SKB_CB(skb)->seq += copy;
} }
...@@ -3802,8 +3786,9 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) ...@@ -3802,8 +3786,9 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
struct inet_connection_sock *icsk = inet_csk(sk); struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_request *fo = tp->fastopen_req; struct tcp_fastopen_request *fo = tp->fastopen_req;
int space, err = 0; struct page_frag *pfrag = sk_page_frag(sk);
struct sk_buff *syn_data; struct sk_buff *syn_data;
int space, err = 0;
tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */ tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */
if (!tcp_fastopen_cookie_check(sk, &tp->rx_opt.mss_clamp, &fo->cookie)) if (!tcp_fastopen_cookie_check(sk, &tp->rx_opt.mss_clamp, &fo->cookie))
...@@ -3822,25 +3807,31 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) ...@@ -3822,25 +3807,31 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
space = min_t(size_t, space, fo->size); space = min_t(size_t, space, fo->size);
/* limit to order-0 allocations */ if (space &&
space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER)); !skb_page_frag_refill(min_t(size_t, space, PAGE_SIZE),
pfrag, sk->sk_allocation))
syn_data = tcp_stream_alloc_skb(sk, space, sk->sk_allocation, false); goto fallback;
syn_data = tcp_stream_alloc_skb(sk, sk->sk_allocation, false);
if (!syn_data) if (!syn_data)
goto fallback; goto fallback;
memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
if (space) { if (space) {
int copied = copy_from_iter(skb_put(syn_data, space), space, space = min_t(size_t, space, pfrag->size - pfrag->offset);
&fo->data->msg_iter); space = tcp_wmem_schedule(sk, space);
if (unlikely(!copied)) { }
if (space) {
space = copy_page_from_iter(pfrag->page, pfrag->offset,
space, &fo->data->msg_iter);
if (unlikely(!space)) {
tcp_skb_tsorted_anchor_cleanup(syn_data); tcp_skb_tsorted_anchor_cleanup(syn_data);
kfree_skb(syn_data); kfree_skb(syn_data);
goto fallback; goto fallback;
} }
if (copied != space) { skb_fill_page_desc(syn_data, 0, pfrag->page,
skb_trim(syn_data, copied); pfrag->offset, space);
space = copied; page_ref_inc(pfrag->page);
} pfrag->offset += space;
skb_len_add(syn_data, space);
skb_zcopy_set(syn_data, fo->uarg, NULL); skb_zcopy_set(syn_data, fo->uarg, NULL);
} }
/* No more data pending in inet_wait_for_connect() */ /* No more data pending in inet_wait_for_connect() */
...@@ -3905,7 +3896,7 @@ int tcp_connect(struct sock *sk) ...@@ -3905,7 +3896,7 @@ int tcp_connect(struct sock *sk)
return 0; return 0;
} }
buff = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation, true); buff = tcp_stream_alloc_skb(sk, sk->sk_allocation, true);
if (unlikely(!buff)) if (unlikely(!buff))
return -ENOBUFS; return -ENOBUFS;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment