Commit 28d64271 authored by Eric Dumazet, committed by David S. Miller

net: attempt high order allocations in sock_alloc_send_pskb()

Adding paged frags skbs to af_unix sockets introduced a performance
regression on large sends because of additional page allocations, even
if each skb could carry at least 100% more payload than before.

We can instruct sock_alloc_send_pskb() to attempt high order
allocations.

Most of the time, it does a single page allocation instead of 8.

I added an additional parameter to sock_alloc_send_pskb() to
let other users opt in to this new feature in follow-up patches.

Tested:

Before patch :

$ netperf -t STREAM_STREAM
STREAM STREAM TEST
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 2304  212992  212992    10.00    46861.15

After patch :

$ netperf -t STREAM_STREAM
STREAM STREAM TEST
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 2304  212992  212992    10.00    57981.11
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent e370a723
...@@ -524,7 +524,7 @@ static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad, ...@@ -524,7 +524,7 @@ static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad,
linear = len; linear = len;
skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
err); err, 0);
if (!skb) if (!skb)
return NULL; return NULL;
......
...@@ -949,7 +949,7 @@ static struct sk_buff *tun_alloc_skb(struct tun_file *tfile, ...@@ -949,7 +949,7 @@ static struct sk_buff *tun_alloc_skb(struct tun_file *tfile,
linear = len; linear = len;
skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
&err); &err, 0);
if (!skb) if (!skb)
return ERR_PTR(err); return ERR_PTR(err);
......
...@@ -1539,7 +1539,8 @@ extern struct sk_buff *sock_alloc_send_pskb(struct sock *sk, ...@@ -1539,7 +1539,8 @@ extern struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
unsigned long header_len, unsigned long header_len,
unsigned long data_len, unsigned long data_len,
int noblock, int noblock,
int *errcode); int *errcode,
int max_page_order);
extern void *sock_kmalloc(struct sock *sk, int size, extern void *sock_kmalloc(struct sock *sk, int size,
gfp_t priority); gfp_t priority);
extern void sock_kfree_s(struct sock *sk, void *mem, int size); extern void sock_kfree_s(struct sock *sk, void *mem, int size);
......
...@@ -1741,24 +1741,23 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) ...@@ -1741,24 +1741,23 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
unsigned long data_len, int noblock, unsigned long data_len, int noblock,
int *errcode) int *errcode, int max_page_order)
{ {
struct sk_buff *skb; struct sk_buff *skb = NULL;
unsigned long chunk;
gfp_t gfp_mask; gfp_t gfp_mask;
long timeo; long timeo;
int err; int err;
int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
struct page *page;
int i;
err = -EMSGSIZE; err = -EMSGSIZE;
if (npages > MAX_SKB_FRAGS) if (npages > MAX_SKB_FRAGS)
goto failure; goto failure;
gfp_mask = sk->sk_allocation;
if (gfp_mask & __GFP_WAIT)
gfp_mask |= __GFP_REPEAT;
timeo = sock_sndtimeo(sk, noblock); timeo = sock_sndtimeo(sk, noblock);
while (1) { while (!skb) {
err = sock_error(sk); err = sock_error(sk);
if (err != 0) if (err != 0)
goto failure; goto failure;
...@@ -1767,50 +1766,52 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, ...@@ -1767,50 +1766,52 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
if (sk->sk_shutdown & SEND_SHUTDOWN) if (sk->sk_shutdown & SEND_SHUTDOWN)
goto failure; goto failure;
if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { if (atomic_read(&sk->sk_wmem_alloc) >= sk->sk_sndbuf) {
skb = alloc_skb(header_len, gfp_mask); set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
if (skb) { set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
int i; err = -EAGAIN;
if (!timeo)
goto failure;
if (signal_pending(current))
goto interrupted;
timeo = sock_wait_for_wmem(sk, timeo);
continue;
}
/* No pages, we're done... */ err = -ENOBUFS;
if (!data_len) gfp_mask = sk->sk_allocation;
break; if (gfp_mask & __GFP_WAIT)
gfp_mask |= __GFP_REPEAT;
skb = alloc_skb(header_len, gfp_mask);
if (!skb)
goto failure;
skb->truesize += data_len; skb->truesize += data_len;
skb_shinfo(skb)->nr_frags = npages;
for (i = 0; i < npages; i++) {
struct page *page;
page = alloc_pages(sk->sk_allocation, 0); for (i = 0; npages > 0; i++) {
if (!page) { int order = max_page_order;
err = -ENOBUFS;
skb_shinfo(skb)->nr_frags = i;
kfree_skb(skb);
goto failure;
}
__skb_fill_page_desc(skb, i, while (order) {
page, 0, if (npages >= 1 << order) {
(data_len >= PAGE_SIZE ? page = alloc_pages(sk->sk_allocation |
PAGE_SIZE : __GFP_COMP | __GFP_NOWARN,
data_len)); order);
data_len -= PAGE_SIZE; if (page)
goto fill_page;
} }
order--;
/* Full success... */
break;
} }
err = -ENOBUFS; page = alloc_page(sk->sk_allocation);
if (!page)
goto failure; goto failure;
fill_page:
chunk = min_t(unsigned long, data_len,
PAGE_SIZE << order);
skb_fill_page_desc(skb, i, page, 0, chunk);
data_len -= chunk;
npages -= 1 << order;
} }
set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
err = -EAGAIN;
if (!timeo)
goto failure;
if (signal_pending(current))
goto interrupted;
timeo = sock_wait_for_wmem(sk, timeo);
} }
skb_set_owner_w(skb, sk); skb_set_owner_w(skb, sk);
...@@ -1819,6 +1820,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, ...@@ -1819,6 +1820,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
interrupted: interrupted:
err = sock_intr_errno(timeo); err = sock_intr_errno(timeo);
failure: failure:
kfree_skb(skb);
*errcode = err; *errcode = err;
return NULL; return NULL;
} }
...@@ -1827,7 +1829,7 @@ EXPORT_SYMBOL(sock_alloc_send_pskb); ...@@ -1827,7 +1829,7 @@ EXPORT_SYMBOL(sock_alloc_send_pskb);
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
int noblock, int *errcode) int noblock, int *errcode)
{ {
return sock_alloc_send_pskb(sk, size, 0, noblock, errcode); return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
} }
EXPORT_SYMBOL(sock_alloc_send_skb); EXPORT_SYMBOL(sock_alloc_send_skb);
......
...@@ -2181,7 +2181,7 @@ static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, ...@@ -2181,7 +2181,7 @@ static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
linear = len; linear = len;
skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
err); err, 0);
if (!skb) if (!skb)
return NULL; return NULL;
......
...@@ -1479,7 +1479,8 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, ...@@ -1479,7 +1479,8 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
MAX_SKB_FRAGS * PAGE_SIZE); MAX_SKB_FRAGS * PAGE_SIZE);
skb = sock_alloc_send_pskb(sk, len - data_len, data_len, skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
msg->msg_flags & MSG_DONTWAIT, &err); msg->msg_flags & MSG_DONTWAIT, &err,
PAGE_ALLOC_COSTLY_ORDER);
if (skb == NULL) if (skb == NULL)
goto out; goto out;
...@@ -1651,7 +1652,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, ...@@ -1651,7 +1652,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
skb = sock_alloc_send_pskb(sk, size - data_len, data_len, skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
msg->msg_flags & MSG_DONTWAIT, &err); msg->msg_flags & MSG_DONTWAIT, &err,
get_order(UNIX_SKB_FRAGS_SZ));
if (!skb) if (!skb)
goto out_err; goto out_err;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment