Commit 7fba5309, authored by Arjun Roy, committed by Jakub Kicinski

net-zerocopy: Refactor skb frag fast-forward op.

Refactor skb frag fast-forwarding for tcp receive zerocopy. This is
part of a patch set that introduces short-circuited hybrid copies
for small receive operations, which results in roughly 33% fewer
syscalls for small RPC scenarios.

skb_advance_to_frag(), given a skb and an offset into the skb,
iterates from the first frag for the skb until we're at the frag
specified by the offset. Assuming the offset provided refers to how
many bytes in the skb are already read, the returned frag points to
the next frag we may read from, while offset_frag is set to the number
of bytes from this frag that we have already read.

If frag is not null and offset_frag is equal to 0, then we may be able
to map this frag's page into the process address space with
vm_insert_page(). However, if offset_frag is not equal to 0, then we
cannot do so.
Signed-off-by: Arjun Roy <arjunroy@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 2cd81161
...@@ -1758,6 +1758,28 @@ int tcp_mmap(struct file *file, struct socket *sock, ...@@ -1758,6 +1758,28 @@ int tcp_mmap(struct file *file, struct socket *sock,
} }
EXPORT_SYMBOL(tcp_mmap); EXPORT_SYMBOL(tcp_mmap);
/**
 * skb_advance_to_frag - find the page fragment that holds a given skb offset
 * @skb:         buffer whose fragment array is walked
 * @offset_skb:  byte offset measured from the start of @skb
 * @offset_frag: output; byte offset inside the returned fragment
 *
 * Walks skb_shinfo(@skb)->frags from the first entry, consuming whole
 * fragments until the one containing byte @offset_skb is reached.
 *
 * Return: the fragment containing @offset_skb, with *@offset_frag set to the
 * number of bytes of that fragment lying before the offset (0 when the
 * offset is exactly the fragment's first byte).  NULL, with *@offset_frag
 * left unmodified, when @offset_skb is not below @skb->len, when it is
 * smaller than skb_headlen(@skb), or when @skb has a frag list.
 */
static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb,
				       u32 *offset_frag)
{
	skb_frag_t *frag;

	/*
	 * The walk below stops only once offset_skb is used up; it never
	 * looks at how many fragments exist.  An offset at or past the end
	 * of the skb would therefore step beyond the last fragment (or hand
	 * back a one-past-the-end entry), so refuse it before walking.
	 * NOTE(review): relies on skb->len being headlen plus the fragment
	 * sizes once the frag-list case is excluded below.
	 */
	if (offset_skb >= skb->len)
		return NULL;

	offset_skb -= skb_headlen(skb);
	/*
	 * offset_skb is unsigned: a "negative" result after the subtraction
	 * means the original offset was smaller than skb_headlen(skb).
	 */
	if ((int)offset_skb < 0 || skb_has_frag_list(skb))
		return NULL;

	frag = skb_shinfo(skb)->frags;
	while (offset_skb) {
		if (skb_frag_size(frag) > offset_skb) {
			/* The offset falls strictly inside this fragment. */
			*offset_frag = offset_skb;
			return frag;
		}
		offset_skb -= skb_frag_size(frag);
		++frag;
	}

	/* The offset coincides with the first byte of this fragment. */
	*offset_frag = 0;
	return frag;
}
static int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc, static int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc,
struct sk_buff *skb, u32 copylen, struct sk_buff *skb, u32 copylen,
u32 *offset, u32 *seq) u32 *offset, u32 *seq)
...@@ -1884,6 +1906,8 @@ static int tcp_zerocopy_receive(struct sock *sk, ...@@ -1884,6 +1906,8 @@ static int tcp_zerocopy_receive(struct sock *sk,
curr_addr = address; curr_addr = address;
while (length + PAGE_SIZE <= zc->length) { while (length + PAGE_SIZE <= zc->length) {
if (zc->recv_skip_hint < PAGE_SIZE) { if (zc->recv_skip_hint < PAGE_SIZE) {
u32 offset_frag;
/* If we're here, finish the current batch. */ /* If we're here, finish the current batch. */
if (pg_idx) { if (pg_idx) {
ret = tcp_zerocopy_vm_insert_batch(vma, pages, ret = tcp_zerocopy_vm_insert_batch(vma, pages,
...@@ -1904,16 +1928,9 @@ static int tcp_zerocopy_receive(struct sock *sk, ...@@ -1904,16 +1928,9 @@ static int tcp_zerocopy_receive(struct sock *sk,
skb = tcp_recv_skb(sk, seq, &offset); skb = tcp_recv_skb(sk, seq, &offset);
} }
zc->recv_skip_hint = skb->len - offset; zc->recv_skip_hint = skb->len - offset;
offset -= skb_headlen(skb); frags = skb_advance_to_frag(skb, offset, &offset_frag);
if ((int)offset < 0 || skb_has_frag_list(skb)) if (!frags || offset_frag)
break; break;
frags = skb_shinfo(skb)->frags;
while (offset) {
if (skb_frag_size(frags) > offset)
goto out;
offset -= skb_frag_size(frags);
frags++;
}
} }
if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) { if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) {
int remaining = zc->recv_skip_hint; int remaining = zc->recv_skip_hint;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment