Commit e2dd0d05 authored by Paolo Abeni

Merge branch 'zerocopy-tx-cleanups'

Pavel Begunkov says:

====================
zerocopy tx cleanups

Assorted zerocopy send path cleanups, the main part of which is
moving some net stack specific accounting out of io_uring back
to net/ in Patch 4.
====================

Link: https://patch.msgid.link/cover.1719190216.git.asml.silence@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents 19e6ad2c 2ca58ed2
...@@ -1703,6 +1703,9 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, ...@@ -1703,6 +1703,9 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb, struct iov_iter *from, struct sk_buff *skb, struct iov_iter *from,
size_t length); size_t length);
int zerocopy_fill_skb_from_iter(struct sk_buff *skb,
struct iov_iter *from, size_t length);
static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb, static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb,
struct msghdr *msg, int len) struct msghdr *msg, int len)
{ {
......
...@@ -76,7 +76,7 @@ struct msghdr { ...@@ -76,7 +76,7 @@ struct msghdr {
__kernel_size_t msg_controllen; /* ancillary data buffer length */ __kernel_size_t msg_controllen; /* ancillary data buffer length */
struct kiocb *msg_iocb; /* ptr to iocb for async requests */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */
struct ubuf_info *msg_ubuf; struct ubuf_info *msg_ubuf;
int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb, int (*sg_from_iter)(struct sk_buff *skb,
struct iov_iter *from, size_t length); struct iov_iter *from, size_t length);
}; };
......
...@@ -1265,14 +1265,14 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ...@@ -1265,14 +1265,14 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return io_sendmsg_prep_setup(req, req->opcode == IORING_OP_SENDMSG_ZC); return io_sendmsg_prep_setup(req, req->opcode == IORING_OP_SENDMSG_ZC);
} }
static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb, static int io_sg_from_iter_iovec(struct sk_buff *skb,
struct iov_iter *from, size_t length) struct iov_iter *from, size_t length)
{ {
skb_zcopy_downgrade_managed(skb); skb_zcopy_downgrade_managed(skb);
return __zerocopy_sg_from_iter(NULL, sk, skb, from, length); return zerocopy_fill_skb_from_iter(skb, from, length);
} }
static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, static int io_sg_from_iter(struct sk_buff *skb,
struct iov_iter *from, size_t length) struct iov_iter *from, size_t length)
{ {
struct skb_shared_info *shinfo = skb_shinfo(skb); struct skb_shared_info *shinfo = skb_shinfo(skb);
...@@ -1285,7 +1285,7 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, ...@@ -1285,7 +1285,7 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
if (!frag) if (!frag)
shinfo->flags |= SKBFL_MANAGED_FRAG_REFS; shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
else if (unlikely(!skb_zcopy_managed(skb))) else if (unlikely(!skb_zcopy_managed(skb)))
return __zerocopy_sg_from_iter(NULL, sk, skb, from, length); return zerocopy_fill_skb_from_iter(skb, from, length);
bi.bi_size = min(from->count, length); bi.bi_size = min(from->count, length);
bi.bi_bvec_done = from->iov_offset; bi.bi_bvec_done = from->iov_offset;
...@@ -1312,14 +1312,6 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, ...@@ -1312,14 +1312,6 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
skb->data_len += copied; skb->data_len += copied;
skb->len += copied; skb->len += copied;
skb->truesize += truesize; skb->truesize += truesize;
if (sk && sk->sk_type == SOCK_STREAM) {
sk_wmem_queued_add(sk, truesize);
if (!skb_zcopy_pure(skb))
sk_mem_charge(sk, truesize);
} else {
refcount_add(truesize, &skb->sk->sk_wmem_alloc);
}
return ret; return ret;
} }
......
...@@ -610,16 +610,10 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, ...@@ -610,16 +610,10 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
} }
EXPORT_SYMBOL(skb_copy_datagram_from_iter); EXPORT_SYMBOL(skb_copy_datagram_from_iter);
int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, int zerocopy_fill_skb_from_iter(struct sk_buff *skb,
struct sk_buff *skb, struct iov_iter *from, struct iov_iter *from, size_t length)
size_t length)
{ {
int frag; int frag = skb_shinfo(skb)->nr_frags;
if (msg && msg->msg_ubuf && msg->sg_from_iter)
return msg->sg_from_iter(sk, skb, from, length);
frag = skb_shinfo(skb)->nr_frags;
while (length && iov_iter_count(from)) { while (length && iov_iter_count(from)) {
struct page *head, *last_head = NULL; struct page *head, *last_head = NULL;
...@@ -627,7 +621,6 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, ...@@ -627,7 +621,6 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
int refs, order, n = 0; int refs, order, n = 0;
size_t start; size_t start;
ssize_t copied; ssize_t copied;
unsigned long truesize;
if (frag == MAX_SKB_FRAGS) if (frag == MAX_SKB_FRAGS)
return -EMSGSIZE; return -EMSGSIZE;
...@@ -639,17 +632,9 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, ...@@ -639,17 +632,9 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
length -= copied; length -= copied;
truesize = PAGE_ALIGN(copied + start);
skb->data_len += copied; skb->data_len += copied;
skb->len += copied; skb->len += copied;
skb->truesize += truesize; skb->truesize += PAGE_ALIGN(copied + start);
if (sk && sk->sk_type == SOCK_STREAM) {
sk_wmem_queued_add(sk, truesize);
if (!skb_zcopy_pure(skb))
sk_mem_charge(sk, truesize);
} else {
refcount_add(truesize, &skb->sk->sk_wmem_alloc);
}
head = compound_head(pages[n]); head = compound_head(pages[n]);
order = compound_order(head); order = compound_order(head);
...@@ -692,6 +677,30 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, ...@@ -692,6 +677,30 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
} }
return 0; return 0;
} }
/*
 * Attach data from @from to @skb and account the resulting truesize growth.
 *
 * The fill step is delegated: when @msg is non-NULL and carries both a
 * msg_ubuf and an sg_from_iter callback, that callback is used; otherwise
 * zerocopy_fill_skb_from_iter() is called. Neither fill routine receives the
 * socket, so all socket accounting is done here from the change in
 * skb->truesize.
 *
 * Returns the value returned by whichever fill routine was invoked.
 */
int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb, struct iov_iter *from,
size_t length)
{
/* Snapshot truesize so the amount added by the fill step can be derived. */
unsigned long orig_size = skb->truesize;
unsigned long truesize;
int ret;
if (msg && msg->msg_ubuf && msg->sg_from_iter)
ret = msg->sg_from_iter(skb, from, length);
else
ret = zerocopy_fill_skb_from_iter(skb, from, length);
/*
 * Accounting runs regardless of ret: whatever the fill step added to
 * skb->truesize before returning is charged below.
 */
truesize = skb->truesize - orig_size;
if (sk && sk->sk_type == SOCK_STREAM) {
sk_wmem_queued_add(sk, truesize);
/* Skip the memory charge when skb_zcopy_pure() reports true for the skb. */
if (!skb_zcopy_pure(skb))
sk_mem_charge(sk, truesize);
} else {
/* Note: this branch accounts against skb->sk, not the @sk argument. */
refcount_add(truesize, &skb->sk->sk_wmem_alloc);
}
return ret;
}
EXPORT_SYMBOL(__zerocopy_sg_from_iter); EXPORT_SYMBOL(__zerocopy_sg_from_iter);
/** /**
......
...@@ -1871,7 +1871,6 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, ...@@ -1871,7 +1871,6 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
struct msghdr *msg, int len, struct msghdr *msg, int len,
struct ubuf_info *uarg) struct ubuf_info *uarg)
{ {
struct ubuf_info *orig_uarg = skb_zcopy(skb);
int err, orig_len = skb->len; int err, orig_len = skb->len;
if (uarg->ops->link_skb) { if (uarg->ops->link_skb) {
...@@ -1879,6 +1878,8 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, ...@@ -1879,6 +1878,8 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
if (err) if (err)
return err; return err;
} else { } else {
struct ubuf_info *orig_uarg = skb_zcopy(skb);
/* An skb can only point to one uarg. This edge case happens /* An skb can only point to one uarg. This edge case happens
* when TCP appends to an skb, but zerocopy_realloc triggered * when TCP appends to an skb, but zerocopy_realloc triggered
* a new alloc. * a new alloc.
...@@ -1899,7 +1900,6 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, ...@@ -1899,7 +1900,6 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
return err; return err;
} }
if (!uarg->ops->link_skb)
skb_zcopy_set(skb, uarg, NULL); skb_zcopy_set(skb, uarg, NULL);
return skb->len - orig_len; return skb->len - orig_len;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment