Commit df9c65b5 authored by Linus Torvalds

Merge tag 'vfs-6.7.iov_iter' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs

Pull iov_iter updates from Christian Brauner:
 "This contains David's iov_iter cleanup work to convert the iov_iter
  iteration macros to inline functions:

   - Remove last_offset from iov_iter as it was only used by ITER_PIPE

   - Add a __user tag on copy_mc_to_user()'s dst argument on x86 to
     match that on powerpc and get rid of a sparse warning

   - Convert iter->user_backed to user_backed_iter() in the sound PCM
     driver

   - Convert iter->user_backed to user_backed_iter() in a couple of
     infiniband drivers

   - Renumber the type enum so that the ITER_* constants match the order
     in iterate_and_advance*()

   - Since the preceding patch puts UBUF and IOVEC at 0 and 1, change
     user_backed_iter() to just use the type value and get rid of the
     extra flag

   - Convert the iov_iter iteration macros to always-inline functions to
     make the code easier to follow. It uses function pointers, but they
     get optimised away

   - Move the check for ->copy_mc to _copy_from_iter() and
     copy_page_from_iter_atomic() rather than in memcpy_from_iter_mc()
     where it gets repeated for every segment. Instead, we check once
     and invoke a side function that can use iterate_bvec() rather than
     iterate_and_advance() and supply a different step function

   - Move the copy-and-csum code to net/ where it can be in proximity
     with the code that uses it

   - Fold csum_and_memcpy() into its two users

   - Move csum_and_copy_from_iter_full() out of line and merge in
     csum_and_copy_from_iter() since the former is the only caller of
     the latter

   - Move hash_and_copy_to_iter() to net/ where it can be with its only
     caller"

* tag 'vfs-6.7.iov_iter' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs:
  iov_iter, net: Move hash_and_copy_to_iter() to net/
  iov_iter, net: Merge csum_and_copy_from_iter{,_full}() together
  iov_iter, net: Fold in csum_and_memcpy()
  iov_iter, net: Move csum_and_copy_to/from_iter() to net/
  iov_iter: Don't deal with iter->copy_mc in memcpy_from_iter_mc()
  iov_iter: Convert iterate*() to inline funcs
  iov_iter: Derive user-backedness from the iterator type
  iov_iter: Renumber ITER_* constants
  infiniband: Use user_backed_iter() to see if iterator is UBUF/IOVEC
  sound: Fix snd_pcm_readv()/writev() to use iov access functions
  iov_iter, x86: Be consistent about the __user tag on copy_mc_to_user()
  iov_iter: Remove last_offset from iov_iter as it was for ITER_PIPE
parents 3b3f874c b5f0e20f
......@@ -496,7 +496,7 @@ copy_mc_to_kernel(void *to, const void *from, unsigned len);
#define copy_mc_to_kernel copy_mc_to_kernel
unsigned long __must_check
copy_mc_to_user(void *to, const void *from, unsigned len);
copy_mc_to_user(void __user *to, const void *from, unsigned len);
#endif
/*
......
......@@ -70,23 +70,23 @@ unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigne
}
EXPORT_SYMBOL_GPL(copy_mc_to_kernel);
unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned len)
unsigned long __must_check copy_mc_to_user(void __user *dst, const void *src, unsigned len)
{
unsigned long ret;
if (copy_mc_fragile_enabled) {
__uaccess_begin();
ret = copy_mc_fragile(dst, src, len);
ret = copy_mc_fragile((__force void *)dst, src, len);
__uaccess_end();
return ret;
}
if (static_cpu_has(X86_FEATURE_ERMS)) {
__uaccess_begin();
ret = copy_mc_enhanced_fast_string(dst, src, len);
ret = copy_mc_enhanced_fast_string((__force void *)dst, src, len);
__uaccess_end();
return ret;
}
return copy_user_generic(dst, src, len);
return copy_user_generic((__force void *)dst, src, len);
}
......@@ -267,7 +267,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
if (!HFI1_CAP_IS_KSET(SDMA))
return -EINVAL;
if (!from->user_backed)
if (!user_backed_iter(from))
return -EINVAL;
idx = srcu_read_lock(&fd->pq_srcu);
pq = srcu_dereference(fd->pq, &fd->pq_srcu);
......
......@@ -2244,7 +2244,7 @@ static ssize_t qib_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct qib_ctxtdata *rcd = ctxt_fp(iocb->ki_filp);
struct qib_user_sdma_queue *pq = fp->pq;
if (!from->user_backed || !from->nr_segs || !pq)
if (!user_backed_iter(from) || !from->nr_segs || !pq)
return -EINVAL;
return qib_user_sdma_writev(rcd, pq, iter_iov(from), from->nr_segs);
......
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* I/O iterator iteration building functions.
*
* Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
#ifndef _LINUX_IOV_ITER_H
#define _LINUX_IOV_ITER_H
#include <linux/uio.h>
#include <linux/bvec.h>
/*
 * Step function for kernel-address iterators: called with the address of the
 * current segment, the amount already iterated over (progress), the length of
 * the segment and the two private pointers supplied by the caller.  Returns
 * the number of bytes of the segment that were NOT processed.
 */
typedef size_t (*iov_step_f)(void *iter_base, size_t progress, size_t len,
void *priv, void *priv2);
/*
 * As iov_step_f, but the segment address is a __user pointer (used for the
 * UBUF and IOVEC iterator types).
 */
typedef size_t (*iov_ustep_f)(void __user *iter_base, size_t progress, size_t len,
void *priv, void *priv2);
/*
 * Handle ITER_UBUF.
 *
 * The whole request is a single userspace span, so @step is invoked exactly
 * once, starting at the iterator's current offset.  Whatever @step reports as
 * unprocessed is subtracted from @len; the iterator is advanced by the rest,
 * which is also the return value.
 */
static __always_inline
size_t iterate_ubuf(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		    iov_ustep_f step)
{
	void __user *ubase = iter->ubuf;
	size_t left = step(ubase + iter->iov_offset, 0, len, priv, priv2);
	size_t done = len - left;

	iter->iov_offset += done;
	iter->count -= done;
	return done;
}
/*
 * Handle ITER_IOVEC.
 *
 * Present the userspace segments of the iovec array to @step one at a time,
 * beginning at the iterator's current segment and offset.  Iteration ends when
 * @len is used up or @step leaves part of a segment unprocessed.  The iterator
 * (segment pointer, nr_segs, iov_offset, count) is advanced by the amount
 * processed, which is returned.
 */
static __always_inline
size_t iterate_iovec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		     iov_ustep_f step)
{
	const struct iovec *seg = iter->__iov;
	size_t off = iter->iov_offset;
	size_t progress = 0;

	for (;;) {
		size_t chunk = min(len, seg->iov_len - off);

		if (likely(chunk)) {
			size_t left = step(seg->iov_base + off, progress, chunk,
					   priv, priv2);
			size_t done = chunk - left;

			progress += done;
			off += done;
			len -= done;
			/* Still inside this segment: @len ran out or @step fell short. */
			if (off < seg->iov_len)
				break;
		}

		/* Segment exhausted (or empty): carry on with the next one. */
		seg++;
		off = 0;
		if (!len)
			break;
	}

	iter->nr_segs -= seg - iter->__iov;
	iter->__iov = seg;
	iter->iov_offset = off;
	iter->count -= progress;
	return progress;
}
/*
 * Handle ITER_KVEC.
 *
 * Same walk as for ITER_IOVEC, but over an array of kernel-address segments,
 * so the kernel-address step function is used.  Stops when @len is used up or
 * @step leaves part of a segment unprocessed; the iterator is advanced by the
 * amount processed, which is returned.
 */
static __always_inline
size_t iterate_kvec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		    iov_step_f step)
{
	const struct kvec *seg = iter->kvec;
	size_t off = iter->iov_offset;
	size_t progress = 0;

	for (;;) {
		size_t chunk = min(len, seg->iov_len - off);

		if (likely(chunk)) {
			size_t left = step(seg->iov_base + off, progress, chunk,
					   priv, priv2);
			size_t done = chunk - left;

			progress += done;
			off += done;
			len -= done;
			/* Still inside this segment: @len ran out or @step fell short. */
			if (off < seg->iov_len)
				break;
		}

		/* Segment exhausted (or empty): carry on with the next one. */
		seg++;
		off = 0;
		if (!len)
			break;
	}

	iter->nr_segs -= seg - iter->kvec;
	iter->kvec = seg;
	iter->iov_offset = off;
	iter->count -= progress;
	return progress;
}
/*
 * Handle ITER_BVEC.
 *
 * Walk the bio_vec array from the iterator's current segment and offset.  Each
 * piece handed to @step is mapped with kmap_local_page() and never crosses a
 * page boundary.  Iteration stops when @len is used up or @step reports that
 * it left something unprocessed.  The iterator (bvec pointer, nr_segs,
 * iov_offset, count) is advanced by the amount processed, which is returned.
 */
static __always_inline
size_t iterate_bvec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
iov_step_f step)
{
const struct bio_vec *p = iter->bvec;
size_t progress = 0, skip = iter->iov_offset;
do {
size_t remain, consumed;
/* Byte offset of the current position, measured from the start of bv_page. */
size_t offset = p->bv_offset + skip, part;
/* Map the page that contains that offset. */
void *kaddr = kmap_local_page(p->bv_page + offset / PAGE_SIZE);
/* Cap the piece at the request, the rest of the segment and the rest of the page. */
part = min3(len,
(size_t)(p->bv_len - skip),
(size_t)(PAGE_SIZE - offset % PAGE_SIZE));
remain = step(kaddr + offset % PAGE_SIZE, progress, part, priv, priv2);
kunmap_local(kaddr);
consumed = part - remain;
len -= consumed;
progress += consumed;
skip += consumed;
/* Segment fully consumed: move to the start of the next bio_vec. */
if (skip >= p->bv_len) {
skip = 0;
p++;
}
/* A short step ends the walk (after the position has been updated). */
if (remain)
break;
} while (len);
/* Store the new position back into the iterator. */
iter->nr_segs -= p - iter->bvec;
iter->bvec = p;
iter->iov_offset = skip;
iter->count -= progress;
return progress;
}
/*
 * Handle ITER_XARRAY.
 *
 * Walk the folios stored in iter->xarray, starting at the page index that
 * corresponds to xarray_start + iov_offset, and present them to @step one
 * mapped page-sized piece at a time.  The walk is done under
 * rcu_read_lock().  It stops when @len is used up, when @step leaves
 * something unprocessed, or (with a warning) when a value entry or a hugetlb
 * folio is encountered.  Only iov_offset and count are advanced; the amount
 * processed is returned.
 */
static __always_inline
size_t iterate_xarray(struct iov_iter *iter, size_t len, void *priv, void *priv2,
iov_step_f step)
{
struct folio *folio;
size_t progress = 0;
/* Byte position of the current iterator offset, and the page index holding it. */
loff_t start = iter->xarray_start + iter->iov_offset;
pgoff_t index = start / PAGE_SIZE;
XA_STATE(xas, iter->xarray, index);
rcu_read_lock();
xas_for_each(&xas, folio, ULONG_MAX) {
size_t remain, consumed, offset, part, flen;
/* Skip entries that xas_retry() flags. */
if (xas_retry(&xas, folio))
continue;
/* Value entries and hugetlb folios are not expected here: warn and stop. */
if (WARN_ON(xa_is_value(folio)))
break;
if (WARN_ON(folio_test_hugetlb(folio)))
break;
/* Where in this folio to resume, and how much of it to cover. */
offset = offset_in_folio(folio, start + progress);
flen = min(folio_size(folio) - offset, len);
while (flen) {
/* Map at @offset and process at most up to the end of that page. */
void *base = kmap_local_folio(folio, offset);
part = min_t(size_t, flen,
PAGE_SIZE - offset_in_page(offset));
remain = step(base, progress, part, priv, priv2);
kunmap_local(base);
consumed = part - remain;
progress += consumed;
len -= consumed;
/* Done on a short step or once the whole request is satisfied. */
if (remain || len == 0)
goto out;
flen -= consumed;
offset += consumed;
}
}
out:
rcu_read_unlock();
iter->iov_offset += progress;
iter->count -= progress;
return progress;
}
/*
 * Handle ITER_DISCARD.
 *
 * There is no buffer to present, so @step is never called: the whole of @len
 * is simply consumed from the iterator's count and reported as processed.
 */
static __always_inline
size_t iterate_discard(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		       iov_step_f step)
{
	iter->count -= len;
	return len;
}
/**
 * iterate_and_advance2 - Iterate over an iterator
 * @iter: The iterator to iterate over.
 * @len: The amount to iterate over.
 * @priv: Data for the step functions.
 * @priv2: More data for the step functions.
 * @ustep: Function for UBUF/IOVEC iterators; given __user addresses.
 * @step: Function for other iterators; given kernel addresses.
 *
 * Walk the next part of @iter, covering at most @len bytes (clamped to what
 * the iterator still holds), and advance the iterator past whatever was
 * processed.  The buffer is handed over segment by segment; for kernel-side
 * iterators the segments are split at physical page boundaries and mapped, and
 * the mapped address is what the step function sees.
 *
 * Both @ustep and @step must be supplied.  @step receives mapped kernel
 * addresses; @ustep receives user addresses, which may fault because nothing
 * is pinned.
 *
 * Each step call gets the segment's address and length, the amount of data
 * iterated over so far, @priv and @priv2.  The running amount can, for
 * instance, be added to @priv to locate the matching part of a second buffer.
 * A step function returns how much of the segment it did not process, so 0
 * means the segment was processed completely.
 *
 * Return: the amount of data processed; 0 means nothing was processed and
 * @len means the request was processed to completion.
 */
static __always_inline
size_t iterate_and_advance2(struct iov_iter *iter, size_t len, void *priv,
			    void *priv2, iov_ustep_f ustep, iov_step_f step)
{
	size_t done;

	if (unlikely(len > iter->count))
		len = iter->count;
	if (unlikely(len == 0))
		return 0;

	/* User-backed types are tested first as the expected common case. */
	if (likely(iter_is_ubuf(iter)))
		done = iterate_ubuf(iter, len, priv, priv2, ustep);
	else if (likely(iter_is_iovec(iter)))
		done = iterate_iovec(iter, len, priv, priv2, ustep);
	else if (iov_iter_is_bvec(iter))
		done = iterate_bvec(iter, len, priv, priv2, step);
	else if (iov_iter_is_kvec(iter))
		done = iterate_kvec(iter, len, priv, priv2, step);
	else if (iov_iter_is_xarray(iter))
		done = iterate_xarray(iter, len, priv, priv2, step);
	else
		done = iterate_discard(iter, len, priv, priv2, step);

	return done;
}
/**
 * iterate_and_advance - Iterate over an iterator
 * @iter: The iterator to iterate over.
 * @len: The amount to iterate over.
 * @priv: Data for the step functions.
 * @ustep: Function for UBUF/IOVEC iterators; given __user addresses.
 * @step: Function for other iterators; given kernel addresses.
 *
 * Convenience form of iterate_and_advance2() for step functions that need
 * only one private pointer: priv2 is passed as NULL.
 *
 * Return: the amount of data processed, as for iterate_and_advance2().
 */
static __always_inline
size_t iterate_and_advance(struct iov_iter *iter, size_t len, void *priv,
			   iov_ustep_f ustep, iov_step_f step)
{
	void *no_priv2 = NULL;

	return iterate_and_advance2(iter, len, priv, no_priv2, ustep, step);
}
#endif /* _LINUX_IOV_ITER_H */
......@@ -3679,6 +3679,9 @@ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int l
return __skb_put_padto(skb, len, true);
}
bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i)
__must_check;
static inline int skb_add_data(struct sk_buff *skb,
struct iov_iter *from, int copy)
{
......
......@@ -21,12 +21,12 @@ struct kvec {
enum iter_type {
/* iter types */
ITER_UBUF,
ITER_IOVEC,
ITER_KVEC,
ITER_BVEC,
ITER_KVEC,
ITER_XARRAY,
ITER_DISCARD,
ITER_UBUF,
};
#define ITER_SOURCE 1 // == WRITE
......@@ -43,11 +43,7 @@ struct iov_iter {
bool copy_mc;
bool nofault;
bool data_source;
bool user_backed;
union {
size_t iov_offset;
int last_offset;
};
/*
* Hack alert: overlay ubuf_iovec with iovec + count, so
* that the members resolve correctly regardless of the type
......@@ -143,7 +139,7 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i)
static inline bool user_backed_iter(const struct iov_iter *i)
{
return i->user_backed;
return iter_is_ubuf(i) || iter_is_iovec(i);
}
/*
......@@ -342,27 +338,6 @@ iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes)
return npages;
}
struct csum_state {
__wsum csum;
size_t off;
};
size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csstate, struct iov_iter *i);
size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
static __always_inline __must_check
bool csum_and_copy_from_iter_full(void *addr, size_t bytes,
__wsum *csum, struct iov_iter *i)
{
size_t copied = csum_and_copy_from_iter(addr, bytes, csum, i);
if (likely(copied == bytes))
return true;
iov_iter_revert(i, copied);
return false;
}
size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
struct iov_iter *i);
struct iovec *iovec_from_user(const struct iovec __user *uvector,
unsigned long nr_segs, unsigned long fast_segs,
struct iovec *fast_iov, bool compat);
......@@ -383,7 +358,6 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction,
*i = (struct iov_iter) {
.iter_type = ITER_UBUF,
.copy_mc = false,
.user_backed = true,
.data_source = direction,
.ubuf = buf,
.count = count,
......
This diff is collapsed.
......@@ -50,7 +50,7 @@
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <linux/iov_iter.h>
#include <linux/indirect_call_wrapper.h>
#include <net/protocol.h>
......@@ -61,6 +61,7 @@
#include <net/tcp_states.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>
#include <crypto/hash.h>
/*
* Is a socket 'connection oriented' ?
......@@ -489,6 +490,24 @@ static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
return 0;
}
/*
 * Copy @bytes from @addr into iterator @i and feed the bytes that were
 * actually copied into the ahash request passed as @hashp.
 *
 * Returns the number of bytes copied.  The result of crypto_ahash_update() is
 * not examined.  Without CONFIG_CRYPTO_HASH nothing is copied and 0 is
 * returned.
 */
static size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
				    struct iov_iter *i)
{
#ifdef CONFIG_CRYPTO_HASH
	struct ahash_request *req = hashp;
	struct scatterlist sgl;
	size_t done = copy_to_iter(addr, bytes, i);

	/* Hash only what made it into the iterator. */
	sg_init_one(&sgl, addr, done);
	ahash_request_set_crypt(req, &sgl, NULL, done);
	crypto_ahash_update(req);
	return done;
#else
	return 0;
#endif
}
/**
* skb_copy_and_hash_datagram_iter - Copy datagram to an iovec iterator
* and update a hash.
......@@ -716,6 +735,60 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);
/*
 * User-address step function for csum_and_copy_to_iter().
 *
 * Copies @len bytes from @from + @progress to the user segment @iter_to while
 * checksumming them, and folds the result into the checksum that @priv2 points
 * to at offset @progress.  A zero result from csum_and_copy_to_user() is
 * treated as "nothing processed" (returns @len); otherwise the segment is
 * reported as fully processed (returns 0).
 */
static __always_inline
size_t copy_to_user_iter_csum(void __user *iter_to, size_t progress,
			      size_t len, void *from, void *priv2)
{
	__wsum *sump = priv2;
	__wsum seg_sum = csum_and_copy_to_user(from + progress, iter_to, len);

	*sump = csum_block_add(*sump, seg_sum, progress);
	if (seg_sum)
		return 0;
	return len;
}
/*
 * Kernel-address step function for csum_and_copy_to_iter().
 *
 * Copies @len bytes from the source buffer into the mapped segment @iter_to
 * while checksumming them, and folds the result into the checksum that @priv2
 * points to at offset @progress.  Always reports the segment as fully
 * processed (returns 0).
 */
static __always_inline
size_t memcpy_to_iter_csum(void *iter_to, size_t progress,
			   size_t len, void *from, void *priv2)
{
	__wsum *csum = priv2;
	/*
	 * @from is the start of the whole source buffer, not of this segment:
	 * the data for this segment begins @progress bytes in.  Without the
	 * offset, every segment after the first would copy and checksum the
	 * head of the buffer again.
	 */
	__wsum next = csum_partial_copy_nocheck(from + progress, iter_to, len);

	*csum = csum_block_add(*csum, next, progress);
	return 0;
}
/*
 * Running state handed to csum_and_copy_to_iter() through its opaque
 * _csstate argument and updated on every call.
 */
struct csum_state {
/* Checksum accumulated so far. */
__wsum csum;
/* Number of bytes accounted for so far; used as the offset when folding in new data. */
size_t off;
};
/*
 * Copy @bytes from @addr into iterator @i, accumulating a checksum of the data
 * in the struct csum_state passed as @_csstate.
 *
 * Warns once and returns 0 if @i is a data source.  For a discard iterator no
 * data is copied, so the buffer is checksummed directly and all of @bytes is
 * reported.  Otherwise returns the number of bytes that went into the
 * iterator.  In both cases the state's offset grows by the returned amount.
 */
static size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
				    struct iov_iter *i)
{
	struct csum_state *state = _csstate;
	__wsum acc;

	if (WARN_ON_ONCE(i->data_source))
		return 0;

	if (likely(!iov_iter_is_discard(i))) {
		/* Checksum while copying, working in offset-adjusted form. */
		acc = csum_shift(state->csum, state->off);
		bytes = iterate_and_advance2(i, bytes, (void *)addr, &acc,
					     copy_to_user_iter_csum,
					     memcpy_to_iter_csum);
		state->csum = csum_shift(acc, state->off);
	} else {
		/* The copy helpers are no use here: the data goes nowhere. */
		state->csum = csum_block_add(state->csum,
					     csum_partial(addr, bytes, 0),
					     state->off);
	}

	state->off += bytes;
	return bytes;
}
/**
* skb_copy_and_csum_datagram - Copy datagram to an iovec iterator
* and update a checksum.
......
......@@ -62,6 +62,7 @@
#include <linux/if_vlan.h>
#include <linux/mpls.h>
#include <linux/kcov.h>
#include <linux/iov_iter.h>
#include <net/protocol.h>
#include <net/dst.h>
......@@ -6931,3 +6932,42 @@ ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
return spliced ?: ret;
}
EXPORT_SYMBOL(skb_splice_from_iter);
/*
 * Kernel-address step function for csum_and_copy_from_iter_full().
 *
 * Copies @len bytes from the mapped segment @iter_from to @to + @progress
 * while checksumming them, and folds the result into the checksum that @priv2
 * points to at offset @progress.  Always reports the segment as fully
 * processed (returns 0).
 */
static __always_inline
size_t memcpy_from_iter_csum(void *iter_from, size_t progress,
			     size_t len, void *to, void *priv2)
{
	__wsum *sump = priv2;
	void *dst = to + progress;
	__wsum seg_sum = csum_partial_copy_nocheck(iter_from, dst, len);

	*sump = csum_block_add(*sump, seg_sum, progress);
	return 0;
}
/*
 * User-address step function for csum_and_copy_from_iter_full().
 *
 * Copies @len bytes from the user segment @iter_from to @to + @progress while
 * checksumming them, and folds the result into the checksum that @priv2 points
 * to at offset @progress.  A zero result from csum_and_copy_from_user() is
 * treated as "nothing processed" (returns @len); otherwise the segment is
 * reported as fully processed (returns 0).
 */
static __always_inline
size_t copy_from_user_iter_csum(void __user *iter_from, size_t progress,
				size_t len, void *to, void *priv2)
{
	__wsum *sump = priv2;
	__wsum seg_sum = csum_and_copy_from_user(iter_from, to + progress, len);

	*sump = csum_block_add(*sump, seg_sum, progress);
	if (seg_sum)
		return 0;
	return len;
}
/*
 * Copy exactly @bytes from iterator @i into @addr, folding a checksum of the
 * data into *@csum.
 *
 * Returns true only if all @bytes were copied.  On a short copy the iterator
 * is reverted by the amount that was consumed and false is returned.  Warns
 * once and returns false if @i is not a data source.
 */
bool csum_and_copy_from_iter_full(void *addr, size_t bytes,
				  __wsum *csum, struct iov_iter *i)
{
	size_t done;

	if (WARN_ON_ONCE(!i->data_source))
		return false;

	done = iterate_and_advance2(i, bytes, addr, csum,
				    copy_from_user_iter_csum,
				    memcpy_from_iter_csum);
	if (unlikely(done != bytes)) {
		/* All or nothing: hand back whatever was consumed. */
		iov_iter_revert(i, done);
		return false;
	}
	return true;
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);
......@@ -3527,7 +3527,7 @@ static ssize_t snd_pcm_readv(struct kiocb *iocb, struct iov_iter *to)
if (runtime->state == SNDRV_PCM_STATE_OPEN ||
runtime->state == SNDRV_PCM_STATE_DISCONNECTED)
return -EBADFD;
if (!to->user_backed)
if (!user_backed_iter(to))
return -EINVAL;
if (to->nr_segs > 1024 || to->nr_segs != runtime->channels)
return -EINVAL;
......@@ -3567,7 +3567,7 @@ static ssize_t snd_pcm_writev(struct kiocb *iocb, struct iov_iter *from)
if (runtime->state == SNDRV_PCM_STATE_OPEN ||
runtime->state == SNDRV_PCM_STATE_DISCONNECTED)
return -EBADFD;
if (!from->user_backed)
if (!user_backed_iter(from))
return -EINVAL;
if (from->nr_segs > 128 || from->nr_segs != runtime->channels ||
!frame_aligned(runtime, iov->iov_len))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment