Commit f4e8d802 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'vfs-6.10.rw' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs rw iterator updates from Christian Brauner:
 "The core fs signalfd, userfaultfd, and timerfd subsystems did still
  use f_op->read() instead of f_op->read_iter(). Convert them over since
  we should aim to get rid of f_op->read() at some point.

  Aside from that io_uring and others want to mark files as FMODE_NOWAIT
  so it can make use of per-IO nonblocking hints to enable more
  efficient IO. Converting those users to f_op->read_iter() allows them
  to be marked with FMODE_NOWAIT"

* tag 'vfs-6.10.rw' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  signalfd: convert to ->read_iter()
  userfaultfd: convert to ->read_iter()
  timerfd: convert to ->read_iter()
  new helper: copy_to_iter_full()
parents ef31ea6c 3a93daea
...@@ -68,8 +68,7 @@ static __poll_t signalfd_poll(struct file *file, poll_table *wait) ...@@ -68,8 +68,7 @@ static __poll_t signalfd_poll(struct file *file, poll_table *wait)
/* /*
* Copied from copy_siginfo_to_user() in kernel/signal.c * Copied from copy_siginfo_to_user() in kernel/signal.c
*/ */
static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, static int signalfd_copyinfo(struct iov_iter *to, kernel_siginfo_t const *kinfo)
kernel_siginfo_t const *kinfo)
{ {
struct signalfd_siginfo new; struct signalfd_siginfo new;
...@@ -146,10 +145,10 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, ...@@ -146,10 +145,10 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
break; break;
} }
if (copy_to_user(uinfo, &new, sizeof(struct signalfd_siginfo))) if (!copy_to_iter_full(&new, sizeof(struct signalfd_siginfo), to))
return -EFAULT; return -EFAULT;
return sizeof(*uinfo); return sizeof(struct signalfd_siginfo);
} }
static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info, static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info,
...@@ -199,28 +198,27 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info ...@@ -199,28 +198,27 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info
* error code. The "count" parameter must be at least the size of a * error code. The "count" parameter must be at least the size of a
* "struct signalfd_siginfo". * "struct signalfd_siginfo".
*/ */
static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, static ssize_t signalfd_read_iter(struct kiocb *iocb, struct iov_iter *to)
loff_t *ppos)
{ {
struct file *file = iocb->ki_filp;
struct signalfd_ctx *ctx = file->private_data; struct signalfd_ctx *ctx = file->private_data;
struct signalfd_siginfo __user *siginfo; size_t count = iov_iter_count(to);
int nonblock = file->f_flags & O_NONBLOCK;
ssize_t ret, total = 0; ssize_t ret, total = 0;
kernel_siginfo_t info; kernel_siginfo_t info;
bool nonblock;
count /= sizeof(struct signalfd_siginfo); count /= sizeof(struct signalfd_siginfo);
if (!count) if (!count)
return -EINVAL; return -EINVAL;
siginfo = (struct signalfd_siginfo __user *) buf; nonblock = file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT;
do { do {
ret = signalfd_dequeue(ctx, &info, nonblock); ret = signalfd_dequeue(ctx, &info, nonblock);
if (unlikely(ret <= 0)) if (unlikely(ret <= 0))
break; break;
ret = signalfd_copyinfo(siginfo, &info); ret = signalfd_copyinfo(to, &info);
if (ret < 0) if (ret < 0)
break; break;
siginfo++;
total += ret; total += ret;
nonblock = 1; nonblock = 1;
} while (--count); } while (--count);
...@@ -246,7 +244,7 @@ static const struct file_operations signalfd_fops = { ...@@ -246,7 +244,7 @@ static const struct file_operations signalfd_fops = {
#endif #endif
.release = signalfd_release, .release = signalfd_release,
.poll = signalfd_poll, .poll = signalfd_poll,
.read = signalfd_read, .read_iter = signalfd_read_iter,
.llseek = noop_llseek, .llseek = noop_llseek,
}; };
...@@ -265,20 +263,34 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags) ...@@ -265,20 +263,34 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
signotset(mask); signotset(mask);
if (ufd == -1) { if (ufd == -1) {
struct file *file;
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx) if (!ctx)
return -ENOMEM; return -ENOMEM;
ctx->sigmask = *mask; ctx->sigmask = *mask;
ufd = get_unused_fd_flags(flags & O_CLOEXEC);
if (ufd < 0) {
kfree(ctx);
return ufd;
}
file = anon_inode_getfile("[signalfd]", &signalfd_fops, ctx,
O_RDWR | (flags & O_NONBLOCK));
if (IS_ERR(file)) {
put_unused_fd(ufd);
kfree(ctx);
return ufd;
}
file->f_mode |= FMODE_NOWAIT;
/* /*
* When we call this, the initialization must be complete, since * When we call this, the initialization must be complete, since
* anon_inode_getfd() will install the fd. * anon_inode_getfd() will install the fd.
*/ */
ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx, fd_install(ufd, file);
O_RDWR | (flags & (O_CLOEXEC | O_NONBLOCK)));
if (ufd < 0)
kfree(ctx);
} else { } else {
struct fd f = fdget(ufd); struct fd f = fdget(ufd);
if (!f.file) if (!f.file)
......
...@@ -262,17 +262,18 @@ static __poll_t timerfd_poll(struct file *file, poll_table *wait) ...@@ -262,17 +262,18 @@ static __poll_t timerfd_poll(struct file *file, poll_table *wait)
return events; return events;
} }
static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, static ssize_t timerfd_read_iter(struct kiocb *iocb, struct iov_iter *to)
loff_t *ppos)
{ {
struct file *file = iocb->ki_filp;
struct timerfd_ctx *ctx = file->private_data; struct timerfd_ctx *ctx = file->private_data;
ssize_t res; ssize_t res;
u64 ticks = 0; u64 ticks = 0;
if (count < sizeof(ticks)) if (iov_iter_count(to) < sizeof(ticks))
return -EINVAL; return -EINVAL;
spin_lock_irq(&ctx->wqh.lock); spin_lock_irq(&ctx->wqh.lock);
if (file->f_flags & O_NONBLOCK) if (file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT)
res = -EAGAIN; res = -EAGAIN;
else else
res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
...@@ -312,8 +313,11 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, ...@@ -312,8 +313,11 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
ctx->ticks = 0; ctx->ticks = 0;
} }
spin_unlock_irq(&ctx->wqh.lock); spin_unlock_irq(&ctx->wqh.lock);
if (ticks) if (ticks) {
res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks); res = copy_to_iter(&ticks, sizeof(ticks), to);
if (!res)
res = -EFAULT;
}
return res; return res;
} }
...@@ -384,7 +388,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg ...@@ -384,7 +388,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg
static const struct file_operations timerfd_fops = { static const struct file_operations timerfd_fops = {
.release = timerfd_release, .release = timerfd_release,
.poll = timerfd_poll, .poll = timerfd_poll,
.read = timerfd_read, .read_iter = timerfd_read_iter,
.llseek = noop_llseek, .llseek = noop_llseek,
.show_fdinfo = timerfd_show, .show_fdinfo = timerfd_show,
.unlocked_ioctl = timerfd_ioctl, .unlocked_ioctl = timerfd_ioctl,
...@@ -407,6 +411,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) ...@@ -407,6 +411,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
{ {
int ufd; int ufd;
struct timerfd_ctx *ctx; struct timerfd_ctx *ctx;
struct file *file;
/* Check the TFD_* constants for consistency. */ /* Check the TFD_* constants for consistency. */
BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
...@@ -443,11 +448,22 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) ...@@ -443,11 +448,22 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
ctx->moffs = ktime_mono_to_real(0); ctx->moffs = ktime_mono_to_real(0);
ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, ufd = get_unused_fd_flags(flags & TFD_SHARED_FCNTL_FLAGS);
if (ufd < 0) {
kfree(ctx);
return ufd;
}
file = anon_inode_getfile("[timerfd]", &timerfd_fops, ctx,
O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
if (ufd < 0) if (IS_ERR(file)) {
put_unused_fd(ufd);
kfree(ctx); kfree(ctx);
return PTR_ERR(file);
}
file->f_mode |= FMODE_NOWAIT;
fd_install(ufd, file);
return ufd; return ufd;
} }
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
#include <linux/swapops.h> #include <linux/swapops.h>
#include <linux/miscdevice.h> #include <linux/miscdevice.h>
#include <linux/uio.h>
static int sysctl_unprivileged_userfaultfd __read_mostly; static int sysctl_unprivileged_userfaultfd __read_mostly;
...@@ -282,7 +283,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, ...@@ -282,7 +283,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
/* /*
* Verify the pagetables are still not ok after having reigstered into * Verify the pagetables are still not ok after having reigstered into
* the fault_pending_wqh to avoid userland having to UFFDIO_WAKE any * the fault_pending_wqh to avoid userland having to UFFDIO_WAKE any
* userfault that has already been resolved, if userfaultfd_read and * userfault that has already been resolved, if userfaultfd_read_iter and
* UFFDIO_COPY|ZEROPAGE are being run simultaneously on two different * UFFDIO_COPY|ZEROPAGE are being run simultaneously on two different
* threads. * threads.
*/ */
...@@ -1181,34 +1182,34 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, ...@@ -1181,34 +1182,34 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
return ret; return ret;
} }
static ssize_t userfaultfd_read(struct file *file, char __user *buf, static ssize_t userfaultfd_read_iter(struct kiocb *iocb, struct iov_iter *to)
size_t count, loff_t *ppos)
{ {
struct file *file = iocb->ki_filp;
struct userfaultfd_ctx *ctx = file->private_data; struct userfaultfd_ctx *ctx = file->private_data;
ssize_t _ret, ret = 0; ssize_t _ret, ret = 0;
struct uffd_msg msg; struct uffd_msg msg;
int no_wait = file->f_flags & O_NONBLOCK;
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
bool no_wait;
if (!userfaultfd_is_initialized(ctx)) if (!userfaultfd_is_initialized(ctx))
return -EINVAL; return -EINVAL;
no_wait = file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT;
for (;;) { for (;;) {
if (count < sizeof(msg)) if (iov_iter_count(to) < sizeof(msg))
return ret ? ret : -EINVAL; return ret ? ret : -EINVAL;
_ret = userfaultfd_ctx_read(ctx, no_wait, &msg, inode); _ret = userfaultfd_ctx_read(ctx, no_wait, &msg, inode);
if (_ret < 0) if (_ret < 0)
return ret ? ret : _ret; return ret ? ret : _ret;
if (copy_to_user((__u64 __user *) buf, &msg, sizeof(msg))) _ret = !copy_to_iter_full(&msg, sizeof(msg), to);
if (_ret)
return ret ? ret : -EFAULT; return ret ? ret : -EFAULT;
ret += sizeof(msg); ret += sizeof(msg);
buf += sizeof(msg);
count -= sizeof(msg);
/* /*
* Allow to read more than one fault at time but only * Allow to read more than one fault at time but only
* block if waiting for the very first one. * block if waiting for the very first one.
*/ */
no_wait = O_NONBLOCK; no_wait = true;
} }
} }
...@@ -2176,7 +2177,7 @@ static const struct file_operations userfaultfd_fops = { ...@@ -2176,7 +2177,7 @@ static const struct file_operations userfaultfd_fops = {
#endif #endif
.release = userfaultfd_release, .release = userfaultfd_release,
.poll = userfaultfd_poll, .poll = userfaultfd_poll,
.read = userfaultfd_read, .read_iter = userfaultfd_read_iter,
.unlocked_ioctl = userfaultfd_ioctl, .unlocked_ioctl = userfaultfd_ioctl,
.compat_ioctl = compat_ptr_ioctl, .compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek, .llseek = noop_llseek,
...@@ -2196,6 +2197,7 @@ static void init_once_userfaultfd_ctx(void *mem) ...@@ -2196,6 +2197,7 @@ static void init_once_userfaultfd_ctx(void *mem)
static int new_userfaultfd(int flags) static int new_userfaultfd(int flags)
{ {
struct userfaultfd_ctx *ctx; struct userfaultfd_ctx *ctx;
struct file *file;
int fd; int fd;
BUG_ON(!current->mm); BUG_ON(!current->mm);
...@@ -2219,16 +2221,26 @@ static int new_userfaultfd(int flags) ...@@ -2219,16 +2221,26 @@ static int new_userfaultfd(int flags)
init_rwsem(&ctx->map_changing_lock); init_rwsem(&ctx->map_changing_lock);
atomic_set(&ctx->mmap_changing, 0); atomic_set(&ctx->mmap_changing, 0);
ctx->mm = current->mm; ctx->mm = current->mm;
/* prevent the mm struct to be freed */
mmgrab(ctx->mm); fd = get_unused_fd_flags(flags & UFFD_SHARED_FCNTL_FLAGS);
if (fd < 0)
goto err_out;
/* Create a new inode so that the LSM can block the creation. */ /* Create a new inode so that the LSM can block the creation. */
fd = anon_inode_create_getfd("[userfaultfd]", &userfaultfd_fops, ctx, file = anon_inode_create_getfile("[userfaultfd]", &userfaultfd_fops, ctx,
O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL); O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL);
if (fd < 0) { if (IS_ERR(file)) {
mmdrop(ctx->mm); put_unused_fd(fd);
kmem_cache_free(userfaultfd_ctx_cachep, ctx); fd = PTR_ERR(file);
goto err_out;
} }
/* prevent the mm struct to be freed */
mmgrab(ctx->mm);
file->f_mode |= FMODE_NOWAIT;
fd_install(fd, file);
return fd;
err_out:
kmem_cache_free(userfaultfd_ctx_cachep, ctx);
return fd; return fd;
} }
......
...@@ -205,6 +205,16 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) ...@@ -205,6 +205,16 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
return 0; return 0;
} }
static __always_inline __must_check
bool copy_to_iter_full(const void *addr, size_t bytes, struct iov_iter *i)
{
size_t copied = copy_to_iter(addr, bytes, i);
if (likely(copied == bytes))
return true;
iov_iter_revert(i, copied);
return false;
}
static __always_inline __must_check static __always_inline __must_check
bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{ {
......
...@@ -379,14 +379,7 @@ static inline bool udp_skb_is_linear(struct sk_buff *skb) ...@@ -379,14 +379,7 @@ static inline bool udp_skb_is_linear(struct sk_buff *skb)
static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, static inline int copy_linear_skb(struct sk_buff *skb, int len, int off,
struct iov_iter *to) struct iov_iter *to)
{ {
int n; return copy_to_iter_full(skb->data + off, len, to) ? 0 : -EFAULT;
n = copy_to_iter(skb->data + off, len, to);
if (n == len)
return 0;
iov_iter_revert(to, n);
return -EFAULT;
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment