Commit 48aba79b authored by Linus Torvalds

Merge tag 'for-5.11/io_uring-2020-12-14' of git://git.kernel.dk/linux-block

Pull io_uring updates from Jens Axboe:
 "Fairly light set of changes this time around, and mostly some bits
  that were pushed out to 5.11 instead of 5.10, fixes/cleanups, and a
  few features. In particular:

   - Cleanups around iovec import (David Laight, Pavel)

   - Add timeout support for io_uring_enter(2), which enables us to
     clean up liburing and avoid a timeout sqe submission in the
     completion path.

     The big win here is that it allows setups that split SQ and CQ
     handling into separate threads to avoid locking, as the CQ side
     will no longer submit when timeouts are needed when waiting for
     events (Hao Xu)

   - Add support for socket shutdown, and renameat/unlinkat.

   - SQPOLL cleanups and improvements (Xiaoguang Wang)

   - Allow SQPOLL setups for CAP_SYS_NICE, and enable regular
     (non-fixed) files to be used.

   - Cancellation improvements (Pavel)

   - Fixed file reference improvements (Pavel)

   - IOPOLL related race fixes (Pavel)

   - Lots of other little fixes and cleanups (mostly Pavel)"

* tag 'for-5.11/io_uring-2020-12-14' of git://git.kernel.dk/linux-block: (43 commits)
  io_uring: fix io_cqring_events()'s noflush
  io_uring: fix racy IOPOLL flush overflow
  io_uring: fix racy IOPOLL completions
  io_uring: always let io_iopoll_complete() complete polled io
  io_uring: add timeout update
  io_uring: restructure io_timeout_cancel()
  io_uring: fix files cancellation
  io_uring: use bottom half safe lock for fixed file data
  io_uring: fix miscounting ios_left
  io_uring: change submit file state invariant
  io_uring: check kthread stopped flag when sq thread is unparked
  io_uring: share fixed_file_refs b/w multiple rsrcs
  io_uring: replace inflight_wait with tctx->wait
  io_uring: don't take fs for recvmsg/sendmsg
  io_uring: only wake up sq thread while current task is in io worker context
  io_uring: don't acquire uring_lock twice
  io_uring: initialize 'timeout' properly in io_sq_thread()
  io_uring: refactor io_sq_thread() handling
  io_uring: always batch cancel in *cancel_files()
  io_uring: pass files into kill timeouts/poll
  ...
parents 005b2a9d 59850d22
...@@ -78,6 +78,8 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *, ...@@ -78,6 +78,8 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
long do_rmdir(int dfd, struct filename *name); long do_rmdir(int dfd, struct filename *name);
long do_unlinkat(int dfd, struct filename *name); long do_unlinkat(int dfd, struct filename *name);
int may_linkat(struct path *link); int may_linkat(struct path *link);
int do_renameat2(int olddfd, struct filename *oldname, int newdfd,
struct filename *newname, unsigned int flags);
/* /*
* namespace.c * namespace.c
......
...@@ -1078,16 +1078,6 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, ...@@ -1078,16 +1078,6 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
return IO_WQ_CANCEL_NOTFOUND; return IO_WQ_CANCEL_NOTFOUND;
} }
static bool io_wq_io_cb_cancel_data(struct io_wq_work *work, void *data)
{
return work == data;
}
enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork)
{
return io_wq_cancel_cb(wq, io_wq_io_cb_cancel_data, (void *)cwork, false);
}
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
{ {
int ret = -ENOMEM, node; int ret = -ENOMEM, node;
......
...@@ -129,7 +129,6 @@ static inline bool io_wq_is_hashed(struct io_wq_work *work) ...@@ -129,7 +129,6 @@ static inline bool io_wq_is_hashed(struct io_wq_work *work)
} }
void io_wq_cancel_all(struct io_wq *wq); void io_wq_cancel_all(struct io_wq *wq);
enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork);
typedef bool (work_cancel_fn)(struct io_wq_work *, void *); typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
......
This diff is collapsed.
...@@ -4346,8 +4346,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -4346,8 +4346,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
} }
EXPORT_SYMBOL(vfs_rename); EXPORT_SYMBOL(vfs_rename);
static int do_renameat2(int olddfd, const char __user *oldname, int newdfd, int do_renameat2(int olddfd, struct filename *from, int newdfd,
const char __user *newname, unsigned int flags) struct filename *to, unsigned int flags)
{ {
struct dentry *old_dentry, *new_dentry; struct dentry *old_dentry, *new_dentry;
struct dentry *trap; struct dentry *trap;
...@@ -4355,32 +4355,30 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd, ...@@ -4355,32 +4355,30 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
struct qstr old_last, new_last; struct qstr old_last, new_last;
int old_type, new_type; int old_type, new_type;
struct inode *delegated_inode = NULL; struct inode *delegated_inode = NULL;
struct filename *from;
struct filename *to;
unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET; unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
bool should_retry = false; bool should_retry = false;
int error; int error = -EINVAL;
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
return -EINVAL; goto put_both;
if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) && if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) &&
(flags & RENAME_EXCHANGE)) (flags & RENAME_EXCHANGE))
return -EINVAL; goto put_both;
if (flags & RENAME_EXCHANGE) if (flags & RENAME_EXCHANGE)
target_flags = 0; target_flags = 0;
retry: retry:
from = filename_parentat(olddfd, getname(oldname), lookup_flags, from = filename_parentat(olddfd, from, lookup_flags, &old_path,
&old_path, &old_last, &old_type); &old_last, &old_type);
if (IS_ERR(from)) { if (IS_ERR(from)) {
error = PTR_ERR(from); error = PTR_ERR(from);
goto exit; goto put_new;
} }
to = filename_parentat(newdfd, getname(newname), lookup_flags, to = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
&new_path, &new_last, &new_type); &new_type);
if (IS_ERR(to)) { if (IS_ERR(to)) {
error = PTR_ERR(to); error = PTR_ERR(to);
goto exit1; goto exit1;
...@@ -4473,34 +4471,40 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd, ...@@ -4473,34 +4471,40 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
if (retry_estale(error, lookup_flags)) if (retry_estale(error, lookup_flags))
should_retry = true; should_retry = true;
path_put(&new_path); path_put(&new_path);
putname(to);
exit1: exit1:
path_put(&old_path); path_put(&old_path);
putname(from);
if (should_retry) { if (should_retry) {
should_retry = false; should_retry = false;
lookup_flags |= LOOKUP_REVAL; lookup_flags |= LOOKUP_REVAL;
goto retry; goto retry;
} }
exit: put_both:
if (!IS_ERR(from))
putname(from);
put_new:
if (!IS_ERR(to))
putname(to);
return error; return error;
} }
SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
int, newdfd, const char __user *, newname, unsigned int, flags) int, newdfd, const char __user *, newname, unsigned int, flags)
{ {
return do_renameat2(olddfd, oldname, newdfd, newname, flags); return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname),
flags);
} }
SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
int, newdfd, const char __user *, newname) int, newdfd, const char __user *, newname)
{ {
return do_renameat2(olddfd, oldname, newdfd, newname, 0); return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname),
0);
} }
SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname)
{ {
return do_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); return do_renameat2(AT_FDCWD, getname(oldname), AT_FDCWD,
getname(newname), 0);
} }
int readlink_copy(char __user *buffer, int buflen, const char *link) int readlink_copy(char __user *buffer, int buflen, const char *link)
......
...@@ -436,6 +436,7 @@ extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, ...@@ -436,6 +436,7 @@ extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
int __user *usockaddr_len); int __user *usockaddr_len);
extern int __sys_socketpair(int family, int type, int protocol, extern int __sys_socketpair(int family, int type, int protocol,
int __user *usockvec); int __user *usockvec);
extern int __sys_shutdown_sock(struct socket *sock, int how);
extern int __sys_shutdown(int fd, int how); extern int __sys_shutdown(int fd, int how);
extern struct ns_common *get_net_ns(struct ns_common *ns); extern struct ns_common *get_net_ns(struct ns_common *ns);
......
...@@ -317,7 +317,7 @@ asmlinkage long sys_io_uring_setup(u32 entries, ...@@ -317,7 +317,7 @@ asmlinkage long sys_io_uring_setup(u32 entries,
struct io_uring_params __user *p); struct io_uring_params __user *p);
asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit, asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit,
u32 min_complete, u32 flags, u32 min_complete, u32 flags,
const sigset_t __user *sig, size_t sigsz); const void __user *argp, size_t argsz);
asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op, asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op,
void __user *arg, unsigned int nr_args); void __user *arg, unsigned int nr_args);
......
...@@ -42,6 +42,8 @@ struct io_uring_sqe { ...@@ -42,6 +42,8 @@ struct io_uring_sqe {
__u32 statx_flags; __u32 statx_flags;
__u32 fadvise_advice; __u32 fadvise_advice;
__u32 splice_flags; __u32 splice_flags;
__u32 rename_flags;
__u32 unlink_flags;
}; };
__u64 user_data; /* data to be passed back at completion time */ __u64 user_data; /* data to be passed back at completion time */
union { union {
...@@ -132,6 +134,9 @@ enum { ...@@ -132,6 +134,9 @@ enum {
IORING_OP_PROVIDE_BUFFERS, IORING_OP_PROVIDE_BUFFERS,
IORING_OP_REMOVE_BUFFERS, IORING_OP_REMOVE_BUFFERS,
IORING_OP_TEE, IORING_OP_TEE,
IORING_OP_SHUTDOWN,
IORING_OP_RENAMEAT,
IORING_OP_UNLINKAT,
/* this goes last, obviously */ /* this goes last, obviously */
IORING_OP_LAST, IORING_OP_LAST,
...@@ -146,6 +151,7 @@ enum { ...@@ -146,6 +151,7 @@ enum {
* sqe->timeout_flags * sqe->timeout_flags
*/ */
#define IORING_TIMEOUT_ABS (1U << 0) #define IORING_TIMEOUT_ABS (1U << 0)
#define IORING_TIMEOUT_UPDATE (1U << 1)
/* /*
* sqe->splice_flags * sqe->splice_flags
...@@ -226,6 +232,7 @@ struct io_cqring_offsets { ...@@ -226,6 +232,7 @@ struct io_cqring_offsets {
#define IORING_ENTER_GETEVENTS (1U << 0) #define IORING_ENTER_GETEVENTS (1U << 0)
#define IORING_ENTER_SQ_WAKEUP (1U << 1) #define IORING_ENTER_SQ_WAKEUP (1U << 1)
#define IORING_ENTER_SQ_WAIT (1U << 2) #define IORING_ENTER_SQ_WAIT (1U << 2)
#define IORING_ENTER_EXT_ARG (1U << 3)
/* /*
* Passed in for io_uring_setup(2). Copied back with updated info on success * Passed in for io_uring_setup(2). Copied back with updated info on success
...@@ -253,6 +260,8 @@ struct io_uring_params { ...@@ -253,6 +260,8 @@ struct io_uring_params {
#define IORING_FEAT_CUR_PERSONALITY (1U << 4) #define IORING_FEAT_CUR_PERSONALITY (1U << 4)
#define IORING_FEAT_FAST_POLL (1U << 5) #define IORING_FEAT_FAST_POLL (1U << 5)
#define IORING_FEAT_POLL_32BITS (1U << 6) #define IORING_FEAT_POLL_32BITS (1U << 6)
#define IORING_FEAT_SQPOLL_NONFIXED (1U << 7)
#define IORING_FEAT_EXT_ARG (1U << 8)
/* /*
* io_uring_register(2) opcodes and arguments * io_uring_register(2) opcodes and arguments
...@@ -329,4 +338,11 @@ enum { ...@@ -329,4 +338,11 @@ enum {
IORING_RESTRICTION_LAST IORING_RESTRICTION_LAST
}; };
struct io_uring_getevents_arg {
__u64 sigmask;
__u32 sigmask_sz;
__u32 pad;
__u64 ts;
};
#endif #endif
...@@ -2175,6 +2175,17 @@ SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, ...@@ -2175,6 +2175,17 @@ SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
* Shutdown a socket. * Shutdown a socket.
*/ */
int __sys_shutdown_sock(struct socket *sock, int how)
{
int err;
err = security_socket_shutdown(sock, how);
if (!err)
err = sock->ops->shutdown(sock, how);
return err;
}
int __sys_shutdown(int fd, int how) int __sys_shutdown(int fd, int how)
{ {
int err, fput_needed; int err, fput_needed;
...@@ -2182,9 +2193,7 @@ int __sys_shutdown(int fd, int how) ...@@ -2182,9 +2193,7 @@ int __sys_shutdown(int fd, int how)
sock = sockfd_lookup_light(fd, &err, &fput_needed); sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock != NULL) { if (sock != NULL) {
err = security_socket_shutdown(sock, how); err = __sys_shutdown_sock(sock, how);
if (!err)
err = sock->ops->shutdown(sock, how);
fput_light(sock->file, fput_needed); fput_light(sock->file, fput_needed);
} }
return err; return err;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment