Commit b961f8dc authored by Linus Torvalds

Merge tag 'io_uring-5.8-2020-06-11' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
 "A few late stragglers in here. In particular:

   - Validate full range for provided buffers (Bijan)

   - Fix bad use of kfree() in buffer registration failure (Denis)

   - Don't allow close of ring itself, it's not fully safe. Making it
     fully safe would require making the system call more expensive,
     which isn't worth it.

   - Buffer selection fix

   - Regression fix for O_NONBLOCK retry

   - Make IORING_OP_ACCEPT honor O_NONBLOCK (Jiufei)

   - Restrict opcode handling for SQ/IOPOLL (Pavel)

   - io-wq work handling cleanups and improvements (Pavel, Xiaoguang)

   - IOPOLL race fix (Xiaoguang)"

* tag 'io_uring-5.8-2020-06-11' of git://git.kernel.dk/linux-block:
  io_uring: fix io_kiocb.flags modification race in IOPOLL mode
  io_uring: check file O_NONBLOCK state for accept
  io_uring: avoid unnecessary io_wq_work copy for fast poll feature
  io_uring: avoid whole io_wq_work copy for requests completed inline
  io_uring: allow O_NONBLOCK async retry
  io_wq: add per-wq work handler instead of per work
  io_uring: don't arm a timeout through work.func
  io_uring: remove custom ->func handlers
  io_uring: don't derive close state from ->func
  io_uring: use kvfree() in io_sqe_buffer_register()
  io_uring: validate the full range of provided buffers for access
  io_uring: re-set iov base/len for buffer select retry
  io_uring: move send/recv IOPOLL check into prep
  io_uring: deduplicate io_openat{,2}_prep()
  io_uring: do build_open_how() only once
  io_uring: fix {SQ,IO}POLL with unsupported opcodes
  io_uring: disallow close of ring itself
parents a58dfea2 65a6543d
...@@ -111,6 +111,7 @@ struct io_wq { ...@@ -111,6 +111,7 @@ struct io_wq {
unsigned long state; unsigned long state;
free_work_fn *free_work; free_work_fn *free_work;
io_wq_work_fn *do_work;
struct task_struct *manager; struct task_struct *manager;
struct user_struct *user; struct user_struct *user;
...@@ -523,7 +524,7 @@ static void io_worker_handle_work(struct io_worker *worker) ...@@ -523,7 +524,7 @@ static void io_worker_handle_work(struct io_worker *worker)
hash = io_get_work_hash(work); hash = io_get_work_hash(work);
linked = old_work = work; linked = old_work = work;
linked->func(&linked); wq->do_work(&linked);
linked = (old_work == linked) ? NULL : linked; linked = (old_work == linked) ? NULL : linked;
work = next_hashed; work = next_hashed;
...@@ -780,7 +781,7 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe) ...@@ -780,7 +781,7 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
struct io_wq_work *old_work = work; struct io_wq_work *old_work = work;
work->flags |= IO_WQ_WORK_CANCEL; work->flags |= IO_WQ_WORK_CANCEL;
work->func(&work); wq->do_work(&work);
work = (work == old_work) ? NULL : work; work = (work == old_work) ? NULL : work;
wq->free_work(old_work); wq->free_work(old_work);
} while (work); } while (work);
...@@ -1018,7 +1019,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) ...@@ -1018,7 +1019,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
int ret = -ENOMEM, node; int ret = -ENOMEM, node;
struct io_wq *wq; struct io_wq *wq;
if (WARN_ON_ONCE(!data->free_work)) if (WARN_ON_ONCE(!data->free_work || !data->do_work))
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
wq = kzalloc(sizeof(*wq), GFP_KERNEL); wq = kzalloc(sizeof(*wq), GFP_KERNEL);
...@@ -1032,6 +1033,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) ...@@ -1032,6 +1033,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
} }
wq->free_work = data->free_work; wq->free_work = data->free_work;
wq->do_work = data->do_work;
/* caller must already hold a reference to this */ /* caller must already hold a reference to this */
wq->user = data->user; wq->user = data->user;
...@@ -1088,7 +1090,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) ...@@ -1088,7 +1090,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
bool io_wq_get(struct io_wq *wq, struct io_wq_data *data) bool io_wq_get(struct io_wq *wq, struct io_wq_data *data)
{ {
if (data->free_work != wq->free_work) if (data->free_work != wq->free_work || data->do_work != wq->do_work)
return false; return false;
return refcount_inc_not_zero(&wq->use_refs); return refcount_inc_not_zero(&wq->use_refs);
......
...@@ -85,7 +85,6 @@ static inline void wq_list_del(struct io_wq_work_list *list, ...@@ -85,7 +85,6 @@ static inline void wq_list_del(struct io_wq_work_list *list,
struct io_wq_work { struct io_wq_work {
struct io_wq_work_node list; struct io_wq_work_node list;
void (*func)(struct io_wq_work **);
struct files_struct *files; struct files_struct *files;
struct mm_struct *mm; struct mm_struct *mm;
const struct cred *creds; const struct cred *creds;
...@@ -94,11 +93,6 @@ struct io_wq_work { ...@@ -94,11 +93,6 @@ struct io_wq_work {
pid_t task_pid; pid_t task_pid;
}; };
#define INIT_IO_WORK(work, _func) \
do { \
*(work) = (struct io_wq_work){ .func = _func }; \
} while (0) \
static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
{ {
if (!work->list.next) if (!work->list.next)
...@@ -108,10 +102,12 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) ...@@ -108,10 +102,12 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
} }
typedef void (free_work_fn)(struct io_wq_work *); typedef void (free_work_fn)(struct io_wq_work *);
typedef void (io_wq_work_fn)(struct io_wq_work **);
struct io_wq_data { struct io_wq_data {
struct user_struct *user; struct user_struct *user;
io_wq_work_fn *do_work;
free_work_fn *free_work; free_work_fn *free_work;
}; };
......
...@@ -528,7 +528,6 @@ enum { ...@@ -528,7 +528,6 @@ enum {
REQ_F_INFLIGHT_BIT, REQ_F_INFLIGHT_BIT,
REQ_F_CUR_POS_BIT, REQ_F_CUR_POS_BIT,
REQ_F_NOWAIT_BIT, REQ_F_NOWAIT_BIT,
REQ_F_IOPOLL_COMPLETED_BIT,
REQ_F_LINK_TIMEOUT_BIT, REQ_F_LINK_TIMEOUT_BIT,
REQ_F_TIMEOUT_BIT, REQ_F_TIMEOUT_BIT,
REQ_F_ISREG_BIT, REQ_F_ISREG_BIT,
...@@ -540,6 +539,8 @@ enum { ...@@ -540,6 +539,8 @@ enum {
REQ_F_POLLED_BIT, REQ_F_POLLED_BIT,
REQ_F_BUFFER_SELECTED_BIT, REQ_F_BUFFER_SELECTED_BIT,
REQ_F_NO_FILE_TABLE_BIT, REQ_F_NO_FILE_TABLE_BIT,
REQ_F_QUEUE_TIMEOUT_BIT,
REQ_F_WORK_INITIALIZED_BIT,
/* not a real bit, just to check we're not overflowing the space */ /* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT, __REQ_F_LAST_BIT,
...@@ -571,8 +572,6 @@ enum { ...@@ -571,8 +572,6 @@ enum {
REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT),
/* must not punt to workers */ /* must not punt to workers */
REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT),
/* polled IO has completed */
REQ_F_IOPOLL_COMPLETED = BIT(REQ_F_IOPOLL_COMPLETED_BIT),
/* has linked timeout */ /* has linked timeout */
REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT),
/* timeout request */ /* timeout request */
...@@ -595,6 +594,10 @@ enum { ...@@ -595,6 +594,10 @@ enum {
REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT),
/* doesn't need file table for this request */ /* doesn't need file table for this request */
REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT), REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT),
/* needs to queue linked timeout */
REQ_F_QUEUE_TIMEOUT = BIT(REQ_F_QUEUE_TIMEOUT_BIT),
/* io_wq_work is initialized */
REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT),
}; };
struct async_poll { struct async_poll {
...@@ -633,6 +636,8 @@ struct io_kiocb { ...@@ -633,6 +636,8 @@ struct io_kiocb {
struct io_async_ctx *io; struct io_async_ctx *io;
int cflags; int cflags;
u8 opcode; u8 opcode;
/* polled IO has completed */
u8 iopoll_completed;
u16 buf_index; u16 buf_index;
...@@ -697,6 +702,8 @@ struct io_op_def { ...@@ -697,6 +702,8 @@ struct io_op_def {
unsigned needs_mm : 1; unsigned needs_mm : 1;
/* needs req->file assigned */ /* needs req->file assigned */
unsigned needs_file : 1; unsigned needs_file : 1;
/* don't fail if file grab fails */
unsigned needs_file_no_error : 1;
/* hash wq insertion if file is a regular file */ /* hash wq insertion if file is a regular file */
unsigned hash_reg_file : 1; unsigned hash_reg_file : 1;
/* unbound wq insertion if file is a non-regular file */ /* unbound wq insertion if file is a non-regular file */
...@@ -803,6 +810,8 @@ static const struct io_op_def io_op_defs[] = { ...@@ -803,6 +810,8 @@ static const struct io_op_def io_op_defs[] = {
.needs_fs = 1, .needs_fs = 1,
}, },
[IORING_OP_CLOSE] = { [IORING_OP_CLOSE] = {
.needs_file = 1,
.needs_file_no_error = 1,
.file_table = 1, .file_table = 1,
}, },
[IORING_OP_FILES_UPDATE] = { [IORING_OP_FILES_UPDATE] = {
...@@ -903,6 +912,19 @@ EXPORT_SYMBOL(io_uring_get_socket); ...@@ -903,6 +912,19 @@ EXPORT_SYMBOL(io_uring_get_socket);
static void io_file_put_work(struct work_struct *work); static void io_file_put_work(struct work_struct *work);
/*
* Note: must call io_req_init_async() for the first time you
* touch any members of io_wq_work.
*/
static inline void io_req_init_async(struct io_kiocb *req)
{
if (req->flags & REQ_F_WORK_INITIALIZED)
return;
memset(&req->work, 0, sizeof(req->work));
req->flags |= REQ_F_WORK_INITIALIZED;
}
static inline bool io_async_submit(struct io_ring_ctx *ctx) static inline bool io_async_submit(struct io_ring_ctx *ctx)
{ {
return ctx->flags & IORING_SETUP_SQPOLL; return ctx->flags & IORING_SETUP_SQPOLL;
...@@ -1029,6 +1051,9 @@ static inline void io_req_work_grab_env(struct io_kiocb *req, ...@@ -1029,6 +1051,9 @@ static inline void io_req_work_grab_env(struct io_kiocb *req,
static inline void io_req_work_drop_env(struct io_kiocb *req) static inline void io_req_work_drop_env(struct io_kiocb *req)
{ {
if (!(req->flags & REQ_F_WORK_INITIALIZED))
return;
if (req->work.mm) { if (req->work.mm) {
mmdrop(req->work.mm); mmdrop(req->work.mm);
req->work.mm = NULL; req->work.mm = NULL;
...@@ -1575,16 +1600,6 @@ static void io_free_req(struct io_kiocb *req) ...@@ -1575,16 +1600,6 @@ static void io_free_req(struct io_kiocb *req)
io_queue_async_work(nxt); io_queue_async_work(nxt);
} }
static void io_link_work_cb(struct io_wq_work **workptr)
{
struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
struct io_kiocb *link;
link = list_first_entry(&req->link_list, struct io_kiocb, link_list);
io_queue_linked_timeout(link);
io_wq_submit_work(workptr);
}
static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt) static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt)
{ {
struct io_kiocb *link; struct io_kiocb *link;
...@@ -1596,7 +1611,7 @@ static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt) ...@@ -1596,7 +1611,7 @@ static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt)
*workptr = &nxt->work; *workptr = &nxt->work;
link = io_prep_linked_timeout(nxt); link = io_prep_linked_timeout(nxt);
if (link) if (link)
nxt->work.func = io_link_work_cb; nxt->flags |= REQ_F_QUEUE_TIMEOUT;
} }
/* /*
...@@ -1781,7 +1796,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, ...@@ -1781,7 +1796,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
* If we find a request that requires polling, break out * If we find a request that requires polling, break out
* and complete those lists first, if we have entries there. * and complete those lists first, if we have entries there.
*/ */
if (req->flags & REQ_F_IOPOLL_COMPLETED) { if (READ_ONCE(req->iopoll_completed)) {
list_move_tail(&req->list, &done); list_move_tail(&req->list, &done);
continue; continue;
} }
...@@ -1962,7 +1977,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) ...@@ -1962,7 +1977,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
req_set_fail_links(req); req_set_fail_links(req);
req->result = res; req->result = res;
if (res != -EAGAIN) if (res != -EAGAIN)
req->flags |= REQ_F_IOPOLL_COMPLETED; WRITE_ONCE(req->iopoll_completed, 1);
} }
/* /*
...@@ -1995,7 +2010,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req) ...@@ -1995,7 +2010,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
* For fast devices, IO may have already completed. If it has, add * For fast devices, IO may have already completed. If it has, add
* it to the front so we find it first. * it to the front so we find it first.
*/ */
if (req->flags & REQ_F_IOPOLL_COMPLETED) if (READ_ONCE(req->iopoll_completed))
list_add(&req->list, &ctx->poll_list); list_add(&req->list, &ctx->poll_list);
else else
list_add_tail(&req->list, &ctx->poll_list); list_add_tail(&req->list, &ctx->poll_list);
...@@ -2063,6 +2078,10 @@ static bool io_file_supports_async(struct file *file, int rw) ...@@ -2063,6 +2078,10 @@ static bool io_file_supports_async(struct file *file, int rw)
if (S_ISREG(mode) && file->f_op != &io_uring_fops) if (S_ISREG(mode) && file->f_op != &io_uring_fops)
return true; return true;
/* any ->read/write should understand O_NONBLOCK */
if (file->f_flags & O_NONBLOCK)
return true;
if (!(file->f_mode & FMODE_NOWAIT)) if (!(file->f_mode & FMODE_NOWAIT))
return false; return false;
...@@ -2105,8 +2124,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, ...@@ -2105,8 +2124,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
kiocb->ki_ioprio = get_current_ioprio(); kiocb->ki_ioprio = get_current_ioprio();
/* don't allow async punt if RWF_NOWAIT was requested */ /* don't allow async punt if RWF_NOWAIT was requested */
if ((kiocb->ki_flags & IOCB_NOWAIT) || if (kiocb->ki_flags & IOCB_NOWAIT)
(req->file->f_flags & O_NONBLOCK))
req->flags |= REQ_F_NOWAIT; req->flags |= REQ_F_NOWAIT;
if (force_nonblock) if (force_nonblock)
...@@ -2120,6 +2138,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, ...@@ -2120,6 +2138,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
kiocb->ki_flags |= IOCB_HIPRI; kiocb->ki_flags |= IOCB_HIPRI;
kiocb->ki_complete = io_complete_rw_iopoll; kiocb->ki_complete = io_complete_rw_iopoll;
req->result = 0; req->result = 0;
req->iopoll_completed = 0;
} else { } else {
if (kiocb->ki_flags & IOCB_HIPRI) if (kiocb->ki_flags & IOCB_HIPRI)
return -EINVAL; return -EINVAL;
...@@ -2358,8 +2377,14 @@ static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, ...@@ -2358,8 +2377,14 @@ static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
bool needs_lock) bool needs_lock)
{ {
if (req->flags & REQ_F_BUFFER_SELECTED) if (req->flags & REQ_F_BUFFER_SELECTED) {
struct io_buffer *kbuf;
kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
iov[0].iov_len = kbuf->len;
return 0; return 0;
}
if (!req->rw.len) if (!req->rw.len)
return 0; return 0;
else if (req->rw.len > 1) else if (req->rw.len > 1)
...@@ -2741,7 +2766,8 @@ static int io_write(struct io_kiocb *req, bool force_nonblock) ...@@ -2741,7 +2766,8 @@ static int io_write(struct io_kiocb *req, bool force_nonblock)
if (ret) if (ret)
goto out_free; goto out_free;
/* any defer here is final, must blocking retry */ /* any defer here is final, must blocking retry */
if (!file_can_poll(req->file)) if (!(req->flags & REQ_F_NOWAIT) &&
!file_can_poll(req->file))
req->flags |= REQ_F_MUST_PUNT; req->flags |= REQ_F_MUST_PUNT;
return -EAGAIN; return -EAGAIN;
} }
...@@ -2761,6 +2787,8 @@ static int __io_splice_prep(struct io_kiocb *req, ...@@ -2761,6 +2787,8 @@ static int __io_splice_prep(struct io_kiocb *req,
if (req->flags & REQ_F_NEED_CLEANUP) if (req->flags & REQ_F_NEED_CLEANUP)
return 0; return 0;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
sp->file_in = NULL; sp->file_in = NULL;
sp->len = READ_ONCE(sqe->len); sp->len = READ_ONCE(sqe->len);
...@@ -2775,8 +2803,14 @@ static int __io_splice_prep(struct io_kiocb *req, ...@@ -2775,8 +2803,14 @@ static int __io_splice_prep(struct io_kiocb *req,
return ret; return ret;
req->flags |= REQ_F_NEED_CLEANUP; req->flags |= REQ_F_NEED_CLEANUP;
if (!S_ISREG(file_inode(sp->file_in)->i_mode)) if (!S_ISREG(file_inode(sp->file_in)->i_mode)) {
/*
* Splice operation will be punted async, and here need to
* modify io_wq_work.flags, so initialize io_wq_work firstly.
*/
io_req_init_async(req);
req->work.flags |= IO_WQ_WORK_UNBOUND; req->work.flags |= IO_WQ_WORK_UNBOUND;
}
return 0; return 0;
} }
...@@ -2885,23 +2919,15 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe) ...@@ -2885,23 +2919,15 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0; return 0;
} }
static bool io_req_cancelled(struct io_kiocb *req) static int io_fsync(struct io_kiocb *req, bool force_nonblock)
{
if (req->work.flags & IO_WQ_WORK_CANCEL) {
req_set_fail_links(req);
io_cqring_add_event(req, -ECANCELED);
io_put_req(req);
return true;
}
return false;
}
static void __io_fsync(struct io_kiocb *req)
{ {
loff_t end = req->sync.off + req->sync.len; loff_t end = req->sync.off + req->sync.len;
int ret; int ret;
/* fsync always requires a blocking context */
if (force_nonblock)
return -EAGAIN;
ret = vfs_fsync_range(req->file, req->sync.off, ret = vfs_fsync_range(req->file, req->sync.off,
end > 0 ? end : LLONG_MAX, end > 0 ? end : LLONG_MAX,
req->sync.flags & IORING_FSYNC_DATASYNC); req->sync.flags & IORING_FSYNC_DATASYNC);
...@@ -2909,58 +2935,16 @@ static void __io_fsync(struct io_kiocb *req) ...@@ -2909,58 +2935,16 @@ static void __io_fsync(struct io_kiocb *req)
req_set_fail_links(req); req_set_fail_links(req);
io_cqring_add_event(req, ret); io_cqring_add_event(req, ret);
io_put_req(req); io_put_req(req);
}
static void io_fsync_finish(struct io_wq_work **workptr)
{
struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
if (io_req_cancelled(req))
return;
__io_fsync(req);
io_steal_work(req, workptr);
}
static int io_fsync(struct io_kiocb *req, bool force_nonblock)
{
/* fsync always requires a blocking context */
if (force_nonblock) {
req->work.func = io_fsync_finish;
return -EAGAIN;
}
__io_fsync(req);
return 0; return 0;
} }
static void __io_fallocate(struct io_kiocb *req)
{
int ret;
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize;
ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
req->sync.len);
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
io_put_req(req);
}
static void io_fallocate_finish(struct io_wq_work **workptr)
{
struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
if (io_req_cancelled(req))
return;
__io_fallocate(req);
io_steal_work(req, workptr);
}
static int io_fallocate_prep(struct io_kiocb *req, static int io_fallocate_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe) const struct io_uring_sqe *sqe)
{ {
if (sqe->ioprio || sqe->buf_index || sqe->rw_flags) if (sqe->ioprio || sqe->buf_index || sqe->rw_flags)
return -EINVAL; return -EINVAL;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
req->sync.off = READ_ONCE(sqe->off); req->sync.off = READ_ONCE(sqe->off);
req->sync.len = READ_ONCE(sqe->addr); req->sync.len = READ_ONCE(sqe->addr);
...@@ -2971,66 +2955,74 @@ static int io_fallocate_prep(struct io_kiocb *req, ...@@ -2971,66 +2955,74 @@ static int io_fallocate_prep(struct io_kiocb *req,
static int io_fallocate(struct io_kiocb *req, bool force_nonblock) static int io_fallocate(struct io_kiocb *req, bool force_nonblock)
{ {
int ret;
/* fallocate always requiring blocking context */ /* fallocate always requiring blocking context */
if (force_nonblock) { if (force_nonblock)
req->work.func = io_fallocate_finish;
return -EAGAIN; return -EAGAIN;
}
__io_fallocate(req); current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize;
ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
req->sync.len);
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
io_put_req(req);
return 0; return 0;
} }
static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{ {
const char __user *fname; const char __user *fname;
int ret; int ret;
if (sqe->ioprio || sqe->buf_index) if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
return -EINVAL; return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE) if (unlikely(sqe->ioprio || sqe->buf_index))
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF; return -EBADF;
if (req->flags & REQ_F_NEED_CLEANUP)
return 0;
req->open.dfd = READ_ONCE(sqe->fd); /* open.how should be already initialised */
req->open.how.mode = READ_ONCE(sqe->len); if (!(req->open.how.flags & O_PATH) && force_o_largefile())
fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
req->open.how.flags = READ_ONCE(sqe->open_flags);
if (force_o_largefile())
req->open.how.flags |= O_LARGEFILE; req->open.how.flags |= O_LARGEFILE;
req->open.dfd = READ_ONCE(sqe->fd);
fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
req->open.filename = getname(fname); req->open.filename = getname(fname);
if (IS_ERR(req->open.filename)) { if (IS_ERR(req->open.filename)) {
ret = PTR_ERR(req->open.filename); ret = PTR_ERR(req->open.filename);
req->open.filename = NULL; req->open.filename = NULL;
return ret; return ret;
} }
req->open.nofile = rlimit(RLIMIT_NOFILE); req->open.nofile = rlimit(RLIMIT_NOFILE);
req->flags |= REQ_F_NEED_CLEANUP; req->flags |= REQ_F_NEED_CLEANUP;
return 0; return 0;
} }
static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
u64 flags, mode;
if (req->flags & REQ_F_NEED_CLEANUP)
return 0;
mode = READ_ONCE(sqe->len);
flags = READ_ONCE(sqe->open_flags);
req->open.how = build_open_how(flags, mode);
return __io_openat_prep(req, sqe);
}
static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{ {
struct open_how __user *how; struct open_how __user *how;
const char __user *fname;
size_t len; size_t len;
int ret; int ret;
if (sqe->ioprio || sqe->buf_index)
return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
if (req->flags & REQ_F_NEED_CLEANUP) if (req->flags & REQ_F_NEED_CLEANUP)
return 0; return 0;
req->open.dfd = READ_ONCE(sqe->fd);
fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
how = u64_to_user_ptr(READ_ONCE(sqe->addr2)); how = u64_to_user_ptr(READ_ONCE(sqe->addr2));
len = READ_ONCE(sqe->len); len = READ_ONCE(sqe->len);
if (len < OPEN_HOW_SIZE_VER0) if (len < OPEN_HOW_SIZE_VER0)
return -EINVAL; return -EINVAL;
...@@ -3039,19 +3031,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ...@@ -3039,19 +3031,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (ret) if (ret)
return ret; return ret;
if (!(req->open.how.flags & O_PATH) && force_o_largefile()) return __io_openat_prep(req, sqe);
req->open.how.flags |= O_LARGEFILE;
req->open.filename = getname(fname);
if (IS_ERR(req->open.filename)) {
ret = PTR_ERR(req->open.filename);
req->open.filename = NULL;
return ret;
}
req->open.nofile = rlimit(RLIMIT_NOFILE);
req->flags |= REQ_F_NEED_CLEANUP;
return 0;
} }
static int io_openat2(struct io_kiocb *req, bool force_nonblock) static int io_openat2(struct io_kiocb *req, bool force_nonblock)
...@@ -3091,7 +3071,6 @@ static int io_openat2(struct io_kiocb *req, bool force_nonblock) ...@@ -3091,7 +3071,6 @@ static int io_openat2(struct io_kiocb *req, bool force_nonblock)
static int io_openat(struct io_kiocb *req, bool force_nonblock) static int io_openat(struct io_kiocb *req, bool force_nonblock)
{ {
req->open.how = build_open_how(req->open.how.flags, req->open.how.mode);
return io_openat2(req, force_nonblock); return io_openat2(req, force_nonblock);
} }
...@@ -3180,7 +3159,7 @@ static int io_provide_buffers_prep(struct io_kiocb *req, ...@@ -3180,7 +3159,7 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
p->addr = READ_ONCE(sqe->addr); p->addr = READ_ONCE(sqe->addr);
p->len = READ_ONCE(sqe->len); p->len = READ_ONCE(sqe->len);
if (!access_ok(u64_to_user_ptr(p->addr), p->len)) if (!access_ok(u64_to_user_ptr(p->addr), (p->len * p->nbufs)))
return -EFAULT; return -EFAULT;
p->bgid = READ_ONCE(sqe->buf_group); p->bgid = READ_ONCE(sqe->buf_group);
...@@ -3258,6 +3237,8 @@ static int io_epoll_ctl_prep(struct io_kiocb *req, ...@@ -3258,6 +3237,8 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
#if defined(CONFIG_EPOLL) #if defined(CONFIG_EPOLL)
if (sqe->ioprio || sqe->buf_index) if (sqe->ioprio || sqe->buf_index)
return -EINVAL; return -EINVAL;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
req->epoll.epfd = READ_ONCE(sqe->fd); req->epoll.epfd = READ_ONCE(sqe->fd);
req->epoll.op = READ_ONCE(sqe->len); req->epoll.op = READ_ONCE(sqe->len);
...@@ -3302,6 +3283,8 @@ static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ...@@ -3302,6 +3283,8 @@ static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU) #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
if (sqe->ioprio || sqe->buf_index || sqe->off) if (sqe->ioprio || sqe->buf_index || sqe->off)
return -EINVAL; return -EINVAL;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
req->madvise.addr = READ_ONCE(sqe->addr); req->madvise.addr = READ_ONCE(sqe->addr);
req->madvise.len = READ_ONCE(sqe->len); req->madvise.len = READ_ONCE(sqe->len);
...@@ -3336,6 +3319,8 @@ static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ...@@ -3336,6 +3319,8 @@ static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{ {
if (sqe->ioprio || sqe->buf_index || sqe->addr) if (sqe->ioprio || sqe->buf_index || sqe->addr)
return -EINVAL; return -EINVAL;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
req->fadvise.offset = READ_ONCE(sqe->off); req->fadvise.offset = READ_ONCE(sqe->off);
req->fadvise.len = READ_ONCE(sqe->len); req->fadvise.len = READ_ONCE(sqe->len);
...@@ -3369,6 +3354,8 @@ static int io_fadvise(struct io_kiocb *req, bool force_nonblock) ...@@ -3369,6 +3354,8 @@ static int io_fadvise(struct io_kiocb *req, bool force_nonblock)
static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{ {
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->ioprio || sqe->buf_index) if (sqe->ioprio || sqe->buf_index)
return -EINVAL; return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE) if (req->flags & REQ_F_FIXED_FILE)
...@@ -3409,10 +3396,14 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ...@@ -3409,10 +3396,14 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{ {
/* /*
* If we queue this for async, it must not be cancellable. That would * If we queue this for async, it must not be cancellable. That would
* leave the 'file' in an undeterminate state. * leave the 'file' in an undeterminate state, and here need to modify
* io_wq_work.flags, so initialize io_wq_work firstly.
*/ */
io_req_init_async(req);
req->work.flags |= IO_WQ_WORK_NO_CANCEL; req->work.flags |= IO_WQ_WORK_NO_CANCEL;
if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
return -EINVAL;
if (sqe->ioprio || sqe->off || sqe->addr || sqe->len || if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
sqe->rw_flags || sqe->buf_index) sqe->rw_flags || sqe->buf_index)
return -EINVAL; return -EINVAL;
...@@ -3420,53 +3411,41 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ...@@ -3420,53 +3411,41 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -EBADF; return -EBADF;
req->close.fd = READ_ONCE(sqe->fd); req->close.fd = READ_ONCE(sqe->fd);
return 0; if ((req->file && req->file->f_op == &io_uring_fops) ||
} req->close.fd == req->ctx->ring_fd)
return -EBADF;
/* only called when __close_fd_get_file() is done */
static void __io_close_finish(struct io_kiocb *req)
{
int ret;
ret = filp_close(req->close.put_file, req->work.files);
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
fput(req->close.put_file);
io_put_req(req);
}
static void io_close_finish(struct io_wq_work **workptr)
{
struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
/* not cancellable, don't do io_req_cancelled() */ req->close.put_file = NULL;
__io_close_finish(req); return 0;
io_steal_work(req, workptr);
} }
static int io_close(struct io_kiocb *req, bool force_nonblock) static int io_close(struct io_kiocb *req, bool force_nonblock)
{ {
struct io_close *close = &req->close;
int ret; int ret;
req->close.put_file = NULL; /* might be already done during nonblock submission */
ret = __close_fd_get_file(req->close.fd, &req->close.put_file); if (!close->put_file) {
if (ret < 0) ret = __close_fd_get_file(close->fd, &close->put_file);
return (ret == -ENOENT) ? -EBADF : ret; if (ret < 0)
return (ret == -ENOENT) ? -EBADF : ret;
}
/* if the file has a flush method, be safe and punt to async */ /* if the file has a flush method, be safe and punt to async */
if (req->close.put_file->f_op->flush && force_nonblock) { if (close->put_file->f_op->flush && force_nonblock) {
/* avoid grabbing files - we don't need the files */ /* avoid grabbing files - we don't need the files */
req->flags |= REQ_F_NO_FILE_TABLE | REQ_F_MUST_PUNT; req->flags |= REQ_F_NO_FILE_TABLE | REQ_F_MUST_PUNT;
req->work.func = io_close_finish;
return -EAGAIN; return -EAGAIN;
} }
/* /* No ->flush() or already async, safely close from here */
* No ->flush(), safely close from here and just punt the ret = filp_close(close->put_file, req->work.files);
* fput() to async context. if (ret < 0)
*/ req_set_fail_links(req);
__io_close_finish(req); io_cqring_add_event(req, ret);
fput(close->put_file);
close->put_file = NULL;
io_put_req(req);
return 0; return 0;
} }
...@@ -3488,38 +3467,20 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe) ...@@ -3488,38 +3467,20 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0; return 0;
} }
static void __io_sync_file_range(struct io_kiocb *req) static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock)
{ {
int ret; int ret;
/* sync_file_range always requires a blocking context */
if (force_nonblock)
return -EAGAIN;
ret = sync_file_range(req->file, req->sync.off, req->sync.len, ret = sync_file_range(req->file, req->sync.off, req->sync.len,
req->sync.flags); req->sync.flags);
if (ret < 0) if (ret < 0)
req_set_fail_links(req); req_set_fail_links(req);
io_cqring_add_event(req, ret); io_cqring_add_event(req, ret);
io_put_req(req); io_put_req(req);
}
static void io_sync_file_range_finish(struct io_wq_work **workptr)
{
struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
if (io_req_cancelled(req))
return;
__io_sync_file_range(req);
io_steal_work(req, workptr);
}
static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock)
{
/* sync_file_range always requires a blocking context */
if (force_nonblock) {
req->work.func = io_sync_file_range_finish;
return -EAGAIN;
}
__io_sync_file_range(req);
return 0; return 0;
} }
...@@ -3545,6 +3506,9 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ...@@ -3545,6 +3506,9 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_async_ctx *io = req->io; struct io_async_ctx *io = req->io;
int ret; int ret;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
sr->msg_flags = READ_ONCE(sqe->msg_flags); sr->msg_flags = READ_ONCE(sqe->msg_flags);
sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len); sr->len = READ_ONCE(sqe->len);
...@@ -3574,9 +3538,6 @@ static int io_sendmsg(struct io_kiocb *req, bool force_nonblock) ...@@ -3574,9 +3538,6 @@ static int io_sendmsg(struct io_kiocb *req, bool force_nonblock)
struct socket *sock; struct socket *sock;
int ret; int ret;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
sock = sock_from_file(req->file, &ret); sock = sock_from_file(req->file, &ret);
if (sock) { if (sock) {
struct io_async_ctx io; struct io_async_ctx io;
...@@ -3630,9 +3591,6 @@ static int io_send(struct io_kiocb *req, bool force_nonblock) ...@@ -3630,9 +3591,6 @@ static int io_send(struct io_kiocb *req, bool force_nonblock)
struct socket *sock; struct socket *sock;
int ret; int ret;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
sock = sock_from_file(req->file, &ret); sock = sock_from_file(req->file, &ret);
if (sock) { if (sock) {
struct io_sr_msg *sr = &req->sr_msg; struct io_sr_msg *sr = &req->sr_msg;
...@@ -3785,6 +3743,9 @@ static int io_recvmsg_prep(struct io_kiocb *req, ...@@ -3785,6 +3743,9 @@ static int io_recvmsg_prep(struct io_kiocb *req,
struct io_async_ctx *io = req->io; struct io_async_ctx *io = req->io;
int ret; int ret;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
sr->msg_flags = READ_ONCE(sqe->msg_flags); sr->msg_flags = READ_ONCE(sqe->msg_flags);
sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len); sr->len = READ_ONCE(sqe->len);
...@@ -3813,9 +3774,6 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock) ...@@ -3813,9 +3774,6 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock)
struct socket *sock; struct socket *sock;
int ret, cflags = 0; int ret, cflags = 0;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
sock = sock_from_file(req->file, &ret); sock = sock_from_file(req->file, &ret);
if (sock) { if (sock) {
struct io_buffer *kbuf; struct io_buffer *kbuf;
...@@ -3877,9 +3835,6 @@ static int io_recv(struct io_kiocb *req, bool force_nonblock) ...@@ -3877,9 +3835,6 @@ static int io_recv(struct io_kiocb *req, bool force_nonblock)
struct socket *sock; struct socket *sock;
int ret, cflags = 0; int ret, cflags = 0;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
sock = sock_from_file(req->file, &ret); sock = sock_from_file(req->file, &ret);
if (sock) { if (sock) {
struct io_sr_msg *sr = &req->sr_msg; struct io_sr_msg *sr = &req->sr_msg;
...@@ -3947,49 +3902,30 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ...@@ -3947,49 +3902,30 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0; return 0;
} }
static int __io_accept(struct io_kiocb *req, bool force_nonblock) static int io_accept(struct io_kiocb *req, bool force_nonblock)
{ {
struct io_accept *accept = &req->accept; struct io_accept *accept = &req->accept;
unsigned file_flags; unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
int ret; int ret;
file_flags = force_nonblock ? O_NONBLOCK : 0; if (req->file->f_flags & O_NONBLOCK)
req->flags |= REQ_F_NOWAIT;
ret = __sys_accept4_file(req->file, file_flags, accept->addr, ret = __sys_accept4_file(req->file, file_flags, accept->addr,
accept->addr_len, accept->flags, accept->addr_len, accept->flags,
accept->nofile); accept->nofile);
if (ret == -EAGAIN && force_nonblock) if (ret == -EAGAIN && force_nonblock)
return -EAGAIN; return -EAGAIN;
if (ret == -ERESTARTSYS) if (ret < 0) {
ret = -EINTR; if (ret == -ERESTARTSYS)
if (ret < 0) ret = -EINTR;
req_set_fail_links(req); req_set_fail_links(req);
}
io_cqring_add_event(req, ret); io_cqring_add_event(req, ret);
io_put_req(req); io_put_req(req);
return 0; return 0;
} }
static void io_accept_finish(struct io_wq_work **workptr)
{
struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
if (io_req_cancelled(req))
return;
__io_accept(req, false);
io_steal_work(req, workptr);
}
static int io_accept(struct io_kiocb *req, bool force_nonblock)
{
int ret;
ret = __io_accept(req, force_nonblock);
if (ret == -EAGAIN && force_nonblock) {
req->work.func = io_accept_finish;
return -EAGAIN;
}
return 0;
}
static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{ {
struct io_connect *conn = &req->connect; struct io_connect *conn = &req->connect;
...@@ -4328,7 +4264,8 @@ static void io_async_task_func(struct callback_head *cb) ...@@ -4328,7 +4264,8 @@ static void io_async_task_func(struct callback_head *cb)
spin_unlock_irq(&ctx->completion_lock); spin_unlock_irq(&ctx->completion_lock);
/* restore ->work in case we need to retry again */ /* restore ->work in case we need to retry again */
memcpy(&req->work, &apoll->work, sizeof(req->work)); if (req->flags & REQ_F_WORK_INITIALIZED)
memcpy(&req->work, &apoll->work, sizeof(req->work));
kfree(apoll); kfree(apoll);
if (!canceled) { if (!canceled) {
...@@ -4425,7 +4362,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req) ...@@ -4425,7 +4362,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
return false; return false;
req->flags |= REQ_F_POLLED; req->flags |= REQ_F_POLLED;
memcpy(&apoll->work, &req->work, sizeof(req->work)); if (req->flags & REQ_F_WORK_INITIALIZED)
memcpy(&apoll->work, &req->work, sizeof(req->work));
had_io = req->io != NULL; had_io = req->io != NULL;
get_task_struct(current); get_task_struct(current);
...@@ -4450,7 +4388,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req) ...@@ -4450,7 +4388,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
if (!had_io) if (!had_io)
io_poll_remove_double(req); io_poll_remove_double(req);
spin_unlock_irq(&ctx->completion_lock); spin_unlock_irq(&ctx->completion_lock);
memcpy(&req->work, &apoll->work, sizeof(req->work)); if (req->flags & REQ_F_WORK_INITIALIZED)
memcpy(&req->work, &apoll->work, sizeof(req->work));
kfree(apoll); kfree(apoll);
return false; return false;
} }
...@@ -4495,7 +4434,9 @@ static bool io_poll_remove_one(struct io_kiocb *req) ...@@ -4495,7 +4434,9 @@ static bool io_poll_remove_one(struct io_kiocb *req)
* io_req_work_drop_env below when dropping the * io_req_work_drop_env below when dropping the
* final reference. * final reference.
*/ */
memcpy(&req->work, &apoll->work, sizeof(req->work)); if (req->flags & REQ_F_WORK_INITIALIZED)
memcpy(&req->work, &apoll->work,
sizeof(req->work));
kfree(apoll); kfree(apoll);
} }
} }
...@@ -4944,6 +4885,8 @@ static int io_req_defer_prep(struct io_kiocb *req, ...@@ -4944,6 +4885,8 @@ static int io_req_defer_prep(struct io_kiocb *req,
if (!sqe) if (!sqe)
return 0; return 0;
io_req_init_async(req);
if (io_op_defs[req->opcode].file_table) { if (io_op_defs[req->opcode].file_table) {
ret = io_grab_files(req); ret = io_grab_files(req);
if (unlikely(ret)) if (unlikely(ret))
...@@ -5381,12 +5324,26 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, ...@@ -5381,12 +5324,26 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
return 0; return 0;
} }
/*
 * Queue the linked timeout for a request that is about to run from the
 * io-wq worker (called at the top of io_wq_submit_work()).
 *
 * Nothing happens unless REQ_F_QUEUE_TIMEOUT is set on @req; when it is,
 * the first entry on @req's link_list is handed to
 * io_queue_linked_timeout().
 */
static void io_arm_async_linked_timeout(struct io_kiocb *req)
{
	/* the link head's timeout is queued in io_queue_async_work() instead */
	if (req->flags & REQ_F_QUEUE_TIMEOUT) {
		struct io_kiocb *timeout_req;

		timeout_req = list_first_entry(&req->link_list,
					       struct io_kiocb, link_list);
		io_queue_linked_timeout(timeout_req);
	}
}
static void io_wq_submit_work(struct io_wq_work **workptr) static void io_wq_submit_work(struct io_wq_work **workptr)
{ {
struct io_wq_work *work = *workptr; struct io_wq_work *work = *workptr;
struct io_kiocb *req = container_of(work, struct io_kiocb, work); struct io_kiocb *req = container_of(work, struct io_kiocb, work);
int ret = 0; int ret = 0;
io_arm_async_linked_timeout(req);
/* if NO_CANCEL is set, we must still run the work */ /* if NO_CANCEL is set, we must still run the work */
if ((work->flags & (IO_WQ_WORK_CANCEL|IO_WQ_WORK_NO_CANCEL)) == if ((work->flags & (IO_WQ_WORK_CANCEL|IO_WQ_WORK_NO_CANCEL)) ==
IO_WQ_WORK_CANCEL) { IO_WQ_WORK_CANCEL) {
...@@ -5437,19 +5394,20 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req, ...@@ -5437,19 +5394,20 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
return -EBADF; return -EBADF;
fd = array_index_nospec(fd, ctx->nr_user_files); fd = array_index_nospec(fd, ctx->nr_user_files);
file = io_file_from_index(ctx, fd); file = io_file_from_index(ctx, fd);
if (!file) if (file) {
return -EBADF; req->fixed_file_refs = ctx->file_data->cur_refs;
req->fixed_file_refs = ctx->file_data->cur_refs; percpu_ref_get(req->fixed_file_refs);
percpu_ref_get(req->fixed_file_refs); }
} else { } else {
trace_io_uring_file_get(ctx, fd); trace_io_uring_file_get(ctx, fd);
file = __io_file_get(state, fd); file = __io_file_get(state, fd);
if (unlikely(!file))
return -EBADF;
} }
*out_file = file; if (file || io_op_defs[req->opcode].needs_file_no_error) {
return 0; *out_file = file;
return 0;
}
return -EBADF;
} }
static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req, static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
...@@ -5583,7 +5541,8 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe) ...@@ -5583,7 +5541,8 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
again: again:
linked_timeout = io_prep_linked_timeout(req); linked_timeout = io_prep_linked_timeout(req);
if (req->work.creds && req->work.creds != current_cred()) { if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.creds &&
req->work.creds != current_cred()) {
if (old_creds) if (old_creds)
revert_creds(old_creds); revert_creds(old_creds);
if (old_creds == req->work.creds) if (old_creds == req->work.creds)
...@@ -5606,6 +5565,8 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe) ...@@ -5606,6 +5565,8 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
goto exit; goto exit;
} }
punt: punt:
io_req_init_async(req);
if (io_op_defs[req->opcode].file_table) { if (io_op_defs[req->opcode].file_table) {
ret = io_grab_files(req); ret = io_grab_files(req);
if (ret) if (ret)
...@@ -5858,7 +5819,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, ...@@ -5858,7 +5819,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
refcount_set(&req->refs, 2); refcount_set(&req->refs, 2);
req->task = NULL; req->task = NULL;
req->result = 0; req->result = 0;
INIT_IO_WORK(&req->work, io_wq_submit_work);
if (unlikely(req->opcode >= IORING_OP_LAST)) if (unlikely(req->opcode >= IORING_OP_LAST))
return -EINVAL; return -EINVAL;
...@@ -5880,6 +5840,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, ...@@ -5880,6 +5840,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
id = READ_ONCE(sqe->personality); id = READ_ONCE(sqe->personality);
if (id) { if (id) {
io_req_init_async(req);
req->work.creds = idr_find(&ctx->personality_idr, id); req->work.creds = idr_find(&ctx->personality_idr, id);
if (unlikely(!req->work.creds)) if (unlikely(!req->work.creds))
return -EINVAL; return -EINVAL;
...@@ -6874,6 +6835,7 @@ static int io_init_wq_offload(struct io_ring_ctx *ctx, ...@@ -6874,6 +6835,7 @@ static int io_init_wq_offload(struct io_ring_ctx *ctx,
data.user = ctx->user; data.user = ctx->user;
data.free_work = io_free_work; data.free_work = io_free_work;
data.do_work = io_wq_submit_work;
if (!(p->flags & IORING_SETUP_ATTACH_WQ)) { if (!(p->flags & IORING_SETUP_ATTACH_WQ)) {
/* Do QD, or 4 * CPUS, whatever is smallest */ /* Do QD, or 4 * CPUS, whatever is smallest */
...@@ -7155,8 +7117,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, ...@@ -7155,8 +7117,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
ret = 0; ret = 0;
if (!pages || nr_pages > got_pages) { if (!pages || nr_pages > got_pages) {
kfree(vmas); kvfree(vmas);
kfree(pages); kvfree(pages);
pages = kvmalloc_array(nr_pages, sizeof(struct page *), pages = kvmalloc_array(nr_pages, sizeof(struct page *),
GFP_KERNEL); GFP_KERNEL);
vmas = kvmalloc_array(nr_pages, vmas = kvmalloc_array(nr_pages,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment