Commit 5d4740fc authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'io_uring-6.2-2022-12-19' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:

 - Improve the locking for timeouts. This was originally queued up for
   the initial pull, but I messed up and it got missed. (Pavel)

 - Fix an issue with running task_work from the wait path, causing some
   inefficiencies (me)

 - Add a clear of ->free_iov upfront in the 32-bit compat data
   importing, so we ensure that it's always sane at completion time (me)

 - Use call_rcu_hurry() for the eventfd signaling (Dylan)

 - Ordering fix for multishot recv completions (Pavel)

 - Add the io_uring trace header to the MAINTAINERS entry (Ammar)

* tag 'io_uring-6.2-2022-12-19' of git://git.kernel.dk/linux:
  MAINTAINERS: io_uring: Add include/trace/events/io_uring.h
  io_uring/net: fix cleanup after recycle
  io_uring/net: ensure compat import handlers clear free_iov
  io_uring: include task_work run after scheduling in wait for events
  io_uring: don't use TIF_NOTIFY_SIGNAL to test for availability of task_work
  io_uring: use call_rcu_hurry if signaling an eventfd
  io_uring: fix overflow handling regression
  io_uring: ease timeout flush locking requirements
  io_uring: revise completion_lock locking
  io_uring: protect cq_timeouts with timeout_lock
parents 0a924817 5ad70eb2
...@@ -10878,6 +10878,7 @@ T: git git://git.kernel.dk/liburing ...@@ -10878,6 +10878,7 @@ T: git git://git.kernel.dk/liburing
F: io_uring/ F: io_uring/
F: include/linux/io_uring.h F: include/linux/io_uring.h
F: include/linux/io_uring_types.h F: include/linux/io_uring_types.h
F: include/trace/events/io_uring.h
F: include/uapi/linux/io_uring.h F: include/uapi/linux/io_uring.h
F: tools/io_uring/ F: tools/io_uring/
......
...@@ -538,7 +538,7 @@ static void io_eventfd_signal(struct io_ring_ctx *ctx) ...@@ -538,7 +538,7 @@ static void io_eventfd_signal(struct io_ring_ctx *ctx)
} else { } else {
atomic_inc(&ev_fd->refs); atomic_inc(&ev_fd->refs);
if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops))
call_rcu(&ev_fd->rcu, io_eventfd_ops); call_rcu_hurry(&ev_fd->rcu, io_eventfd_ops);
else else
atomic_dec(&ev_fd->refs); atomic_dec(&ev_fd->refs);
} }
...@@ -572,12 +572,11 @@ static void io_eventfd_flush_signal(struct io_ring_ctx *ctx) ...@@ -572,12 +572,11 @@ static void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
void __io_commit_cqring_flush(struct io_ring_ctx *ctx) void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
{ {
if (ctx->off_timeout_used || ctx->drain_active) { if (ctx->off_timeout_used)
io_flush_timeouts(ctx);
if (ctx->drain_active) {
spin_lock(&ctx->completion_lock); spin_lock(&ctx->completion_lock);
if (ctx->off_timeout_used) io_queue_deferred(ctx);
io_flush_timeouts(ctx);
if (ctx->drain_active)
io_queue_deferred(ctx);
spin_unlock(&ctx->completion_lock); spin_unlock(&ctx->completion_lock);
} }
if (ctx->has_evfd) if (ctx->has_evfd)
...@@ -597,6 +596,18 @@ static inline void __io_cq_unlock(struct io_ring_ctx *ctx) ...@@ -597,6 +596,18 @@ static inline void __io_cq_unlock(struct io_ring_ctx *ctx)
spin_unlock(&ctx->completion_lock); spin_unlock(&ctx->completion_lock);
} }
/*
 * Acquire the ring's completion_lock before posting/flushing CQEs.
 * The __acquires() annotation is for sparse lock-context checking.
 */
static inline void io_cq_lock(struct io_ring_ctx *ctx)
__acquires(ctx->completion_lock)
{
spin_lock(&ctx->completion_lock);
}
/*
 * Release the ring's completion_lock taken by io_cq_lock().
 * The __releases() annotation is for sparse lock-context checking.
 */
static inline void io_cq_unlock(struct io_ring_ctx *ctx)
__releases(ctx->completion_lock)
{
spin_unlock(&ctx->completion_lock);
}
/* keep it inlined for io_submit_flush_completions() */ /* keep it inlined for io_submit_flush_completions() */
static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx) static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx)
__releases(ctx->completion_lock) __releases(ctx->completion_lock)
...@@ -916,7 +927,7 @@ static void __io_req_complete_post(struct io_kiocb *req) ...@@ -916,7 +927,7 @@ static void __io_req_complete_post(struct io_kiocb *req)
io_cq_lock(ctx); io_cq_lock(ctx);
if (!(req->flags & REQ_F_CQE_SKIP)) if (!(req->flags & REQ_F_CQE_SKIP))
__io_fill_cqe_req(ctx, req); io_fill_cqe_req(ctx, req);
/* /*
* If we're the last reference to this request, add to our locked * If we're the last reference to this request, add to our locked
...@@ -1074,9 +1085,9 @@ static void __io_req_find_next_prep(struct io_kiocb *req) ...@@ -1074,9 +1085,9 @@ static void __io_req_find_next_prep(struct io_kiocb *req)
{ {
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
io_cq_lock(ctx); spin_lock(&ctx->completion_lock);
io_disarm_next(req); io_disarm_next(req);
io_cq_unlock_post(ctx); spin_unlock(&ctx->completion_lock);
} }
static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req) static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
...@@ -2470,7 +2481,14 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, ...@@ -2470,7 +2481,14 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
} }
if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS)) if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS))
return -ETIME; return -ETIME;
return 1;
/*
* Run task_work after scheduling. If we got woken because of
* task_work being processed, run it now rather than let the caller
* do another wait loop.
*/
ret = io_run_task_work_sig(ctx);
return ret < 0 ? ret : 1;
} }
/* /*
...@@ -2535,6 +2553,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, ...@@ -2535,6 +2553,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq, prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq,
TASK_INTERRUPTIBLE); TASK_INTERRUPTIBLE);
ret = io_cqring_wait_schedule(ctx, &iowq, timeout); ret = io_cqring_wait_schedule(ctx, &iowq, timeout);
if (__io_cqring_events_user(ctx) >= min_events)
break;
cond_resched(); cond_resched();
} while (ret > 0); } while (ret > 0);
......
...@@ -87,17 +87,6 @@ static inline void io_req_task_work_add(struct io_kiocb *req) ...@@ -87,17 +87,6 @@ static inline void io_req_task_work_add(struct io_kiocb *req)
#define io_for_each_link(pos, head) \ #define io_for_each_link(pos, head) \
for (pos = (head); pos; pos = pos->link) for (pos = (head); pos; pos = pos->link)
/* Take ctx->completion_lock for CQE posting (sparse __acquires annotation). */
static inline void io_cq_lock(struct io_ring_ctx *ctx)
__acquires(ctx->completion_lock)
{
spin_lock(&ctx->completion_lock);
}
/*
 * Drop ctx->completion_lock taken by io_cq_lock().
 * NOTE(review): unlike io_cq_lock(), this lacks a __releases()
 * annotation, so sparse lock-context checking is incomplete here —
 * confirm whether the annotation was intentionally omitted.
 */
static inline void io_cq_unlock(struct io_ring_ctx *ctx)
{
spin_unlock(&ctx->completion_lock);
}
void io_cq_unlock_post(struct io_ring_ctx *ctx); void io_cq_unlock_post(struct io_ring_ctx *ctx);
static inline struct io_uring_cqe *io_get_cqe_overflow(struct io_ring_ctx *ctx, static inline struct io_uring_cqe *io_get_cqe_overflow(struct io_ring_ctx *ctx,
...@@ -277,8 +266,7 @@ static inline int io_run_task_work(void) ...@@ -277,8 +266,7 @@ static inline int io_run_task_work(void)
static inline bool io_task_work_pending(struct io_ring_ctx *ctx) static inline bool io_task_work_pending(struct io_ring_ctx *ctx)
{ {
return test_thread_flag(TIF_NOTIFY_SIGNAL) || return task_work_pending(current) || !wq_list_empty(&ctx->work_llist);
!wq_list_empty(&ctx->work_llist);
} }
static inline int io_run_task_work_ctx(struct io_ring_ctx *ctx) static inline int io_run_task_work_ctx(struct io_ring_ctx *ctx)
......
...@@ -494,6 +494,7 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req, ...@@ -494,6 +494,7 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
if (req->flags & REQ_F_BUFFER_SELECT) { if (req->flags & REQ_F_BUFFER_SELECT) {
compat_ssize_t clen; compat_ssize_t clen;
iomsg->free_iov = NULL;
if (msg.msg_iovlen == 0) { if (msg.msg_iovlen == 0) {
sr->len = 0; sr->len = 0;
} else if (msg.msg_iovlen > 1) { } else if (msg.msg_iovlen > 1) {
...@@ -819,10 +820,10 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) ...@@ -819,10 +820,10 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
goto retry_multishot; goto retry_multishot;
if (mshot_finished) { if (mshot_finished) {
io_netmsg_recycle(req, issue_flags);
/* fast path, check for non-NULL to avoid function call */ /* fast path, check for non-NULL to avoid function call */
if (kmsg->free_iov) if (kmsg->free_iov)
kfree(kmsg->free_iov); kfree(kmsg->free_iov);
io_netmsg_recycle(req, issue_flags);
req->flags &= ~REQ_F_NEED_CLEANUP; req->flags &= ~REQ_F_NEED_CLEANUP;
} }
......
...@@ -1062,7 +1062,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) ...@@ -1062,7 +1062,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
continue; continue;
req->cqe.flags = io_put_kbuf(req, 0); req->cqe.flags = io_put_kbuf(req, 0);
__io_fill_cqe_req(req->ctx, req); io_fill_cqe_req(req->ctx, req);
} }
if (unlikely(!nr_events)) if (unlikely(!nr_events))
......
...@@ -50,7 +50,6 @@ static inline void io_put_req(struct io_kiocb *req) ...@@ -50,7 +50,6 @@ static inline void io_put_req(struct io_kiocb *req)
} }
static bool io_kill_timeout(struct io_kiocb *req, int status) static bool io_kill_timeout(struct io_kiocb *req, int status)
__must_hold(&req->ctx->completion_lock)
__must_hold(&req->ctx->timeout_lock) __must_hold(&req->ctx->timeout_lock)
{ {
struct io_timeout_data *io = req->async_data; struct io_timeout_data *io = req->async_data;
...@@ -70,12 +69,13 @@ static bool io_kill_timeout(struct io_kiocb *req, int status) ...@@ -70,12 +69,13 @@ static bool io_kill_timeout(struct io_kiocb *req, int status)
} }
__cold void io_flush_timeouts(struct io_ring_ctx *ctx) __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
__must_hold(&ctx->completion_lock)
{ {
u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); u32 seq;
struct io_timeout *timeout, *tmp; struct io_timeout *timeout, *tmp;
spin_lock_irq(&ctx->timeout_lock); spin_lock_irq(&ctx->timeout_lock);
seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) { list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
struct io_kiocb *req = cmd_to_io_kiocb(timeout); struct io_kiocb *req = cmd_to_io_kiocb(timeout);
u32 events_needed, events_got; u32 events_needed, events_got;
...@@ -622,7 +622,11 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, ...@@ -622,7 +622,11 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
struct io_timeout *timeout, *tmp; struct io_timeout *timeout, *tmp;
int canceled = 0; int canceled = 0;
io_cq_lock(ctx); /*
* completion_lock is needed for io_match_task(). Take it before
* timeout_lock first to keep the locking order consistent.
*/
spin_lock(&ctx->completion_lock);
spin_lock_irq(&ctx->timeout_lock); spin_lock_irq(&ctx->timeout_lock);
list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) { list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
struct io_kiocb *req = cmd_to_io_kiocb(timeout); struct io_kiocb *req = cmd_to_io_kiocb(timeout);
...@@ -632,6 +636,6 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, ...@@ -632,6 +636,6 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
canceled++; canceled++;
} }
spin_unlock_irq(&ctx->timeout_lock); spin_unlock_irq(&ctx->timeout_lock);
io_cq_unlock_post(ctx); spin_unlock(&ctx->completion_lock);
return canceled != 0; return canceled != 0;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment