Commit ef1a4a77 authored by Linus Torvalds

Merge tag 'io_uring-2023-01-06' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:
 "A few minor fixes that should go into the 6.2 release:

   - Fix for a memory leak in io-wq worker creation, if we ultimately
     end up canceling the worker creation before it gets created (me)

   - lockdep annotations for the CQ locking (Pavel)

   - A regression fix for CQ timeout handling (Pavel)

   - Ring pinning around deferred task_work fix (Pavel)

   - A trivial member move in struct io_ring_ctx, saving us some memory
     (me)"

* tag 'io_uring-2023-01-06' of git://git.kernel.dk/linux:
  io_uring: fix CQ waiting timeout handling
  io_uring: move 'poll_multi_queue' bool in io_ring_ctx
  io_uring: lockdep annotate CQ locking
  io_uring: pin context while queueing deferred tw
  io_uring/io-wq: free worker if task_work creation is canceled
parents 93387d49 12521a5d
@@ -292,6 +292,8 @@ struct io_ring_ctx {
         struct {
                 spinlock_t completion_lock;
 
+                bool poll_multi_queue;
+
                 /*
                  * ->iopoll_list is protected by the ctx->uring_lock for
                  * io_uring instances that don't use IORING_SETUP_SQPOLL.
@@ -300,7 +302,6 @@ struct io_ring_ctx {
                  */
                 struct io_wq_work_list iopoll_list;
                 struct io_hash_table cancel_table;
-                bool poll_multi_queue;
 
                 struct llist_head work_llist;
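The hunk above only moves poll_multi_queue so that it sits right after completion_lock; per the merge message this saves memory, most likely because the bool now fills alignment padding next to the smaller lock field instead of forcing extra padding where it used to sit. The exact saving depends on architecture and config. As a self-contained illustration of the effect, the toy structs below use invented names and simplified stand-in types, not the real io_ring_ctx layout:

/* Illustration only: member order vs. sizeof. Not the kernel's layout. */
#include <stdbool.h>
#include <stdio.h>

struct before {                 /* bool trails the pointer-sized members */
        int lock;               /* stand-in for a 4-byte spinlock_t */
        void *iopoll_list;      /* 8-byte member, forces 4 bytes of padding above it */
        void *cancel_table;
        bool poll_multi_queue;  /* plus 7 bytes of tail padding */
};

struct after {                  /* bool tucked into the hole after 'lock' */
        int lock;
        bool poll_multi_queue;
        void *iopoll_list;
        void *cancel_table;
};

int main(void)
{
        printf("before: %zu bytes\n", sizeof(struct before));
        printf("after:  %zu bytes\n", sizeof(struct after));
        return 0;
}

On a typical LP64 build the first struct is 32 bytes and the second 24; the kernel's pahole tool reports this kind of hole directly, and gcc's -Wpadded flags it at compile time.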
@@ -1230,6 +1230,7 @@ static void io_wq_cancel_tw_create(struct io_wq *wq)
 
                 worker = container_of(cb, struct io_worker, create_work);
                 io_worker_cancel_cb(worker);
+                kfree(worker);
         }
 }
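The added kfree() is the leak fix from the merge message: if the queued worker-creation callback is canceled before it ever runs, the callback that would normally consume the io_worker allocation never executes, so the cancel path has to free it itself. Below is a minimal userspace sketch of that ownership rule; struct job, job_run() and cancel_pending_job() are invented names, and only the pattern mirrors io_wq_cancel_tw_create():

/* Sketch of the "whoever cancels a pending callback frees its payload" rule. */
#include <stdio.h>
#include <stdlib.h>

struct job {
        void (*run)(struct job *job);
        int id;
};

static void job_run(struct job *job)
{
        printf("running job %d\n", job->id);
        free(job);                      /* normal path: the callback consumes the job */
}

static void cancel_pending_job(struct job *job)
{
        printf("canceling job %d\n", job->id);
        /*
         * The callback will never run, so it will never free the
         * allocation; without this free() the job leaks, which is
         * the kind of bug the io-wq change fixes.
         */
        free(job);
}

int main(void)
{
        struct job *job = malloc(sizeof(*job));

        if (!job)
                return 1;
        job->run = job_run;
        job->id = 1;
        cancel_pending_job(job);        /* canceled before it ever ran */
        return 0;
}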
@@ -731,6 +731,8 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
         size_t ocq_size = sizeof(struct io_overflow_cqe);
         bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
 
+        lockdep_assert_held(&ctx->completion_lock);
+
         if (is_cqe32)
                 ocq_size += sizeof(struct io_uring_cqe);
@@ -820,9 +822,6 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
 {
         struct io_uring_cqe *cqe;
 
-        if (!ctx->task_complete)
-                lockdep_assert_held(&ctx->completion_lock);
-
         ctx->cq_extra++;
 
         /*
@@ -1236,13 +1235,18 @@ static void io_req_local_work_add(struct io_kiocb *req)
 {
         struct io_ring_ctx *ctx = req->ctx;
 
-        if (!llist_add(&req->io_task_work.node, &ctx->work_llist))
+        percpu_ref_get(&ctx->refs);
+
+        if (!llist_add(&req->io_task_work.node, &ctx->work_llist)) {
+                percpu_ref_put(&ctx->refs);
                 return;
+        }
 
         /* need it for the following io_cqring_wake() */
         smp_mb__after_atomic();
 
         if (unlikely(atomic_read(&req->task->io_uring->in_idle))) {
                 io_move_task_work_from_local(ctx);
+                percpu_ref_put(&ctx->refs);
                 return;
         }
@@ -1252,6 +1256,7 @@ static void io_req_local_work_add(struct io_kiocb *req)
         if (ctx->has_evfd)
                 io_eventfd_signal(ctx);
         __io_cqring_wake(ctx);
+        percpu_ref_put(&ctx->refs);
 }
 
 void __io_req_task_work_add(struct io_kiocb *req, bool allow_local)
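The hunk above pins the ring with percpu_ref_get() before the request is added to the deferred task_work list and drops that reference on every way out of io_req_local_work_add(), including both early returns. A rough userspace sketch of the same shape follows; a plain C11 atomic counter stands in for percpu_ref, and struct ctx and queue_local_work() are invented names:

/* Sketch only: "get at entry, put on every exit path", mirroring the fix's shape. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct ctx {
        atomic_int refs;
        bool shutting_down;
};

static void ctx_get(struct ctx *ctx) { atomic_fetch_add(&ctx->refs, 1); }
static void ctx_put(struct ctx *ctx) { atomic_fetch_sub(&ctx->refs, 1); }

/* Returns true if the work was queued. */
static bool queue_local_work(struct ctx *ctx, int work)
{
        ctx_get(ctx);                   /* pin ctx while this function may still touch it */

        if (ctx->shutting_down) {       /* early-exit path... */
                ctx_put(ctx);           /* ...must drop the pin too */
                return false;
        }

        printf("queued work %d\n", work);
        ctx_put(ctx);                   /* normal path drops the pin as well */
        return true;
}

int main(void)
{
        struct ctx ctx = { .refs = 0, .shutting_down = false };

        queue_local_work(&ctx, 42);
        printf("refs after queueing: %d\n", atomic_load(&ctx.refs)); /* 0: balanced */
        return 0;
}

The point of the pattern is simply that the object cannot be torn down while the function may still dereference it, and that no exit path is allowed to leak the pin.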
@@ -2465,7 +2470,7 @@ int io_run_task_work_sig(struct io_ring_ctx *ctx)
 /* when returns >0, the caller should retry */
 static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
                                           struct io_wait_queue *iowq,
-                                          ktime_t timeout)
+                                          ktime_t *timeout)
 {
         int ret;
         unsigned long check_cq;
@@ -2483,7 +2488,7 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
                 if (check_cq & BIT(IO_CHECK_CQ_DROPPED_BIT))
                         return -EBADR;
         }
-        if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS))
+        if (!schedule_hrtimeout(timeout, HRTIMER_MODE_ABS))
                 return -ETIME;
 
         /*
@@ -2559,7 +2564,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                 }
                 prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq,
                                           TASK_INTERRUPTIBLE);
-                ret = io_cqring_wait_schedule(ctx, &iowq, &timeout);
+                ret = io_cqring_wait_schedule(ctx, &iowq, &timeout);
                 if (__io_cqring_events_user(ctx) >= min_events)
                         break;
                 cond_resched();
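io_cqring_wait() calls io_cqring_wait_schedule() from a retry loop, and the timeout it hands over is an absolute expiry used with HRTIMER_MODE_ABS, so going around the loop again does not stretch the total wait. The sketch below only illustrates that general idea in userspace; it is not the kernel logic, and condition_met(), the 100ms budget and the nap length are invented for the example:

/* Waiting against one absolute deadline across retries (illustration only). */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define NSEC_PER_SEC 1000000000LL

static long long now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}

static bool condition_met(void)
{
        return false;                   /* stand-in: pretend no completion ever arrives */
}

int main(void)
{
        /* Compute the deadline once; retries never push it further out. */
        long long deadline = now_ns() + 100 * 1000 * 1000;      /* 100ms budget */
        struct timespec nap = { .tv_sec = 0, .tv_nsec = 1000 * 1000 };
        int retries = 0;

        while (!condition_met()) {
                if (now_ns() >= deadline) {
                        printf("timed out after %d retries\n", retries);
                        return 1;       /* analogous to returning -ETIME */
                }
                retries++;
                nanosleep(&nap, NULL);  /* a real waiter would block on the CQ instead */
        }
        printf("condition met\n");
        return 0;
}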
@@ -79,6 +79,19 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx);
 bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
                         bool cancel_all);
 
+#define io_lockdep_assert_cq_locked(ctx)                                \
+        do {                                                            \
+                if (ctx->flags & IORING_SETUP_IOPOLL) {                 \
+                        lockdep_assert_held(&ctx->uring_lock);          \
+                } else if (!ctx->task_complete) {                       \
+                        lockdep_assert_held(&ctx->completion_lock);     \
+                } else if (ctx->submitter_task->flags & PF_EXITING) {   \
+                        lockdep_assert(current_work());                 \
+                } else {                                                \
+                        lockdep_assert(current == ctx->submitter_task); \
+                }                                                       \
+        } while (0)
+
 static inline void io_req_task_work_add(struct io_kiocb *req)
 {
         __io_req_task_work_add(req, true);
@@ -92,6 +105,8 @@ void io_cq_unlock_post(struct io_ring_ctx *ctx);
 static inline struct io_uring_cqe *io_get_cqe_overflow(struct io_ring_ctx *ctx,
                                                        bool overflow)
 {
+        io_lockdep_assert_cq_locked(ctx);
+
         if (likely(ctx->cqe_cached < ctx->cqe_sentinel)) {
                 struct io_uring_cqe *cqe = ctx->cqe_cached;
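The new io_lockdep_assert_cq_locked() macro, now checked on the io_get_cqe_overflow() path, spells out when the completion queue counts as locked: with IORING_SETUP_IOPOLL the uring_lock must be held; otherwise, if completions are not task-private (!task_complete), the completion_lock must be held; otherwise only the submitter task may post, and once that task is exiting the caller must be running from a workqueue item (current_work()). As a rough userspace analogue of such a mode-dependent assertion, the sketch below uses invented names (enum ring_mode, struct ring, post_completion()); only the shape of the check follows the macro:

/* Userspace analogue of a mode-dependent "is the CQ locked?" debug check. */
#include <assert.h>
#include <pthread.h>
#include <stdbool.h>

enum ring_mode { MODE_IOPOLL, MODE_LOCKED, MODE_SINGLE_ISSUER };

struct ring {
        enum ring_mode mode;
        bool lock_held;                 /* a real build would use a mutex plus lockdep */
        pthread_t submitter;
};

#define ring_assert_cq_locked(r)                                                \
        do {                                                                    \
                if ((r)->mode == MODE_SINGLE_ISSUER)                            \
                        assert(pthread_equal(pthread_self(), (r)->submitter));  \
                else                                                            \
                        assert((r)->lock_held);                                 \
        } while (0)

static void post_completion(struct ring *r)
{
        ring_assert_cq_locked(r);       /* catches callers that skip the locking rule */
        /* ... write a CQE here ... */
}

int main(void)
{
        struct ring r = {
                .mode = MODE_SINGLE_ISSUER,
                .submitter = pthread_self(),
        };

        post_completion(&r);            /* fine: we are the submitter thread */
        return 0;
}

As with lockdep, the check compiles away when assertions are disabled and turns a subtle locking mistake into an immediate, loud failure when they are not.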