Commit 364eb618 authored by Linus Torvalds

Merge tag 'io_uring-6.1-2022-11-25' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:

 - A few poll related fixes. One fixing a race condition between poll
   cancelation and trigger, and one making the overflow handling a bit
   more robust (Lin, Pavel)

 - Fix an fput() for error handling in the direct file table (Lin)

 - Fix for a regression introduced in this cycle, where we don't always
   get TIF_NOTIFY_SIGNAL cleared appropriately (me)

* tag 'io_uring-6.1-2022-11-25' of git://git.kernel.dk/linux:
  io_uring: clear TIF_NOTIFY_SIGNAL if set and task_work not available
  io_uring/poll: fix poll_refs race with cancelation
  io_uring/filetable: fix file reference underflow
  io_uring: make poll refs more robust
  io_uring: cmpxchg for poll arm refs release
Parents: 3e0d88f9 7cfe7a09

io_uring/filetable.c

@@ -101,8 +101,6 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
 err:
         if (needs_switch)
                 io_rsrc_node_switch(ctx, ctx->file_data);
-        if (ret)
-                fput(file);
         return ret;
 }
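
For context, the removed fput() dropped a reference on an error path where, per the shortlog above, it made the file's reference count underflow. A generic, hypothetical sketch of that bug pattern (plain C, not io_uring code), where a helper puts a reference it does not own:

#include <assert.h>
#include <stdio.h>

struct object {
        int refcount;
};

static void get_ref(struct object *o) { o->refcount++; }
static void put_ref(struct object *o) { o->refcount--; }

/* Buggy helper: on failure it puts a reference that belongs to the caller. */
static int install_buggy(struct object *o)
{
        int ret = -1;            /* pretend the installation failed */

        if (ret)
                put_ref(o);      /* extra put: not this function's reference */
        return ret;
}

int main(void)
{
        struct object o = { .refcount = 1 };

        get_ref(&o);             /* caller's reference for the call */
        if (install_buggy(&o))
                put_ref(&o);     /* caller drops its reference on error */
        put_ref(&o);             /* original owner drops the last reference */

        /* Two owners, three puts: the count goes negative. */
        printf("refcount = %d\n", o.refcount);
        assert(o.refcount < 0);
        return 0;
}
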
io_uring/io_uring.h

@@ -238,9 +238,14 @@ static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
 
 static inline int io_run_task_work(void)
 {
+        /*
+         * Always check-and-clear the task_work notification signal. With how
+         * signaling works for task_work, we can find it set with nothing to
+         * run. We need to clear it for that case, like get_signal() does.
+         */
+        if (test_thread_flag(TIF_NOTIFY_SIGNAL))
+                clear_notify_signal();
         if (task_work_pending(current)) {
-                if (test_thread_flag(TIF_NOTIFY_SIGNAL))
-                        clear_notify_signal();
                 __set_current_state(TASK_RUNNING);
                 task_work_run();
                 return 1;
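
As the new comment notes, TIF_NOTIFY_SIGNAL can be found set with no task_work left to run, so the fix clears it regardless of whether the pending check fires. A minimal userspace model of that ordering (hypothetical names, C11 atomics standing in for the thread flag and the task_work list):

#include <stdatomic.h>
#include <stdio.h>

static atomic_bool notify_flag;   /* stands in for TIF_NOTIFY_SIGNAL */
static atomic_int  pending_work;  /* stands in for the task_work list */

/* Old ordering: the flag is only consumed when work is actually pending,
 * so a notification whose work is already gone leaves the flag set. */
static int run_task_work_old(void)
{
        if (atomic_load(&pending_work) > 0) {
                if (atomic_load(&notify_flag))
                        atomic_store(&notify_flag, false);
                atomic_store(&pending_work, 0);   /* "run" the queued work */
                return 1;
        }
        return 0;
}

/* New ordering: always check and clear the notification first. */
static int run_task_work_new(void)
{
        if (atomic_load(&notify_flag))
                atomic_store(&notify_flag, false);
        if (atomic_load(&pending_work) > 0) {
                atomic_store(&pending_work, 0);
                return 1;
        }
        return 0;
}

int main(void)
{
        /* Notification arrived, but the work it announced is already gone. */
        atomic_store(&notify_flag, true);
        atomic_store(&pending_work, 0);

        run_task_work_old();
        printf("old ordering leaves the flag set: %d\n", atomic_load(&notify_flag));

        run_task_work_new();
        printf("new ordering clears it:           %d\n", atomic_load(&notify_flag));
        return 0;
}
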
io_uring/poll.c

@@ -40,7 +40,14 @@ struct io_poll_table {
 };
 
 #define IO_POLL_CANCEL_FLAG     BIT(31)
-#define IO_POLL_REF_MASK        GENMASK(30, 0)
+#define IO_POLL_RETRY_FLAG      BIT(30)
+#define IO_POLL_REF_MASK        GENMASK(29, 0)
+
+/*
+ * We usually have 1-2 refs taken, 128 is more than enough and we want to
+ * maximise the margin between this amount and the moment when it overflows.
+ */
+#define IO_POLL_REF_BIAS        128
 
 #define IO_WQE_F_DOUBLE         1
@@ -58,6 +65,21 @@ static inline bool wqe_is_double(struct wait_queue_entry *wqe)
         return priv & IO_WQE_F_DOUBLE;
 }
 
+static bool io_poll_get_ownership_slowpath(struct io_kiocb *req)
+{
+        int v;
+
+        /*
+         * poll_refs are already elevated and we don't have much hope for
+         * grabbing the ownership. Instead of incrementing set a retry flag
+         * to notify the loop that there might have been some change.
+         */
+        v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs);
+        if (v & IO_POLL_REF_MASK)
+                return false;
+        return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
+}
+
 /*
  * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can
  * bump it and acquire ownership. It's disallowed to modify requests while not
@@ -66,6 +88,8 @@ static inline bool wqe_is_double(struct wait_queue_entry *wqe)
  */
 static inline bool io_poll_get_ownership(struct io_kiocb *req)
 {
+        if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS))
+                return io_poll_get_ownership_slowpath(req);
         return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
 }
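
Taken together, the new defines and the slowpath cap how far poll_refs can grow: once the count reaches IO_POLL_REF_BIAS, wakers stop incrementing and instead set IO_POLL_RETRY_FLAG for the current owner, so the counter can never creep into the flag bits. A rough userspace sketch of the scheme using C11 atomics (types and names here are illustrative stand-ins, not the kernel's):

#include <stdatomic.h>
#include <stdbool.h>

#define POLL_RETRY_FLAG  (1u << 30)
#define POLL_REF_MASK    ((1u << 30) - 1)
#define POLL_REF_BIAS    128u

struct poll_req {
        atomic_uint poll_refs;
};

static bool get_ownership_slowpath(struct poll_req *req)
{
        unsigned int v;

        /* Leave a retry hint for the current owner instead of bumping the
         * counter, so the count cannot overflow into the flag bits. */
        v = atomic_fetch_or(&req->poll_refs, POLL_RETRY_FLAG);
        if (v & POLL_REF_MASK)
                return false;           /* someone else still owns it */
        /* The count dropped to zero in the meantime; take ownership. */
        return !(atomic_fetch_add(&req->poll_refs, 1) & POLL_REF_MASK);
}

static bool get_ownership(struct poll_req *req)
{
        /* Fast path while the count is far below the flag bits. */
        if (atomic_load(&req->poll_refs) >= POLL_REF_BIAS)
                return get_ownership_slowpath(req);
        return !(atomic_fetch_add(&req->poll_refs, 1) & POLL_REF_MASK);
}

int main(void)
{
        struct poll_req req = { .poll_refs = 0 };

        /* First caller becomes the owner, the second only adds a reference. */
        bool first = get_ownership(&req);
        bool second = get_ownership(&req);

        return first && !second ? 0 : 1;
}
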
@@ -235,6 +259,16 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
                  */
                 if ((v & IO_POLL_REF_MASK) != 1)
                         req->cqe.res = 0;
+                if (v & IO_POLL_RETRY_FLAG) {
+                        req->cqe.res = 0;
+                        /*
+                         * We won't find new events that came in between
+                         * vfs_poll and the ref put unless we clear the flag
+                         * in advance.
+                         */
+                        atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs);
+                        v &= ~IO_POLL_RETRY_FLAG;
+                }
 
                 /* the mask was stashed in __io_poll_execute */
                 if (!req->cqe.res) {
@@ -274,7 +308,8 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
                  * Release all references, retry if someone tried to restart
                  * task_work while we were executing it.
                  */
-        } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs));
+        } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs) &
+                                        IO_POLL_REF_MASK);
 
         return IOU_POLL_NO_ACTION;
 }
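
The loop condition now masks the subtraction result with IO_POLL_REF_MASK: with the retry flag a waker can set a bit in poll_refs without taking a reference, so the raw result can be non-zero even though no references remain and ownership should be dropped. A small standalone check of the arithmetic (hypothetical userspace code, plain C):

#include <assert.h>
#include <stdio.h>

#define RETRY_FLAG (1u << 30)
#define REF_MASK   ((1u << 30) - 1)

int main(void)
{
        unsigned int refs = 1 | RETRY_FLAG;  /* our ref + a waker's retry hint */
        unsigned int v = refs;               /* snapshot taken at loop start */

        refs -= v & REF_MASK;                /* drop the references we held */

        /* The old condition looked at the raw result: the leftover flag bit
         * makes it non-zero even though zero references remain. */
        assert(refs != 0);

        /* Masking with REF_MASK keeps looping only while actual references
         * are left, which is what the fixed condition checks. */
        assert((refs & REF_MASK) == 0);

        printf("raw = 0x%x, masked = 0x%x\n", refs, refs & REF_MASK);
        return 0;
}
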
@@ -518,7 +553,6 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
                                  unsigned issue_flags)
 {
         struct io_ring_ctx *ctx = req->ctx;
-        int v;
 
         INIT_HLIST_NODE(&req->hash_node);
         req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
@@ -586,11 +620,10 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
         if (ipt->owning) {
                 /*
-                 * Release ownership. If someone tried to queue a tw while it was
-                 * locked, kick it off for them.
+                 * Try to release ownership. If we see a change of state, e.g.
+                 * poll was waken up, queue up a tw, it'll deal with it.
                  */
-                v = atomic_dec_return(&req->poll_refs);
-                if (unlikely(v & IO_POLL_REF_MASK))
+                if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1)
                         __io_poll_execute(req, 0);
         }
 
         return 0;
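
Switching the release from a decrement to a cmpxchg matters with the retry flag: the arming path holds exactly one reference, and a plain decrement followed by a check of only the reference bits would miss a state change that set a flag without taking a reference. cmpxchg(1 -> 0) fails on any concurrent change and punts to task_work. A hypothetical C11-atomics sketch of the two release styles:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define POLL_RETRY_FLAG (1u << 30)
#define POLL_REF_MASK   ((1u << 30) - 1)

/* Old style: drop our reference and only look at the remaining ref bits. */
static bool release_by_dec(atomic_uint *refs)
{
        unsigned int v = atomic_fetch_sub(refs, 1) - 1;

        return (v & POLL_REF_MASK) != 0;    /* true: queue task_work */
}

/* New style: succeed only if the value is exactly our single reference. */
static bool release_by_cmpxchg(atomic_uint *refs)
{
        unsigned int expected = 1;

        return !atomic_compare_exchange_strong(refs, &expected, 0);
}

int main(void)
{
        /* A waker set the retry flag without taking a reference. */
        atomic_uint a = 1 | POLL_RETRY_FLAG;
        atomic_uint b = 1 | POLL_RETRY_FLAG;

        printf("dec-based release queues tw:     %d\n", release_by_dec(&a));
        printf("cmpxchg-based release queues tw: %d\n", release_by_cmpxchg(&b));
        return 0;
}
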