Commit 534ca6d6 authored by Jens Axboe

io_uring: split SQPOLL data into separate structure

Move all the necessary state out of io_ring_ctx, and into a new
structure, io_sq_data. The latter now deals with any state or
variables associated with the SQPOLL thread itself.

In preparation for supporting more than one io_ring_ctx per SQPOLL
thread.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent c8d1ba58
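
The core of the change is a refcounted shared-state pattern: each ring takes a reference on the shared SQPOLL state, and only the last ring to drop its reference tears the thread down. Below is a minimal userspace C sketch of that get/put lifecycle, for illustration only; the names (sq_data, get_sq_data, share_sq_data, put_sq_data) are hypothetical stand-ins, and C11 atomics stand in for the kernel's refcount_t.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for struct io_sq_data: just the shared refcount. */
struct sq_data {
	atomic_int refs;	/* mirrors refcount_t refs */
};

/* Allocate the shared state with one reference, as io_get_sq_data() does. */
static struct sq_data *get_sq_data(void)
{
	struct sq_data *sqd = calloc(1, sizeof(*sqd));

	if (!sqd)
		return NULL;
	atomic_store(&sqd->refs, 1);
	return sqd;
}

/* A second ring attaching to the same SQPOLL thread only bumps the count. */
static void share_sq_data(struct sq_data *sqd)
{
	atomic_fetch_add(&sqd->refs, 1);
}

/*
 * Drop one reference; the last dropper tears down the shared state, the
 * way io_put_sq_data() parks and stops the kthread before kfree().
 * atomic_fetch_sub() returns the previous value, so 1 means we hit zero.
 */
static void put_sq_data(struct sq_data *sqd)
{
	if (atomic_fetch_sub(&sqd->refs, 1) == 1) {
		printf("last ref dropped: stop SQPOLL thread, free state\n");
		free(sqd);
	}
}

int main(void)
{
	struct sq_data *sqd = get_sq_data();	/* ring A creates it */

	if (!sqd)
		return 1;
	share_sq_data(sqd);	/* ring B attaches */
	put_sq_data(sqd);	/* ring A exits; thread keeps running */
	put_sq_data(sqd);	/* ring B exits; thread is stopped */
	return 0;
}

With this lifecycle in place, supporting more than one io_ring_ctx per SQPOLL thread (the stated goal) reduces to having additional rings take a reference instead of spawning their own thread.
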
fs/io_uring.c

@@ -230,6 +230,12 @@ struct io_restriction {
 	bool registered;
 };
 
+struct io_sq_data {
+	refcount_t		refs;
+	struct task_struct	*thread;
+	struct wait_queue_head	wait;
+};
+
 struct io_ring_ctx {
 	struct {
 		struct percpu_ref	refs;
@@ -276,7 +282,6 @@ struct io_ring_ctx {
 	/* IO offload */
 	struct io_wq		*io_wq;
-	struct task_struct	*sqo_thread;	/* if using sq thread polling */
 
 	/*
 	 * For SQPOLL usage - we hold a reference to the parent task, so we
@@ -287,8 +292,8 @@ struct io_ring_ctx {
 	/* Only used for accounting purposes */
 	struct mm_struct	*mm_account;
 
-	struct wait_queue_head	*sqo_wait;
-	struct wait_queue_head	__sqo_wait;
+	struct io_sq_data	*sq_data;	/* if using sq thread polling */
+
 	struct wait_queue_entry	sqo_wait_entry;
 
 	/*
@@ -1059,8 +1064,6 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 		goto err;
 
 	ctx->flags = p->flags;
-	init_waitqueue_head(&ctx->__sqo_wait);
-	ctx->sqo_wait = &ctx->__sqo_wait;
 	init_waitqueue_head(&ctx->cq_wait);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
 	init_completion(&ctx->ref_comp);
@@ -1238,8 +1241,10 @@ static bool io_task_match(struct io_kiocb *req, struct task_struct *tsk)
 	if (!tsk || req->task == tsk)
 		return true;
-	if ((ctx->flags & IORING_SETUP_SQPOLL) && req->task == ctx->sqo_thread)
-		return true;
+	if (ctx->flags & IORING_SETUP_SQPOLL) {
+		if (ctx->sq_data && req->task == ctx->sq_data->thread)
+			return true;
+	}
 	return false;
 }
@@ -1343,8 +1348,8 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 {
 	if (waitqueue_active(&ctx->wait))
 		wake_up(&ctx->wait);
-	if (waitqueue_active(ctx->sqo_wait))
-		wake_up(ctx->sqo_wait);
+	if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait))
+		wake_up(&ctx->sq_data->wait);
 	if (io_should_trigger_evfd(ctx))
 		eventfd_signal(ctx->cq_ev_fd, 1);
 }
@@ -2451,8 +2456,9 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
 	else
 		list_add_tail(&req->inflight_entry, &ctx->iopoll_list);
 
-	if ((ctx->flags & IORING_SETUP_SQPOLL) && wq_has_sleeper(ctx->sqo_wait))
-		wake_up(ctx->sqo_wait);
+	if ((ctx->flags & IORING_SETUP_SQPOLL) &&
+	    wq_has_sleeper(&ctx->sq_data->wait))
+		wake_up(&ctx->sq_data->wait);
 }
 
 static void __io_state_file_put(struct io_submit_state *state)
@@ -6652,6 +6658,7 @@ static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
 					  unsigned long start_jiffies)
 {
 	unsigned long timeout = start_jiffies + ctx->sq_thread_idle;
+	struct io_sq_data *sqd = ctx->sq_data;
 	unsigned int to_submit;
 	int ret = 0;
@@ -6692,7 +6699,7 @@ static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
 			     !percpu_ref_is_dying(&ctx->refs)))
 			return SQT_SPIN;
 
-		prepare_to_wait(ctx->sqo_wait, &ctx->sqo_wait_entry,
+		prepare_to_wait(&sqd->wait, &ctx->sqo_wait_entry,
 					TASK_INTERRUPTIBLE);
 
 		/*
@@ -6704,7 +6711,7 @@ static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
 		 */
 		if ((ctx->flags & IORING_SETUP_IOPOLL) &&
 		    !list_empty_careful(&ctx->iopoll_list)) {
-			finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
+			finish_wait(&sqd->wait, &ctx->sqo_wait_entry);
 			goto again;
 		}
@@ -6715,7 +6722,7 @@ static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
 			return SQT_IDLE;
 		}
 
-		finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
+		finish_wait(&sqd->wait, &ctx->sqo_wait_entry);
 		io_ring_clear_wakeup_flag(ctx);
 	}
 
 	mutex_lock(&ctx->uring_lock);
@@ -6935,26 +6942,54 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 	return 0;
 }
 
-static void io_sq_thread_stop(struct io_ring_ctx *ctx)
+static void io_put_sq_data(struct io_sq_data *sqd)
 {
-	if (ctx->sqo_thread) {
-		/*
-		 * We may arrive here from the error branch in
-		 * io_sq_offload_create() where the kthread is created
-		 * without being waked up, thus wake it up now to make
-		 * sure the wait will complete.
-		 */
-		wake_up_process(ctx->sqo_thread);
-		wait_for_completion(&ctx->sq_thread_comp);
+	if (refcount_dec_and_test(&sqd->refs)) {
 		/*
 		 * The park is a bit of a work-around, without it we get
 		 * warning spews on shutdown with SQPOLL set and affinity
 		 * set to a single CPU.
 		 */
-		kthread_park(ctx->sqo_thread);
-		kthread_stop(ctx->sqo_thread);
-		ctx->sqo_thread = NULL;
+		if (sqd->thread) {
+			kthread_park(sqd->thread);
+			kthread_stop(sqd->thread);
+		}
+
+		kfree(sqd);
+	}
+}
+
+static struct io_sq_data *io_get_sq_data(struct io_uring_params *p)
+{
+	struct io_sq_data *sqd;
+
+	sqd = kzalloc(sizeof(*sqd), GFP_KERNEL);
+	if (!sqd)
+		return ERR_PTR(-ENOMEM);
+
+	refcount_set(&sqd->refs, 1);
+	init_waitqueue_head(&sqd->wait);
+	return sqd;
+}
+
+static void io_sq_thread_stop(struct io_ring_ctx *ctx)
+{
+	struct io_sq_data *sqd = ctx->sq_data;
+
+	if (sqd) {
+		if (sqd->thread) {
+			/*
+			 * We may arrive here from the error branch in
+			 * io_sq_offload_create() where the kthread is created
+			 * without being waked up, thus wake it up now to make
+			 * sure the wait will complete.
+			 */
+			wake_up_process(sqd->thread);
+			wait_for_completion(&ctx->sq_thread_comp);
+		}
+
+		io_put_sq_data(sqd);
+		ctx->sq_data = NULL;
 	}
 }
@@ -7623,10 +7658,19 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 	int ret;
 
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
+		struct io_sq_data *sqd;
+
 		ret = -EPERM;
 		if (!capable(CAP_SYS_ADMIN))
 			goto err;
 
+		sqd = io_get_sq_data(p);
+		if (IS_ERR(sqd)) {
+			ret = PTR_ERR(sqd);
+			goto err;
+		}
+		ctx->sq_data = sqd;
+
 		ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
 		if (!ctx->sq_thread_idle)
 			ctx->sq_thread_idle = HZ;
@@ -7640,19 +7684,18 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 			if (!cpu_online(cpu))
 				goto err;
 
-			ctx->sqo_thread = kthread_create_on_cpu(io_sq_thread,
-							ctx, cpu,
-							"io_uring-sq");
+			sqd->thread = kthread_create_on_cpu(io_sq_thread, ctx,
+							cpu, "io_uring-sq");
 		} else {
-			ctx->sqo_thread = kthread_create(io_sq_thread, ctx,
+			sqd->thread = kthread_create(io_sq_thread, ctx,
 							"io_uring-sq");
 		}
-		if (IS_ERR(ctx->sqo_thread)) {
-			ret = PTR_ERR(ctx->sqo_thread);
-			ctx->sqo_thread = NULL;
+		if (IS_ERR(sqd->thread)) {
+			ret = PTR_ERR(sqd->thread);
+			sqd->thread = NULL;
 			goto err;
 		}
-		ret = io_uring_alloc_task_context(ctx->sqo_thread);
+		ret = io_uring_alloc_task_context(sqd->thread);
 		if (ret)
 			goto err;
 	} else if (p->flags & IORING_SETUP_SQ_AFF) {
@@ -7673,8 +7716,10 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 static void io_sq_offload_start(struct io_ring_ctx *ctx)
 {
-	if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sqo_thread)
-		wake_up_process(ctx->sqo_thread);
+	struct io_sq_data *sqd = ctx->sq_data;
+
+	if ((ctx->flags & IORING_SETUP_SQPOLL) && sqd->thread)
+		wake_up_process(sqd->thread);
 }
 
 static inline void __io_unaccount_mem(struct user_struct *user,
static inline void __io_unaccount_mem(struct user_struct *user, static inline void __io_unaccount_mem(struct user_struct *user,
@@ -8402,8 +8447,8 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
 {
 	struct task_struct *task = current;
 
-	if (ctx->flags & IORING_SETUP_SQPOLL)
-		task = ctx->sqo_thread;
+	if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data)
+		task = ctx->sq_data->thread;
 
 	io_cqring_overflow_flush(ctx, true, task, files);
@@ -8688,7 +8733,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 		if (!list_empty_careful(&ctx->cq_overflow_list))
 			io_cqring_overflow_flush(ctx, false, NULL, NULL);
 		if (flags & IORING_ENTER_SQ_WAKEUP)
-			wake_up(ctx->sqo_wait);
+			wake_up(&ctx->sq_data->wait);
 		submitted = to_submit;
 	} else if (to_submit) {
 		ret = io_uring_add_task_file(f.file);
...