Commit 90554200 authored by Jens Axboe

io_uring: provide IORING_ENTER_SQ_WAIT for SQPOLL SQ ring waits

When using SQPOLL, applications can run out of SQ ring entries because
the SQPOLL thread hasn't consumed them yet. The only options for dealing
with that are checking back later, or busy-polling for the condition.

Provide IORING_ENTER_SQ_WAIT if applications want to wait on this
condition.
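
For illustration only (not part of this commit), a SQPOLL submitter
might use the new flag roughly as follows. io_uring_enter_sys(),
wait_for_sq_space() and ring_fd are hypothetical names, and the ring is
assumed to have been set up with IORING_SETUP_SQPOLL against headers
that carry this commit:

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

/* Hypothetical raw wrapper for the io_uring_enter(2) syscall. */
static int io_uring_enter_sys(int ring_fd, unsigned int to_submit,
			      unsigned int min_complete, unsigned int flags)
{
	return syscall(__NR_io_uring_enter, ring_fd, to_submit,
		       min_complete, flags, NULL, 0);
}

/*
 * Block until the SQPOLL thread has freed SQ ring space, instead of
 * busy-polling the ring head/tail indices from userspace.
 */
static int wait_for_sq_space(int ring_fd)
{
	return io_uring_enter_sys(ring_fd, 0, 0, IORING_ENTER_SQ_WAIT);
}

As the io_sqpoll_wait_sq() loop below shows, the wait is interruptible:
the call can return before space is available if a signal is pending.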
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 738277ad
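
Relatedly, the io_uring_poll() change below keys EPOLLOUT off the new
io_sqring_full() helper, so an application can also wait for SQ space
with plain poll(2). A hypothetical sketch (poll_for_sq_space() and
ring_fd are illustrative names):

#include <poll.h>

/*
 * Wait indefinitely for POLLOUT on the ring fd, which io_uring_poll()
 * raises once the SQ ring is no longer full.
 */
static int poll_for_sq_space(int ring_fd)
{
	struct pollfd pfd = { .fd = ring_fd, .events = POLLOUT };

	return poll(&pfd, 1, -1);
}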
fs/io_uring.c

@@ -301,6 +301,7 @@ struct io_ring_ctx {
 	struct io_sq_data	*sq_data;	/* if using sq thread polling */
+	struct wait_queue_head	sqo_sq_wait;
 	struct wait_queue_entry	sqo_wait_entry;
 	struct list_head	sqd_list;
@@ -1072,6 +1073,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 		goto err;
 	ctx->flags = p->flags;
+	init_waitqueue_head(&ctx->sqo_sq_wait);
 	INIT_LIST_HEAD(&ctx->sqd_list);
 	init_waitqueue_head(&ctx->cq_wait);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
@@ -1324,6 +1326,13 @@ static void io_commit_cqring(struct io_ring_ctx *ctx)
 		__io_queue_deferred(ctx);
 }

+static inline bool io_sqring_full(struct io_ring_ctx *ctx)
+{
+	struct io_rings *r = ctx->rings;
+
+	return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == r->sq_ring_entries;
+}
+
 static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
 {
 	struct io_rings *rings = ctx->rings;
@@ -6736,6 +6745,10 @@ static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
 	if (likely(!percpu_ref_is_dying(&ctx->refs)))
 		ret = io_submit_sqes(ctx, to_submit);
 	mutex_unlock(&ctx->uring_lock);
+
+	if (!io_sqring_full(ctx) && wq_has_sleeper(&ctx->sqo_sq_wait))
+		wake_up(&ctx->sqo_sq_wait);
+
 	return SQT_DID_WORK;
 }
@@ -8231,8 +8244,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
 	 * io_commit_cqring
 	 */
 	smp_rmb();
-	if (READ_ONCE(ctx->rings->sq.tail) - ctx->cached_sq_head !=
-	    ctx->rings->sq_ring_entries)
+	if (!io_sqring_full(ctx))
 		mask |= EPOLLOUT | EPOLLWRNORM;
 	if (io_cqring_events(ctx, false))
 		mask |= EPOLLIN | EPOLLRDNORM;
@@ -8801,6 +8813,25 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,

 #endif /* !CONFIG_MMU */

+static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
+{
+	DEFINE_WAIT(wait);
+
+	do {
+		if (!io_sqring_full(ctx))
+			break;
+
+		prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE);
+
+		if (!io_sqring_full(ctx))
+			break;
+
+		schedule();
+	} while (!signal_pending(current));
+
+	finish_wait(&ctx->sqo_sq_wait, &wait);
+}
+
 SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 		u32, min_complete, u32, flags, const sigset_t __user *, sig,
 		size_t, sigsz)
@@ -8812,7 +8843,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 	io_run_task_work();

-	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP))
+	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
+		      IORING_ENTER_SQ_WAIT))
 		return -EINVAL;

 	f = fdget(fd);
@@ -8843,6 +8875,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 			io_cqring_overflow_flush(ctx, false, NULL, NULL);
 		if (flags & IORING_ENTER_SQ_WAKEUP)
 			wake_up(&ctx->sq_data->wait);
+		if (flags & IORING_ENTER_SQ_WAIT)
+			io_sqpoll_wait_sq(ctx);
 		submitted = to_submit;
 	} else if (to_submit) {
 		ret = io_uring_add_task_file(f.file);
include/uapi/linux/io_uring.h
@@ -225,6 +225,7 @@ struct io_cqring_offsets {
  */
 #define IORING_ENTER_GETEVENTS	(1U << 0)
 #define IORING_ENTER_SQ_WAKEUP	(1U << 1)
+#define IORING_ENTER_SQ_WAIT	(1U << 2)

 /*
  * Passed in for io_uring_setup(2). Copied back with updated info on success
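
A note on the io_sqring_full() test above: sq.tail and cached_sq_head
are free-running u32 counters, so tail - head gives the number of
unconsumed entries even after the counters wrap. A standalone sketch of
that invariant (ring_full() and the constants are illustrative):

#include <assert.h>
#include <stdint.h>

/*
 * Same shape as the kernel-side check: full when the free-running
 * tail has advanced exactly ring_entries past the consumer's head.
 */
static int ring_full(uint32_t tail, uint32_t head, uint32_t ring_entries)
{
	return tail - head == ring_entries;
}

int main(void)
{
	uint32_t head = UINT32_MAX - 1, entries = 8;
	uint32_t tail = head + entries;	/* wraps around to 6 */

	assert(ring_full(tail, head, entries));	/* full despite wraparound */
	assert(!ring_full(tail - 1, head, entries));	/* one slot free */
	return 0;
}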