Commit 738f531d authored by Linus Torvalds

Merge tag 'for-5.4/io_uring-2019-09-27' of git://git.kernel.dk/linux-block

Pull more io_uring updates from Jens Axboe:
 "Just two things in here:

   - Improvement to the io_uring CQ ring wakeup for batched IO (me)

   - Fix wrong comparison in poll handling (yangerkun)

  I realize the first one is a little late in the game, but it felt
  pointless to hold it off until the next release. Went through various
  testing and reviews with Pavel and peterz"

* tag 'for-5.4/io_uring-2019-09-27' of git://git.kernel.dk/linux-block:
  io_uring: make CQ ring wakeups be more efficient
  io_uring: compare cached_cq_tail with cq.head in io_uring_poll
parents 47db9b9a bda52162
@@ -2768,6 +2768,38 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
 	return submit;
 }
 
+struct io_wait_queue {
+	struct wait_queue_entry wq;
+	struct io_ring_ctx *ctx;
+	unsigned to_wait;
+	unsigned nr_timeouts;
+};
+
+static inline bool io_should_wake(struct io_wait_queue *iowq)
+{
+	struct io_ring_ctx *ctx = iowq->ctx;
+
+	/*
+	 * Wake up if we have enough events, or if a timeout occured since we
+	 * started waiting. For timeouts, we always want to return to userspace,
+	 * regardless of event count.
+	 */
+	return io_cqring_events(ctx->rings) >= iowq->to_wait ||
+			atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
+}
+
+static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
+			    int wake_flags, void *key)
+{
+	struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue,
+							wq);
+
+	if (!io_should_wake(iowq))
+		return -1;
+
+	return autoremove_wake_function(curr, mode, wake_flags, key);
+}
+
 /*
  * Wait until events become available, if we don't already have some. The
  * application must reap them itself, as they reside on the shared cq ring.
@@ -2775,8 +2807,16 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
 static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			  const sigset_t __user *sig, size_t sigsz)
 {
+	struct io_wait_queue iowq = {
+		.wq = {
+			.private	= current,
+			.func		= io_wake_function,
+			.entry		= LIST_HEAD_INIT(iowq.wq.entry),
+		},
+		.ctx		= ctx,
+		.to_wait	= min_events,
+	};
 	struct io_rings *rings = ctx->rings;
-	unsigned nr_timeouts;
 	int ret;
 
 	if (io_cqring_events(rings) >= min_events)
@@ -2795,15 +2835,21 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			return ret;
 	}
 
-	nr_timeouts = atomic_read(&ctx->cq_timeouts);
-	/*
-	 * Return if we have enough events, or if a timeout occured since
-	 * we started waiting. For timeouts, we always want to return to
-	 * userspace.
-	 */
-	ret = wait_event_interruptible(ctx->wait,
-				io_cqring_events(rings) >= min_events ||
-				atomic_read(&ctx->cq_timeouts) != nr_timeouts);
+	ret = 0;
+	iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
+	do {
+		prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
+						TASK_INTERRUPTIBLE);
+		if (io_should_wake(&iowq))
+			break;
+		schedule();
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+	} while (1);
+	finish_wait(&ctx->wait, &iowq.wq);
+
 	restore_saved_sigmask_unless(ret == -ERESTARTSYS);
 	if (ret == -ERESTARTSYS)
 		ret = -EINTR;
@@ -3455,7 +3501,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
 	if (READ_ONCE(ctx->rings->sq.tail) - ctx->cached_sq_head !=
 	    ctx->rings->sq_ring_entries)
 		mask |= EPOLLOUT | EPOLLWRNORM;
-	if (READ_ONCE(ctx->rings->sq.head) != ctx->cached_cq_tail)
+	if (READ_ONCE(ctx->rings->cq.head) != ctx->cached_cq_tail)
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	return mask;
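For context (not part of the commit): the new wakeup path above matters when an application submits a batch of requests and waits for all of them in a single io_uring_enter() call, since the waiting task is now only woken once min_complete events (or a timeout completion) are in the CQ ring. Below is a minimal userspace sketch of that pattern; it assumes liburing is available, and the BATCH size and no-op requests are purely illustrative. The trailing poll() check exercises the cq.head comparison fixed by the second patch.

/* batched_wait.c - illustrative only; build with: gcc batched_wait.c -luring */
#include <liburing.h>
#include <poll.h>
#include <stdio.h>

#define BATCH 4	/* arbitrary batch size for the sketch */

int main(void)
{
	struct io_uring ring;
	struct io_uring_cqe *cqe;
	int i, ret;

	/* Small ring, no setup flags, for simplicity. */
	ret = io_uring_queue_init(8, &ring, 0);
	if (ret < 0) {
		fprintf(stderr, "queue_init: %d\n", ret);
		return 1;
	}

	/* Queue a batch of no-op requests. */
	for (i = 0; i < BATCH; i++) {
		struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

		if (!sqe)
			break;
		io_uring_prep_nop(sqe);
	}

	/*
	 * Submit and wait for BATCH completions in one io_uring_enter()
	 * call; with the patch above, the kernel only wakes this task once
	 * enough events are in the CQ ring (or a timeout completion fired).
	 */
	ret = io_uring_submit_and_wait(&ring, BATCH);
	if (ret < 0) {
		fprintf(stderr, "submit_and_wait: %d\n", ret);
		return 1;
	}

	/* Unreaped completions make the ring fd readable via poll()/epoll. */
	struct pollfd pfd = { .fd = ring.ring_fd, .events = POLLIN };
	if (poll(&pfd, 1, 0) == 1 && (pfd.revents & POLLIN))
		printf("CQ ring is readable\n");

	/* Reap the completions. */
	for (i = 0; i < BATCH; i++) {
		if (io_uring_wait_cqe(&ring, &cqe) == 0)
			io_uring_cqe_seen(&ring, cqe);
	}

	io_uring_queue_exit(&ring);
	return 0;
}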