Commit 37f0e767 authored by Pavel Begunkov, committed by Jens Axboe

io_uring: optimise ctx referencing by requests

Currently, we allocate one ctx reference per request at submission
time and put it when the request is freed. It's batched and not that
expensive, but it still bloats the kernel, adds two function calls
for RCU and adds some overhead for request counting in
io_free_batch_list().

Instead, always keep one reference with a request, even while it is
freed and sitting in the io_uring request caches. There is extra work
on the ring exit/quiesce paths, which now need to put the references
held by all cached requests. io_ring_exit_work() is already looping,
so that's not a problem; also add hybrid busy-waiting to
io_ctx_quiesce() for now.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/99613fbe396e80777228cde39bbda1aa8938554e.1633373302.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent d60aa65b
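
To make the new lifetime model concrete, here is a rough userspace sketch
(not the kernel code): a plain counter stands in for percpu_ref, a singly
linked list stands in for the request cache, and cache_refill()/cache_shrink()
are made-up names. Each cached request permanently pins one ctx reference,
taken in bulk at refill time and returned in bulk only when the cache itself
is torn down, so the submit and free fast paths touch no ctx references.

/* Standalone sketch only; a plain counter replaces percpu_ref and the
 * helper names below are hypothetical, not the kernel's. */
#include <stdio.h>
#include <stdlib.h>

struct ctx { long refs; };                       /* ~ ctx->refs (percpu_ref) */
struct req { struct ctx *ctx; struct req *next; };

static struct req *free_list;                    /* ~ submit_state.free_list */

/* Refill the cache: take one ctx ref per new request, in bulk. */
static void cache_refill(struct ctx *ctx, int nr)
{
        ctx->refs += nr;                         /* ~ percpu_ref_get_many()  */
        while (nr--) {
                struct req *req = calloc(1, sizeof(*req));

                req->ctx = ctx;
                req->next = free_list;
                free_list = req;
        }
}

/* Submission and completion just pop/push free_list; no ref traffic. */

/* Exit/quiesce path: free cached requests and return their refs. */
static void cache_shrink(struct ctx *ctx)
{
        long nr = 0;

        while (free_list) {
                struct req *req = free_list;

                free_list = req->next;
                free(req);
                nr++;
        }
        ctx->refs -= nr;                         /* ~ percpu_ref_put_many()  */
}

int main(void)
{
        struct ctx ctx = { .refs = 1 };          /* initial ring reference   */

        cache_refill(&ctx, 8);
        printf("refs after refill: %ld\n", ctx.refs);   /* prints 9 */
        cache_shrink(&ctx);
        printf("refs after shrink: %ld\n", ctx.refs);   /* prints 1 */
        return 0;
}
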
@@ -1807,7 +1807,6 @@ static void io_req_complete_post(struct io_kiocb *req, long res,
                 io_put_task(req->task, 1);
                 wq_list_add_head(&req->comp_list, &ctx->locked_free_list);
                 ctx->locked_free_nr++;
-                percpu_ref_put(&ctx->refs);
         }
         io_commit_cqring(ctx);
         spin_unlock(&ctx->completion_lock);
@@ -1929,6 +1928,7 @@ static bool __io_alloc_req_refill(struct io_ring_ctx *ctx)
                 ret = 1;
         }
 
+        percpu_ref_get_many(&ctx->refs, ret);
         for (i = 0; i < ret; i++) {
                 req = reqs[i];
 
@@ -1986,8 +1986,6 @@ static void __io_free_req(struct io_kiocb *req)
         wq_list_add_head(&req->comp_list, &ctx->locked_free_list);
         ctx->locked_free_nr++;
         spin_unlock(&ctx->completion_lock);
-
-        percpu_ref_put(&ctx->refs);
 }
 
 static inline void io_remove_next_linked(struct io_kiocb *req)
@@ -2276,7 +2274,7 @@ static void io_free_batch_list(struct io_ring_ctx *ctx,
         __must_hold(&ctx->uring_lock)
 {
         struct task_struct *task = NULL;
-        int task_refs = 0, ctx_refs = 0;
+        int task_refs = 0;
 
         do {
                 struct io_kiocb *req = container_of(node, struct io_kiocb,
@@ -2296,12 +2294,9 @@ static void io_free_batch_list(struct io_ring_ctx *ctx,
                         task_refs = 0;
                 }
                 task_refs++;
-                ctx_refs++;
                 wq_stack_add_head(&req->comp_list, &ctx->submit_state.free_list);
         } while (node);
 
-        if (ctx_refs)
-                percpu_ref_put_many(&ctx->refs, ctx_refs);
         if (task)
                 io_put_task(task, task_refs);
 }
@@ -7212,8 +7207,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
                 return 0;
         /* make sure SQ entry isn't read before tail */
         nr = min3(nr, ctx->sq_entries, entries);
-        if (unlikely(!percpu_ref_tryget_many(&ctx->refs, nr)))
-                return -EAGAIN;
         io_get_task_refs(nr);
 
         io_submit_state_start(&ctx->submit_state, nr);
@@ -7243,7 +7236,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
                 int unused = nr - ref_used;
 
                 current->io_uring->cached_refs += unused;
-                percpu_ref_put_many(&ctx->refs, unused);
         }
 
         io_submit_state_end(ctx);
@@ -9164,6 +9156,7 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx)
 static void io_req_caches_free(struct io_ring_ctx *ctx)
 {
         struct io_submit_state *state = &ctx->submit_state;
+        int nr = 0;
 
         mutex_lock(&ctx->uring_lock);
         io_flush_cached_locked_reqs(ctx, state);
@@ -9175,7 +9168,10 @@ static void io_req_caches_free(struct io_ring_ctx *ctx)
                 node = wq_stack_extract(&state->free_list);
                 req = container_of(node, struct io_kiocb, comp_list);
                 kmem_cache_free(req_cachep, req);
+                nr++;
         }
+        if (nr)
+                percpu_ref_put_many(&ctx->refs, nr);
         mutex_unlock(&ctx->uring_lock);
 }
@@ -9345,6 +9341,8 @@ static void io_ring_exit_work(struct work_struct *work)
                 io_sq_thread_unpark(sqd);
         }
 
+        io_req_caches_free(ctx);
+
         if (WARN_ON_ONCE(time_after(jiffies, timeout))) {
                 /* there is little hope left, don't run it too often */
                 interval = HZ * 60;
@@ -10724,10 +10722,14 @@ static int io_ctx_quiesce(struct io_ring_ctx *ctx)
          */
         mutex_unlock(&ctx->uring_lock);
         do {
-                ret = wait_for_completion_interruptible(&ctx->ref_comp);
-                if (!ret)
+                ret = wait_for_completion_interruptible_timeout(&ctx->ref_comp, HZ);
+                if (ret) {
+                        ret = min(0L, ret);
                         break;
+                }
+
                 ret = io_run_task_work_sig();
+                io_req_caches_free(ctx);
         } while (ret >= 0);
         mutex_lock(&ctx->uring_lock);
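
As a closing note, here is a rough userspace analogue of the hybrid waiting
added to io_ctx_quiesce() above (again not kernel code: a POSIX semaphore
stands in for ctx->ref_comp, and drop_cached_refs() is a hypothetical
stand-in for io_req_caches_free()). The pattern is to block on the completion
with a bounded timeout and, on every timeout, run the cache-freeing fallback,
since the references held by cached requests may be exactly what keeps the
refcount from hitting zero.

/* Standalone sketch of the hybrid waiting pattern; build with -pthread. */
#include <errno.h>
#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static sem_t ref_comp;                  /* ~ completion fired at zero refs */

static void drop_cached_refs(void)      /* ~ io_req_caches_free()          */
{
        printf("timed out, dropping cached request refs\n");
}

static void *last_ref_dropper(void *arg)
{
        (void)arg;
        sleep(3);                       /* the last reference goes later   */
        sem_post(&ref_comp);
        return NULL;
}

int main(void)
{
        pthread_t thr;

        sem_init(&ref_comp, 0, 0);
        pthread_create(&thr, NULL, last_ref_dropper, NULL);

        for (;;) {
                struct timespec ts;

                clock_gettime(CLOCK_REALTIME, &ts);
                ts.tv_sec += 1;                  /* ~ the HZ timeout        */
                if (sem_timedwait(&ref_comp, &ts) == 0)
                        break;                   /* refs hit zero, done     */
                if (errno != ETIMEDOUT)
                        break;                   /* interrupted: give up    */
                drop_cached_refs();              /* fallback work per tick  */
        }

        pthread_join(thr, NULL);
        printf("quiesced\n");
        return 0;
}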