Commit 206aefde authored by Jens Axboe

io_uring: reduce/pack size of io_ring_ctx

With the recent flurry of additions and changes to io_uring, the
layout of io_ring_ctx has become a bit stale. We're right now at
704 bytes in size on my x86-64 build, or 11 cachelines. This
patch does two things:

- We have two completion structs embedded, that we only use for
  quiesce of the ctx (or shutdown) and for sqthread init cases.
  That's 2x32 bytes right there; let's dynamically allocate them.

- Reorder the struct a bit with an eye on cachelines, use cases,
  and holes.

With this patch, we're down to 512 bytes, or 8 cachelines.
Reviewed-by: Jackie Liu <liuyun01@kylinos.cn>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 5f8fd2d3
...@@ -204,6 +204,7 @@ struct io_ring_ctx { ...@@ -204,6 +204,7 @@ struct io_ring_ctx {
unsigned sq_mask; unsigned sq_mask;
unsigned sq_thread_idle; unsigned sq_thread_idle;
unsigned cached_sq_dropped; unsigned cached_sq_dropped;
atomic_t cached_cq_overflow;
struct io_uring_sqe *sq_sqes; struct io_uring_sqe *sq_sqes;
struct list_head defer_list; struct list_head defer_list;
...@@ -213,25 +214,13 @@ struct io_ring_ctx { ...@@ -213,25 +214,13 @@ struct io_ring_ctx {
wait_queue_head_t inflight_wait; wait_queue_head_t inflight_wait;
} ____cacheline_aligned_in_smp; } ____cacheline_aligned_in_smp;
struct io_rings *rings;
/* IO offload */ /* IO offload */
struct io_wq *io_wq; struct io_wq *io_wq;
struct task_struct *sqo_thread; /* if using sq thread polling */ struct task_struct *sqo_thread; /* if using sq thread polling */
struct mm_struct *sqo_mm; struct mm_struct *sqo_mm;
wait_queue_head_t sqo_wait; wait_queue_head_t sqo_wait;
struct completion sqo_thread_started;
struct {
unsigned cached_cq_tail;
atomic_t cached_cq_overflow;
unsigned cq_entries;
unsigned cq_mask;
struct wait_queue_head cq_wait;
struct fasync_struct *cq_fasync;
struct eventfd_ctx *cq_ev_fd;
atomic_t cq_timeouts;
} ____cacheline_aligned_in_smp;
struct io_rings *rings;
/* /*
* If used, fixed file set. Writers must ensure that ->refs is dead, * If used, fixed file set. Writers must ensure that ->refs is dead,
...@@ -247,7 +236,22 @@ struct io_ring_ctx { ...@@ -247,7 +236,22 @@ struct io_ring_ctx {
struct user_struct *user; struct user_struct *user;
struct completion ctx_done; /* 0 is for ctx quiesce/reinit/free, 1 is for sqo_thread started */
struct completion *completions;
#if defined(CONFIG_UNIX)
struct socket *ring_sock;
#endif
struct {
unsigned cached_cq_tail;
unsigned cq_entries;
unsigned cq_mask;
atomic_t cq_timeouts;
struct wait_queue_head cq_wait;
struct fasync_struct *cq_fasync;
struct eventfd_ctx *cq_ev_fd;
} ____cacheline_aligned_in_smp;
struct { struct {
struct mutex uring_lock; struct mutex uring_lock;
...@@ -269,10 +273,6 @@ struct io_ring_ctx { ...@@ -269,10 +273,6 @@ struct io_ring_ctx {
spinlock_t inflight_lock; spinlock_t inflight_lock;
struct list_head inflight_list; struct list_head inflight_list;
} ____cacheline_aligned_in_smp; } ____cacheline_aligned_in_smp;
#if defined(CONFIG_UNIX)
struct socket *ring_sock;
#endif
}; };
struct sqe_submit { struct sqe_submit {
...@@ -397,7 +397,7 @@ static void io_ring_ctx_ref_free(struct percpu_ref *ref) ...@@ -397,7 +397,7 @@ static void io_ring_ctx_ref_free(struct percpu_ref *ref)
{ {
struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs); struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
complete(&ctx->ctx_done); complete(&ctx->completions[0]);
} }
static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
...@@ -408,17 +408,19 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) ...@@ -408,17 +408,19 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
if (!ctx) if (!ctx)
return NULL; return NULL;
ctx->completions = kmalloc(2 * sizeof(struct completion), GFP_KERNEL);
if (!ctx->completions)
goto err;
if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free, if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) { PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
kfree(ctx); goto err;
return NULL;
}
ctx->flags = p->flags; ctx->flags = p->flags;
init_waitqueue_head(&ctx->cq_wait); init_waitqueue_head(&ctx->cq_wait);
INIT_LIST_HEAD(&ctx->cq_overflow_list); INIT_LIST_HEAD(&ctx->cq_overflow_list);
init_completion(&ctx->ctx_done); init_completion(&ctx->completions[0]);
init_completion(&ctx->sqo_thread_started); init_completion(&ctx->completions[1]);
mutex_init(&ctx->uring_lock); mutex_init(&ctx->uring_lock);
init_waitqueue_head(&ctx->wait); init_waitqueue_head(&ctx->wait);
spin_lock_init(&ctx->completion_lock); spin_lock_init(&ctx->completion_lock);
...@@ -430,6 +432,10 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) ...@@ -430,6 +432,10 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
spin_lock_init(&ctx->inflight_lock); spin_lock_init(&ctx->inflight_lock);
INIT_LIST_HEAD(&ctx->inflight_list); INIT_LIST_HEAD(&ctx->inflight_list);
return ctx; return ctx;
err:
kfree(ctx->completions);
kfree(ctx);
return NULL;
} }
static inline bool __io_sequence_defer(struct io_ring_ctx *ctx, static inline bool __io_sequence_defer(struct io_ring_ctx *ctx,
...@@ -3046,7 +3052,7 @@ static int io_sq_thread(void *data) ...@@ -3046,7 +3052,7 @@ static int io_sq_thread(void *data)
unsigned inflight; unsigned inflight;
unsigned long timeout; unsigned long timeout;
complete(&ctx->sqo_thread_started); complete(&ctx->completions[1]);
old_fs = get_fs(); old_fs = get_fs();
set_fs(USER_DS); set_fs(USER_DS);
...@@ -3286,7 +3292,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) ...@@ -3286,7 +3292,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
static void io_sq_thread_stop(struct io_ring_ctx *ctx) static void io_sq_thread_stop(struct io_ring_ctx *ctx)
{ {
if (ctx->sqo_thread) { if (ctx->sqo_thread) {
wait_for_completion(&ctx->sqo_thread_started); wait_for_completion(&ctx->completions[1]);
/* /*
* The park is a bit of a work-around, without it we get * The park is a bit of a work-around, without it we get
* warning spews on shutdown with SQPOLL set and affinity * warning spews on shutdown with SQPOLL set and affinity
...@@ -4108,6 +4114,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) ...@@ -4108,6 +4114,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_unaccount_mem(ctx->user, io_unaccount_mem(ctx->user,
ring_pages(ctx->sq_entries, ctx->cq_entries)); ring_pages(ctx->sq_entries, ctx->cq_entries));
free_uid(ctx->user); free_uid(ctx->user);
kfree(ctx->completions);
kfree(ctx); kfree(ctx);
} }
...@@ -4152,7 +4159,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) ...@@ -4152,7 +4159,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
io_iopoll_reap_events(ctx); io_iopoll_reap_events(ctx);
io_cqring_overflow_flush(ctx, true); io_cqring_overflow_flush(ctx, true);
wait_for_completion(&ctx->ctx_done); wait_for_completion(&ctx->completions[0]);
io_ring_ctx_free(ctx); io_ring_ctx_free(ctx);
} }
...@@ -4555,7 +4562,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, ...@@ -4555,7 +4562,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
* no new references will come in after we've killed the percpu ref. * no new references will come in after we've killed the percpu ref.
*/ */
mutex_unlock(&ctx->uring_lock); mutex_unlock(&ctx->uring_lock);
wait_for_completion(&ctx->ctx_done); wait_for_completion(&ctx->completions[0]);
mutex_lock(&ctx->uring_lock); mutex_lock(&ctx->uring_lock);
switch (opcode) { switch (opcode) {
...@@ -4598,7 +4605,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, ...@@ -4598,7 +4605,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
} }
/* bring the ctx back to life */ /* bring the ctx back to life */
reinit_completion(&ctx->ctx_done); reinit_completion(&ctx->completions[0]);
percpu_ref_reinit(&ctx->refs); percpu_ref_reinit(&ctx->refs);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment