Commit 9c38747f authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'io_uring-6.2-2023-01-20' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:
 "Fixes for the MSG_RING opcode. Nothing really major:

   - Fix an overflow missing serialization around posting CQEs to the
     target ring (me)

   - Disable MSG_RING on a ring that isn't enabled yet. There's nothing
     really wrong with allowing it, but 1) it's somewhat odd as nobody
     can receive them yet, and 2) it means that using the right delivery
     mechanism might change. As nobody should be sending CQEs to a ring
     that isn't enabled yet, let's just disable it (Pavel)

   - Tweak to when we decide to post remotely or not for MSG_RING
     (Pavel)"

* tag 'io_uring-6.2-2023-01-20' of git://git.kernel.dk/linux:
  io_uring/msg_ring: fix remote queue to disabled ring
  io_uring/msg_ring: fix flagging remote execution
  io_uring/msg_ring: fix missing lock on overflow for IOPOLL
  io_uring/msg_ring: move double lock/unlock helpers higher up
parents 26e57507 8579538c
......@@ -3674,7 +3674,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
if (ctx->flags & IORING_SETUP_SINGLE_ISSUER
&& !(ctx->flags & IORING_SETUP_R_DISABLED))
ctx->submitter_task = get_task_struct(current);
WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
file = io_uring_get_file(ctx);
if (IS_ERR(file)) {
......@@ -3868,7 +3868,7 @@ static int io_register_enable_rings(struct io_ring_ctx *ctx)
return -EBADFD;
if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task)
ctx->submitter_task = get_task_struct(current);
WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
if (ctx->restrictions.registered)
ctx->restricted = 1;
......
......@@ -25,6 +25,28 @@ struct io_msg {
u32 flags;
};
static void io_double_unlock_ctx(struct io_ring_ctx *octx)
{
mutex_unlock(&octx->uring_lock);
}
static int io_double_lock_ctx(struct io_ring_ctx *octx,
unsigned int issue_flags)
{
/*
* To ensure proper ordering between the two ctxs, we can only
* attempt a trylock on the target. If that fails and we already have
* the source ctx lock, punt to io-wq.
*/
if (!(issue_flags & IO_URING_F_UNLOCKED)) {
if (!mutex_trylock(&octx->uring_lock))
return -EAGAIN;
return 0;
}
mutex_lock(&octx->uring_lock);
return 0;
}
void io_msg_ring_cleanup(struct io_kiocb *req)
{
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
......@@ -36,6 +58,29 @@ void io_msg_ring_cleanup(struct io_kiocb *req)
msg->src_file = NULL;
}
static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
{
if (!target_ctx->task_complete)
return false;
return current != target_ctx->submitter_task;
}
static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
{
struct io_ring_ctx *ctx = req->file->private_data;
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
struct task_struct *task = READ_ONCE(ctx->submitter_task);
if (unlikely(!task))
return -EOWNERDEAD;
init_task_work(&msg->tw, func);
if (task_work_add(ctx->submitter_task, &msg->tw, TWA_SIGNAL))
return -EOWNERDEAD;
return IOU_ISSUE_SKIP_COMPLETE;
}
static void io_msg_tw_complete(struct callback_head *head)
{
struct io_msg *msg = container_of(head, struct io_msg, tw);
......@@ -43,61 +88,54 @@ static void io_msg_tw_complete(struct callback_head *head)
struct io_ring_ctx *target_ctx = req->file->private_data;
int ret = 0;
if (current->flags & PF_EXITING)
if (current->flags & PF_EXITING) {
ret = -EOWNERDEAD;
else if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
} else {
/*
* If the target ring is using IOPOLL mode, then we need to be
* holding the uring_lock for posting completions. Other ring
* types rely on the regular completion locking, which is
* handled while posting.
*/
if (target_ctx->flags & IORING_SETUP_IOPOLL)
mutex_lock(&target_ctx->uring_lock);
if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
ret = -EOVERFLOW;
if (target_ctx->flags & IORING_SETUP_IOPOLL)
mutex_unlock(&target_ctx->uring_lock);
}
if (ret < 0)
req_set_fail(req);
io_req_queue_tw_complete(req, ret);
}
static int io_msg_ring_data(struct io_kiocb *req)
static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_ring_ctx *target_ctx = req->file->private_data;
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
int ret;
if (msg->src_fd || msg->dst_fd || msg->flags)
return -EINVAL;
if (target_ctx->flags & IORING_SETUP_R_DISABLED)
return -EBADFD;
if (target_ctx->task_complete && current != target_ctx->submitter_task) {
init_task_work(&msg->tw, io_msg_tw_complete);
if (task_work_add(target_ctx->submitter_task, &msg->tw,
TWA_SIGNAL_NO_IPI))
return -EOWNERDEAD;
atomic_or(IORING_SQ_TASKRUN, &target_ctx->rings->sq_flags);
return IOU_ISSUE_SKIP_COMPLETE;
}
if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
return 0;
return -EOVERFLOW;
}
if (io_msg_need_remote(target_ctx))
return io_msg_exec_remote(req, io_msg_tw_complete);
static void io_double_unlock_ctx(struct io_ring_ctx *octx,
unsigned int issue_flags)
{
mutex_unlock(&octx->uring_lock);
}
static int io_double_lock_ctx(struct io_ring_ctx *octx,
unsigned int issue_flags)
{
/*
* To ensure proper ordering between the two ctxs, we can only
* attempt a trylock on the target. If that fails and we already have
* the source ctx lock, punt to io-wq.
*/
if (!(issue_flags & IO_URING_F_UNLOCKED)) {
if (!mutex_trylock(&octx->uring_lock))
ret = -EOVERFLOW;
if (target_ctx->flags & IORING_SETUP_IOPOLL) {
if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
return -EAGAIN;
return 0;
if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
ret = 0;
io_double_unlock_ctx(target_ctx);
} else {
if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
ret = 0;
}
mutex_lock(&octx->uring_lock);
return 0;
return ret;
}
static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
......@@ -148,7 +186,7 @@ static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flag
if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
ret = -EOVERFLOW;
out_unlock:
io_double_unlock_ctx(target_ctx, issue_flags);
io_double_unlock_ctx(target_ctx);
return ret;
}
......@@ -174,6 +212,8 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
if (target_ctx == ctx)
return -EINVAL;
if (target_ctx->flags & IORING_SETUP_R_DISABLED)
return -EBADFD;
if (!src_file) {
src_file = io_msg_grab_file(req, issue_flags);
if (!src_file)
......@@ -182,14 +222,8 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
req->flags |= REQ_F_NEED_CLEANUP;
}
if (target_ctx->task_complete && current != target_ctx->submitter_task) {
init_task_work(&msg->tw, io_msg_tw_fd_complete);
if (task_work_add(target_ctx->submitter_task, &msg->tw,
TWA_SIGNAL))
return -EOWNERDEAD;
return IOU_ISSUE_SKIP_COMPLETE;
}
if (io_msg_need_remote(target_ctx))
return io_msg_exec_remote(req, io_msg_tw_fd_complete);
return io_msg_install_complete(req, issue_flags);
}
......@@ -224,7 +258,7 @@ int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
switch (msg->cmd) {
case IORING_MSG_DATA:
ret = io_msg_ring_data(req);
ret = io_msg_ring_data(req, issue_flags);
break;
case IORING_MSG_SEND_FD:
ret = io_msg_send_fd(req, issue_flags);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment