Commit 099ada2c authored by Jens Axboe's avatar Jens Axboe

io_uring/rw: add write support for IOCB_DIO_CALLER_COMP

If the filesystem dio handler understands IOCB_DIO_CALLER_COMP, we'll
get a kiocb->ki_complete() callback with kiocb->dio_complete set. In
that case, rather than complete the IO directly through task_work, queue
up an intermediate task_work handler that first processes this callback
and then immediately completes the request.

For XFS, this avoids a punt through a workqueue, which is a lot less
efficient and adds latency to lower queue depth (or sync) O_DIRECT
writes.

Only do this for non-polled IO, as polled IO doesn't need this kind
of deferral as it always completes within the task itself. This then
avoids a check for deferral in the polled IO completion handler.
Reviewed-by: default avatarDarrick J. Wong <djwong@kernel.org>
Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarDave Chinner <dchinner@redhat.com>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 9cf3516c
...@@ -105,6 +105,7 @@ int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) ...@@ -105,6 +105,7 @@ int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
} else { } else {
rw->kiocb.ki_ioprio = get_current_ioprio(); rw->kiocb.ki_ioprio = get_current_ioprio();
} }
rw->kiocb.dio_complete = NULL;
rw->addr = READ_ONCE(sqe->addr); rw->addr = READ_ONCE(sqe->addr);
rw->len = READ_ONCE(sqe->len); rw->len = READ_ONCE(sqe->len);
...@@ -285,6 +286,15 @@ static inline int io_fixup_rw_res(struct io_kiocb *req, long res) ...@@ -285,6 +286,15 @@ static inline int io_fixup_rw_res(struct io_kiocb *req, long res)
void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts) void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts)
{ {
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
struct kiocb *kiocb = &rw->kiocb;
if ((kiocb->ki_flags & IOCB_DIO_CALLER_COMP) && kiocb->dio_complete) {
long res = kiocb->dio_complete(rw->kiocb.private);
io_req_set_res(req, io_fixup_rw_res(req, res), 0);
}
io_req_io_end(req); io_req_io_end(req);
if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) { if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) {
...@@ -300,9 +310,11 @@ static void io_complete_rw(struct kiocb *kiocb, long res) ...@@ -300,9 +310,11 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb); struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb);
struct io_kiocb *req = cmd_to_io_kiocb(rw); struct io_kiocb *req = cmd_to_io_kiocb(rw);
if (!kiocb->dio_complete || !(kiocb->ki_flags & IOCB_DIO_CALLER_COMP)) {
if (__io_complete_rw_common(req, res)) if (__io_complete_rw_common(req, res))
return; return;
io_req_set_res(req, io_fixup_rw_res(req, res), 0); io_req_set_res(req, io_fixup_rw_res(req, res), 0);
}
req->io_task_work.func = io_req_rw_complete; req->io_task_work.func = io_req_rw_complete;
__io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE); __io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE);
} }
...@@ -916,6 +928,15 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags) ...@@ -916,6 +928,15 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
} }
kiocb->ki_flags |= IOCB_WRITE; kiocb->ki_flags |= IOCB_WRITE;
/*
* For non-polled IO, set IOCB_DIO_CALLER_COMP, stating that our handler
* groks deferring the completion to task context. This isn't
* necessary and useful for polled IO as that can always complete
* directly.
*/
if (!(kiocb->ki_flags & IOCB_HIPRI))
kiocb->ki_flags |= IOCB_DIO_CALLER_COMP;
if (likely(req->file->f_op->write_iter)) if (likely(req->file->f_op->write_iter))
ret2 = call_write_iter(req->file, kiocb, &s->iter); ret2 = call_write_iter(req->file, kiocb, &s->iter);
else if (req->file->f_op->write) else if (req->file->f_op->write)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment