Commit ff6165b2 authored by Jens Axboe

io_uring: retain iov_iter state over io_read/io_write calls

Instead of maintaining (and setting/remembering) iov_iter size and
segment counts, just put the iov_iter in the async part of the IO
structure.

This is mostly a preparation patch for doing appropriate internal retries
for short reads, but it also cleans up the state handling nicely and
simplifies it quite a bit.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent ebf0d100
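
To make the intent concrete, here is a minimal userspace sketch (plain C, not kernel or io_uring code) of what retaining the iterator state buys: the async state owns the iterator, a short read advances it in place, and a later retry simply resumes from wherever it stopped, with no separate size/segment bookkeeping. The names used (iter_state, async_rw_state, demo_short_read) are invented for illustration and are not kernel APIs.

/*
 * Minimal userspace sketch of the idea behind this patch: keep the
 * iterator state itself in the async portion of the request, so a retry
 * after a short read resumes from the saved cursor instead of re-deriving
 * segment counts and sizes.
 */
#include <stdio.h>
#include <sys/uio.h>

struct iter_state {
	const struct iovec *iov;	/* current segment */
	unsigned long nr_segs;		/* segments remaining */
	size_t iov_offset;		/* progress inside current segment */
	size_t count;			/* total bytes remaining */
};

struct async_rw_state {
	struct iovec fast_iov[8];	/* inline vectors, like fast_iov[] */
	struct iter_state iter;		/* the retained iterator state */
};

/* consume 'bytes' from the saved iterator, advancing across segments */
static void iter_advance(struct iter_state *it, size_t bytes)
{
	it->count -= bytes;
	while (bytes) {
		size_t left = it->iov->iov_len - it->iov_offset;

		if (bytes < left) {
			it->iov_offset += bytes;
			break;
		}
		bytes -= left;
		it->iov++;
		it->nr_segs--;
		it->iov_offset = 0;
	}
}

/* pretend a read completed short: only half of what was asked for */
static size_t demo_short_read(struct iter_state *it)
{
	return it->count / 2;
}

int main(void)
{
	char a[64], b[64];
	struct async_rw_state st = {
		.fast_iov = {
			{ .iov_base = a, .iov_len = sizeof(a) },
			{ .iov_base = b, .iov_len = sizeof(b) },
		},
	};

	st.iter = (struct iter_state) {
		.iov = st.fast_iov, .nr_segs = 2,
		.iov_offset = 0, .count = sizeof(a) + sizeof(b),
	};

	/* first attempt is short; progress lands in the retained state */
	iter_advance(&st.iter, demo_short_read(&st.iter));
	printf("after short read: %zu bytes left, %lu segs left\n",
	       st.iter.count, st.iter.nr_segs);

	/* a retry just picks up st.iter as-is, no size/nr_segs juggling */
	iter_advance(&st.iter, st.iter.count);
	printf("after retry: %zu bytes left\n", st.iter.count);
	return 0;
}

In the patch itself the same role is played by the struct iov_iter embedded in struct io_async_rw, as the first hunk below shows.
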
@@ -508,9 +508,8 @@ struct io_async_msghdr {
 
 struct io_async_rw {
 	struct iovec fast_iov[UIO_FASTIOV];
-	struct iovec *iov;
-	ssize_t nr_segs;
-	ssize_t size;
+	const struct iovec *free_iovec;
+	struct iov_iter iter;
 	struct wait_page_queue wpq;
 };
@@ -915,8 +914,8 @@ static void io_file_put_work(struct work_struct *work);
 static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
			       struct iovec **iovec, struct iov_iter *iter,
			       bool needs_lock);
-static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
-			     struct iovec *iovec, struct iovec *fast_iov,
+static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
+			     const struct iovec *fast_iov,
			     struct iov_iter *iter);
 
 static struct kmem_cache *req_cachep;
@@ -2299,7 +2298,7 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error)
	ret = io_import_iovec(rw, req, &iovec, &iter, false);
	if (ret < 0)
		goto end_req;
-	ret = io_setup_async_rw(req, ret, iovec, inline_vecs, &iter);
+	ret = io_setup_async_rw(req, iovec, inline_vecs, &iter);
	if (!ret)
		return true;
	kfree(iovec);
@@ -2820,6 +2819,13 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
	ssize_t ret;
	u8 opcode;
 
+	if (req->io) {
+		struct io_async_rw *iorw = &req->io->rw;
+
+		*iovec = NULL;
+		return iov_iter_count(&iorw->iter);
+	}
+
	opcode = req->opcode;
	if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
		*iovec = NULL;
@@ -2845,14 +2851,6 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
		return ret < 0 ? ret : sqe_len;
	}
 
-	if (req->io) {
-		struct io_async_rw *iorw = &req->io->rw;
-
-		iov_iter_init(iter, rw, iorw->iov, iorw->nr_segs, iorw->size);
-		*iovec = NULL;
-		return iorw->size;
-	}
-
	if (req->flags & REQ_F_BUFFER_SELECT) {
		ret = io_iov_buffer_select(req, *iovec, needs_lock);
		if (!ret) {
@@ -2930,21 +2928,29 @@ static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
	return ret;
 }
 
-static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size,
-			  struct iovec *iovec, struct iovec *fast_iov,
-			  struct iov_iter *iter)
+static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
+			  const struct iovec *fast_iov, struct iov_iter *iter)
 {
	struct io_async_rw *rw = &req->io->rw;
 
-	rw->nr_segs = iter->nr_segs;
-	rw->size = io_size;
+	memcpy(&rw->iter, iter, sizeof(*iter));
+	rw->free_iovec = NULL;
+	/* can only be fixed buffers, no need to do anything */
+	if (iter->type == ITER_BVEC)
+		return;
	if (!iovec) {
-		rw->iov = rw->fast_iov;
-		if (rw->iov != fast_iov)
-			memcpy(rw->iov, fast_iov,
+		unsigned iov_off = 0;
+
+		rw->iter.iov = rw->fast_iov;
+		if (iter->iov != fast_iov) {
+			iov_off = iter->iov - fast_iov;
+			rw->iter.iov += iov_off;
+		}
+		if (rw->fast_iov != fast_iov)
+			memcpy(rw->fast_iov + iov_off, fast_iov + iov_off,
			       sizeof(struct iovec) * iter->nr_segs);
	} else {
-		rw->iov = iovec;
+		rw->free_iovec = iovec;
		req->flags |= REQ_F_NEED_CLEANUP;
	}
 }
@@ -2963,8 +2969,8 @@ static int io_alloc_async_ctx(struct io_kiocb *req)
	return __io_alloc_async_ctx(req);
 }
 
-static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
-			     struct iovec *iovec, struct iovec *fast_iov,
+static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
+			     const struct iovec *fast_iov,
			     struct iov_iter *iter)
 {
	if (!io_op_defs[req->opcode].async_ctx)
@@ -2973,7 +2979,7 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
		if (__io_alloc_async_ctx(req))
			return -ENOMEM;
 
-		io_req_map_rw(req, io_size, iovec, fast_iov, iter);
+		io_req_map_rw(req, iovec, fast_iov, iter);
	}
	return 0;
 }
@@ -2981,18 +2987,19 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
 static inline int io_rw_prep_async(struct io_kiocb *req, int rw,
				   bool force_nonblock)
 {
-	struct io_async_ctx *io = req->io;
-	struct iov_iter iter;
+	struct io_async_rw *iorw = &req->io->rw;
	ssize_t ret;
 
-	io->rw.iov = io->rw.fast_iov;
+	iorw->iter.iov = iorw->fast_iov;
+	/* reset ->io around the iovec import, we don't want to use it */
	req->io = NULL;
-	ret = io_import_iovec(rw, req, &io->rw.iov, &iter, !force_nonblock);
-	req->io = io;
+	ret = io_import_iovec(rw, req, (struct iovec **) &iorw->iter.iov,
+				&iorw->iter, !force_nonblock);
+	req->io = container_of(iorw, struct io_async_ctx, rw);
	if (unlikely(ret < 0))
		return ret;
 
-	io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
+	io_req_map_rw(req, iorw->iter.iov, iorw->fast_iov, &iorw->iter);
	return 0;
 }
@@ -3090,7 +3097,8 @@ static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb,
  * succeed, or in rare cases where it fails, we then fall back to using the
  * async worker threads for a blocking retry.
  */
-static bool io_rw_should_retry(struct io_kiocb *req)
+static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec,
+			       struct iovec *fast_iov, struct iov_iter *iter)
 {
	struct kiocb *kiocb = &req->rw.kiocb;
	int ret;
@@ -3113,8 +3121,11 @@ static bool io_rw_should_retry(struct io_kiocb *req)
	 * If request type doesn't require req->io to defer in general,
	 * we need to allocate it here
	 */
-	if (!req->io && __io_alloc_async_ctx(req))
-		return false;
+	if (!req->io) {
+		if (__io_alloc_async_ctx(req))
+			return false;
+		io_req_map_rw(req, iovec, fast_iov, iter);
+	}
 
	ret = kiocb_wait_page_queue_init(kiocb, &req->io->rw.wpq,
						io_async_buf_func, req);
@@ -3141,12 +3152,14 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 {
	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
	struct kiocb *kiocb = &req->rw.kiocb;
-	struct iov_iter iter;
+	struct iov_iter __iter, *iter = &__iter;
	size_t iov_count;
-	ssize_t io_size, ret, ret2;
-	unsigned long nr_segs;
+	ssize_t io_size, ret, ret2 = 0;
 
-	ret = io_import_iovec(READ, req, &iovec, &iter, !force_nonblock);
+	if (req->io)
+		iter = &req->io->rw.iter;
+
+	ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock);
	if (ret < 0)
		return ret;
	io_size = ret;
@@ -3160,30 +3173,26 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
	if (force_nonblock && !io_file_supports_async(req->file, READ))
		goto copy_iov;
 
-	iov_count = iov_iter_count(&iter);
-	nr_segs = iter.nr_segs;
+	iov_count = iov_iter_count(iter);
	ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count);
	if (unlikely(ret))
		goto out_free;
 
-	ret2 = io_iter_do_read(req, &iter);
+	ret2 = io_iter_do_read(req, iter);
 
	/* Catch -EAGAIN return for forced non-blocking submission */
	if (!force_nonblock || (ret2 != -EAGAIN && ret2 != -EIO)) {
		kiocb_done(kiocb, ret2, cs);
	} else {
-		iter.count = iov_count;
-		iter.nr_segs = nr_segs;
 copy_iov:
-		ret = io_setup_async_rw(req, io_size, iovec, inline_vecs,
-					&iter);
+		ret = io_setup_async_rw(req, iovec, inline_vecs, iter);
		if (ret)
			goto out_free;
		/* it's copied and will be cleaned with ->io */
		iovec = NULL;
		/* if we can retry, do so with the callbacks armed */
-		if (io_rw_should_retry(req)) {
-			ret2 = io_iter_do_read(req, &iter);
+		if (io_rw_should_retry(req, iovec, inline_vecs, iter)) {
+			ret2 = io_iter_do_read(req, iter);
			if (ret2 == -EIOCBQUEUED) {
				goto out_free;
			} else if (ret2 != -EAGAIN) {
@@ -3223,12 +3232,14 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
 {
	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
	struct kiocb *kiocb = &req->rw.kiocb;
-	struct iov_iter iter;
+	struct iov_iter __iter, *iter = &__iter;
	size_t iov_count;
	ssize_t ret, ret2, io_size;
-	unsigned long nr_segs;
 
-	ret = io_import_iovec(WRITE, req, &iovec, &iter, !force_nonblock);
+	if (req->io)
+		iter = &req->io->rw.iter;
+
+	ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock);
	if (ret < 0)
		return ret;
	io_size = ret;
@@ -3247,8 +3258,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
	    (req->flags & REQ_F_ISREG))
		goto copy_iov;
 
-	iov_count = iov_iter_count(&iter);
-	nr_segs = iter.nr_segs;
+	iov_count = iov_iter_count(iter);
	ret = rw_verify_area(WRITE, req->file, &kiocb->ki_pos, iov_count);
	if (unlikely(ret))
		goto out_free;
@@ -3269,9 +3279,9 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
	kiocb->ki_flags |= IOCB_WRITE;
 
	if (req->file->f_op->write_iter)
-		ret2 = call_write_iter(req->file, kiocb, &iter);
+		ret2 = call_write_iter(req->file, kiocb, iter);
	else if (req->file->f_op->write)
-		ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter);
+		ret2 = loop_rw_iter(WRITE, req->file, kiocb, iter);
	else
		ret2 = -EINVAL;
 
@@ -3284,16 +3294,10 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
	if (!force_nonblock || ret2 != -EAGAIN) {
		kiocb_done(kiocb, ret2, cs);
	} else {
-		iter.count = iov_count;
-		iter.nr_segs = nr_segs;
 copy_iov:
-		ret = io_setup_async_rw(req, io_size, iovec, inline_vecs,
-					&iter);
-		if (ret)
-			goto out_free;
-		/* it's copied and will be cleaned with ->io */
-		iovec = NULL;
-		return -EAGAIN;
+		ret = io_setup_async_rw(req, iovec, inline_vecs, iter);
+		if (!ret)
+			return -EAGAIN;
	}
 out_free:
	if (iovec)
@@ -5583,8 +5587,8 @@ static void __io_clean_op(struct io_kiocb *req)
	case IORING_OP_WRITEV:
	case IORING_OP_WRITE_FIXED:
	case IORING_OP_WRITE:
-		if (io->rw.iov != io->rw.fast_iov)
-			kfree(io->rw.iov);
+		if (io->rw.free_iovec)
+			kfree(io->rw.free_iovec);
		break;
	case IORING_OP_RECVMSG:
	case IORING_OP_SENDMSG: