Commit 7d67af2c, authored by Pavel Begunkov and committed by Jens Axboe

io_uring: add splice(2) support

Add support for splice(2).

- output file is specified as sqe->fd, so it's handled by generic code
- hash_reg_file handled by generic code as well
- len is 32bit, but should be fine
- fd_in is treated as a registered (fixed) file when SPLICE_F_FD_IN_FIXED is
set; that bit is a splice flag (i.e. it is passed in sqe->splice_flags).
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 8da11c19
...@@ -76,6 +76,7 @@ ...@@ -76,6 +76,7 @@
#include <linux/fadvise.h> #include <linux/fadvise.h>
#include <linux/eventpoll.h> #include <linux/eventpoll.h>
#include <linux/fs_struct.h> #include <linux/fs_struct.h>
#include <linux/splice.h>
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h> #include <trace/events/io_uring.h>
...@@ -428,6 +429,15 @@ struct io_epoll { ...@@ -428,6 +429,15 @@ struct io_epoll {
struct epoll_event event; struct epoll_event event;
}; };
/*
 * Per-request state for IORING_OP_SPLICE. All fields except file_out are
 * filled in by io_splice_prep() and consumed by io_splice().
 */
struct io_splice {
/*
 * Output file passed to do_splice(). Not assigned by io_splice_prep();
 * presumably populated by the generic sqe->fd handling -- confirm.
 */
struct file *file_out;
/* Input file, looked up from sqe->splice_fd_in via io_file_get(). */
struct file *file_in;
/* Output offset from sqe->off; -1 means "pass a NULL offset to do_splice()". */
loff_t off_out;
/* Input offset from sqe->splice_off_in; -1 means "pass a NULL offset". */
loff_t off_in;
/* Number of bytes to splice, read from the 32-bit sqe->len. */
u64 len;
/* sqe->splice_flags: splice flags plus SPLICE_F_FD_IN_FIXED. */
unsigned int flags;
};
struct io_async_connect { struct io_async_connect {
struct sockaddr_storage address; struct sockaddr_storage address;
}; };
...@@ -544,6 +554,7 @@ struct io_kiocb { ...@@ -544,6 +554,7 @@ struct io_kiocb {
struct io_fadvise fadvise; struct io_fadvise fadvise;
struct io_madvise madvise; struct io_madvise madvise;
struct io_epoll epoll; struct io_epoll epoll;
struct io_splice splice;
}; };
struct io_async_ctx *io; struct io_async_ctx *io;
...@@ -744,6 +755,11 @@ static const struct io_op_def io_op_defs[] = { ...@@ -744,6 +755,11 @@ static const struct io_op_def io_op_defs[] = {
.unbound_nonreg_file = 1, .unbound_nonreg_file = 1,
.file_table = 1, .file_table = 1,
}, },
[IORING_OP_SPLICE] = {
.needs_file = 1,
.hash_reg_file = 1,
.unbound_nonreg_file = 1,
}
}; };
static void io_wq_submit_work(struct io_wq_work **workptr); static void io_wq_submit_work(struct io_wq_work **workptr);
...@@ -758,6 +774,10 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, ...@@ -758,6 +774,10 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
static int io_grab_files(struct io_kiocb *req); static int io_grab_files(struct io_kiocb *req);
static void io_ring_file_ref_flush(struct fixed_file_data *data); static void io_ring_file_ref_flush(struct fixed_file_data *data);
static void io_cleanup_req(struct io_kiocb *req); static void io_cleanup_req(struct io_kiocb *req);
/*
 * Forward declaration: io_splice_prep() calls io_file_get() to resolve the
 * splice input fd. On success the file is stored in *out_file; @fixed selects
 * a lookup in the registered-file table (as used for SPLICE_F_FD_IN_FIXED).
 * NOTE(review): the definition is not visible in this hunk -- presumably it
 * appears later in the file, hence this prototype.
 */
static int io_file_get(struct io_submit_state *state,
struct io_kiocb *req,
int fd, struct file **out_file,
bool fixed);
static struct kmem_cache *req_cachep; static struct kmem_cache *req_cachep;
...@@ -2404,6 +2424,77 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -2404,6 +2424,77 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
return ret; return ret;
} }
static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_splice* sp = &req->splice;
unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL;
int ret;
if (req->flags & REQ_F_NEED_CLEANUP)
return 0;
sp->file_in = NULL;
sp->off_in = READ_ONCE(sqe->splice_off_in);
sp->off_out = READ_ONCE(sqe->off);
sp->len = READ_ONCE(sqe->len);
sp->flags = READ_ONCE(sqe->splice_flags);
if (unlikely(sp->flags & ~valid_flags))
return -EINVAL;
ret = io_file_get(NULL, req, READ_ONCE(sqe->splice_fd_in), &sp->file_in,
(sp->flags & SPLICE_F_FD_IN_FIXED));
if (ret)
return ret;
req->flags |= REQ_F_NEED_CLEANUP;
if (!S_ISREG(file_inode(sp->file_in)->i_mode))
req->work.flags |= IO_WQ_WORK_UNBOUND;
return 0;
}
/*
 * Decide whether a nonblocking splice attempt on @file has to be punted.
 *
 * io_splice() returns -EAGAIN without calling do_splice() when this returns
 * true for either side of the splice.
 *
 * Returns false when get_pipe_info() recognises @file, or when the file
 * supports async I/O and was opened with O_NONBLOCK; returns true otherwise.
 */
static bool io_splice_punt(struct file *file)
{
	if (get_pipe_info(file))
		return false;
	if (!io_file_supports_async(file))
		return true;
	/*
	 * O_NONBLOCK is an open flag and lives in f_flags; f_mode carries the
	 * FMODE_* bits, so testing it against O_NONBLOCK checks the wrong bit.
	 */
	return !(file->f_flags & O_NONBLOCK);
}
/*
 * Execute an IORING_OP_SPLICE request previously set up by io_splice_prep().
 *
 * @req:            request whose req->splice holds files, offsets, len, flags
 * @nxt:            passed through to io_put_req_find_next()
 * @force_nonblock: when true, only attempt the splice if neither file needs
 *                  punting, and add SPLICE_F_NONBLOCK to the flags
 *
 * Returns -EAGAIN when the request must be retried from blocking context
 * (the input file reference is kept in that case), otherwise 0 after posting
 * a completion carrying the do_splice() result. A result different from the
 * requested length marks linked requests as failed.
 */
static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt,
		     bool force_nonblock)
{
	struct io_splice *sp = &req->splice;
	struct file *in = sp->file_in;
	struct file *out = sp->file_out;
	/* SPLICE_F_FD_IN_FIXED is io_uring-private; never hand it to do_splice(). */
	unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
	loff_t *poff_in, *poff_out;
	long ret = 0;

	if (force_nonblock) {
		if (io_splice_punt(in) || io_splice_punt(out))
			return -EAGAIN;
		flags |= SPLICE_F_NONBLOCK;
	}

	/* An offset of -1 means "no explicit offset": pass NULL instead. */
	poff_in = (sp->off_in == -1) ? NULL : &sp->off_in;
	poff_out = (sp->off_out == -1) ? NULL : &sp->off_out;

	/*
	 * do_splice() does not expect a zero length; complete such requests
	 * with 0, matching what splice(2) returns for len == 0.
	 */
	if (sp->len)
		ret = do_splice(in, poff_in, out, poff_out, sp->len, flags);

	if (force_nonblock && ret == -EAGAIN)
		return -EAGAIN;

	/* The request is finishing: drop the input file taken in prep. */
	io_put_file(req, in, (sp->flags & SPLICE_F_FD_IN_FIXED));
	req->flags &= ~REQ_F_NEED_CLEANUP;

	io_cqring_add_event(req, ret);
	if (ret != sp->len)
		req_set_fail_links(req);
	io_put_req_find_next(req, nxt);
	return 0;
}
/* /*
* IORING_OP_NOP just posts a completion event, nothing else. * IORING_OP_NOP just posts a completion event, nothing else.
*/ */
...@@ -4230,6 +4321,9 @@ static int io_req_defer_prep(struct io_kiocb *req, ...@@ -4230,6 +4321,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
case IORING_OP_EPOLL_CTL: case IORING_OP_EPOLL_CTL:
ret = io_epoll_ctl_prep(req, sqe); ret = io_epoll_ctl_prep(req, sqe);
break; break;
case IORING_OP_SPLICE:
ret = io_splice_prep(req, sqe);
break;
default: default:
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
req->opcode); req->opcode);
...@@ -4292,6 +4386,10 @@ static void io_cleanup_req(struct io_kiocb *req) ...@@ -4292,6 +4386,10 @@ static void io_cleanup_req(struct io_kiocb *req)
case IORING_OP_STATX: case IORING_OP_STATX:
putname(req->open.filename); putname(req->open.filename);
break; break;
case IORING_OP_SPLICE:
io_put_file(req, req->splice.file_in,
(req->splice.flags & SPLICE_F_FD_IN_FIXED));
break;
} }
req->flags &= ~REQ_F_NEED_CLEANUP; req->flags &= ~REQ_F_NEED_CLEANUP;
...@@ -4495,6 +4593,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, ...@@ -4495,6 +4593,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
} }
ret = io_epoll_ctl(req, nxt, force_nonblock); ret = io_epoll_ctl(req, nxt, force_nonblock);
break; break;
case IORING_OP_SPLICE:
if (sqe) {
ret = io_splice_prep(req, sqe);
if (ret < 0)
break;
}
ret = io_splice(req, nxt, force_nonblock);
break;
default: default:
ret = -EINVAL; ret = -EINVAL;
break; break;
...@@ -7230,6 +7336,7 @@ static int __init io_uring_init(void) ...@@ -7230,6 +7336,7 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(8, __u64, off); BUILD_BUG_SQE_ELEM(8, __u64, off);
BUILD_BUG_SQE_ELEM(8, __u64, addr2); BUILD_BUG_SQE_ELEM(8, __u64, addr2);
BUILD_BUG_SQE_ELEM(16, __u64, addr); BUILD_BUG_SQE_ELEM(16, __u64, addr);
BUILD_BUG_SQE_ELEM(16, __u64, splice_off_in);
BUILD_BUG_SQE_ELEM(24, __u32, len); BUILD_BUG_SQE_ELEM(24, __u32, len);
BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags); BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags);
BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags); BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags);
...@@ -7244,9 +7351,11 @@ static int __init io_uring_init(void) ...@@ -7244,9 +7351,11 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(28, __u32, open_flags); BUILD_BUG_SQE_ELEM(28, __u32, open_flags);
BUILD_BUG_SQE_ELEM(28, __u32, statx_flags); BUILD_BUG_SQE_ELEM(28, __u32, statx_flags);
BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice); BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice);
BUILD_BUG_SQE_ELEM(28, __u32, splice_flags);
BUILD_BUG_SQE_ELEM(32, __u64, user_data); BUILD_BUG_SQE_ELEM(32, __u64, user_data);
BUILD_BUG_SQE_ELEM(40, __u16, buf_index); BUILD_BUG_SQE_ELEM(40, __u16, buf_index);
BUILD_BUG_SQE_ELEM(42, __u16, personality); BUILD_BUG_SQE_ELEM(42, __u16, personality);
BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);
BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST); BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC); req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
......
...@@ -23,7 +23,10 @@ struct io_uring_sqe { ...@@ -23,7 +23,10 @@ struct io_uring_sqe {
__u64 off; /* offset into file */ __u64 off; /* offset into file */
__u64 addr2; __u64 addr2;
}; };
__u64 addr; /* pointer to buffer or iovecs */ union {
__u64 addr; /* pointer to buffer or iovecs */
__u64 splice_off_in;
};
__u32 len; /* buffer size or number of iovecs */ __u32 len; /* buffer size or number of iovecs */
union { union {
__kernel_rwf_t rw_flags; __kernel_rwf_t rw_flags;
...@@ -37,6 +40,7 @@ struct io_uring_sqe { ...@@ -37,6 +40,7 @@ struct io_uring_sqe {
__u32 open_flags; __u32 open_flags;
__u32 statx_flags; __u32 statx_flags;
__u32 fadvise_advice; __u32 fadvise_advice;
__u32 splice_flags;
}; };
__u64 user_data; /* data to be passed back at completion time */ __u64 user_data; /* data to be passed back at completion time */
union { union {
...@@ -45,6 +49,7 @@ struct io_uring_sqe { ...@@ -45,6 +49,7 @@ struct io_uring_sqe {
__u16 buf_index; __u16 buf_index;
/* personality to use, if used */ /* personality to use, if used */
__u16 personality; __u16 personality;
__s32 splice_fd_in;
}; };
__u64 __pad2[3]; __u64 __pad2[3];
}; };
...@@ -113,6 +118,7 @@ enum { ...@@ -113,6 +118,7 @@ enum {
IORING_OP_RECV, IORING_OP_RECV,
IORING_OP_OPENAT2, IORING_OP_OPENAT2,
IORING_OP_EPOLL_CTL, IORING_OP_EPOLL_CTL,
IORING_OP_SPLICE,
/* this goes last, obviously */ /* this goes last, obviously */
IORING_OP_LAST, IORING_OP_LAST,
...@@ -128,6 +134,12 @@ enum { ...@@ -128,6 +134,12 @@ enum {
*/ */
#define IORING_TIMEOUT_ABS (1U << 0) #define IORING_TIMEOUT_ABS (1U << 0)
/*
 * sqe->splice_flags
 * extends splice(2) flags
 *
 * SPLICE_F_FD_IN_FIXED is specific to io_uring: when set, sqe->splice_fd_in
 * is looked up as a registered (fixed) file rather than a regular fd. The
 * bit is masked off before the remaining flags are passed to do_splice().
 */
#define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */
/* /*
* IO completion data structure (Completion Queue Entry) * IO completion data structure (Completion Queue Entry)
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment