Commit 9836e93c authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-5.19/io_uring-passthrough-2022-05-22' of git://git.kernel.dk/linux-block

Pull io_uring NVMe command passthrough from Jens Axboe:
 "On top of everything else, this adds support for passthrough for
  io_uring.

  The initial feature for this is NVMe passthrough support, which allows
  non-filesystem based IO commands and admin commands.

  To support this, io_uring grows support for SQE and CQE members that
  are twice as big, allowing to pass in a full NVMe command without
  having to copy data around. And to complete with more than just a
  single 32-bit value as the output"

* tag 'for-5.19/io_uring-passthrough-2022-05-22' of git://git.kernel.dk/linux-block: (22 commits)
  io_uring: cleanup handling of the two task_work lists
  nvme: enable uring-passthrough for admin commands
  nvme: helper for uring-passthrough checks
  blk-mq: fix passthrough plugging
  nvme: add vectored-io support for uring-cmd
  nvme: wire-up uring-cmd support for io-passthru on char-device.
  nvme: refactor nvme_submit_user_cmd()
  block: wire-up support for passthrough plugging
  fs,io_uring: add infrastructure for uring-cmd
  io_uring: support CQE32 for nop operation
  io_uring: enable CQE32
  io_uring: support CQE32 in /proc info
  io_uring: add tracing for additional CQE32 fields
  io_uring: overflow processing for CQE32
  io_uring: flush completions for CQE32
  io_uring: modify io_get_cqe for CQE32
  io_uring: add CQE32 completion processing
  io_uring: add CQE32 setup processing
  io_uring: change ring size calculation for CQE32
  io_uring: store add. return values for CQE32
  ...
parents e1a8fde7 3fe07bcd
......@@ -1169,6 +1169,62 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error)
complete(waiting);
}
/*
* Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
* queues. This is important for md arrays to benefit from merging
* requests.
*/
static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
{
if (plug->multiple_queues)
return BLK_MAX_REQUEST_COUNT * 2;
return BLK_MAX_REQUEST_COUNT;
}
static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
{
struct request *last = rq_list_peek(&plug->mq_list);
if (!plug->rq_count) {
trace_block_plug(rq->q);
} else if (plug->rq_count >= blk_plug_max_rq_count(plug) ||
(!blk_queue_nomerges(rq->q) &&
blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
blk_mq_flush_plug_list(plug, false);
trace_block_plug(rq->q);
}
if (!plug->multiple_queues && last && last->q != rq->q)
plug->multiple_queues = true;
if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
plug->has_elevator = true;
rq->rq_next = NULL;
rq_list_add(&plug->mq_list, rq);
plug->rq_count++;
}
static void __blk_execute_rq_nowait(struct request *rq, bool at_head,
rq_end_io_fn *done, bool use_plug)
{
WARN_ON(irqs_disabled());
WARN_ON(!blk_rq_is_passthrough(rq));
rq->end_io = done;
blk_account_io_start(rq);
if (use_plug && current->plug) {
blk_add_rq_to_plug(current->plug, rq);
return;
}
/*
* don't check dying flag for MQ because the request won't
* be reused after dying flag is set
*/
blk_mq_sched_insert_request(rq, at_head, true, false);
}
/**
* blk_execute_rq_nowait - insert a request to I/O scheduler for execution
* @rq: request to insert
......@@ -1184,18 +1240,8 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error)
*/
void blk_execute_rq_nowait(struct request *rq, bool at_head, rq_end_io_fn *done)
{
WARN_ON(irqs_disabled());
WARN_ON(!blk_rq_is_passthrough(rq));
__blk_execute_rq_nowait(rq, at_head, done, true);
rq->end_io = done;
blk_account_io_start(rq);
/*
* don't check dying flag for MQ because the request won't
* be reused after dying flag is set
*/
blk_mq_sched_insert_request(rq, at_head, true, false);
}
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
......@@ -1233,8 +1279,13 @@ blk_status_t blk_execute_rq(struct request *rq, bool at_head)
DECLARE_COMPLETION_ONSTACK(wait);
unsigned long hang_check;
/*
* iopoll requires request to be submitted to driver, so can't
* use plug
*/
rq->end_io_data = &wait;
blk_execute_rq_nowait(rq, at_head, blk_end_sync_rq);
__blk_execute_rq_nowait(rq, at_head, blk_end_sync_rq,
!blk_rq_is_poll(rq));
/* Prevent hang_check timer from firing at us during very long I/O */
hang_check = sysctl_hung_task_timeout_secs;
......@@ -2676,40 +2727,6 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
hctx->queue->mq_ops->commit_rqs(hctx);
}
/*
* Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
* queues. This is important for md arrays to benefit from merging
* requests.
*/
static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
{
if (plug->multiple_queues)
return BLK_MAX_REQUEST_COUNT * 2;
return BLK_MAX_REQUEST_COUNT;
}
static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
{
struct request *last = rq_list_peek(&plug->mq_list);
if (!plug->rq_count) {
trace_block_plug(rq->q);
} else if (plug->rq_count >= blk_plug_max_rq_count(plug) ||
(!blk_queue_nomerges(rq->q) &&
blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
blk_mq_flush_plug_list(plug, false);
trace_block_plug(rq->q);
}
if (!plug->multiple_queues && last && last->q != rq->q)
plug->multiple_queues = true;
if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
plug->has_elevator = true;
rq->rq_next = NULL;
rq_list_add(&plug->mq_list, rq);
plug->rq_count++;
}
static bool blk_mq_attempt_bio_merge(struct request_queue *q,
struct bio *bio, unsigned int nr_segs)
{
......
......@@ -3146,6 +3146,7 @@ static const struct file_operations nvme_dev_fops = {
.release = nvme_dev_release,
.unlocked_ioctl = nvme_dev_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.uring_cmd = nvme_dev_uring_cmd,
};
static ssize_t nvme_sysfs_reset(struct device *dev,
......@@ -3699,6 +3700,7 @@ static const struct file_operations nvme_ns_chr_fops = {
.release = nvme_ns_chr_release,
.unlocked_ioctl = nvme_ns_chr_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.uring_cmd = nvme_ns_chr_uring_cmd,
};
static int nvme_add_ns_cdev(struct nvme_ns *ns)
......
......@@ -5,6 +5,7 @@
*/
#include <linux/ptrace.h> /* for force_successful_syscall_return */
#include <linux/nvme_ioctl.h>
#include <linux/io_uring.h>
#include "nvme.h"
/*
......@@ -53,10 +54,21 @@ static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf,
return ERR_PTR(ret);
}
static int nvme_submit_user_cmd(struct request_queue *q,
static int nvme_finish_user_metadata(struct request *req, void __user *ubuf,
void *meta, unsigned len, int ret)
{
if (!ret && req_op(req) == REQ_OP_DRV_IN &&
copy_to_user(ubuf, meta, len))
ret = -EFAULT;
kfree(meta);
return ret;
}
static struct request *nvme_alloc_user_request(struct request_queue *q,
struct nvme_command *cmd, void __user *ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
u32 meta_seed, u64 *result, unsigned timeout, bool vec)
u32 meta_seed, void **metap, unsigned timeout, bool vec,
unsigned int rq_flags, blk_mq_req_flags_t blk_flags)
{
bool write = nvme_is_write(cmd);
struct nvme_ns *ns = q->queuedata;
......@@ -66,9 +78,9 @@ static int nvme_submit_user_cmd(struct request_queue *q,
void *meta = NULL;
int ret;
req = blk_mq_alloc_request(q, nvme_req_op(cmd), 0);
req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags);
if (IS_ERR(req))
return PTR_ERR(req);
return req;
nvme_init_request(req, cmd);
if (timeout)
......@@ -105,26 +117,50 @@ static int nvme_submit_user_cmd(struct request_queue *q,
goto out_unmap;
}
req->cmd_flags |= REQ_INTEGRITY;
*metap = meta;
}
}
return req;
out_unmap:
if (bio)
blk_rq_unmap_user(bio);
out:
blk_mq_free_request(req);
return ERR_PTR(ret);
}
static int nvme_submit_user_cmd(struct request_queue *q,
struct nvme_command *cmd, void __user *ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
u32 meta_seed, u64 *result, unsigned timeout, bool vec)
{
struct request *req;
void *meta = NULL;
struct bio *bio;
int ret;
req = nvme_alloc_user_request(q, cmd, ubuffer, bufflen, meta_buffer,
meta_len, meta_seed, &meta, timeout, vec, 0, 0);
if (IS_ERR(req))
return PTR_ERR(req);
bio = req->bio;
ret = nvme_execute_passthru_rq(req);
if (result)
*result = le64_to_cpu(nvme_req(req)->result.u64);
if (meta && !ret && !write) {
if (copy_to_user(meta_buffer, meta, meta_len))
ret = -EFAULT;
}
kfree(meta);
out_unmap:
if (meta)
ret = nvme_finish_user_metadata(req, meta_buffer, meta,
meta_len, ret);
if (bio)
blk_rq_unmap_user(bio);
out:
blk_mq_free_request(req);
return ret;
}
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
struct nvme_user_io io;
......@@ -296,6 +332,139 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
return status;
}
struct nvme_uring_data {
__u64 metadata;
__u64 addr;
__u32 data_len;
__u32 metadata_len;
__u32 timeout_ms;
};
/*
* This overlays struct io_uring_cmd pdu.
* Expect build errors if this grows larger than that.
*/
struct nvme_uring_cmd_pdu {
union {
struct bio *bio;
struct request *req;
};
void *meta; /* kernel-resident buffer */
void __user *meta_buffer;
u32 meta_len;
};
static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
struct io_uring_cmd *ioucmd)
{
return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
}
static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd)
{
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
struct request *req = pdu->req;
struct bio *bio = req->bio;
int status;
u64 result;
if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
status = -EINTR;
else
status = nvme_req(req)->status;
result = le64_to_cpu(nvme_req(req)->result.u64);
if (pdu->meta)
status = nvme_finish_user_metadata(req, pdu->meta_buffer,
pdu->meta, pdu->meta_len, status);
if (bio)
blk_rq_unmap_user(bio);
blk_mq_free_request(req);
io_uring_cmd_done(ioucmd, status, result);
}
static void nvme_uring_cmd_end_io(struct request *req, blk_status_t err)
{
struct io_uring_cmd *ioucmd = req->end_io_data;
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
/* extract bio before reusing the same field for request */
struct bio *bio = pdu->bio;
pdu->req = req;
req->bio = bio;
/* this takes care of moving rest of completion-work to task context */
io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb);
}
static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
{
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
const struct nvme_uring_cmd *cmd = ioucmd->cmd;
struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
struct nvme_uring_data d;
struct nvme_command c;
struct request *req;
unsigned int rq_flags = 0;
blk_mq_req_flags_t blk_flags = 0;
void *meta = NULL;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
c.common.opcode = READ_ONCE(cmd->opcode);
c.common.flags = READ_ONCE(cmd->flags);
if (c.common.flags)
return -EINVAL;
c.common.command_id = 0;
c.common.nsid = cpu_to_le32(cmd->nsid);
if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid)))
return -EINVAL;
c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2));
c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3));
c.common.metadata = 0;
c.common.dptr.prp1 = c.common.dptr.prp2 = 0;
c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10));
c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11));
c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12));
c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13));
c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));
d.metadata = READ_ONCE(cmd->metadata);
d.addr = READ_ONCE(cmd->addr);
d.data_len = READ_ONCE(cmd->data_len);
d.metadata_len = READ_ONCE(cmd->metadata_len);
d.timeout_ms = READ_ONCE(cmd->timeout_ms);
if (issue_flags & IO_URING_F_NONBLOCK) {
rq_flags = REQ_NOWAIT;
blk_flags = BLK_MQ_REQ_NOWAIT;
}
req = nvme_alloc_user_request(q, &c, nvme_to_user_ptr(d.addr),
d.data_len, nvme_to_user_ptr(d.metadata),
d.metadata_len, 0, &meta, d.timeout_ms ?
msecs_to_jiffies(d.timeout_ms) : 0, vec, rq_flags,
blk_flags);
if (IS_ERR(req))
return PTR_ERR(req);
req->end_io_data = ioucmd;
/* to free bio on completion, as req->bio will be null at that time */
pdu->bio = req->bio;
pdu->meta = meta;
pdu->meta_buffer = nvme_to_user_ptr(d.metadata);
pdu->meta_len = d.metadata_len;
blk_execute_rq_nowait(req, 0, nvme_uring_cmd_end_io);
return -EIOCBQUEUED;
}
static bool is_ctrl_ioctl(unsigned int cmd)
{
if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
......@@ -387,6 +556,53 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return __nvme_ioctl(ns, cmd, (void __user *)arg);
}
static int nvme_uring_cmd_checks(unsigned int issue_flags)
{
/* IOPOLL not supported yet */
if (issue_flags & IO_URING_F_IOPOLL)
return -EOPNOTSUPP;
/* NVMe passthrough requires big SQE/CQE support */
if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) !=
(IO_URING_F_SQE128|IO_URING_F_CQE32))
return -EOPNOTSUPP;
return 0;
}
static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd,
unsigned int issue_flags)
{
struct nvme_ctrl *ctrl = ns->ctrl;
int ret;
BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu));
ret = nvme_uring_cmd_checks(issue_flags);
if (ret)
return ret;
switch (ioucmd->cmd_op) {
case NVME_URING_CMD_IO:
ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false);
break;
case NVME_URING_CMD_IO_VEC:
ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true);
break;
default:
ret = -ENOTTY;
}
return ret;
}
int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev,
struct nvme_ns, cdev);
return nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
}
#ifdef CONFIG_NVME_MULTIPATH
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
void __user *argp, struct nvme_ns_head *head, int srcu_idx)
......@@ -453,8 +669,46 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
srcu_read_unlock(&head->srcu, srcu_idx);
return ret;
}
int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
unsigned int issue_flags)
{
struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev);
int srcu_idx = srcu_read_lock(&head->srcu);
struct nvme_ns *ns = nvme_find_path(head);
int ret = -EINVAL;
if (ns)
ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
srcu_read_unlock(&head->srcu, srcu_idx);
return ret;
}
#endif /* CONFIG_NVME_MULTIPATH */
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
struct nvme_ctrl *ctrl = ioucmd->file->private_data;
int ret;
ret = nvme_uring_cmd_checks(issue_flags);
if (ret)
return ret;
switch (ioucmd->cmd_op) {
case NVME_URING_CMD_ADMIN:
ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false);
break;
case NVME_URING_CMD_ADMIN_VEC:
ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true);
break;
default:
ret = -ENOTTY;
}
return ret;
}
static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
{
struct nvme_ns *ns;
......
......@@ -437,6 +437,7 @@ static const struct file_operations nvme_ns_head_chr_fops = {
.release = nvme_ns_head_chr_release,
.unlocked_ioctl = nvme_ns_head_chr_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.uring_cmd = nvme_ns_head_chr_uring_cmd,
};
static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
......
......@@ -782,7 +782,12 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
unsigned long arg);
long nvme_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg);
int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd,
unsigned int issue_flags);
int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
unsigned int issue_flags);
int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
extern const struct attribute_group *nvme_ns_id_attr_groups[];
extern const struct pr_ops nvme_pr_ops;
......
This diff is collapsed.
......@@ -1953,6 +1953,7 @@ struct dir_context {
#define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN)
struct iov_iter;
struct io_uring_cmd;
struct file_operations {
struct module *owner;
......@@ -1995,6 +1996,7 @@ struct file_operations {
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags);
int (*fadvise)(struct file *, loff_t, loff_t, int);
int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
} __randomize_layout;
struct inode_operations {
......
......@@ -5,7 +5,32 @@
#include <linux/sched.h>
#include <linux/xarray.h>
enum io_uring_cmd_flags {
IO_URING_F_COMPLETE_DEFER = 1,
IO_URING_F_UNLOCKED = 2,
/* int's last bit, sign checks are usually faster than a bit test */
IO_URING_F_NONBLOCK = INT_MIN,
/* ctx state flags, for URING_CMD */
IO_URING_F_SQE128 = 4,
IO_URING_F_CQE32 = 8,
IO_URING_F_IOPOLL = 16,
};
struct io_uring_cmd {
struct file *file;
const void *cmd;
/* callback to defer completions to task context */
void (*task_work_cb)(struct io_uring_cmd *cmd);
u32 cmd_op;
u32 pad;
u8 pdu[32]; /* available inline for free use */
};
#if defined(CONFIG_IO_URING)
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *));
struct sock *io_uring_get_socket(struct file *file);
void __io_uring_cancel(bool cancel_all);
void __io_uring_free(struct task_struct *tsk);
......@@ -30,6 +55,14 @@ static inline void io_uring_free(struct task_struct *tsk)
__io_uring_free(tsk);
}
#else
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
ssize_t ret2)
{
}
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *))
{
}
static inline struct sock *io_uring_get_socket(struct file *file)
{
return NULL;
......
......@@ -321,13 +321,16 @@ TRACE_EVENT(io_uring_fail_link,
* @user_data: user data associated with the request
* @res: result of the request
* @cflags: completion flags
* @extra1: extra 64-bit data for CQE32
* @extra2: extra 64-bit data for CQE32
*
*/
TRACE_EVENT(io_uring_complete,
TP_PROTO(void *ctx, void *req, u64 user_data, int res, unsigned cflags),
TP_PROTO(void *ctx, void *req, u64 user_data, int res, unsigned cflags,
u64 extra1, u64 extra2),
TP_ARGS(ctx, req, user_data, res, cflags),
TP_ARGS(ctx, req, user_data, res, cflags, extra1, extra2),
TP_STRUCT__entry (
__field( void *, ctx )
......@@ -335,6 +338,8 @@ TRACE_EVENT(io_uring_complete,
__field( u64, user_data )
__field( int, res )
__field( unsigned, cflags )
__field( u64, extra1 )
__field( u64, extra2 )
),
TP_fast_assign(
......@@ -343,12 +348,17 @@ TRACE_EVENT(io_uring_complete,
__entry->user_data = user_data;
__entry->res = res;
__entry->cflags = cflags;
__entry->extra1 = extra1;
__entry->extra2 = extra2;
),
TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x",
TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x "
"extra1 %llu extra2 %llu ",
__entry->ctx, __entry->req,
__entry->user_data,
__entry->res, __entry->cflags)
__entry->res, __entry->cflags,
(unsigned long long) __entry->extra1,
(unsigned long long) __entry->extra2)
);
/**
......
......@@ -22,6 +22,7 @@ struct io_uring_sqe {
union {
__u64 off; /* offset into file */
__u64 addr2;
__u32 cmd_op;
};
union {
__u64 addr; /* pointer to buffer or iovecs */
......@@ -61,8 +62,17 @@ struct io_uring_sqe {
__s32 splice_fd_in;
__u32 file_index;
};
__u64 addr3;
__u64 __pad2[1];
union {
struct {
__u64 addr3;
__u64 __pad2[1];
};
/*
* If the ring is initialized with IORING_SETUP_SQE128, then
* this field is used for 80 bytes of arbitrary command data
*/
__u8 cmd[0];
};
};
/*
......@@ -128,6 +138,9 @@ enum {
*/
#define IORING_SETUP_TASKRUN_FLAG (1U << 9)
#define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */
#define IORING_SETUP_CQE32 (1U << 11) /* CQEs are 32 byte */
enum io_uring_op {
IORING_OP_NOP,
IORING_OP_READV,
......@@ -175,6 +188,7 @@ enum io_uring_op {
IORING_OP_FGETXATTR,
IORING_OP_GETXATTR,
IORING_OP_SOCKET,
IORING_OP_URING_CMD,
/* this goes last, obviously */
IORING_OP_LAST,
......@@ -251,6 +265,12 @@ struct io_uring_cqe {
__u64 user_data; /* sqe->data submission passed back */
__s32 res; /* result code for this event */
__u32 flags;
/*
* If the ring is initialized with IORING_SETUP_CQE32, then this field
* contains 16-bytes of padding, doubling the size of the CQE.
*/
__u64 big_cqe[];
};
/*
......
......@@ -70,6 +70,28 @@ struct nvme_passthru_cmd64 {
__u64 result;
};
/* same as struct nvme_passthru_cmd64, minus the 8b result field */
struct nvme_uring_cmd {
__u8 opcode;
__u8 flags;
__u16 rsvd1;
__u32 nsid;
__u32 cdw2;
__u32 cdw3;
__u64 metadata;
__u64 addr;
__u32 metadata_len;
__u32 data_len;
__u32 cdw10;
__u32 cdw11;
__u32 cdw12;
__u32 cdw13;
__u32 cdw14;
__u32 cdw15;
__u32 timeout_ms;
__u32 rsvd2;
};
#define nvme_admin_cmd nvme_passthru_cmd
#define NVME_IOCTL_ID _IO('N', 0x40)
......@@ -83,4 +105,10 @@ struct nvme_passthru_cmd64 {
#define NVME_IOCTL_IO64_CMD _IOWR('N', 0x48, struct nvme_passthru_cmd64)
#define NVME_IOCTL_IO64_CMD_VEC _IOWR('N', 0x49, struct nvme_passthru_cmd64)
/* io_uring async commands: */
#define NVME_URING_CMD_IO _IOWR('N', 0x80, struct nvme_uring_cmd)
#define NVME_URING_CMD_IO_VEC _IOWR('N', 0x81, struct nvme_uring_cmd)
#define NVME_URING_CMD_ADMIN _IOWR('N', 0x82, struct nvme_uring_cmd)
#define NVME_URING_CMD_ADMIN_VEC _IOWR('N', 0x83, struct nvme_uring_cmd)
#endif /* _UAPI_LINUX_NVME_IOCTL_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment