Commit eca53cb6 authored by Jens Axboe

Merge branch 'nvme-4.19' of git://git.infradead.org/nvme into for-4.19/block

Pull NVMe updates from Christoph:

"Highlights:

 - massively improved tracepoints (Keith Busch)
 - support for larger inline data in the RDMA host and target
   (Steve Wise)
 - RDMA setup/teardown path fixes and refactor (Sagi Grimberg)
 - Command Supported and Effects log support for the NVMe target
   (Chaitanya Kulkarni)
 - buffered I/O support for the NVMe target (Chaitanya Kulkarni)

 plus the usual set of cleanups and small enhancements."

* 'nvme-4.19' of git://git.infradead.org/nvme:
  nvmet: don't use uuid_le type
  nvmet: check fileio lba range access boundaries
  nvmet: fix file discard return status
  nvme-rdma: centralize admin/io queue teardown sequence
  nvme-rdma: centralize controller setup sequence
  nvme-rdma: unquiesce queues when deleting the controller
  nvme-rdma: mark expected switch fall-through
  nvme: add disk name to trace events
  nvme: add controller name to trace events
  nvme: use hw qid in trace events
  nvme: cache struct nvme_ctrl reference to struct nvme_request
  nvmet-rdma: add an error flow for post_recv failures
  nvmet-rdma: add unlikely check in the fast path
  nvmet-rdma: support max(16KB, PAGE_SIZE) inline data
  nvme-rdma: support up to 4 segments of inline data
  nvmet: add buffered I/O support for file backed ns
  nvmet: add commands supported and effects log page
  nvme: move init of keep_alive work item to controller initialization
  nvme.h: resync with nvme-cli
parents 42c9cdfe 1b0d2745
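
To illustrate the tracepoint rework highlighted above: the per-transport hunks below (fc, pci, rdma, loop) all cache the owning nvme_ctrl in struct nvme_request so that the unified nvme_setup_cmd/nvme_complete_rq events can print the controller instance, disk name and hardware qid. A minimal sketch of that pattern, assuming a hypothetical transport — example_ctrl and example_init_request are illustrative names, not part of this series:

static int example_init_request(struct blk_mq_tag_set *set, struct request *rq,
		unsigned int hctx_idx, unsigned int numa_node)
{
	/* hypothetical transport-private data attached to the tag set */
	struct example_ctrl *ctrl = set->driver_data;

	/*
	 * Cache the controller so nvme_req_qid() and the unified
	 * nvme_setup_cmd/nvme_complete_rq trace events can resolve
	 * "nvme<instance>: disk=..., qid=..." without per-transport
	 * trace variants.
	 */
	nvme_req(rq)->ctrl = &ctrl->ctrl;
	return 0;
}
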
...@@ -652,10 +652,7 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, ...@@ -652,10 +652,7 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
} }
cmd->common.command_id = req->tag; cmd->common.command_id = req->tag;
if (ns) trace_nvme_setup_cmd(req, cmd);
trace_nvme_setup_nvm_cmd(req->q->id, cmd);
else
trace_nvme_setup_admin_cmd(cmd);
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(nvme_setup_cmd); EXPORT_SYMBOL_GPL(nvme_setup_cmd);
...@@ -848,9 +845,6 @@ static void nvme_start_keep_alive(struct nvme_ctrl *ctrl) ...@@ -848,9 +845,6 @@ static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
if (unlikely(ctrl->kato == 0)) if (unlikely(ctrl->kato == 0))
return; return;
INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
} }
...@@ -3484,6 +3478,10 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ...@@ -3484,6 +3478,10 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work); INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work); INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL); ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL);
if (ret < 0) if (ret < 0)
goto out; goto out;
......
...@@ -1737,6 +1737,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, ...@@ -1737,6 +1737,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0; int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
struct nvme_fc_queue *queue = &ctrl->queues[queue_idx]; struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];
nvme_req(rq)->ctrl = &ctrl->ctrl;
return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++); return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
} }
......
...@@ -102,6 +102,7 @@ struct nvme_request { ...@@ -102,6 +102,7 @@ struct nvme_request {
u8 retries; u8 retries;
u8 flags; u8 flags;
u16 status; u16 status;
struct nvme_ctrl *ctrl;
}; };
/* /*
...@@ -119,6 +120,13 @@ static inline struct nvme_request *nvme_req(struct request *req) ...@@ -119,6 +120,13 @@ static inline struct nvme_request *nvme_req(struct request *req)
return blk_mq_rq_to_pdu(req); return blk_mq_rq_to_pdu(req);
} }
static inline u16 nvme_req_qid(struct request *req)
{
if (!req->rq_disk)
return 0;
return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(req)) + 1;
}
/* The below value is the specific amount of delay needed before checking /* The below value is the specific amount of delay needed before checking
* readiness in case of the PCI_DEVICE(0x1c58, 0x0003), which needs the * readiness in case of the PCI_DEVICE(0x1c58, 0x0003), which needs the
* NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was * NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was
......
...@@ -418,6 +418,8 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req, ...@@ -418,6 +418,8 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
BUG_ON(!nvmeq); BUG_ON(!nvmeq);
iod->nvmeq = nvmeq; iod->nvmeq = nvmeq;
nvme_req(req)->ctrl = &dev->ctrl;
return 0; return 0;
} }
......
...@@ -40,13 +40,14 @@ ...@@ -40,13 +40,14 @@
#define NVME_RDMA_MAX_SEGMENTS 256 #define NVME_RDMA_MAX_SEGMENTS 256
#define NVME_RDMA_MAX_INLINE_SEGMENTS 1 #define NVME_RDMA_MAX_INLINE_SEGMENTS 4
struct nvme_rdma_device { struct nvme_rdma_device {
struct ib_device *dev; struct ib_device *dev;
struct ib_pd *pd; struct ib_pd *pd;
struct kref ref; struct kref ref;
struct list_head entry; struct list_head entry;
unsigned int num_inline_segments;
}; };
struct nvme_rdma_qe { struct nvme_rdma_qe {
...@@ -117,6 +118,7 @@ struct nvme_rdma_ctrl { ...@@ -117,6 +118,7 @@ struct nvme_rdma_ctrl {
struct sockaddr_storage src_addr; struct sockaddr_storage src_addr;
struct nvme_ctrl ctrl; struct nvme_ctrl ctrl;
bool use_inline_data;
}; };
static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl) static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
...@@ -249,7 +251,7 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor) ...@@ -249,7 +251,7 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
/* +1 for drain */ /* +1 for drain */
init_attr.cap.max_recv_wr = queue->queue_size + 1; init_attr.cap.max_recv_wr = queue->queue_size + 1;
init_attr.cap.max_recv_sge = 1; init_attr.cap.max_recv_sge = 1;
init_attr.cap.max_send_sge = 1 + NVME_RDMA_MAX_INLINE_SEGMENTS; init_attr.cap.max_send_sge = 1 + dev->num_inline_segments;
init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
init_attr.qp_type = IB_QPT_RC; init_attr.qp_type = IB_QPT_RC;
init_attr.send_cq = queue->ib_cq; init_attr.send_cq = queue->ib_cq;
...@@ -286,6 +288,7 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, ...@@ -286,6 +288,7 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
struct ib_device *ibdev = dev->dev; struct ib_device *ibdev = dev->dev;
int ret; int ret;
nvme_req(rq)->ctrl = &ctrl->ctrl;
ret = nvme_rdma_alloc_qe(ibdev, &req->sqe, sizeof(struct nvme_command), ret = nvme_rdma_alloc_qe(ibdev, &req->sqe, sizeof(struct nvme_command),
DMA_TO_DEVICE); DMA_TO_DEVICE);
if (ret) if (ret)
...@@ -374,6 +377,8 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id) ...@@ -374,6 +377,8 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
goto out_free_pd; goto out_free_pd;
} }
ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS,
ndev->dev->attrs.max_sge - 1);
list_add(&ndev->entry, &device_list); list_add(&ndev->entry, &device_list);
out_unlock: out_unlock:
mutex_unlock(&device_list_mutex); mutex_unlock(&device_list_mutex);
...@@ -868,6 +873,31 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) ...@@ -868,6 +873,31 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
return ret; return ret;
} }
static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
bool remove)
{
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
nvme_rdma_stop_queue(&ctrl->queues[0]);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request,
&ctrl->ctrl);
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
nvme_rdma_destroy_admin_queue(ctrl, remove);
}
static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
bool remove)
{
if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl);
nvme_rdma_stop_io_queues(ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request,
&ctrl->ctrl);
if (remove)
nvme_start_queues(&ctrl->ctrl);
nvme_rdma_destroy_io_queues(ctrl, remove);
}
}
static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl) static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
{ {
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
...@@ -912,21 +942,44 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl) ...@@ -912,21 +942,44 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
} }
} }
static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
{ {
struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work), int ret = -EINVAL;
struct nvme_rdma_ctrl, reconnect_work);
bool changed; bool changed;
int ret;
++ctrl->ctrl.nr_reconnects; ret = nvme_rdma_configure_admin_queue(ctrl, new);
ret = nvme_rdma_configure_admin_queue(ctrl, false);
if (ret) if (ret)
goto requeue; return ret;
if (ctrl->ctrl.icdoff) {
dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
goto destroy_admin;
}
if (!(ctrl->ctrl.sgls & (1 << 2))) {
dev_err(ctrl->ctrl.device,
"Mandatory keyed sgls are not supported!\n");
goto destroy_admin;
}
if (ctrl->ctrl.opts->queue_size > ctrl->ctrl.sqsize + 1) {
dev_warn(ctrl->ctrl.device,
"queue_size %zu > ctrl sqsize %u, clamping down\n",
ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1);
}
if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
dev_warn(ctrl->ctrl.device,
"sqsize %u > ctrl maxcmd %u, clamping down\n",
ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd);
ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1;
}
if (ctrl->ctrl.sgls & (1 << 20))
ctrl->use_inline_data = true;
if (ctrl->ctrl.queue_count > 1) { if (ctrl->ctrl.queue_count > 1) {
ret = nvme_rdma_configure_io_queues(ctrl, false); ret = nvme_rdma_configure_io_queues(ctrl, new);
if (ret) if (ret)
goto destroy_admin; goto destroy_admin;
} }
...@@ -935,10 +988,31 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) ...@@ -935,10 +988,31 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
if (!changed) { if (!changed) {
/* state change failure is ok if we're in DELETING state */ /* state change failure is ok if we're in DELETING state */
WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING); WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
return; ret = -EINVAL;
goto destroy_io;
} }
nvme_start_ctrl(&ctrl->ctrl); nvme_start_ctrl(&ctrl->ctrl);
return 0;
destroy_io:
if (ctrl->ctrl.queue_count > 1)
nvme_rdma_destroy_io_queues(ctrl, new);
destroy_admin:
nvme_rdma_stop_queue(&ctrl->queues[0]);
nvme_rdma_destroy_admin_queue(ctrl, new);
return ret;
}
static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
struct nvme_rdma_ctrl, reconnect_work);
++ctrl->ctrl.nr_reconnects;
if (nvme_rdma_setup_ctrl(ctrl, false))
goto requeue;
dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n", dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
ctrl->ctrl.nr_reconnects); ctrl->ctrl.nr_reconnects);
...@@ -947,9 +1021,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) ...@@ -947,9 +1021,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
return; return;
destroy_admin:
nvme_rdma_stop_queue(&ctrl->queues[0]);
nvme_rdma_destroy_admin_queue(ctrl, false);
requeue: requeue:
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
ctrl->ctrl.nr_reconnects); ctrl->ctrl.nr_reconnects);
...@@ -962,27 +1033,9 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) ...@@ -962,27 +1033,9 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
struct nvme_rdma_ctrl, err_work); struct nvme_rdma_ctrl, err_work);
nvme_stop_keep_alive(&ctrl->ctrl); nvme_stop_keep_alive(&ctrl->ctrl);
nvme_rdma_teardown_io_queues(ctrl, false);
if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl);
nvme_rdma_stop_io_queues(ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set,
nvme_cancel_request, &ctrl->ctrl);
nvme_rdma_destroy_io_queues(ctrl, false);
}
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
nvme_rdma_stop_queue(&ctrl->queues[0]);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_cancel_request, &ctrl->ctrl);
nvme_rdma_destroy_admin_queue(ctrl, false);
/*
* queues are not a live anymore, so restart the queues to fail fast
* new IO
*/
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
nvme_start_queues(&ctrl->ctrl); nvme_start_queues(&ctrl->ctrl);
nvme_rdma_teardown_admin_queue(ctrl, false);
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure is ok if we're in DELETING state */ /* state change failure is ok if we're in DELETING state */
...@@ -1090,19 +1143,27 @@ static int nvme_rdma_set_sg_null(struct nvme_command *c) ...@@ -1090,19 +1143,27 @@ static int nvme_rdma_set_sg_null(struct nvme_command *c)
} }
static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue, static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
struct nvme_rdma_request *req, struct nvme_command *c) struct nvme_rdma_request *req, struct nvme_command *c,
int count)
{ {
struct nvme_sgl_desc *sg = &c->common.dptr.sgl; struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
struct scatterlist *sgl = req->sg_table.sgl;
struct ib_sge *sge = &req->sge[1];
u32 len = 0;
int i;
req->sge[1].addr = sg_dma_address(req->sg_table.sgl); for (i = 0; i < count; i++, sgl++, sge++) {
req->sge[1].length = sg_dma_len(req->sg_table.sgl); sge->addr = sg_dma_address(sgl);
req->sge[1].lkey = queue->device->pd->local_dma_lkey; sge->length = sg_dma_len(sgl);
sge->lkey = queue->device->pd->local_dma_lkey;
len += sge->length;
}
sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff); sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
sg->length = cpu_to_le32(sg_dma_len(req->sg_table.sgl)); sg->length = cpu_to_le32(len);
sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET; sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
req->num_sge++; req->num_sge += count;
return 0; return 0;
} }
...@@ -1195,15 +1256,16 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, ...@@ -1195,15 +1256,16 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
goto out_free_table; goto out_free_table;
} }
if (count == 1) { if (count <= dev->num_inline_segments) {
if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) && if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
queue->ctrl->use_inline_data &&
blk_rq_payload_bytes(rq) <= blk_rq_payload_bytes(rq) <=
nvme_rdma_inline_data_size(queue)) { nvme_rdma_inline_data_size(queue)) {
ret = nvme_rdma_map_sg_inline(queue, req, c); ret = nvme_rdma_map_sg_inline(queue, req, c, count);
goto out; goto out;
} }
if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) { if (count == 1 && dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
ret = nvme_rdma_map_sg_single(queue, req, c); ret = nvme_rdma_map_sg_single(queue, req, c);
goto out; goto out;
} }
...@@ -1574,6 +1636,7 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, ...@@ -1574,6 +1636,7 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_UNREACHABLE:
nvme_rdma_destroy_queue_ib(queue); nvme_rdma_destroy_queue_ib(queue);
/* fall through */
case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ADDR_ERROR:
dev_dbg(queue->ctrl->ctrl.device, dev_dbg(queue->ctrl->ctrl.device,
"CM error event %d\n", ev->event); "CM error event %d\n", ev->event);
...@@ -1736,25 +1799,12 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { ...@@ -1736,25 +1799,12 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
{ {
if (ctrl->ctrl.queue_count > 1) { nvme_rdma_teardown_io_queues(ctrl, shutdown);
nvme_stop_queues(&ctrl->ctrl);
nvme_rdma_stop_io_queues(ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set,
nvme_cancel_request, &ctrl->ctrl);
nvme_rdma_destroy_io_queues(ctrl, shutdown);
}
if (shutdown) if (shutdown)
nvme_shutdown_ctrl(&ctrl->ctrl); nvme_shutdown_ctrl(&ctrl->ctrl);
else else
nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
nvme_rdma_teardown_admin_queue(ctrl, shutdown);
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
nvme_rdma_stop_queue(&ctrl->queues[0]);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_cancel_request, &ctrl->ctrl);
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
nvme_rdma_destroy_admin_queue(ctrl, shutdown);
} }
static void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl) static void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl)
...@@ -1766,8 +1816,6 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) ...@@ -1766,8 +1816,6 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
{ {
struct nvme_rdma_ctrl *ctrl = struct nvme_rdma_ctrl *ctrl =
container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work); container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
int ret;
bool changed;
nvme_stop_ctrl(&ctrl->ctrl); nvme_stop_ctrl(&ctrl->ctrl);
nvme_rdma_shutdown_ctrl(ctrl, false); nvme_rdma_shutdown_ctrl(ctrl, false);
...@@ -1778,25 +1826,9 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) ...@@ -1778,25 +1826,9 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
return; return;
} }
ret = nvme_rdma_configure_admin_queue(ctrl, false); if (nvme_rdma_setup_ctrl(ctrl, false))
if (ret)
goto out_fail; goto out_fail;
if (ctrl->ctrl.queue_count > 1) {
ret = nvme_rdma_configure_io_queues(ctrl, false);
if (ret)
goto out_fail;
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
if (!changed) {
/* state change failure is ok if we're in DELETING state */
WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
return;
}
nvme_start_ctrl(&ctrl->ctrl);
return; return;
out_fail: out_fail:
...@@ -1959,49 +1991,10 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, ...@@ -1959,49 +1991,10 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING); changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
WARN_ON_ONCE(!changed); WARN_ON_ONCE(!changed);
ret = nvme_rdma_configure_admin_queue(ctrl, true); ret = nvme_rdma_setup_ctrl(ctrl, true);
if (ret) if (ret)
goto out_uninit_ctrl; goto out_uninit_ctrl;
/* sanity check icdoff */
if (ctrl->ctrl.icdoff) {
dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
ret = -EINVAL;
goto out_remove_admin_queue;
}
/* sanity check keyed sgls */
if (!(ctrl->ctrl.sgls & (1 << 2))) {
dev_err(ctrl->ctrl.device,
"Mandatory keyed sgls are not supported!\n");
ret = -EINVAL;
goto out_remove_admin_queue;
}
/* only warn if argument is too large here, will clamp later */
if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
dev_warn(ctrl->ctrl.device,
"queue_size %zu > ctrl sqsize %u, clamping down\n",
opts->queue_size, ctrl->ctrl.sqsize + 1);
}
/* warn if maxcmd is lower than sqsize+1 */
if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
dev_warn(ctrl->ctrl.device,
"sqsize %u > ctrl maxcmd %u, clamping down\n",
ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd);
ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1;
}
if (opts->nr_io_queues) {
ret = nvme_rdma_configure_io_queues(ctrl, true);
if (ret)
goto out_remove_admin_queue;
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n", dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
ctrl->ctrl.opts->subsysnqn, &ctrl->addr); ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
...@@ -2011,13 +2004,8 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, ...@@ -2011,13 +2004,8 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list); list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
mutex_unlock(&nvme_rdma_ctrl_mutex); mutex_unlock(&nvme_rdma_ctrl_mutex);
nvme_start_ctrl(&ctrl->ctrl);
return &ctrl->ctrl; return &ctrl->ctrl;
out_remove_admin_queue:
nvme_rdma_stop_queue(&ctrl->queues[0]);
nvme_rdma_destroy_admin_queue(ctrl, true);
out_uninit_ctrl: out_uninit_ctrl:
nvme_uninit_ctrl(&ctrl->ctrl); nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl);
......
...@@ -128,3 +128,14 @@ const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, ...@@ -128,3 +128,14 @@ const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p,
return nvme_trace_common(p, cdw10); return nvme_trace_common(p, cdw10);
} }
} }
const char *nvme_trace_disk_name(struct trace_seq *p, char *name)
{
const char *ret = trace_seq_buffer_ptr(p);
if (*name)
trace_seq_printf(p, "disk=%s, ", name);
trace_seq_putc(p, 0);
return ret;
}
...@@ -50,13 +50,8 @@ ...@@ -50,13 +50,8 @@
nvme_admin_opcode_name(nvme_admin_security_recv), \ nvme_admin_opcode_name(nvme_admin_security_recv), \
nvme_admin_opcode_name(nvme_admin_sanitize_nvm)) nvme_admin_opcode_name(nvme_admin_sanitize_nvm))
const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode,
u8 *cdw10);
#define __parse_nvme_admin_cmd(opcode, cdw10) \
nvme_trace_parse_admin_cmd(p, opcode, cdw10)
#define nvme_opcode_name(opcode) { opcode, #opcode } #define nvme_opcode_name(opcode) { opcode, #opcode }
#define show_opcode_name(val) \ #define show_nvm_opcode_name(val) \
__print_symbolic(val, \ __print_symbolic(val, \
nvme_opcode_name(nvme_cmd_flush), \ nvme_opcode_name(nvme_cmd_flush), \
nvme_opcode_name(nvme_cmd_write), \ nvme_opcode_name(nvme_cmd_write), \
...@@ -70,85 +65,92 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode, ...@@ -70,85 +65,92 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode,
nvme_opcode_name(nvme_cmd_resv_acquire), \ nvme_opcode_name(nvme_cmd_resv_acquire), \
nvme_opcode_name(nvme_cmd_resv_release)) nvme_opcode_name(nvme_cmd_resv_release))
const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, u8 opcode, #define show_opcode_name(qid, opcode) \
u8 *cdw10); (qid ? show_nvm_opcode_name(opcode) : show_admin_opcode_name(opcode))
#define __parse_nvme_cmd(opcode, cdw10) \
nvme_trace_parse_nvm_cmd(p, opcode, cdw10)
TRACE_EVENT(nvme_setup_admin_cmd,
TP_PROTO(struct nvme_command *cmd),
TP_ARGS(cmd),
TP_STRUCT__entry(
__field(u8, opcode)
__field(u8, flags)
__field(u16, cid)
__field(u64, metadata)
__array(u8, cdw10, 24)
),
TP_fast_assign(
__entry->opcode = cmd->common.opcode;
__entry->flags = cmd->common.flags;
__entry->cid = cmd->common.command_id;
__entry->metadata = le64_to_cpu(cmd->common.metadata);
memcpy(__entry->cdw10, cmd->common.cdw10,
sizeof(__entry->cdw10));
),
TP_printk(" cmdid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)",
__entry->cid, __entry->flags, __entry->metadata,
show_admin_opcode_name(__entry->opcode),
__parse_nvme_admin_cmd(__entry->opcode, __entry->cdw10))
);
TRACE_EVENT(nvme_setup_nvm_cmd, const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode,
TP_PROTO(int qid, struct nvme_command *cmd), u8 *cdw10);
TP_ARGS(qid, cmd), const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, u8 opcode,
u8 *cdw10);
#define parse_nvme_cmd(qid, opcode, cdw10) \
(qid ? \
nvme_trace_parse_nvm_cmd(p, opcode, cdw10) : \
nvme_trace_parse_admin_cmd(p, opcode, cdw10))
const char *nvme_trace_disk_name(struct trace_seq *p, char *name);
#define __print_disk_name(name) \
nvme_trace_disk_name(p, name)
#ifndef TRACE_HEADER_MULTI_READ
static inline void __assign_disk_name(char *name, struct gendisk *disk)
{
if (disk)
memcpy(name, disk->disk_name, DISK_NAME_LEN);
else
memset(name, 0, DISK_NAME_LEN);
}
#endif
TRACE_EVENT(nvme_setup_cmd,
TP_PROTO(struct request *req, struct nvme_command *cmd),
TP_ARGS(req, cmd),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(int, qid) __array(char, disk, DISK_NAME_LEN)
__field(u8, opcode) __field(int, ctrl_id)
__field(u8, flags) __field(int, qid)
__field(u16, cid) __field(u8, opcode)
__field(u32, nsid) __field(u8, flags)
__field(u64, metadata) __field(u16, cid)
__array(u8, cdw10, 24) __field(u32, nsid)
__field(u64, metadata)
__array(u8, cdw10, 24)
), ),
TP_fast_assign( TP_fast_assign(
__entry->qid = qid; __entry->ctrl_id = nvme_req(req)->ctrl->instance;
__entry->opcode = cmd->common.opcode; __entry->qid = nvme_req_qid(req);
__entry->flags = cmd->common.flags; __entry->opcode = cmd->common.opcode;
__entry->cid = cmd->common.command_id; __entry->flags = cmd->common.flags;
__entry->nsid = le32_to_cpu(cmd->common.nsid); __entry->cid = cmd->common.command_id;
__entry->metadata = le64_to_cpu(cmd->common.metadata); __entry->nsid = le32_to_cpu(cmd->common.nsid);
memcpy(__entry->cdw10, cmd->common.cdw10, __entry->metadata = le64_to_cpu(cmd->common.metadata);
sizeof(__entry->cdw10)); __assign_disk_name(__entry->disk, req->rq_disk);
memcpy(__entry->cdw10, cmd->common.cdw10,
sizeof(__entry->cdw10));
), ),
TP_printk("qid=%d, nsid=%u, cmdid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)", TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)",
__entry->qid, __entry->nsid, __entry->cid, __entry->ctrl_id, __print_disk_name(__entry->disk),
__entry->qid, __entry->cid, __entry->nsid,
__entry->flags, __entry->metadata, __entry->flags, __entry->metadata,
show_opcode_name(__entry->opcode), show_opcode_name(__entry->qid, __entry->opcode),
__parse_nvme_cmd(__entry->opcode, __entry->cdw10)) parse_nvme_cmd(__entry->qid, __entry->opcode, __entry->cdw10))
); );
TRACE_EVENT(nvme_complete_rq, TRACE_EVENT(nvme_complete_rq,
TP_PROTO(struct request *req), TP_PROTO(struct request *req),
TP_ARGS(req), TP_ARGS(req),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(int, qid) __array(char, disk, DISK_NAME_LEN)
__field(int, cid) __field(int, ctrl_id)
__field(u64, result) __field(int, qid)
__field(u8, retries) __field(int, cid)
__field(u8, flags) __field(u64, result)
__field(u16, status) __field(u8, retries)
__field(u8, flags)
__field(u16, status)
), ),
TP_fast_assign( TP_fast_assign(
__entry->qid = req->q->id; __entry->ctrl_id = nvme_req(req)->ctrl->instance;
__entry->cid = req->tag; __entry->qid = nvme_req_qid(req);
__entry->result = le64_to_cpu(nvme_req(req)->result.u64); __entry->cid = req->tag;
__entry->retries = nvme_req(req)->retries; __entry->result = le64_to_cpu(nvme_req(req)->result.u64);
__entry->flags = nvme_req(req)->flags; __entry->retries = nvme_req(req)->retries;
__entry->status = nvme_req(req)->status; __entry->flags = nvme_req(req)->flags;
__entry->status = nvme_req(req)->status;
__assign_disk_name(__entry->disk, req->rq_disk);
), ),
TP_printk("qid=%d, cmdid=%u, res=%llu, retries=%u, flags=0x%x, status=%u", TP_printk("nvme%d: %sqid=%d, cmdid=%u, res=%llu, retries=%u, flags=0x%x, status=%u",
__entry->ctrl_id, __print_disk_name(__entry->disk),
__entry->qid, __entry->cid, __entry->result, __entry->qid, __entry->cid, __entry->result,
__entry->retries, __entry->flags, __entry->status) __entry->retries, __entry->flags, __entry->status)
......
...@@ -128,6 +128,36 @@ static void nvmet_execute_get_log_page_smart(struct nvmet_req *req) ...@@ -128,6 +128,36 @@ static void nvmet_execute_get_log_page_smart(struct nvmet_req *req)
nvmet_req_complete(req, status); nvmet_req_complete(req, status);
} }
static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req)
{
u16 status = NVME_SC_INTERNAL;
struct nvme_effects_log *log;
log = kzalloc(sizeof(*log), GFP_KERNEL);
if (!log)
goto out;
log->acs[nvme_admin_get_log_page] = cpu_to_le32(1 << 0);
log->acs[nvme_admin_identify] = cpu_to_le32(1 << 0);
log->acs[nvme_admin_abort_cmd] = cpu_to_le32(1 << 0);
log->acs[nvme_admin_set_features] = cpu_to_le32(1 << 0);
log->acs[nvme_admin_get_features] = cpu_to_le32(1 << 0);
log->acs[nvme_admin_async_event] = cpu_to_le32(1 << 0);
log->acs[nvme_admin_keep_alive] = cpu_to_le32(1 << 0);
log->iocs[nvme_cmd_read] = cpu_to_le32(1 << 0);
log->iocs[nvme_cmd_write] = cpu_to_le32(1 << 0);
log->iocs[nvme_cmd_flush] = cpu_to_le32(1 << 0);
log->iocs[nvme_cmd_dsm] = cpu_to_le32(1 << 0);
log->iocs[nvme_cmd_write_zeroes] = cpu_to_le32(1 << 0);
status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
kfree(log);
out:
nvmet_req_complete(req, status);
}
static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req) static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req)
{ {
struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvmet_ctrl *ctrl = req->sq->ctrl;
...@@ -208,7 +238,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) ...@@ -208,7 +238,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
/* first slot is read-only, only one slot supported */ /* first slot is read-only, only one slot supported */
id->frmw = (1 << 0) | (1 << 1); id->frmw = (1 << 0) | (1 << 1);
id->lpa = (1 << 0) | (1 << 2); id->lpa = (1 << 0) | (1 << 1) | (1 << 2);
id->elpe = NVMET_ERROR_LOG_SLOTS - 1; id->elpe = NVMET_ERROR_LOG_SLOTS - 1;
id->npss = 0; id->npss = 0;
...@@ -238,14 +268,14 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) ...@@ -238,14 +268,14 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */ id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
if (ctrl->ops->has_keyed_sgls) if (ctrl->ops->has_keyed_sgls)
id->sgls |= cpu_to_le32(1 << 2); id->sgls |= cpu_to_le32(1 << 2);
if (ctrl->ops->sqe_inline_size) if (req->port->inline_data_size)
id->sgls |= cpu_to_le32(1 << 20); id->sgls |= cpu_to_le32(1 << 20);
strcpy(id->subnqn, ctrl->subsys->subsysnqn); strcpy(id->subnqn, ctrl->subsys->subsysnqn);
/* Max command capsule size is sqe + single page of in-capsule data */ /* Max command capsule size is sqe + single page of in-capsule data */
id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) + id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
ctrl->ops->sqe_inline_size) / 16); req->port->inline_data_size) / 16);
/* Max response capsule size is cqe */ /* Max response capsule size is cqe */
id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16); id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);
...@@ -308,7 +338,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) ...@@ -308,7 +338,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
*/ */
id->nmic = (1 << 0); id->nmic = (1 << 0);
memcpy(&id->nguid, &ns->nguid, sizeof(uuid_le)); memcpy(&id->nguid, &ns->nguid, sizeof(id->nguid));
id->lbaf[0].ds = ns->blksize_shift; id->lbaf[0].ds = ns->blksize_shift;
...@@ -586,6 +616,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req) ...@@ -586,6 +616,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
case NVME_LOG_CHANGED_NS: case NVME_LOG_CHANGED_NS:
req->execute = nvmet_execute_get_log_changed_ns; req->execute = nvmet_execute_get_log_changed_ns;
return 0; return 0;
case NVME_LOG_CMD_EFFECTS:
req->execute = nvmet_execute_get_log_cmd_effects_ns;
return 0;
} }
break; break;
case nvme_admin_identify: case nvme_admin_identify:
......
...@@ -218,6 +218,35 @@ static ssize_t nvmet_addr_trsvcid_store(struct config_item *item, ...@@ -218,6 +218,35 @@ static ssize_t nvmet_addr_trsvcid_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_, addr_trsvcid); CONFIGFS_ATTR(nvmet_, addr_trsvcid);
static ssize_t nvmet_param_inline_data_size_show(struct config_item *item,
char *page)
{
struct nvmet_port *port = to_nvmet_port(item);
return snprintf(page, PAGE_SIZE, "%d\n", port->inline_data_size);
}
static ssize_t nvmet_param_inline_data_size_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_port *port = to_nvmet_port(item);
int ret;
if (port->enabled) {
pr_err("Cannot modify inline_data_size while port enabled\n");
pr_err("Disable the port before modifying\n");
return -EACCES;
}
ret = kstrtoint(page, 0, &port->inline_data_size);
if (ret) {
pr_err("Invalid value '%s' for inline_data_size\n", page);
return -EINVAL;
}
return count;
}
CONFIGFS_ATTR(nvmet_, param_inline_data_size);
static ssize_t nvmet_addr_trtype_show(struct config_item *item, static ssize_t nvmet_addr_trtype_show(struct config_item *item,
char *page) char *page)
{ {
...@@ -407,11 +436,40 @@ static ssize_t nvmet_ns_enable_store(struct config_item *item, ...@@ -407,11 +436,40 @@ static ssize_t nvmet_ns_enable_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_ns_, enable); CONFIGFS_ATTR(nvmet_ns_, enable);
static ssize_t nvmet_ns_buffered_io_show(struct config_item *item, char *page)
{
return sprintf(page, "%d\n", to_nvmet_ns(item)->buffered_io);
}
static ssize_t nvmet_ns_buffered_io_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_ns *ns = to_nvmet_ns(item);
bool val;
if (strtobool(page, &val))
return -EINVAL;
mutex_lock(&ns->subsys->lock);
if (ns->enabled) {
pr_err("disable ns before setting buffered_io value.\n");
mutex_unlock(&ns->subsys->lock);
return -EINVAL;
}
ns->buffered_io = val;
mutex_unlock(&ns->subsys->lock);
return count;
}
CONFIGFS_ATTR(nvmet_ns_, buffered_io);
static struct configfs_attribute *nvmet_ns_attrs[] = { static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_device_path, &nvmet_ns_attr_device_path,
&nvmet_ns_attr_device_nguid, &nvmet_ns_attr_device_nguid,
&nvmet_ns_attr_device_uuid, &nvmet_ns_attr_device_uuid,
&nvmet_ns_attr_enable, &nvmet_ns_attr_enable,
&nvmet_ns_attr_buffered_io,
NULL, NULL,
}; };
...@@ -874,6 +932,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = { ...@@ -874,6 +932,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = {
&nvmet_attr_addr_traddr, &nvmet_attr_addr_traddr,
&nvmet_attr_addr_trsvcid, &nvmet_attr_addr_trsvcid,
&nvmet_attr_addr_trtype, &nvmet_attr_addr_trtype,
&nvmet_attr_param_inline_data_size,
NULL, NULL,
}; };
...@@ -903,6 +962,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group, ...@@ -903,6 +962,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
INIT_LIST_HEAD(&port->entry); INIT_LIST_HEAD(&port->entry);
INIT_LIST_HEAD(&port->subsystems); INIT_LIST_HEAD(&port->subsystems);
INIT_LIST_HEAD(&port->referrals); INIT_LIST_HEAD(&port->referrals);
port->inline_data_size = -1; /* < 0 == let the transport choose */
port->disc_addr.portid = cpu_to_le16(portid); port->disc_addr.portid = cpu_to_le16(portid);
config_group_init_type_name(&port->group, name, &nvmet_port_type); config_group_init_type_name(&port->group, name, &nvmet_port_type);
......
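
A hedged userspace sketch (not part of this series) exercising the two nvmet configfs attributes added in the hunks above. The /sys/kernel/config/nvmet paths, port "1" and subsystem "testnqn" are assumptions about a typical nvmet layout; both attributes must be written while the port or namespace is still disabled, as the store handlers above enforce.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* write a single value to a configfs attribute file */
static int write_attr(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror(path);
		return -1;
	}
	if (write(fd, val, strlen(val)) < 0) {
		perror(path);
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* assumed port and namespace paths; adjust to your configuration */
	write_attr("/sys/kernel/config/nvmet/ports/1/param_inline_data_size",
		   "16384");
	write_attr("/sys/kernel/config/nvmet/subsystems/testnqn/namespaces/1/buffered_io",
		   "1");
	return 0;
}
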
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "nvmet.h" #include "nvmet.h"
struct workqueue_struct *buffered_io_wq;
static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida); static DEFINE_IDA(cntlid_ida);
...@@ -241,6 +242,10 @@ int nvmet_enable_port(struct nvmet_port *port) ...@@ -241,6 +242,10 @@ int nvmet_enable_port(struct nvmet_port *port)
return ret; return ret;
} }
/* If the transport didn't set inline_data_size, then disable it. */
if (port->inline_data_size < 0)
port->inline_data_size = 0;
port->enabled = true; port->enabled = true;
return 0; return 0;
} }
...@@ -437,6 +442,7 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid) ...@@ -437,6 +442,7 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
ns->nsid = nsid; ns->nsid = nsid;
ns->subsys = subsys; ns->subsys = subsys;
uuid_gen(&ns->uuid); uuid_gen(&ns->uuid);
ns->buffered_io = false;
return ns; return ns;
} }
...@@ -1109,6 +1115,12 @@ static int __init nvmet_init(void) ...@@ -1109,6 +1115,12 @@ static int __init nvmet_init(void)
{ {
int error; int error;
buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
WQ_MEM_RECLAIM, 0);
if (!buffered_io_wq) {
error = -ENOMEM;
goto out;
}
error = nvmet_init_discovery(); error = nvmet_init_discovery();
if (error) if (error)
goto out; goto out;
...@@ -1129,6 +1141,7 @@ static void __exit nvmet_exit(void) ...@@ -1129,6 +1141,7 @@ static void __exit nvmet_exit(void)
nvmet_exit_configfs(); nvmet_exit_configfs();
nvmet_exit_discovery(); nvmet_exit_discovery();
ida_destroy(&cntlid_ida); ida_destroy(&cntlid_ida);
destroy_workqueue(buffered_io_wq);
BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024); BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024); BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
......
...@@ -171,7 +171,7 @@ static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req) ...@@ -171,7 +171,7 @@ static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req)
id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */ id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
if (ctrl->ops->has_keyed_sgls) if (ctrl->ops->has_keyed_sgls)
id->sgls |= cpu_to_le32(1 << 2); id->sgls |= cpu_to_le32(1 << 2);
if (ctrl->ops->sqe_inline_size) if (req->port->inline_data_size)
id->sgls |= cpu_to_le32(1 << 20); id->sgls |= cpu_to_le32(1 << 20);
strcpy(id->subnqn, ctrl->subsys->subsysnqn); strcpy(id->subnqn, ctrl->subsys->subsysnqn);
......
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
void nvmet_file_ns_disable(struct nvmet_ns *ns) void nvmet_file_ns_disable(struct nvmet_ns *ns)
{ {
if (ns->file) { if (ns->file) {
if (ns->buffered_io)
flush_workqueue(buffered_io_wq);
mempool_destroy(ns->bvec_pool); mempool_destroy(ns->bvec_pool);
ns->bvec_pool = NULL; ns->bvec_pool = NULL;
kmem_cache_destroy(ns->bvec_cache); kmem_cache_destroy(ns->bvec_cache);
...@@ -27,11 +29,14 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns) ...@@ -27,11 +29,14 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns)
int nvmet_file_ns_enable(struct nvmet_ns *ns) int nvmet_file_ns_enable(struct nvmet_ns *ns)
{ {
int ret; int flags = O_RDWR | O_LARGEFILE;
struct kstat stat; struct kstat stat;
int ret;
ns->file = filp_open(ns->device_path, if (!ns->buffered_io)
O_RDWR | O_LARGEFILE | O_DIRECT, 0); flags |= O_DIRECT;
ns->file = filp_open(ns->device_path, flags, 0);
if (IS_ERR(ns->file)) { if (IS_ERR(ns->file)) {
pr_err("failed to open file %s: (%ld)\n", pr_err("failed to open file %s: (%ld)\n",
ns->device_path, PTR_ERR(ns->file)); ns->device_path, PTR_ERR(ns->file));
...@@ -100,7 +105,7 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos, ...@@ -100,7 +105,7 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
iocb->ki_pos = pos; iocb->ki_pos = pos;
iocb->ki_filp = req->ns->file; iocb->ki_filp = req->ns->file;
iocb->ki_flags = IOCB_DIRECT | ki_flags; iocb->ki_flags = ki_flags | iocb_flags(req->ns->file);
ret = call_iter(iocb, &iter); ret = call_iter(iocb, &iter);
...@@ -140,6 +145,12 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) ...@@ -140,6 +145,12 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
return; return;
} }
pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
if (unlikely(pos + req->data_len > req->ns->size)) {
nvmet_req_complete(req, NVME_SC_LBA_RANGE | NVME_SC_DNR);
return;
}
if (nr_bvec > NVMET_MAX_INLINE_BIOVEC) if (nr_bvec > NVMET_MAX_INLINE_BIOVEC)
req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec), req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
GFP_KERNEL); GFP_KERNEL);
...@@ -155,8 +166,6 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) ...@@ -155,8 +166,6 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
is_sync = true; is_sync = true;
} }
pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
memset(&req->f.iocb, 0, sizeof(struct kiocb)); memset(&req->f.iocb, 0, sizeof(struct kiocb));
for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) { for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) {
nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter); nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter);
...@@ -189,6 +198,19 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) ...@@ -189,6 +198,19 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
nvmet_file_submit_bvec(req, pos, bv_cnt, total_len); nvmet_file_submit_bvec(req, pos, bv_cnt, total_len);
} }
static void nvmet_file_buffered_io_work(struct work_struct *w)
{
struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
nvmet_file_execute_rw(req);
}
static void nvmet_file_execute_rw_buffered_io(struct nvmet_req *req)
{
INIT_WORK(&req->f.work, nvmet_file_buffered_io_work);
queue_work(buffered_io_wq, &req->f.work);
}
static void nvmet_file_flush_work(struct work_struct *w) static void nvmet_file_flush_work(struct work_struct *w)
{ {
struct nvmet_req *req = container_of(w, struct nvmet_req, f.work); struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
...@@ -209,22 +231,30 @@ static void nvmet_file_execute_discard(struct nvmet_req *req) ...@@ -209,22 +231,30 @@ static void nvmet_file_execute_discard(struct nvmet_req *req)
{ {
int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
struct nvme_dsm_range range; struct nvme_dsm_range range;
loff_t offset; loff_t offset, len;
loff_t len; u16 ret;
int i, ret; int i;
for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) { for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
if (nvmet_copy_from_sgl(req, i * sizeof(range), &range, ret = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
sizeof(range))) sizeof(range));
if (ret)
break; break;
offset = le64_to_cpu(range.slba) << req->ns->blksize_shift; offset = le64_to_cpu(range.slba) << req->ns->blksize_shift;
len = le32_to_cpu(range.nlb) << req->ns->blksize_shift; len = le32_to_cpu(range.nlb) << req->ns->blksize_shift;
ret = vfs_fallocate(req->ns->file, mode, offset, len); if (offset + len > req->ns->size) {
if (ret) ret = NVME_SC_LBA_RANGE | NVME_SC_DNR;
break; break;
}
if (vfs_fallocate(req->ns->file, mode, offset, len)) {
ret = NVME_SC_INTERNAL | NVME_SC_DNR;
break;
}
} }
nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); nvmet_req_complete(req, ret);
} }
static void nvmet_file_dsm_work(struct work_struct *w) static void nvmet_file_dsm_work(struct work_struct *w)
...@@ -263,6 +293,11 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w) ...@@ -263,6 +293,11 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w)
len = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) << len = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
req->ns->blksize_shift); req->ns->blksize_shift);
if (unlikely(offset + len > req->ns->size)) {
nvmet_req_complete(req, NVME_SC_LBA_RANGE | NVME_SC_DNR);
return;
}
ret = vfs_fallocate(req->ns->file, mode, offset, len); ret = vfs_fallocate(req->ns->file, mode, offset, len);
nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
} }
...@@ -280,7 +315,10 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req) ...@@ -280,7 +315,10 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
switch (cmd->common.opcode) { switch (cmd->common.opcode) {
case nvme_cmd_read: case nvme_cmd_read:
case nvme_cmd_write: case nvme_cmd_write:
req->execute = nvmet_file_execute_rw; if (req->ns->buffered_io)
req->execute = nvmet_file_execute_rw_buffered_io;
else
req->execute = nvmet_file_execute_rw;
req->data_len = nvmet_rw_len(req); req->data_len = nvmet_rw_len(req);
return 0; return 0;
case nvme_cmd_flush: case nvme_cmd_flush:
......
...@@ -227,6 +227,7 @@ static int nvme_loop_init_request(struct blk_mq_tag_set *set, ...@@ -227,6 +227,7 @@ static int nvme_loop_init_request(struct blk_mq_tag_set *set,
{ {
struct nvme_loop_ctrl *ctrl = set->driver_data; struct nvme_loop_ctrl *ctrl = set->driver_data;
nvme_req(req)->ctrl = &ctrl->ctrl;
return nvme_loop_init_iod(ctrl, blk_mq_rq_to_pdu(req), return nvme_loop_init_iod(ctrl, blk_mq_rq_to_pdu(req),
(set == &ctrl->tag_set) ? hctx_idx + 1 : 0); (set == &ctrl->tag_set) ? hctx_idx + 1 : 0);
} }
......
...@@ -65,6 +65,7 @@ struct nvmet_ns { ...@@ -65,6 +65,7 @@ struct nvmet_ns {
u8 nguid[16]; u8 nguid[16];
uuid_t uuid; uuid_t uuid;
bool buffered_io;
bool enabled; bool enabled;
struct nvmet_subsys *subsys; struct nvmet_subsys *subsys;
const char *device_path; const char *device_path;
...@@ -116,6 +117,7 @@ struct nvmet_port { ...@@ -116,6 +117,7 @@ struct nvmet_port {
struct list_head referrals; struct list_head referrals;
void *priv; void *priv;
bool enabled; bool enabled;
int inline_data_size;
}; };
static inline struct nvmet_port *to_nvmet_port(struct config_item *item) static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
...@@ -225,7 +227,6 @@ struct nvmet_req; ...@@ -225,7 +227,6 @@ struct nvmet_req;
struct nvmet_fabrics_ops { struct nvmet_fabrics_ops {
struct module *owner; struct module *owner;
unsigned int type; unsigned int type;
unsigned int sqe_inline_size;
unsigned int msdbd; unsigned int msdbd;
bool has_keyed_sgls : 1; bool has_keyed_sgls : 1;
void (*queue_response)(struct nvmet_req *req); void (*queue_response)(struct nvmet_req *req);
...@@ -269,6 +270,8 @@ struct nvmet_req { ...@@ -269,6 +270,8 @@ struct nvmet_req {
const struct nvmet_fabrics_ops *ops; const struct nvmet_fabrics_ops *ops;
}; };
extern struct workqueue_struct *buffered_io_wq;
static inline void nvmet_set_status(struct nvmet_req *req, u16 status) static inline void nvmet_set_status(struct nvmet_req *req, u16 status)
{ {
req->rsp->status = cpu_to_le16(status << 1); req->rsp->status = cpu_to_le16(status << 1);
......
...@@ -33,16 +33,17 @@ ...@@ -33,16 +33,17 @@
#include "nvmet.h" #include "nvmet.h"
/* /*
* We allow up to a page of inline data to go with the SQE * We allow at least 1 page, up to 4 SGEs, and up to 16KB of inline data
*/ */
#define NVMET_RDMA_INLINE_DATA_SIZE PAGE_SIZE #define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE PAGE_SIZE
#define NVMET_RDMA_MAX_INLINE_SGE 4
#define NVMET_RDMA_MAX_INLINE_DATA_SIZE max_t(int, SZ_16K, PAGE_SIZE)
struct nvmet_rdma_cmd { struct nvmet_rdma_cmd {
struct ib_sge sge[2]; struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1];
struct ib_cqe cqe; struct ib_cqe cqe;
struct ib_recv_wr wr; struct ib_recv_wr wr;
struct scatterlist inline_sg; struct scatterlist inline_sg[NVMET_RDMA_MAX_INLINE_SGE];
struct page *inline_page;
struct nvme_command *nvme_cmd; struct nvme_command *nvme_cmd;
struct nvmet_rdma_queue *queue; struct nvmet_rdma_queue *queue;
}; };
...@@ -116,6 +117,8 @@ struct nvmet_rdma_device { ...@@ -116,6 +117,8 @@ struct nvmet_rdma_device {
size_t srq_size; size_t srq_size;
struct kref ref; struct kref ref;
struct list_head entry; struct list_head entry;
int inline_data_size;
int inline_page_count;
}; };
static bool nvmet_rdma_use_srq; static bool nvmet_rdma_use_srq;
...@@ -138,6 +141,11 @@ static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue); ...@@ -138,6 +141,11 @@ static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
static const struct nvmet_fabrics_ops nvmet_rdma_ops; static const struct nvmet_fabrics_ops nvmet_rdma_ops;
static int num_pages(int len)
{
return 1 + (((len - 1) & PAGE_MASK) >> PAGE_SHIFT);
}
/* XXX: really should move to a generic header sooner or later.. */ /* XXX: really should move to a generic header sooner or later.. */
static inline u32 get_unaligned_le24(const u8 *p) static inline u32 get_unaligned_le24(const u8 *p)
{ {
...@@ -184,6 +192,71 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp) ...@@ -184,6 +192,71 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags); spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
} }
static void nvmet_rdma_free_inline_pages(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_cmd *c)
{
struct scatterlist *sg;
struct ib_sge *sge;
int i;
if (!ndev->inline_data_size)
return;
sg = c->inline_sg;
sge = &c->sge[1];
for (i = 0; i < ndev->inline_page_count; i++, sg++, sge++) {
if (sge->length)
ib_dma_unmap_page(ndev->device, sge->addr,
sge->length, DMA_FROM_DEVICE);
if (sg_page(sg))
__free_page(sg_page(sg));
}
}
static int nvmet_rdma_alloc_inline_pages(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_cmd *c)
{
struct scatterlist *sg;
struct ib_sge *sge;
struct page *pg;
int len;
int i;
if (!ndev->inline_data_size)
return 0;
sg = c->inline_sg;
sg_init_table(sg, ndev->inline_page_count);
sge = &c->sge[1];
len = ndev->inline_data_size;
for (i = 0; i < ndev->inline_page_count; i++, sg++, sge++) {
pg = alloc_page(GFP_KERNEL);
if (!pg)
goto out_err;
sg_assign_page(sg, pg);
sge->addr = ib_dma_map_page(ndev->device,
pg, 0, PAGE_SIZE, DMA_FROM_DEVICE);
if (ib_dma_mapping_error(ndev->device, sge->addr))
goto out_err;
sge->length = min_t(int, len, PAGE_SIZE);
sge->lkey = ndev->pd->local_dma_lkey;
len -= sge->length;
}
return 0;
out_err:
for (; i >= 0; i--, sg--, sge--) {
if (sge->length)
ib_dma_unmap_page(ndev->device, sge->addr,
sge->length, DMA_FROM_DEVICE);
if (sg_page(sg))
__free_page(sg_page(sg));
}
return -ENOMEM;
}
static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev, static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_cmd *c, bool admin) struct nvmet_rdma_cmd *c, bool admin)
{ {
...@@ -200,33 +273,17 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev, ...@@ -200,33 +273,17 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
c->sge[0].length = sizeof(*c->nvme_cmd); c->sge[0].length = sizeof(*c->nvme_cmd);
c->sge[0].lkey = ndev->pd->local_dma_lkey; c->sge[0].lkey = ndev->pd->local_dma_lkey;
if (!admin) { if (!admin && nvmet_rdma_alloc_inline_pages(ndev, c))
c->inline_page = alloc_pages(GFP_KERNEL, goto out_unmap_cmd;
get_order(NVMET_RDMA_INLINE_DATA_SIZE));
if (!c->inline_page)
goto out_unmap_cmd;
c->sge[1].addr = ib_dma_map_page(ndev->device,
c->inline_page, 0, NVMET_RDMA_INLINE_DATA_SIZE,
DMA_FROM_DEVICE);
if (ib_dma_mapping_error(ndev->device, c->sge[1].addr))
goto out_free_inline_page;
c->sge[1].length = NVMET_RDMA_INLINE_DATA_SIZE;
c->sge[1].lkey = ndev->pd->local_dma_lkey;
}
c->cqe.done = nvmet_rdma_recv_done; c->cqe.done = nvmet_rdma_recv_done;
c->wr.wr_cqe = &c->cqe; c->wr.wr_cqe = &c->cqe;
c->wr.sg_list = c->sge; c->wr.sg_list = c->sge;
c->wr.num_sge = admin ? 1 : 2; c->wr.num_sge = admin ? 1 : ndev->inline_page_count + 1;
return 0; return 0;
out_free_inline_page:
if (!admin) {
__free_pages(c->inline_page,
get_order(NVMET_RDMA_INLINE_DATA_SIZE));
}
out_unmap_cmd: out_unmap_cmd:
ib_dma_unmap_single(ndev->device, c->sge[0].addr, ib_dma_unmap_single(ndev->device, c->sge[0].addr,
sizeof(*c->nvme_cmd), DMA_FROM_DEVICE); sizeof(*c->nvme_cmd), DMA_FROM_DEVICE);
...@@ -240,12 +297,8 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev, ...@@ -240,12 +297,8 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
static void nvmet_rdma_free_cmd(struct nvmet_rdma_device *ndev, static void nvmet_rdma_free_cmd(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_cmd *c, bool admin) struct nvmet_rdma_cmd *c, bool admin)
{ {
if (!admin) { if (!admin)
ib_dma_unmap_page(ndev->device, c->sge[1].addr, nvmet_rdma_free_inline_pages(ndev, c);
NVMET_RDMA_INLINE_DATA_SIZE, DMA_FROM_DEVICE);
__free_pages(c->inline_page,
get_order(NVMET_RDMA_INLINE_DATA_SIZE));
}
ib_dma_unmap_single(ndev->device, c->sge[0].addr, ib_dma_unmap_single(ndev->device, c->sge[0].addr,
sizeof(*c->nvme_cmd), DMA_FROM_DEVICE); sizeof(*c->nvme_cmd), DMA_FROM_DEVICE);
kfree(c->nvme_cmd); kfree(c->nvme_cmd);
...@@ -383,14 +436,21 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev, ...@@ -383,14 +436,21 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_cmd *cmd) struct nvmet_rdma_cmd *cmd)
{ {
struct ib_recv_wr *bad_wr; struct ib_recv_wr *bad_wr;
int ret;
ib_dma_sync_single_for_device(ndev->device, ib_dma_sync_single_for_device(ndev->device,
cmd->sge[0].addr, cmd->sge[0].length, cmd->sge[0].addr, cmd->sge[0].length,
DMA_FROM_DEVICE); DMA_FROM_DEVICE);
if (ndev->srq) if (ndev->srq)
return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr); ret = ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
return ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr); else
ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr);
if (unlikely(ret))
pr_err("post_recv cmd failed\n");
return ret;
} }
static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue) static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue)
...@@ -429,7 +489,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp) ...@@ -429,7 +489,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
} }
if (rsp->req.sg != &rsp->cmd->inline_sg) if (rsp->req.sg != rsp->cmd->inline_sg)
sgl_free(rsp->req.sg); sgl_free(rsp->req.sg);
if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list))) if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
...@@ -493,7 +553,7 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req) ...@@ -493,7 +553,7 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
rsp->send_sge.addr, rsp->send_sge.length, rsp->send_sge.addr, rsp->send_sge.length,
DMA_TO_DEVICE); DMA_TO_DEVICE);
if (ib_post_send(cm_id->qp, first_wr, &bad_wr)) { if (unlikely(ib_post_send(cm_id->qp, first_wr, &bad_wr))) {
pr_err("sending cmd response failed\n"); pr_err("sending cmd response failed\n");
nvmet_rdma_release_rsp(rsp); nvmet_rdma_release_rsp(rsp);
} }
...@@ -529,10 +589,25 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc) ...@@ -529,10 +589,25 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len, static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len,
u64 off) u64 off)
{ {
sg_init_table(&rsp->cmd->inline_sg, 1); int sg_count = num_pages(len);
sg_set_page(&rsp->cmd->inline_sg, rsp->cmd->inline_page, len, off); struct scatterlist *sg;
rsp->req.sg = &rsp->cmd->inline_sg; int i;
rsp->req.sg_cnt = 1;
sg = rsp->cmd->inline_sg;
for (i = 0; i < sg_count; i++, sg++) {
if (i < sg_count - 1)
sg_unmark_end(sg);
else
sg_mark_end(sg);
sg->offset = off;
sg->length = min_t(int, len, PAGE_SIZE - off);
len -= sg->length;
if (!i)
off = 0;
}
rsp->req.sg = rsp->cmd->inline_sg;
rsp->req.sg_cnt = sg_count;
} }
static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp) static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
...@@ -544,7 +619,7 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp) ...@@ -544,7 +619,7 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
if (!nvme_is_write(rsp->req.cmd)) if (!nvme_is_write(rsp->req.cmd))
return NVME_SC_INVALID_FIELD | NVME_SC_DNR; return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
if (off + len > NVMET_RDMA_INLINE_DATA_SIZE) { if (off + len > rsp->queue->dev->inline_data_size) {
pr_err("invalid inline data offset!\n"); pr_err("invalid inline data offset!\n");
return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR; return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
} }
...@@ -743,7 +818,7 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev) ...@@ -743,7 +818,7 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
srq_size = 4095; /* XXX: tune */ srq_size = 4095; /* XXX: tune */
srq_attr.attr.max_wr = srq_size; srq_attr.attr.max_wr = srq_size;
srq_attr.attr.max_sge = 2; srq_attr.attr.max_sge = 1 + ndev->inline_page_count;
srq_attr.attr.srq_limit = 0; srq_attr.attr.srq_limit = 0;
srq_attr.srq_type = IB_SRQT_BASIC; srq_attr.srq_type = IB_SRQT_BASIC;
srq = ib_create_srq(ndev->pd, &srq_attr); srq = ib_create_srq(ndev->pd, &srq_attr);
...@@ -765,11 +840,16 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev) ...@@ -765,11 +840,16 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
ndev->srq = srq; ndev->srq = srq;
ndev->srq_size = srq_size; ndev->srq_size = srq_size;
for (i = 0; i < srq_size; i++) for (i = 0; i < srq_size; i++) {
nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]); ret = nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]);
if (ret)
goto out_free_cmds;
}
return 0; return 0;
out_free_cmds:
nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false);
out_destroy_srq: out_destroy_srq:
ib_destroy_srq(srq); ib_destroy_srq(srq);
return ret; return ret;
...@@ -793,7 +873,10 @@ static void nvmet_rdma_free_dev(struct kref *ref) ...@@ -793,7 +873,10 @@ static void nvmet_rdma_free_dev(struct kref *ref)
static struct nvmet_rdma_device * static struct nvmet_rdma_device *
nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
{ {
struct nvmet_port *port = cm_id->context;
struct nvmet_rdma_device *ndev; struct nvmet_rdma_device *ndev;
int inline_page_count;
int inline_sge_count;
int ret; int ret;
mutex_lock(&device_list_mutex); mutex_lock(&device_list_mutex);
...@@ -807,6 +890,18 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) ...@@ -807,6 +890,18 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
if (!ndev) if (!ndev)
goto out_err; goto out_err;
inline_page_count = num_pages(port->inline_data_size);
inline_sge_count = max(cm_id->device->attrs.max_sge_rd,
cm_id->device->attrs.max_sge) - 1;
if (inline_page_count > inline_sge_count) {
pr_warn("inline_data_size %d cannot be supported by device %s. Reducing to %lu.\n",
port->inline_data_size, cm_id->device->name,
inline_sge_count * PAGE_SIZE);
port->inline_data_size = inline_sge_count * PAGE_SIZE;
inline_page_count = inline_sge_count;
}
ndev->inline_data_size = port->inline_data_size;
ndev->inline_page_count = inline_page_count;
ndev->device = cm_id->device; ndev->device = cm_id->device;
kref_init(&ndev->ref); kref_init(&ndev->ref);
...@@ -881,7 +976,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) ...@@ -881,7 +976,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
} else { } else {
/* +1 for drain */ /* +1 for drain */
qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size; qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size;
qp_attr.cap.max_recv_sge = 2; qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count;
} }
ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr); ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr);
...@@ -899,13 +994,17 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) ...@@ -899,13 +994,17 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
if (!ndev->srq) { if (!ndev->srq) {
for (i = 0; i < queue->recv_queue_size; i++) { for (i = 0; i < queue->recv_queue_size; i++) {
queue->cmds[i].queue = queue; queue->cmds[i].queue = queue;
nvmet_rdma_post_recv(ndev, &queue->cmds[i]); ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
if (ret)
goto err_destroy_qp;
} }
} }
out: out:
return ret; return ret;
err_destroy_qp:
rdma_destroy_qp(queue->cm_id);
err_destroy_cq: err_destroy_cq:
ib_free_cq(queue->cq); ib_free_cq(queue->cq);
goto out; goto out;
...@@ -1379,6 +1478,15 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) ...@@ -1379,6 +1478,15 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
return -EINVAL; return -EINVAL;
} }
if (port->inline_data_size < 0) {
port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
} else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
pr_warn("inline_data_size %u is too large, reducing to %u\n",
port->inline_data_size,
NVMET_RDMA_MAX_INLINE_DATA_SIZE);
port->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE;
}
ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr, ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr,
port->disc_addr.trsvcid, &addr); port->disc_addr.trsvcid, &addr);
if (ret) { if (ret) {
...@@ -1456,7 +1564,6 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, ...@@ -1456,7 +1564,6 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
static const struct nvmet_fabrics_ops nvmet_rdma_ops = { static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.type = NVMF_TRTYPE_RDMA, .type = NVMF_TRTYPE_RDMA,
.sqe_inline_size = NVMET_RDMA_INLINE_DATA_SIZE,
.msdbd = 1, .msdbd = 1,
.has_keyed_sgls = 1, .has_keyed_sgls = 1,
.add_port = nvmet_rdma_add_port, .add_port = nvmet_rdma_add_port,
......
...@@ -749,6 +749,11 @@ enum { ...@@ -749,6 +749,11 @@ enum {
NVME_FEAT_HOST_MEM_BUF = 0x0d, NVME_FEAT_HOST_MEM_BUF = 0x0d,
NVME_FEAT_TIMESTAMP = 0x0e, NVME_FEAT_TIMESTAMP = 0x0e,
NVME_FEAT_KATO = 0x0f, NVME_FEAT_KATO = 0x0f,
NVME_FEAT_HCTM = 0x10,
NVME_FEAT_NOPSC = 0x11,
NVME_FEAT_RRL = 0x12,
NVME_FEAT_PLM_CONFIG = 0x13,
NVME_FEAT_PLM_WINDOW = 0x14,
NVME_FEAT_SW_PROGRESS = 0x80, NVME_FEAT_SW_PROGRESS = 0x80,
NVME_FEAT_HOST_ID = 0x81, NVME_FEAT_HOST_ID = 0x81,
NVME_FEAT_RESV_MASK = 0x82, NVME_FEAT_RESV_MASK = 0x82,
......