Commit 95c7c09f authored by Jens Axboe

Merge branch 'nvme-4.18' of git://git.infradead.org/nvme into for-linus

Pull NVMe fixes from Christoph:

"Fix various little regressions introduced in this merge window, plus
 a rework of the fibre channel connect and reconnect path to share the
 code instead of having separate sets of bugs. Last but not least a
 trivial trace point addition from Hannes."

* 'nvme-4.18' of git://git.infradead.org/nvme:
  nvme-fabrics: fix and refine state checks in __nvmf_check_ready
  nvme-fabrics: handle the admin-only case properly in nvmf_check_ready
  nvme-fabrics: refactor queue ready check
  blk-mq: remove blk_mq_tagset_iter
  nvme: remove nvme_reinit_tagset
  nvme-fc: fix nulling of queue data on reconnect
  nvme-fc: remove reinit_request routine
  nvme-fc: change controllers first connect to use reconnect path
  nvme: don't rely on the changed namespace list log
  nvmet: free smart-log buffer after use
  nvme-rdma: fix error flow during mapping request data
  nvme: add bio remapping tracepoint
  nvme: fix NULL pointer dereference in nvme_init_subsystem
parents da661267 35897b92
...@@ -311,35 +311,6 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, ...@@ -311,35 +311,6 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
} }
EXPORT_SYMBOL(blk_mq_tagset_busy_iter); EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
/*
 * Call @fn for every non-NULL entry in the static_rqs array of each
 * populated tags structure in @set, passing @data as the first argument.
 *
 * The walk stops at the first non-zero value returned by @fn and that value
 * is handed back to the caller.  Returns 0 when every invocation returned 0,
 * and also (after a one-time warning) when @fn is NULL.
 */
int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data,
		       int (fn)(void *, struct request *))
{
	int hwq, tag;

	if (WARN_ON_ONCE(!fn))
		return 0;

	for (hwq = 0; hwq < set->nr_hw_queues; hwq++) {
		struct blk_mq_tags *tags = set->tags[hwq];

		/* Hardware queues without a tags structure are skipped. */
		if (!tags)
			continue;

		for (tag = 0; tag < tags->nr_tags; tag++) {
			int ret;

			if (!tags->static_rqs[tag])
				continue;

			ret = fn(data, tags->static_rqs[tag]);
			if (ret)
				return ret;
		}
	}

	return 0;
}
EXPORT_SYMBOL_GPL(blk_mq_tagset_iter);
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
void *priv) void *priv)
{ {
......
...@@ -2208,7 +2208,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) ...@@ -2208,7 +2208,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
* Verify that the subsystem actually supports multiple * Verify that the subsystem actually supports multiple
* controllers, else bail out. * controllers, else bail out.
*/ */
if (!ctrl->opts->discovery_nqn && if (!(ctrl->opts && ctrl->opts->discovery_nqn) &&
nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
dev_err(ctrl->device, dev_err(ctrl->device,
"ignoring ctrl due to duplicate subnqn (%s).\n", "ignoring ctrl due to duplicate subnqn (%s).\n",
...@@ -3197,40 +3197,28 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn) ...@@ -3197,40 +3197,28 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn)
nvme_remove_invalid_namespaces(ctrl, nn); nvme_remove_invalid_namespaces(ctrl, nn);
} }
static bool nvme_scan_changed_ns_log(struct nvme_ctrl *ctrl) static void nvme_clear_changed_ns_log(struct nvme_ctrl *ctrl)
{ {
size_t log_size = NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32); size_t log_size = NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32);
__le32 *log; __le32 *log;
int error, i; int error;
bool ret = false;
log = kzalloc(log_size, GFP_KERNEL); log = kzalloc(log_size, GFP_KERNEL);
if (!log) if (!log)
return false; return;
/*
* We need to read the log to clear the AEN, but we don't want to rely
* on it for the changed namespace information as userspace could have
* raced with us in reading the log page, which could cause us to miss
* updates.
*/
error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size); error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size);
if (error) { if (error)
dev_warn(ctrl->device, dev_warn(ctrl->device,
"reading changed ns log failed: %d\n", error); "reading changed ns log failed: %d\n", error);
goto out_free_log;
}
if (log[0] == cpu_to_le32(0xffffffff))
goto out_free_log;
for (i = 0; i < NVME_MAX_CHANGED_NAMESPACES; i++) {
u32 nsid = le32_to_cpu(log[i]);
if (nsid == 0)
break;
dev_info(ctrl->device, "rescanning namespace %d.\n", nsid);
nvme_validate_ns(ctrl, nsid);
}
ret = true;
out_free_log:
kfree(log); kfree(log);
return ret;
} }
static void nvme_scan_work(struct work_struct *work) static void nvme_scan_work(struct work_struct *work)
...@@ -3246,9 +3234,8 @@ static void nvme_scan_work(struct work_struct *work) ...@@ -3246,9 +3234,8 @@ static void nvme_scan_work(struct work_struct *work)
WARN_ON_ONCE(!ctrl->tagset); WARN_ON_ONCE(!ctrl->tagset);
if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) { if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
if (nvme_scan_changed_ns_log(ctrl))
goto out_sort_namespaces;
dev_info(ctrl->device, "rescanning namespaces.\n"); dev_info(ctrl->device, "rescanning namespaces.\n");
nvme_clear_changed_ns_log(ctrl);
} }
if (nvme_identify_ctrl(ctrl, &id)) if (nvme_identify_ctrl(ctrl, &id))
...@@ -3263,7 +3250,6 @@ static void nvme_scan_work(struct work_struct *work) ...@@ -3263,7 +3250,6 @@ static void nvme_scan_work(struct work_struct *work)
nvme_scan_ns_sequential(ctrl, nn); nvme_scan_ns_sequential(ctrl, nn);
out_free_id: out_free_id:
kfree(id); kfree(id);
out_sort_namespaces:
down_write(&ctrl->namespaces_rwsem); down_write(&ctrl->namespaces_rwsem);
list_sort(NULL, &ctrl->namespaces, ns_cmp); list_sort(NULL, &ctrl->namespaces, ns_cmp);
up_write(&ctrl->namespaces_rwsem); up_write(&ctrl->namespaces_rwsem);
...@@ -3641,16 +3627,6 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) ...@@ -3641,16 +3627,6 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
} }
EXPORT_SYMBOL_GPL(nvme_start_queues); EXPORT_SYMBOL_GPL(nvme_start_queues);
/*
 * Hand the controller's ->reinit_request callback to blk_mq_tagset_iter()
 * for @set, using set->driver_data as the callback data.
 *
 * Returns 0 when the controller ops provide no ->reinit_request callback,
 * otherwise whatever blk_mq_tagset_iter() returns.
 */
int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set)
{
	if (ctrl->ops->reinit_request)
		return blk_mq_tagset_iter(set, set->driver_data,
					  ctrl->ops->reinit_request);

	return 0;
}
EXPORT_SYMBOL_GPL(nvme_reinit_tagset);
int __init nvme_core_init(void) int __init nvme_core_init(void)
{ {
int result = -ENOMEM; int result = -ENOMEM;
......
...@@ -536,67 +536,55 @@ static struct nvmf_transport_ops *nvmf_lookup_transport( ...@@ -536,67 +536,55 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
return NULL; return NULL;
} }
blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, struct request *rq, /*
bool queue_live, bool is_connected) * For something we're not in a state to send to the device the default action
* is to busy it and retry it after the controller state is recovered. However,
* anything marked for failfast or nvme multipath is immediately failed.
*
* Note: commands used to initialize the controller will be marked for failfast.
* Note: nvme cli/ioctl commands are marked for failfast.
*/
blk_status_t nvmf_fail_nonready_command(struct request *rq)
{ {
struct nvme_command *cmd = nvme_req(rq)->cmd; if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
return BLK_STS_RESOURCE;
nvme_req(rq)->status = NVME_SC_ABORT_REQ;
return BLK_STS_IOERR;
}
EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command);
if (likely(ctrl->state == NVME_CTRL_LIVE && is_connected)) bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
return BLK_STS_OK; bool queue_live)
{
struct nvme_request *req = nvme_req(rq);
/*
* If we are in some state of setup or teardown only allow
* internally generated commands.
*/
if (!blk_rq_is_passthrough(rq) || (req->flags & NVME_REQ_USERCMD))
return false;
/*
* Only allow commands on a live queue, except for the connect command,
* which is required to set the queue live in the appropriate states.
*/
switch (ctrl->state) { switch (ctrl->state) {
case NVME_CTRL_NEW: case NVME_CTRL_NEW:
case NVME_CTRL_CONNECTING: case NVME_CTRL_CONNECTING:
case NVME_CTRL_DELETING: if (req->cmd->common.opcode == nvme_fabrics_command &&
/* req->cmd->fabrics.fctype == nvme_fabrics_type_connect)
* This is the case of starting a new or deleting an association return true;
* but connectivity was lost before it was fully created or torn
* down. We need to error the commands used to initialize the
* controller so the reconnect can go into a retry attempt. The
* commands should all be marked REQ_FAILFAST_DRIVER, which will
* hit the reject path below. Anything else will be queued while
* the state settles.
*/
if (!is_connected)
break;
/*
* If queue is live, allow only commands that are internally
* generated pass through. These are commands on the admin
* queue to initialize the controller. This will reject any
* ioctl admin cmds received while initializing.
*/
if (queue_live && !(nvme_req(rq)->flags & NVME_REQ_USERCMD))
return BLK_STS_OK;
/*
* If the queue is not live, allow only a connect command. This
* will reject any ioctl admin cmd as well as initialization
* commands if the controller reverted the queue to non-live.
*/
if (!queue_live && blk_rq_is_passthrough(rq) &&
cmd->common.opcode == nvme_fabrics_command &&
cmd->fabrics.fctype == nvme_fabrics_type_connect)
return BLK_STS_OK;
break; break;
default: default:
break; break;
case NVME_CTRL_DEAD:
return false;
} }
/* return queue_live;
* Any other new io is something we're not in a state to send to the
* device. Default action is to busy it and retry it after the
* controller state is recovered. However, anything marked for failfast
* or nvme multipath is immediately failed. Note: commands used to
* initialize the controller will be marked for failfast.
* Note: nvme cli/ioctl commands are marked for failfast.
*/
if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
return BLK_STS_RESOURCE;
nvme_req(rq)->status = NVME_SC_ABORT_REQ;
return BLK_STS_IOERR;
} }
EXPORT_SYMBOL_GPL(nvmf_check_if_ready); EXPORT_SYMBOL_GPL(__nvmf_check_ready);
static const match_table_t opt_tokens = { static const match_table_t opt_tokens = {
{ NVMF_OPT_TRANSPORT, "transport=%s" }, { NVMF_OPT_TRANSPORT, "transport=%s" },
......
...@@ -162,7 +162,17 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops); ...@@ -162,7 +162,17 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
void nvmf_free_options(struct nvmf_ctrl_options *opts); void nvmf_free_options(struct nvmf_ctrl_options *opts);
int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, blk_status_t nvmf_fail_nonready_command(struct request *rq);
struct request *rq, bool queue_live, bool is_connected); bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
bool queue_live);
/*
 * Fast-path readiness check: a controller in the LIVE or ADMIN_ONLY state is
 * always considered ready.  Any other state is delegated to
 * __nvmf_check_ready(), whose result is returned unchanged.
 */
static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
		bool queue_live)
{
	/* Uncommon case: neither LIVE nor ADMIN_ONLY, take the slow path. */
	if (unlikely(ctrl->state != NVME_CTRL_LIVE &&
		     ctrl->state != NVME_CTRL_ADMIN_ONLY))
		return __nvmf_check_ready(ctrl, rq, queue_live);

	return true;
}
#endif /* _NVME_FABRICS_H */ #endif /* _NVME_FABRICS_H */
...@@ -142,6 +142,7 @@ struct nvme_fc_ctrl { ...@@ -142,6 +142,7 @@ struct nvme_fc_ctrl {
struct nvme_fc_rport *rport; struct nvme_fc_rport *rport;
u32 cnum; u32 cnum;
bool ioq_live;
bool assoc_active; bool assoc_active;
u64 association_id; u64 association_id;
...@@ -1470,21 +1471,6 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) ...@@ -1470,21 +1471,6 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg); static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
/*
 * Re-initialize the FC command and response IUs held in the per-request
 * data of @rq: both are zeroed, then the command IU's scsi_id, fc_id and
 * iu_len (its size in 32-bit words, big endian) are filled in again.
 *
 * @data is not used.  Always returns 0.
 */
static int
nvme_fc_reinit_request(void *data, struct request *rq)
{
	struct nvme_fc_fcp_op *fcp_op = blk_mq_rq_to_pdu(rq);
	struct nvme_fc_cmd_iu *iu = &fcp_op->cmd_iu;

	/* Clear both IUs before restoring the fixed command IU header. */
	memset(&fcp_op->rsp_iu, 0, sizeof(fcp_op->rsp_iu));
	memset(iu, 0, sizeof(*iu));

	iu->scsi_id = NVME_CMD_SCSI_ID;
	iu->fc_id = NVME_CMD_FC_ID;
	iu->iu_len = cpu_to_be16(sizeof(*iu) / sizeof(u32));

	return 0;
}
static void static void
__nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl,
struct nvme_fc_fcp_op *op) struct nvme_fc_fcp_op *op)
...@@ -1893,6 +1879,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue) ...@@ -1893,6 +1879,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue)
*/ */
queue->connection_id = 0; queue->connection_id = 0;
atomic_set(&queue->csn, 1);
} }
static void static void
...@@ -2279,14 +2266,13 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -2279,14 +2266,13 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
struct nvme_command *sqe = &cmdiu->sqe; struct nvme_command *sqe = &cmdiu->sqe;
enum nvmefc_fcp_datadir io_dir; enum nvmefc_fcp_datadir io_dir;
bool queue_ready = test_bit(NVME_FC_Q_LIVE, &queue->flags);
u32 data_len; u32 data_len;
blk_status_t ret; blk_status_t ret;
ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq, if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
test_bit(NVME_FC_Q_LIVE, &queue->flags), !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE); return nvmf_fail_nonready_command(rq);
if (unlikely(ret))
return ret;
ret = nvme_setup_cmd(ns, rq, sqe); ret = nvme_setup_cmd(ns, rq, sqe);
if (ret) if (ret)
...@@ -2463,6 +2449,8 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) ...@@ -2463,6 +2449,8 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
if (ret) if (ret)
goto out_delete_hw_queues; goto out_delete_hw_queues;
ctrl->ioq_live = true;
return 0; return 0;
out_delete_hw_queues: out_delete_hw_queues:
...@@ -2480,7 +2468,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) ...@@ -2480,7 +2468,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
} }
static int static int
nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
{ {
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
unsigned int nr_io_queues; unsigned int nr_io_queues;
...@@ -2500,12 +2488,6 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) ...@@ -2500,12 +2488,6 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
if (ctrl->ctrl.queue_count == 1) if (ctrl->ctrl.queue_count == 1)
return 0; return 0;
nvme_fc_init_io_queues(ctrl);
ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
if (ret)
goto out_free_io_queues;
ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
if (ret) if (ret)
goto out_free_io_queues; goto out_free_io_queues;
...@@ -2603,8 +2585,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ...@@ -2603,8 +2585,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
* Create the admin queue * Create the admin queue
*/ */
nvme_fc_init_queue(ctrl, 0);
ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
NVME_AQ_DEPTH); NVME_AQ_DEPTH);
if (ret) if (ret)
...@@ -2615,8 +2595,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ...@@ -2615,8 +2595,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
if (ret) if (ret)
goto out_delete_hw_queue; goto out_delete_hw_queue;
if (ctrl->ctrl.state != NVME_CTRL_NEW) blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
ret = nvmf_connect_admin_queue(&ctrl->ctrl); ret = nvmf_connect_admin_queue(&ctrl->ctrl);
if (ret) if (ret)
...@@ -2689,10 +2668,10 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ...@@ -2689,10 +2668,10 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
*/ */
if (ctrl->ctrl.queue_count > 1) { if (ctrl->ctrl.queue_count > 1) {
if (ctrl->ctrl.state == NVME_CTRL_NEW) if (!ctrl->ioq_live)
ret = nvme_fc_create_io_queues(ctrl); ret = nvme_fc_create_io_queues(ctrl);
else else
ret = nvme_fc_reinit_io_queues(ctrl); ret = nvme_fc_recreate_io_queues(ctrl);
if (ret) if (ret)
goto out_term_aen_ops; goto out_term_aen_ops;
} }
...@@ -2776,8 +2755,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) ...@@ -2776,8 +2755,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
* use blk_mq_tagset_busy_itr() and the transport routine to * use blk_mq_tagset_busy_itr() and the transport routine to
* terminate the exchanges. * terminate the exchanges.
*/ */
if (ctrl->ctrl.state != NVME_CTRL_NEW) blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_fc_terminate_exchange, &ctrl->ctrl); nvme_fc_terminate_exchange, &ctrl->ctrl);
...@@ -2917,7 +2895,6 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { ...@@ -2917,7 +2895,6 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
.submit_async_event = nvme_fc_submit_async_event, .submit_async_event = nvme_fc_submit_async_event,
.delete_ctrl = nvme_fc_delete_ctrl, .delete_ctrl = nvme_fc_delete_ctrl,
.get_address = nvmf_get_address, .get_address = nvmf_get_address,
.reinit_request = nvme_fc_reinit_request,
}; };
static void static void
...@@ -2934,7 +2911,7 @@ nvme_fc_connect_ctrl_work(struct work_struct *work) ...@@ -2934,7 +2911,7 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
nvme_fc_reconnect_or_delete(ctrl, ret); nvme_fc_reconnect_or_delete(ctrl, ret);
else else
dev_info(ctrl->ctrl.device, dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: controller reconnect complete\n", "NVME-FC{%d}: controller connect complete\n",
ctrl->cnum); ctrl->cnum);
} }
...@@ -2982,7 +2959,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ...@@ -2982,7 +2959,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
{ {
struct nvme_fc_ctrl *ctrl; struct nvme_fc_ctrl *ctrl;
unsigned long flags; unsigned long flags;
int ret, idx, retry; int ret, idx;
if (!(rport->remoteport.port_role & if (!(rport->remoteport.port_role &
(FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
...@@ -3009,11 +2986,13 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ...@@ -3009,11 +2986,13 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
} }
ctrl->ctrl.opts = opts; ctrl->ctrl.opts = opts;
ctrl->ctrl.nr_reconnects = 0;
INIT_LIST_HEAD(&ctrl->ctrl_list); INIT_LIST_HEAD(&ctrl->ctrl_list);
ctrl->lport = lport; ctrl->lport = lport;
ctrl->rport = rport; ctrl->rport = rport;
ctrl->dev = lport->dev; ctrl->dev = lport->dev;
ctrl->cnum = idx; ctrl->cnum = idx;
ctrl->ioq_live = false;
ctrl->assoc_active = false; ctrl->assoc_active = false;
init_waitqueue_head(&ctrl->ioabort_wait); init_waitqueue_head(&ctrl->ioabort_wait);
...@@ -3032,6 +3011,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ...@@ -3032,6 +3011,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ctrl->ctrl.sqsize = opts->queue_size - 1; ctrl->ctrl.sqsize = opts->queue_size - 1;
ctrl->ctrl.kato = opts->kato; ctrl->ctrl.kato = opts->kato;
ctrl->ctrl.cntlid = 0xffff;
ret = -ENOMEM; ret = -ENOMEM;
ctrl->queues = kcalloc(ctrl->ctrl.queue_count, ctrl->queues = kcalloc(ctrl->ctrl.queue_count,
...@@ -3039,6 +3019,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ...@@ -3039,6 +3019,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
if (!ctrl->queues) if (!ctrl->queues)
goto out_free_ida; goto out_free_ida;
nvme_fc_init_queue(ctrl, 0);
memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
...@@ -3081,62 +3063,24 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ...@@ -3081,62 +3063,24 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list);
spin_unlock_irqrestore(&rport->lock, flags); spin_unlock_irqrestore(&rport->lock, flags);
/* if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) ||
* It's possible that transactions used to create the association !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
* may fail. Examples: CreateAssociation LS or CreateIOConnection
* LS gets dropped/corrupted/fails; or a frame gets dropped or a
* command times out for one of the actions to init the controller
* (Connect, Get/Set_Property, Set_Features, etc). Many of these
* transport errors (frame drop, LS failure) inherently must kill
* the association. The transport is coded so that any command used
* to create the association (prior to a LIVE state transition
* while NEW or CONNECTING) will fail if it completes in error or
* times out.
*
* As such: as the connect request was mostly likely due to a
* udev event that discovered the remote port, meaning there is
* not an admin or script there to restart if the connect
* request fails, retry the initial connection creation up to
* three times before giving up and declaring failure.
*/
for (retry = 0; retry < 3; retry++) {
ret = nvme_fc_create_association(ctrl);
if (!ret)
break;
}
if (ret) {
nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
cancel_work_sync(&ctrl->ctrl.reset_work);
cancel_delayed_work_sync(&ctrl->connect_work);
/* couldn't schedule retry - fail out */
dev_err(ctrl->ctrl.device, dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: Connect retry failed\n", ctrl->cnum); "NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum);
goto fail_ctrl;
ctrl->ctrl.opts = NULL; }
/* initiate nvme ctrl ref counting teardown */ nvme_get_ctrl(&ctrl->ctrl);
nvme_uninit_ctrl(&ctrl->ctrl);
/* Remove core ctrl ref. */ if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
nvme_put_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl);
dev_err(ctrl->ctrl.device,
/* as we're past the point where we transition to the ref "NVME-FC{%d}: failed to schedule initial connect\n",
* counting teardown path, if we return a bad pointer here, ctrl->cnum);
* the calling routine, thinking it's prior to the goto fail_ctrl;
* transition, will do an rport put. Since the teardown
* path also does a rport put, we do an extra get here so
* that proper order/teardown happens.
*/
nvme_fc_rport_get(rport);
if (ret > 0)
ret = -EIO;
return ERR_PTR(ret);
} }
nvme_get_ctrl(&ctrl->ctrl); flush_delayed_work(&ctrl->connect_work);
dev_info(ctrl->ctrl.device, dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: new ctrl: NQN \"%s\"\n", "NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
...@@ -3144,6 +3088,30 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ...@@ -3144,6 +3088,30 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
return &ctrl->ctrl; return &ctrl->ctrl;
fail_ctrl:
nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
cancel_work_sync(&ctrl->ctrl.reset_work);
cancel_delayed_work_sync(&ctrl->connect_work);
ctrl->ctrl.opts = NULL;
/* initiate nvme ctrl ref counting teardown */
nvme_uninit_ctrl(&ctrl->ctrl);
/* Remove core ctrl ref. */
nvme_put_ctrl(&ctrl->ctrl);
/* as we're past the point where we transition to the ref
* counting teardown path, if we return a bad pointer here,
* the calling routine, thinking it's prior to the
* transition, will do an rport put. Since the teardown
* path also does a rport put, we do an extra get here so
* that proper order/teardown happens.
*/
nvme_fc_rport_get(rport);
return ERR_PTR(-EIO);
out_cleanup_admin_q: out_cleanup_admin_q:
blk_cleanup_queue(ctrl->ctrl.admin_q); blk_cleanup_queue(ctrl->ctrl.admin_q);
out_free_admin_tag_set: out_free_admin_tag_set:
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
*/ */
#include <linux/moduleparam.h> #include <linux/moduleparam.h>
#include <trace/events/block.h>
#include "nvme.h" #include "nvme.h"
static bool multipath = true; static bool multipath = true;
...@@ -111,6 +112,9 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, ...@@ -111,6 +112,9 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
if (likely(ns)) { if (likely(ns)) {
bio->bi_disk = ns->disk; bio->bi_disk = ns->disk;
bio->bi_opf |= REQ_NVME_MPATH; bio->bi_opf |= REQ_NVME_MPATH;
trace_block_bio_remap(bio->bi_disk->queue, bio,
disk_devt(ns->head->disk),
bio->bi_iter.bi_sector);
ret = direct_make_request(bio); ret = direct_make_request(bio);
} else if (!list_empty_careful(&head->list)) { } else if (!list_empty_careful(&head->list)) {
dev_warn_ratelimited(dev, "no path available - requeuing I/O\n"); dev_warn_ratelimited(dev, "no path available - requeuing I/O\n");
......
...@@ -321,7 +321,6 @@ struct nvme_ctrl_ops { ...@@ -321,7 +321,6 @@ struct nvme_ctrl_ops {
void (*submit_async_event)(struct nvme_ctrl *ctrl); void (*submit_async_event)(struct nvme_ctrl *ctrl);
void (*delete_ctrl)(struct nvme_ctrl *ctrl); void (*delete_ctrl)(struct nvme_ctrl *ctrl);
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
int (*reinit_request)(void *data, struct request *rq);
void (*stop_ctrl)(struct nvme_ctrl *ctrl); void (*stop_ctrl)(struct nvme_ctrl *ctrl);
}; };
...@@ -416,7 +415,6 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl); ...@@ -416,7 +415,6 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze(struct nvme_ctrl *ctrl); void nvme_wait_freeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
void nvme_start_freeze(struct nvme_ctrl *ctrl); void nvme_start_freeze(struct nvme_ctrl *ctrl);
int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set);
#define NVME_QID_ANY -1 #define NVME_QID_ANY -1
struct request *nvme_alloc_request(struct request_queue *q, struct request *nvme_alloc_request(struct request_queue *q,
......
...@@ -1189,21 +1189,38 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, ...@@ -1189,21 +1189,38 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents, count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents,
rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE); rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
if (unlikely(count <= 0)) { if (unlikely(count <= 0)) {
sg_free_table_chained(&req->sg_table, true); ret = -EIO;
return -EIO; goto out_free_table;
} }
if (count == 1) { if (count == 1) {
if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) && if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
blk_rq_payload_bytes(rq) <= blk_rq_payload_bytes(rq) <=
nvme_rdma_inline_data_size(queue)) nvme_rdma_inline_data_size(queue)) {
return nvme_rdma_map_sg_inline(queue, req, c); ret = nvme_rdma_map_sg_inline(queue, req, c);
goto out;
}
if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
return nvme_rdma_map_sg_single(queue, req, c); ret = nvme_rdma_map_sg_single(queue, req, c);
goto out;
}
} }
return nvme_rdma_map_sg_fr(queue, req, c, count); ret = nvme_rdma_map_sg_fr(queue, req, c, count);
out:
if (unlikely(ret))
goto out_unmap_sg;
return 0;
out_unmap_sg:
ib_dma_unmap_sg(ibdev, req->sg_table.sgl,
req->nents, rq_data_dir(rq) ==
WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
out_free_table:
sg_free_table_chained(&req->sg_table, true);
return ret;
} }
static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
...@@ -1613,15 +1630,14 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -1613,15 +1630,14 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_rdma_qe *sqe = &req->sqe; struct nvme_rdma_qe *sqe = &req->sqe;
struct nvme_command *c = sqe->data; struct nvme_command *c = sqe->data;
struct ib_device *dev; struct ib_device *dev;
bool queue_ready = test_bit(NVME_RDMA_Q_LIVE, &queue->flags);
blk_status_t ret; blk_status_t ret;
int err; int err;
WARN_ON_ONCE(rq->tag < 0); WARN_ON_ONCE(rq->tag < 0);
ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq, if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
test_bit(NVME_RDMA_Q_LIVE, &queue->flags), true); return nvmf_fail_nonready_command(rq);
if (unlikely(ret))
return ret;
dev = queue->device->dev; dev = queue->device->dev;
ib_dma_sync_single_for_cpu(dev, sqe->dma, ib_dma_sync_single_for_cpu(dev, sqe->dma,
......
...@@ -119,9 +119,11 @@ static void nvmet_execute_get_log_page_smart(struct nvmet_req *req) ...@@ -119,9 +119,11 @@ static void nvmet_execute_get_log_page_smart(struct nvmet_req *req)
else else
status = nvmet_get_smart_log_nsid(req, log); status = nvmet_get_smart_log_nsid(req, log);
if (status) if (status)
goto out; goto out_free_log;
status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log)); status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
out_free_log:
kfree(log);
out: out:
nvmet_req_complete(req, status); nvmet_req_complete(req, status);
} }
......
...@@ -158,12 +158,11 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -158,12 +158,11 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_loop_queue *queue = hctx->driver_data; struct nvme_loop_queue *queue = hctx->driver_data;
struct request *req = bd->rq; struct request *req = bd->rq;
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
bool queue_ready = test_bit(NVME_LOOP_Q_LIVE, &queue->flags);
blk_status_t ret; blk_status_t ret;
ret = nvmf_check_if_ready(&queue->ctrl->ctrl, req, if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready))
test_bit(NVME_LOOP_Q_LIVE, &queue->flags), true); return nvmf_fail_nonready_command(req);
if (unlikely(ret))
return ret;
ret = nvme_setup_cmd(ns, req, &iod->cmd); ret = nvme_setup_cmd(ns, req, &iod->cmd);
if (ret) if (ret)
......
...@@ -281,8 +281,6 @@ void blk_freeze_queue_start(struct request_queue *q); ...@@ -281,8 +281,6 @@ void blk_freeze_queue_start(struct request_queue *q);
void blk_mq_freeze_queue_wait(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q);
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
unsigned long timeout); unsigned long timeout);
int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data,
int (reinit_request)(void *, struct request *));
int blk_mq_map_queues(struct blk_mq_tag_set *set); int blk_mq_map_queues(struct blk_mq_tag_set *set);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment