Commit 6cdefc6e, authored by James Smart, committed by Christoph Hellwig

nvme: if_ready checks to fail io to deleting controller

The revised if_ready checks skipped over the case of returning error when
the controller is being deleted.  Instead it was returning BUSY, which
caused the ios to retry, which caused the ns delete to hang waiting for
the ios to drain.

Stack trace of hang looks like:
 kworker/u64:2   D    0    74      2 0x80000000
 Workqueue: nvme-delete-wq nvme_delete_ctrl_work [nvme_core]
 Call Trace:
  ? __schedule+0x26d/0x820
  schedule+0x32/0x80
  blk_mq_freeze_queue_wait+0x36/0x80
  ? remove_wait_queue+0x60/0x60
  blk_cleanup_queue+0x72/0x160
  nvme_ns_remove+0x106/0x140 [nvme_core]
  nvme_remove_namespaces+0x7e/0xa0 [nvme_core]
  nvme_delete_ctrl_work+0x4d/0x80 [nvme_core]
  process_one_work+0x160/0x350
  worker_thread+0x1c3/0x3d0
  kthread+0xf5/0x130
  ? process_one_work+0x350/0x350
  ? kthread_bind+0x10/0x10
  ret_from_fork+0x1f/0x30

Extend nvmf_fail_nonready_command() to supply the controller pointer so
that the controller state can be looked at. Fail any io to a controller
that is deleting.

Fixes: 3bc32bb1 ("nvme-fabrics: refactor queue ready check")
Fixes: 35897b92 ("nvme-fabrics: fix and refine state checks in __nvmf_check_ready")
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Ewan D. Milne <emilne@redhat.com>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
parent d082dc15
...@@ -539,14 +539,18 @@ static struct nvmf_transport_ops *nvmf_lookup_transport( ...@@ -539,14 +539,18 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
/* /*
* For something we're not in a state to send to the device the default action * For something we're not in a state to send to the device the default action
* is to busy it and retry it after the controller state is recovered. However, * is to busy it and retry it after the controller state is recovered. However,
* anything marked for failfast or nvme multipath is immediately failed. * if the controller is deleting or if anything is marked for failfast or
* nvme multipath it is immediately failed.
* *
* Note: commands used to initialize the controller will be marked for failfast. * Note: commands used to initialize the controller will be marked for failfast.
* Note: nvme cli/ioctl commands are marked for failfast. * Note: nvme cli/ioctl commands are marked for failfast.
*/ */
blk_status_t nvmf_fail_nonready_command(struct request *rq) blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
struct request *rq)
{ {
if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) if (ctrl->state != NVME_CTRL_DELETING &&
ctrl->state != NVME_CTRL_DEAD &&
!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
return BLK_STS_RESOURCE; return BLK_STS_RESOURCE;
nvme_req(rq)->status = NVME_SC_ABORT_REQ; nvme_req(rq)->status = NVME_SC_ABORT_REQ;
return BLK_STS_IOERR; return BLK_STS_IOERR;
......
...@@ -162,7 +162,8 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops); ...@@ -162,7 +162,8 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
void nvmf_free_options(struct nvmf_ctrl_options *opts); void nvmf_free_options(struct nvmf_ctrl_options *opts);
int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
blk_status_t nvmf_fail_nonready_command(struct request *rq); blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
struct request *rq);
bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
bool queue_live); bool queue_live);
......
...@@ -2272,7 +2272,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -2272,7 +2272,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE || if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
return nvmf_fail_nonready_command(rq); return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
ret = nvme_setup_cmd(ns, rq, sqe); ret = nvme_setup_cmd(ns, rq, sqe);
if (ret) if (ret)
......
...@@ -1639,7 +1639,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -1639,7 +1639,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
WARN_ON_ONCE(rq->tag < 0); WARN_ON_ONCE(rq->tag < 0);
if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
return nvmf_fail_nonready_command(rq); return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
dev = queue->device->dev; dev = queue->device->dev;
ib_dma_sync_single_for_cpu(dev, sqe->dma, ib_dma_sync_single_for_cpu(dev, sqe->dma,
......
...@@ -162,7 +162,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -162,7 +162,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_status_t ret; blk_status_t ret;
if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready)) if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready))
return nvmf_fail_nonready_command(req); return nvmf_fail_nonready_command(&queue->ctrl->ctrl, req);
ret = nvme_setup_cmd(ns, req, &iod->cmd); ret = nvme_setup_cmd(ns, req, &iod->cmd);
if (ret) if (ret)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment