Commit 0a02e39f authored by James Smart, committed by Christoph Hellwig

nvme-fc: correct io termination handling

The io completion handling for i/o's that are failing due to
a transport error or association termination had issues, causing
io failures (DNR set so retries didn't kick in) or long stalls.

Change the io completion handler for the following items:

When an io has been completed due to a transport abort (based on an
exchange error) or when marked as aborted as part of an association
termination (FCOP_FLAGS_TERMIO), set the NVME completion status to
NVME_SC_ABORTED. By default, do not set DNR on the status so that a
retry can be attempted after association recreate.

In cases where an io is failed (non-successful nvme status including
aborted), if the controller is being deleted (blk_queue_dying) or
the io was part of the ios used for association creation (ctrl state
is NEW or RECONNECTING), then additionally set the DNR bit so the io
will not be retried. If the failed io was part of association creation,
the failure will tear down the partially completed association and
typically restart a new reconnect attempt (another create association
later).

Rearranged code flow to remove a largely unneeded local variable.
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
parent a7a7cbe3
...@@ -1387,7 +1387,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) ...@@ -1387,7 +1387,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
struct nvme_command *sqe = &op->cmd_iu.sqe; struct nvme_command *sqe = &op->cmd_iu.sqe;
__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1); __le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
union nvme_result result; union nvme_result result;
bool complete_rq, terminate_assoc = true; bool terminate_assoc = true;
/* /*
* WARNING: * WARNING:
...@@ -1429,8 +1429,9 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) ...@@ -1429,8 +1429,9 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma, fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
sizeof(op->rsp_iu), DMA_FROM_DEVICE); sizeof(op->rsp_iu), DMA_FROM_DEVICE);
if (atomic_read(&op->state) == FCPOP_STATE_ABORTED) if (atomic_read(&op->state) == FCPOP_STATE_ABORTED ||
status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1); op->flags & FCOP_FLAGS_TERMIO)
status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
else if (freq->status) else if (freq->status)
status = cpu_to_le16(NVME_SC_INTERNAL << 1); status = cpu_to_le16(NVME_SC_INTERNAL << 1);
...@@ -1494,23 +1495,27 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) ...@@ -1494,23 +1495,27 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
done: done:
if (op->flags & FCOP_FLAGS_AEN) { if (op->flags & FCOP_FLAGS_AEN) {
nvme_complete_async_event(&queue->ctrl->ctrl, status, &result); nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); __nvme_fc_fcpop_chk_teardowns(ctrl, op);
atomic_set(&op->state, FCPOP_STATE_IDLE); atomic_set(&op->state, FCPOP_STATE_IDLE);
op->flags = FCOP_FLAGS_AEN; /* clear other flags */ op->flags = FCOP_FLAGS_AEN; /* clear other flags */
nvme_fc_ctrl_put(ctrl); nvme_fc_ctrl_put(ctrl);
goto check_error; goto check_error;
} }
complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); /*
if (!complete_rq) { * Force failures of commands if we're killing the controller
if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { * or have an error on a command used to create an new association
status = cpu_to_le16(NVME_SC_ABORT_REQ << 1); */
if (blk_queue_dying(rq->q)) if (status &&
(blk_queue_dying(rq->q) ||
ctrl->ctrl.state == NVME_CTRL_NEW ||
ctrl->ctrl.state == NVME_CTRL_RECONNECTING))
status |= cpu_to_le16(NVME_SC_DNR << 1); status |= cpu_to_le16(NVME_SC_DNR << 1);
}
nvme_end_request(rq, status, result); if (__nvme_fc_fcpop_chk_teardowns(ctrl, op))
} else
__nvme_fc_final_op_cleanup(rq); __nvme_fc_final_op_cleanup(rq);
else
nvme_end_request(rq, status, result);
check_error: check_error:
if (terminate_assoc) if (terminate_assoc)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment