Commit 9ec58ec7 authored by James Smart, committed by Martin K. Petersen

scsi: lpfc: Fix NVMe recovery after mailbox timeout

If a mailbox command times out, the SLI port is deemed in error and the
port is reset.  The HBA cleanup is not returning I/Os to the NVMe layer
before the port is unregistered. This is because the HBA is marked
offline (!SLI_ACTIVE) and cleanup is done by the mailbox timeout handler
rather than by a general adapter reset routine, and the mailbox timeout
handler only cleaned up SCSI I/Os.

Fix by reworking the mailbox handler to:

 - After handling the mailbox error, detect whether the board is already
   in failure (possibly due to another error) and, if so, leave cleanup
   to the other handler.

 - If the mailbox command timeout is the initial detector of the port
   error, continue with the board cleanup and mark the adapter offline
   (!SLI_ACTIVE). Remove the SCSI-only I/O cleanup routine; the generic
   reset adapter routine that is subsequently invoked will clean up the
   I/Os.

 - Have the reset adapter routine flush all NVMe and SCSI I/Os if the
   adapter has been marked failed (!SLI_ACTIVE).

 - Rework the NVMe I/O terminate routine to take a status code with which
   to fail the I/O, and update it so that a cleaned-up I/O calls the wqe
   completion routine. Currently it bypasses the wqe cleanup and calls the
   NVMe I/O completion directly. The wqe completion routine takes care of
   data structure and node cleanup and then calls the NVMe I/O completion
   handler (a condensed sketch of this path follows the list).

Link: https://lore.kernel.org/r/20210104180240.46824-11-jsmart2021@gmail.com
Co-developed-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent 31051249
@@ -255,7 +255,6 @@ void lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba,
 int lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
			       struct fc_frame_header *fc_hdr);
 void lpfc_nvmet_wqfull_process(struct lpfc_hba *phba, struct lpfc_queue *wq);
-void lpfc_sli_flush_nvme_rings(struct lpfc_hba *phba);
 void lpfc_nvme_wait_for_io_drain(struct lpfc_hba *phba);
 void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *,
			uint16_t);
@@ -598,7 +597,8 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *ncmd,
 void lpfc_io_ktime(struct lpfc_hba *phba, struct lpfc_io_buf *ncmd);
 void lpfc_wqe_cmd_template(void);
 void lpfc_nvmet_cmd_template(void);
-void lpfc_nvme_cancel_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn);
+void lpfc_nvme_cancel_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
+			   uint32_t stat, uint32_t param);
 extern int lpfc_enable_nvmet_cnt;
 extern unsigned long long lpfc_enable_nvmet[];
 extern int lpfc_no_hba_reset_cnt;
......
@@ -6191,10 +6191,14 @@ lpfc_reset_hba(struct lpfc_hba *phba)
		phba->link_state = LPFC_HBA_ERROR;
		return;
	}
-	if (phba->sli.sli_flag & LPFC_SLI_ACTIVE)
+
+	/* If not LPFC_SLI_ACTIVE, force all IO to be flushed */
+	if (phba->sli.sli_flag & LPFC_SLI_ACTIVE) {
		lpfc_offline_prep(phba, LPFC_MBX_WAIT);
-	else
+	} else {
		lpfc_offline_prep(phba, LPFC_MBX_NO_WAIT);
+		lpfc_sli_flush_io_rings(phba);
+	}
	lpfc_offline(phba);
	lpfc_sli_brdrestart(phba);
	lpfc_online(phba);
......
@@ -2596,14 +2596,17 @@ lpfc_nvme_wait_for_io_drain(struct lpfc_hba *phba)
 }

 void
-lpfc_nvme_cancel_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn)
+lpfc_nvme_cancel_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
+		      uint32_t stat, uint32_t param)
 {
 #if (IS_ENABLED(CONFIG_NVME_FC))
	struct lpfc_io_buf *lpfc_ncmd;
	struct nvmefc_fcp_req *nCmd;
-	struct lpfc_nvme_fcpreq_priv *freqpriv;
+	struct lpfc_wcqe_complete wcqe;
+	struct lpfc_wcqe_complete *wcqep = &wcqe;

-	if (!pwqeIn->context1) {
+	lpfc_ncmd = (struct lpfc_io_buf *)pwqeIn->context1;
+	if (!lpfc_ncmd) {
		lpfc_sli_release_iocbq(phba, pwqeIn);
		return;
	}
@@ -2613,31 +2616,29 @@ lpfc_nvme_cancel_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn)
		lpfc_sli_release_iocbq(phba, pwqeIn);
		return;
	}
-	lpfc_ncmd = (struct lpfc_io_buf *)pwqeIn->context1;

	spin_lock(&lpfc_ncmd->buf_lock);
-	if (!lpfc_ncmd->nvmeCmd) {
+	nCmd = lpfc_ncmd->nvmeCmd;
+	if (!nCmd) {
		spin_unlock(&lpfc_ncmd->buf_lock);
		lpfc_release_nvme_buf(phba, lpfc_ncmd);
		return;
	}
+	spin_unlock(&lpfc_ncmd->buf_lock);

-	nCmd = lpfc_ncmd->nvmeCmd;
	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
			"6194 NVME Cancel xri %x\n",
			lpfc_ncmd->cur_iocbq.sli4_xritag);

-	nCmd->transferred_length = 0;
-	nCmd->rcv_rsplen = 0;
-	nCmd->status = NVME_SC_INTERNAL;
-	freqpriv = nCmd->private;
-	freqpriv->nvme_buf = NULL;
-	lpfc_ncmd->nvmeCmd = NULL;
-	spin_unlock(&lpfc_ncmd->buf_lock);
-	nCmd->done(nCmd);
+	wcqep->word0 = 0;
+	bf_set(lpfc_wcqe_c_status, wcqep, stat);
+	wcqep->parameter = param;
+	wcqep->word3 = 0; /* xb is 0 */

	/* Call release with XB=1 to queue the IO into the abort list. */
-	lpfc_release_nvme_buf(phba, lpfc_ncmd);
+	if (phba->sli.sli_flag & LPFC_SLI_ACTIVE)
+		bf_set(lpfc_wcqe_c_xb, wcqep, 1);
+	(pwqeIn->wqe_cmpl)(phba, pwqeIn, wcqep);
 #endif
 }
@@ -1532,15 +1532,19 @@ lpfc_sli_cancel_iocbs(struct lpfc_hba *phba, struct list_head *iocblist,

	while (!list_empty(iocblist)) {
		list_remove_head(iocblist, piocb, struct lpfc_iocbq, list);
-		if (!piocb->iocb_cmpl) {
+		if (piocb->wqe_cmpl) {
			if (piocb->iocb_flag & LPFC_IO_NVME)
-				lpfc_nvme_cancel_iocb(phba, piocb);
+				lpfc_nvme_cancel_iocb(phba, piocb,
+						      ulpstatus, ulpWord4);
			else
				lpfc_sli_release_iocbq(phba, piocb);
-		} else {
+		} else if (piocb->iocb_cmpl) {
			piocb->iocb.ulpStatus = ulpstatus;
			piocb->iocb.un.ulpWord[4] = ulpWord4;
			(piocb->iocb_cmpl) (phba, piocb, piocb);
+		} else {
+			lpfc_sli_release_iocbq(phba, piocb);
		}
	}
	return;
@@ -8269,8 +8273,10 @@ lpfc_mbox_timeout_handler(struct lpfc_hba *phba)
	struct lpfc_sli *psli = &phba->sli;

-	/* If the mailbox completed, process the completion and return */
-	if (lpfc_sli4_process_missed_mbox_completions(phba))
+	/* If the mailbox completed, process the completion */
+	lpfc_sli4_process_missed_mbox_completions(phba);
+
+	if (!(psli->sli_flag & LPFC_SLI_ACTIVE))
		return;

	if (pmbox != NULL)
@@ -8311,8 +8317,6 @@ lpfc_mbox_timeout_handler(struct lpfc_hba *phba)
	psli->sli_flag &= ~LPFC_SLI_ACTIVE;
	spin_unlock_irq(&phba->hbalock);

-	lpfc_sli_abort_fcp_rings(phba);
-
	lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
			"0345 Resetting board due to mailbox timeout\n");
@@ -11783,7 +11787,7 @@ lpfc_sli_validate_fcp_iocb(struct lpfc_iocbq *iocbq, struct lpfc_vport *vport,
	struct lpfc_io_buf *lpfc_cmd;
	int rc = 1;

-	if (iocbq->vport != vport)
+	if (!iocbq || iocbq->vport != vport)
		return rc;

	if (!(iocbq->iocb_flag & LPFC_IO_FCP) ||
......