Commit 7a7e66c6 authored by Can Guo, committed by Martin K. Petersen

scsi: ufs: Fix a race condition between ufshcd_abort() and eh_work()

In the current task abort routine, if a task abort happens to the device W-LUN,
the code directly jumps to ufshcd_eh_host_reset_handler() to perform a full
reset and restore, then returns FAIL or SUCCESS. Commands sent to the device
W-LUN are most likely the SSU cmds sent during UFS PM operations. If such an
SSU cmd enters the task abort routine, it will get stuck when
ufshcd_eh_host_reset_handler() flushes eh_work, since err_handler is
serialized with PM operations.

In order to unblock the above call path, we merely clean up the lrb taken by
this cmd, queue the eh_work and return SUCCESS. Once the cmd is aborted,
the PM operation which sent out the cmd just errors out, and err_handler
is then able to proceed with the full reset and restore.

In this scenario, the cmd is aborted even before it is actually cleared by
HW, so set the lrb->in_use flag to prevent subsequent cmds, including SCSI
cmds and dev cmds, from taking the lrb released from abort. The flag shall
eventually be cleared in __ufshcd_transfer_req_compl() invoked by the full
reset and restore from err_handler.

[mkp: conflict with event logging series]

Link: https://lore.kernel.org/r/1606910644-21185-3-git-send-email-cang@codeaurora.org
Reviewed-by: Asutosh Das <asutoshd@codeaurora.org>
Reviewed-by: Stanley Chu <stanley.chu@mediatek.com>
Signed-off-by: Can Guo <cang@codeaurora.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent 88a92d6a
...@@ -2558,6 +2558,14 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) ...@@ -2558,6 +2558,14 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
(hba->clk_gating.state != CLKS_ON)); (hba->clk_gating.state != CLKS_ON));
lrbp = &hba->lrb[tag]; lrbp = &hba->lrb[tag];
if (unlikely(lrbp->in_use)) {
if (hba->pm_op_in_progress)
set_host_byte(cmd, DID_BAD_TARGET);
else
err = SCSI_MLQUEUE_HOST_BUSY;
ufshcd_release(hba);
goto out;
}
WARN_ON(lrbp->cmd); WARN_ON(lrbp->cmd);
lrbp->cmd = cmd; lrbp->cmd = cmd;
...@@ -2800,6 +2808,11 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, ...@@ -2800,6 +2808,11 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
init_completion(&wait); init_completion(&wait);
lrbp = &hba->lrb[tag]; lrbp = &hba->lrb[tag];
if (unlikely(lrbp->in_use)) {
err = -EBUSY;
goto out;
}
WARN_ON(lrbp->cmd); WARN_ON(lrbp->cmd);
err = ufshcd_compose_dev_cmd(hba, lrbp, cmd_type, tag); err = ufshcd_compose_dev_cmd(hba, lrbp, cmd_type, tag);
if (unlikely(err)) if (unlikely(err))
...@@ -2816,6 +2829,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, ...@@ -2816,6 +2829,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
err = ufshcd_wait_for_dev_cmd(hba, lrbp, timeout); err = ufshcd_wait_for_dev_cmd(hba, lrbp, timeout);
out:
ufshcd_add_query_upiu_trace(hba, tag, ufshcd_add_query_upiu_trace(hba, tag,
err ? "query_complete_err" : "query_complete"); err ? "query_complete_err" : "query_complete");
...@@ -4980,9 +4994,11 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, ...@@ -4980,9 +4994,11 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
struct scsi_cmnd *cmd; struct scsi_cmnd *cmd;
int result; int result;
int index; int index;
bool update_scaling = false;
for_each_set_bit(index, &completed_reqs, hba->nutrs) { for_each_set_bit(index, &completed_reqs, hba->nutrs) {
lrbp = &hba->lrb[index]; lrbp = &hba->lrb[index];
lrbp->in_use = false;
lrbp->compl_time_stamp = ktime_get(); lrbp->compl_time_stamp = ktime_get();
cmd = lrbp->cmd; cmd = lrbp->cmd;
if (cmd) { if (cmd) {
...@@ -4995,15 +5011,17 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, ...@@ -4995,15 +5011,17 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
/* Do not touch lrbp after scsi done */ /* Do not touch lrbp after scsi done */
cmd->scsi_done(cmd); cmd->scsi_done(cmd);
__ufshcd_release(hba); __ufshcd_release(hba);
update_scaling = true;
} else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE || } else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE ||
lrbp->command_type == UTP_CMD_TYPE_UFS_STORAGE) { lrbp->command_type == UTP_CMD_TYPE_UFS_STORAGE) {
if (hba->dev_cmd.complete) { if (hba->dev_cmd.complete) {
ufshcd_add_command_trace(hba, index, ufshcd_add_command_trace(hba, index,
"dev_complete"); "dev_complete");
complete(hba->dev_cmd.complete); complete(hba->dev_cmd.complete);
update_scaling = true;
} }
} }
if (ufshcd_is_clkscaling_supported(hba)) if (ufshcd_is_clkscaling_supported(hba) && update_scaling)
hba->clk_scaling.active_reqs--; hba->clk_scaling.active_reqs--;
} }
...@@ -6426,8 +6444,12 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, ...@@ -6426,8 +6444,12 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
init_completion(&wait); init_completion(&wait);
lrbp = &hba->lrb[tag]; lrbp = &hba->lrb[tag];
WARN_ON(lrbp->cmd); if (unlikely(lrbp->in_use)) {
err = -EBUSY;
goto out;
}
WARN_ON(lrbp->cmd);
lrbp->cmd = NULL; lrbp->cmd = NULL;
lrbp->sense_bufflen = 0; lrbp->sense_bufflen = 0;
lrbp->sense_buffer = NULL; lrbp->sense_buffer = NULL;
...@@ -6499,6 +6521,7 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, ...@@ -6499,6 +6521,7 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
} }
} }
out:
blk_put_request(req); blk_put_request(req);
out_unlock: out_unlock:
up_read(&hba->clk_scaling_lock); up_read(&hba->clk_scaling_lock);
...@@ -6749,18 +6772,6 @@ static int ufshcd_abort(struct scsi_cmnd *cmd) ...@@ -6749,18 +6772,6 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
BUG(); BUG();
} }
/*
* Task abort to the device W-LUN is illegal. When this command
* will fail, due to spec violation, scsi err handling next step
* will be to send LU reset which, again, is a spec violation.
* To avoid these unnecessary/illegal step we skip to the last error
* handling stage: reset and restore.
*/
if (lrbp->lun == UFS_UPIU_UFS_DEVICE_WLUN) {
ufshcd_update_evt_hist(hba, UFS_EVT_ABORT, lrbp->lun);
return ufshcd_eh_host_reset_handler(cmd);
}
ufshcd_hold(hba, false); ufshcd_hold(hba, false);
reg = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); reg = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
/* If command is already aborted/completed, return SUCCESS */ /* If command is already aborted/completed, return SUCCESS */
...@@ -6781,7 +6792,7 @@ static int ufshcd_abort(struct scsi_cmnd *cmd) ...@@ -6781,7 +6792,7 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
* to reduce repeated printouts. For other aborted requests only print * to reduce repeated printouts. For other aborted requests only print
* basic details. * basic details.
*/ */
scsi_print_command(hba->lrb[tag].cmd); scsi_print_command(cmd);
if (!hba->req_abort_count) { if (!hba->req_abort_count) {
ufshcd_update_evt_hist(hba, UFS_EVT_ABORT, tag); ufshcd_update_evt_hist(hba, UFS_EVT_ABORT, tag);
ufshcd_print_evt_hist(hba); ufshcd_print_evt_hist(hba);
...@@ -6800,6 +6811,29 @@ static int ufshcd_abort(struct scsi_cmnd *cmd) ...@@ -6800,6 +6811,29 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
goto cleanup; goto cleanup;
} }
/*
* Task abort to the device W-LUN is illegal. When this command
* will fail, due to spec violation, scsi err handling next step
* will be to send LU reset which, again, is a spec violation.
* To avoid these unnecessary/illegal steps, first we clean up
* the lrb taken by this cmd and mark the lrb as in_use, then
* queue the eh_work and bail.
*/
if (lrbp->lun == UFS_UPIU_UFS_DEVICE_WLUN) {
ufshcd_update_evt_hist(hba, UFS_EVT_ABORT, lrbp->lun);
spin_lock_irqsave(host->host_lock, flags);
if (lrbp->cmd) {
__ufshcd_transfer_req_compl(hba, (1UL << tag));
__set_bit(tag, &hba->outstanding_reqs);
lrbp->in_use = true;
hba->force_reset = true;
ufshcd_schedule_eh_work(hba);
}
spin_unlock_irqrestore(host->host_lock, flags);
goto out;
}
/* Skip task abort in case previous aborts failed and report failure */ /* Skip task abort in case previous aborts failed and report failure */
if (lrbp->req_abort_skip) if (lrbp->req_abort_skip)
err = -EIO; err = -EIO;
......
...@@ -193,6 +193,7 @@ struct ufs_pm_lvl_states { ...@@ -193,6 +193,7 @@ struct ufs_pm_lvl_states {
* @crypto_key_slot: the key slot to use for inline crypto (-1 if none) * @crypto_key_slot: the key slot to use for inline crypto (-1 if none)
* @data_unit_num: the data unit number for the first block for inline crypto * @data_unit_num: the data unit number for the first block for inline crypto
* @req_abort_skip: skip request abort task flag * @req_abort_skip: skip request abort task flag
* @in_use: indicates that this lrb is still in use
*/ */
struct ufshcd_lrb { struct ufshcd_lrb {
struct utp_transfer_req_desc *utr_descriptor_ptr; struct utp_transfer_req_desc *utr_descriptor_ptr;
...@@ -222,6 +223,7 @@ struct ufshcd_lrb { ...@@ -222,6 +223,7 @@ struct ufshcd_lrb {
#endif #endif
bool req_abort_skip; bool req_abort_skip;
bool in_use;
}; };
/** /**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment