Commit e74006ed authored by Xiang Chen's avatar Xiang Chen Committed by Martin K. Petersen

scsi: hisi_sas: Fix the conflict between device gone and host reset

When device gone, it will check whether it is during reset, if not, it will
send internal task abort. Before internal task abort returned, reset
begins, and it will check whether SAS_PHY_UNUSED is set, if not, it will
call hisi_sas_init_device(), but at that time domain_device may already be
freed or part of it is freed, so it may referenece null pointer in
hisi_sas_init_device(). It may occur as follows:

    thread0				thread1
hisi_sas_dev_gone()
    check whether in RESET(no)
    internal task abort
				    reset prep
				    soft_reset
				    ... (part of reset_done)
    internal task abort failed
    release resource anyway
    clear_itct
    device->lldd_dev=NULL
				    hisi_sas_reset_init_all_device
					check sas_dev->dev_type is SAS_PHY_UNUSED and
					!device
    set dev_type SAS_PHY_UNUSED
    sas_free_device
					hisi_sas_init_device
					...

Semaphore hisi_hba.sema is used to sync the processes of device gone and
host reset.

To solve the issue, expand the scope that semaphore protects and let them
never occur together.

And also some places will check whether domain_device is NULL to judge
whether the device is gone. So when device gone, need to clear
sas_dev->sas_device.

Link: https://lore.kernel.org/r/1567774537-20003-14-git-send-email-john.garry@huawei.comSigned-off-by: default avatarXiang Chen <chenxiang66@hisilicon.com>
Signed-off-by: default avatarJohn Garry <john.garry@huawei.com>
Signed-off-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
parent 97b151e7
...@@ -1049,21 +1049,22 @@ static void hisi_sas_dev_gone(struct domain_device *device) ...@@ -1049,21 +1049,22 @@ static void hisi_sas_dev_gone(struct domain_device *device)
dev_info(dev, "dev[%d:%x] is gone\n", dev_info(dev, "dev[%d:%x] is gone\n",
sas_dev->device_id, sas_dev->dev_type); sas_dev->device_id, sas_dev->dev_type);
down(&hisi_hba->sem);
if (!test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags)) { if (!test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags)) {
hisi_sas_internal_task_abort(hisi_hba, device, hisi_sas_internal_task_abort(hisi_hba, device,
HISI_SAS_INT_ABT_DEV, 0); HISI_SAS_INT_ABT_DEV, 0);
hisi_sas_dereg_device(hisi_hba, device); hisi_sas_dereg_device(hisi_hba, device);
down(&hisi_hba->sem);
hisi_hba->hw->clear_itct(hisi_hba, sas_dev); hisi_hba->hw->clear_itct(hisi_hba, sas_dev);
up(&hisi_hba->sem);
device->lldd_dev = NULL; device->lldd_dev = NULL;
} }
if (hisi_hba->hw->free_device) if (hisi_hba->hw->free_device)
hisi_hba->hw->free_device(sas_dev); hisi_hba->hw->free_device(sas_dev);
sas_dev->dev_type = SAS_PHY_UNUSED; sas_dev->dev_type = SAS_PHY_UNUSED;
sas_dev->sas_device = NULL;
up(&hisi_hba->sem);
} }
static int hisi_sas_queue_command(struct sas_task *task, gfp_t gfp_flags) static int hisi_sas_queue_command(struct sas_task *task, gfp_t gfp_flags)
...@@ -1543,11 +1544,11 @@ void hisi_sas_controller_reset_done(struct hisi_hba *hisi_hba) ...@@ -1543,11 +1544,11 @@ void hisi_sas_controller_reset_done(struct hisi_hba *hisi_hba)
msleep(1000); msleep(1000);
hisi_sas_refresh_port_id(hisi_hba); hisi_sas_refresh_port_id(hisi_hba);
clear_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags); clear_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags);
up(&hisi_hba->sem);
if (hisi_hba->reject_stp_links_msk) if (hisi_hba->reject_stp_links_msk)
hisi_sas_terminate_stp_reject(hisi_hba); hisi_sas_terminate_stp_reject(hisi_hba);
hisi_sas_reset_init_all_devices(hisi_hba); hisi_sas_reset_init_all_devices(hisi_hba);
up(&hisi_hba->sem);
scsi_unblock_requests(shost); scsi_unblock_requests(shost);
clear_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags); clear_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment