Commit 54d74e6b authored by Suganath Prabu, committed by Martin K. Petersen

scsi: mpt3sas: Run SAS DEVICE STATUS CHANGE EVENT from ISR

In some cases, such as while performing extensive expander resets or phy
resets, the user may observe that drives are not visible to the OS. The
driver's firmware-worker thread is blocked for more than 120 seconds,
resulting in a call trace. The sequence leading to the hang is:

1. The driver receives a target add event for Device A and registers this
device with the SML by calling sas_rphy_add(). The SML half-adds the
device, returns control to the driver by returning from sas_rphy_add(),
and starts background scanning on Device A.

2. While background scanning of Device A is in progress, the driver receives
a SAS DEVICE STATUS CHANGE EVENT with reason code "Internal device reset"
and therefore sets the tm_busy flag for Device A from FW worker thread
context. While the tm_busy flag is set, the driver returns SCSI commands
with device busy status, asking the kernel to retry the command after some
time. So the background scan of Device A waits for tm_busy to be cleared.

3. Meanwhile, the driver receives a target add event for Device B and calls
sas_rphy_add() to register this device with the SML. But since background
scanning of Device A is still pending, the SML does not return from
sas_rphy_add(), and the driver's firmware worker thread is blocked.

4. The driver now receives a SAS DEVICE STATUS CHANGE EVENT with reason code
"Internal device reset complete". But because the driver's firmware worker
thread is blocked in step 3, it cannot process this event and never clears
the tm_busy flag, so a deadlock occurs: the SML is waiting for the tm_busy
flag to be cleared, and the FW worker thread is waiting for the SML to
return from sas_rphy_add().

The same deadlock is observed even if Device B is being removed in step 3.
To limit these types of deadlocks, the driver now processes SAS DEVICE
STATUS CHANGE EVENT events from ISR context instead of from worker thread
context. This avoids the deadlock described above.
Signed-off-by: Suganath Prabu <suganath-prabu.subramani@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent 1edc6770
...@@ -6469,24 +6469,17 @@ _scsih_sas_device_status_change_event_debug(struct MPT3SAS_ADAPTER *ioc, ...@@ -6469,24 +6469,17 @@ _scsih_sas_device_status_change_event_debug(struct MPT3SAS_ADAPTER *ioc,
/** /**
* _scsih_sas_device_status_change_event - handle device status change * _scsih_sas_device_status_change_event - handle device status change
* @ioc: per adapter object * @ioc: per adapter object
* @fw_event: The fw_event_work object * @event_data: The fw event
* Context: user. * Context: user.
*/ */
static void static void
_scsih_sas_device_status_change_event(struct MPT3SAS_ADAPTER *ioc, _scsih_sas_device_status_change_event(struct MPT3SAS_ADAPTER *ioc,
struct fw_event_work *fw_event) Mpi2EventDataSasDeviceStatusChange_t *event_data)
{ {
struct MPT3SAS_TARGET *target_priv_data; struct MPT3SAS_TARGET *target_priv_data;
struct _sas_device *sas_device; struct _sas_device *sas_device;
u64 sas_address; u64 sas_address;
unsigned long flags; unsigned long flags;
Mpi2EventDataSasDeviceStatusChange_t *event_data =
(Mpi2EventDataSasDeviceStatusChange_t *)
fw_event->event_data;
if (ioc->logging_level & MPT_DEBUG_EVENT_WORK_TASK)
_scsih_sas_device_status_change_event_debug(ioc,
event_data);
/* In MPI Revision K (0xC), the internal device reset complete was /* In MPI Revision K (0xC), the internal device reset complete was
* implemented, so avoid setting tm_busy flag for older firmware. * implemented, so avoid setting tm_busy flag for older firmware.
...@@ -6518,6 +6511,12 @@ _scsih_sas_device_status_change_event(struct MPT3SAS_ADAPTER *ioc, ...@@ -6518,6 +6511,12 @@ _scsih_sas_device_status_change_event(struct MPT3SAS_ADAPTER *ioc,
else else
target_priv_data->tm_busy = 0; target_priv_data->tm_busy = 0;
if (ioc->logging_level & MPT_DEBUG_EVENT_WORK_TASK)
ioc_info(ioc,
"%s tm_busy flag for handle(0x%04x)\n",
(target_priv_data->tm_busy == 1) ? "Enable" : "Disable",
target_priv_data->handle);
out: out:
if (sas_device) if (sas_device)
sas_device_put(sas_device); sas_device_put(sas_device);
...@@ -9346,7 +9345,10 @@ _mpt3sas_fw_work(struct MPT3SAS_ADAPTER *ioc, struct fw_event_work *fw_event) ...@@ -9346,7 +9345,10 @@ _mpt3sas_fw_work(struct MPT3SAS_ADAPTER *ioc, struct fw_event_work *fw_event)
_scsih_sas_topology_change_event(ioc, fw_event); _scsih_sas_topology_change_event(ioc, fw_event);
break; break;
case MPI2_EVENT_SAS_DEVICE_STATUS_CHANGE: case MPI2_EVENT_SAS_DEVICE_STATUS_CHANGE:
_scsih_sas_device_status_change_event(ioc, fw_event); if (ioc->logging_level & MPT_DEBUG_EVENT_WORK_TASK)
_scsih_sas_device_status_change_event_debug(ioc,
(Mpi2EventDataSasDeviceStatusChange_t *)
fw_event->event_data);
break; break;
case MPI2_EVENT_SAS_DISCOVERY: case MPI2_EVENT_SAS_DISCOVERY:
_scsih_sas_discovery_event(ioc, fw_event); _scsih_sas_discovery_event(ioc, fw_event);
...@@ -9519,6 +9521,10 @@ mpt3sas_scsih_event_callback(struct MPT3SAS_ADAPTER *ioc, u8 msix_index, ...@@ -9519,6 +9521,10 @@ mpt3sas_scsih_event_callback(struct MPT3SAS_ADAPTER *ioc, u8 msix_index,
break; break;
} }
case MPI2_EVENT_SAS_DEVICE_STATUS_CHANGE: case MPI2_EVENT_SAS_DEVICE_STATUS_CHANGE:
_scsih_sas_device_status_change_event(ioc,
(Mpi2EventDataSasDeviceStatusChange_t *)
mpi_reply->EventData);
break;
case MPI2_EVENT_IR_OPERATION_STATUS: case MPI2_EVENT_IR_OPERATION_STATUS:
case MPI2_EVENT_SAS_DISCOVERY: case MPI2_EVENT_SAS_DISCOVERY:
case MPI2_EVENT_SAS_DEVICE_DISCOVERY_ERROR: case MPI2_EVENT_SAS_DEVICE_DISCOVERY_ERROR:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment