Commit 18ff5f08, authored by Kevin Barnett, committed by Martin K. Petersen

scsi: smartpqi: Add additional logging for LUN resets

LUN resets can take a long time to complete. Additional driver logging helps
show where the driver is in the reset process.

Add a timeout in pqi_device_wait_for_pending_io() to cap how long the
driver will wait for outstanding commands.

Link: https://lore.kernel.org/r/161549385119.25025.10366493975709358647.stgit@brunhilda
Reviewed-by: Mahesh Rajashekhara <mahesh.rajashekhara@microchip.com>
Reviewed-by: Scott Benesh <scott.benesh@microchip.com>
Reviewed-by: Mike McGowen <mike.mcgowen@microchip.com>
Reviewed-by: Scott Teel <scott.teel@microchip.com>
Signed-off-by: Kevin Barnett <kevin.barnett@microchip.com>
Signed-off-by: Don Brace <don.brace@microchip.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent 55732a46
...@@ -84,7 +84,7 @@ static void pqi_ofa_setup_host_buffer(struct pqi_ctrl_info *ctrl_info); ...@@ -84,7 +84,7 @@ static void pqi_ofa_setup_host_buffer(struct pqi_ctrl_info *ctrl_info);
static void pqi_ofa_free_host_buffer(struct pqi_ctrl_info *ctrl_info); static void pqi_ofa_free_host_buffer(struct pqi_ctrl_info *ctrl_info);
static int pqi_ofa_host_memory_update(struct pqi_ctrl_info *ctrl_info); static int pqi_ofa_host_memory_update(struct pqi_ctrl_info *ctrl_info);
static int pqi_device_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info, static int pqi_device_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info,
struct pqi_scsi_dev *device, unsigned long timeout_secs); struct pqi_scsi_dev *device, unsigned long timeout_msecs);
/* for flags argument to pqi_submit_raid_request_synchronous() */ /* for flags argument to pqi_submit_raid_request_synchronous() */
#define PQI_SYNC_FLAGS_INTERRUPTABLE 0x1 #define PQI_SYNC_FLAGS_INTERRUPTABLE 0x1
...@@ -335,11 +335,34 @@ static void pqi_wait_if_ctrl_blocked(struct pqi_ctrl_info *ctrl_info) ...@@ -335,11 +335,34 @@ static void pqi_wait_if_ctrl_blocked(struct pqi_ctrl_info *ctrl_info)
atomic_dec(&ctrl_info->num_blocked_threads); atomic_dec(&ctrl_info->num_blocked_threads);
} }
#define PQI_QUIESCE_WARNING_TIMEOUT_SECS 10
static inline void pqi_ctrl_wait_until_quiesced(struct pqi_ctrl_info *ctrl_info) static inline void pqi_ctrl_wait_until_quiesced(struct pqi_ctrl_info *ctrl_info)
{ {
unsigned long start_jiffies;
unsigned long warning_timeout;
bool displayed_warning;
displayed_warning = false;
start_jiffies = jiffies;
warning_timeout = (PQI_QUIESCE_WARNING_TIMEOUT_SECS * PQI_HZ) + start_jiffies;
while (atomic_read(&ctrl_info->num_busy_threads) > while (atomic_read(&ctrl_info->num_busy_threads) >
atomic_read(&ctrl_info->num_blocked_threads)) atomic_read(&ctrl_info->num_blocked_threads)) {
if (time_after(jiffies, warning_timeout)) {
dev_warn(&ctrl_info->pci_dev->dev,
"waiting %u seconds for driver activity to quiesce\n",
jiffies_to_msecs(jiffies - start_jiffies) / 1000);
displayed_warning = true;
warning_timeout = (PQI_QUIESCE_WARNING_TIMEOUT_SECS * PQI_HZ) + jiffies;
}
usleep_range(1000, 2000); usleep_range(1000, 2000);
}
if (displayed_warning)
dev_warn(&ctrl_info->pci_dev->dev,
"driver activity quiesced after waiting for %u seconds\n",
jiffies_to_msecs(jiffies - start_jiffies) / 1000);
} }
static inline bool pqi_device_offline(struct pqi_scsi_dev *device) static inline bool pqi_device_offline(struct pqi_scsi_dev *device)
...@@ -1669,7 +1692,7 @@ static int pqi_add_device(struct pqi_ctrl_info *ctrl_info, ...@@ -1669,7 +1692,7 @@ static int pqi_add_device(struct pqi_ctrl_info *ctrl_info,
return rc; return rc;
} }
#define PQI_PENDING_IO_TIMEOUT_SECS 20 #define PQI_REMOVE_DEVICE_PENDING_IO_TIMEOUT_MSECS (20 * 1000)
static inline void pqi_remove_device(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device) static inline void pqi_remove_device(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device)
{ {
...@@ -1677,7 +1700,8 @@ static inline void pqi_remove_device(struct pqi_ctrl_info *ctrl_info, struct pqi ...@@ -1677,7 +1700,8 @@ static inline void pqi_remove_device(struct pqi_ctrl_info *ctrl_info, struct pqi
pqi_device_remove_start(device); pqi_device_remove_start(device);
rc = pqi_device_wait_for_pending_io(ctrl_info, device, PQI_PENDING_IO_TIMEOUT_SECS); rc = pqi_device_wait_for_pending_io(ctrl_info, device,
PQI_REMOVE_DEVICE_PENDING_IO_TIMEOUT_MSECS);
if (rc) if (rc)
dev_err(&ctrl_info->pci_dev->dev, dev_err(&ctrl_info->pci_dev->dev,
"scsi %d:%d:%d:%d removing device with %d outstanding command(s)\n", "scsi %d:%d:%d:%d removing device with %d outstanding command(s)\n",
...@@ -3086,7 +3110,7 @@ static void pqi_process_io_error(unsigned int iu_type, ...@@ -3086,7 +3110,7 @@ static void pqi_process_io_error(unsigned int iu_type,
} }
} }
static int pqi_interpret_task_management_response( static int pqi_interpret_task_management_response(struct pqi_ctrl_info *ctrl_info,
struct pqi_task_management_response *response) struct pqi_task_management_response *response)
{ {
int rc; int rc;
...@@ -3104,6 +3128,10 @@ static int pqi_interpret_task_management_response( ...@@ -3104,6 +3128,10 @@ static int pqi_interpret_task_management_response(
break; break;
} }
if (rc)
dev_err(&ctrl_info->pci_dev->dev,
"Task Management Function error: %d (response code: %u)\n", rc, response->response_code);
return rc; return rc;
} }
...@@ -3172,9 +3200,8 @@ static int pqi_process_io_intr(struct pqi_ctrl_info *ctrl_info, struct pqi_queue ...@@ -3172,9 +3200,8 @@ static int pqi_process_io_intr(struct pqi_ctrl_info *ctrl_info, struct pqi_queue
&((struct pqi_vendor_general_response *)response)->status); &((struct pqi_vendor_general_response *)response)->status);
break; break;
case PQI_RESPONSE_IU_TASK_MANAGEMENT: case PQI_RESPONSE_IU_TASK_MANAGEMENT:
io_request->status = io_request->status = pqi_interpret_task_management_response(ctrl_info,
pqi_interpret_task_management_response( (void *)response);
(void *)response);
break; break;
case PQI_RESPONSE_IU_AIO_PATH_DISABLED: case PQI_RESPONSE_IU_AIO_PATH_DISABLED:
pqi_aio_path_disabled(io_request); pqi_aio_path_disabled(io_request);
...@@ -5836,24 +5863,37 @@ static void pqi_fail_io_queued_for_device(struct pqi_ctrl_info *ctrl_info, ...@@ -5836,24 +5863,37 @@ static void pqi_fail_io_queued_for_device(struct pqi_ctrl_info *ctrl_info,
} }
} }
#define PQI_PENDING_IO_WARNING_TIMEOUT_SECS 10
static int pqi_device_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info, static int pqi_device_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info,
struct pqi_scsi_dev *device, unsigned long timeout_secs) struct pqi_scsi_dev *device, unsigned long timeout_msecs)
{ {
unsigned long timeout; int cmds_outstanding;
unsigned long start_jiffies;
unsigned long warning_timeout;
unsigned long msecs_waiting;
start_jiffies = jiffies;
warning_timeout = (PQI_PENDING_IO_WARNING_TIMEOUT_SECS * PQI_HZ) + start_jiffies;
timeout = (timeout_secs * PQI_HZ) + jiffies; while ((cmds_outstanding = atomic_read(&device->scsi_cmds_outstanding)) > 0) {
while (atomic_read(&device->scsi_cmds_outstanding)) {
pqi_check_ctrl_health(ctrl_info); pqi_check_ctrl_health(ctrl_info);
if (pqi_ctrl_offline(ctrl_info)) if (pqi_ctrl_offline(ctrl_info))
return -ENXIO; return -ENXIO;
if (timeout_secs != NO_TIMEOUT) { msecs_waiting = jiffies_to_msecs(jiffies - start_jiffies);
if (time_after(jiffies, timeout)) { if (msecs_waiting > timeout_msecs) {
dev_err(&ctrl_info->pci_dev->dev, dev_err(&ctrl_info->pci_dev->dev,
"timed out waiting for pending I/O\n"); "scsi %d:%d:%d:%d: timed out after %lu seconds waiting for %d outstanding command(s)\n",
return -ETIMEDOUT; ctrl_info->scsi_host->host_no, device->bus, device->target,
} device->lun, msecs_waiting / 1000, cmds_outstanding);
return -ETIMEDOUT;
}
if (time_after(jiffies, warning_timeout)) {
dev_warn(&ctrl_info->pci_dev->dev,
"scsi %d:%d:%d:%d: waiting %lu seconds for %d outstanding command(s)\n",
ctrl_info->scsi_host->host_no, device->bus, device->target,
device->lun, msecs_waiting / 1000, cmds_outstanding);
warning_timeout = (PQI_PENDING_IO_WARNING_TIMEOUT_SECS * PQI_HZ) + jiffies;
} }
usleep_range(1000, 2000); usleep_range(1000, 2000);
} }
...@@ -5869,13 +5909,15 @@ static void pqi_lun_reset_complete(struct pqi_io_request *io_request, ...@@ -5869,13 +5909,15 @@ static void pqi_lun_reset_complete(struct pqi_io_request *io_request,
complete(waiting); complete(waiting);
} }
#define PQI_LUN_RESET_TIMEOUT_SECS 30
#define PQI_LUN_RESET_POLL_COMPLETION_SECS 10 #define PQI_LUN_RESET_POLL_COMPLETION_SECS 10
static int pqi_wait_for_lun_reset_completion(struct pqi_ctrl_info *ctrl_info, static int pqi_wait_for_lun_reset_completion(struct pqi_ctrl_info *ctrl_info,
struct pqi_scsi_dev *device, struct completion *wait) struct pqi_scsi_dev *device, struct completion *wait)
{ {
int rc; int rc;
unsigned int wait_secs;
wait_secs = 0;
while (1) { while (1) {
if (wait_for_completion_io_timeout(wait, if (wait_for_completion_io_timeout(wait,
...@@ -5889,13 +5931,21 @@ static int pqi_wait_for_lun_reset_completion(struct pqi_ctrl_info *ctrl_info, ...@@ -5889,13 +5931,21 @@ static int pqi_wait_for_lun_reset_completion(struct pqi_ctrl_info *ctrl_info,
rc = -ENXIO; rc = -ENXIO;
break; break;
} }
wait_secs += PQI_LUN_RESET_POLL_COMPLETION_SECS;
dev_warn(&ctrl_info->pci_dev->dev,
"scsi %d:%d:%d:%d: waiting %u seconds for LUN reset to complete\n",
ctrl_info->scsi_host->host_no, device->bus, device->target, device->lun,
wait_secs);
} }
return rc; return rc;
} }
static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, #define PQI_LUN_RESET_FIRMWARE_TIMEOUT_SECS 30
struct pqi_scsi_dev *device)
static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device)
{ {
int rc; int rc;
struct pqi_io_request *io_request; struct pqi_io_request *io_request;
...@@ -5917,8 +5967,7 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, ...@@ -5917,8 +5967,7 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info,
sizeof(request->lun_number)); sizeof(request->lun_number));
request->task_management_function = SOP_TASK_MANAGEMENT_LUN_RESET; request->task_management_function = SOP_TASK_MANAGEMENT_LUN_RESET;
if (ctrl_info->tmf_iu_timeout_supported) if (ctrl_info->tmf_iu_timeout_supported)
put_unaligned_le16(PQI_LUN_RESET_TIMEOUT_SECS, put_unaligned_le16(PQI_LUN_RESET_FIRMWARE_TIMEOUT_SECS, &request->timeout);
&request->timeout);
pqi_start_io(ctrl_info, &ctrl_info->queue_groups[PQI_DEFAULT_QUEUE_GROUP], RAID_PATH, pqi_start_io(ctrl_info, &ctrl_info->queue_groups[PQI_DEFAULT_QUEUE_GROUP], RAID_PATH,
io_request); io_request);
...@@ -5932,29 +5981,33 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, ...@@ -5932,29 +5981,33 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info,
return rc; return rc;
} }
#define PQI_LUN_RESET_RETRIES 3 #define PQI_LUN_RESET_RETRIES 3
#define PQI_LUN_RESET_RETRY_INTERVAL_MSECS 10000 #define PQI_LUN_RESET_RETRY_INTERVAL_MSECS (10 * 1000)
#define PQI_LUN_RESET_PENDING_IO_TIMEOUT_SECS 120 #define PQI_LUN_RESET_PENDING_IO_TIMEOUT_MSECS (10 * 60 * 1000)
#define PQI_LUN_RESET_FAILED_PENDING_IO_TIMEOUT_MSECS (2 * 60 * 1000)
static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info, static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device)
struct pqi_scsi_dev *device)
{ {
int rc; int reset_rc;
int wait_rc;
unsigned int retries; unsigned int retries;
unsigned long timeout_secs; unsigned long timeout_msecs;
for (retries = 0;;) { for (retries = 0;;) {
rc = pqi_lun_reset(ctrl_info, device); reset_rc = pqi_lun_reset(ctrl_info, device);
if (rc == 0 || ++retries > PQI_LUN_RESET_RETRIES) if (reset_rc == 0 || ++retries > PQI_LUN_RESET_RETRIES)
break; break;
msleep(PQI_LUN_RESET_RETRY_INTERVAL_MSECS); msleep(PQI_LUN_RESET_RETRY_INTERVAL_MSECS);
} }
timeout_secs = rc ? PQI_LUN_RESET_PENDING_IO_TIMEOUT_SECS : NO_TIMEOUT; timeout_msecs = reset_rc ? PQI_LUN_RESET_FAILED_PENDING_IO_TIMEOUT_MSECS :
PQI_LUN_RESET_PENDING_IO_TIMEOUT_MSECS;
rc |= pqi_device_wait_for_pending_io(ctrl_info, device, timeout_secs); wait_rc = pqi_device_wait_for_pending_io(ctrl_info, device, timeout_msecs);
if (wait_rc && reset_rc == 0)
reset_rc = wait_rc;
return rc == 0 ? SUCCESS : FAILED; return reset_rc == 0 ? SUCCESS : FAILED;
} }
static int pqi_device_reset(struct pqi_ctrl_info *ctrl_info, static int pqi_device_reset(struct pqi_ctrl_info *ctrl_info,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment