Commit 6d40afbc authored by Sumit Saxena, committed by Martin K. Petersen

megaraid_sas: MFI IO timeout handling

This patch will do proper error handling for DCMD timeout failure cases
for Fusion adapters:

1. For MFI adapters, in case of DCMD timeout (DCMD which must return
SUCCESS) driver will call kill adapter.

2. What action needs to be taken in case of DCMD timeout is decided by
function dcmd_timeout_ocr_possible().  DCMD timeout causing OCR is
applicable to the following commands:

	MR_DCMD_PD_LIST_QUERY
	MR_DCMD_LD_GET_LIST
	MR_DCMD_LD_LIST_QUERY
	MR_DCMD_CTRL_SET_CRASH_DUMP_PARAMS
	MR_DCMD_SYSTEM_PD_MAP_GET_INFO
	MR_DCMD_LD_MAP_GET_INFO

3. If DCMD fails from driver init path there are certain DCMDs which
must return SUCCESS. If those DCMDs fail, driver bails out. For optional
DCMDs like pd_info etc., driver continues without executing certain
functionality.
Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com>
Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com>
Reviewed-by: Tomas Henzl <thenzl@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent 11c71cb4
...@@ -170,6 +170,7 @@ ...@@ -170,6 +170,7 @@
/* Driver internal */ /* Driver internal */
#define DRV_DCMD_POLLED_MODE 0x1 #define DRV_DCMD_POLLED_MODE 0x1
#define DRV_DCMD_SKIP_REFIRE 0x2
/* /*
* Definition for cmd_status * Definition for cmd_status
...@@ -1093,6 +1094,11 @@ enum MR_SCSI_CMD_TYPE { ...@@ -1093,6 +1094,11 @@ enum MR_SCSI_CMD_TYPE {
NON_READ_WRITE_SYSPDIO = 3, NON_READ_WRITE_SYSPDIO = 3,
}; };
/**
 * enum DCMD_TIMEOUT_ACTION - recovery action for a timed-out DCMD
 * @INITIATE_OCR:	start an OCR (controller reset)
 * @KILL_ADAPTER:	kill the adapter
 * @IGNORE_TIMEOUT:	presumably take no recovery action; the consumer of
 *			this value is not visible here -- TODO confirm
 *
 * Per the commit description, the action is chosen by
 * dcmd_timeout_ocr_possible() based on which DCMD timed out.
 */
enum DCMD_TIMEOUT_ACTION {
	INITIATE_OCR	= 0,
	KILL_ADAPTER	= 1,
	IGNORE_TIMEOUT	= 2,
};
/* Frame Type */ /* Frame Type */
#define IO_FRAME 0 #define IO_FRAME 0
#define PTHRU_FRAME 1 #define PTHRU_FRAME 1
...@@ -1139,6 +1145,7 @@ enum MR_SCSI_CMD_TYPE { ...@@ -1139,6 +1145,7 @@ enum MR_SCSI_CMD_TYPE {
#define MFI_OB_INTR_STATUS_MASK 0x00000002 #define MFI_OB_INTR_STATUS_MASK 0x00000002
#define MFI_POLL_TIMEOUT_SECS 60 #define MFI_POLL_TIMEOUT_SECS 60
#define MFI_IO_TIMEOUT_SECS 180
#define MEGASAS_SRIOV_HEARTBEAT_INTERVAL_VF (5 * HZ) #define MEGASAS_SRIOV_HEARTBEAT_INTERVAL_VF (5 * HZ)
#define MEGASAS_OCR_SETTLE_TIME_VF (1000 * 30) #define MEGASAS_OCR_SETTLE_TIME_VF (1000 * 30)
#define MEGASAS_ROUTINE_WAIT_TIME_VF 300 #define MEGASAS_ROUTINE_WAIT_TIME_VF 300
...@@ -1918,7 +1925,7 @@ struct megasas_instance_template { ...@@ -1918,7 +1925,7 @@ struct megasas_instance_template {
u32 (*init_adapter)(struct megasas_instance *); u32 (*init_adapter)(struct megasas_instance *);
u32 (*build_and_issue_cmd) (struct megasas_instance *, u32 (*build_and_issue_cmd) (struct megasas_instance *,
struct scsi_cmnd *); struct scsi_cmnd *);
void (*issue_dcmd) (struct megasas_instance *instance, int (*issue_dcmd)(struct megasas_instance *instance,
struct megasas_cmd *cmd); struct megasas_cmd *cmd);
}; };
...@@ -2016,6 +2023,19 @@ struct megasas_mgmt_info { ...@@ -2016,6 +2023,19 @@ struct megasas_mgmt_info {
int max_index; int max_index;
}; };
/**
 * enum MEGASAS_OCR_CAUSE - reason an OCR path was entered
 * @FW_FAULT_OCR:	firmware fault
 * @SCSIIO_TIMEOUT_OCR:	a SCSI IO timed out
 * @MFI_IO_TIMEOUT_OCR:	an MFI IO (DCMD) timed out
 *
 * Passed as the 'reason' argument of
 * megasas_wait_for_outstanding_fusion(). That function also tests the
 * reason for zero/non-zero, so FW_FAULT_OCR must stay 0.
 */
enum MEGASAS_OCR_CAUSE {
	FW_FAULT_OCR		= 0,
	SCSIIO_TIMEOUT_OCR	= 1,
	MFI_IO_TIMEOUT_OCR	= 2,
};
/**
 * enum DCMD_RETURN_STATUS - result of issuing a DCMD
 * @DCMD_SUCCESS:	command completed with MFI_STAT_OK (also returned by
 *			megasas_issue_dcmd_fusion() once the command is fired)
 * @DCMD_TIMEOUT:	cmd_status was still MFI_STAT_INVALID_STATUS when
 *			polling gave up
 * @DCMD_FAILED:	command completed with any other status
 * @DCMD_NOT_FIRED:	the request descriptor could not be built, so the
 *			command was never sent
 */
enum DCMD_RETURN_STATUS {
	DCMD_SUCCESS	= 0,
	DCMD_TIMEOUT	= 1,
	DCMD_FAILED	= 2,
	DCMD_NOT_FIRED	= 3,
};
u8 u8
MR_BuildRaidContext(struct megasas_instance *instance, MR_BuildRaidContext(struct megasas_instance *instance,
struct IO_REQUEST_INFO *io_info, struct IO_REQUEST_INFO *io_info,
......
This diff is collapsed.
...@@ -576,11 +576,12 @@ wait_and_poll(struct megasas_instance *instance, struct megasas_cmd *cmd, ...@@ -576,11 +576,12 @@ wait_and_poll(struct megasas_instance *instance, struct megasas_cmd *cmd,
msleep(20); msleep(20);
} }
if (frame_hdr->cmd_status == 0xff) if (frame_hdr->cmd_status == MFI_STAT_INVALID_STATUS)
return -ETIME; return DCMD_TIMEOUT;
else if (frame_hdr->cmd_status == MFI_STAT_OK)
return (frame_hdr->cmd_status == MFI_STAT_OK) ? return DCMD_SUCCESS;
0 : 1; else
return DCMD_FAILED;
} }
/** /**
...@@ -784,7 +785,8 @@ megasas_sync_pd_seq_num(struct megasas_instance *instance, bool pend) { ...@@ -784,7 +785,8 @@ megasas_sync_pd_seq_num(struct megasas_instance *instance, bool pend) {
/* Below code is only for non pended DCMD */ /* Below code is only for non pended DCMD */
if (instance->ctrl_context && !instance->mask_interrupts) if (instance->ctrl_context && !instance->mask_interrupts)
ret = megasas_issue_blocked_cmd(instance, cmd, 60); ret = megasas_issue_blocked_cmd(instance, cmd,
MFI_IO_TIMEOUT_SECS);
else else
ret = megasas_issue_polled(instance, cmd); ret = megasas_issue_polled(instance, cmd);
...@@ -795,7 +797,10 @@ megasas_sync_pd_seq_num(struct megasas_instance *instance, bool pend) { ...@@ -795,7 +797,10 @@ megasas_sync_pd_seq_num(struct megasas_instance *instance, bool pend) {
ret = -EINVAL; ret = -EINVAL;
} }
if (!ret) if (ret == DCMD_TIMEOUT && instance->ctrl_context)
megaraid_sas_kill_hba(instance);
if (ret == DCMD_SUCCESS)
instance->pd_seq_map_id++; instance->pd_seq_map_id++;
megasas_return_cmd(instance, cmd); megasas_return_cmd(instance, cmd);
...@@ -875,10 +880,13 @@ megasas_get_ld_map_info(struct megasas_instance *instance) ...@@ -875,10 +880,13 @@ megasas_get_ld_map_info(struct megasas_instance *instance)
if (instance->ctrl_context && !instance->mask_interrupts) if (instance->ctrl_context && !instance->mask_interrupts)
ret = megasas_issue_blocked_cmd(instance, cmd, ret = megasas_issue_blocked_cmd(instance, cmd,
MEGASAS_BLOCKED_CMD_TIMEOUT); MFI_IO_TIMEOUT_SECS);
else else
ret = megasas_issue_polled(instance, cmd); ret = megasas_issue_polled(instance, cmd);
if (ret == DCMD_TIMEOUT && instance->ctrl_context)
megaraid_sas_kill_hba(instance);
megasas_return_cmd(instance, cmd); megasas_return_cmd(instance, cmd);
return ret; return ret;
...@@ -2411,7 +2419,7 @@ build_mpt_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd) ...@@ -2411,7 +2419,7 @@ build_mpt_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd)
* @cmd: mfi cmd pointer * @cmd: mfi cmd pointer
* *
*/ */
void int
megasas_issue_dcmd_fusion(struct megasas_instance *instance, megasas_issue_dcmd_fusion(struct megasas_instance *instance,
struct megasas_cmd *cmd) struct megasas_cmd *cmd)
{ {
...@@ -2419,10 +2427,13 @@ megasas_issue_dcmd_fusion(struct megasas_instance *instance, ...@@ -2419,10 +2427,13 @@ megasas_issue_dcmd_fusion(struct megasas_instance *instance,
req_desc = build_mpt_cmd(instance, cmd); req_desc = build_mpt_cmd(instance, cmd);
if (!req_desc) { if (!req_desc) {
dev_err(&instance->pdev->dev, "Couldn't issue MFI pass thru cmd\n"); dev_info(&instance->pdev->dev, "Failed from %s %d\n",
return; __func__, __LINE__);
return DCMD_NOT_FIRED;
} }
megasas_fire_cmd_fusion(instance, req_desc); megasas_fire_cmd_fusion(instance, req_desc);
return DCMD_SUCCESS;
} }
/** /**
...@@ -2583,7 +2594,7 @@ megasas_check_reset_fusion(struct megasas_instance *instance, ...@@ -2583,7 +2594,7 @@ megasas_check_reset_fusion(struct megasas_instance *instance,
/* This function waits for outstanding commands on fusion to complete */ /* This function waits for outstanding commands on fusion to complete */
int megasas_wait_for_outstanding_fusion(struct megasas_instance *instance, int megasas_wait_for_outstanding_fusion(struct megasas_instance *instance,
int iotimeout, int *convert) int reason, int *convert)
{ {
int i, outstanding, retval = 0, hb_seconds_missed = 0; int i, outstanding, retval = 0, hb_seconds_missed = 0;
u32 fw_state; u32 fw_state;
...@@ -2599,14 +2610,22 @@ int megasas_wait_for_outstanding_fusion(struct megasas_instance *instance, ...@@ -2599,14 +2610,22 @@ int megasas_wait_for_outstanding_fusion(struct megasas_instance *instance,
retval = 1; retval = 1;
goto out; goto out;
} }
if (reason == MFI_IO_TIMEOUT_OCR) {
dev_info(&instance->pdev->dev,
"MFI IO is timed out, initiating OCR\n");
retval = 1;
goto out;
}
/* If SR-IOV VF mode & heartbeat timeout, don't wait */ /* If SR-IOV VF mode & heartbeat timeout, don't wait */
if (instance->requestorId && !iotimeout) { if (instance->requestorId && !reason) {
retval = 1; retval = 1;
goto out; goto out;
} }
/* If SR-IOV VF mode & I/O timeout, check for HB timeout */ /* If SR-IOV VF mode & I/O timeout, check for HB timeout */
if (instance->requestorId && iotimeout) { if (instance->requestorId && reason) {
if (instance->hb_host_mem->HB.fwCounter != if (instance->hb_host_mem->HB.fwCounter !=
instance->hb_host_mem->HB.driverCounter) { instance->hb_host_mem->HB.driverCounter) {
instance->hb_host_mem->HB.driverCounter = instance->hb_host_mem->HB.driverCounter =
...@@ -2680,6 +2699,7 @@ void megasas_refire_mgmt_cmd(struct megasas_instance *instance) ...@@ -2680,6 +2699,7 @@ void megasas_refire_mgmt_cmd(struct megasas_instance *instance)
struct megasas_cmd *cmd_mfi; struct megasas_cmd *cmd_mfi;
union MEGASAS_REQUEST_DESCRIPTOR_UNION *req_desc; union MEGASAS_REQUEST_DESCRIPTOR_UNION *req_desc;
u16 smid; u16 smid;
bool refire_cmd = 0;
fusion = instance->ctrl_context; fusion = instance->ctrl_context;
...@@ -2695,10 +2715,12 @@ void megasas_refire_mgmt_cmd(struct megasas_instance *instance) ...@@ -2695,10 +2715,12 @@ void megasas_refire_mgmt_cmd(struct megasas_instance *instance)
continue; continue;
req_desc = megasas_get_request_descriptor req_desc = megasas_get_request_descriptor
(instance, smid - 1); (instance, smid - 1);
if (req_desc && ((cmd_mfi->frame->dcmd.opcode != refire_cmd = req_desc && ((cmd_mfi->frame->dcmd.opcode !=
cpu_to_le32(MR_DCMD_LD_MAP_GET_INFO)) && cpu_to_le32(MR_DCMD_LD_MAP_GET_INFO)) &&
(cmd_mfi->frame->dcmd.opcode != (cmd_mfi->frame->dcmd.opcode !=
cpu_to_le32(MR_DCMD_SYSTEM_PD_MAP_GET_INFO)))) cpu_to_le32(MR_DCMD_SYSTEM_PD_MAP_GET_INFO)))
&& !(cmd_mfi->flags & DRV_DCMD_SKIP_REFIRE);
if (refire_cmd)
megasas_fire_cmd_fusion(instance, req_desc); megasas_fire_cmd_fusion(instance, req_desc);
else else
megasas_return_cmd(instance, cmd_mfi); megasas_return_cmd(instance, cmd_mfi);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment