Commit 7a38dc0b authored by Hannes Reinecke, committed by Martin K. Petersen

scsi: scsi_error: count medium access timeout only once per EH run

The current medium access timeout counter will be increased for
each command, so if there are enough failed commands we'll hit
the medium access timeout for even a single device failure and
the following kernel message is displayed:

sd H:C:T:L: [sdXY] Medium access timeout failure. Offlining disk!

Fix this by counting the timeout per EH run, i.e., the counter will
only be increased once per device and EH run.

Fixes: 18a4d0a2 ("[SCSI] Handle disk devices which can not process medium access commands")
Cc: Ewan Milne <emilne@redhat.com>
Cc: Lawrence Obermann <loberman@redhat.com>
Cc: Benjamin Block <bblock@linux.vnet.ibm.com>
Cc: Steffen Maier <maier@linux.vnet.ibm.com>
Signed-off-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent 104d9c7f
...@@ -220,6 +220,23 @@ scsi_abort_command(struct scsi_cmnd *scmd) ...@@ -220,6 +220,23 @@ scsi_abort_command(struct scsi_cmnd *scmd)
return SUCCESS; return SUCCESS;
} }
/**
 * scsi_eh_reset - call into ->eh_reset to reset internal counters
 * @scmd: scmd to run eh on.
 *
 * The scsi driver might be carrying internal state about the
 * devices, so the driver is given a chance to reset that state
 * once the error handler is started.
 *
 * Passthrough requests are skipped. For all other requests the
 * scsi_driver returned by scsi_cmd_to_driver() is consulted and its
 * optional ->eh_reset callback is invoked when one is provided.
 */
static void scsi_eh_reset(struct scsi_cmnd *scmd)
{
	struct scsi_driver *drv;

	/* Nothing to reset for passthrough requests. */
	if (blk_rq_is_passthrough(scmd->request))
		return;

	drv = scsi_cmd_to_driver(scmd);
	/* ->eh_reset is optional; only call it when the driver set it. */
	if (drv->eh_reset)
		drv->eh_reset(scmd);
}
/** /**
* scsi_eh_scmd_add - add scsi cmd to error handling. * scsi_eh_scmd_add - add scsi cmd to error handling.
* @scmd: scmd to run eh on. * @scmd: scmd to run eh on.
...@@ -249,6 +266,7 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag) ...@@ -249,6 +266,7 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED)
eh_flag &= ~SCSI_EH_CANCEL_CMD; eh_flag &= ~SCSI_EH_CANCEL_CMD;
scmd->eh_eflags |= eh_flag; scmd->eh_eflags |= eh_flag;
scsi_eh_reset(scmd);
list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q); list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
shost->host_failed++; shost->host_failed++;
scsi_eh_wakeup(shost); scsi_eh_wakeup(shost);
......
...@@ -115,6 +115,7 @@ static void sd_rescan(struct device *); ...@@ -115,6 +115,7 @@ static void sd_rescan(struct device *);
static int sd_init_command(struct scsi_cmnd *SCpnt); static int sd_init_command(struct scsi_cmnd *SCpnt);
static void sd_uninit_command(struct scsi_cmnd *SCpnt); static void sd_uninit_command(struct scsi_cmnd *SCpnt);
static int sd_done(struct scsi_cmnd *); static int sd_done(struct scsi_cmnd *);
static void sd_eh_reset(struct scsi_cmnd *);
static int sd_eh_action(struct scsi_cmnd *, int); static int sd_eh_action(struct scsi_cmnd *, int);
static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer); static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer);
static void scsi_disk_release(struct device *cdev); static void scsi_disk_release(struct device *cdev);
...@@ -532,6 +533,7 @@ static struct scsi_driver sd_template = { ...@@ -532,6 +533,7 @@ static struct scsi_driver sd_template = {
.uninit_command = sd_uninit_command, .uninit_command = sd_uninit_command,
.done = sd_done, .done = sd_done,
.eh_action = sd_eh_action, .eh_action = sd_eh_action,
.eh_reset = sd_eh_reset,
}; };
/* /*
...@@ -1685,6 +1687,26 @@ static const struct block_device_operations sd_fops = { ...@@ -1685,6 +1687,26 @@ static const struct block_device_operations sd_fops = {
.pr_ops = &sd_pr_ops, .pr_ops = &sd_pr_ops,
}; };
/**
 * sd_eh_reset - reset error handling callback
 * @scmd: sd-issued command that has failed
 *
 * This function is called by the SCSI midlayer before starting
 * SCSI EH. Medium access failures must be registered only once per
 * device and SCSI EH run: several commands may have timed out, and
 * counting each of them could make the 'max_medium_access_timeouts'
 * limit trigger after the very first SCSI EH run and set the device
 * offline.
 * To prevent that, the per-disk gate variable is cleared here, before
 * SCSI EH starts.
 */
static void sd_eh_reset(struct scsi_cmnd *scmd)
{
	struct scsi_disk *disk;

	disk = scsi_disk(scmd->request->rq_disk);

	/* A new SCSI EH run begins: clear the gate for this disk. */
	disk->ignore_medium_access_errors = false;
}
/** /**
* sd_eh_action - error handling callback * sd_eh_action - error handling callback
* @scmd: sd-issued command that has failed * @scmd: sd-issued command that has failed
...@@ -1714,7 +1736,10 @@ static int sd_eh_action(struct scsi_cmnd *scmd, int eh_disp) ...@@ -1714,7 +1736,10 @@ static int sd_eh_action(struct scsi_cmnd *scmd, int eh_disp)
* process of recovering or has it suffered an internal failure * process of recovering or has it suffered an internal failure
* that prevents access to the storage medium. * that prevents access to the storage medium.
*/ */
if (!sdkp->ignore_medium_access_errors) {
sdkp->medium_access_timed_out++; sdkp->medium_access_timed_out++;
sdkp->ignore_medium_access_errors = true;
}
/* /*
* If the device keeps failing read/write commands but TEST UNIT * If the device keeps failing read/write commands but TEST UNIT
......
...@@ -106,6 +106,7 @@ struct scsi_disk { ...@@ -106,6 +106,7 @@ struct scsi_disk {
unsigned rc_basis: 2; unsigned rc_basis: 2;
unsigned zoned: 2; unsigned zoned: 2;
unsigned urswrz : 1; unsigned urswrz : 1;
unsigned ignore_medium_access_errors : 1;
}; };
#define to_scsi_disk(obj) container_of(obj,struct scsi_disk,dev) #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,dev)
......
...@@ -16,6 +16,7 @@ struct scsi_driver { ...@@ -16,6 +16,7 @@ struct scsi_driver {
void (*uninit_command)(struct scsi_cmnd *); void (*uninit_command)(struct scsi_cmnd *);
int (*done)(struct scsi_cmnd *); int (*done)(struct scsi_cmnd *);
int (*eh_action)(struct scsi_cmnd *, int); int (*eh_action)(struct scsi_cmnd *, int);
void (*eh_reset)(struct scsi_cmnd *);
}; };
#define to_scsi_driver(drv) \ #define to_scsi_driver(drv) \
container_of((drv), struct scsi_driver, gendrv) container_of((drv), struct scsi_driver, gendrv)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment