Commit 4e9fbe87 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] ide-scsi error handling fixes

From: Willem Riede <wrlk@riede.org>

The patch revises the error handling in ide-scsi, fixing the scheduling
while locked issues, and make it work properly, at least for me...

Specific changes in this patch:

- introduce idescsi_expiry, a timeout routine for the ide subsystem,
  which simply flags the fact that the command timed out, but postpones
  any other action until either the command still finishes on its own
  (unlikely?) or the scsi error handler kicks in;

- introduce idescsi_atapi_error and idescsi_atapi_abort, error routines
  for the ide subsystem, which are modeled after those of ide-cd, but
  take only minimal effort to recover, leaving the heavy lifting for
  the scsi error handler;

- rewrite (and rename for clarity) idescsi_eh_abort and idescsi_eh_error,
  the abort/error routines to be called by the scsi error handler --
  this redesign should not have the scheduling while atomic problems
  of the old implementation.

- move ide_cdrom_dump_status() from ide-cd.c to ide-lib.c as
  ide_dump_atapi_status() and both ide-cd and ide-scsi call it.

- replaces BUG() by WARN_ON()/printk in the error handling code.

- sets TASK_UNINTERRUPTIBLE before schedule_timeout() and moves the host
  unlock/lock around the while loop inside the loop in idescsi_eh_reset().
parent e5c539b8
......@@ -554,53 +554,6 @@ static void cdrom_queue_request_sense(ide_drive_t *drive,
(void) ide_do_drive_cmd(drive, rq, ide_preempt);
}
/*
* Error reporting, in human readable form (luxurious, but a memory hog).
*/
byte ide_cdrom_dump_status (ide_drive_t *drive, const char *msg, byte stat)
{
unsigned long flags;
atapi_status_t status;
atapi_error_t error;
status.all = stat;
local_irq_set(flags);
printk("%s: %s: status=0x%02x", drive->name, msg, stat);
#if FANCY_STATUS_DUMPS
printk(" { ");
if (status.b.bsy)
printk("Busy ");
else {
if (status.b.drdy) printk("DriveReady ");
if (status.b.df) printk("DeviceFault ");
if (status.b.dsc) printk("SeekComplete ");
if (status.b.drq) printk("DataRequest ");
if (status.b.corr) printk("CorrectedError ");
if (status.b.idx) printk("Index ");
if (status.b.check) printk("Error ");
}
printk("}");
#endif /* FANCY_STATUS_DUMPS */
printk("\n");
if ((status.all & (status.b.bsy|status.b.check)) == status.b.check) {
error.all = HWIF(drive)->INB(IDE_ERROR_REG);
printk("%s: %s: error=0x%02x", drive->name, msg, error.all);
#if FANCY_STATUS_DUMPS
if (error.b.ili) printk("IllegalLengthIndication ");
if (error.b.eom) printk("EndOfMedia ");
if (error.b.abrt) printk("Aborted Command ");
if (error.b.mcr) printk("MediaChangeRequested ");
if (error.b.sense_key) printk("LastFailedSense 0x%02x ",
error.b.sense_key);
#endif /* FANCY_STATUS_DUMPS */
printk("\n");
}
local_irq_restore(flags);
return error.all;
}
/*
* ide_error() takes action based on the error returned by the drive.
*/
......@@ -609,7 +562,7 @@ ide_startstop_t ide_cdrom_error (ide_drive_t *drive, const char *msg, byte stat)
struct request *rq;
byte err;
err = ide_cdrom_dump_status(drive, msg, stat);
err = ide_dump_atapi_status(drive, msg, stat);
if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL)
return ide_stopped;
/* retry only "normal" I/O: */
......@@ -3412,7 +3365,7 @@ static ide_driver_t ide_cdrom_driver = {
.supports_dsc_overlap = 1,
.cleanup = ide_cdrom_cleanup,
.do_request = ide_do_rw_cdrom,
.sense = ide_cdrom_dump_status,
.sense = ide_dump_atapi_status,
.error = ide_cdrom_error,
.abort = ide_cdrom_abort,
.capacity = ide_cdrom_capacity,
......
......@@ -447,3 +447,55 @@ int ide_set_xfer_rate(ide_drive_t *drive, u8 rate)
EXPORT_SYMBOL_GPL(ide_set_xfer_rate);
/**
* ide_dump_atapi_status - print human readable atapi status
* @drive: drive that status applies to
* @msg: text message to print
* @stat: status byte to decode
*
* Error reporting, in human readable form (luxurious, but a memory hog).
*/
byte ide_dump_atapi_status (ide_drive_t *drive, const char *msg, byte stat)
{
unsigned long flags;
atapi_status_t status;
atapi_error_t error;
status.all = stat;
local_irq_set(flags);
printk("%s: %s: status=0x%02x", drive->name, msg, stat);
#if FANCY_STATUS_DUMPS
printk(" { ");
if (status.b.bsy)
printk("Busy ");
else {
if (status.b.drdy) printk("DriveReady ");
if (status.b.df) printk("DeviceFault ");
if (status.b.dsc) printk("SeekComplete ");
if (status.b.drq) printk("DataRequest ");
if (status.b.corr) printk("CorrectedError ");
if (status.b.idx) printk("Index ");
if (status.b.check) printk("Error ");
}
printk("}");
#endif /* FANCY_STATUS_DUMPS */
printk("\n");
if ((status.all & (status.b.bsy|status.b.check)) == status.b.check) {
error.all = HWIF(drive)->INB(IDE_ERROR_REG);
printk("%s: %s: error=0x%02x", drive->name, msg, error.all);
#if FANCY_STATUS_DUMPS
if (error.b.ili) printk("IllegalLengthIndication ");
if (error.b.eom) printk("EndOfMedia ");
if (error.b.abrt) printk("Aborted Command ");
if (error.b.mcr) printk("MediaChangeRequested ");
if (error.b.sense_key) printk("LastFailedSense 0x%02x ",
error.b.sense_key);
#endif /* FANCY_STATUS_DUMPS */
printk("\n");
}
local_irq_restore(flags);
return error.all;
}
EXPORT_SYMBOL(ide_dump_atapi_status);
......@@ -78,6 +78,7 @@ typedef struct idescsi_pc_s {
#define PC_DMA_IN_PROGRESS 0 /* 1 while DMA in progress */
#define PC_WRITING 1 /* Data direction */
#define PC_TRANSFORM 2 /* transform SCSI commands */
#define PC_TIMEDOUT 3 /* command timed out */
#define PC_DMA_OK 4 /* Use DMA */
/*
......@@ -307,6 +308,41 @@ static int idescsi_check_condition(ide_drive_t *drive, struct request *failed_co
return ide_do_drive_cmd(drive, rq, ide_preempt);
}
ide_startstop_t idescsi_atapi_error (ide_drive_t *drive, const char *msg, byte stat)
{
struct request *rq;
byte err;
err = ide_dump_atapi_status(drive, msg, stat);
if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL)
return ide_stopped;
if (HWIF(drive)->INB(IDE_STATUS_REG) & (BUSY_STAT|DRQ_STAT))
/* force an abort */
HWIF(drive)->OUTB(WIN_IDLEIMMEDIATE,IDE_COMMAND_REG);
rq->errors++;
DRIVER(drive)->end_request(drive, 0, 0);
return ide_stopped;
}
ide_startstop_t idescsi_atapi_abort (ide_drive_t *drive, const char *msg)
{
struct request *rq;
if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL)
return ide_stopped;
#if IDESCSI_DEBUG_LOG
printk(KERN_WARNING "idescsi_atapi_abort called for %lu\n",
((idescsi_pc_t *) rq->special)->scsi_cmd->serial_number);
#endif
rq->errors |= ERROR_MAX;
DRIVER(drive)->end_request(drive, 0, 0);
return ide_stopped;
}
static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs)
{
idescsi_scsi_t *scsi = drive_to_idescsi(drive);
......@@ -334,7 +370,13 @@ static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs)
kfree(rq);
pc = opc;
rq = pc->rq;
pc->scsi_cmd->result = (CHECK_CONDITION << 1) | (DID_OK << 16);
pc->scsi_cmd->result = (CHECK_CONDITION << 1) |
((test_bit(PC_TIMEDOUT, &pc->flags)?DID_TIME_OUT:DID_OK) << 16);
} else if (test_bit(PC_TIMEDOUT, &pc->flags)) {
if (log)
printk (KERN_WARNING "ide-scsi: %s: timed out for %lu\n",
drive->name, pc->scsi_cmd->serial_number);
pc->scsi_cmd->result = DID_TIME_OUT << 16;
} else if (rq->errors >= ERROR_MAX) {
pc->scsi_cmd->result = DID_ERROR << 16;
if (log)
......@@ -374,6 +416,19 @@ static inline unsigned long get_timeout(idescsi_pc_t *pc)
return IDE_MAX(WAIT_CMD, pc->timeout - jiffies);
}
static int idescsi_expiry(ide_drive_t *drive)
{
idescsi_scsi_t *scsi = drive->driver_data;
idescsi_pc_t *pc = scsi->pc;
#if IDESCSI_DEBUG_LOG
printk(KERN_WARNING "idescsi_expiry called for %lu at %lu\n", pc->scsi_cmd->serial_number, jiffies);
#endif
set_bit(PC_TIMEDOUT, &pc->flags);
return 0; /* we do not want the ide subsystem to retry */
}
/*
* Our interrupt handler.
*/
......@@ -393,6 +448,15 @@ static ide_startstop_t idescsi_pc_intr (ide_drive_t *drive)
printk (KERN_INFO "ide-scsi: Reached idescsi_pc_intr interrupt handler\n");
#endif /* IDESCSI_DEBUG_LOG */
if (test_bit(PC_TIMEDOUT, &pc->flags)){
#if IDESCSI_DEBUG_LOG
printk(KERN_WARNING "idescsi_pc_intr: got timed out packet %lu at %lu\n",
pc->scsi_cmd->serial_number, jiffies);
#endif
/* end this request now - scsi should retry it*/
idescsi_end_request (drive, 1, 0);
return ide_stopped;
}
if (test_and_clear_bit (PC_DMA_IN_PROGRESS, &pc->flags)) {
#if IDESCSI_DEBUG_LOG
printk ("ide-scsi: %s: DMA complete\n", drive->name);
......@@ -442,7 +506,7 @@ static ide_startstop_t idescsi_pc_intr (ide_drive_t *drive)
pc->actually_transferred += temp;
pc->current_position += temp;
idescsi_discard_data(drive, bcount.all - temp);
ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), NULL);
ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), idescsi_expiry);
return ide_started;
}
#if IDESCSI_DEBUG_LOG
......@@ -467,7 +531,8 @@ static ide_startstop_t idescsi_pc_intr (ide_drive_t *drive)
pc->actually_transferred += bcount.all;
pc->current_position += bcount.all;
ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), NULL); /* And set the interrupt handler again */
/* And set the interrupt handler again */
ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), idescsi_expiry);
return ide_started;
}
......@@ -492,7 +557,7 @@ static ide_startstop_t idescsi_transfer_pc(ide_drive_t *drive)
if (HWGROUP(drive)->handler != NULL)
BUG();
/* Set the interrupt routine */
ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), NULL);
ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), idescsi_expiry);
/* Send the actual packet */
atapi_output_bytes(drive, scsi->pc->c, 12);
if (test_bit (PC_DMA_OK, &pc->flags)) {
......@@ -540,7 +605,7 @@ static ide_startstop_t idescsi_issue_pc (ide_drive_t *drive, idescsi_pc_t *pc)
if (HWGROUP(drive)->handler != NULL)
BUG();
ide_set_handler(drive, &idescsi_transfer_pc,
get_timeout(pc), NULL);
get_timeout(pc), idescsi_expiry);
/* Issue the packet command */
HWIF(drive)->OUTB(WIN_PACKETCMD, IDE_COMMAND_REG);
return ide_started;
......@@ -633,6 +698,8 @@ static ide_driver_t idescsi_driver = {
.cleanup = idescsi_cleanup,
.do_request = idescsi_do_request,
.end_request = idescsi_end_request,
.error = idescsi_atapi_error,
.abort = idescsi_atapi_abort,
.drives = LIST_HEAD_INIT(idescsi_driver.drives),
};
......@@ -852,66 +919,132 @@ static int idescsi_queue (Scsi_Cmnd *cmd, void (*done)(Scsi_Cmnd *))
return 1;
}
static int idescsi_abort (Scsi_Cmnd *cmd)
static int idescsi_eh_abort (Scsi_Cmnd *cmd)
{
int countdown = 8;
unsigned long flags;
idescsi_scsi_t *scsi = scsihost_to_idescsi(cmd->device->host);
ide_drive_t *drive = scsi->drive;
int busy;
int ret = FAILED;
printk (KERN_ERR "ide-scsi: abort called for %lu\n", cmd->serial_number);
while (countdown--) {
/* is cmd active?
* need to lock so this stuff doesn't change under us */
spin_lock_irqsave(&ide_lock, flags);
if (scsi->pc && scsi->pc->scsi_cmd &&
scsi->pc->scsi_cmd->serial_number == cmd->serial_number) {
/* yep - let's give it some more time -
* we can do that, we're in _our_ error kernel thread */
spin_unlock_irqrestore(&ide_lock, flags);
scsi_sleep(HZ);
continue;
}
/* no, but is it queued in the ide subsystem? */
if (elv_queue_empty(drive->queue)) {
spin_unlock_irqrestore(&ide_lock, flags);
return SUCCESS;
}
spin_unlock_irqrestore(&ide_lock, flags);
schedule_timeout(HZ/10);
/* In idescsi_eh_abort we try to gently pry our command from the ide subsystem */
if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
printk (KERN_WARNING "ide-scsi: abort called for %lu\n", cmd->serial_number);
if (!drive) {
printk (KERN_WARNING "ide-scsi: Drive not set in idescsi_eh_abort\n");
WARN_ON(1);
goto no_drive;
}
return FAILED;
/* First give it some more time, how much is "right" is hard to say :-( */
busy = ide_wait_not_busy(HWIF(drive), 100); /* FIXME - uses mdelay which causes latency? */
if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
printk (KERN_WARNING "ide-scsi: drive did%s become ready\n", busy?" not":"");
spin_lock_irq(&ide_lock);
/* If there is no pc running we're done (our interrupt took care of it) */
if (!scsi->pc) {
ret = SUCCESS;
goto ide_unlock;
}
/* It's somewhere in flight. Does ide subsystem agree? */
if (scsi->pc->scsi_cmd->serial_number == cmd->serial_number && !busy &&
elv_queue_empty(drive->queue) && HWGROUP(drive)->rq != scsi->pc->rq) {
/*
* FIXME - not sure this condition can ever occur
*/
printk (KERN_ERR "ide-scsi: cmd aborted!\n");
idescsi_free_bio(scsi->pc->rq->bio);
if (scsi->pc->rq->flags & REQ_SENSE)
kfree(scsi->pc->buffer);
kfree(scsi->pc->rq);
kfree(scsi->pc);
scsi->pc = NULL;
ret = SUCCESS;
}
ide_unlock:
spin_unlock_irq(&ide_lock);
no_drive:
if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
printk (KERN_WARNING "ide-scsi: abort returns %s\n", ret == SUCCESS?"success":"failed");
return ret;
}
static int idescsi_reset (Scsi_Cmnd *cmd)
static int idescsi_eh_reset (Scsi_Cmnd *cmd)
{
unsigned long flags;
struct request *req;
idescsi_scsi_t *idescsi = scsihost_to_idescsi(cmd->device->host);
ide_drive_t *drive = idescsi->drive;
idescsi_scsi_t *scsi = scsihost_to_idescsi(cmd->device->host);
ide_drive_t *drive = scsi->drive;
int ready = 0;
int ret = SUCCESS;
printk (KERN_ERR "ide-scsi: reset called for %lu\n", cmd->serial_number);
/* first null the handler for the drive and let any process
* doing IO (on another CPU) run to (partial) completion
* the lock prevents processing new requests */
spin_lock_irqsave(&ide_lock, flags);
while (HWGROUP(drive)->handler) {
HWGROUP(drive)->handler = NULL;
schedule_timeout(1);
/* In idescsi_eh_reset we forcefully remove the command from the ide subsystem and reset the device. */
if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
printk (KERN_WARNING "ide-scsi: reset called for %lu\n", cmd->serial_number);
if (!drive) {
printk (KERN_WARNING "ide-scsi: Drive not set in idescsi_eh_reset\n");
WARN_ON(1);
return FAILED;
}
spin_lock_irq(&ide_lock);
if (!scsi->pc || (req = scsi->pc->rq) != HWGROUP(drive)->rq || !HWGROUP(drive)->handler) {
printk (KERN_WARNING "ide-scsi: No active request in idescsi_eh_reset\n");
spin_unlock(&ide_lock);
return FAILED;
}
/* kill current request */
blkdev_dequeue_request(req);
end_that_request_last(req);
idescsi_free_bio(req->bio);
if (req->flags & REQ_SENSE)
kfree(scsi->pc->buffer);
kfree(scsi->pc);
scsi->pc = NULL;
kfree(req);
/* now nuke the drive queue */
while ((req = elv_next_request(drive->queue))) {
blkdev_dequeue_request(req);
end_that_request_last(req);
}
/* FIXME - this will probably leak memory */
HWGROUP(drive)->rq = NULL;
if (drive_to_idescsi(drive))
drive_to_idescsi(drive)->pc = NULL;
spin_unlock_irqrestore(&ide_lock, flags);
/* finally, reset the drive (and its partner on the bus...) */
ide_do_reset (drive);
return SUCCESS;
HWGROUP(drive)->handler = NULL;
HWGROUP(drive)->busy = 1; /* will set this to zero when ide reset finished */
spin_unlock_irq(&ide_lock);
ide_do_reset(drive);
/* ide_do_reset starts a polling handler which restarts itself every 50ms until the reset finishes */
do {
set_current_state(TASK_UNINTERRUPTIBLE);
spin_unlock_irq(cmd->device->host->host_lock);
schedule_timeout(HZ/20);
spin_lock_irq(cmd->device->host->host_lock);
} while ( HWGROUP(drive)->handler );
ready = drive_is_ready(drive);
HWGROUP(drive)->busy--;
if (!ready) {
printk (KERN_ERR "ide-scsi: reset failed!\n");
ret = FAILED;
}
return ret;
}
static int idescsi_bios(struct scsi_device *sdev, struct block_device *bdev,
......@@ -935,8 +1068,8 @@ static Scsi_Host_Template idescsi_template = {
.slave_configure = idescsi_slave_configure,
.ioctl = idescsi_ioctl,
.queuecommand = idescsi_queue,
.eh_abort_handler = idescsi_abort,
.eh_device_reset_handler = idescsi_reset,
.eh_abort_handler = idescsi_eh_abort,
.eh_host_reset_handler = idescsi_eh_reset,
.bios_param = idescsi_bios,
.can_queue = 40,
.this_id = -1,
......
......@@ -1612,6 +1612,7 @@ extern int ide_dma_enable(ide_drive_t *drive);
extern char *ide_xfer_verbose(u8 xfer_rate);
extern void ide_toggle_bounce(ide_drive_t *drive, int on);
extern int ide_set_xfer_rate(ide_drive_t *drive, u8 rate);
extern byte ide_dump_atapi_status(ide_drive_t *drive, const char *msg, byte stat);
typedef struct ide_pio_timings_s {
int setup_time; /* Address setup (ns) minimum */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment