Commit 20c64468 authored by Stefan Weinhuber's avatar Stefan Weinhuber Committed by Linus Torvalds

[PATCH] s390: dasd extended error reporting

The DASD extended error reporting is a facility that makes it possible to get
detailed information about certain problems in the DASD I/O.  This information
can be used to implement fail-over applications that can recover from these problems.
Signed-off-by: Stefan Weinhuber <wein@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 554a826e
...@@ -55,4 +55,12 @@ config DASD_DIAG ...@@ -55,4 +55,12 @@ config DASD_DIAG
Disks under VM. If you are not running under VM or unsure what it is, Disks under VM. If you are not running under VM or unsure what it is,
say "N". say "N".
config DASD_EER
bool "Extended error reporting (EER)"
depends on DASD
help
This driver provides a character device interface to the
DASD extended error reporting. This is only needed if you want to
use applications written for the EER facility.
endif endif
...@@ -7,6 +7,9 @@ dasd_fba_mod-objs := dasd_fba.o dasd_3370_erp.o dasd_9336_erp.o ...@@ -7,6 +7,9 @@ dasd_fba_mod-objs := dasd_fba.o dasd_3370_erp.o dasd_9336_erp.o
dasd_diag_mod-objs := dasd_diag.o dasd_diag_mod-objs := dasd_diag.o
dasd_mod-objs := dasd.o dasd_ioctl.o dasd_proc.o dasd_devmap.o \ dasd_mod-objs := dasd.o dasd_ioctl.o dasd_proc.o dasd_devmap.o \
dasd_genhd.o dasd_erp.o dasd_genhd.o dasd_erp.o
ifdef CONFIG_DASD_EER
dasd_mod-objs += dasd_eer.o
endif
obj-$(CONFIG_DASD) += dasd_mod.o obj-$(CONFIG_DASD) += dasd_mod.o
obj-$(CONFIG_DASD_DIAG) += dasd_diag_mod.o obj-$(CONFIG_DASD_DIAG) += dasd_diag_mod.o
......
...@@ -151,6 +151,8 @@ dasd_state_new_to_known(struct dasd_device *device) ...@@ -151,6 +151,8 @@ dasd_state_new_to_known(struct dasd_device *device)
static inline void static inline void
dasd_state_known_to_new(struct dasd_device * device) dasd_state_known_to_new(struct dasd_device * device)
{ {
/* Disable extended error reporting for this device. */
dasd_eer_disable(device);
/* Forget the discipline information. */ /* Forget the discipline information. */
if (device->discipline) if (device->discipline)
module_put(device->discipline->owner); module_put(device->discipline->owner);
...@@ -892,6 +894,9 @@ dasd_handle_state_change_pending(struct dasd_device *device) ...@@ -892,6 +894,9 @@ dasd_handle_state_change_pending(struct dasd_device *device)
struct dasd_ccw_req *cqr; struct dasd_ccw_req *cqr;
struct list_head *l, *n; struct list_head *l, *n;
/* First of all start sense subsystem status request. */
dasd_eer_snss(device);
device->stopped &= ~DASD_STOPPED_PENDING; device->stopped &= ~DASD_STOPPED_PENDING;
/* restart all 'running' IO on queue */ /* restart all 'running' IO on queue */
...@@ -1111,6 +1116,19 @@ __dasd_process_ccw_queue(struct dasd_device * device, ...@@ -1111,6 +1116,19 @@ __dasd_process_ccw_queue(struct dasd_device * device,
} }
goto restart; goto restart;
} }
/* First of all call extended error reporting. */
if (dasd_eer_enabled(device) &&
cqr->status == DASD_CQR_FAILED) {
dasd_eer_write(device, cqr, DASD_EER_FATALERROR);
/* restart request */
cqr->status = DASD_CQR_QUEUED;
cqr->retries = 255;
device->stopped |= DASD_STOPPED_QUIESCE;
goto restart;
}
/* Process finished ERP request. */ /* Process finished ERP request. */
if (cqr->refers) { if (cqr->refers) {
__dasd_process_erp(device, cqr); __dasd_process_erp(device, cqr);
...@@ -1248,7 +1266,8 @@ __dasd_start_head(struct dasd_device * device) ...@@ -1248,7 +1266,8 @@ __dasd_start_head(struct dasd_device * device)
cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, list); cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, list);
/* check FAILFAST */ /* check FAILFAST */
if (device->stopped & ~DASD_STOPPED_PENDING && if (device->stopped & ~DASD_STOPPED_PENDING &&
test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags)) { test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) &&
(!dasd_eer_enabled(device))) {
cqr->status = DASD_CQR_FAILED; cqr->status = DASD_CQR_FAILED;
dasd_schedule_bh(device); dasd_schedule_bh(device);
} }
...@@ -1807,6 +1826,7 @@ dasd_exit(void) ...@@ -1807,6 +1826,7 @@ dasd_exit(void)
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
dasd_proc_exit(); dasd_proc_exit();
#endif #endif
dasd_eer_exit();
if (dasd_page_cache != NULL) { if (dasd_page_cache != NULL) {
kmem_cache_destroy(dasd_page_cache); kmem_cache_destroy(dasd_page_cache);
dasd_page_cache = NULL; dasd_page_cache = NULL;
...@@ -2003,6 +2023,9 @@ dasd_generic_notify(struct ccw_device *cdev, int event) ...@@ -2003,6 +2023,9 @@ dasd_generic_notify(struct ccw_device *cdev, int event)
switch (event) { switch (event) {
case CIO_GONE: case CIO_GONE:
case CIO_NO_PATH: case CIO_NO_PATH:
/* First of all call extended error reporting. */
dasd_eer_write(device, NULL, DASD_EER_NOPATH);
if (device->state < DASD_STATE_BASIC) if (device->state < DASD_STATE_BASIC)
break; break;
/* Device is active. We want to keep it. */ /* Device is active. We want to keep it. */
...@@ -2060,6 +2083,7 @@ dasd_generic_auto_online (struct ccw_driver *dasd_discipline_driver) ...@@ -2060,6 +2083,7 @@ dasd_generic_auto_online (struct ccw_driver *dasd_discipline_driver)
put_driver(drv); put_driver(drv);
} }
static int __init static int __init
dasd_init(void) dasd_init(void)
{ {
...@@ -2092,6 +2116,9 @@ dasd_init(void) ...@@ -2092,6 +2116,9 @@ dasd_init(void)
rc = dasd_parse(); rc = dasd_parse();
if (rc) if (rc)
goto failed; goto failed;
rc = dasd_eer_init();
if (rc)
goto failed;
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
rc = dasd_proc_init(); rc = dasd_proc_init();
if (rc) if (rc)
......
...@@ -1108,6 +1108,9 @@ dasd_3990_handle_env_data(struct dasd_ccw_req * erp, char *sense) ...@@ -1108,6 +1108,9 @@ dasd_3990_handle_env_data(struct dasd_ccw_req * erp, char *sense)
case 0x0B: case 0x0B:
DEV_MESSAGE(KERN_WARNING, device, "%s", DEV_MESSAGE(KERN_WARNING, device, "%s",
"FORMAT F - Volume is suspended duplex"); "FORMAT F - Volume is suspended duplex");
/* call extended error reporting (EER) */
dasd_eer_write(device, erp->refers,
DASD_EER_PPRCSUSPEND);
break; break;
case 0x0C: case 0x0C:
DEV_MESSAGE(KERN_WARNING, device, "%s", DEV_MESSAGE(KERN_WARNING, device, "%s",
......
...@@ -715,10 +715,51 @@ dasd_discipline_show(struct device *dev, struct device_attribute *attr, char *bu ...@@ -715,10 +715,51 @@ dasd_discipline_show(struct device *dev, struct device_attribute *attr, char *bu
static DEVICE_ATTR(discipline, 0444, dasd_discipline_show, NULL); static DEVICE_ATTR(discipline, 0444, dasd_discipline_show, NULL);
/*
* extended error-reporting
*/
/*
 * sysfs show method for the eer_enabled attribute.
 *
 * Looks up the devmap by the bus id of @dev and writes "1\n" into @buf if
 * dasd_eer_enabled() reports true for the attached dasd device. "0\n" is
 * written in every other case, including a failed devmap lookup or a devmap
 * without a device. Returns the value returned by snprintf().
 */
static ssize_t
dasd_eer_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct dasd_devmap *map = dasd_find_busid(dev->bus_id);
	int enabled = 0;

	/* Only ask the EER code when there really is a device to ask about. */
	if (!IS_ERR(map) && map->device)
		enabled = dasd_eer_enabled(map->device);
	if (enabled)
		return snprintf(buf, PAGE_SIZE, "1\n");
	return snprintf(buf, PAGE_SIZE, "0\n");
}
/*
 * sysfs store method for the eer_enabled attribute.
 *
 * A first character of '1' in @buf enables extended error reporting for the
 * dasd device belonging to @dev; any other first character disables it.
 *
 * Returns @count on success and also when the devmap has no device attached
 * (the write is then accepted without any effect). Returns the error encoded
 * in the devmap pointer if the devmap lookup fails, or the error code from
 * dasd_eer_enable() if enabling fails.
 */
static ssize_t
dasd_eer_store(struct device *dev, struct device_attribute *attr,
	       const char *buf, size_t count)
{
	struct dasd_devmap *map = dasd_devmap_from_cdev(to_ccwdev(dev));
	int err;

	if (IS_ERR(map))
		return PTR_ERR(map);
	/* Nothing to switch without a device; report the write as consumed. */
	if (!map->device)
		return count;

	if (buf[0] != '1') {
		dasd_eer_disable(map->device);
		return count;
	}

	err = dasd_eer_enable(map->device);
	if (err)
		return err;
	return count;
}
static DEVICE_ATTR(eer_enabled, 0644, dasd_eer_show, dasd_eer_store);
static struct attribute * dasd_attrs[] = { static struct attribute * dasd_attrs[] = {
&dev_attr_readonly.attr, &dev_attr_readonly.attr,
&dev_attr_discipline.attr, &dev_attr_discipline.attr,
&dev_attr_use_diag.attr, &dev_attr_use_diag.attr,
&dev_attr_eer_enabled.attr,
NULL, NULL,
}; };
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#define DASD_ECKD_CCW_PSF 0x27 #define DASD_ECKD_CCW_PSF 0x27
#define DASD_ECKD_CCW_RSSD 0x3e #define DASD_ECKD_CCW_RSSD 0x3e
#define DASD_ECKD_CCW_LOCATE_RECORD 0x47 #define DASD_ECKD_CCW_LOCATE_RECORD 0x47
#define DASD_ECKD_CCW_SNSS 0x54
#define DASD_ECKD_CCW_DEFINE_EXTENT 0x63 #define DASD_ECKD_CCW_DEFINE_EXTENT 0x63
#define DASD_ECKD_CCW_WRITE_MT 0x85 #define DASD_ECKD_CCW_WRITE_MT 0x85
#define DASD_ECKD_CCW_READ_MT 0x86 #define DASD_ECKD_CCW_READ_MT 0x86
......
This diff is collapsed.
...@@ -268,6 +268,23 @@ struct dasd_discipline { ...@@ -268,6 +268,23 @@ struct dasd_discipline {
extern struct dasd_discipline *dasd_diag_discipline_pointer; extern struct dasd_discipline *dasd_diag_discipline_pointer;
/*
* Notification numbers for extended error reporting notifications:
* The DASD_EER_DISABLE notification is sent before a dasd_device (and its
* eer pointer) is freed. The error reporting module needs to do all necessary
* cleanup steps.
* The DASD_EER_TRIGGER notification sends the actual error reports (triggers).
*/
#define DASD_EER_DISABLE 0
#define DASD_EER_TRIGGER 1
/* Trigger IDs for extended error reporting DASD_EER_TRIGGER notification */
#define DASD_EER_FATALERROR 1
#define DASD_EER_NOPATH 2
#define DASD_EER_STATECHANGE 3
#define DASD_EER_PPRCSUSPEND 4
struct dasd_device { struct dasd_device {
/* Block device stuff. */ /* Block device stuff. */
struct gendisk *gdp; struct gendisk *gdp;
...@@ -281,6 +298,9 @@ struct dasd_device { ...@@ -281,6 +298,9 @@ struct dasd_device {
unsigned long flags; /* per device flags */ unsigned long flags; /* per device flags */
unsigned short features; /* copy of devmap-features (read-only!) */ unsigned short features; /* copy of devmap-features (read-only!) */
/* extended error reporting stuff (eer) */
struct dasd_ccw_req *eer_cqr;
/* Device discipline stuff. */ /* Device discipline stuff. */
struct dasd_discipline *discipline; struct dasd_discipline *discipline;
struct dasd_discipline *base_discipline; struct dasd_discipline *base_discipline;
...@@ -326,6 +346,8 @@ struct dasd_device { ...@@ -326,6 +346,8 @@ struct dasd_device {
/* per device flags */ /* per device flags */
#define DASD_FLAG_DSC_ERROR 2 /* return -EIO when disconnected */ #define DASD_FLAG_DSC_ERROR 2 /* return -EIO when disconnected */
#define DASD_FLAG_OFFLINE 3 /* device is in offline processing */ #define DASD_FLAG_OFFLINE 3 /* device is in offline processing */
#define DASD_FLAG_EER_SNSS 4 /* A SNSS is required */
#define DASD_FLAG_EER_IN_USE 5 /* A SNSS request is running */
void dasd_put_device_wake(struct dasd_device *); void dasd_put_device_wake(struct dasd_device *);
...@@ -545,6 +567,30 @@ dasd_era_t dasd_9336_erp_examine(struct dasd_ccw_req *, struct irb *); ...@@ -545,6 +567,30 @@ dasd_era_t dasd_9336_erp_examine(struct dasd_ccw_req *, struct irb *);
dasd_era_t dasd_9343_erp_examine(struct dasd_ccw_req *, struct irb *); dasd_era_t dasd_9343_erp_examine(struct dasd_ccw_req *, struct irb *);
struct dasd_ccw_req *dasd_9343_erp_action(struct dasd_ccw_req *); struct dasd_ccw_req *dasd_9343_erp_action(struct dasd_ccw_req *);
/*
 * externals in dasd_eer.c
 *
 * With CONFIG_DASD_EER set, the extended error reporting (EER) entry points
 * are declared here. Without it, the same names are provided as no-op macros
 * below, so callers can use them without an #ifdef of their own.
 */
#ifdef CONFIG_DASD_EER
int dasd_eer_init(void);
void dasd_eer_exit(void);
int dasd_eer_enable(struct dasd_device *);
void dasd_eer_disable(struct dasd_device *);
void dasd_eer_write(struct dasd_device *, struct dasd_ccw_req *cqr,
unsigned int id);
void dasd_eer_snss(struct dasd_device *);
/* EER counts as enabled for a device while its eer_cqr pointer is non-NULL. */
static inline int dasd_eer_enabled(struct dasd_device *device)
{
return device->eer_cqr != NULL;
}
#else
/*
 * EER not configured: init, enable and enabled evaluate to 0, the remaining
 * calls expand to empty statements.
 */
#define dasd_eer_init() (0)
#define dasd_eer_exit() do { } while (0)
#define dasd_eer_enable(d) (0)
#define dasd_eer_disable(d) do { } while (0)
#define dasd_eer_write(d,c,i) do { } while (0)
#define dasd_eer_snss(d) do { } while (0)
#define dasd_eer_enabled(d) (0)
#endif /* CONFIG_DASD_EER */
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* DASD_H */ #endif /* DASD_H */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment