Commit 9467a9b3, authored by Martin Peschke, committed by James Bottomley

[SCSI] zfcp: Trace all triggers of error recovery activity

This patch allows any recovery event to be traced back to an exact
cause, e.g. a particular request identified by an id (address).
Signed-off-by: Martin Peschke <mp3@de.ibm.com>
Signed-off-by: Christof Schmitt <christof.schmitt@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
parent 698ec016
......@@ -1326,10 +1326,10 @@ zfcp_nameserver_enqueue(struct zfcp_adapter *adapter)
#define ZFCP_LOG_AREA ZFCP_LOG_AREA_FC
static void
zfcp_fsf_incoming_els_rscn(struct zfcp_adapter *adapter,
struct fsf_status_read_buffer *status_buffer)
static void zfcp_fsf_incoming_els_rscn(struct zfcp_fsf_req *fsf_req)
{
struct fsf_status_read_buffer *status_buffer = (void*)fsf_req->data;
struct zfcp_adapter *adapter = fsf_req->adapter;
struct fcp_rscn_head *fcp_rscn_head;
struct fcp_rscn_element *fcp_rscn_element;
struct zfcp_port *port;
......@@ -1376,7 +1376,8 @@ zfcp_fsf_incoming_els_rscn(struct zfcp_adapter *adapter,
ZFCP_LOG_INFO("incoming RSCN, trying to open "
"port 0x%016Lx\n", port->wwpn);
zfcp_erp_port_reopen(port,
ZFCP_STATUS_COMMON_ERP_FAILED);
ZFCP_STATUS_COMMON_ERP_FAILED,
82, (u64)fsf_req);
continue;
}
......@@ -1407,10 +1408,10 @@ zfcp_fsf_incoming_els_rscn(struct zfcp_adapter *adapter,
}
}
static void
zfcp_fsf_incoming_els_plogi(struct zfcp_adapter *adapter,
struct fsf_status_read_buffer *status_buffer)
static void zfcp_fsf_incoming_els_plogi(struct zfcp_fsf_req *fsf_req)
{
struct fsf_status_read_buffer *status_buffer = (void*)fsf_req->data;
struct zfcp_adapter *adapter = fsf_req->adapter;
struct fsf_plogi *els_plogi;
struct zfcp_port *port;
unsigned long flags;
......@@ -1429,14 +1430,14 @@ zfcp_fsf_incoming_els_plogi(struct zfcp_adapter *adapter,
status_buffer->d_id,
zfcp_get_busid_by_adapter(adapter));
} else {
zfcp_erp_port_forced_reopen(port, 0);
zfcp_erp_port_forced_reopen(port, 0, 83, (u64)fsf_req);
}
}
static void
zfcp_fsf_incoming_els_logo(struct zfcp_adapter *adapter,
struct fsf_status_read_buffer *status_buffer)
static void zfcp_fsf_incoming_els_logo(struct zfcp_fsf_req *fsf_req)
{
struct fsf_status_read_buffer *status_buffer = (void*)fsf_req->data;
struct zfcp_adapter *adapter = fsf_req->adapter;
struct fcp_logo *els_logo = (struct fcp_logo *) status_buffer->payload;
struct zfcp_port *port;
unsigned long flags;
......@@ -1454,7 +1455,7 @@ zfcp_fsf_incoming_els_logo(struct zfcp_adapter *adapter,
status_buffer->d_id,
zfcp_get_busid_by_adapter(adapter));
} else {
zfcp_erp_port_forced_reopen(port, 0);
zfcp_erp_port_forced_reopen(port, 0, 84, (u64)fsf_req);
}
}
......@@ -1481,12 +1482,12 @@ zfcp_fsf_incoming_els(struct zfcp_fsf_req *fsf_req)
zfcp_san_dbf_event_incoming_els(fsf_req);
if (els_type == LS_PLOGI)
zfcp_fsf_incoming_els_plogi(adapter, status_buffer);
zfcp_fsf_incoming_els_plogi(fsf_req);
else if (els_type == LS_LOGO)
zfcp_fsf_incoming_els_logo(adapter, status_buffer);
zfcp_fsf_incoming_els_logo(fsf_req);
else if ((els_type & 0xffff0000) == LS_RSCN)
/* we are only concerned with the command, not the length */
zfcp_fsf_incoming_els_rscn(adapter, status_buffer);
zfcp_fsf_incoming_els_rscn(fsf_req);
else
zfcp_fsf_incoming_els_unknown(adapter, status_buffer);
}
......
......@@ -172,7 +172,7 @@ zfcp_ccw_set_online(struct ccw_device *ccw_device)
zfcp_erp_modify_adapter_status(adapter, 10, 0,
ZFCP_STATUS_COMMON_RUNNING, ZFCP_SET);
zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED);
zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED, 85, 0);
zfcp_erp_wait(adapter);
goto out;
......@@ -197,7 +197,7 @@ zfcp_ccw_set_offline(struct ccw_device *ccw_device)
down(&zfcp_data.config_sema);
adapter = dev_get_drvdata(&ccw_device->dev);
zfcp_erp_adapter_shutdown(adapter, 0);
zfcp_erp_adapter_shutdown(adapter, 0, 86, 0);
zfcp_erp_wait(adapter);
zfcp_erp_thread_kill(adapter);
up(&zfcp_data.config_sema);
......@@ -224,13 +224,13 @@ zfcp_ccw_notify(struct ccw_device *ccw_device, int event)
ZFCP_LOG_NORMAL("adapter %s: device gone\n",
zfcp_get_busid_by_adapter(adapter));
debug_text_event(adapter->erp_dbf,1,"dev_gone");
zfcp_erp_adapter_shutdown(adapter, 0);
zfcp_erp_adapter_shutdown(adapter, 0, 87, 0);
break;
case CIO_NO_PATH:
ZFCP_LOG_NORMAL("adapter %s: no path\n",
zfcp_get_busid_by_adapter(adapter));
debug_text_event(adapter->erp_dbf,1,"no_path");
zfcp_erp_adapter_shutdown(adapter, 0);
zfcp_erp_adapter_shutdown(adapter, 0, 88, 0);
break;
case CIO_OPER:
ZFCP_LOG_NORMAL("adapter %s: operational again\n",
......@@ -240,7 +240,7 @@ zfcp_ccw_notify(struct ccw_device *ccw_device, int event)
ZFCP_STATUS_COMMON_RUNNING,
ZFCP_SET);
zfcp_erp_adapter_reopen(adapter,
ZFCP_STATUS_COMMON_ERP_FAILED);
ZFCP_STATUS_COMMON_ERP_FAILED, 89, 0);
break;
}
zfcp_erp_wait(adapter);
......@@ -272,7 +272,7 @@ zfcp_ccw_shutdown(struct ccw_device *cdev)
down(&zfcp_data.config_sema);
adapter = dev_get_drvdata(&cdev->dev);
zfcp_erp_adapter_shutdown(adapter, 0);
zfcp_erp_adapter_shutdown(adapter, 0, 90, 0);
zfcp_erp_wait(adapter);
up(&zfcp_data.config_sema);
}
......
......@@ -523,6 +523,7 @@ static struct debug_view zfcp_hba_dbf_view = {
/*
 * Tag names for the recovery trace record types, indexed by the
 * ZFCP_REC_DBF_ID_* record identifiers.
 */
static const char *zfcp_rec_dbf_tags[] = {
[ZFCP_REC_DBF_ID_THREAD] = "thread",
[ZFCP_REC_DBF_ID_TARGET] = "target",
[ZFCP_REC_DBF_ID_TRIGGER] = "trigger",
};
static const char *zfcp_rec_dbf_ids[] = {
......@@ -587,6 +588,89 @@ static const char *zfcp_rec_dbf_ids[] = {
[59] = "unit access denied open unit",
[60] = "shared unit access denied open unit",
[61] = "unit access denied fcp",
[62] = "request timeout",
[63] = "adisc link test reject or timeout",
[64] = "adisc link test d_id changed",
[65] = "adisc link test failed",
[66] = "recovery out of memory",
[67] = "adapter recovery repeated after state change",
[68] = "port recovery repeated after state change",
[69] = "unit recovery repeated after state change",
[70] = "port recovery follow-up after successful adapter recovery",
[71] = "adapter recovery escalation after failed adapter recovery",
[72] = "port recovery follow-up after successful physical port "
"recovery",
[73] = "adapter recovery escalation after failed physical port "
"recovery",
[74] = "unit recovery follow-up after successful port recovery",
[75] = "physical port recovery escalation after failed port "
"recovery",
[76] = "port recovery escalation after failed unit recovery",
[77] = "recovery opening nameserver port",
[78] = "duplicate request id",
[79] = "link down",
[80] = "exclusive read-only unit access unsupported",
[81] = "shared read-write unit access unsupported",
[82] = "incoming rscn",
[83] = "incoming plogi",
[84] = "incoming logo",
[85] = "online",
[86] = "offline",
[87] = "ccw device gone",
[88] = "ccw device no path",
[89] = "ccw device operational",
[90] = "ccw device shutdown",
[91] = "sysfs port addition",
[92] = "sysfs port removal",
[93] = "sysfs adapter recovery",
[94] = "sysfs unit addition",
[95] = "sysfs unit removal",
[96] = "sysfs port recovery",
[97] = "sysfs unit recovery",
[98] = "sequence number mismatch",
[99] = "link up",
[100] = "error state",
[101] = "status read physical port closed",
[102] = "link up status read",
[103] = "too many failed status read buffers",
[104] = "port handle not valid abort",
[105] = "lun handle not valid abort",
[106] = "port handle not valid ct",
[107] = "port handle not valid close port",
[108] = "port handle not valid close physical port",
[109] = "port handle not valid open unit",
[110] = "port handle not valid close unit",
[111] = "lun handle not valid close unit",
[112] = "port handle not valid fcp",
[113] = "lun handle not valid fcp",
[114] = "handle mismatch fcp",
[115] = "lun not valid fcp",
[116] = "qdio send failed",
[117] = "version mismatch",
[118] = "incompatible qtcb type",
[119] = "unknown protocol status",
[120] = "unknown fsf command",
[121] = "no recommendation for status qualifier",
[122] = "status read physical port closed in error",
[123] = "fc service class not supported ct",
[124] = "fc service class not supported els",
[125] = "need newer zfcp",
[126] = "need newer microcode",
[127] = "arbitrated loop not supported",
[128] = "unknown topology",
[129] = "qtcb size mismatch",
[130] = "unknown fsf status ecd",
[131] = "fcp request too big",
[132] = "fc service class not supported fcp",
[133] = "data direction not valid fcp",
[134] = "command length not valid fcp",
[135] = "status read act update",
[136] = "status read cfdc update",
[137] = "hbaapi port open",
[138] = "hbaapi unit open",
[139] = "hbaapi unit shutdown",
[140] = "qdio error",
[141] = "scsi host reset",
};
static int zfcp_rec_dbf_view_format(debug_info_t *id, struct debug_view *view,
......@@ -613,6 +697,17 @@ static int zfcp_rec_dbf_view_format(debug_info_t *id, struct debug_view *view,
zfcp_dbf_out(&p, "wwpn", "0x%016Lx", r->u.target.wwpn);
zfcp_dbf_out(&p, "fcp_lun", "0x%016Lx", r->u.target.fcp_lun);
break;
case ZFCP_REC_DBF_ID_TRIGGER:
zfcp_dbf_out(&p, "reference", "0x%016Lx", r->u.trigger.ref);
zfcp_dbf_out(&p, "erp_action", "0x%016Lx", r->u.trigger.action);
zfcp_dbf_out(&p, "requested", "%d", r->u.trigger.want);
zfcp_dbf_out(&p, "executed", "%d", r->u.trigger.need);
zfcp_dbf_out(&p, "wwpn", "0x%016Lx", r->u.trigger.wwpn);
zfcp_dbf_out(&p, "fcp_lun", "0x%016Lx", r->u.trigger.fcp_lun);
zfcp_dbf_out(&p, "adapter_status", "0x%08x", r->u.trigger.as);
zfcp_dbf_out(&p, "port_status", "0x%08x", r->u.trigger.ps);
zfcp_dbf_out(&p, "unit_status", "0x%08x", r->u.trigger.us);
break;
}
sprintf(p, "\n");
return (p - buf) + 1;
......@@ -727,6 +822,45 @@ void zfcp_rec_dbf_event_unit(u8 id, u64 ref, struct zfcp_unit *unit)
unit->fcp_lun);
}
/**
 * zfcp_rec_dbf_event_trigger - write a "trigger" record to the recovery trace
 * @id2: identifier for the error recovery trigger
 * @ref: additional reference (e.g. the request that caused the trigger)
 * @want: error recovery action that was originally requested
 * @need: error recovery action that is actually initiated
 * @action: address of the error recovery action struct (0 if none)
 * @adapter: adapter the event belongs to; its trace buffer and lock are used
 * @port: port involved, or NULL
 * @unit: unit involved, or NULL
 *
 * The record is assembled in the adapter's shared record buffer while
 * holding the adapter's rec_dbf_lock. Port and unit fields stay zero when
 * the respective pointer is NULL. Events with a non-zero @action are
 * written at debug level 1, all others at level 4.
 */
void zfcp_rec_dbf_event_trigger(u8 id2, u64 ref, u8 want, u8 need, u64 action,
				struct zfcp_adapter *adapter,
				struct zfcp_port *port, struct zfcp_unit *unit)
{
	struct zfcp_rec_dbf_record *rec = &adapter->rec_dbf_buf;
	/* trace level depends only on whether an erp action is attached */
	int level = action ? 1 : 4;
	unsigned long flags;

	spin_lock_irqsave(&adapter->rec_dbf_lock, flags);

	/* start from an all-zero record so unset fields are well defined */
	memset(rec, 0, sizeof(*rec));

	/* record header */
	rec->id = ZFCP_REC_DBF_ID_TRIGGER;
	rec->id2 = id2;

	/* values handed in by the caller */
	rec->u.trigger.ref = ref;
	rec->u.trigger.action = action;
	rec->u.trigger.want = want;
	rec->u.trigger.need = need;

	/* status snapshots of the affected objects */
	rec->u.trigger.as = atomic_read(&adapter->status);
	if (port) {
		rec->u.trigger.ps = atomic_read(&port->status);
		rec->u.trigger.wwpn = port->wwpn;
	}
	if (unit) {
		rec->u.trigger.us = atomic_read(&unit->status);
		rec->u.trigger.fcp_lun = unit->fcp_lun;
	}

	debug_event(adapter->rec_dbf, level, rec, sizeof(*rec));

	spin_unlock_irqrestore(&adapter->rec_dbf_lock, flags);
}
static void
_zfcp_san_dbf_event_common_ct(const char *tag, struct zfcp_fsf_req *fsf_req,
u32 s_id, u32 d_id, void *buffer, int buflen)
......
......@@ -295,18 +295,32 @@ struct zfcp_rec_dbf_record_target {
u32 erp_count;
} __attribute__ ((packed));
/**
 * struct zfcp_rec_dbf_record_trigger - trace payload for a triggered recovery
 * @want: originally requested error recovery action
 * @need: error recovery action actually initiated
 * @as: adapter status at the time the record is written
 * @ps: port status (zero if no port was given)
 * @us: unit status (zero if no unit was given)
 * @ref: additional reference, e.g. the address of a request
 * @action: address of the error recovery action struct
 * @wwpn: WWPN of the port (zero if no port was given)
 * @fcp_lun: FCP LUN of the unit (zero if no unit was given)
 *
 * Filled in by zfcp_rec_dbf_event_trigger(). The struct is packed and is
 * handed to debug_event() as part of struct zfcp_rec_dbf_record, so the
 * field order and sizes define the layout of the trace entry.
 */
struct zfcp_rec_dbf_record_trigger {
u8 want;
u8 need;
u32 as;
u32 ps;
u32 us;
u64 ref;
u64 action;
u64 wwpn;
u64 fcp_lun;
} __attribute__ ((packed));
/**
 * struct zfcp_rec_dbf_record - one entry of the recovery trace
 * @id: record type, one of the ZFCP_REC_DBF_ID_* values
 * @id2: secondary identifier; for trigger records this is the identifier
 *       of the error recovery trigger
 * @u: type-specific payload; trigger records (ZFCP_REC_DBF_ID_TRIGGER)
 *     use @u.trigger. NOTE(review): thread/target presumably map to
 *     ZFCP_REC_DBF_ID_THREAD/_TARGET in the same way - confirm.
 */
struct zfcp_rec_dbf_record {
u8 id;
u8 id2;
union {
struct zfcp_rec_dbf_record_thread thread;
struct zfcp_rec_dbf_record_target target;
struct zfcp_rec_dbf_record_trigger trigger;
} u;
} __attribute__ ((packed));
/*
 * Record type identifiers for the recovery trace. The value is stored in
 * zfcp_rec_dbf_record.id and is also used as index into zfcp_rec_dbf_tags[].
 */
enum {
ZFCP_REC_DBF_ID_THREAD,
ZFCP_REC_DBF_ID_TARGET,
ZFCP_REC_DBF_ID_TRIGGER,
};
struct zfcp_hba_dbf_record_response {
......
This diff is collapsed.
......@@ -133,20 +133,20 @@ extern struct fc_function_template zfcp_transport_functions;
/******************************** ERP ****************************************/
extern void zfcp_erp_modify_adapter_status(struct zfcp_adapter *, u8, u64, u32,
int);
extern int zfcp_erp_adapter_reopen(struct zfcp_adapter *, int);
extern int zfcp_erp_adapter_shutdown(struct zfcp_adapter *, int);
extern int zfcp_erp_adapter_reopen(struct zfcp_adapter *, int, u8, u64);
extern int zfcp_erp_adapter_shutdown(struct zfcp_adapter *, int, u8, u64);
extern void zfcp_erp_adapter_failed(struct zfcp_adapter *, u8, u64);
extern void zfcp_erp_modify_port_status(struct zfcp_port *, u8, u64, u32, int);
extern int zfcp_erp_port_reopen(struct zfcp_port *, int);
extern int zfcp_erp_port_shutdown(struct zfcp_port *, int);
extern int zfcp_erp_port_forced_reopen(struct zfcp_port *, int);
extern int zfcp_erp_port_reopen(struct zfcp_port *, int, u8, u64);
extern int zfcp_erp_port_shutdown(struct zfcp_port *, int, u8, u64);
extern int zfcp_erp_port_forced_reopen(struct zfcp_port *, int, u8, u64);
extern void zfcp_erp_port_failed(struct zfcp_port *, u8, u64);
extern int zfcp_erp_port_reopen_all(struct zfcp_adapter *, int);
extern int zfcp_erp_port_reopen_all(struct zfcp_adapter *, int, u8, u64);
extern void zfcp_erp_modify_unit_status(struct zfcp_unit *, u8, u64, u32, int);
extern int zfcp_erp_unit_reopen(struct zfcp_unit *, int);
extern int zfcp_erp_unit_shutdown(struct zfcp_unit *, int);
extern int zfcp_erp_unit_reopen(struct zfcp_unit *, int, u8, u64);
extern int zfcp_erp_unit_shutdown(struct zfcp_unit *, int, u8, u64);
extern void zfcp_erp_unit_failed(struct zfcp_unit *, u8, u64);
extern int zfcp_erp_thread_setup(struct zfcp_adapter *);
......@@ -160,9 +160,9 @@ extern void zfcp_erp_port_boxed(struct zfcp_port *, u8 id, u64 ref);
extern void zfcp_erp_unit_boxed(struct zfcp_unit *, u8 id, u64 ref);
extern void zfcp_erp_port_access_denied(struct zfcp_port *, u8 id, u64 ref);
extern void zfcp_erp_unit_access_denied(struct zfcp_unit *, u8 id, u64 ref);
extern void zfcp_erp_adapter_access_changed(struct zfcp_adapter *);
extern void zfcp_erp_port_access_changed(struct zfcp_port *);
extern void zfcp_erp_unit_access_changed(struct zfcp_unit *);
extern void zfcp_erp_adapter_access_changed(struct zfcp_adapter *, u8, u64);
extern void zfcp_erp_port_access_changed(struct zfcp_port *, u8, u64);
extern void zfcp_erp_unit_access_changed(struct zfcp_unit *, u8, u64);
/******************************** AUX ****************************************/
extern void zfcp_rec_dbf_event_thread(u8 id, struct zfcp_adapter *adapter,
......@@ -170,6 +170,9 @@ extern void zfcp_rec_dbf_event_thread(u8 id, struct zfcp_adapter *adapter,
extern void zfcp_rec_dbf_event_adapter(u8 id, u64 ref, struct zfcp_adapter *);
extern void zfcp_rec_dbf_event_port(u8 id, u64 ref, struct zfcp_port *port);
extern void zfcp_rec_dbf_event_unit(u8 id, u64 ref, struct zfcp_unit *unit);
extern void zfcp_rec_dbf_event_trigger(u8 id, u64 ref, u8 want, u8 need,
u64 action, struct zfcp_adapter *,
struct zfcp_port *, struct zfcp_unit *);
extern void zfcp_hba_dbf_event_fsf_response(struct zfcp_fsf_req *);
extern void zfcp_hba_dbf_event_fsf_unsol(const char *, struct zfcp_adapter *,
......
This diff is collapsed.
......@@ -175,8 +175,8 @@ zfcp_qdio_handler_error_check(struct zfcp_adapter *adapter, unsigned int status,
* which is set again in case we have missed by a mile.
*/
zfcp_erp_adapter_reopen(adapter,
ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED |
ZFCP_STATUS_COMMON_ERP_FAILED);
ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED |
ZFCP_STATUS_COMMON_ERP_FAILED, 140, 0);
}
return retval;
}
......
......@@ -529,7 +529,7 @@ static int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt)
unit->fcp_lun, unit->port->wwpn,
zfcp_get_busid_by_adapter(unit->port->adapter));
zfcp_erp_adapter_reopen(adapter, 0);
zfcp_erp_adapter_reopen(adapter, 0, 141, (u64)scpnt);
zfcp_erp_wait(adapter);
return SUCCESS;
......
......@@ -89,7 +89,7 @@ zfcp_sysfs_port_add_store(struct device *dev, struct device_attribute *attr, con
retval = 0;
zfcp_erp_port_reopen(port, 0);
zfcp_erp_port_reopen(port, 0, 91, 0);
zfcp_erp_wait(port->adapter);
zfcp_port_put(port);
out:
......@@ -147,7 +147,7 @@ zfcp_sysfs_port_remove_store(struct device *dev, struct device_attribute *attr,
goto out;
}
zfcp_erp_port_shutdown(port, 0);
zfcp_erp_port_shutdown(port, 0, 92, 0);
zfcp_erp_wait(adapter);
zfcp_port_put(port);
zfcp_port_dequeue(port);
......@@ -193,7 +193,7 @@ zfcp_sysfs_adapter_failed_store(struct device *dev, struct device_attribute *att
zfcp_erp_modify_adapter_status(adapter, 44, 0,
ZFCP_STATUS_COMMON_RUNNING, ZFCP_SET);
zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED);
zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED, 93, 0);
zfcp_erp_wait(adapter);
out:
up(&zfcp_data.config_sema);
......
......@@ -94,7 +94,7 @@ zfcp_sysfs_unit_add_store(struct device *dev, struct device_attribute *attr, con
retval = 0;
zfcp_erp_unit_reopen(unit, 0);
zfcp_erp_unit_reopen(unit, 0, 94, 0);
zfcp_erp_wait(unit->port->adapter);
zfcp_unit_put(unit);
out:
......@@ -150,7 +150,7 @@ zfcp_sysfs_unit_remove_store(struct device *dev, struct device_attribute *attr,
goto out;
}
zfcp_erp_unit_shutdown(unit, 0);
zfcp_erp_unit_shutdown(unit, 0, 95, 0);
zfcp_erp_wait(unit->port->adapter);
zfcp_unit_put(unit);
zfcp_unit_dequeue(unit);
......@@ -195,7 +195,7 @@ zfcp_sysfs_port_failed_store(struct device *dev, struct device_attribute *attr,
zfcp_erp_modify_port_status(port, 45, 0,
ZFCP_STATUS_COMMON_RUNNING, ZFCP_SET);
zfcp_erp_port_reopen(port, ZFCP_STATUS_COMMON_ERP_FAILED);
zfcp_erp_port_reopen(port, ZFCP_STATUS_COMMON_ERP_FAILED, 96, 0);
zfcp_erp_wait(port->adapter);
out:
up(&zfcp_data.config_sema);
......
......@@ -96,7 +96,7 @@ zfcp_sysfs_unit_failed_store(struct device *dev, struct device_attribute *attr,
zfcp_erp_modify_unit_status(unit, 46, 0,
ZFCP_STATUS_COMMON_RUNNING, ZFCP_SET);
zfcp_erp_unit_reopen(unit, ZFCP_STATUS_COMMON_ERP_FAILED);
zfcp_erp_unit_reopen(unit, ZFCP_STATUS_COMMON_ERP_FAILED, 97, 0);
zfcp_erp_wait(unit->port->adapter);
out:
up(&zfcp_data.config_sema);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment