Commit 1b28f170 authored by Gavin Shan, committed by Michael Ellerman

powerpc/eeh: Allow to set maximal frozen times

When a PE's freeze count exceeds the maximum allowed number of freezes,
which is currently 5, the PE is forced offline permanently. Once the
PE has been removed permanently, the machine must be rebooted to bring
the PE back. This is inconvenient when testing EEH functionality.

This patch exposes the maximum allowed number of freezes through a
debugfs entry (/sys/kernel/debug/powerpc/eeh_max_freezes).
Requested-by: Ryan Grimm <grimm@linux.vnet.ibm.com>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent 432227e9
...@@ -218,6 +218,7 @@ struct eeh_ops { ...@@ -218,6 +218,7 @@ struct eeh_ops {
}; };
extern int eeh_subsystem_flags; extern int eeh_subsystem_flags;
extern int eeh_max_freezes;
extern struct eeh_ops *eeh_ops; extern struct eeh_ops *eeh_ops;
extern raw_spinlock_t confirm_error_lock; extern raw_spinlock_t confirm_error_lock;
...@@ -255,12 +256,6 @@ static inline void eeh_serialize_unlock(unsigned long flags) ...@@ -255,12 +256,6 @@ static inline void eeh_serialize_unlock(unsigned long flags)
raw_spin_unlock_irqrestore(&confirm_error_lock, flags); raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
} }
/*
* Max number of EEH freezes allowed before we consider the device
* to be permanently disabled.
*/
#define EEH_MAX_ALLOWED_FREEZES 5
typedef void *(*eeh_traverse_func)(void *data, void *flag); typedef void *(*eeh_traverse_func)(void *data, void *flag);
void eeh_set_pe_aux_size(int size); void eeh_set_pe_aux_size(int size);
int eeh_phb_pe_create(struct pci_controller *phb); int eeh_phb_pe_create(struct pci_controller *phb);
......
...@@ -104,6 +104,13 @@ ...@@ -104,6 +104,13 @@
int eeh_subsystem_flags; int eeh_subsystem_flags;
EXPORT_SYMBOL(eeh_subsystem_flags); EXPORT_SYMBOL(eeh_subsystem_flags);
/*
 * Maximum number of EEH freezes allowed for a PE. If a particular
 * PE's freeze count within the last hour exceeds this limit, the PE
 * is forced offline permanently. Defaults to 5.
 */
int eeh_max_freezes = 5;
/* Platform dependent EEH operations */ /* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL; struct eeh_ops *eeh_ops = NULL;
...@@ -1652,8 +1659,22 @@ static int eeh_enable_dbgfs_get(void *data, u64 *val) ...@@ -1652,8 +1659,22 @@ static int eeh_enable_dbgfs_get(void *data, u64 *val)
return 0; return 0;
} }
/*
 * debugfs write handler for eeh_max_freezes.
 *
 * @data: unused
 * @val:  new maximum number of freezes allowed
 *
 * eeh_max_freezes is a signed int, so a value above INT_MAX would be
 * truncated on assignment and could be stored as a negative or zero
 * limit, causing the "freeze_count > eeh_max_freezes" check to trip on
 * the very first freeze. Reject such values instead of storing a
 * wrapped limit.
 *
 * Returns 0 on success, -EINVAL if @val does not fit in an int.
 */
static int eeh_freeze_dbgfs_set(void *data, u64 val)
{
	if (val > INT_MAX)
		return -EINVAL;

	eeh_max_freezes = val;
	return 0;
}
/*
 * debugfs read handler for eeh_max_freezes: stores the current limit
 * in @val. @data is unused. Always returns 0.
 */
static int eeh_freeze_dbgfs_get(void *data, u64 *val)
{
	u64 limit = eeh_max_freezes;

	*val = limit;

	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get, DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
eeh_enable_dbgfs_set, "0x%llx\n"); eeh_enable_dbgfs_set, "0x%llx\n");
/*
 * File operations for the eeh_max_freezes debugfs entry: reads go
 * through eeh_freeze_dbgfs_get(), writes through eeh_freeze_dbgfs_set(),
 * and the value is printed in hexadecimal.
 */
DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get,
eeh_freeze_dbgfs_set, "0x%llx\n");
#endif #endif
static int __init eeh_init_proc(void) static int __init eeh_init_proc(void)
...@@ -1664,6 +1685,9 @@ static int __init eeh_init_proc(void) ...@@ -1664,6 +1685,9 @@ static int __init eeh_init_proc(void)
debugfs_create_file("eeh_enable", 0600, debugfs_create_file("eeh_enable", 0600,
powerpc_debugfs_root, NULL, powerpc_debugfs_root, NULL,
&eeh_enable_dbgfs_ops); &eeh_enable_dbgfs_ops);
debugfs_create_file("eeh_max_freezes", 0600,
powerpc_debugfs_root, NULL,
&eeh_freeze_dbgfs_ops);
#endif #endif
} }
......
...@@ -667,7 +667,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) ...@@ -667,7 +667,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
eeh_pe_update_time_stamp(pe); eeh_pe_update_time_stamp(pe);
pe->freeze_count++; pe->freeze_count++;
if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) if (pe->freeze_count > eeh_max_freezes)
goto excess_failures; goto excess_failures;
pr_warn("EEH: This PCI device has failed %d times in the last hour\n", pr_warn("EEH: This PCI device has failed %d times in the last hour\n",
pe->freeze_count); pe->freeze_count);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment