Commit 177bc936, authored by Linas Vepstas, committed by Paul Mackerras

[PATCH] ppc64: PCI error rate statistics

04-eeh-statistics.patch

This minor patch adds some statistics-gathering counters that allow the
behaviour of the EEH subsystem to be monitored. While far from perfect,
it does provide a rudimentary device that makes understanding of the
current state of the system a bit easier.
Signed-off-by: Linas Vepstas <linas@linas.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
parent 56b0fca3
...@@ -102,6 +102,10 @@ static DEFINE_SPINLOCK(slot_errbuf_lock); ...@@ -102,6 +102,10 @@ static DEFINE_SPINLOCK(slot_errbuf_lock);
static int eeh_error_buf_size; static int eeh_error_buf_size;
/* System monitoring statistics */ /* System monitoring statistics */
static DEFINE_PER_CPU(unsigned long, no_device);
static DEFINE_PER_CPU(unsigned long, no_dn);
static DEFINE_PER_CPU(unsigned long, no_cfg_addr);
static DEFINE_PER_CPU(unsigned long, ignored_check);
static DEFINE_PER_CPU(unsigned long, total_mmio_ffs); static DEFINE_PER_CPU(unsigned long, total_mmio_ffs);
static DEFINE_PER_CPU(unsigned long, false_positives); static DEFINE_PER_CPU(unsigned long, false_positives);
static DEFINE_PER_CPU(unsigned long, ignored_failures); static DEFINE_PER_CPU(unsigned long, ignored_failures);
...@@ -493,8 +497,6 @@ static void eeh_event_handler(void *dummy) ...@@ -493,8 +497,6 @@ static void eeh_event_handler(void *dummy)
notifier_call_chain (&eeh_notifier_chain, notifier_call_chain (&eeh_notifier_chain,
EEH_NOTIFY_FREEZE, event); EEH_NOTIFY_FREEZE, event);
__get_cpu_var(slot_resets)++;
pci_dev_put(event->dev); pci_dev_put(event->dev);
kfree(event); kfree(event);
} }
...@@ -546,17 +548,24 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) ...@@ -546,17 +548,24 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
if (!eeh_subsystem_enabled) if (!eeh_subsystem_enabled)
return 0; return 0;
if (!dn) if (!dn) {
__get_cpu_var(no_dn)++;
return 0; return 0;
}
pdn = PCI_DN(dn); pdn = PCI_DN(dn);
/* Access to IO BARs might get this far and still not want checking. */ /* Access to IO BARs might get this far and still not want checking. */
if (!pdn->eeh_capable || !(pdn->eeh_mode & EEH_MODE_SUPPORTED) || if (!pdn->eeh_capable || !(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
pdn->eeh_mode & EEH_MODE_NOCHECK) { pdn->eeh_mode & EEH_MODE_NOCHECK) {
__get_cpu_var(ignored_check)++;
#ifdef DEBUG
printk ("EEH:ignored check for %s %s\n", pci_name (dev), dn->full_name);
#endif
return 0; return 0;
} }
if (!pdn->eeh_config_addr) { if (!pdn->eeh_config_addr) {
__get_cpu_var(no_cfg_addr)++;
return 0; return 0;
} }
...@@ -590,6 +599,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) ...@@ -590,6 +599,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
/* prevent repeated reports of this failure */ /* prevent repeated reports of this failure */
pdn->eeh_mode |= EEH_MODE_ISOLATED; pdn->eeh_mode |= EEH_MODE_ISOLATED;
__get_cpu_var(slot_resets)++;
reset_state = rets[0]; reset_state = rets[0];
...@@ -657,8 +667,10 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon ...@@ -657,8 +667,10 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
/* Finding the phys addr + pci device; this is pretty quick. */ /* Finding the phys addr + pci device; this is pretty quick. */
addr = eeh_token_to_phys((unsigned long __force) token); addr = eeh_token_to_phys((unsigned long __force) token);
dev = pci_get_device_by_addr(addr); dev = pci_get_device_by_addr(addr);
if (!dev) if (!dev) {
__get_cpu_var(no_device)++;
return val; return val;
}
dn = pci_device_to_OF_node(dev); dn = pci_device_to_OF_node(dev);
eeh_dn_check_failure (dn, dev); eeh_dn_check_failure (dn, dev);
...@@ -903,12 +915,17 @@ static int proc_eeh_show(struct seq_file *m, void *v) ...@@ -903,12 +915,17 @@ static int proc_eeh_show(struct seq_file *m, void *v)
unsigned int cpu; unsigned int cpu;
unsigned long ffs = 0, positives = 0, failures = 0; unsigned long ffs = 0, positives = 0, failures = 0;
unsigned long resets = 0; unsigned long resets = 0;
unsigned long no_dev = 0, no_dn = 0, no_cfg = 0, no_check = 0;
for_each_cpu(cpu) { for_each_cpu(cpu) {
ffs += per_cpu(total_mmio_ffs, cpu); ffs += per_cpu(total_mmio_ffs, cpu);
positives += per_cpu(false_positives, cpu); positives += per_cpu(false_positives, cpu);
failures += per_cpu(ignored_failures, cpu); failures += per_cpu(ignored_failures, cpu);
resets += per_cpu(slot_resets, cpu); resets += per_cpu(slot_resets, cpu);
no_dev += per_cpu(no_device, cpu);
no_dn += per_cpu(no_dn, cpu);
no_cfg += per_cpu(no_cfg_addr, cpu);
no_check += per_cpu(ignored_check, cpu);
} }
if (0 == eeh_subsystem_enabled) { if (0 == eeh_subsystem_enabled) {
...@@ -916,13 +933,17 @@ static int proc_eeh_show(struct seq_file *m, void *v) ...@@ -916,13 +933,17 @@ static int proc_eeh_show(struct seq_file *m, void *v)
seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs); seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs);
} else { } else {
seq_printf(m, "EEH Subsystem is enabled\n"); seq_printf(m, "EEH Subsystem is enabled\n");
seq_printf(m, "eeh_total_mmio_ffs=%ld\n" seq_printf(m,
"eeh_false_positives=%ld\n" "no device=%ld\n"
"eeh_ignored_failures=%ld\n" "no device node=%ld\n"
"eeh_slot_resets=%ld\n" "no config address=%ld\n"
"eeh_fail_count=%d\n", "check not wanted=%ld\n"
ffs, positives, failures, resets, "eeh_total_mmio_ffs=%ld\n"
eeh_fail_count.counter); "eeh_false_positives=%ld\n"
"eeh_ignored_failures=%ld\n"
"eeh_slot_resets=%ld\n",
no_dev, no_dn, no_cfg, no_check,
ffs, positives, failures, resets);
} }
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment