Commit 66523d9f, authored by Gavin Shan, committed by Benjamin Herrenschmidt

powerpc/eeh: Trace error based on PE from beginning

There are two conditions that trigger EEH error detection: an invalid
value returned from reading either I/O or config space. In each case, the
function eeh_dn_check_failure() is called to initialize an EEH event and
put it into the pool for further processing.

The patch changes the function slightly so that the EEH error is traced
based on the PE rather than on the EEH device. Also, the function
eeh_find_device_pe() has been removed, since the EEH device now tracks
its PE through struct eeh_dev::pe.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
parent 5b663529
...@@ -59,7 +59,6 @@ int rtas_write_config(struct pci_dn *, int where, int size, u32 val); ...@@ -59,7 +59,6 @@ int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
void eeh_pe_state_mark(struct eeh_pe *pe, int state); void eeh_pe_state_mark(struct eeh_pe *pe, int state);
void eeh_pe_state_clear(struct eeh_pe *pe, int state); void eeh_pe_state_clear(struct eeh_pe *pe, int state);
struct device_node *eeh_find_device_pe(struct device_node *dn);
void eeh_sysfs_add_device(struct pci_dev *pdev); void eeh_sysfs_add_device(struct pci_dev *pdev);
void eeh_sysfs_remove_device(struct pci_dev *pdev); void eeh_sysfs_remove_device(struct pci_dev *pdev);
......
...@@ -263,21 +263,6 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) ...@@ -263,21 +263,6 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
return pa | (token & (PAGE_SIZE-1)); return pa | (token & (PAGE_SIZE-1));
} }
/**
* eeh_find_device_pe - Retrieve the PE for the given device
* @dn: device node; dereferenced unconditionally, so it must not be NULL
*
* Walks up the parent chain for as long as the parent exists,
* of_node_to_eeh_dev() returns non-NULL for it, and the returned
* eeh_dev has EEH_MODE_SUPPORTED set in its mode field.
*
* Return: the highest device node reached by that walk, or @dn itself
* when its immediate parent does not satisfy those conditions.
*/
struct device_node *eeh_find_device_pe(struct device_node *dn)
{
/* Stop at the first parent that is missing, has no eeh_dev, or lacks EEH_MODE_SUPPORTED. */
while (dn->parent && of_node_to_eeh_dev(dn->parent) &&
(of_node_to_eeh_dev(dn->parent)->mode & EEH_MODE_SUPPORTED)) {
dn = dn->parent;
}
return dn;
}
/** /**
* eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze
* @dn: device node * @dn: device node
...@@ -297,6 +282,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) ...@@ -297,6 +282,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
{ {
int ret; int ret;
unsigned long flags; unsigned long flags;
struct eeh_pe *pe;
struct eeh_dev *edev; struct eeh_dev *edev;
int rc = 0; int rc = 0;
const char *location; const char *location;
...@@ -306,23 +292,26 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) ...@@ -306,23 +292,26 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
if (!eeh_subsystem_enabled) if (!eeh_subsystem_enabled)
return 0; return 0;
if (!dn) { if (dn) {
edev = of_node_to_eeh_dev(dn);
} else if (dev) {
edev = pci_dev_to_eeh_dev(dev);
dn = pci_device_to_OF_node(dev);
} else {
eeh_stats.no_dn++; eeh_stats.no_dn++;
return 0; return 0;
} }
dn = eeh_find_device_pe(dn); pe = edev->pe;
edev = of_node_to_eeh_dev(dn);
/* Access to IO BARs might get this far and still not want checking. */ /* Access to IO BARs might get this far and still not want checking. */
if (!(edev->mode & EEH_MODE_SUPPORTED) || if (!pe) {
edev->mode & EEH_MODE_NOCHECK) {
eeh_stats.ignored_check++; eeh_stats.ignored_check++;
pr_debug("EEH: Ignored check (%x) for %s %s\n", pr_debug("EEH: Ignored check for %s %s\n",
edev->mode, eeh_pci_name(dev), dn->full_name); eeh_pci_name(dev), dn->full_name);
return 0; return 0;
} }
if (!edev->config_addr && !edev->pe_config_addr) { if (!pe->addr && !pe->config_addr) {
eeh_stats.no_cfg_addr++; eeh_stats.no_cfg_addr++;
return 0; return 0;
} }
...@@ -335,13 +324,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) ...@@ -335,13 +324,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
*/ */
raw_spin_lock_irqsave(&confirm_error_lock, flags); raw_spin_lock_irqsave(&confirm_error_lock, flags);
rc = 1; rc = 1;
if (edev->mode & EEH_MODE_ISOLATED) { if (pe->state & EEH_PE_ISOLATED) {
edev->check_count++; pe->check_count++;
if (edev->check_count % EEH_MAX_FAILS == 0) { if (pe->check_count % EEH_MAX_FAILS == 0) {
location = of_get_property(dn, "ibm,loc-code", NULL); location = of_get_property(dn, "ibm,loc-code", NULL);
printk(KERN_ERR "EEH: %d reads ignored for recovering device at " printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
"location=%s driver=%s pci addr=%s\n", "location=%s driver=%s pci addr=%s\n",
edev->check_count, location, pe->check_count, location,
eeh_driver_name(dev), eeh_pci_name(dev)); eeh_driver_name(dev), eeh_pci_name(dev));
printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
eeh_driver_name(dev)); eeh_driver_name(dev));
...@@ -357,7 +346,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) ...@@ -357,7 +346,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
* function zero of a multi-function device. * function zero of a multi-function device.
* In any case they must share a common PHB. * In any case they must share a common PHB.
*/ */
ret = eeh_ops->get_state(dn, NULL); ret = eeh_ops->get_state(pe, NULL);
/* Note that config-io to empty slots may fail; /* Note that config-io to empty slots may fail;
* they are empty when they don't have children. * they are empty when they don't have children.
...@@ -370,7 +359,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) ...@@ -370,7 +359,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
(ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
(EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
eeh_stats.false_positives++; eeh_stats.false_positives++;
edev->false_positives ++; pe->false_positives++;
rc = 0; rc = 0;
goto dn_unlock; goto dn_unlock;
} }
...@@ -381,10 +370,10 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) ...@@ -381,10 +370,10 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
* with other functions on this device, and functions under * with other functions on this device, and functions under
* bridges. * bridges.
*/ */
eeh_mark_slot(dn, EEH_MODE_ISOLATED); eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
raw_spin_unlock_irqrestore(&confirm_error_lock, flags); raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
eeh_send_failure_event(edev); eeh_send_failure_event(pe);
/* Most EEH events are due to device driver bugs. Having /* Most EEH events are due to device driver bugs. Having
* a stack trace will help the device-driver authors figure * a stack trace will help the device-driver authors figure
......
...@@ -210,6 +210,7 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) ...@@ -210,6 +210,7 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
{ {
struct device_node *dn; struct device_node *dn;
struct eeh_dev *edev;
/* Found our PE and assume 8 at that point. */ /* Found our PE and assume 8 at that point. */
...@@ -217,7 +218,10 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) ...@@ -217,7 +218,10 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
if (!dn) if (!dn)
return NULL; return NULL;
dn = eeh_find_device_pe(dn); /* Get the top level device in the PE */
edev = of_node_to_eeh_dev(dn);
edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, list);
dn = eeh_dev_to_of_node(edev);
if (!dn) if (!dn)
return NULL; return NULL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment