Commit c8608558 authored by Gavin Shan, committed by Benjamin Herrenschmidt

powerpc/eeh: Single kthread to handle events

We possibly have multiple kthreads running for multiple EEH errors
(events) and use one spinlock to make the process of handling those
EEH events serialized. That's unnecessary, so the patch creates only
one kthread, which is started during EEH core initialization time in
eeh_init(). A new semaphore is introduced to count the number of existing
EEH events in the queue, and the kthread waits on the semaphore.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
parent 26a74850
...@@ -31,6 +31,7 @@ struct eeh_event { ...@@ -31,6 +31,7 @@ struct eeh_event {
struct eeh_pe *pe; /* EEH PE */ struct eeh_pe *pe; /* EEH PE */
}; };
int eeh_event_init(void);
int eeh_send_failure_event(struct eeh_pe *pe); int eeh_send_failure_event(struct eeh_pe *pe);
void eeh_handle_event(struct eeh_pe *pe); void eeh_handle_event(struct eeh_pe *pe);
......
...@@ -704,6 +704,11 @@ int __init eeh_init(void) ...@@ -704,6 +704,11 @@ int __init eeh_init(void)
raw_spin_lock_init(&confirm_error_lock); raw_spin_lock_init(&confirm_error_lock);
/* Initialize EEH event */
ret = eeh_event_init();
if (ret)
return ret;
/* Enable EEH for all adapters */ /* Enable EEH for all adapters */
if (eeh_probe_mode_devtree()) { if (eeh_probe_mode_devtree()) {
list_for_each_entry_safe(hose, tmp, list_for_each_entry_safe(hose, tmp,
......
...@@ -18,11 +18,10 @@ ...@@ -18,11 +18,10 @@
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/mutex.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/semaphore.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/kthread.h> #include <linux/kthread.h>
#include <asm/eeh_event.h> #include <asm/eeh_event.h>
#include <asm/ppc-pci.h> #include <asm/ppc-pci.h>
...@@ -35,14 +34,9 @@ ...@@ -35,14 +34,9 @@
* work-queue, where a worker thread can drive recovery. * work-queue, where a worker thread can drive recovery.
*/ */
/* EEH event workqueue setup. */
static DEFINE_SPINLOCK(eeh_eventlist_lock); static DEFINE_SPINLOCK(eeh_eventlist_lock);
static struct semaphore eeh_eventlist_sem;
LIST_HEAD(eeh_eventlist); LIST_HEAD(eeh_eventlist);
static void eeh_thread_launcher(struct work_struct *);
DECLARE_WORK(eeh_event_wq, eeh_thread_launcher);
/* Serialize reset sequences for a given pci device */
DEFINE_MUTEX(eeh_event_mutex);
/** /**
* eeh_event_handler - Dispatch EEH events. * eeh_event_handler - Dispatch EEH events.
...@@ -60,55 +54,62 @@ static int eeh_event_handler(void * dummy) ...@@ -60,55 +54,62 @@ static int eeh_event_handler(void * dummy)
struct eeh_event *event; struct eeh_event *event;
struct eeh_pe *pe; struct eeh_pe *pe;
spin_lock_irqsave(&eeh_eventlist_lock, flags); while (!kthread_should_stop()) {
event = NULL; down(&eeh_eventlist_sem);
/* Unqueue the event, get ready to process. */ /* Fetch EEH event from the queue */
if (!list_empty(&eeh_eventlist)) { spin_lock_irqsave(&eeh_eventlist_lock, flags);
event = list_entry(eeh_eventlist.next, struct eeh_event, list); event = NULL;
list_del(&event->list); if (!list_empty(&eeh_eventlist)) {
} event = list_entry(eeh_eventlist.next,
spin_unlock_irqrestore(&eeh_eventlist_lock, flags); struct eeh_event, list);
list_del(&event->list);
if (event == NULL) }
return 0; spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
if (!event)
/* Serialize processing of EEH events */ continue;
mutex_lock(&eeh_event_mutex);
pe = event->pe; /* We might have event without binding PE */
eeh_pe_state_mark(pe, EEH_PE_RECOVERING); pe = event->pe;
pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n", if (pe) {
pe->phb->global_number, pe->addr); eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n",
set_current_state(TASK_INTERRUPTIBLE); /* Don't add to load average */ pe->phb->global_number, pe->addr);
eeh_handle_event(pe); eeh_handle_event(pe);
eeh_pe_state_clear(pe, EEH_PE_RECOVERING); eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
} else {
kfree(event); eeh_handle_event(NULL);
mutex_unlock(&eeh_event_mutex); }
/* If there are no new errors after an hour, clear the counter. */ kfree(event);
if (pe && pe->freeze_count > 0) {
msleep_interruptible(3600*1000);
if (pe->freeze_count > 0)
pe->freeze_count--;
} }
return 0; return 0;
} }
/** /**
* eeh_thread_launcher - Start kernel thread to handle EEH events * eeh_event_init - Start kernel thread to handle EEH events
* @dummy - unused
* *
* This routine is called to start the kernel thread for processing * This routine is called to start the kernel thread for processing
* EEH event. * EEH event.
*/ */
static void eeh_thread_launcher(struct work_struct *dummy) int eeh_event_init(void)
{ {
if (IS_ERR(kthread_run(eeh_event_handler, NULL, "eehd"))) struct task_struct *t;
printk(KERN_ERR "Failed to start EEH daemon\n"); int ret = 0;
/* Initialize semaphore */
sema_init(&eeh_eventlist_sem, 0);
t = kthread_run(eeh_event_handler, NULL, "eehd");
if (IS_ERR(t)) {
ret = PTR_ERR(t);
pr_err("%s: Failed to start EEH daemon (%d)\n",
__func__, ret);
return ret;
}
return 0;
} }
/** /**
...@@ -136,7 +137,8 @@ int eeh_send_failure_event(struct eeh_pe *pe) ...@@ -136,7 +137,8 @@ int eeh_send_failure_event(struct eeh_pe *pe)
list_add(&event->list, &eeh_eventlist); list_add(&event->list, &eeh_eventlist);
spin_unlock_irqrestore(&eeh_eventlist_lock, flags); spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
schedule_work(&eeh_event_wq); /* For EEH daemon to kick in */
up(&eeh_eventlist_sem);
return 0; return 0;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment