Commit 5217192b authored by Paul E. McKenney

Merge remote-tracking branch 'tip/smp/hotplug' into next.2012.09.25b

The conflicts between kernel/rcutree.h and kernel/rcutree_plugin.h
were due to adjacent insertions and deletions, which were resolved
by simply accepting the changes on both branches.
parents bda4ec9f bff4a394
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
*/ */
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/smpboot.h>
#include "ehca_classes.h" #include "ehca_classes.h"
#include "ehca_irq.h" #include "ehca_irq.h"
...@@ -652,7 +653,7 @@ void ehca_tasklet_eq(unsigned long data) ...@@ -652,7 +653,7 @@ void ehca_tasklet_eq(unsigned long data)
ehca_process_eq((struct ehca_shca*)data, 1); ehca_process_eq((struct ehca_shca*)data, 1);
} }
static inline int find_next_online_cpu(struct ehca_comp_pool *pool) static int find_next_online_cpu(struct ehca_comp_pool *pool)
{ {
int cpu; int cpu;
unsigned long flags; unsigned long flags;
...@@ -662,17 +663,20 @@ static inline int find_next_online_cpu(struct ehca_comp_pool *pool) ...@@ -662,17 +663,20 @@ static inline int find_next_online_cpu(struct ehca_comp_pool *pool)
ehca_dmp(cpu_online_mask, cpumask_size(), ""); ehca_dmp(cpu_online_mask, cpumask_size(), "");
spin_lock_irqsave(&pool->last_cpu_lock, flags); spin_lock_irqsave(&pool->last_cpu_lock, flags);
do {
cpu = cpumask_next(pool->last_cpu, cpu_online_mask); cpu = cpumask_next(pool->last_cpu, cpu_online_mask);
if (cpu >= nr_cpu_ids) if (cpu >= nr_cpu_ids)
cpu = cpumask_first(cpu_online_mask); cpu = cpumask_first(cpu_online_mask);
pool->last_cpu = cpu; pool->last_cpu = cpu;
} while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active);
spin_unlock_irqrestore(&pool->last_cpu_lock, flags); spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
return cpu; return cpu;
} }
static void __queue_comp_task(struct ehca_cq *__cq, static void __queue_comp_task(struct ehca_cq *__cq,
struct ehca_cpu_comp_task *cct) struct ehca_cpu_comp_task *cct,
struct task_struct *thread)
{ {
unsigned long flags; unsigned long flags;
...@@ -683,7 +687,7 @@ static void __queue_comp_task(struct ehca_cq *__cq, ...@@ -683,7 +687,7 @@ static void __queue_comp_task(struct ehca_cq *__cq,
__cq->nr_callbacks++; __cq->nr_callbacks++;
list_add_tail(&__cq->entry, &cct->cq_list); list_add_tail(&__cq->entry, &cct->cq_list);
cct->cq_jobs++; cct->cq_jobs++;
wake_up(&cct->wait_queue); wake_up_process(thread);
} else } else
__cq->nr_callbacks++; __cq->nr_callbacks++;
...@@ -695,6 +699,7 @@ static void queue_comp_task(struct ehca_cq *__cq) ...@@ -695,6 +699,7 @@ static void queue_comp_task(struct ehca_cq *__cq)
{ {
int cpu_id; int cpu_id;
struct ehca_cpu_comp_task *cct; struct ehca_cpu_comp_task *cct;
struct task_struct *thread;
int cq_jobs; int cq_jobs;
unsigned long flags; unsigned long flags;
...@@ -702,7 +707,8 @@ static void queue_comp_task(struct ehca_cq *__cq) ...@@ -702,7 +707,8 @@ static void queue_comp_task(struct ehca_cq *__cq)
BUG_ON(!cpu_online(cpu_id)); BUG_ON(!cpu_online(cpu_id));
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
BUG_ON(!cct); thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
BUG_ON(!cct || !thread);
spin_lock_irqsave(&cct->task_lock, flags); spin_lock_irqsave(&cct->task_lock, flags);
cq_jobs = cct->cq_jobs; cq_jobs = cct->cq_jobs;
...@@ -710,28 +716,25 @@ static void queue_comp_task(struct ehca_cq *__cq) ...@@ -710,28 +716,25 @@ static void queue_comp_task(struct ehca_cq *__cq)
if (cq_jobs > 0) { if (cq_jobs > 0) {
cpu_id = find_next_online_cpu(pool); cpu_id = find_next_online_cpu(pool);
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
BUG_ON(!cct); thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
BUG_ON(!cct || !thread);
} }
__queue_comp_task(__cq, cct, thread);
__queue_comp_task(__cq, cct);
} }
static void run_comp_task(struct ehca_cpu_comp_task *cct) static void run_comp_task(struct ehca_cpu_comp_task *cct)
{ {
struct ehca_cq *cq; struct ehca_cq *cq;
unsigned long flags;
spin_lock_irqsave(&cct->task_lock, flags);
while (!list_empty(&cct->cq_list)) { while (!list_empty(&cct->cq_list)) {
cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
spin_unlock_irqrestore(&cct->task_lock, flags); spin_unlock_irq(&cct->task_lock);
comp_event_callback(cq); comp_event_callback(cq);
if (atomic_dec_and_test(&cq->nr_events)) if (atomic_dec_and_test(&cq->nr_events))
wake_up(&cq->wait_completion); wake_up(&cq->wait_completion);
spin_lock_irqsave(&cct->task_lock, flags); spin_lock_irq(&cct->task_lock);
spin_lock(&cq->task_lock); spin_lock(&cq->task_lock);
cq->nr_callbacks--; cq->nr_callbacks--;
if (!cq->nr_callbacks) { if (!cq->nr_callbacks) {
...@@ -740,159 +743,76 @@ static void run_comp_task(struct ehca_cpu_comp_task *cct) ...@@ -740,159 +743,76 @@ static void run_comp_task(struct ehca_cpu_comp_task *cct)
} }
spin_unlock(&cq->task_lock); spin_unlock(&cq->task_lock);
} }
spin_unlock_irqrestore(&cct->task_lock, flags);
} }
static int comp_task(void *__cct) static void comp_task_park(unsigned int cpu)
{ {
struct ehca_cpu_comp_task *cct = __cct; struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
int cql_empty; struct ehca_cpu_comp_task *target;
DECLARE_WAITQUEUE(wait, current); struct task_struct *thread;
struct ehca_cq *cq, *tmp;
set_current_state(TASK_INTERRUPTIBLE); LIST_HEAD(list);
while (!kthread_should_stop()) {
add_wait_queue(&cct->wait_queue, &wait);
spin_lock_irq(&cct->task_lock);
cql_empty = list_empty(&cct->cq_list);
spin_unlock_irq(&cct->task_lock);
if (cql_empty)
schedule();
else
__set_current_state(TASK_RUNNING);
remove_wait_queue(&cct->wait_queue, &wait);
spin_lock_irq(&cct->task_lock); spin_lock_irq(&cct->task_lock);
cql_empty = list_empty(&cct->cq_list); cct->cq_jobs = 0;
cct->active = 0;
list_splice_init(&cct->cq_list, &list);
spin_unlock_irq(&cct->task_lock); spin_unlock_irq(&cct->task_lock);
if (!cql_empty)
run_comp_task(__cct);
set_current_state(TASK_INTERRUPTIBLE); cpu = find_next_online_cpu(pool);
target = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu);
spin_lock_irq(&target->task_lock);
list_for_each_entry_safe(cq, tmp, &list, entry) {
list_del(&cq->entry);
__queue_comp_task(cq, target, thread);
} }
__set_current_state(TASK_RUNNING); spin_unlock_irq(&target->task_lock);
return 0;
} }
static struct task_struct *create_comp_task(struct ehca_comp_pool *pool, static void comp_task_stop(unsigned int cpu, bool online)
int cpu)
{ {
struct ehca_cpu_comp_task *cct; struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
spin_lock_init(&cct->task_lock);
INIT_LIST_HEAD(&cct->cq_list);
init_waitqueue_head(&cct->wait_queue);
cct->task = kthread_create_on_node(comp_task, cct, cpu_to_node(cpu),
"ehca_comp/%d", cpu);
return cct->task;
}
static void destroy_comp_task(struct ehca_comp_pool *pool,
int cpu)
{
struct ehca_cpu_comp_task *cct;
struct task_struct *task;
unsigned long flags_cct;
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
spin_lock_irqsave(&cct->task_lock, flags_cct);
task = cct->task; spin_lock_irq(&cct->task_lock);
cct->task = NULL;
cct->cq_jobs = 0; cct->cq_jobs = 0;
cct->active = 0;
spin_unlock_irqrestore(&cct->task_lock, flags_cct); WARN_ON(!list_empty(&cct->cq_list));
spin_unlock_irq(&cct->task_lock);
if (task)
kthread_stop(task);
} }
static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu) static int comp_task_should_run(unsigned int cpu)
{ {
struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
LIST_HEAD(list);
struct ehca_cq *cq;
unsigned long flags_cct;
spin_lock_irqsave(&cct->task_lock, flags_cct);
list_splice_init(&cct->cq_list, &list);
while (!list_empty(&list)) {
cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
list_del(&cq->entry);
__queue_comp_task(cq, this_cpu_ptr(pool->cpu_comp_tasks));
}
spin_unlock_irqrestore(&cct->task_lock, flags_cct);
return cct->cq_jobs;
} }
static int __cpuinit comp_pool_callback(struct notifier_block *nfb, static void comp_task(unsigned int cpu)
unsigned long action,
void *hcpu)
{ {
unsigned int cpu = (unsigned long)hcpu; struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks);
struct ehca_cpu_comp_task *cct; int cql_empty;
switch (action) { spin_lock_irq(&cct->task_lock);
case CPU_UP_PREPARE: cql_empty = list_empty(&cct->cq_list);
case CPU_UP_PREPARE_FROZEN: if (!cql_empty) {
ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); __set_current_state(TASK_RUNNING);
if (!create_comp_task(pool, cpu)) { run_comp_task(cct);
ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
return notifier_from_errno(-ENOMEM);
}
break;
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
kthread_bind(cct->task, cpumask_any(cpu_online_mask));
destroy_comp_task(pool, cpu);
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
kthread_bind(cct->task, cpu);
wake_up_process(cct->task);
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
destroy_comp_task(pool, cpu);
take_over_work(pool, cpu);
break;
} }
spin_unlock_irq(&cct->task_lock);
return NOTIFY_OK;
} }
static struct notifier_block comp_pool_callback_nb __cpuinitdata = { static struct smp_hotplug_thread comp_pool_threads = {
.notifier_call = comp_pool_callback, .thread_should_run = comp_task_should_run,
.priority = 0, .thread_fn = comp_task,
.thread_comm = "ehca_comp/%u",
.cleanup = comp_task_stop,
.park = comp_task_park,
}; };
int ehca_create_comp_pool(void) int ehca_create_comp_pool(void)
{ {
int cpu; int cpu, ret = -ENOMEM;
struct task_struct *task;
if (!ehca_scaling_code) if (!ehca_scaling_code)
return 0; return 0;
...@@ -905,38 +825,46 @@ int ehca_create_comp_pool(void) ...@@ -905,38 +825,46 @@ int ehca_create_comp_pool(void)
pool->last_cpu = cpumask_any(cpu_online_mask); pool->last_cpu = cpumask_any(cpu_online_mask);
pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
if (pool->cpu_comp_tasks == NULL) { if (!pool->cpu_comp_tasks)
kfree(pool); goto out_pool;
return -EINVAL;
}
for_each_online_cpu(cpu) { pool->cpu_comp_threads = alloc_percpu(struct task_struct *);
task = create_comp_task(pool, cpu); if (!pool->cpu_comp_threads)
if (task) { goto out_tasks;
kthread_bind(task, cpu);
wake_up_process(task); for_each_present_cpu(cpu) {
} struct ehca_cpu_comp_task *cct;
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
spin_lock_init(&cct->task_lock);
INIT_LIST_HEAD(&cct->cq_list);
} }
register_hotcpu_notifier(&comp_pool_callback_nb); comp_pool_threads.store = pool->cpu_comp_threads;
ret = smpboot_register_percpu_thread(&comp_pool_threads);
if (ret)
goto out_threads;
printk(KERN_INFO "eHCA scaling code enabled\n"); pr_info("eHCA scaling code enabled\n");
return ret;
return 0; out_threads:
free_percpu(pool->cpu_comp_threads);
out_tasks:
free_percpu(pool->cpu_comp_tasks);
out_pool:
kfree(pool);
return ret;
} }
void ehca_destroy_comp_pool(void) void ehca_destroy_comp_pool(void)
{ {
int i;
if (!ehca_scaling_code) if (!ehca_scaling_code)
return; return;
unregister_hotcpu_notifier(&comp_pool_callback_nb); smpboot_unregister_percpu_thread(&comp_pool_threads);
for_each_online_cpu(i)
destroy_comp_task(pool, i);
free_percpu(pool->cpu_comp_threads);
free_percpu(pool->cpu_comp_tasks); free_percpu(pool->cpu_comp_tasks);
kfree(pool); kfree(pool);
} }
...@@ -58,15 +58,15 @@ void ehca_tasklet_eq(unsigned long data); ...@@ -58,15 +58,15 @@ void ehca_tasklet_eq(unsigned long data);
void ehca_process_eq(struct ehca_shca *shca, int is_irq); void ehca_process_eq(struct ehca_shca *shca, int is_irq);
struct ehca_cpu_comp_task { struct ehca_cpu_comp_task {
wait_queue_head_t wait_queue;
struct list_head cq_list; struct list_head cq_list;
struct task_struct *task;
spinlock_t task_lock; spinlock_t task_lock;
int cq_jobs; int cq_jobs;
int active;
}; };
struct ehca_comp_pool { struct ehca_comp_pool {
struct ehca_cpu_comp_task *cpu_comp_tasks; struct ehca_cpu_comp_task __percpu *cpu_comp_tasks;
struct task_struct * __percpu *cpu_comp_threads;
int last_cpu; int last_cpu;
spinlock_t last_cpu_lock; spinlock_t last_cpu_lock;
}; };
......
...@@ -14,6 +14,11 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), ...@@ -14,6 +14,11 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
kthread_create_on_node(threadfn, data, -1, namefmt, ##arg) kthread_create_on_node(threadfn, data, -1, namefmt, ##arg)
struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
void *data,
unsigned int cpu,
const char *namefmt);
/** /**
* kthread_run - create and wake a thread. * kthread_run - create and wake a thread.
* @threadfn: the function to run until signal_pending(current). * @threadfn: the function to run until signal_pending(current).
...@@ -34,9 +39,13 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), ...@@ -34,9 +39,13 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
void kthread_bind(struct task_struct *k, unsigned int cpu); void kthread_bind(struct task_struct *k, unsigned int cpu);
int kthread_stop(struct task_struct *k); int kthread_stop(struct task_struct *k);
int kthread_should_stop(void); bool kthread_should_stop(void);
bool kthread_should_park(void);
bool kthread_freezable_should_stop(bool *was_frozen); bool kthread_freezable_should_stop(bool *was_frozen);
void *kthread_data(struct task_struct *k); void *kthread_data(struct task_struct *k);
int kthread_park(struct task_struct *k);
void kthread_unpark(struct task_struct *k);
void kthread_parkme(void);
int kthreadd(void *unused); int kthreadd(void *unused);
extern struct task_struct *kthreadd_task; extern struct task_struct *kthreadd_task;
......
#ifndef _LINUX_SMPBOOT_H
#define _LINUX_SMPBOOT_H
#include <linux/types.h>
struct task_struct;
/* Cookie handed to the thread_fn */
struct smpboot_thread_data;
/**
 * struct smp_hotplug_thread - CPU hotplug related thread descriptor
 * @store:		Pointer to per cpu storage for the task pointers.
 *			The per cpu object is the slot itself; each slot
 *			holds an ordinary (non per cpu) task_struct pointer,
 *			i.e. what alloc_percpu(struct task_struct *) returns.
 * @list:		List head for core management
 * @thread_should_run:	Check whether the thread should run or not. Called with
 *			preemption disabled.
 * @thread_fn:		The associated thread function
 * @setup:		Optional setup function, called when the thread gets
 *			operational the first time
 * @cleanup:		Optional cleanup function, called when the thread
 *			should stop (module exit)
 * @park:		Optional park function, called when the thread is
 *			parked (cpu offline)
 * @unpark:		Optional unpark function, called when the thread is
 *			unparked (cpu online)
 * @thread_comm:	The base name of the thread
 */
struct smp_hotplug_thread {
	/*
	 * __percpu qualifies the outer pointer (the per cpu slot), not the
	 * task_struct the slot points to.
	 */
	struct task_struct		* __percpu *store;
	struct list_head		list;
	int				(*thread_should_run)(unsigned int cpu);
	void				(*thread_fn)(unsigned int cpu);
	void				(*setup)(unsigned int cpu);
	void				(*cleanup)(unsigned int cpu, bool online);
	void				(*park)(unsigned int cpu);
	void				(*unpark)(unsigned int cpu);
	const char			*thread_comm;
};
int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread);
void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread);
int smpboot_thread_schedule(void);
#endif
...@@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o printk.o \ ...@@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o printk.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o cred.o \ notifier.o ksysfs.o cred.o \
async.o range.o groups.o lglock.o async.o range.o groups.o lglock.o smpboot.o
ifdef CONFIG_FUNCTION_TRACER ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files # Do not trace debug files and internal ftrace files
...@@ -46,7 +46,6 @@ obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o ...@@ -46,7 +46,6 @@ obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SMP) += smpboot.o
ifneq ($(CONFIG_SMP),y) ifneq ($(CONFIG_SMP),y)
obj-y += up.o obj-y += up.o
endif endif
......
...@@ -280,12 +280,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) ...@@ -280,12 +280,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
__func__, cpu); __func__, cpu);
goto out_release; goto out_release;
} }
smpboot_park_threads(cpu);
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
if (err) { if (err) {
/* CPU didn't die: tell everyone. Can't complain. */ /* CPU didn't die: tell everyone. Can't complain. */
smpboot_unpark_threads(cpu);
cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
goto out_release; goto out_release;
} }
BUG_ON(cpu_online(cpu)); BUG_ON(cpu_online(cpu));
...@@ -354,6 +355,10 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) ...@@ -354,6 +355,10 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
goto out; goto out;
} }
ret = smpboot_create_threads(cpu);
if (ret)
goto out;
ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls); ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
if (ret) { if (ret) {
nr_calls--; nr_calls--;
...@@ -368,6 +373,9 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) ...@@ -368,6 +373,9 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
goto out_notify; goto out_notify;
BUG_ON(!cpu_online(cpu)); BUG_ON(!cpu_online(cpu));
/* Wake the per cpu threads */
smpboot_unpark_threads(cpu);
/* Now call notifier in preparation. */ /* Now call notifier in preparation. */
cpu_notify(CPU_ONLINE | mod, hcpu); cpu_notify(CPU_ONLINE | mod, hcpu);
......
...@@ -37,11 +37,20 @@ struct kthread_create_info ...@@ -37,11 +37,20 @@ struct kthread_create_info
}; };
struct kthread { struct kthread {
int should_stop; unsigned long flags;
unsigned int cpu;
void *data; void *data;
struct completion parked;
struct completion exited; struct completion exited;
}; };
enum KTHREAD_BITS {
KTHREAD_IS_PER_CPU = 0,
KTHREAD_SHOULD_STOP,
KTHREAD_SHOULD_PARK,
KTHREAD_IS_PARKED,
};
#define to_kthread(tsk) \ #define to_kthread(tsk) \
container_of((tsk)->vfork_done, struct kthread, exited) container_of((tsk)->vfork_done, struct kthread, exited)
...@@ -52,12 +61,28 @@ struct kthread { ...@@ -52,12 +61,28 @@ struct kthread {
* and this will return true. You should then return, and your return * and this will return true. You should then return, and your return
* value will be passed through to kthread_stop(). * value will be passed through to kthread_stop().
*/ */
int kthread_should_stop(void) bool kthread_should_stop(void)
{ {
return to_kthread(current)->should_stop; return test_bit(KTHREAD_SHOULD_STOP, &to_kthread(current)->flags);
} }
EXPORT_SYMBOL(kthread_should_stop); EXPORT_SYMBOL(kthread_should_stop);
/**
* kthread_should_park - should this kthread park now?
*
* When someone calls kthread_park() on your kthread, it will be woken
* and this will return true. You should then do the necessary
* cleanup and call kthread_parkme()
*
* Similar to kthread_should_stop(), but this keeps the thread alive
* and in a park position. kthread_unpark() "restarts" the thread and
* calls the thread function again.
*/
bool kthread_should_park(void)
{
return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(current)->flags);
}
/** /**
* kthread_freezable_should_stop - should this freezable kthread return now? * kthread_freezable_should_stop - should this freezable kthread return now?
* @was_frozen: optional out parameter, indicates whether %current was frozen * @was_frozen: optional out parameter, indicates whether %current was frozen
...@@ -96,6 +121,24 @@ void *kthread_data(struct task_struct *task) ...@@ -96,6 +121,24 @@ void *kthread_data(struct task_struct *task)
return to_kthread(task)->data; return to_kthread(task)->data;
} }
/*
 * Sleep for as long as KTHREAD_SHOULD_PARK is set in @self->flags.
 *
 * The first time through, KTHREAD_IS_PARKED is set and @self->parked is
 * completed so that a waiter in kthread_park() can proceed. On exit the
 * IS_PARKED bit is cleared and the task is put back to TASK_RUNNING.
 */
static void __kthread_parkme(struct kthread *self)
{
	/*
	 * Use set_current_state() (which implies a memory barrier) rather
	 * than __set_current_state(): the state store must be ordered before
	 * the SHOULD_PARK test below. Otherwise a concurrent unpark could
	 * clear the bit and call wake_up_process() while we still appear
	 * runnable, and we would then go to sleep with nobody left to wake us.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) {
		/* Signal the parker only once per park cycle. */
		if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
			complete(&self->parked);
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	clear_bit(KTHREAD_IS_PARKED, &self->flags);
	__set_current_state(TASK_RUNNING);
}
/*
 * kthread_parkme - park the calling kthread.
 *
 * Thin wrapper that runs __kthread_parkme() on the struct kthread of
 * the current task; it returns once KTHREAD_SHOULD_PARK is no longer set.
 */
void kthread_parkme(void)
{
__kthread_parkme(to_kthread(current));
}
static int kthread(void *_create) static int kthread(void *_create)
{ {
/* Copy data: it's on kthread's stack */ /* Copy data: it's on kthread's stack */
...@@ -105,9 +148,10 @@ static int kthread(void *_create) ...@@ -105,9 +148,10 @@ static int kthread(void *_create)
struct kthread self; struct kthread self;
int ret; int ret;
self.should_stop = 0; self.flags = 0;
self.data = data; self.data = data;
init_completion(&self.exited); init_completion(&self.exited);
init_completion(&self.parked);
current->vfork_done = &self.exited; current->vfork_done = &self.exited;
/* OK, tell user we're spawned, wait for stop or wakeup */ /* OK, tell user we're spawned, wait for stop or wakeup */
...@@ -117,9 +161,11 @@ static int kthread(void *_create) ...@@ -117,9 +161,11 @@ static int kthread(void *_create)
schedule(); schedule();
ret = -EINTR; ret = -EINTR;
if (!self.should_stop)
ret = threadfn(data);
if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) {
__kthread_parkme(&self);
ret = threadfn(data);
}
/* we can't just return, we must preserve "self" on stack */ /* we can't just return, we must preserve "self" on stack */
do_exit(ret); do_exit(ret);
} }
...@@ -172,8 +218,7 @@ static void create_kthread(struct kthread_create_info *create) ...@@ -172,8 +218,7 @@ static void create_kthread(struct kthread_create_info *create)
* Returns a task_struct or ERR_PTR(-ENOMEM). * Returns a task_struct or ERR_PTR(-ENOMEM).
*/ */
struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
void *data, void *data, int node,
int node,
const char namefmt[], const char namefmt[],
...) ...)
{ {
...@@ -210,6 +255,13 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), ...@@ -210,6 +255,13 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
} }
EXPORT_SYMBOL(kthread_create_on_node); EXPORT_SYMBOL(kthread_create_on_node);
/*
 * Restrict @p to run on @cpu only and flag it as PF_THREAD_BOUND.
 * No state checks are done here; the caller must make sure @p is
 * inactive.
 */
static void __kthread_bind(struct task_struct *p, unsigned int cpu)
{
/* It's safe because the task is inactive. */
do_set_cpus_allowed(p, cpumask_of(cpu));
p->flags |= PF_THREAD_BOUND;
}
/** /**
* kthread_bind - bind a just-created kthread to a cpu. * kthread_bind - bind a just-created kthread to a cpu.
* @p: thread created by kthread_create(). * @p: thread created by kthread_create().
...@@ -226,13 +278,111 @@ void kthread_bind(struct task_struct *p, unsigned int cpu) ...@@ -226,13 +278,111 @@ void kthread_bind(struct task_struct *p, unsigned int cpu)
WARN_ON(1); WARN_ON(1);
return; return;
} }
__kthread_bind(p, cpu);
/* It's safe because the task is inactive. */
do_set_cpus_allowed(p, cpumask_of(cpu));
p->flags |= PF_THREAD_BOUND;
} }
EXPORT_SYMBOL(kthread_bind); EXPORT_SYMBOL(kthread_bind);
/**
* kthread_create_on_cpu - Create a cpu bound kthread
* @threadfn: the function to run until signal_pending(current).
* @data: data ptr for @threadfn.
* @cpu: The cpu on which the thread should be bound,
* @namefmt: printf-style name for the thread. Format is restricted
* to "name.*%u". Code fills in cpu number.
*
* Description: This helper function creates and names a kernel thread
* The thread will be woken and put into park mode.
*/
struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
void *data, unsigned int cpu,
const char *namefmt)
{
struct task_struct *p;
p = kthread_create_on_node(threadfn, data, cpu_to_node(cpu), namefmt,
cpu);
if (IS_ERR(p))
return p;
set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags);
to_kthread(p)->cpu = cpu;
/* Park the thread to get it out of TASK_UNINTERRUPTIBLE state */
kthread_park(p);
return p;
}
/*
 * Take a reference on @k and look up its struct kthread.
 *
 * Returns the struct kthread while @k->vfork_done is still set, or NULL
 * once it has been cleared (the thread might have exited). The task
 * reference is taken in both cases.
 */
static struct kthread *task_get_live_kthread(struct task_struct *k)
{
	struct kthread *live;

	get_task_struct(k);
	live = to_kthread(k);
	/* It might have exited */
	barrier();
	return k->vfork_done != NULL ? live : NULL;
}
/**
 * kthread_unpark - unpark a thread created by kthread_create().
 * @k: thread created by kthread_create().
 *
 * Clears the park request so that kthread_should_park() for @k returns
 * false and, if the thread was actually parked, wakes it up. This does
 * not wait for the thread to leave the park code. If the thread is
 * marked percpu then it's bound to its cpu again before the wakeup.
 *
 * If @k has already exited, only the task reference taken for the
 * lookup is dropped.
 */
void kthread_unpark(struct task_struct *k)
{
struct kthread *kthread = task_get_live_kthread(k);
if (kthread) {
clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
/*
* We clear the IS_PARKED bit here as we don't wait
* until the task has left the park code. So if we'd
* park before that happens we'd see the IS_PARKED bit
* which might be about to be cleared.
*/
if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
/* Rebind before waking: the task is still asleep in the park loop. */
if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
__kthread_bind(k, kthread->cpu);
wake_up_process(k);
}
}
/* Drop the reference taken by task_get_live_kthread(). */
put_task_struct(k);
}
/**
 * kthread_park - park a thread created by kthread_create().
 * @k: thread created by kthread_create().
 *
 * Requests that @k park: kthread_should_park() for @k starts returning
 * true, the thread is woken, and the caller waits until it has reported
 * itself parked. This can also be called after kthread_create()
 * instead of calling wake_up_process(): the thread will park without
 * calling threadfn().
 *
 * If @k is already parked nothing is done. If called by the kthread
 * itself only the park bit is set; there is no wakeup and no wait.
 *
 * Returns 0 if the thread is parked, -ENOSYS if the thread exited.
 */
int kthread_park(struct task_struct *k)
{
	struct kthread *target = task_get_live_kthread(k);
	int ret = -ENOSYS;

	if (!target)
		goto out;

	ret = 0;
	if (test_bit(KTHREAD_IS_PARKED, &target->flags))
		goto out;

	set_bit(KTHREAD_SHOULD_PARK, &target->flags);
	if (k != current) {
		wake_up_process(k);
		wait_for_completion(&target->parked);
	}
out:
	put_task_struct(k);
	return ret;
}
/** /**
* kthread_stop - stop a thread created by kthread_create(). * kthread_stop - stop a thread created by kthread_create().
* @k: thread created by kthread_create(). * @k: thread created by kthread_create().
...@@ -250,16 +400,13 @@ EXPORT_SYMBOL(kthread_bind); ...@@ -250,16 +400,13 @@ EXPORT_SYMBOL(kthread_bind);
*/ */
int kthread_stop(struct task_struct *k) int kthread_stop(struct task_struct *k)
{ {
struct kthread *kthread; struct kthread *kthread = task_get_live_kthread(k);
int ret; int ret;
trace_sched_kthread_stop(k); trace_sched_kthread_stop(k);
get_task_struct(k); if (kthread) {
set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
kthread = to_kthread(k); clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
barrier(); /* it might have exited */
if (k->vfork_done != NULL) {
kthread->should_stop = 1;
wake_up_process(k); wake_up_process(k);
wait_for_completion(&kthread->exited); wait_for_completion(&kthread->exited);
} }
......
...@@ -134,13 +134,12 @@ static int rcu_scheduler_fully_active __read_mostly; ...@@ -134,13 +134,12 @@ static int rcu_scheduler_fully_active __read_mostly;
*/ */
static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
DEFINE_PER_CPU(char, rcu_cpu_has_work); DEFINE_PER_CPU(char, rcu_cpu_has_work);
#endif /* #ifdef CONFIG_RCU_BOOST */ #endif /* #ifdef CONFIG_RCU_BOOST */
static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
static void invoke_rcu_core(void); static void invoke_rcu_core(void);
static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
...@@ -1543,8 +1542,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) ...@@ -1543,8 +1542,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
/* Adjust any no-longer-needed kthreads. */ /* Adjust any no-longer-needed kthreads. */
rcu_stop_cpu_kthread(cpu); rcu_boost_kthread_setaffinity(rnp, -1);
rcu_node_kthread_setaffinity(rnp, -1);
/* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */ /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */
...@@ -2572,12 +2570,10 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, ...@@ -2572,12 +2570,10 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
break; break;
case CPU_ONLINE: case CPU_ONLINE:
case CPU_DOWN_FAILED: case CPU_DOWN_FAILED:
rcu_node_kthread_setaffinity(rnp, -1); rcu_boost_kthread_setaffinity(rnp, -1);
rcu_cpu_kthread_setrt(cpu, 1);
break; break;
case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE:
rcu_node_kthread_setaffinity(rnp, cpu); rcu_boost_kthread_setaffinity(rnp, cpu);
rcu_cpu_kthread_setrt(cpu, 0);
break; break;
case CPU_DYING: case CPU_DYING:
case CPU_DYING_FROZEN: case CPU_DYING_FROZEN:
......
...@@ -196,12 +196,6 @@ struct rcu_node { ...@@ -196,12 +196,6 @@ struct rcu_node {
/* Refused to boost: not sure why, though. */ /* Refused to boost: not sure why, though. */
/* This can happen due to race conditions. */ /* This can happen due to race conditions. */
#endif /* #ifdef CONFIG_RCU_BOOST */ #endif /* #ifdef CONFIG_RCU_BOOST */
struct task_struct *node_kthread_task;
/* kthread that takes care of this rcu_node */
/* structure, for example, awakening the */
/* per-CPU kthreads as needed. */
unsigned int node_kthread_status;
/* State of node_kthread_task for tracing. */
raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp; raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
} ____cacheline_internodealigned_in_smp; } ____cacheline_internodealigned_in_smp;
...@@ -465,7 +459,6 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); ...@@ -465,7 +459,6 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
#ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_HOTPLUG_CPU
static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
unsigned long flags); unsigned long flags);
static void rcu_stop_cpu_kthread(int cpu);
#endif /* #ifdef CONFIG_HOTPLUG_CPU */ #endif /* #ifdef CONFIG_HOTPLUG_CPU */
static void rcu_print_detail_task_stall(struct rcu_state *rsp); static void rcu_print_detail_task_stall(struct rcu_state *rsp);
static int rcu_print_task_stall(struct rcu_node *rnp); static int rcu_print_task_stall(struct rcu_node *rnp);
...@@ -488,15 +481,9 @@ static void invoke_rcu_callbacks_kthread(void); ...@@ -488,15 +481,9 @@ static void invoke_rcu_callbacks_kthread(void);
static bool rcu_is_callbacks_kthread(void); static bool rcu_is_callbacks_kthread(void);
#ifdef CONFIG_RCU_BOOST #ifdef CONFIG_RCU_BOOST
static void rcu_preempt_do_callbacks(void); static void rcu_preempt_do_callbacks(void);
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
cpumask_var_t cm);
static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
struct rcu_node *rnp, struct rcu_node *rnp);
int rnp_index);
static void invoke_rcu_node_kthread(struct rcu_node *rnp);
static void rcu_yield(void (*f)(unsigned long), unsigned long arg);
#endif /* #ifdef CONFIG_RCU_BOOST */ #endif /* #ifdef CONFIG_RCU_BOOST */
static void rcu_cpu_kthread_setrt(int cpu, int to_rt);
static void __cpuinit rcu_prepare_kthreads(int cpu); static void __cpuinit rcu_prepare_kthreads(int cpu);
static void rcu_prepare_for_idle_init(int cpu); static void rcu_prepare_for_idle_init(int cpu);
static void rcu_cleanup_after_idle(int cpu); static void rcu_cleanup_after_idle(int cpu);
......
This diff is collapsed.
...@@ -107,11 +107,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) ...@@ -107,11 +107,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
rdp->nxttail[RCU_WAIT_TAIL]], rdp->nxttail[RCU_WAIT_TAIL]],
".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]); ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]);
#ifdef CONFIG_RCU_BOOST #ifdef CONFIG_RCU_BOOST
seq_printf(m, " kt=%d/%c/%d ktl=%x", seq_printf(m, " kt=%d/%c ktl=%x",
per_cpu(rcu_cpu_has_work, rdp->cpu), per_cpu(rcu_cpu_has_work, rdp->cpu),
convert_kthread_status(per_cpu(rcu_cpu_kthread_status, convert_kthread_status(per_cpu(rcu_cpu_kthread_status,
rdp->cpu)), rdp->cpu)),
per_cpu(rcu_cpu_kthread_cpu, rdp->cpu),
per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff); per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff);
#endif /* #ifdef CONFIG_RCU_BOOST */ #endif /* #ifdef CONFIG_RCU_BOOST */
seq_printf(m, " b=%ld", rdp->blimit); seq_printf(m, " b=%ld", rdp->blimit);
......
/* /*
* Common SMP CPU bringup/teardown functions * Common SMP CPU bringup/teardown functions
*/ */
#include <linux/cpu.h>
#include <linux/err.h> #include <linux/err.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/export.h>
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/kthread.h>
#include <linux/smpboot.h>
#include "smpboot.h" #include "smpboot.h"
#ifdef CONFIG_SMP
#ifdef CONFIG_GENERIC_SMP_IDLE_THREAD #ifdef CONFIG_GENERIC_SMP_IDLE_THREAD
/* /*
* For the hotplug case we keep the task structs around and reuse * For the hotplug case we keep the task structs around and reuse
...@@ -65,3 +73,228 @@ void __init idle_threads_init(void) ...@@ -65,3 +73,228 @@ void __init idle_threads_init(void)
} }
} }
#endif #endif
#endif /* #ifdef CONFIG_SMP */
/* Registered smp_hotplug_thread descriptors, linked through their ->list member. */
static LIST_HEAD(hotplug_threads);
/* Held around every walk or modification of hotplug_threads in this file. */
static DEFINE_MUTEX(smpboot_threads_lock);
/*
 * Per-thread bookkeeping handed to smpboot_thread_fn() as its data
 * argument. Allocated in __smpboot_create_thread() and freed by the
 * thread itself when it is stopped.
 */
struct smpboot_thread_data {
	unsigned int			cpu;	/* cpu this thread was created for */
	unsigned int			status;	/* one of the HP_THREAD_* values */
	struct smp_hotplug_thread	*ht;	/* descriptor supplying the callbacks */
};
/* Values of smpboot_thread_data::status, maintained by smpboot_thread_fn(). */
enum {
	HP_THREAD_NONE = 0,	/* initial (kzalloc'ed) state, setup not yet run */
	HP_THREAD_ACTIVE,	/* setup or unpark has completed */
	HP_THREAD_PARKED,	/* the park callback has been invoked */
};
/**
 * smpboot_thread_fn - percpu hotplug thread loop function
 * @data:	thread data pointer
 *
 * Checks for thread stop and park conditions. Calls the necessary
 * setup, cleanup, park and unpark functions for the registered
 * thread.
 *
 * Loops until kthread_should_stop() becomes true; it then frees @data
 * and returns 0. There is no other return path.
 */
static int smpboot_thread_fn(void *data)
{
	struct smpboot_thread_data *td = data;
	struct smp_hotplug_thread *ht = td->ht;

	while (1) {
		/*
		 * The sleeping state is set before the stop/park/run
		 * conditions are evaluated; every path below that does not
		 * end in schedule() puts the task back to TASK_RUNNING.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		preempt_disable();
		if (kthread_should_stop()) {
			set_current_state(TASK_RUNNING);
			preempt_enable();
			/* cleanup must mirror setup: skip it if setup never ran */
			if (ht->cleanup && td->status != HP_THREAD_NONE)
				ht->cleanup(td->cpu, cpu_online(td->cpu));
			kfree(td);
			return 0;
		}

		if (kthread_should_park()) {
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			/* Only an active thread has something to tear down */
			if (ht->park && td->status == HP_THREAD_ACTIVE) {
				BUG_ON(td->cpu != smp_processor_id());
				ht->park(td->cpu);
				td->status = HP_THREAD_PARKED;
			}
			kthread_parkme();
			/* We might have been woken for stop */
			continue;
		}

		BUG_ON(td->cpu != smp_processor_id());

		/*
		 * Check for state change setup. The callbacks are invoked
		 * in TASK_RUNNING state, and the loop is restarted afterwards
		 * so that a stop or park request raised meanwhile (possibly by
		 * the callback itself) is honoured before any work is run.
		 */
		switch (td->status) {
		case HP_THREAD_NONE:
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			if (ht->setup)
				ht->setup(td->cpu);
			td->status = HP_THREAD_ACTIVE;
			continue;

		case HP_THREAD_PARKED:
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			if (ht->unpark)
				ht->unpark(td->cpu);
			td->status = HP_THREAD_ACTIVE;
			continue;
		}

		if (!ht->thread_should_run(td->cpu)) {
			/* Nothing to do: sleep until woken */
			preempt_enable();
			schedule();
		} else {
			set_current_state(TASK_RUNNING);
			preempt_enable();
			ht->thread_fn(td->cpu);
		}
	}
}
/*
 * Create the thread described by @ht for @cpu, unless the per-cpu
 * store already holds one. On success a reference to the new task is
 * taken and the task pointer is recorded in the store.
 *
 * Returns 0 on success (or when a thread already exists), -ENOMEM if
 * the thread data cannot be allocated, or the error reported by
 * kthread_create_on_cpu().
 */
static int
__smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
{
	struct task_struct **slot = per_cpu_ptr(ht->store, cpu);
	struct smpboot_thread_data *td;
	struct task_struct *tsk;

	/* Already created for this cpu: nothing to do */
	if (*slot)
		return 0;

	td = kzalloc_node(sizeof(*td), GFP_KERNEL, cpu_to_node(cpu));
	if (!td)
		return -ENOMEM;
	td->cpu = cpu;
	td->ht = ht;

	tsk = kthread_create_on_cpu(smpboot_thread_fn, td, cpu,
				    ht->thread_comm);
	if (IS_ERR(tsk)) {
		/* The thread never existed, so td is still ours to free */
		kfree(td);
		return PTR_ERR(tsk);
	}

	get_task_struct(tsk);
	*slot = tsk;
	return 0;
}
/*
 * Create, for @cpu, the thread of every registered hotplug thread
 * descriptor. Stops at the first failure and returns its error code;
 * returns 0 if all threads exist afterwards.
 */
int smpboot_create_threads(unsigned int cpu)
{
	struct smp_hotplug_thread *ht;
	int err = 0;

	mutex_lock(&smpboot_threads_lock);
	list_for_each_entry(ht, &hotplug_threads, list) {
		err = __smpboot_create_thread(ht, cpu);
		if (err)
			break;
	}
	mutex_unlock(&smpboot_threads_lock);

	return err;
}
/*
 * Unpark the thread of @ht that belongs to @cpu. A cpu without a
 * stored thread is skipped, matching smpboot_park_thread(), instead of
 * handing a NULL task to kthread_unpark().
 */
static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
{
	struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);

	if (tsk)
		kthread_unpark(tsk);
}
void smpboot_unpark_threads(unsigned int cpu)
{
struct smp_hotplug_thread *cur;
mutex_lock(&smpboot_threads_lock);
list_for_each_entry(cur, &hotplug_threads, list)
smpboot_unpark_thread(cur, cpu);
mutex_unlock(&smpboot_threads_lock);
}
/* Park the thread of @ht that belongs to @cpu, if one is stored. */
static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
{
	struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);

	if (!tsk)
		return;

	kthread_park(tsk);
}
void smpboot_park_threads(unsigned int cpu)
{
struct smp_hotplug_thread *cur;
mutex_lock(&smpboot_threads_lock);
list_for_each_entry_reverse(cur, &hotplug_threads, list)
smpboot_park_thread(cur, cpu);
mutex_unlock(&smpboot_threads_lock);
}
/*
 * Stop every thread of @ht, drop the task reference taken at creation
 * time and clear the per-cpu store entry.
 */
static void smpboot_destroy_threads(struct smp_hotplug_thread *ht)
{
	unsigned int cpu;

	/* We need to destroy also the parked threads of offline cpus */
	for_each_possible_cpu(cpu) {
		struct task_struct **slot = per_cpu_ptr(ht->store, cpu);
		struct task_struct *tsk = *slot;

		if (!tsk)
			continue;

		kthread_stop(tsk);
		put_task_struct(tsk);
		*slot = NULL;
	}
}
/**
 * smpboot_register_percpu_thread - Register a per_cpu thread related to hotplug
 * @plug_thread:	Hotplug thread descriptor
 *
 * Creates and starts the threads on all online cpus.
 *
 * Returns 0 on success. If creating a thread fails, all threads
 * created so far for @plug_thread are destroyed, the descriptor is not
 * registered and the error code is returned.
 */
int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
{
	unsigned int cpu;
	int ret = 0;

	/*
	 * Keep the set of online cpus stable while it is walked, so a cpu
	 * cannot come online between the walk and the list_add() and end
	 * up without a thread. Same lock order as in
	 * smpboot_unregister_percpu_thread().
	 */
	get_online_cpus();
	mutex_lock(&smpboot_threads_lock);
	for_each_online_cpu(cpu) {
		ret = __smpboot_create_thread(plug_thread, cpu);
		if (ret) {
			/* Roll back the threads created so far */
			smpboot_destroy_threads(plug_thread);
			goto out;
		}
		smpboot_unpark_thread(plug_thread, cpu);
	}
	list_add(&plug_thread->list, &hotplug_threads);
out:
	mutex_unlock(&smpboot_threads_lock);
	put_online_cpus();
	return ret;
}
EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread);
/**
 * smpboot_unregister_percpu_thread - Unregister a per_cpu thread related to hotplug
 * @plug_thread:	Hotplug thread descriptor
 *
 * Removes the descriptor from the list of registered hotplug threads
 * and stops its threads on all possible cpus.
 */
void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread)
{
	/* Hold off cpu hotplug for the duration of the teardown */
	get_online_cpus();
	mutex_lock(&smpboot_threads_lock);

	list_del(&plug_thread->list);
	smpboot_destroy_threads(plug_thread);

	mutex_unlock(&smpboot_threads_lock);
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
...@@ -13,4 +13,8 @@ static inline void idle_thread_set_boot_cpu(void) { } ...@@ -13,4 +13,8 @@ static inline void idle_thread_set_boot_cpu(void) { }
static inline void idle_threads_init(void) { } static inline void idle_threads_init(void) { }
#endif #endif
int smpboot_create_threads(unsigned int cpu);
void smpboot_park_threads(unsigned int cpu);
void smpboot_unpark_threads(unsigned int cpu);
#endif #endif
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <linux/rcupdate.h> #include <linux/rcupdate.h>
#include <linux/ftrace.h> #include <linux/ftrace.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <linux/smpboot.h>
#include <linux/tick.h> #include <linux/tick.h>
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
...@@ -742,49 +743,22 @@ void __init softirq_init(void) ...@@ -742,49 +743,22 @@ void __init softirq_init(void)
open_softirq(HI_SOFTIRQ, tasklet_hi_action); open_softirq(HI_SOFTIRQ, tasklet_hi_action);
} }
static int run_ksoftirqd(void * __bind_cpu) static int ksoftirqd_should_run(unsigned int cpu)
{ {
set_current_state(TASK_INTERRUPTIBLE); return local_softirq_pending();
}
while (!kthread_should_stop()) {
preempt_disable();
if (!local_softirq_pending()) {
schedule_preempt_disabled();
}
__set_current_state(TASK_RUNNING);
while (local_softirq_pending()) { static void run_ksoftirqd(unsigned int cpu)
/* Preempt disable stops cpu going offline. {
If already offline, we'll be on wrong CPU:
don't process */
if (cpu_is_offline((long)__bind_cpu))
goto wait_to_die;
local_irq_disable(); local_irq_disable();
if (local_softirq_pending()) if (local_softirq_pending()) {
__do_softirq(); __do_softirq();
rcu_note_context_switch(cpu);
local_irq_enable(); local_irq_enable();
sched_preempt_enable_no_resched();
cond_resched(); cond_resched();
preempt_disable(); return;
rcu_note_context_switch((long)__bind_cpu);
}
preempt_enable();
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
return 0;
wait_to_die:
preempt_enable();
/* Wait for kthread_stop */
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
schedule();
set_current_state(TASK_INTERRUPTIBLE);
} }
__set_current_state(TASK_RUNNING); local_irq_enable();
return 0;
} }
#ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_HOTPLUG_CPU
...@@ -850,48 +824,12 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, ...@@ -850,48 +824,12 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
unsigned long action, unsigned long action,
void *hcpu) void *hcpu)
{ {
int hotcpu = (unsigned long)hcpu;
struct task_struct *p;
switch (action) { switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
p = kthread_create_on_node(run_ksoftirqd,
hcpu,
cpu_to_node(hotcpu),
"ksoftirqd/%d", hotcpu);
if (IS_ERR(p)) {
printk("ksoftirqd for %i failed\n", hotcpu);
return notifier_from_errno(PTR_ERR(p));
}
kthread_bind(p, hotcpu);
per_cpu(ksoftirqd, hotcpu) = p;
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
wake_up_process(per_cpu(ksoftirqd, hotcpu));
break;
#ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
if (!per_cpu(ksoftirqd, hotcpu))
break;
/* Unbind so it can run. Fall thru. */
kthread_bind(per_cpu(ksoftirqd, hotcpu),
cpumask_any(cpu_online_mask));
case CPU_DEAD: case CPU_DEAD:
case CPU_DEAD_FROZEN: { case CPU_DEAD_FROZEN:
static const struct sched_param param = { takeover_tasklets((unsigned long)hcpu);
.sched_priority = MAX_RT_PRIO-1
};
p = per_cpu(ksoftirqd, hotcpu);
per_cpu(ksoftirqd, hotcpu) = NULL;
sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
kthread_stop(p);
takeover_tasklets(hotcpu);
break; break;
}
#endif /* CONFIG_HOTPLUG_CPU */ #endif /* CONFIG_HOTPLUG_CPU */
} }
return NOTIFY_OK; return NOTIFY_OK;
...@@ -901,14 +839,19 @@ static struct notifier_block __cpuinitdata cpu_nfb = { ...@@ -901,14 +839,19 @@ static struct notifier_block __cpuinitdata cpu_nfb = {
.notifier_call = cpu_callback .notifier_call = cpu_callback
}; };
/*
 * Descriptor for the per-cpu ksoftirqd threads, registered with the
 * smpboot hotplug thread infrastructure in spawn_ksoftirqd().
 */
static struct smp_hotplug_thread softirq_threads = {
	.store			= &ksoftirqd,		/* per-cpu task pointer */
	.thread_should_run	= ksoftirqd_should_run,	/* is there work pending? */
	.thread_fn		= run_ksoftirqd,	/* do the work */
	.thread_comm		= "ksoftirqd/%u",
};
static __init int spawn_ksoftirqd(void) static __init int spawn_ksoftirqd(void)
{ {
void *cpu = (void *)(long)smp_processor_id();
int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
BUG_ON(err != NOTIFY_OK);
cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
register_cpu_notifier(&cpu_nfb); register_cpu_notifier(&cpu_nfb);
BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
return 0; return 0;
} }
early_initcall(spawn_ksoftirqd); early_initcall(spawn_ksoftirqd);
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <linux/notifier.h> #include <linux/notifier.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/sysctl.h> #include <linux/sysctl.h>
#include <linux/smpboot.h>
#include <asm/irq_regs.h> #include <asm/irq_regs.h>
#include <linux/kvm_para.h> #include <linux/kvm_para.h>
...@@ -29,16 +30,18 @@ ...@@ -29,16 +30,18 @@
int watchdog_enabled = 1; int watchdog_enabled = 1;
int __read_mostly watchdog_thresh = 10; int __read_mostly watchdog_thresh = 10;
static int __read_mostly watchdog_disabled;
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer); static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(bool, softlockup_touch_sync); static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(bool, soft_watchdog_warn); static DEFINE_PER_CPU(bool, soft_watchdog_warn);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
#ifdef CONFIG_HARDLOCKUP_DETECTOR #ifdef CONFIG_HARDLOCKUP_DETECTOR
static DEFINE_PER_CPU(bool, hard_watchdog_warn); static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
#endif #endif
...@@ -248,13 +251,15 @@ static void watchdog_overflow_callback(struct perf_event *event, ...@@ -248,13 +251,15 @@ static void watchdog_overflow_callback(struct perf_event *event,
__this_cpu_write(hard_watchdog_warn, false); __this_cpu_write(hard_watchdog_warn, false);
return; return;
} }
#endif /* CONFIG_HARDLOCKUP_DETECTOR */
static void watchdog_interrupt_count(void) static void watchdog_interrupt_count(void)
{ {
__this_cpu_inc(hrtimer_interrupts); __this_cpu_inc(hrtimer_interrupts);
} }
#else
static inline void watchdog_interrupt_count(void) { return; } static int watchdog_nmi_enable(unsigned int cpu);
#endif /* CONFIG_HARDLOCKUP_DETECTOR */ static void watchdog_nmi_disable(unsigned int cpu);
/* watchdog kicker functions */ /* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
...@@ -327,49 +332,68 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) ...@@ -327,49 +332,68 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
return HRTIMER_RESTART; return HRTIMER_RESTART;
} }
static void watchdog_set_prio(unsigned int policy, unsigned int prio)
{
struct sched_param param = { .sched_priority = prio };
/* sched_setscheduler(current, policy, &param);
* The watchdog thread - touches the timestamp. }
*/
static int watchdog(void *unused) static void watchdog_enable(unsigned int cpu)
{ {
struct sched_param param = { .sched_priority = 0 };
struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
/* initialize timestamp */ if (!watchdog_enabled) {
__touch_watchdog(); kthread_park(current);
return;
}
/* Enable the perf event */
watchdog_nmi_enable(cpu);
/* kick off the timer for the hardlockup detector */ /* kick off the timer for the hardlockup detector */
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = watchdog_timer_fn;
/* done here because hrtimer_start can only pin to smp_processor_id() */ /* done here because hrtimer_start can only pin to smp_processor_id() */
hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()), hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
HRTIMER_MODE_REL_PINNED); HRTIMER_MODE_REL_PINNED);
set_current_state(TASK_INTERRUPTIBLE); /* initialize timestamp */
/* watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
* Run briefly (kicked by the hrtimer callback function) once every
* get_sample_period() seconds (4 seconds by default) to reset the
* softlockup timestamp. If this gets delayed for more than
* 2*watchdog_thresh seconds then the debug-printout triggers in
* watchdog_timer_fn().
*/
while (!kthread_should_stop()) {
__touch_watchdog(); __touch_watchdog();
schedule(); }
if (kthread_should_stop()) static void watchdog_disable(unsigned int cpu)
break; {
struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
set_current_state(TASK_INTERRUPTIBLE); watchdog_set_prio(SCHED_NORMAL, 0);
} hrtimer_cancel(hrtimer);
/* /* disable the perf event */
* Drop the policy/priority elevation during thread exit to avoid a watchdog_nmi_disable(cpu);
* scheduling latency spike.
*/
__set_current_state(TASK_RUNNING);
sched_setscheduler(current, SCHED_NORMAL, &param);
return 0;
} }
/*
 * Report whether this cpu's hrtimer interrupt count differs from the
 * count last recorded by the watchdog thread function.
 */
static int watchdog_should_run(unsigned int cpu)
{
	unsigned long irqs = __this_cpu_read(hrtimer_interrupts);
	unsigned long seen = __this_cpu_read(soft_lockup_hrtimer_cnt);

	return irqs != seen;
}
/*
 * The watchdog thread function - touches the timestamp.
 *
 * Records the current hrtimer interrupt count in
 * soft_lockup_hrtimer_cnt, which watchdog_should_run() compares
 * against, and then resets the softlockup timestamp.
 *
 * It only runs once every get_sample_period() seconds (4 seconds by
 * default). If this gets delayed for more than 2*watchdog_thresh
 * seconds then the debug-printout triggers in watchdog_timer_fn().
 */
static void watchdog(unsigned int cpu)
{
	unsigned long irqs = __this_cpu_read(hrtimer_interrupts);

	__this_cpu_write(soft_lockup_hrtimer_cnt, irqs);
	__touch_watchdog();
}
#ifdef CONFIG_HARDLOCKUP_DETECTOR #ifdef CONFIG_HARDLOCKUP_DETECTOR
/* /*
...@@ -379,7 +403,7 @@ static int watchdog(void *unused) ...@@ -379,7 +403,7 @@ static int watchdog(void *unused)
*/ */
static unsigned long cpu0_err; static unsigned long cpu0_err;
static int watchdog_nmi_enable(int cpu) static int watchdog_nmi_enable(unsigned int cpu)
{ {
struct perf_event_attr *wd_attr; struct perf_event_attr *wd_attr;
struct perf_event *event = per_cpu(watchdog_ev, cpu); struct perf_event *event = per_cpu(watchdog_ev, cpu);
...@@ -433,7 +457,7 @@ static int watchdog_nmi_enable(int cpu) ...@@ -433,7 +457,7 @@ static int watchdog_nmi_enable(int cpu)
return 0; return 0;
} }
static void watchdog_nmi_disable(int cpu) static void watchdog_nmi_disable(unsigned int cpu)
{ {
struct perf_event *event = per_cpu(watchdog_ev, cpu); struct perf_event *event = per_cpu(watchdog_ev, cpu);
...@@ -447,107 +471,35 @@ static void watchdog_nmi_disable(int cpu) ...@@ -447,107 +471,35 @@ static void watchdog_nmi_disable(int cpu)
return; return;
} }
#else #else
static int watchdog_nmi_enable(int cpu) { return 0; } static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
static void watchdog_nmi_disable(int cpu) { return; } static void watchdog_nmi_disable(unsigned int cpu) { return; }
#endif /* CONFIG_HARDLOCKUP_DETECTOR */ #endif /* CONFIG_HARDLOCKUP_DETECTOR */
/* prepare/enable/disable routines */ /* prepare/enable/disable routines */
static void watchdog_prepare_cpu(int cpu)
{
struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
WARN_ON(per_cpu(softlockup_watchdog, cpu));
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = watchdog_timer_fn;
}
static int watchdog_enable(int cpu)
{
struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
int err = 0;
/* enable the perf event */
err = watchdog_nmi_enable(cpu);
/* Regardless of err above, fall through and start softlockup */
/* create the watchdog thread */
if (!p) {
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu);
if (IS_ERR(p)) {
pr_err("softlockup watchdog for %i failed\n", cpu);
if (!err) {
/* if hardlockup hasn't already set this */
err = PTR_ERR(p);
/* and disable the perf event */
watchdog_nmi_disable(cpu);
}
goto out;
}
sched_setscheduler(p, SCHED_FIFO, &param);
kthread_bind(p, cpu);
per_cpu(watchdog_touch_ts, cpu) = 0;
per_cpu(softlockup_watchdog, cpu) = p;
wake_up_process(p);
}
out:
return err;
}
static void watchdog_disable(int cpu)
{
struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
/*
* cancel the timer first to stop incrementing the stats
* and waking up the kthread
*/
hrtimer_cancel(hrtimer);
/* disable the perf event */
watchdog_nmi_disable(cpu);
/* stop the watchdog thread */
if (p) {
per_cpu(softlockup_watchdog, cpu) = NULL;
kthread_stop(p);
}
}
/* sysctl functions */ /* sysctl functions */
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
static void watchdog_enable_all_cpus(void) static void watchdog_enable_all_cpus(void)
{ {
int cpu; unsigned int cpu;
watchdog_enabled = 0;
if (watchdog_disabled) {
watchdog_disabled = 0;
for_each_online_cpu(cpu) for_each_online_cpu(cpu)
if (!watchdog_enable(cpu)) kthread_unpark(per_cpu(softlockup_watchdog, cpu));
/* if any cpu succeeds, watchdog is considered }
enabled for the system */
watchdog_enabled = 1;
if (!watchdog_enabled)
pr_err("failed to be enabled on some cpus\n");
} }
static void watchdog_disable_all_cpus(void) static void watchdog_disable_all_cpus(void)
{ {
int cpu; unsigned int cpu;
if (!watchdog_disabled) {
watchdog_disabled = 1;
for_each_online_cpu(cpu) for_each_online_cpu(cpu)
watchdog_disable(cpu); kthread_park(per_cpu(softlockup_watchdog, cpu));
}
/* if all watchdogs are disabled, then they are disabled for the system */
watchdog_enabled = 0;
} }
/* /*
* proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh
*/ */
...@@ -557,73 +509,36 @@ int proc_dowatchdog(struct ctl_table *table, int write, ...@@ -557,73 +509,36 @@ int proc_dowatchdog(struct ctl_table *table, int write,
{ {
int ret; int ret;
if (watchdog_disabled < 0)
return -ENODEV;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write) if (ret || !write)
goto out; return ret;
if (watchdog_enabled && watchdog_thresh) if (watchdog_enabled && watchdog_thresh)
watchdog_enable_all_cpus(); watchdog_enable_all_cpus();
else else
watchdog_disable_all_cpus(); watchdog_disable_all_cpus();
out:
return ret; return ret;
} }
#endif /* CONFIG_SYSCTL */ #endif /* CONFIG_SYSCTL */
static struct smp_hotplug_thread watchdog_threads = {
/* .store = &softlockup_watchdog,
* Create/destroy watchdog threads as CPUs come and go: .thread_should_run = watchdog_should_run,
*/ .thread_fn = watchdog,
static int __cpuinit .thread_comm = "watchdog/%u",
cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) .setup = watchdog_enable,
{ .park = watchdog_disable,
int hotcpu = (unsigned long)hcpu; .unpark = watchdog_enable,
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
watchdog_prepare_cpu(hotcpu);
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
if (watchdog_enabled)
watchdog_enable(hotcpu);
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
watchdog_disable(hotcpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
watchdog_disable(hotcpu);
break;
#endif /* CONFIG_HOTPLUG_CPU */
}
/*
* hardlockup and softlockup are not important enough
* to block cpu bring up. Just always succeed and
* rely on printk output to flag problems.
*/
return NOTIFY_OK;
}
static struct notifier_block __cpuinitdata cpu_nfb = {
.notifier_call = cpu_callback
}; };
void __init lockup_detector_init(void) void __init lockup_detector_init(void)
{ {
void *cpu = (void *)(long)smp_processor_id(); if (smpboot_register_percpu_thread(&watchdog_threads)) {
int err; pr_err("Failed to create watchdog threads, disabled\n");
watchdog_disabled = -ENODEV;
err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); }
WARN_ON(notifier_to_errno(err));
cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
register_cpu_notifier(&cpu_nfb);
return;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment