Commit 470fd646, authored by Cliff Wickman, committed by Linus Torvalds

hotplug cpu: migrate a task within its cpuset

When a cpu is disabled, move_task_off_dead_cpu() is called for tasks that have
been running on that cpu.

Currently, such a task is migrated:
 1) to any cpu on the same node as the disabled cpu, which is both online
    and among that task's cpus_allowed
 2) to any cpu which is both online and among that task's cpus_allowed

It is typical of a multithreaded application running on a large NUMA system to
have its tasks confined to a cpuset so as to cluster them near the memory that
they share.  Furthermore, it is typical to explicitly place such a task on a
specific cpu in that cpuset.  And in that case the task's cpus_allowed
includes only a single cpu.

This patch inserts a preference to migrate such a task to some cpu within
its cpuset (setting its cpus_allowed to the entire cpuset).

With this patch, the task is migrated:
 1) to any cpu on the same node as the disabled cpu, which is both online
    and among that task's cpus_allowed
 2) to any online cpu within the task's cpuset
 3) to any cpu which is both online and among that task's cpus_allowed

In order to do this, move_task_off_dead_cpu() must make a call to
cpuset_cpus_allowed_locked(), a new subset of cpuset_cpus_allowed(), that will
not block.  (name change - per Oleg's suggestion)

Calls are made to cpuset_lock() and cpuset_unlock() in migration_call() to
hold the cpuset mutex during the whole migrate_live_tasks() and
migrate_dead_tasks() procedure.

[akpm@linux-foundation.org: build fix]
[pj@sgi.com: Fix indentation and spacing]
Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent bd89aabc
...@@ -220,7 +220,9 @@ A: The following happen, listed in no particular order :-) ...@@ -220,7 +220,9 @@ A: The following happen, listed in no particular order :-)
CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the
CPU is being offlined while tasks are frozen due to a suspend operation in CPU is being offlined while tasks are frozen due to a suspend operation in
progress progress
- All process is migrated away from this outgoing CPU to a new CPU - All processes are migrated away from this outgoing CPU to new CPUs.
The new CPU is chosen from each process' current cpuset, which may be
a subset of all online CPUs.
- All interrupts targeted to this CPU is migrated to a new CPU - All interrupts targeted to this CPU is migrated to a new CPU
- timers/bottom half/task lets are also migrated to a new CPU - timers/bottom half/task lets are also migrated to a new CPU
- Once all services are migrated, kernel calls an arch specific routine - Once all services are migrated, kernel calls an arch specific routine
......
...@@ -21,6 +21,7 @@ extern int cpuset_init_early(void); ...@@ -21,6 +21,7 @@ extern int cpuset_init_early(void);
extern int cpuset_init(void); extern int cpuset_init(void);
extern void cpuset_init_smp(void); extern void cpuset_init_smp(void);
extern cpumask_t cpuset_cpus_allowed(struct task_struct *p); extern cpumask_t cpuset_cpus_allowed(struct task_struct *p);
extern cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p);
extern nodemask_t cpuset_mems_allowed(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
#define cpuset_current_mems_allowed (current->mems_allowed) #define cpuset_current_mems_allowed (current->mems_allowed)
void cpuset_init_current_mems_allowed(void); void cpuset_init_current_mems_allowed(void);
...@@ -87,6 +88,10 @@ static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p) ...@@ -87,6 +88,10 @@ static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p)
{ {
return cpu_possible_map; return cpu_possible_map;
} }
/*
 * Stub for cpuset_cpus_allowed_locked(): @p is ignored and the result is
 * always cpu_possible_map, the same value the cpuset_cpus_allowed() stub
 * directly above returns.
 * NOTE(review): presumably the fallback used when cpusets are compiled
 * out -- the enclosing #ifdef is outside this hunk, so confirm.
 */
static inline cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p)
{
return cpu_possible_map;
}
static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
{ {
......
...@@ -1818,10 +1818,23 @@ cpumask_t cpuset_cpus_allowed(struct task_struct *tsk) ...@@ -1818,10 +1818,23 @@ cpumask_t cpuset_cpus_allowed(struct task_struct *tsk)
cpumask_t mask; cpumask_t mask;
mutex_lock(&callback_mutex); mutex_lock(&callback_mutex);
mask = cpuset_cpus_allowed_locked(tsk);
mutex_unlock(&callback_mutex);
return mask;
}
/**
* cpuset_cpus_allowed_locked - return cpus_allowed mask from a task's cpuset.
* Must be called with callback_mutex held.
**/
cpumask_t cpuset_cpus_allowed_locked(struct task_struct *tsk)
{
cpumask_t mask;
task_lock(tsk); task_lock(tsk);
guarantee_online_cpus(task_cs(tsk), &mask); guarantee_online_cpus(task_cs(tsk), &mask);
task_unlock(tsk); task_unlock(tsk);
mutex_unlock(&callback_mutex);
return mask; return mask;
} }
......
...@@ -5160,8 +5160,16 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) ...@@ -5160,8 +5160,16 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
/* No more Mr. Nice Guy. */ /* No more Mr. Nice Guy. */
if (dest_cpu == NR_CPUS) { if (dest_cpu == NR_CPUS) {
cpumask_t cpus_allowed = cpuset_cpus_allowed_locked(p);
/*
* Try to stay on the same cpuset, where the
* current cpuset may be a subset of all cpus.
* The cpuset_cpus_allowed_locked() variant of
* cpuset_cpus_allowed() will not block. It must be
* called within calls to cpuset_lock/cpuset_unlock.
*/
rq = task_rq_lock(p, &flags); rq = task_rq_lock(p, &flags);
cpus_setall(p->cpus_allowed); p->cpus_allowed = cpus_allowed;
dest_cpu = any_online_cpu(p->cpus_allowed); dest_cpu = any_online_cpu(p->cpus_allowed);
task_rq_unlock(rq, &flags); task_rq_unlock(rq, &flags);
...@@ -5527,6 +5535,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) ...@@ -5527,6 +5535,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_DEAD: case CPU_DEAD:
case CPU_DEAD_FROZEN: case CPU_DEAD_FROZEN:
cpuset_lock(); /* around calls to cpuset_cpus_allowed_locked() */
migrate_live_tasks(cpu); migrate_live_tasks(cpu);
rq = cpu_rq(cpu); rq = cpu_rq(cpu);
kthread_stop(rq->migration_thread); kthread_stop(rq->migration_thread);
...@@ -5540,6 +5549,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) ...@@ -5540,6 +5549,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
rq->idle->sched_class = &idle_sched_class; rq->idle->sched_class = &idle_sched_class;
migrate_dead_tasks(cpu); migrate_dead_tasks(cpu);
spin_unlock_irq(&rq->lock); spin_unlock_irq(&rq->lock);
cpuset_unlock();
migrate_nr_uninterruptible(rq); migrate_nr_uninterruptible(rq);
BUG_ON(rq->nr_running != 0); BUG_ON(rq->nr_running != 0);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment