Commit e271fc85 authored by Rusty Russell, committed by Linus Torvalds

[PATCH] Hotplug CPU vs TASK_ZOMBIEs: The Sequel to Hotplug CPU vs TASK_DEAD

release_task() can sleep, and sleeping allows a CPU to go down underneath you.
release_task() also removes you from the tasklist, so you never get migrated
off the dead CPU: the result is a BUG() in sched.c.
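
For context, the live-migration walk runs over the tasklist, so anything
release_task() has already unhashed is invisible to it.  A simplified sketch
of that walk (abridged for illustration, not the verbatim kernel source):

/*
 * Simplified sketch of migrate_live_tasks(): it walks the global
 * tasklist under tasklist_lock.  A zombie that release_task() has
 * already removed from the list is never visited here, so it stays
 * parked on the dead CPU.
 */
static void migrate_live_tasks(int src_cpu)
{
	struct task_struct *tsk, *t;

	write_lock_irq(&tasklist_lock);
	do_each_thread(t, tsk) {
		if (tsk == current)
			continue;
		if (task_cpu(tsk) == src_cpu)
			move_task_off_dead_cpu(src_cpu, tsk);
	} while_each_thread(t, tsk);
	write_unlock_irq(&tasklist_lock);
}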

In last week's episode, our dashing hero (Ingo Molnar) solved this for
self-reaping tasks by grabbing the hotplug CPU lock around release_task().
However, in an unexpected twist, the problem remains for tasks whose
parents call release_task() on them: the zombies are off the tasklist, and
lurk on the dead CPU.

Fortunately, the comedic sidekick (Rusty Russell) has an answer: let's make
the hotplug callback walk the runqueue of the dead CPU as well, taking care
of the zombies.

1) Restore exit.c to its former form.  The comment was incorrect: sched.c
   checks PF_DEAD, not the state, to decide to do the final
   put_task_struct(), and it does so for all tasks, self-reaping or not
   (see the finish_task_switch() sketch after this list).

2) Implement migrate_dead_tasks() in the sched.c hotplug CPU callback.

3) Rename migrate_all_tasks() to migrate_live_tasks().
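
For reference on point 1), the final-reap decision lives at the tail of
finish_task_switch() in sched.c.  An abridged sketch of that logic from
kernels of this vintage (for illustration only; not part of this diff):

/*
 * Abridged sketch of finish_task_switch(): the final reference drop
 * is keyed off PF_DEAD, not off prev->state, and it runs for every
 * task that schedules away for the last time, self-reaping or not.
 */
static void finish_task_switch(task_t *prev)
{
	runqueue_t *rq = this_rq();
	unsigned long prev_task_flags = prev->flags;

	finish_arch_switch(rq, prev);
	if (unlikely(prev_task_flags & PF_DEAD))
		put_task_struct(prev);
}
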
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent a5212682

--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -754,8 +754,8 @@ static void exit_notify(struct task_struct *tsk)
 	state = TASK_ZOMBIE;
 	if (tsk->exit_signal == -1 && tsk->ptrace == 0)
 		state = TASK_DEAD;
-	else
-		tsk->state = state;
+	tsk->state = state;
+
 	/*
 	 * Clear these here so that update_process_times() won't try to deliver
 	 * itimer, profile or rlimit signals to this task while it is in late exit.
@@ -764,14 +764,6 @@ static void exit_notify(struct task_struct *tsk)
 	tsk->it_prof_value = 0;
 	tsk->rlim[RLIMIT_CPU].rlim_cur = RLIM_INFINITY;
 
-	/*
-	 * Get a reference to it so that we can set the state
-	 * as the last step. The state-setting only matters if the
-	 * current task is releasing itself, to trigger the final
-	 * put_task_struct() in finish_task_switch(). (thread self-reap)
-	 */
-	get_task_struct(tsk);
-
 	write_unlock_irq(&tasklist_lock);
 
 	list_for_each_safe(_p, _n, &ptrace_dead) {
@@ -781,23 +773,12 @@ static void exit_notify(struct task_struct *tsk)
 	}
 
 	/* If the process is dead, release it - nobody will wait for it */
-	if (state == TASK_DEAD) {
-		lock_cpu_hotplug();
+	if (state == TASK_DEAD)
 		release_task(tsk);
-		write_lock_irq(&tasklist_lock);
-		/*
-		 * No preemption may happen from this point on,
-		 * or CPU hotplug (and task exit) breaks:
-		 */
-		unlock_cpu_hotplug();
-		tsk->state = state;
-		_raw_write_unlock(&tasklist_lock);
-		local_irq_enable();
-	} else
-		preempt_disable();
 
+	/* PF_DEAD causes final put_task_struct after we schedule. */
+	preempt_disable();
 	tsk->flags |= PF_DEAD;
-	put_task_struct(tsk);
 }
 
 asmlinkage NORET_TYPE void do_exit(long code)

--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3990,8 +3990,8 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk)
 	__migrate_task(tsk, dead_cpu, dest_cpu);
 }
 
-/* migrate_all_tasks - function to migrate all tasks from the dead cpu. */
-static void migrate_all_tasks(int src_cpu)
+/* Run through task list and migrate tasks from the dead cpu. */
+static void migrate_live_tasks(int src_cpu)
 {
 	struct task_struct *tsk, *t;
 
@@ -4033,6 +4033,47 @@ void sched_idle_next(void)
 	spin_unlock_irqrestore(&rq->lock, flags);
 }
 
+static void migrate_dead(unsigned int dead_cpu, task_t *tsk)
+{
+	struct runqueue *rq = cpu_rq(dead_cpu);
+
+	/* Must be exiting, otherwise would be on tasklist. */
+	BUG_ON(tsk->state != TASK_ZOMBIE && tsk->state != TASK_DEAD);
+
+	/* Cannot have done final schedule yet: would have vanished. */
+	BUG_ON(tsk->flags & PF_DEAD);
+
+	get_task_struct(tsk);
+
+	/*
+	 * Drop lock around migration; if someone else moves it,
+	 * that's OK. No task can be added to this CPU, so iteration is
+	 * fine.
+	 */
+	spin_unlock_irq(&rq->lock);
+	move_task_off_dead_cpu(dead_cpu, tsk);
+	spin_lock_irq(&rq->lock);
+
+	put_task_struct(tsk);
+}
+
+/* release_task() removes task from tasklist, so we won't find dead tasks. */
+static void migrate_dead_tasks(unsigned int dead_cpu)
+{
+	unsigned arr, i;
+	struct runqueue *rq = cpu_rq(dead_cpu);
+
+	for (arr = 0; arr < 2; arr++) {
+		for (i = 0; i < MAX_PRIO; i++) {
+			struct list_head *list = &rq->arrays[arr].queue[i];
+			while (!list_empty(list))
+				migrate_dead(dead_cpu,
+					     list_entry(list->next, task_t,
+							run_list));
+		}
+	}
+}
+
 #endif /* CONFIG_HOTPLUG_CPU */
 
 /*
@@ -4072,7 +4113,7 @@ static int migration_call(struct notifier_block *nfb, unsigned long action,
 		cpu_rq(cpu)->migration_thread = NULL;
 		break;
 	case CPU_DEAD:
-		migrate_all_tasks(cpu);
+		migrate_live_tasks(cpu);
 		rq = cpu_rq(cpu);
 		kthread_stop(rq->migration_thread);
 		rq->migration_thread = NULL;
@@ -4081,6 +4122,7 @@ static int migration_call(struct notifier_block *nfb, unsigned long action,
 		deactivate_task(rq->idle, rq);
 		rq->idle->static_prio = MAX_PRIO;
 		__setscheduler(rq->idle, SCHED_NORMAL, 0);
+		migrate_dead_tasks(cpu);
 		task_rq_unlock(rq, &flags);
 		BUG_ON(rq->nr_running != 0);