Commit ea86cb4b authored by Vincent Guittot, committed by Ingo Molnar

sched/cgroup: Fix cpu_cgroup_fork() handling

A new fair task is detached and attached from/to task_group with:

  cgroup_post_fork()
    ss->fork(child) := cpu_cgroup_fork()
      sched_move_task()
        task_move_group_fair()

Which is wrong, because at this point in fork() the task isn't fully
initialized and it cannot 'move' to another group, because it's not
attached to any group yet.

In fact, cpu_cgroup_fork() needs only a small part of sched_move_task(), so we
can just call this small part directly instead of sched_move_task(). And
the task doesn't really migrate because it is not yet attached, so we
need the following sequence:

  do_fork()
    sched_fork()
      __set_task_cpu()

    cgroup_post_fork()
      set_task_rq() # set task group and runqueue

    wake_up_new_task()
      select_task_rq() can select a new cpu
      __set_task_cpu
      post_init_entity_util_avg
        attach_task_cfs_rq()
      activate_task
        enqueue_task

This patch makes that happen.
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
[ Added TASK_SET_GROUP to set depth properly. ]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 01011473
......@@ -7744,27 +7744,9 @@ void sched_offline_group(struct task_group *tg)
spin_unlock_irqrestore(&task_group_lock, flags);
}
/* change task's runqueue when it moves between groups.
* The caller of this function should have put the task in its new group
* by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
* reflect its new group.
*/
void sched_move_task(struct task_struct *tsk)
static void sched_change_group(struct task_struct *tsk, int type)
{
struct task_group *tg;
int queued, running;
struct rq_flags rf;
struct rq *rq;
rq = task_rq_lock(tsk, &rf);
running = task_current(rq, tsk);
queued = task_on_rq_queued(tsk);
if (queued)
dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
if (unlikely(running))
put_prev_task(rq, tsk);
/*
* All callers are synchronized by task_rq_lock(); we do not use RCU
......@@ -7777,11 +7759,37 @@ void sched_move_task(struct task_struct *tsk)
tsk->sched_task_group = tg;
#ifdef CONFIG_FAIR_GROUP_SCHED
if (tsk->sched_class->task_move_group)
tsk->sched_class->task_move_group(tsk);
if (tsk->sched_class->task_change_group)
tsk->sched_class->task_change_group(tsk, type);
else
#endif
set_task_rq(tsk, task_cpu(tsk));
}
/*
* Change task's runqueue when it moves between groups.
*
* The caller of this function should have put the task in its new group by
* now. This function just updates tsk->se.cfs_rq and tsk->se.parent to reflect
* its new group.
*/
void sched_move_task(struct task_struct *tsk)
{
int queued, running;
struct rq_flags rf;
struct rq *rq;
rq = task_rq_lock(tsk, &rf);
running = task_current(rq, tsk);
queued = task_on_rq_queued(tsk);
if (queued)
dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
if (unlikely(running))
put_prev_task(rq, tsk);
sched_change_group(tsk, TASK_MOVE_GROUP);
if (unlikely(running))
tsk->sched_class->set_curr_task(rq);
......@@ -8209,9 +8217,20 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
sched_free_group(tg);
}
/*
 * Invoked as ss->fork(child) from cgroup_post_fork().
 *
 * This is called before wake_up_new_task(), therefore we really only
 * have to set the task's group bits; all the other stuff does not apply.
 * In particular the task must not go through sched_move_task(): at this
 * point in fork() it is not attached to any group yet, so there is nothing
 * to move it away from.
 */
static void cpu_cgroup_fork(struct task_struct *task)
{
	struct rq_flags rf;
	struct rq *rq;

	rq = task_rq_lock(task, &rf);

	/* TASK_SET_GROUP: set the group only, as opposed to TASK_MOVE_GROUP. */
	sched_change_group(task, TASK_SET_GROUP);

	task_rq_unlock(rq, task, &rf);
}
static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
......
......@@ -8466,6 +8466,14 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
}
#ifdef CONFIG_FAIR_GROUP_SCHED
/*
 * Set the group of a fair task without any detach/attach step: point the
 * task at the runqueue for its current CPU, then derive the entity depth
 * from its parent (parent's depth + 1, or 0 when there is no parent).
 */
static void task_set_group_fair(struct task_struct *p)
{
	struct sched_entity *entity = &p->se;

	/*
	 * Keep this ahead of the depth computation: presumably set_task_rq()
	 * is what updates entity->parent -- verify against its definition.
	 */
	set_task_rq(p, task_cpu(p));

	if (entity->parent)
		entity->depth = entity->parent->depth + 1;
	else
		entity->depth = 0;
}
static void task_move_group_fair(struct task_struct *p)
{
detach_task_cfs_rq(p);
......@@ -8478,6 +8486,19 @@ static void task_move_group_fair(struct task_struct *p)
attach_task_cfs_rq(p);
}
/*
 * Dispatch a group change for a fair task according to @type:
 *   TASK_SET_GROUP  - only set the group (task_set_group_fair())
 *   TASK_MOVE_GROUP - move between groups (task_move_group_fair())
 * Any other value of @type is silently ignored.
 */
static void task_change_group_fair(struct task_struct *p, int type)
{
	if (type == TASK_SET_GROUP) {
		task_set_group_fair(p);
		return;
	}

	if (type == TASK_MOVE_GROUP)
		task_move_group_fair(p);
}
void free_fair_sched_group(struct task_group *tg)
{
int i;
......@@ -8706,7 +8727,7 @@ const struct sched_class fair_sched_class = {
.update_curr = update_curr_fair,
#ifdef CONFIG_FAIR_GROUP_SCHED
.task_move_group = task_move_group_fair,
.task_change_group = task_change_group_fair,
#endif
};
......
......@@ -1246,8 +1246,11 @@ struct sched_class {
void (*update_curr) (struct rq *rq);
#define TASK_SET_GROUP 0
#define TASK_MOVE_GROUP 1
#ifdef CONFIG_FAIR_GROUP_SCHED
void (*task_move_group) (struct task_struct *p);
void (*task_change_group) (struct task_struct *p, int type);
#endif
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment