Commit 6363ca57 authored by Ingo Molnar

revert ("sched: fair-group: SMP-nice for group scheduling")

Yanmin Zhang reported:

Compared with 2.6.25, VolanoMark has a big regression with kernel 2.6.26-rc1.
It's about 50% on my 8-core Stoakley, 16-core Tigerton, and Itanium Montecito machines.

Bisecting, I located the following patch:

| 18d95a28 is first bad commit
| commit 18d95a28
| Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
| Date:   Sat Apr 19 19:45:00 2008 +0200
|
|     sched: fair-group: SMP-nice for group scheduling

Revert it so that we get v2.6.25 behavior.
Bisected-by: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 4285f594
@@ -766,7 +766,6 @@ struct sched_domain {
         struct sched_domain *child;    /* bottom domain must be null terminated */
         struct sched_group *groups;    /* the balancing groups of the domain */
         cpumask_t span;                /* span of all CPUs in this domain */
-        int first_cpu;                 /* cache of the first cpu in this domain */
         unsigned long min_interval;    /* Minimum balance interval ms */
         unsigned long max_interval;    /* Maximum balance interval ms */
         unsigned int busy_factor;      /* less balancing by factor if busy */
[A further diff in this commit is collapsed and not shown here.]
@@ -167,11 +167,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 #endif
         SEQ_printf(m, "  .%-30s: %ld\n", "nr_spread_over",
                         cfs_rq->nr_spread_over);
-#ifdef CONFIG_FAIR_GROUP_SCHED
-#ifdef CONFIG_SMP
-        SEQ_printf(m, "  .%-30s: %lu\n", "shares", cfs_rq->shares);
-#endif
-#endif
 }
 
 static void print_cpu(struct seq_file *m, int cpu)
@@ -510,27 +510,10 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */
 
-#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
-static void
-add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
-{
-        cfs_rq->task_weight += weight;
-}
-#else
-static inline void
-add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
-{
-}
-#endif
-
 static void
 account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
         update_load_add(&cfs_rq->load, se->load.weight);
-        if (!parent_entity(se))
-                inc_cpu_load(rq_of(cfs_rq), se->load.weight);
-        if (entity_is_task(se))
-                add_cfs_task_weight(cfs_rq, se->load.weight);
         cfs_rq->nr_running++;
         se->on_rq = 1;
         list_add(&se->group_node, &cfs_rq->tasks);
@@ -540,10 +523,6 @@ static void
 account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
         update_load_sub(&cfs_rq->load, se->load.weight);
-        if (!parent_entity(se))
-                dec_cpu_load(rq_of(cfs_rq), se->load.weight);
-        if (entity_is_task(se))
-                add_cfs_task_weight(cfs_rq, -se->load.weight);
         cfs_rq->nr_running--;
         se->on_rq = 0;
         list_del_init(&se->group_node);
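
Note on the accounting removed above: with the reverted patch, enqueueing an entity updated two extra aggregates besides cfs_rq->load, namely a per-CPU load contribution (only for entities with no parent group) and a per-cfs_rq task_weight (only for real tasks, not group entities). Below is a minimal, self-contained sketch of that bookkeeping; the toy_* names and struct layout are made up for illustration and are not the kernel's.

/* Simplified model of the per-cfs_rq accounting removed by this revert.
 * All types and helpers here are toy stand-ins, not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_cfs_rq {
        unsigned long load;        /* sum of all queued entity weights */
        unsigned long task_weight; /* sum of weights of real tasks only */
        unsigned long cpu_load;    /* per-CPU load (top-level weight only) */
        unsigned int nr_running;
};

struct toy_entity {
        unsigned long weight;
        bool is_task;    /* a task, as opposed to a group entity */
        bool has_parent; /* nested inside another group? */
};

static void toy_account_enqueue(struct toy_cfs_rq *cfs_rq, struct toy_entity *se)
{
        cfs_rq->load += se->weight;
        if (!se->has_parent)              /* top-level entities feed the CPU load */
                cfs_rq->cpu_load += se->weight;
        if (se->is_task)                  /* only tasks feed task_weight */
                cfs_rq->task_weight += se->weight;
        cfs_rq->nr_running++;
}

static void toy_account_dequeue(struct toy_cfs_rq *cfs_rq, struct toy_entity *se)
{
        cfs_rq->load -= se->weight;
        if (!se->has_parent)
                cfs_rq->cpu_load -= se->weight;
        if (se->is_task)
                cfs_rq->task_weight -= se->weight;
        cfs_rq->nr_running--;
}

int main(void)
{
        struct toy_cfs_rq rq = { 0 };
        struct toy_entity task = { .weight = 1024, .is_task = true,  .has_parent = true  };
        struct toy_entity grp  = { .weight = 2048, .is_task = false, .has_parent = false };

        toy_account_enqueue(&rq, &task);
        toy_account_enqueue(&rq, &grp);
        printf("load=%lu task_weight=%lu cpu_load=%lu nr_running=%u\n",
               rq.load, rq.task_weight, rq.cpu_load, rq.nr_running);

        toy_account_dequeue(&rq, &task);
        printf("load=%lu task_weight=%lu cpu_load=%lu nr_running=%u\n",
               rq.load, rq.task_weight, rq.cpu_load, rq.nr_running);
        return 0;
}

With the revert applied, only the cfs_rq->load and nr_running updates remain in these two functions.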
@@ -1327,90 +1306,75 @@ static struct task_struct *load_balance_next_fair(void *arg)
         return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator);
 }
 
-static unsigned long
-__load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-                unsigned long max_load_move, struct sched_domain *sd,
-                enum cpu_idle_type idle, int *all_pinned, int *this_best_prio,
-                struct cfs_rq *cfs_rq)
-{
-        struct rq_iterator cfs_rq_iterator;
-
-        cfs_rq_iterator.start = load_balance_start_fair;
-        cfs_rq_iterator.next = load_balance_next_fair;
-        cfs_rq_iterator.arg = cfs_rq;
-
-        return balance_tasks(this_rq, this_cpu, busiest,
-                        max_load_move, sd, idle, all_pinned,
-                        this_best_prio, &cfs_rq_iterator);
-}
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-static unsigned long
-load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-                  unsigned long max_load_move,
-                  struct sched_domain *sd, enum cpu_idle_type idle,
-                  int *all_pinned, int *this_best_prio)
-{
-        long rem_load_move = max_load_move;
-        int busiest_cpu = cpu_of(busiest);
-        struct task_group *tg;
-
-        rcu_read_lock();
-        list_for_each_entry(tg, &task_groups, list) {
-                long imbalance;
-                unsigned long this_weight, busiest_weight;
-                long rem_load, max_load, moved_load;
-
-                /*
-                 * empty group
-                 */
-                if (!aggregate(tg, sd)->task_weight)
-                        continue;
-
-                rem_load = rem_load_move * aggregate(tg, sd)->rq_weight;
-                rem_load /= aggregate(tg, sd)->load + 1;
-
-                this_weight = tg->cfs_rq[this_cpu]->task_weight;
-                busiest_weight = tg->cfs_rq[busiest_cpu]->task_weight;
-
-                imbalance = (busiest_weight - this_weight) / 2;
-
-                if (imbalance < 0)
-                        imbalance = busiest_weight;
-
-                max_load = max(rem_load, imbalance);
-                moved_load = __load_balance_fair(this_rq, this_cpu, busiest,
-                                max_load, sd, idle, all_pinned, this_best_prio,
-                                tg->cfs_rq[busiest_cpu]);
-
-                if (!moved_load)
-                        continue;
-
-                move_group_shares(tg, sd, busiest_cpu, this_cpu);
-
-                moved_load *= aggregate(tg, sd)->load;
-                moved_load /= aggregate(tg, sd)->rq_weight + 1;
-
-                rem_load_move -= moved_load;
-                if (rem_load_move < 0)
-                        break;
-        }
-        rcu_read_unlock();
-
-        return max_load_move - rem_load_move;
-}
-#else
-static unsigned long
-load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-                  unsigned long max_load_move,
-                  struct sched_domain *sd, enum cpu_idle_type idle,
-                  int *all_pinned, int *this_best_prio)
-{
-        return __load_balance_fair(this_rq, this_cpu, busiest,
-                        max_load_move, sd, idle, all_pinned,
-                        this_best_prio, &busiest->cfs);
-}
-#endif
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
+{
+        struct sched_entity *curr;
+        struct task_struct *p;
+
+        if (!cfs_rq->nr_running || !first_fair(cfs_rq))
+                return MAX_PRIO;
+
+        curr = cfs_rq->curr;
+        if (!curr)
+                curr = __pick_next_entity(cfs_rq);
+
+        p = task_of(curr);
+
+        return p->prio;
+}
+#endif
+
+static unsigned long
+load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
+                  unsigned long max_load_move,
+                  struct sched_domain *sd, enum cpu_idle_type idle,
+                  int *all_pinned, int *this_best_prio)
+{
+        struct cfs_rq *busy_cfs_rq;
+        long rem_load_move = max_load_move;
+        struct rq_iterator cfs_rq_iterator;
+
+        cfs_rq_iterator.start = load_balance_start_fair;
+        cfs_rq_iterator.next = load_balance_next_fair;
+
+        for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
+#ifdef CONFIG_FAIR_GROUP_SCHED
+                struct cfs_rq *this_cfs_rq;
+                long imbalance;
+                unsigned long maxload;
+
+                this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
+
+                imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
+                /* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
+                if (imbalance <= 0)
+                        continue;
+
+                /* Don't pull more than imbalance/2 */
+                imbalance /= 2;
+                maxload = min(rem_load_move, imbalance);
+
+                *this_best_prio = cfs_rq_best_prio(this_cfs_rq);
+#else
+# define maxload rem_load_move
+#endif
+                /*
+                 * pass busy_cfs_rq argument into
+                 * load_balance_[start|next]_fair iterators
+                 */
+                cfs_rq_iterator.arg = busy_cfs_rq;
+                rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
+                                               maxload, sd, idle, all_pinned,
+                                               this_best_prio,
+                                               &cfs_rq_iterator);
+
+                if (rem_load_move <= 0)
+                        break;
+        }
+
+        return max_load_move - rem_load_move;
+}
 
 static int
 move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
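
As a rough illustration of the behaviour this revert restores, the sketch below computes the per-cfs_rq pull limit the same way the reinstated load_balance_fair() loop does: skip a group whose load on the busiest CPU does not exceed the local load, otherwise pull at most half the imbalance, capped by the load still left to move. The toy_pull_limit() helper and the sample numbers are made up for illustration; only the arithmetic mirrors the diff above.

/* Toy model of the pull limit used by the restored load_balance_fair():
 * pull at most half of the load difference between the busiest CPU's
 * cfs_rq and the local one, capped by the load still left to move.
 */
#include <stdio.h>

static unsigned long min_ul(unsigned long a, unsigned long b)
{
        return a < b ? a : b;
}

/* Returns how much load to pull from one group's cfs_rq (0 = skip it). */
static unsigned long toy_pull_limit(unsigned long busiest_weight,
                                    unsigned long this_weight,
                                    unsigned long rem_load_move)
{
        long imbalance = (long)busiest_weight - (long)this_weight;

        if (imbalance <= 0)     /* local CPU already carries as much or more */
                return 0;

        imbalance /= 2;         /* don't pull more than half the gap */
        return min_ul(rem_load_move, (unsigned long)imbalance);
}

int main(void)
{
        /* Example: the busiest CPU carries 3072 of this group's load, we carry
         * 1024, and 1500 units of load remain to be moved overall.
         */
        unsigned long maxload = toy_pull_limit(3072, 1024, 1500);

        printf("pull up to %lu units from this cfs_rq\n", maxload); /* prints 1024 */
        return 0;
}

In the reverted SMP-nice version, this limit was instead derived from the group's aggregated rq_weight and load across the whole sched domain, which is exactly the hierarchy-wide arithmetic removed above.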
@@ -513,8 +513,6 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
          */
         for_each_sched_rt_entity(rt_se)
                 enqueue_rt_entity(rt_se);
-
-        inc_cpu_load(rq, p->se.load.weight);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
@@ -534,8 +532,6 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
                 if (rt_rq && rt_rq->rt_nr_running)
                         enqueue_rt_entity(rt_se);
         }
-
-        dec_cpu_load(rq, p->se.load.weight);
 }
 
 /*