Commit a3f4a698 authored by Nick Piggin, committed by Linus Torvalds

[PATCH] sched: rework schedstats

Move balancing fields into struct sched_domain, so we can get more useful
results on systems with multiple domains (eg SMT+SMP, CMP+NUMA, SMP+NUMA,
etc).
Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 992ee3c5
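
For context when reading the counters below: schedstat_inc() and schedstat_add() are thin wrappers that compile away when CONFIG_SCHEDSTATS is disabled. Their definitions in 2.6-era kernel/sched.c look roughly like this (a sketch for orientation, not part of this patch):

#ifdef CONFIG_SCHEDSTATS
/* bump a counter field on a runqueue or sched_domain */
# define schedstat_inc(rq, field)      do { (rq)->field++; } while (0)
# define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
#else
/* stats disabled: both become no-ops with zero runtime cost */
# define schedstat_inc(rq, field)      do { } while (0)
# define schedstat_add(rq, field, amt) do { } while (0)
#endif

Because the first argument is simply dereferenced, the same macros work on runqueue_t and struct sched_domain pointers alike, which is what lets this patch move counters from one structure to the other without adding new helpers.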
@@ -489,17 +489,26 @@ struct sched_domain {
 	/* load_balance() stats */
 	unsigned long lb_cnt[MAX_IDLE_TYPES];
 	unsigned long lb_failed[MAX_IDLE_TYPES];
+	unsigned long lb_balanced[MAX_IDLE_TYPES];
 	unsigned long lb_imbalance[MAX_IDLE_TYPES];
+	unsigned long lb_gained[MAX_IDLE_TYPES];
+	unsigned long lb_hot_gained[MAX_IDLE_TYPES];
 	unsigned long lb_nobusyg[MAX_IDLE_TYPES];
 	unsigned long lb_nobusyq[MAX_IDLE_TYPES];
 
+	/* Active load balancing */
+	unsigned long alb_cnt;
+	unsigned long alb_failed;
+	unsigned long alb_pushed;
+
 	/* sched_balance_exec() stats */
 	unsigned long sbe_attempts;
 	unsigned long sbe_pushed;
 
 	/* try_to_wake_up() stats */
-	unsigned long ttwu_wake_affine;
-	unsigned long ttwu_wake_balance;
+	unsigned long ttwu_wake_remote;
+	unsigned long ttwu_move_affine;
+	unsigned long ttwu_move_balance;
 #endif
 };
...
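
The MAX_IDLE_TYPES-sized arrays above are indexed by the idle state of the CPU doing the balancing. For reference, the enum they are indexed by is defined elsewhere in sched.h of this era, roughly as follows (the comments are paraphrased; the enumerator names SCHED_IDLE, NEWLY_IDLE and MAX_IDLE_TYPES appear in the code in this patch):

enum idle_type
{
	SCHED_IDLE,	/* balancing while this CPU is idle */
	NOT_IDLE,	/* balancing from a busy CPU */
	NEWLY_IDLE,	/* CPU is about to become idle */
	MAX_IDLE_TYPES
};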
@@ -248,35 +248,13 @@ struct runqueue {
 	unsigned long yld_cnt;
 
 	/* schedule() stats */
-	unsigned long sched_noswitch;
 	unsigned long sched_switch;
 	unsigned long sched_cnt;
 	unsigned long sched_goidle;
 
-	/* pull_task() stats */
-	unsigned long pt_gained[MAX_IDLE_TYPES];
-	unsigned long pt_lost[MAX_IDLE_TYPES];
-
-	/* active_load_balance() stats */
-	unsigned long alb_cnt;
-	unsigned long alb_lost;
-	unsigned long alb_gained;
-	unsigned long alb_failed;
-
 	/* try_to_wake_up() stats */
 	unsigned long ttwu_cnt;
-	unsigned long ttwu_attempts;
-	unsigned long ttwu_moved;
+	unsigned long ttwu_local;
-
-	/* wake_up_new_task() stats */
-	unsigned long wunt_cnt;
-	unsigned long wunt_moved;
-
-	/* sched_migrate_task() stats */
-	unsigned long smt_cnt;
-
-	/* sched_balance_exec() stats */
-	unsigned long sbe_cnt;
 #endif
 };
@@ -331,7 +309,7 @@ static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags)
  * bump this up when changing the output format or the meaning of an existing
  * format, so that tools can adapt (or abort)
  */
-#define SCHEDSTAT_VERSION 10
+#define SCHEDSTAT_VERSION 11

 static int show_schedstat(struct seq_file *seq, void *v)
 {
@@ -349,22 +327,14 @@ static int show_schedstat(struct seq_file *seq, void *v)
 		/* runqueue-specific stats */
 		seq_printf(seq,
-		    "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu "
-		    "%lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
+		    "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
 		    cpu, rq->yld_both_empty,
-		    rq->yld_act_empty, rq->yld_exp_empty,
-		    rq->yld_cnt, rq->sched_noswitch,
+		    rq->yld_act_empty, rq->yld_exp_empty, rq->yld_cnt,
 		    rq->sched_switch, rq->sched_cnt, rq->sched_goidle,
-		    rq->alb_cnt, rq->alb_gained, rq->alb_lost,
-		    rq->alb_failed,
-		    rq->ttwu_cnt, rq->ttwu_moved, rq->ttwu_attempts,
-		    rq->wunt_cnt, rq->wunt_moved,
-		    rq->smt_cnt, rq->sbe_cnt, rq->rq_sched_info.cpu_time,
+		    rq->ttwu_cnt, rq->ttwu_local,
+		    rq->rq_sched_info.cpu_time,
 		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcnt);
-
-		for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES; itype++)
-			seq_printf(seq, " %lu %lu", rq->pt_gained[itype],
-						    rq->pt_lost[itype]);
 		seq_printf(seq, "\n");

 #ifdef CONFIG_SMP
@@ -376,16 +346,20 @@ static int show_schedstat(struct seq_file *seq, void *v)
 			seq_printf(seq, "domain%d %s", dcnt++, mask_str);
 			for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES;
 					itype++) {
-				seq_printf(seq, " %lu %lu %lu %lu %lu",
+				seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu",
 				    sd->lb_cnt[itype],
+				    sd->lb_balanced[itype],
 				    sd->lb_failed[itype],
 				    sd->lb_imbalance[itype],
+				    sd->lb_gained[itype],
+				    sd->lb_hot_gained[itype],
 				    sd->lb_nobusyq[itype],
 				    sd->lb_nobusyg[itype]);
 			}
-			seq_printf(seq, " %lu %lu %lu %lu\n",
+			seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu\n",
+			    sd->alb_cnt, sd->alb_failed, sd->alb_pushed,
 			    sd->sbe_pushed, sd->sbe_attempts,
-			    sd->ttwu_wake_affine, sd->ttwu_wake_balance);
+			    sd->ttwu_wake_remote, sd->ttwu_move_affine, sd->ttwu_move_balance);
 		}
 #endif
 	}
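
Taken together, the seq_printf() changes above define the version-11 record layout: the cpu line carries twelve counters, and each domain line carries eight load_balance() counters per idle type followed by eight domain-wide fields (alb_cnt, alb_failed, alb_pushed, sbe_pushed, sbe_attempts, ttwu_wake_remote, ttwu_move_affine, ttwu_move_balance). As a sanity check of that layout, here is a hypothetical stand-alone parser; the field order comes straight from the code above, but the program itself is illustrative, not an existing tool:

#include <stdio.h>

#define NR_LB_FIELDS	8	/* cnt, balanced, failed, imbalance,
				   gained, hot_gained, nobusyq, nobusyg */
#define NR_IDLE_TYPES	3	/* SCHED_IDLE, NOT_IDLE, NEWLY_IDLE */

int main(void)
{
	char line[1024];
	FILE *fp = fopen("/proc/schedstat", "r");

	if (!fp) {
		perror("/proc/schedstat");
		return 1;
	}
	while (fgets(line, sizeof(line), fp)) {
		unsigned long v[NR_IDLE_TYPES * NR_LB_FIELDS];
		char mask[64];
		int dom, off, i;

		/* domain lines: "domain<N> <cpumask> <24 lb fields> ..." */
		if (sscanf(line, "domain%d %63s%n", &dom, mask, &off) != 2)
			continue;
		for (i = 0; i < NR_IDLE_TYPES * NR_LB_FIELDS; i++) {
			int used;

			if (sscanf(line + off, " %lu%n", &v[i], &used) != 1)
				break;
			off += used;
		}
		if (i < NR_IDLE_TYPES * NR_LB_FIELDS)
			continue;	/* short line: not this format */
		for (i = 0; i < NR_IDLE_TYPES; i++)
			printf("domain%d itype%d lb_cnt=%lu lb_balanced=%lu\n",
			       dom, i, v[i * NR_LB_FIELDS],
			       v[i * NR_LB_FIELDS + 1]);
	}
	fclose(fp);
	return 0;
}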
@@ -998,7 +972,6 @@ static int try_to_wake_up(task_t * p, unsigned int state, int sync)
 #endif

 	rq = task_rq_lock(p, &flags);
-	schedstat_inc(rq, ttwu_cnt);
 	old_state = p->state;
 	if (!(old_state & state))
 		goto out;
@@ -1013,8 +986,21 @@ static int try_to_wake_up(task_t * p, unsigned int state, int sync)
 	if (unlikely(task_running(rq, p)))
 		goto out_activate;

-	new_cpu = cpu;
+#ifdef CONFIG_SCHEDSTATS
+	schedstat_inc(rq, ttwu_cnt);
+	if (cpu == this_cpu) {
+		schedstat_inc(rq, ttwu_local);
+	} else {
+		for_each_domain(this_cpu, sd) {
+			if (cpu_isset(cpu, sd->span)) {
+				schedstat_inc(sd, ttwu_wake_remote);
+				break;
+			}
+		}
+	}
+#endif
+
+	new_cpu = cpu;
 	if (cpu == this_cpu || unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
 		goto out_set_cpu;
@@ -1053,7 +1039,7 @@ static int try_to_wake_up(task_t * p, unsigned int state, int sync)
 			 * in this domain.
 			 */
 			if (cpu_isset(cpu, sd->span)) {
-				schedstat_inc(sd, ttwu_wake_affine);
+				schedstat_inc(sd, ttwu_move_affine);
 				goto out_set_cpu;
 			}
 		} else if ((sd->flags & SD_WAKE_BALANCE) &&
@@ -1063,7 +1049,7 @@ static int try_to_wake_up(task_t * p, unsigned int state, int sync)
 			 * an imbalance.
 			 */
 			if (cpu_isset(cpu, sd->span)) {
-				schedstat_inc(sd, ttwu_wake_balance);
+				schedstat_inc(sd, ttwu_move_balance);
 				goto out_set_cpu;
 			}
 		}
@@ -1071,10 +1057,8 @@ static int try_to_wake_up(task_t * p, unsigned int state, int sync)
 	new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */
 out_set_cpu:
-	schedstat_inc(rq, ttwu_attempts);
 	new_cpu = wake_idle(new_cpu, p);
 	if (new_cpu != cpu) {
-		schedstat_inc(rq, ttwu_moved);
 		set_task_cpu(p, new_cpu);
 		task_rq_unlock(rq, &flags);
 		/* might preempt at this point */
@@ -1217,7 +1201,6 @@ void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags)
 	BUG_ON(p->state != TASK_RUNNING);
-	schedstat_inc(rq, wunt_cnt);
 	/*
 	 * We decrease the sleep average of forking parents
 	 * and children as well, to keep max-interactive tasks
@@ -1269,7 +1252,6 @@ void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags)
 		if (TASK_PREEMPTS_CURR(p, rq))
 			resched_task(rq->curr);
-		schedstat_inc(rq, wunt_moved);
 		/*
 		 * Parent and child are on different CPUs, now get the
 		 * parent runqueue to update the parent's ->sleep_avg:
@@ -1573,7 +1555,6 @@ static void sched_migrate_task(task_t *p, int dest_cpu)
 	    || unlikely(cpu_is_offline(dest_cpu)))
 		goto out;
-	schedstat_inc(rq, smt_cnt);
 	/* force the process onto the specified CPU */
 	if (migrate_task(p, dest_cpu, &req)) {
 		/* Need to wait for migration thread (might exit: take ref). */
@@ -1601,7 +1582,6 @@ void sched_exec(void)
 	struct sched_domain *tmp, *sd = NULL;
 	int new_cpu, this_cpu = get_cpu();
-	schedstat_inc(this_rq(), sbe_cnt);
 	/* Prefer the current CPU if there's only this task running */
 	if (this_rq()->nr_running <= 1)
 		goto out;
@@ -1744,13 +1724,10 @@ static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
 		goto skip_bitmap;
 	}

-	/*
-	 * Right now, this is the only place pull_task() is called,
-	 * so we can safely collect pull_task() stats here rather than
-	 * inside pull_task().
-	 */
-	schedstat_inc(this_rq, pt_gained[idle]);
-	schedstat_inc(busiest, pt_lost[idle]);
+#ifdef CONFIG_SCHEDSTATS
+	if (task_hot(tmp, busiest->timestamp_last_tick, sd))
+		schedstat_inc(sd, lb_hot_gained[idle]);
+#endif

 	pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu);
 	pulled++;
@@ -1763,6 +1740,12 @@ static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
 		goto skip_bitmap;
 	}
 out:
+	/*
+	 * Right now, this is the only place pull_task() is called,
+	 * so we can safely collect pull_task() stats here rather than
+	 * inside pull_task().
+	 */
+	schedstat_add(sd, lb_gained[idle], pulled);
 	return pulled;
 }
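
A consequence of the two move_tasks() hunks above: lb_gained[idle] now counts every task pulled into this domain, while lb_hot_gained[idle] counts only the pulls where the task was still cache-hot, so their ratio estimates how often balancing pays the cache-migration penalty. A worked example with invented counter values:

#include <stdio.h>

int main(void)
{
	/* hypothetical values read from one domain line of /proc/schedstat */
	unsigned long lb_gained = 1000;		/* tasks pulled */
	unsigned long lb_hot_gained = 120;	/* pulls of still-cache-hot tasks */
	double hot = lb_gained ? 100.0 * lb_hot_gained / lb_gained : 0.0;

	printf("%.1f%% of pulled tasks were cache-hot\n", hot);
	return 0;
}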
@@ -2023,6 +2006,8 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
 out_balanced:
 	spin_unlock(&this_rq->lock);

+	schedstat_inc(sd, lb_balanced[idle]);
+
 	/* tune up the balancing interval */
 	if (sd->balance_interval < sd->max_interval)
 		sd->balance_interval *= 2;
@@ -2048,12 +2033,14 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
 	schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
 	group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE);
 	if (!group) {
+		schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
 		schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
 		goto out;
 	}

 	busiest = find_busiest_queue(group);
 	if (!busiest || busiest == this_rq) {
+		schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
 		schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
 		goto out;
 	}
@@ -2107,7 +2094,6 @@ static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
 	cpumask_t visited_cpus;
 	int cpu;

-	schedstat_inc(busiest_rq, alb_cnt);
 	/*
 	 * Search for suitable CPUs to push tasks to in successively higher
 	 * domains with SD_LOAD_BALANCE set.
@@ -2118,6 +2104,8 @@ static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
 			/* no more domains to search */
 			break;

+		schedstat_inc(sd, alb_cnt);
+
 		cpu_group = sd->groups;
 		do {
 			for_each_cpu_mask(cpu, cpu_group->cpumask) {
@@ -2142,10 +2130,9 @@ static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
 			double_lock_balance(busiest_rq, target_rq);
 			if (move_tasks(target_rq, cpu, busiest_rq,
 					1, sd, SCHED_IDLE)) {
-				schedstat_inc(busiest_rq, alb_lost);
-				schedstat_inc(target_rq, alb_gained);
+				schedstat_inc(sd, alb_pushed);
 			} else {
-				schedstat_inc(busiest_rq, alb_failed);
+				schedstat_inc(sd, alb_failed);
 			}
 			spin_unlock(&target_rq->lock);
 		}
@@ -2736,8 +2723,7 @@ asmlinkage void __sched schedule(void)
 		array = rq->active;
 		rq->expired_timestamp = 0;
 		rq->best_expired_prio = MAX_PRIO;
-	} else
-		schedstat_inc(rq, sched_noswitch);
+	}

 	idx = sched_find_first_bit(array->bitmap);
 	queue = array->queue + idx;
...