Commit 74f5187a, authored by Peter Zijlstra, committed by Ingo Molnar

sched: Cure load average vs NO_HZ woes

Chase reported that due to us decrementing calc_load_tasks prematurely
(before the next LOAD_FREQ sample), the load average could be skewed
by as much as the number of CPUs in the machine.

This patch, based on Chase's patch, cures the problem by keeping the
delta of the CPU going into NO_HZ idle separately and folding that in
on the next LOAD_FREQ update.

This restores the balance and we get strict LOAD_FREQ period samples.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Chase Douglas <chase.douglas@canonical.com>
LKML-Reference: <1271934490.1776.343.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 09a40af5
...@@ -1815,7 +1815,7 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) ...@@ -1815,7 +1815,7 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
} }
#endif #endif
static void calc_load_account_active(struct rq *this_rq); static void calc_load_account_idle(struct rq *this_rq);
static void update_sysctl(void); static void update_sysctl(void);
static int get_update_sysctl_factor(void); static int get_update_sysctl_factor(void);
...@@ -2950,6 +2950,61 @@ static unsigned long calc_load_update; ...@@ -2950,6 +2950,61 @@ static unsigned long calc_load_update;
unsigned long avenrun[3]; unsigned long avenrun[3];
EXPORT_SYMBOL(avenrun); EXPORT_SYMBOL(avenrun);
/*
 * Bring this runqueue's recorded active count up to date.
 *
 * The active count is nr_running plus nr_uninterruptible. When it differs
 * from rq->calc_load_active, the new value is stored there and the signed
 * difference (new - old) is returned; otherwise nothing is written and 0
 * is returned.
 */
static long calc_load_fold_active(struct rq *this_rq)
{
	long active_now, change;

	active_now = this_rq->nr_running;
	active_now += (long) this_rq->nr_uninterruptible;

	/* Unchanged since the last fold: nothing to store or report. */
	if (active_now == this_rq->calc_load_active)
		return 0;

	change = active_now - this_rq->calc_load_active;
	this_rq->calc_load_active = active_now;

	return change;
}
#ifdef CONFIG_NO_HZ
/*
* For NO_HZ we delay the active fold to the next LOAD_FREQ update.
*
* When making the ILB scale, we should try to pull this in as well.
*/
static atomic_long_t calc_load_tasks_idle;
/*
 * Fold this runqueue's active-count change and park it in
 * calc_load_tasks_idle instead of applying it right away; the parked
 * value is collected later by calc_load_fold_idle().
 */
static void calc_load_account_idle(struct rq *this_rq)
{
	long pending = calc_load_fold_active(this_rq);

	/* Skip the atomic operation when there is nothing to add. */
	if (!pending)
		return;

	atomic_long_add(pending, &calc_load_tasks_idle);
}
/*
 * Collect whatever has accumulated in calc_load_tasks_idle and reset the
 * counter to zero.
 *
 * Returns the collected value, or 0 when the counter read as zero.
 */
static long calc_load_fold_idle(void)
{
	/*
	 * The read and the exchange below are separate operations, so the
	 * counter may change in between. That race is deliberately
	 * tolerated; the read only avoids the exchange when the counter
	 * looks empty.
	 */
	if (!atomic_long_read(&calc_load_tasks_idle))
		return 0;

	return atomic_long_xchg(&calc_load_tasks_idle, 0);
}
#else
/*
 * !CONFIG_NO_HZ variant: intentionally empty. No idle delta is recorded
 * in this configuration, so the call is a no-op.
 */
static void calc_load_account_idle(struct rq *this_rq)
{
}
/*
 * !CONFIG_NO_HZ variant: no idle delta is ever accumulated in this
 * configuration, so the amount to fold in is always zero.
 */
static inline long calc_load_fold_idle(void)
{
	return 0L;
}
#endif
/** /**
* get_avenrun - get the load average array * get_avenrun - get the load average array
* @loads: pointer to dest load array * @loads: pointer to dest load array
...@@ -2996,20 +3051,22 @@ void calc_global_load(void) ...@@ -2996,20 +3051,22 @@ void calc_global_load(void)
} }
/* /*
* Either called from update_cpu_load() or from a cpu going idle * Called from update_cpu_load() to periodically update this CPU's
* active count.
*/ */
static void calc_load_account_active(struct rq *this_rq) static void calc_load_account_active(struct rq *this_rq)
{ {
long nr_active, delta; long delta;
nr_active = this_rq->nr_running; if (time_before(jiffies, this_rq->calc_load_update))
nr_active += (long) this_rq->nr_uninterruptible; return;
if (nr_active != this_rq->calc_load_active) { delta = calc_load_fold_active(this_rq);
delta = nr_active - this_rq->calc_load_active; delta += calc_load_fold_idle();
this_rq->calc_load_active = nr_active; if (delta)
atomic_long_add(delta, &calc_load_tasks); atomic_long_add(delta, &calc_load_tasks);
}
this_rq->calc_load_update += LOAD_FREQ;
} }
/* /*
...@@ -3041,10 +3098,7 @@ static void update_cpu_load(struct rq *this_rq) ...@@ -3041,10 +3098,7 @@ static void update_cpu_load(struct rq *this_rq)
this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i; this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
} }
if (time_after_eq(jiffies, this_rq->calc_load_update)) {
this_rq->calc_load_update += LOAD_FREQ;
calc_load_account_active(this_rq); calc_load_account_active(this_rq);
}
} }
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
......
...@@ -23,8 +23,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl ...@@ -23,8 +23,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
static struct task_struct *pick_next_task_idle(struct rq *rq) static struct task_struct *pick_next_task_idle(struct rq *rq)
{ {
schedstat_inc(rq, sched_goidle); schedstat_inc(rq, sched_goidle);
/* adjust the active tasks as we might go into a long sleep */ calc_load_account_idle(rq);
calc_load_account_active(rq);
return rq->idle; return rq->idle;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment