Commit db05a192 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] sched: handle inter-CPU jiffies skew

From: Nick Piggin <piggin@cyberone.com.au>

John Hawkes described this problem to me:

There *is* a small problem in this area, though, that SuSE avoids.
"jiffies" gets updated by cpu0.  The other CPUs may, over time, get out of
sync (and they're initialized on ia64 to start out being out of sync), so
it's no guarantee that every CPU will wake up from its timer interrupt and
see a "jiffies" value that is guaranteed to be last_jiffies+1.  Sometimes
the jiffies value may be unchanged since the last wakeup.  Sometimes the
jiffies value may have incremented by 2 (or more, especially if cpu0's
interrupts are disabled for long stretches of time).  So an algorithm that
says, "I'll call load_balance() only when jiffies is *exactly* N" is going
to fail on occasion, either by calling load_balance() too often or not
often enough.  ***

I fixed this by adding a last_balance field to struct sched_domain, and
working off that.
parent e18e19ad
...@@ -568,6 +568,7 @@ struct sched_domain { ...@@ -568,6 +568,7 @@ struct sched_domain {
int flags; /* See SD_FLAG_* */ int flags; /* See SD_FLAG_* */
/* Runtime fields. */ /* Runtime fields. */
unsigned long last_balance; /* init to jiffies. units in jiffies */
unsigned int balance_interval; /* initialise to 1. units in ms. */ unsigned int balance_interval; /* initialise to 1. units in ms. */
unsigned int nr_balance_failed; /* initialise to 0 */ unsigned int nr_balance_failed; /* initialise to 0 */
}; };
...@@ -584,6 +585,7 @@ struct sched_domain { ...@@ -584,6 +585,7 @@ struct sched_domain {
.cache_hot_time = 0, \ .cache_hot_time = 0, \
.cache_nice_tries = 0, \ .cache_nice_tries = 0, \
.flags = SD_FLAG_FASTMIGRATE | SD_FLAG_NEWIDLE | SD_FLAG_WAKE,\ .flags = SD_FLAG_FASTMIGRATE | SD_FLAG_NEWIDLE | SD_FLAG_WAKE,\
.last_balance = jiffies, \
.balance_interval = 1, \ .balance_interval = 1, \
.nr_balance_failed = 0, \ .nr_balance_failed = 0, \
} }
...@@ -600,6 +602,7 @@ struct sched_domain { ...@@ -600,6 +602,7 @@ struct sched_domain {
.cache_hot_time = (5*1000000/2), \ .cache_hot_time = (5*1000000/2), \
.cache_nice_tries = 1, \ .cache_nice_tries = 1, \
.flags = SD_FLAG_FASTMIGRATE | SD_FLAG_NEWIDLE,\ .flags = SD_FLAG_FASTMIGRATE | SD_FLAG_NEWIDLE,\
.last_balance = jiffies, \
.balance_interval = 1, \ .balance_interval = 1, \
.nr_balance_failed = 0, \ .nr_balance_failed = 0, \
} }
...@@ -617,6 +620,7 @@ struct sched_domain { ...@@ -617,6 +620,7 @@ struct sched_domain {
.cache_hot_time = (10*1000000), \ .cache_hot_time = (10*1000000), \
.cache_nice_tries = 1, \ .cache_nice_tries = 1, \
.flags = SD_FLAG_EXEC, \ .flags = SD_FLAG_EXEC, \
.last_balance = jiffies, \
.balance_interval = 1, \ .balance_interval = 1, \
.nr_balance_failed = 0, \ .nr_balance_failed = 0, \
} }
......
...@@ -1759,26 +1759,26 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq, enum idle_type idl ...@@ -1759,26 +1759,26 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq, enum idle_type idl
/* Run through all this CPU's domains */ /* Run through all this CPU's domains */
do { do {
int modulo; unsigned long interval;
if (unlikely(!domain->groups)) if (unlikely(!domain->groups))
break; break;
modulo = domain->balance_interval; interval = domain->balance_interval;
if (idle != IDLE) if (idle != IDLE)
modulo *= domain->busy_factor; interval *= domain->busy_factor;
/* scale ms to jiffies */ /* scale ms to jiffies */
modulo = modulo * HZ / 1000; interval = interval * HZ / 1000;
if (modulo == 0) if (unlikely(interval == 0))
modulo = 1; interval = 1;
if (!(j % modulo)) { if (j - domain->last_balance >= interval) {
if (load_balance(this_cpu, this_rq, domain, idle)) { if (load_balance(this_cpu, this_rq, domain, idle)) {
/* We've pulled tasks over so no longer idle */ /* We've pulled tasks over so no longer idle */
idle = NOT_IDLE; idle = NOT_IDLE;
} }
domain->last_balance += interval;
} }
domain = domain->parent; domain = domain->parent;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment