Commit 98962465 authored by Jon Hunter's avatar Jon Hunter Committed by Thomas Gleixner

nohz: Prevent clocksource wrapping during idle

The dynamic tick allows the kernel to sleep for periods longer than a
single tick, but it does not limit the sleep time currently. In the
worst case the kernel could sleep longer than the wrap around time of
the time keeping clock source which would result in losing track of
time.

Prevent this by limiting it to the safe maximum sleep time of the
current time keeping clock source. The value is calculated when the
clock source is registered.

[ tglx: simplified the code a bit and massaged the commit msg ]
Signed-off-by: default avatarJon Hunter <jon-hunter@ti.com>
Cc: John Stultz <johnstul@us.ibm.com>
LKML-Reference: <1250617512-23567-2-git-send-email-jon-hunter@ti.com>
Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
parent 529eaccd
...@@ -151,6 +151,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, ...@@ -151,6 +151,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc,
* subtraction of non 64 bit counters * subtraction of non 64 bit counters
* @mult: cycle to nanosecond multiplier * @mult: cycle to nanosecond multiplier
* @shift: cycle to nanosecond divisor (power of two) * @shift: cycle to nanosecond divisor (power of two)
* @max_idle_ns: max idle time permitted by the clocksource (nsecs)
* @flags: flags describing special properties * @flags: flags describing special properties
* @vread: vsyscall based read * @vread: vsyscall based read
* @resume: resume function for the clocksource, if necessary * @resume: resume function for the clocksource, if necessary
...@@ -168,6 +169,7 @@ struct clocksource { ...@@ -168,6 +169,7 @@ struct clocksource {
cycle_t mask; cycle_t mask;
u32 mult; u32 mult;
u32 shift; u32 shift;
u64 max_idle_ns;
unsigned long flags; unsigned long flags;
cycle_t (*vread)(void); cycle_t (*vread)(void);
void (*resume)(void); void (*resume)(void);
......
...@@ -148,6 +148,7 @@ extern void monotonic_to_bootbased(struct timespec *ts); ...@@ -148,6 +148,7 @@ extern void monotonic_to_bootbased(struct timespec *ts);
extern struct timespec timespec_trunc(struct timespec t, unsigned gran); extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
extern int timekeeping_valid_for_hres(void); extern int timekeeping_valid_for_hres(void);
extern u64 timekeeping_max_deferment(void);
extern void update_wall_time(void); extern void update_wall_time(void);
extern void update_xtime_cache(u64 nsec); extern void update_xtime_cache(u64 nsec);
extern void timekeeping_leap_insert(int leapsecond); extern void timekeeping_leap_insert(int leapsecond);
......
...@@ -468,6 +468,47 @@ void clocksource_touch_watchdog(void) ...@@ -468,6 +468,47 @@ void clocksource_touch_watchdog(void)
#ifdef CONFIG_GENERIC_TIME #ifdef CONFIG_GENERIC_TIME
/**
* clocksource_max_deferment - Returns max time the clocksource can be deferred
* @cs: Pointer to clocksource
*
*/
static u64 clocksource_max_deferment(struct clocksource *cs)
{
u64 max_nsecs, max_cycles;
/*
* Calculate the maximum number of cycles that we can pass to the
* cyc2ns function without overflowing a 64-bit signed result. The
* maximum number of cycles is equal to ULLONG_MAX/cs->mult which
* is equivalent to the below.
* max_cycles < (2^63)/cs->mult
* max_cycles < 2^(log2((2^63)/cs->mult))
* max_cycles < 2^(log2(2^63) - log2(cs->mult))
* max_cycles < 2^(63 - log2(cs->mult))
* max_cycles < 1 << (63 - log2(cs->mult))
* Please note that we add 1 to the result of the log2 to account for
* any rounding errors, ensure the above inequality is satisfied and
* no overflow will occur.
*/
max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1));
/*
* The actual maximum number of cycles we can defer the clocksource is
* determined by the minimum of max_cycles and cs->mask.
*/
max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift);
/*
* To ensure that the clocksource does not wrap whilst we are idle,
* limit the time the clocksource can be deferred by 12.5%. Please
* note a margin of 12.5% is used because this can be computed with
* a shift, versus say 10% which would require division.
*/
return max_nsecs - (max_nsecs >> 5);
}
/** /**
* clocksource_select - Select the best clocksource available * clocksource_select - Select the best clocksource available
* *
...@@ -564,6 +605,9 @@ static void clocksource_enqueue(struct clocksource *cs) ...@@ -564,6 +605,9 @@ static void clocksource_enqueue(struct clocksource *cs)
*/ */
int clocksource_register(struct clocksource *cs) int clocksource_register(struct clocksource *cs)
{ {
/* calculate max idle time permitted for this clocksource */
cs->max_idle_ns = clocksource_max_deferment(cs);
mutex_lock(&clocksource_mutex); mutex_lock(&clocksource_mutex);
clocksource_enqueue(cs); clocksource_enqueue(cs);
clocksource_select(); clocksource_select();
......
...@@ -208,6 +208,7 @@ void tick_nohz_stop_sched_tick(int inidle) ...@@ -208,6 +208,7 @@ void tick_nohz_stop_sched_tick(int inidle)
struct tick_sched *ts; struct tick_sched *ts;
ktime_t last_update, expires, now; ktime_t last_update, expires, now;
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
u64 time_delta;
int cpu; int cpu;
local_irq_save(flags); local_irq_save(flags);
...@@ -262,6 +263,17 @@ void tick_nohz_stop_sched_tick(int inidle) ...@@ -262,6 +263,17 @@ void tick_nohz_stop_sched_tick(int inidle)
seq = read_seqbegin(&xtime_lock); seq = read_seqbegin(&xtime_lock);
last_update = last_jiffies_update; last_update = last_jiffies_update;
last_jiffies = jiffies; last_jiffies = jiffies;
/*
* On SMP we really should only care for the CPU which
* has the do_timer duty assigned. All other CPUs can
* sleep as long as they want.
*/
if (cpu == tick_do_timer_cpu ||
tick_do_timer_cpu == TICK_DO_TIMER_NONE)
time_delta = timekeeping_max_deferment();
else
time_delta = KTIME_MAX;
} while (read_seqretry(&xtime_lock, seq)); } while (read_seqretry(&xtime_lock, seq));
if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
...@@ -285,10 +297,25 @@ void tick_nohz_stop_sched_tick(int inidle) ...@@ -285,10 +297,25 @@ void tick_nohz_stop_sched_tick(int inidle)
/* /*
* calculate the expiry time for the next timer wheel * calculate the expiry time for the next timer wheel
* timer * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
* that there is no timer pending or at least extremely
* far into the future (12 days for HZ=1000). In this
* case we set the expiry to the end of time.
*/ */
expires = ktime_add_ns(last_update, tick_period.tv64 * if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) {
delta_jiffies); /*
* Calculate the time delta for the next timer event.
* If the time delta exceeds the maximum time delta
* permitted by the current clocksource then adjust
* the time delta accordingly to ensure the
* clocksource does not wrap.
*/
time_delta = min_t(u64, time_delta,
tick_period.tv64 * delta_jiffies);
expires = ktime_add_ns(last_update, time_delta);
} else {
expires.tv64 = KTIME_MAX;
}
/* /*
* If this cpu is the one which updates jiffies, then * If this cpu is the one which updates jiffies, then
...@@ -332,22 +359,19 @@ void tick_nohz_stop_sched_tick(int inidle) ...@@ -332,22 +359,19 @@ void tick_nohz_stop_sched_tick(int inidle)
ts->idle_sleeps++; ts->idle_sleeps++;
/* Mark expires */
ts->idle_expires = expires;
/* /*
* delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that * If the expiration time == KTIME_MAX, then
* there is no timer pending or at least extremly far * in this case we simply stop the tick timer.
* into the future (12 days for HZ=1000). In this case
* we simply stop the tick timer:
*/ */
if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) { if (unlikely(expires.tv64 == KTIME_MAX)) {
ts->idle_expires.tv64 = KTIME_MAX;
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
hrtimer_cancel(&ts->sched_timer); hrtimer_cancel(&ts->sched_timer);
goto out; goto out;
} }
/* Mark expiries */
ts->idle_expires = expires;
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
hrtimer_start(&ts->sched_timer, expires, hrtimer_start(&ts->sched_timer, expires,
HRTIMER_MODE_ABS_PINNED); HRTIMER_MODE_ABS_PINNED);
......
...@@ -477,6 +477,17 @@ int timekeeping_valid_for_hres(void) ...@@ -477,6 +477,17 @@ int timekeeping_valid_for_hres(void)
return ret; return ret;
} }
/**
* timekeeping_max_deferment - Returns max time the clocksource can be deferred
*
* Caller must observe xtime_lock via read_seqbegin/read_seqretry to
* ensure that the clocksource does not change!
*/
u64 timekeeping_max_deferment(void)
{
return timekeeper.clock->max_idle_ns;
}
/** /**
* read_persistent_clock - Return time from the persistent clock. * read_persistent_clock - Return time from the persistent clock.
* *
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment