Commit 354879bb authored by Peter Zijlstra, committed by Ingo Molnar

sched_clock: fix cpu_clock()

This patch fixes 3 issues:

a) it removes the dependency on jiffies, because jiffies are incremented
   by a single CPU, and the tick is not synchronized between CPUs. Therefore
   relying on it to calculate a window to clip whacky TSC values doesn't work
   as it can drift around.

   So instead use [GTOD, GTOD+TICK_NSEC) as the window.

b) __update_sched_clock() did (roughly speaking):

   delta = sched_clock() - scd->tick_raw;
   clock += delta;

   Which gives exponential growth, instead of linear.

c) it allows the sched_clock_cpu() value to wrap the u64 without breaking.

the results are more reliable sched_clock() deltas:

           before       after   sched_clock

cpu_clock: 15750        51312   51488
cpu_clock: 59719        51052   50947
cpu_clock: 15879        51249   51061
cpu_clock: 1            50933   51198
cpu_clock: 1            50931   51039
cpu_clock: 1            51093   50981
cpu_clock: 1            51043   51040
cpu_clock: 1            50959   50938
cpu_clock: 1            50981   51011
cpu_clock: 1            51364   51212
cpu_clock: 1            51219   51273
cpu_clock: 1            51389   51048
cpu_clock: 1            51285   51611
cpu_clock: 1            50964   51137
cpu_clock: 1            50973   50968
cpu_clock: 1            50967   50972
cpu_clock: 1            58910   58485
cpu_clock: 1            51082   51025
cpu_clock: 1            50957   50958
cpu_clock: 1            50958   50957
cpu_clock: 1006128      51128   50971
cpu_clock: 1            51107   51155
cpu_clock: 1            51371   51081
cpu_clock: 1            51104   51365
cpu_clock: 1            51363   51309
cpu_clock: 1            51107   51160
cpu_clock: 1            51139   51100
cpu_clock: 1            51216   51136
cpu_clock: 1            51207   51215
cpu_clock: 1            51087   51263
cpu_clock: 1            51249   51177
cpu_clock: 1            51519   51412
cpu_clock: 1            51416   51255
cpu_clock: 1            51591   51594
cpu_clock: 1            50966   51374
cpu_clock: 1            50966   50966
cpu_clock: 1            51291   50948
cpu_clock: 1            50973   50867
cpu_clock: 1            50970   50970
cpu_clock: 998306       50970   50971
cpu_clock: 1            50971   50970
cpu_clock: 1            50970   50970
cpu_clock: 1            50971   50971
cpu_clock: 1            50970   50970
cpu_clock: 1            51351   50970
cpu_clock: 1            50970   51352
cpu_clock: 1            50971   50970
cpu_clock: 1            50970   50970
cpu_clock: 1            51321   50971
cpu_clock: 1            50974   51324
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent efc2dead
...@@ -12,19 +12,17 @@ ...@@ -12,19 +12,17 @@
* *
* Create a semi stable clock from a mixture of other events, including: * Create a semi stable clock from a mixture of other events, including:
* - gtod * - gtod
* - jiffies
* - sched_clock() * - sched_clock()
* - explicit idle events * - explicit idle events
* *
* We use gtod as base and the unstable clock deltas. The deltas are filtered, * We use gtod as base and the unstable clock deltas. The deltas are filtered,
* making it monotonic and keeping it within an expected window. This window * making it monotonic and keeping it within an expected window.
* is set up using jiffies.
* *
* Furthermore, explicit sleep and wakeup hooks allow us to account for time * Furthermore, explicit sleep and wakeup hooks allow us to account for time
* that is otherwise invisible (TSC gets stopped). * that is otherwise invisible (TSC gets stopped).
* *
* The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
* consistent between cpus (never more than 1 jiffies difference). * consistent between cpus (never more than 2 jiffies difference).
*/ */
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/percpu.h> #include <linux/percpu.h>
...@@ -54,7 +52,6 @@ struct sched_clock_data { ...@@ -54,7 +52,6 @@ struct sched_clock_data {
*/ */
raw_spinlock_t lock; raw_spinlock_t lock;
unsigned long tick_jiffies;
u64 tick_raw; u64 tick_raw;
u64 tick_gtod; u64 tick_gtod;
u64 clock; u64 clock;
...@@ -75,14 +72,12 @@ static inline struct sched_clock_data *cpu_sdc(int cpu) ...@@ -75,14 +72,12 @@ static inline struct sched_clock_data *cpu_sdc(int cpu)
void sched_clock_init(void) void sched_clock_init(void)
{ {
u64 ktime_now = ktime_to_ns(ktime_get()); u64 ktime_now = ktime_to_ns(ktime_get());
unsigned long now_jiffies = jiffies;
int cpu; int cpu;
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
struct sched_clock_data *scd = cpu_sdc(cpu); struct sched_clock_data *scd = cpu_sdc(cpu);
scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
scd->tick_jiffies = now_jiffies;
scd->tick_raw = 0; scd->tick_raw = 0;
scd->tick_gtod = ktime_now; scd->tick_gtod = ktime_now;
scd->clock = ktime_now; scd->clock = ktime_now;
...@@ -91,47 +86,52 @@ void sched_clock_init(void) ...@@ -91,47 +86,52 @@ void sched_clock_init(void)
sched_clock_running = 1; sched_clock_running = 1;
} }
/*
 * Wrap-aware min/max helpers: the operands are ordered by the sign of their
 * u64 difference reinterpreted as s64, not by a plain unsigned comparison.
 */

/* wrap_min - return @x when (s64)(x - y) is negative, otherwise @y. */
static inline u64 wrap_min(u64 x, u64 y)
{
	if ((s64)(x - y) < 0)
		return x;

	return y;
}
/* wrap_max - return @x when (s64)(x - y) is positive, otherwise @y. */
static inline u64 wrap_max(u64 x, u64 y)
{
	if ((s64)(x - y) > 0)
		return x;

	return y;
}
/* /*
* update the percpu scd from the raw @now value * update the percpu scd from the raw @now value
* *
* - filter out backward motion * - filter out backward motion
* - use jiffies to generate a min,max window to clip the raw values * - use the GTOD tick value to create a window to filter crazy TSC values
*/ */
static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now) static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
{ {
unsigned long now_jiffies = jiffies;
long delta_jiffies = now_jiffies - scd->tick_jiffies;
u64 clock = scd->clock;
u64 min_clock, max_clock;
s64 delta = now - scd->tick_raw; s64 delta = now - scd->tick_raw;
u64 clock, min_clock, max_clock;
WARN_ON_ONCE(!irqs_disabled()); WARN_ON_ONCE(!irqs_disabled());
min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC;
if (unlikely(delta < 0)) { if (unlikely(delta < 0))
clock++; delta = 0;
goto out;
}
max_clock = min_clock + TICK_NSEC; /*
* scd->clock = clamp(scd->tick_gtod + delta,
* max(scd->tick_gtod, scd->clock),
* scd->tick_gtod + TICK_NSEC);
*/
if (unlikely(clock + delta > max_clock)) { clock = scd->tick_gtod + delta;
if (clock < max_clock) min_clock = wrap_max(scd->tick_gtod, scd->clock);
clock = max_clock; max_clock = scd->tick_gtod + TICK_NSEC;
else
clock++;
} else {
clock += delta;
}
out: clock = wrap_max(clock, min_clock);
if (unlikely(clock < min_clock)) clock = wrap_min(clock, max_clock);
clock = min_clock;
scd->tick_jiffies = now_jiffies;
scd->clock = clock; scd->clock = clock;
return clock; return scd->clock;
} }
static void lock_double_clock(struct sched_clock_data *data1, static void lock_double_clock(struct sched_clock_data *data1,
...@@ -171,7 +171,7 @@ u64 sched_clock_cpu(int cpu) ...@@ -171,7 +171,7 @@ u64 sched_clock_cpu(int cpu)
* larger time as the latest time for both * larger time as the latest time for both
* runqueues. (this creates monotonic movement) * runqueues. (this creates monotonic movement)
*/ */
if (likely(remote_clock < this_clock)) { if (likely((s64)(remote_clock - this_clock) < 0)) {
clock = this_clock; clock = this_clock;
scd->clock = clock; scd->clock = clock;
} else { } else {
...@@ -207,14 +207,9 @@ void sched_clock_tick(void) ...@@ -207,14 +207,9 @@ void sched_clock_tick(void)
now = sched_clock(); now = sched_clock();
__raw_spin_lock(&scd->lock); __raw_spin_lock(&scd->lock);
__update_sched_clock(scd, now);
/*
* update tick_gtod after __update_sched_clock() because that will
* already observe 1 new jiffy; adding a new tick_gtod to that would
* increase the clock 2 jiffies.
*/
scd->tick_raw = now; scd->tick_raw = now;
scd->tick_gtod = now_gtod; scd->tick_gtod = now_gtod;
__update_sched_clock(scd, now);
__raw_spin_unlock(&scd->lock); __raw_spin_unlock(&scd->lock);
} }
...@@ -232,18 +227,7 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event); ...@@ -232,18 +227,7 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
*/ */
void sched_clock_idle_wakeup_event(u64 delta_ns) void sched_clock_idle_wakeup_event(u64 delta_ns)
{ {
struct sched_clock_data *scd = this_scd(); sched_clock_tick();
/*
* Override the previous timestamp and ignore all
* sched_clock() deltas that occurred while we idled,
* and use the PM-provided delta_ns to advance the
* rq clock:
*/
__raw_spin_lock(&scd->lock);
scd->clock += delta_ns;
__raw_spin_unlock(&scd->lock);
touch_softlockup_watchdog(); touch_softlockup_watchdog();
} }
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment