Commit 4e6d7c2a authored by Ingo Molnar

Merge branch 'timers/core' into perf/timer, to apply dependent patch

An upcoming patch will depend on tai_ns() and NMI-safe ktime_get_raw_fast(),
so merge timers/core here in a separate topic branch until it's all cooked
and timers/core is merged upstream.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents 3c435c1e fe5fba05
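The two interfaces the merge is staged for, ktime_get_tai_ns() and the NMI-safe ktime_get_raw_fast_ns(), are added in the timekeeping header hunk below. A minimal, hypothetical sketch of how a dependent perf patch might use them (the sample_stamp type and helpers are illustrative only, not part of this merge):

#include <linux/timekeeping.h>
#include <linux/types.h>

struct sample_stamp {
	u64 raw_ns;	/* NMI-safe CLOCK_MONOTONIC_RAW timestamp */
	u64 tai_ns;	/* CLOCK_TAI timestamp, taken outside NMI context */
};

/* Called from a PMU overflow NMI: only the NMI-safe accessor is legal here. */
static void sample_stamp_nmi(struct sample_stamp *s)
{
	s->raw_ns = ktime_get_raw_fast_ns();
}

/* Called later, in process context, to attach a TAI wall-clock reference. */
static void sample_stamp_finish(struct sample_stamp *s)
{
	s->tai_ns = ktime_get_tai_ns();
}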
@@ -103,7 +103,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase)
 	/*
 	 * 120000 rough estimate from the calculations in
-	 * __clocksource_updatefreq_scale.
+	 * __clocksource_update_freq_scale.
 	 */
 	clocks_calc_mult_shift(&persistent_mult, &persistent_shift,
 				32768, NSEC_PER_SEC, 120000);
...
@@ -200,7 +200,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
 void update_vsyscall(struct timekeeper *tk)
 {
 	struct timespec xtime_coarse;
-	u32 use_syscall = strcmp(tk->tkr.clock->name, "arch_sys_counter");
+	u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter");
 	++vdso_data->tb_seq_count;
 	smp_wmb();
@@ -213,11 +213,11 @@ void update_vsyscall(struct timekeeper *tk)
 	vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
 	if (!use_syscall) {
-		vdso_data->cs_cycle_last	= tk->tkr.cycle_last;
+		vdso_data->cs_cycle_last	= tk->tkr_mono.cycle_last;
 		vdso_data->xtime_clock_sec	= tk->xtime_sec;
-		vdso_data->xtime_clock_nsec	= tk->tkr.xtime_nsec;
-		vdso_data->cs_mult		= tk->tkr.mult;
-		vdso_data->cs_shift		= tk->tkr.shift;
+		vdso_data->xtime_clock_nsec	= tk->tkr_mono.xtime_nsec;
+		vdso_data->cs_mult		= tk->tkr_mono.mult;
+		vdso_data->cs_shift		= tk->tkr_mono.shift;
 	}
 	smp_wmb();
...
@@ -215,20 +215,20 @@ void update_vsyscall(struct timekeeper *tk)
 {
 	u64 nsecps;
-	if (tk->tkr.clock != &clocksource_tod)
+	if (tk->tkr_mono.clock != &clocksource_tod)
 		return;
 	/* Make userspace gettimeofday spin until we're done. */
 	++vdso_data->tb_update_count;
 	smp_wmb();
-	vdso_data->xtime_tod_stamp = tk->tkr.cycle_last;
+	vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last;
 	vdso_data->xtime_clock_sec = tk->xtime_sec;
-	vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec;
+	vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
 	vdso_data->wtom_clock_sec =
 		tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
-	vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec +
-		+ ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift);
-	nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift;
+	vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec +
+		+ ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
+	nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift;
 	while (vdso_data->wtom_clock_nsec >= nsecps) {
 		vdso_data->wtom_clock_nsec -= nsecps;
 		vdso_data->wtom_clock_sec++;
@@ -236,7 +236,7 @@ void update_vsyscall(struct timekeeper *tk)
 	vdso_data->xtime_coarse_sec = tk->xtime_sec;
 	vdso_data->xtime_coarse_nsec =
-		(long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
+		(long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
 	vdso_data->wtom_coarse_sec =
 		vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
 	vdso_data->wtom_coarse_nsec =
@@ -246,8 +246,8 @@ void update_vsyscall(struct timekeeper *tk)
 		vdso_data->wtom_coarse_sec++;
 	}
-	vdso_data->tk_mult = tk->tkr.mult;
-	vdso_data->tk_shift = tk->tkr.shift;
+	vdso_data->tk_mult = tk->tkr_mono.mult;
+	vdso_data->tk_shift = tk->tkr_mono.shift;
 	smp_wmb();
 	++vdso_data->tb_update_count;
 }
@@ -283,7 +283,7 @@ void __init time_init(void)
 	if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
 		panic("Couldn't request external interrupt 0x1406");
-	if (clocksource_register(&clocksource_tod) != 0)
+	if (__clocksource_register(&clocksource_tod) != 0)
 		panic("Could not register TOD clock source");
 	/* Enable TOD clock interrupts on the boot cpu. */
...
@@ -181,17 +181,13 @@ static struct clocksource timer_cs = {
 	.rating	= 100,
 	.read	= timer_cs_read,
 	.mask	= CLOCKSOURCE_MASK(64),
-	.shift	= 2,
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 static __init int setup_timer_cs(void)
 {
 	timer_cs_enabled = 1;
-	timer_cs.mult = clocksource_hz2mult(sparc_config.clock_rate,
-					    timer_cs.shift);
-	return clocksource_register(&timer_cs);
+	return clocksource_register_hz(&timer_cs, sparc_config.clock_rate);
 }
 #ifdef CONFIG_SMP
...
...@@ -257,34 +257,34 @@ void update_vsyscall_tz(void) ...@@ -257,34 +257,34 @@ void update_vsyscall_tz(void)
void update_vsyscall(struct timekeeper *tk) void update_vsyscall(struct timekeeper *tk)
{ {
if (tk->tkr.clock != &cycle_counter_cs) if (tk->tkr_mono.clock != &cycle_counter_cs)
return; return;
write_seqcount_begin(&vdso_data->tb_seq); write_seqcount_begin(&vdso_data->tb_seq);
vdso_data->cycle_last = tk->tkr.cycle_last; vdso_data->cycle_last = tk->tkr_mono.cycle_last;
vdso_data->mask = tk->tkr.mask; vdso_data->mask = tk->tkr_mono.mask;
vdso_data->mult = tk->tkr.mult; vdso_data->mult = tk->tkr_mono.mult;
vdso_data->shift = tk->tkr.shift; vdso_data->shift = tk->tkr_mono.shift;
vdso_data->wall_time_sec = tk->xtime_sec; vdso_data->wall_time_sec = tk->xtime_sec;
vdso_data->wall_time_snsec = tk->tkr.xtime_nsec; vdso_data->wall_time_snsec = tk->tkr_mono.xtime_nsec;
vdso_data->monotonic_time_sec = tk->xtime_sec vdso_data->monotonic_time_sec = tk->xtime_sec
+ tk->wall_to_monotonic.tv_sec; + tk->wall_to_monotonic.tv_sec;
vdso_data->monotonic_time_snsec = tk->tkr.xtime_nsec vdso_data->monotonic_time_snsec = tk->tkr_mono.xtime_nsec
+ ((u64)tk->wall_to_monotonic.tv_nsec + ((u64)tk->wall_to_monotonic.tv_nsec
<< tk->tkr.shift); << tk->tkr_mono.shift);
while (vdso_data->monotonic_time_snsec >= while (vdso_data->monotonic_time_snsec >=
(((u64)NSEC_PER_SEC) << tk->tkr.shift)) { (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
vdso_data->monotonic_time_snsec -= vdso_data->monotonic_time_snsec -=
((u64)NSEC_PER_SEC) << tk->tkr.shift; ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
vdso_data->monotonic_time_sec++; vdso_data->monotonic_time_sec++;
} }
vdso_data->wall_time_coarse_sec = tk->xtime_sec; vdso_data->wall_time_coarse_sec = tk->xtime_sec;
vdso_data->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >> vdso_data->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
tk->tkr.shift); tk->tkr_mono.shift);
vdso_data->monotonic_time_coarse_sec = vdso_data->monotonic_time_coarse_sec =
vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
......
...@@ -31,30 +31,30 @@ void update_vsyscall(struct timekeeper *tk) ...@@ -31,30 +31,30 @@ void update_vsyscall(struct timekeeper *tk)
gtod_write_begin(vdata); gtod_write_begin(vdata);
/* copy vsyscall data */ /* copy vsyscall data */
vdata->vclock_mode = tk->tkr.clock->archdata.vclock_mode; vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
vdata->cycle_last = tk->tkr.cycle_last; vdata->cycle_last = tk->tkr_mono.cycle_last;
vdata->mask = tk->tkr.mask; vdata->mask = tk->tkr_mono.mask;
vdata->mult = tk->tkr.mult; vdata->mult = tk->tkr_mono.mult;
vdata->shift = tk->tkr.shift; vdata->shift = tk->tkr_mono.shift;
vdata->wall_time_sec = tk->xtime_sec; vdata->wall_time_sec = tk->xtime_sec;
vdata->wall_time_snsec = tk->tkr.xtime_nsec; vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec;
vdata->monotonic_time_sec = tk->xtime_sec vdata->monotonic_time_sec = tk->xtime_sec
+ tk->wall_to_monotonic.tv_sec; + tk->wall_to_monotonic.tv_sec;
vdata->monotonic_time_snsec = tk->tkr.xtime_nsec vdata->monotonic_time_snsec = tk->tkr_mono.xtime_nsec
+ ((u64)tk->wall_to_monotonic.tv_nsec + ((u64)tk->wall_to_monotonic.tv_nsec
<< tk->tkr.shift); << tk->tkr_mono.shift);
while (vdata->monotonic_time_snsec >= while (vdata->monotonic_time_snsec >=
(((u64)NSEC_PER_SEC) << tk->tkr.shift)) { (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
vdata->monotonic_time_snsec -= vdata->monotonic_time_snsec -=
((u64)NSEC_PER_SEC) << tk->tkr.shift; ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
vdata->monotonic_time_sec++; vdata->monotonic_time_sec++;
} }
vdata->wall_time_coarse_sec = tk->xtime_sec; vdata->wall_time_coarse_sec = tk->xtime_sec;
vdata->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >> vdata->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
tk->tkr.shift); tk->tkr_mono.shift);
vdata->monotonic_time_coarse_sec = vdata->monotonic_time_coarse_sec =
vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
......
@@ -1070,19 +1070,19 @@ static void update_pvclock_gtod(struct timekeeper *tk)
 	struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
 	u64 boot_ns;
-	boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot));
+	boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
 	write_seqcount_begin(&vdata->seq);
 	/* copy pvclock gtod data */
-	vdata->clock.vclock_mode = tk->tkr.clock->archdata.vclock_mode;
-	vdata->clock.cycle_last = tk->tkr.cycle_last;
-	vdata->clock.mask = tk->tkr.mask;
-	vdata->clock.mult = tk->tkr.mult;
-	vdata->clock.shift = tk->tkr.shift;
+	vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
+	vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
+	vdata->clock.mask = tk->tkr_mono.mask;
+	vdata->clock.mult = tk->tkr_mono.mult;
+	vdata->clock.shift = tk->tkr_mono.shift;
 	vdata->boot_ns = boot_ns;
-	vdata->nsec_base = tk->tkr.xtime_nsec;
+	vdata->nsec_base = tk->tkr_mono.xtime_nsec;
 	write_seqcount_end(&vdata->seq);
 }
...
@@ -210,7 +210,7 @@ static int em_sti_clocksource_enable(struct clocksource *cs)
 	ret = em_sti_start(p, USER_CLOCKSOURCE);
 	if (!ret)
-		__clocksource_updatefreq_hz(cs, p->rate);
+		__clocksource_update_freq_hz(cs, p->rate);
 	return ret;
 }
...
@@ -641,7 +641,7 @@ static int sh_cmt_clocksource_enable(struct clocksource *cs)
 	ret = sh_cmt_start(ch, FLAG_CLOCKSOURCE);
 	if (!ret) {
-		__clocksource_updatefreq_hz(cs, ch->rate);
+		__clocksource_update_freq_hz(cs, ch->rate);
 		ch->cs_enabled = true;
 	}
 	return ret;
...
@@ -272,7 +272,7 @@ static int sh_tmu_clocksource_enable(struct clocksource *cs)
 	ret = sh_tmu_enable(ch);
 	if (!ret) {
-		__clocksource_updatefreq_hz(cs, ch->rate);
+		__clocksource_update_freq_hz(cs, ch->rate);
 		ch->cs_enabled = true;
 	}
...
@@ -39,6 +39,8 @@ enum clock_event_mode {
 	CLOCK_EVT_MODE_PERIODIC,
 	CLOCK_EVT_MODE_ONESHOT,
 	CLOCK_EVT_MODE_RESUME,
+
+	/* Legacy ->set_mode() callback doesn't support below modes */
 };
 /*
@@ -81,7 +83,11 @@ enum clock_event_mode {
 * @mode:		operating mode assigned by the management code
 * @features:		features
 * @retries:		number of forced programming retries
- * @set_mode:		set mode function
+ * @set_mode:		legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME.
+ * @set_mode_periodic:	switch mode to periodic, if !set_mode
+ * @set_mode_oneshot:	switch mode to oneshot, if !set_mode
+ * @set_mode_shutdown:	switch mode to shutdown, if !set_mode
+ * @set_mode_resume:	resume clkevt device, if !set_mode
 * @broadcast:		function to broadcast events
 * @min_delta_ticks:	minimum delta value in ticks stored for reconfiguration
 * @max_delta_ticks:	maximum delta value in ticks stored for reconfiguration
@@ -108,9 +114,20 @@ struct clock_event_device {
 	unsigned int		features;
 	unsigned long		retries;
-	void			(*broadcast)(const struct cpumask *mask);
+	/*
+	 * Mode transition callback(s): Only one of the two groups should be
+	 * defined:
+	 * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME.
+	 * - set_mode_{shutdown|periodic|oneshot|resume}().
+	 */
 	void			(*set_mode)(enum clock_event_mode mode,
					    struct clock_event_device *);
+	int			(*set_mode_periodic)(struct clock_event_device *);
+	int			(*set_mode_oneshot)(struct clock_event_device *);
+	int			(*set_mode_shutdown)(struct clock_event_device *);
+	int			(*set_mode_resume)(struct clock_event_device *);
+	void			(*broadcast)(const struct cpumask *mask);
 	void			(*suspend)(struct clock_event_device *);
 	void			(*resume)(struct clock_event_device *);
 	unsigned long		min_delta_ticks;
...
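With the per-mode callbacks above, a driver fills in set_mode_shutdown()/set_mode_periodic()/set_mode_oneshot() instead of the multiplexed legacy set_mode() hook. A hedged sketch of what that looks like (the foo_* names are made up; a real driver also supplies set_next_event() and registers the device):

#include <linux/clockchips.h>

static int foo_shutdown(struct clock_event_device *evt)
{
	/* stop the hardware timer */
	return 0;
}

static int foo_set_periodic(struct clock_event_device *evt)
{
	/* program the timer to fire once per tick */
	return 0;
}

static int foo_set_oneshot(struct clock_event_device *evt)
{
	/* leave the timer stopped; ->set_next_event() arms each event */
	return 0;
}

static struct clock_event_device foo_clockevent = {
	.name			= "foo-timer",
	.features		= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
	.set_mode_shutdown	= foo_shutdown,
	.set_mode_periodic	= foo_set_periodic,
	.set_mode_oneshot	= foo_set_oneshot,
	/* .set_mode_resume is optional; .set_mode must stay NULL here */
};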
@@ -56,6 +56,7 @@ struct module;
 * @shift:		cycle to nanosecond divisor (power of two)
 * @max_idle_ns:	max idle time permitted by the clocksource (nsecs)
 * @maxadj:		maximum adjustment value to mult (~11%)
+ * @max_cycles:		maximum safe cycle value which won't overflow on multiplication
 * @flags:		flags describing special properties
 * @archdata:		arch-specific data
 * @suspend:		suspend function for the clocksource, if necessary
@@ -76,7 +77,7 @@ struct clocksource {
 #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
 	struct arch_clocksource_data archdata;
 #endif
+	u64 max_cycles;
 	const char *name;
 	struct list_head list;
 	int rating;
@@ -178,7 +179,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
 }
-extern int clocksource_register(struct clocksource*);
 extern int clocksource_unregister(struct clocksource*);
 extern void clocksource_touch_watchdog(void);
 extern struct clocksource* clocksource_get_next(void);
@@ -189,7 +189,7 @@ extern struct clocksource * __init clocksource_default_clock(void);
 extern void clocksource_mark_unstable(struct clocksource *cs);
 extern u64
-clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask);
+clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles);
 extern void
 clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
@@ -200,7 +200,16 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
 extern int
 __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq);
 extern void
-__clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq);
+__clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq);
+
+/*
+ * Don't call this unless you are a default clocksource
+ * (AKA: jiffies) and absolutely have to.
+ */
+static inline int __clocksource_register(struct clocksource *cs)
+{
+	return __clocksource_register_scale(cs, 1, 0);
+}
 static inline int clocksource_register_hz(struct clocksource *cs, u32 hz)
 {
@@ -212,14 +221,14 @@ static inline int clocksource_register_khz(struct clocksource *cs, u32 khz)
 	return __clocksource_register_scale(cs, 1000, khz);
 }
-static inline void __clocksource_updatefreq_hz(struct clocksource *cs, u32 hz)
+static inline void __clocksource_update_freq_hz(struct clocksource *cs, u32 hz)
 {
-	__clocksource_updatefreq_scale(cs, 1, hz);
+	__clocksource_update_freq_scale(cs, 1, hz);
 }
-static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz)
+static inline void __clocksource_update_freq_khz(struct clocksource *cs, u32 khz)
 {
-	__clocksource_updatefreq_scale(cs, 1000, khz);
+	__clocksource_update_freq_scale(cs, 1000, khz);
 }
...
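Taken together with the removal of clocksource_register(), the registration path above expects drivers to pass a frequency so that mult/shift, maxadj, max_idle_ns and max_cycles can all be derived in one place; __clocksource_register() stays reserved for self-defining default clocksources such as jiffies. A rough sketch under that assumption (the foo_* names are invented; drivers that only learn the rate in ->enable() call the renamed __clocksource_update_freq_hz() instead, as the em_sti/sh_cmt/sh_tmu hunks above do):

#include <linux/clocksource.h>

static cycle_t foo_cs_read(struct clocksource *cs)
{
	return (cycle_t)0;	/* would read the hardware counter here */
}

static struct clocksource foo_cs = {
	.name	= "foo-counter",
	.rating	= 200,
	.read	= foo_cs_read,
	.mask	= CLOCKSOURCE_MASK(32),
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
};

static int foo_cs_init(unsigned long rate_hz)
{
	/* mult/shift, maxadj, max_idle_ns and max_cycles are derived here */
	return clocksource_register_hz(&foo_cs, rate_hz);
}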
@@ -16,16 +16,16 @@
 * @read:	Read function of @clock
 * @mask:	Bitmask for two's complement subtraction of non 64bit clocks
 * @cycle_last: @clock cycle value at last update
- * @mult:	NTP adjusted multiplier for scaled math conversion
+ * @mult:	(NTP adjusted) multiplier for scaled math conversion
 * @shift:	Shift value for scaled math conversion
 * @xtime_nsec: Shifted (fractional) nano seconds offset for readout
- * @base_mono:  ktime_t (nanoseconds) base time for readout
+ * @base:	ktime_t (nanoseconds) base time for readout
 *
 * This struct has size 56 byte on 64 bit. Together with a seqcount it
 * occupies a single 64byte cache line.
 *
 * The struct is separate from struct timekeeper as it is also used
- * for a fast NMI safe accessor to clock monotonic.
+ * for a fast NMI safe accessors.
 */
 struct tk_read_base {
 	struct clocksource	*clock;
@@ -35,12 +35,13 @@ struct tk_read_base {
 	u32			mult;
 	u32			shift;
 	u64			xtime_nsec;
-	ktime_t			base_mono;
+	ktime_t			base;
 };
 /**
 * struct timekeeper - Structure holding internal timekeeping values.
- * @tkr:		The readout base structure
+ * @tkr_mono:		The readout base structure for CLOCK_MONOTONIC
+ * @tkr_raw:		The readout base structure for CLOCK_MONOTONIC_RAW
 * @xtime_sec:		Current CLOCK_REALTIME time in seconds
 * @ktime_sec:		Current CLOCK_MONOTONIC time in seconds
 * @wall_to_monotonic:	CLOCK_REALTIME to CLOCK_MONOTONIC offset
@@ -48,7 +49,6 @@ struct tk_read_base {
 * @offs_boot:		Offset clock monotonic -> clock boottime
 * @offs_tai:		Offset clock monotonic -> clock tai
 * @tai_offset:		The current UTC to TAI offset in seconds
- * @base_raw:		Monotonic raw base time in ktime_t format
 * @raw_time:		Monotonic raw base time in timespec64 format
 * @cycle_interval:	Number of clock cycles in one NTP interval
 * @xtime_interval:	Number of clock shifted nano seconds in one NTP
@@ -76,7 +76,8 @@ struct tk_read_base {
 *			used instead.
 */
 struct timekeeper {
-	struct tk_read_base	tkr;
+	struct tk_read_base	tkr_mono;
+	struct tk_read_base	tkr_raw;
 	u64			xtime_sec;
 	unsigned long		ktime_sec;
 	struct timespec64	wall_to_monotonic;
@@ -84,7 +85,6 @@ struct timekeeper {
 	ktime_t			offs_boot;
 	ktime_t			offs_tai;
 	s32			tai_offset;
-	ktime_t			base_raw;
 	struct timespec64	raw_time;
 	/* The following members are for timekeeping internal use */
...
@@ -214,12 +214,18 @@ static inline u64 ktime_get_boot_ns(void)
 	return ktime_to_ns(ktime_get_boottime());
 }
+static inline u64 ktime_get_tai_ns(void)
+{
+	return ktime_to_ns(ktime_get_clocktai());
+}
+
 static inline u64 ktime_get_raw_ns(void)
 {
 	return ktime_to_ns(ktime_get_raw());
 }
 extern u64 ktime_get_mono_fast_ns(void);
+extern u64 ktime_get_raw_fast_ns(void);
 /*
 * Timespec interfaces utilizing the ktime based ones
...
...@@ -94,6 +94,57 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) ...@@ -94,6 +94,57 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
} }
EXPORT_SYMBOL_GPL(clockevent_delta2ns); EXPORT_SYMBOL_GPL(clockevent_delta2ns);
static int __clockevents_set_mode(struct clock_event_device *dev,
enum clock_event_mode mode)
{
/* Transition with legacy set_mode() callback */
if (dev->set_mode) {
/* Legacy callback doesn't support new modes */
if (mode > CLOCK_EVT_MODE_RESUME)
return -ENOSYS;
dev->set_mode(mode, dev);
return 0;
}
if (dev->features & CLOCK_EVT_FEAT_DUMMY)
return 0;
/* Transition with new mode-specific callbacks */
switch (mode) {
case CLOCK_EVT_MODE_UNUSED:
/*
* This is an internal state, which is guaranteed to go from
* SHUTDOWN to UNUSED. No driver interaction required.
*/
return 0;
case CLOCK_EVT_MODE_SHUTDOWN:
return dev->set_mode_shutdown(dev);
case CLOCK_EVT_MODE_PERIODIC:
/* Core internal bug */
if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC))
return -ENOSYS;
return dev->set_mode_periodic(dev);
case CLOCK_EVT_MODE_ONESHOT:
/* Core internal bug */
if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
return -ENOSYS;
return dev->set_mode_oneshot(dev);
case CLOCK_EVT_MODE_RESUME:
/* Optional callback */
if (dev->set_mode_resume)
return dev->set_mode_resume(dev);
else
return 0;
default:
return -ENOSYS;
}
}
/** /**
* clockevents_set_mode - set the operating mode of a clock event device * clockevents_set_mode - set the operating mode of a clock event device
* @dev: device to modify * @dev: device to modify
...@@ -105,7 +156,9 @@ void clockevents_set_mode(struct clock_event_device *dev, ...@@ -105,7 +156,9 @@ void clockevents_set_mode(struct clock_event_device *dev,
enum clock_event_mode mode) enum clock_event_mode mode)
{ {
if (dev->mode != mode) { if (dev->mode != mode) {
dev->set_mode(mode, dev); if (__clockevents_set_mode(dev, mode))
return;
dev->mode = mode; dev->mode = mode;
/* /*
...@@ -373,6 +426,35 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu) ...@@ -373,6 +426,35 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu)
} }
EXPORT_SYMBOL_GPL(clockevents_unbind); EXPORT_SYMBOL_GPL(clockevents_unbind);
/* Sanity check of mode transition callbacks */
static int clockevents_sanity_check(struct clock_event_device *dev)
{
/* Legacy set_mode() callback */
if (dev->set_mode) {
/* We shouldn't be supporting new modes now */
WARN_ON(dev->set_mode_periodic || dev->set_mode_oneshot ||
dev->set_mode_shutdown || dev->set_mode_resume);
return 0;
}
if (dev->features & CLOCK_EVT_FEAT_DUMMY)
return 0;
/* New mode-specific callbacks */
if (!dev->set_mode_shutdown)
return -EINVAL;
if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
!dev->set_mode_periodic)
return -EINVAL;
if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) &&
!dev->set_mode_oneshot)
return -EINVAL;
return 0;
}
/** /**
* clockevents_register_device - register a clock event device * clockevents_register_device - register a clock event device
* @dev: device to register * @dev: device to register
...@@ -382,6 +464,8 @@ void clockevents_register_device(struct clock_event_device *dev) ...@@ -382,6 +464,8 @@ void clockevents_register_device(struct clock_event_device *dev)
unsigned long flags; unsigned long flags;
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
BUG_ON(clockevents_sanity_check(dev));
if (!dev->cpumask) { if (!dev->cpumask) {
WARN_ON(num_possible_cpus() > 1); WARN_ON(num_possible_cpus() > 1);
dev->cpumask = cpumask_of(smp_processor_id()); dev->cpumask = cpumask_of(smp_processor_id());
...@@ -449,7 +533,7 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq) ...@@ -449,7 +533,7 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq)
return clockevents_program_event(dev, dev->next_event, false); return clockevents_program_event(dev, dev->next_event, false);
if (dev->mode == CLOCK_EVT_MODE_PERIODIC) if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev); return __clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
return 0; return 0;
} }
......
...@@ -142,13 +142,6 @@ static void __clocksource_unstable(struct clocksource *cs) ...@@ -142,13 +142,6 @@ static void __clocksource_unstable(struct clocksource *cs)
schedule_work(&watchdog_work); schedule_work(&watchdog_work);
} }
static void clocksource_unstable(struct clocksource *cs, int64_t delta)
{
printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
cs->name, delta);
__clocksource_unstable(cs);
}
/** /**
* clocksource_mark_unstable - mark clocksource unstable via watchdog * clocksource_mark_unstable - mark clocksource unstable via watchdog
* @cs: clocksource to be marked unstable * @cs: clocksource to be marked unstable
...@@ -174,7 +167,7 @@ void clocksource_mark_unstable(struct clocksource *cs) ...@@ -174,7 +167,7 @@ void clocksource_mark_unstable(struct clocksource *cs)
static void clocksource_watchdog(unsigned long data) static void clocksource_watchdog(unsigned long data)
{ {
struct clocksource *cs; struct clocksource *cs;
cycle_t csnow, wdnow, delta; cycle_t csnow, wdnow, cslast, wdlast, delta;
int64_t wd_nsec, cs_nsec; int64_t wd_nsec, cs_nsec;
int next_cpu, reset_pending; int next_cpu, reset_pending;
...@@ -213,6 +206,8 @@ static void clocksource_watchdog(unsigned long data) ...@@ -213,6 +206,8 @@ static void clocksource_watchdog(unsigned long data)
delta = clocksource_delta(csnow, cs->cs_last, cs->mask); delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift); cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
wdlast = cs->wd_last; /* save these in case we print them */
cslast = cs->cs_last;
cs->cs_last = csnow; cs->cs_last = csnow;
cs->wd_last = wdnow; cs->wd_last = wdnow;
...@@ -221,7 +216,12 @@ static void clocksource_watchdog(unsigned long data) ...@@ -221,7 +216,12 @@ static void clocksource_watchdog(unsigned long data)
/* Check the deviation from the watchdog clocksource. */ /* Check the deviation from the watchdog clocksource. */
if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) { if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
clocksource_unstable(cs, cs_nsec - wd_nsec); pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name);
pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
watchdog->name, wdnow, wdlast, watchdog->mask);
pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
cs->name, csnow, cslast, cs->mask);
__clocksource_unstable(cs);
continue; continue;
} }
...@@ -469,26 +469,22 @@ static u32 clocksource_max_adjustment(struct clocksource *cs) ...@@ -469,26 +469,22 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
* @shift: cycle to nanosecond divisor (power of two) * @shift: cycle to nanosecond divisor (power of two)
* @maxadj: maximum adjustment value to mult (~11%) * @maxadj: maximum adjustment value to mult (~11%)
* @mask: bitmask for two's complement subtraction of non 64 bit counters * @mask: bitmask for two's complement subtraction of non 64 bit counters
* @max_cyc: maximum cycle value before potential overflow (does not include
* any safety margin)
*
* NOTE: This function includes a safety margin of 50%, so that bad clock values
* can be detected.
*/ */
u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask) u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
{ {
u64 max_nsecs, max_cycles; u64 max_nsecs, max_cycles;
/* /*
* Calculate the maximum number of cycles that we can pass to the * Calculate the maximum number of cycles that we can pass to the
* cyc2ns function without overflowing a 64-bit signed result. The * cyc2ns() function without overflowing a 64-bit result.
* maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)
* which is equivalent to the below.
* max_cycles < (2^63)/(mult + maxadj)
* max_cycles < 2^(log2((2^63)/(mult + maxadj)))
* max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
* max_cycles < 2^(63 - log2(mult + maxadj))
* max_cycles < 1 << (63 - log2(mult + maxadj))
* Please note that we add 1 to the result of the log2 to account for
* any rounding errors, ensure the above inequality is satisfied and
* no overflow will occur.
*/ */
max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1)); max_cycles = ULLONG_MAX;
do_div(max_cycles, mult+maxadj);
/* /*
* The actual maximum number of cycles we can defer the clocksource is * The actual maximum number of cycles we can defer the clocksource is
...@@ -499,27 +495,26 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask) ...@@ -499,27 +495,26 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
max_cycles = min(max_cycles, mask); max_cycles = min(max_cycles, mask);
max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift); max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
/* return the max_cycles value as well if requested */
if (max_cyc)
*max_cyc = max_cycles;
/* Return 50% of the actual maximum, so we can detect bad values */
max_nsecs >>= 1;
return max_nsecs; return max_nsecs;
} }
/** /**
* clocksource_max_deferment - Returns max time the clocksource can be deferred * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
* @cs: Pointer to clocksource * @cs: Pointer to clocksource to be updated
* *
*/ */
static u64 clocksource_max_deferment(struct clocksource *cs) static inline void clocksource_update_max_deferment(struct clocksource *cs)
{ {
u64 max_nsecs; cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
cs->maxadj, cs->mask,
max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj, &cs->max_cycles);
cs->mask);
/*
* To ensure that the clocksource does not wrap whilst we are idle,
* limit the time the clocksource can be deferred by 12.5%. Please
* note a margin of 12.5% is used because this can be computed with
* a shift, versus say 10% which would require division.
*/
return max_nsecs - (max_nsecs >> 3);
} }
#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
...@@ -648,7 +643,7 @@ static void clocksource_enqueue(struct clocksource *cs) ...@@ -648,7 +643,7 @@ static void clocksource_enqueue(struct clocksource *cs)
} }
/** /**
* __clocksource_updatefreq_scale - Used update clocksource with new freq * __clocksource_update_freq_scale - Used update clocksource with new freq
* @cs: clocksource to be registered * @cs: clocksource to be registered
* @scale: Scale factor multiplied against freq to get clocksource hz * @scale: Scale factor multiplied against freq to get clocksource hz
* @freq: clocksource frequency (cycles per second) divided by scale * @freq: clocksource frequency (cycles per second) divided by scale
...@@ -656,22 +651,28 @@ static void clocksource_enqueue(struct clocksource *cs) ...@@ -656,22 +651,28 @@ static void clocksource_enqueue(struct clocksource *cs)
* This should only be called from the clocksource->enable() method. * This should only be called from the clocksource->enable() method.
* *
* This *SHOULD NOT* be called directly! Please use the * This *SHOULD NOT* be called directly! Please use the
* clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions. * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
* functions.
*/ */
void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
{ {
u64 sec; u64 sec;
/*
* Default clocksources are *special* and self-define their mult/shift.
* But, you're not special, so you should specify a freq value.
*/
if (freq) {
/* /*
* Calc the maximum number of seconds which we can run before * Calc the maximum number of seconds which we can run before
* wrapping around. For clocksources which have a mask > 32bit * wrapping around. For clocksources which have a mask > 32-bit
* we need to limit the max sleep time to have a good * we need to limit the max sleep time to have a good
* conversion precision. 10 minutes is still a reasonable * conversion precision. 10 minutes is still a reasonable
* amount. That results in a shift value of 24 for a * amount. That results in a shift value of 24 for a
* clocksource with mask >= 40bit and f >= 4GHz. That maps to * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
* ~ 0.06ppm granularity for NTP. We apply the same 12.5% * ~ 0.06ppm granularity for NTP.
* margin as we do in clocksource_max_deferment()
*/ */
sec = (cs->mask - (cs->mask >> 3)); sec = cs->mask;
do_div(sec, freq); do_div(sec, freq);
do_div(sec, scale); do_div(sec, scale);
if (!sec) if (!sec)
...@@ -681,23 +682,33 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) ...@@ -681,23 +682,33 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
NSEC_PER_SEC / scale, sec * scale); NSEC_PER_SEC / scale, sec * scale);
}
/* /*
* for clocksources that have large mults, to avoid overflow. * Ensure clocksources that have large 'mult' values don't overflow
* Since mult may be adjusted by ntp, add an safety extra margin * when adjusted.
*
*/ */
cs->maxadj = clocksource_max_adjustment(cs); cs->maxadj = clocksource_max_adjustment(cs);
while ((cs->mult + cs->maxadj < cs->mult) while (freq && ((cs->mult + cs->maxadj < cs->mult)
|| (cs->mult - cs->maxadj > cs->mult)) { || (cs->mult - cs->maxadj > cs->mult))) {
cs->mult >>= 1; cs->mult >>= 1;
cs->shift--; cs->shift--;
cs->maxadj = clocksource_max_adjustment(cs); cs->maxadj = clocksource_max_adjustment(cs);
} }
cs->max_idle_ns = clocksource_max_deferment(cs); /*
* Only warn for *special* clocksources that self-define
* their mult/shift values and don't specify a freq.
*/
WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
"timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
cs->name);
clocksource_update_max_deferment(cs);
pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
} }
EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
/** /**
* __clocksource_register_scale - Used to install new clocksources * __clocksource_register_scale - Used to install new clocksources
...@@ -714,7 +725,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) ...@@ -714,7 +725,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
{ {
/* Initialize mult/shift and max_idle_ns */ /* Initialize mult/shift and max_idle_ns */
__clocksource_updatefreq_scale(cs, scale, freq); __clocksource_update_freq_scale(cs, scale, freq);
/* Add clocksource to the clocksource list */ /* Add clocksource to the clocksource list */
mutex_lock(&clocksource_mutex); mutex_lock(&clocksource_mutex);
...@@ -726,33 +737,6 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) ...@@ -726,33 +737,6 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
} }
EXPORT_SYMBOL_GPL(__clocksource_register_scale); EXPORT_SYMBOL_GPL(__clocksource_register_scale);
/**
* clocksource_register - Used to install new clocksources
* @cs: clocksource to be registered
*
* Returns -EBUSY if registration fails, zero otherwise.
*/
int clocksource_register(struct clocksource *cs)
{
/* calculate max adjustment for given mult/shift */
cs->maxadj = clocksource_max_adjustment(cs);
WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
"Clocksource %s might overflow on 11%% adjustment\n",
cs->name);
/* calculate max idle time permitted for this clocksource */
cs->max_idle_ns = clocksource_max_deferment(cs);
mutex_lock(&clocksource_mutex);
clocksource_enqueue(cs);
clocksource_enqueue_watchdog(cs);
clocksource_select();
mutex_unlock(&clocksource_mutex);
return 0;
}
EXPORT_SYMBOL(clocksource_register);
static void __clocksource_change_rating(struct clocksource *cs, int rating) static void __clocksource_change_rating(struct clocksource *cs, int rating)
{ {
list_del(&cs->list); list_del(&cs->list);
......
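The clocks_calc_max_nsecs() rewrite above replaces the conservative power-of-two estimate with an exact 64-bit division, reports max_cycles back to the caller, and folds a 50% safety margin into the returned nanoseconds. A standalone userspace sketch of the same arithmetic, with made-up example values for a roughly 24 MHz, 32-bit counter:

#include <stdint.h>
#include <stdio.h>

/* Mirror of the kernel's scaled cyc2ns math: (cycles * mult) >> shift. */
static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	return (cycles * mult) >> shift;
}

static uint64_t max_nsecs(uint32_t mult, uint32_t shift, uint32_t maxadj,
			  uint64_t mask, uint64_t *max_cyc)
{
	/* Largest cycle count that cannot overflow 64 bits when multiplied */
	uint64_t max_cycles = UINT64_MAX / (mult + maxadj);

	/* Never defer past the point where the counter itself wraps */
	if (max_cycles > mask)
		max_cycles = mask;

	if (max_cyc)
		*max_cyc = max_cycles;

	/* Convert with the smallest possible mult, then keep a 50% margin */
	return cyc2ns(max_cycles, mult - maxadj, shift) >> 1;
}

int main(void)
{
	uint64_t max_cyc;
	/* invented example: 32-bit counter, mult/shift for ~24 MHz, ~11% maxadj */
	uint64_t ns = max_nsecs(2796202667u, 26, 300000000u,
				(1ull << 32) - 1, &max_cyc);

	printf("max_cycles=%llu max_idle_ns=%llu\n",
	       (unsigned long long)max_cyc, (unsigned long long)ns);
	return 0;
}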
@@ -71,6 +71,7 @@ static struct clocksource clocksource_jiffies = {
 	.mask		= 0xffffffff, /*32bits*/
 	.mult		= NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
 	.shift		= JIFFIES_SHIFT,
+	.max_cycles	= 10,
 };
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
@@ -94,7 +95,7 @@ EXPORT_SYMBOL(jiffies);
 static int __init init_jiffies_clocksource(void)
 {
-	return clocksource_register(&clocksource_jiffies);
+	return __clocksource_register(&clocksource_jiffies);
 }
 core_initcall(init_jiffies_clocksource);
@@ -130,6 +131,6 @@ int register_refined_jiffies(long cycles_per_second)
 	refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
-	clocksource_register(&refined_jiffies);
+	__clocksource_register(&refined_jiffies);
 	return 0;
 }
 /*
- * sched_clock.c: support for extending counters to full 64-bit ns counter
+ * sched_clock.c: Generic sched_clock() support, to extend low level
+ * hardware time counters to full 64-bit ns values.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
...@@ -18,15 +19,53 @@ ...@@ -18,15 +19,53 @@
#include <linux/seqlock.h> #include <linux/seqlock.h>
#include <linux/bitops.h> #include <linux/bitops.h>
struct clock_data { /**
ktime_t wrap_kt; * struct clock_read_data - data required to read from sched_clock()
*
* @epoch_ns: sched_clock() value at last update
* @epoch_cyc: Clock cycle value at last update.
* @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit
* clocks.
* @read_sched_clock: Current clock source (or dummy source when suspended).
* @mult: Multipler for scaled math conversion.
* @shift: Shift value for scaled math conversion.
*
* Care must be taken when updating this structure; it is read by
* some very hot code paths. It occupies <=40 bytes and, when combined
* with the seqcount used to synchronize access, comfortably fits into
* a 64 byte cache line.
*/
struct clock_read_data {
u64 epoch_ns; u64 epoch_ns;
u64 epoch_cyc; u64 epoch_cyc;
seqcount_t seq; u64 sched_clock_mask;
unsigned long rate; u64 (*read_sched_clock)(void);
u32 mult; u32 mult;
u32 shift; u32 shift;
bool suspended; };
/**
* struct clock_data - all data needed for sched_clock() (including
* registration of a new clock source)
*
* @seq: Sequence counter for protecting updates. The lowest
* bit is the index for @read_data.
* @read_data: Data required to read from sched_clock.
* @wrap_kt: Duration for which clock can run before wrapping.
* @rate: Tick rate of the registered clock.
* @actual_read_sched_clock: Registered hardware level clock read function.
*
* The ordering of this structure has been chosen to optimize cache
* performance. In particular 'seq' and 'read_data[0]' (combined) should fit
* into a single 64-byte cache line.
*/
struct clock_data {
seqcount_t seq;
struct clock_read_data read_data[2];
ktime_t wrap_kt;
unsigned long rate;
u64 (*actual_read_sched_clock)(void);
}; };
static struct hrtimer sched_clock_timer; static struct hrtimer sched_clock_timer;
...@@ -34,12 +73,6 @@ static int irqtime = -1; ...@@ -34,12 +73,6 @@ static int irqtime = -1;
core_param(irqtime, irqtime, int, 0400); core_param(irqtime, irqtime, int, 0400);
static struct clock_data cd = {
.mult = NSEC_PER_SEC / HZ,
};
static u64 __read_mostly sched_clock_mask;
static u64 notrace jiffy_sched_clock_read(void) static u64 notrace jiffy_sched_clock_read(void)
{ {
/* /*
...@@ -49,7 +82,11 @@ static u64 notrace jiffy_sched_clock_read(void) ...@@ -49,7 +82,11 @@ static u64 notrace jiffy_sched_clock_read(void)
return (u64)(jiffies - INITIAL_JIFFIES); return (u64)(jiffies - INITIAL_JIFFIES);
} }
static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read; static struct clock_data cd ____cacheline_aligned = {
.read_data[0] = { .mult = NSEC_PER_SEC / HZ,
.read_sched_clock = jiffy_sched_clock_read, },
.actual_read_sched_clock = jiffy_sched_clock_read,
};
static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
{ {
...@@ -58,111 +95,136 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) ...@@ -58,111 +95,136 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
unsigned long long notrace sched_clock(void) unsigned long long notrace sched_clock(void)
{ {
u64 epoch_ns; u64 cyc, res;
u64 epoch_cyc;
u64 cyc;
unsigned long seq; unsigned long seq;
struct clock_read_data *rd;
if (cd.suspended)
return cd.epoch_ns;
do { do {
seq = raw_read_seqcount_begin(&cd.seq); seq = raw_read_seqcount(&cd.seq);
epoch_cyc = cd.epoch_cyc; rd = cd.read_data + (seq & 1);
epoch_ns = cd.epoch_ns;
cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
rd->sched_clock_mask;
res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);
} while (read_seqcount_retry(&cd.seq, seq)); } while (read_seqcount_retry(&cd.seq, seq));
cyc = read_sched_clock(); return res;
cyc = (cyc - epoch_cyc) & sched_clock_mask;
return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift);
} }
/* /*
* Atomically update the sched_clock epoch. * Updating the data required to read the clock.
*
* sched_clock() will never observe mis-matched data even if called from
* an NMI. We do this by maintaining an odd/even copy of the data and
* steering sched_clock() to one or the other using a sequence counter.
* In order to preserve the data cache profile of sched_clock() as much
* as possible the system reverts back to the even copy when the update
* completes; the odd copy is used *only* during an update.
*/ */
static void notrace update_sched_clock(void) static void update_clock_read_data(struct clock_read_data *rd)
{
/* update the backup (odd) copy with the new data */
cd.read_data[1] = *rd;
/* steer readers towards the odd copy */
raw_write_seqcount_latch(&cd.seq);
/* now its safe for us to update the normal (even) copy */
cd.read_data[0] = *rd;
/* switch readers back to the even copy */
raw_write_seqcount_latch(&cd.seq);
}
/*
* Atomically update the sched_clock() epoch.
*/
static void update_sched_clock(void)
{ {
unsigned long flags;
u64 cyc; u64 cyc;
u64 ns; u64 ns;
struct clock_read_data rd;
cyc = read_sched_clock(); rd = cd.read_data[0];
ns = cd.epoch_ns +
cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, cyc = cd.actual_read_sched_clock();
cd.mult, cd.shift); ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
raw_local_irq_save(flags); rd.epoch_ns = ns;
raw_write_seqcount_begin(&cd.seq); rd.epoch_cyc = cyc;
cd.epoch_ns = ns;
cd.epoch_cyc = cyc; update_clock_read_data(&rd);
raw_write_seqcount_end(&cd.seq);
raw_local_irq_restore(flags);
} }
static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt) static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
{ {
update_sched_clock(); update_sched_clock();
hrtimer_forward_now(hrt, cd.wrap_kt); hrtimer_forward_now(hrt, cd.wrap_kt);
return HRTIMER_RESTART; return HRTIMER_RESTART;
} }
void __init sched_clock_register(u64 (*read)(void), int bits, void __init
unsigned long rate) sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
{ {
u64 res, wrap, new_mask, new_epoch, cyc, ns; u64 res, wrap, new_mask, new_epoch, cyc, ns;
u32 new_mult, new_shift; u32 new_mult, new_shift;
ktime_t new_wrap_kt;
unsigned long r; unsigned long r;
char r_unit; char r_unit;
struct clock_read_data rd;
if (cd.rate > rate) if (cd.rate > rate)
return; return;
WARN_ON(!irqs_disabled()); WARN_ON(!irqs_disabled());
/* calculate the mult/shift to convert counter ticks to ns. */ /* Calculate the mult/shift to convert counter ticks to ns. */
clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600); clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600);
new_mask = CLOCKSOURCE_MASK(bits); new_mask = CLOCKSOURCE_MASK(bits);
cd.rate = rate;
/* Calculate how many nanosecs until we risk wrapping */
wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL);
cd.wrap_kt = ns_to_ktime(wrap);
/* calculate how many ns until we wrap */ rd = cd.read_data[0];
wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask);
new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
/* update epoch for new counter and update epoch_ns from old counter*/ /* Update epoch for new counter and update 'epoch_ns' from old counter*/
new_epoch = read(); new_epoch = read();
cyc = read_sched_clock(); cyc = cd.actual_read_sched_clock();
ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
cd.mult, cd.shift); cd.actual_read_sched_clock = read;
raw_write_seqcount_begin(&cd.seq); rd.read_sched_clock = read;
read_sched_clock = read; rd.sched_clock_mask = new_mask;
sched_clock_mask = new_mask; rd.mult = new_mult;
cd.rate = rate; rd.shift = new_shift;
cd.wrap_kt = new_wrap_kt; rd.epoch_cyc = new_epoch;
cd.mult = new_mult; rd.epoch_ns = ns;
cd.shift = new_shift;
cd.epoch_cyc = new_epoch; update_clock_read_data(&rd);
cd.epoch_ns = ns;
raw_write_seqcount_end(&cd.seq);
r = rate; r = rate;
if (r >= 4000000) { if (r >= 4000000) {
r /= 1000000; r /= 1000000;
r_unit = 'M'; r_unit = 'M';
} else if (r >= 1000) { } else {
if (r >= 1000) {
r /= 1000; r /= 1000;
r_unit = 'k'; r_unit = 'k';
} else } else {
r_unit = ' '; r_unit = ' ';
}
}
/* calculate the ns resolution of this counter */ /* Calculate the ns resolution of this counter */
res = cyc_to_ns(1ULL, new_mult, new_shift); res = cyc_to_ns(1ULL, new_mult, new_shift);
pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n", pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n",
bits, r, r_unit, res, wrap); bits, r, r_unit, res, wrap);
/* Enable IRQ time accounting if we have a fast enough sched_clock */ /* Enable IRQ time accounting if we have a fast enough sched_clock() */
if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
enable_sched_clock_irqtime(); enable_sched_clock_irqtime();
...@@ -172,10 +234,10 @@ void __init sched_clock_register(u64 (*read)(void), int bits, ...@@ -172,10 +234,10 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
void __init sched_clock_postinit(void) void __init sched_clock_postinit(void)
{ {
/* /*
* If no sched_clock function has been provided at that point, * If no sched_clock() function has been provided at that point,
* make it the final one one. * make it the final one one.
*/ */
if (read_sched_clock == jiffy_sched_clock_read) if (cd.actual_read_sched_clock == jiffy_sched_clock_read)
sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ); sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);
update_sched_clock(); update_sched_clock();
...@@ -189,19 +251,42 @@ void __init sched_clock_postinit(void) ...@@ -189,19 +251,42 @@ void __init sched_clock_postinit(void)
hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
} }
/*
* Clock read function for use when the clock is suspended.
*
* This function makes it appear to sched_clock() as if the clock
* stopped counting at its last update.
*
* This function must only be called from the critical
* section in sched_clock(). It relies on the read_seqcount_retry()
* at the end of the critical section to be sure we observe the
* correct copy of 'epoch_cyc'.
*/
static u64 notrace suspended_sched_clock_read(void)
{
unsigned long seq = raw_read_seqcount(&cd.seq);
return cd.read_data[seq & 1].epoch_cyc;
}
static int sched_clock_suspend(void) static int sched_clock_suspend(void)
{ {
struct clock_read_data *rd = &cd.read_data[0];
update_sched_clock(); update_sched_clock();
hrtimer_cancel(&sched_clock_timer); hrtimer_cancel(&sched_clock_timer);
cd.suspended = true; rd->read_sched_clock = suspended_sched_clock_read;
return 0; return 0;
} }
static void sched_clock_resume(void) static void sched_clock_resume(void)
{ {
cd.epoch_cyc = read_sched_clock(); struct clock_read_data *rd = &cd.read_data[0];
rd->epoch_cyc = cd.actual_read_sched_clock();
hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
cd.suspended = false; rd->read_sched_clock = cd.actual_read_sched_clock;
} }
static struct syscore_ops sched_clock_ops = { static struct syscore_ops sched_clock_ops = {
...@@ -212,6 +297,7 @@ static struct syscore_ops sched_clock_ops = { ...@@ -212,6 +297,7 @@ static struct syscore_ops sched_clock_ops = {
static int __init sched_clock_syscore_init(void) static int __init sched_clock_syscore_init(void)
{ {
register_syscore_ops(&sched_clock_ops); register_syscore_ops(&sched_clock_ops);
return 0; return 0;
} }
device_initcall(sched_clock_syscore_init); device_initcall(sched_clock_syscore_init);
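The sched_clock() rework above gets its NMI safety from a latch: two copies of clock_read_data, with the low bit of the sequence count steering readers to whichever copy is not being updated (raw_write_seqcount_latch() flips it). A simplified userspace analogue of that pattern, using C11 atomics and a made-up payload; the kernel's own seqcount primitives handle the barriers and READ_ONCE semantics more carefully:

#include <stdatomic.h>
#include <stdint.h>

struct payload {
	uint64_t epoch_ns;
	uint64_t epoch_cyc;
};

struct latch {
	_Atomic unsigned int seq;
	struct payload copy[2];
};

/* Writer: update the idle copy, flip readers onto it, then refresh the other. */
void latch_write(struct latch *l, const struct payload *p)
{
	l->copy[1] = *p;	/* readers are on copy[0] while seq is even */
	atomic_fetch_add_explicit(&l->seq, 1, memory_order_release); /* -> odd copy */
	l->copy[0] = *p;	/* now copy[0] is idle; bring it up to date */
	atomic_fetch_add_explicit(&l->seq, 1, memory_order_release); /* -> even copy */
}

/* Reader: pick the copy selected by the low bit; retry if seq moved meanwhile. */
struct payload latch_read(struct latch *l)
{
	struct payload p;
	unsigned int seq;

	do {
		seq = atomic_load_explicit(&l->seq, memory_order_acquire);
		p = l->copy[seq & 1];
		atomic_thread_fence(memory_order_acquire);
	} while (atomic_load_explicit(&l->seq, memory_order_relaxed) != seq);

	return p;
}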
...@@ -59,6 +59,7 @@ struct tk_fast { ...@@ -59,6 +59,7 @@ struct tk_fast {
}; };
static struct tk_fast tk_fast_mono ____cacheline_aligned; static struct tk_fast tk_fast_mono ____cacheline_aligned;
static struct tk_fast tk_fast_raw ____cacheline_aligned;
/* flag for if timekeeping is suspended */ /* flag for if timekeeping is suspended */
int __read_mostly timekeeping_suspended; int __read_mostly timekeeping_suspended;
...@@ -68,8 +69,8 @@ bool __read_mostly persistent_clock_exist = false; ...@@ -68,8 +69,8 @@ bool __read_mostly persistent_clock_exist = false;
static inline void tk_normalize_xtime(struct timekeeper *tk) static inline void tk_normalize_xtime(struct timekeeper *tk)
{ {
while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) { while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift; tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
tk->xtime_sec++; tk->xtime_sec++;
} }
} }
...@@ -79,20 +80,20 @@ static inline struct timespec64 tk_xtime(struct timekeeper *tk) ...@@ -79,20 +80,20 @@ static inline struct timespec64 tk_xtime(struct timekeeper *tk)
struct timespec64 ts; struct timespec64 ts;
ts.tv_sec = tk->xtime_sec; ts.tv_sec = tk->xtime_sec;
ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
return ts; return ts;
} }
static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
{ {
tk->xtime_sec = ts->tv_sec; tk->xtime_sec = ts->tv_sec;
tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift; tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
} }
static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
{ {
tk->xtime_sec += ts->tv_sec; tk->xtime_sec += ts->tv_sec;
tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift; tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
tk_normalize_xtime(tk); tk_normalize_xtime(tk);
} }
...@@ -118,6 +119,117 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) ...@@ -118,6 +119,117 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
tk->offs_boot = ktime_add(tk->offs_boot, delta); tk->offs_boot = ktime_add(tk->offs_boot, delta);
} }
#ifdef CONFIG_DEBUG_TIMEKEEPING
#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
/*
* These simple flag variables are managed
* without locks, which is racy, but ok since
* we don't really care about being super
* precise about how many events were seen,
* just that a problem was observed.
*/
static int timekeeping_underflow_seen;
static int timekeeping_overflow_seen;
/* last_warning is only modified under the timekeeping lock */
static long timekeeping_last_warning;
static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
{
cycle_t max_cycles = tk->tkr_mono.clock->max_cycles;
const char *name = tk->tkr_mono.clock->name;
if (offset > max_cycles) {
printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
offset, name, max_cycles);
printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
} else {
if (offset > (max_cycles >> 1)) {
printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the the '%s' clock's 50%% safety margin (%lld)\n",
offset, name, max_cycles >> 1);
printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
}
}
if (timekeeping_underflow_seen) {
if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
printk_deferred(" Your kernel is probably still fine.\n");
timekeeping_last_warning = jiffies;
}
timekeeping_underflow_seen = 0;
}
if (timekeeping_overflow_seen) {
if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
printk_deferred(" Your kernel is probably still fine.\n");
timekeeping_last_warning = jiffies;
}
timekeeping_overflow_seen = 0;
}
}
static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
{
cycle_t now, last, mask, max, delta;
unsigned int seq;
/*
* Since we're called holding a seqlock, the data may shift
* under us while we're doing the calculation. This can cause
* false positives, since we'd note a problem but throw the
* results away. So nest another seqlock here to atomically
* grab the points we are checking with.
*/
do {
seq = read_seqcount_begin(&tk_core.seq);
now = tkr->read(tkr->clock);
last = tkr->cycle_last;
mask = tkr->mask;
max = tkr->clock->max_cycles;
} while (read_seqcount_retry(&tk_core.seq, seq));
delta = clocksource_delta(now, last, mask);
/*
* Try to catch underflows by checking if we are seeing small
* mask-relative negative values.
*/
if (unlikely((~delta & mask) < (mask >> 3))) {
timekeeping_underflow_seen = 1;
delta = 0;
}
/* Cap the delta value to the max_cycles value to avoid mult overflows */
if (unlikely(delta > max)) {
timekeeping_overflow_seen = 1;
delta = tkr->clock->max_cycles;
}
return delta;
}
#else
static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
{
}
static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
{
cycle_t cycle_now, delta;
/* read clocksource */
cycle_now = tkr->read(tkr->clock);
/* calculate the delta since the last update_wall_time */
delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
return delta;
}
#endif
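As an aside, the underflow test above works on mask-relative values: a counter read that lands slightly before cycle_last wraps to a value near the mask, so the complement of the delta is small. The following is a minimal userspace sketch of that heuristic (illustrative only, not kernel code; the counter width and values are made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t mask = 0xFFFFFFFFULL;		/* assume a 32-bit counter */
	uint64_t last = 1000, now = 990;	/* "now" appears 10 cycles early */
	uint64_t delta = (now - last) & mask;	/* wraps to a value near the mask */

	/* same test as timekeeping_get_delta(): small mask-relative negative */
	if ((~delta & mask) < (mask >> 3))
		printf("underflow suspected, delta would be capped to 0\n");
	return 0;
}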
/** /**
* tk_setup_internals - Set up internals to use clocksource clock. * tk_setup_internals - Set up internals to use clocksource clock.
* *
...@@ -135,11 +247,16 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) ...@@ -135,11 +247,16 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
u64 tmp, ntpinterval; u64 tmp, ntpinterval;
struct clocksource *old_clock; struct clocksource *old_clock;
old_clock = tk->tkr.clock; old_clock = tk->tkr_mono.clock;
tk->tkr.clock = clock; tk->tkr_mono.clock = clock;
tk->tkr.read = clock->read; tk->tkr_mono.read = clock->read;
tk->tkr.mask = clock->mask; tk->tkr_mono.mask = clock->mask;
tk->tkr.cycle_last = tk->tkr.read(clock); tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
tk->tkr_raw.clock = clock;
tk->tkr_raw.read = clock->read;
tk->tkr_raw.mask = clock->mask;
tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
/* Do the ns -> cycle conversion first, using original mult */ /* Do the ns -> cycle conversion first, using original mult */
tmp = NTP_INTERVAL_LENGTH; tmp = NTP_INTERVAL_LENGTH;
...@@ -163,11 +280,14 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) ...@@ -163,11 +280,14 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
if (old_clock) { if (old_clock) {
int shift_change = clock->shift - old_clock->shift; int shift_change = clock->shift - old_clock->shift;
if (shift_change < 0) if (shift_change < 0)
tk->tkr.xtime_nsec >>= -shift_change; tk->tkr_mono.xtime_nsec >>= -shift_change;
else else
tk->tkr.xtime_nsec <<= shift_change; tk->tkr_mono.xtime_nsec <<= shift_change;
} }
tk->tkr.shift = clock->shift; tk->tkr_raw.xtime_nsec = 0;
tk->tkr_mono.shift = clock->shift;
tk->tkr_raw.shift = clock->shift;
tk->ntp_error = 0; tk->ntp_error = 0;
tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
...@@ -178,7 +298,8 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) ...@@ -178,7 +298,8 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
* active clocksource. These values will be adjusted via NTP * active clocksource. These values will be adjusted via NTP
* to counteract clock drifting. * to counteract clock drifting.
*/ */
tk->tkr.mult = clock->mult; tk->tkr_mono.mult = clock->mult;
tk->tkr_raw.mult = clock->mult;
tk->ntp_err_mult = 0; tk->ntp_err_mult = 0;
} }
...@@ -193,14 +314,10 @@ static inline u32 arch_gettimeoffset(void) { return 0; } ...@@ -193,14 +314,10 @@ static inline u32 arch_gettimeoffset(void) { return 0; }
static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
{ {
cycle_t cycle_now, delta; cycle_t delta;
s64 nsec; s64 nsec;
/* read clocksource: */ delta = timekeeping_get_delta(tkr);
cycle_now = tkr->read(tkr->clock);
/* calculate the delta since the last update_wall_time: */
delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
nsec = delta * tkr->mult + tkr->xtime_nsec; nsec = delta * tkr->mult + tkr->xtime_nsec;
nsec >>= tkr->shift; nsec >>= tkr->shift;
...@@ -209,25 +326,6 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) ...@@ -209,25 +326,6 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
return nsec + arch_gettimeoffset(); return nsec + arch_gettimeoffset();
} }
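The delta-to-nanoseconds step above is the usual fixed-point conversion with the mult/shift pair taken over from the clocksource. A self-contained sketch of that arithmetic (same math as clocksource_cyc2ns(); the 24 MHz counter and the mult/shift pair are illustrative assumptions, not values from this patch):

#include <stdint.h>
#include <stdio.h>

/* ns = (cycles * mult) >> shift, as in the hunk above */
static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	return (cycles * mult) >> shift;
}

int main(void)
{
	/* e.g. a 24 MHz counter: mult/shift chosen so 24 cycles ~= 1000 ns */
	uint32_t mult = 2796202667u, shift = 26;

	printf("%llu ns\n", (unsigned long long)cyc2ns(24, mult, shift));
	return 0;
}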
static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
{
struct clocksource *clock = tk->tkr.clock;
cycle_t cycle_now, delta;
s64 nsec;
/* read clocksource: */
cycle_now = tk->tkr.read(clock);
/* calculate the delta since the last update_wall_time: */
delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
/* convert delta to nanoseconds. */
nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
/* If arch requires, add in get_arch_timeoffset() */
return nsec + arch_gettimeoffset();
}
/** /**
* update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
* @tkr: Timekeeping readout base from which we take the update * @tkr: Timekeeping readout base from which we take the update
...@@ -267,18 +365,18 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) ...@@ -267,18 +365,18 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
* slightly wrong timestamp (a few nanoseconds). See * slightly wrong timestamp (a few nanoseconds). See
* @ktime_get_mono_fast_ns. * @ktime_get_mono_fast_ns.
*/ */
static void update_fast_timekeeper(struct tk_read_base *tkr) static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf)
{ {
struct tk_read_base *base = tk_fast_mono.base; struct tk_read_base *base = tkf->base;
/* Force readers off to base[1] */ /* Force readers off to base[1] */
raw_write_seqcount_latch(&tk_fast_mono.seq); raw_write_seqcount_latch(&tkf->seq);
/* Update base[0] */ /* Update base[0] */
memcpy(base, tkr, sizeof(*base)); memcpy(base, tkr, sizeof(*base));
/* Force readers back to base[0] */ /* Force readers back to base[0] */
raw_write_seqcount_latch(&tk_fast_mono.seq); raw_write_seqcount_latch(&tkf->seq);
/* Update base[1] */ /* Update base[1] */
memcpy(base + 1, base, sizeof(*base)); memcpy(base + 1, base, sizeof(*base));
...@@ -316,22 +414,33 @@ static void update_fast_timekeeper(struct tk_read_base *tkr) ...@@ -316,22 +414,33 @@ static void update_fast_timekeeper(struct tk_read_base *tkr)
* of the following timestamps. Callers need to be aware of that and * of the following timestamps. Callers need to be aware of that and
* deal with it. * deal with it.
*/ */
u64 notrace ktime_get_mono_fast_ns(void) static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
{ {
struct tk_read_base *tkr; struct tk_read_base *tkr;
unsigned int seq; unsigned int seq;
u64 now; u64 now;
do { do {
seq = raw_read_seqcount(&tk_fast_mono.seq); seq = raw_read_seqcount(&tkf->seq);
tkr = tk_fast_mono.base + (seq & 0x01); tkr = tkf->base + (seq & 0x01);
now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr); now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
} while (read_seqcount_retry(&tkf->seq, seq));
} while (read_seqcount_retry(&tk_fast_mono.seq, seq));
return now; return now;
} }
u64 ktime_get_mono_fast_ns(void)
{
return __ktime_get_fast_ns(&tk_fast_mono);
}
EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
u64 ktime_get_raw_fast_ns(void)
{
return __ktime_get_fast_ns(&tk_fast_raw);
}
EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
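A hedged sketch of a caller: because the read side only spins on the seqcount latch, any context, including NMI, can take a timestamp with these accessors. The hook below and its name are purely illustrative, and it assumes the declarations live in <linux/timekeeping.h> alongside ktime_get_mono_fast_ns():

#include <linux/timekeeping.h>
#include <linux/types.h>

static u64 last_nmi_stamp;	/* illustrative state, not part of this patch */

/* hypothetical NMI-context hook taking a raw monotonic timestamp */
static void example_nmi_hook(void)
{
	last_nmi_stamp = ktime_get_raw_fast_ns();
}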
/* Suspend-time cycles value for halted fast timekeeper. */ /* Suspend-time cycles value for halted fast timekeeper. */
static cycle_t cycles_at_suspend; static cycle_t cycles_at_suspend;
...@@ -353,12 +462,17 @@ static cycle_t dummy_clock_read(struct clocksource *cs) ...@@ -353,12 +462,17 @@ static cycle_t dummy_clock_read(struct clocksource *cs)
static void halt_fast_timekeeper(struct timekeeper *tk) static void halt_fast_timekeeper(struct timekeeper *tk)
{ {
static struct tk_read_base tkr_dummy; static struct tk_read_base tkr_dummy;
struct tk_read_base *tkr = &tk->tkr; struct tk_read_base *tkr = &tk->tkr_mono;
memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
cycles_at_suspend = tkr->read(tkr->clock); cycles_at_suspend = tkr->read(tkr->clock);
tkr_dummy.read = dummy_clock_read; tkr_dummy.read = dummy_clock_read;
update_fast_timekeeper(&tkr_dummy); update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
tkr = &tk->tkr_raw;
memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
tkr_dummy.read = dummy_clock_read;
update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
} }
#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
...@@ -369,8 +483,8 @@ static inline void update_vsyscall(struct timekeeper *tk) ...@@ -369,8 +483,8 @@ static inline void update_vsyscall(struct timekeeper *tk)
xt = timespec64_to_timespec(tk_xtime(tk)); xt = timespec64_to_timespec(tk_xtime(tk));
wm = timespec64_to_timespec(tk->wall_to_monotonic); wm = timespec64_to_timespec(tk->wall_to_monotonic);
update_vsyscall_old(&xt, &wm, tk->tkr.clock, tk->tkr.mult, update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult,
tk->tkr.cycle_last); tk->tkr_mono.cycle_last);
} }
static inline void old_vsyscall_fixup(struct timekeeper *tk) static inline void old_vsyscall_fixup(struct timekeeper *tk)
...@@ -387,11 +501,11 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk) ...@@ -387,11 +501,11 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)
* (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
* users are removed, this can be killed. * users are removed, this can be killed.
*/ */
remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1); remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1);
tk->tkr.xtime_nsec -= remainder; tk->tkr_mono.xtime_nsec -= remainder;
tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift; tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift;
tk->ntp_error += remainder << tk->ntp_error_shift; tk->ntp_error += remainder << tk->ntp_error_shift;
tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift; tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift;
} }
#else #else
#define old_vsyscall_fixup(tk) #define old_vsyscall_fixup(tk)
...@@ -456,17 +570,17 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) ...@@ -456,17 +570,17 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
*/ */
seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
nsec = (u32) tk->wall_to_monotonic.tv_nsec; nsec = (u32) tk->wall_to_monotonic.tv_nsec;
tk->tkr.base_mono = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
/* Update the monotonic raw base */ /* Update the monotonic raw base */
tk->base_raw = timespec64_to_ktime(tk->raw_time); tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);
/* /*
* The sum of the nanoseconds portions of xtime and * The sum of the nanoseconds portions of xtime and
* wall_to_monotonic can be greater/equal one second. Take * wall_to_monotonic can be greater/equal one second. Take
* this into account before updating tk->ktime_sec. * this into account before updating tk->ktime_sec.
*/ */
nsec += (u32)(tk->tkr.xtime_nsec >> tk->tkr.shift); nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
if (nsec >= NSEC_PER_SEC) if (nsec >= NSEC_PER_SEC)
seconds++; seconds++;
tk->ktime_sec = seconds; tk->ktime_sec = seconds;
...@@ -489,7 +603,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) ...@@ -489,7 +603,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
memcpy(&shadow_timekeeper, &tk_core.timekeeper, memcpy(&shadow_timekeeper, &tk_core.timekeeper,
sizeof(tk_core.timekeeper)); sizeof(tk_core.timekeeper));
update_fast_timekeeper(&tk->tkr); update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);
} }
/** /**
...@@ -501,22 +616,23 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) ...@@ -501,22 +616,23 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
*/ */
static void timekeeping_forward_now(struct timekeeper *tk) static void timekeeping_forward_now(struct timekeeper *tk)
{ {
struct clocksource *clock = tk->tkr.clock; struct clocksource *clock = tk->tkr_mono.clock;
cycle_t cycle_now, delta; cycle_t cycle_now, delta;
s64 nsec; s64 nsec;
cycle_now = tk->tkr.read(clock); cycle_now = tk->tkr_mono.read(clock);
delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
tk->tkr.cycle_last = cycle_now; tk->tkr_mono.cycle_last = cycle_now;
tk->tkr_raw.cycle_last = cycle_now;
tk->tkr.xtime_nsec += delta * tk->tkr.mult; tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult;
/* If arch requires, add in get_arch_timeoffset() */ /* If arch requires, add in get_arch_timeoffset() */
tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift; tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;
tk_normalize_xtime(tk); tk_normalize_xtime(tk);
nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);
timespec64_add_ns(&tk->raw_time, nsec); timespec64_add_ns(&tk->raw_time, nsec);
} }
...@@ -537,7 +653,7 @@ int __getnstimeofday64(struct timespec64 *ts) ...@@ -537,7 +653,7 @@ int __getnstimeofday64(struct timespec64 *ts)
seq = read_seqcount_begin(&tk_core.seq); seq = read_seqcount_begin(&tk_core.seq);
ts->tv_sec = tk->xtime_sec; ts->tv_sec = tk->xtime_sec;
nsecs = timekeeping_get_ns(&tk->tkr); nsecs = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq)); } while (read_seqcount_retry(&tk_core.seq, seq));
...@@ -577,8 +693,8 @@ ktime_t ktime_get(void) ...@@ -577,8 +693,8 @@ ktime_t ktime_get(void)
do { do {
seq = read_seqcount_begin(&tk_core.seq); seq = read_seqcount_begin(&tk_core.seq);
base = tk->tkr.base_mono; base = tk->tkr_mono.base;
nsecs = timekeeping_get_ns(&tk->tkr); nsecs = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq)); } while (read_seqcount_retry(&tk_core.seq, seq));
...@@ -603,8 +719,8 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs) ...@@ -603,8 +719,8 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs)
do { do {
seq = read_seqcount_begin(&tk_core.seq); seq = read_seqcount_begin(&tk_core.seq);
base = ktime_add(tk->tkr.base_mono, *offset); base = ktime_add(tk->tkr_mono.base, *offset);
nsecs = timekeeping_get_ns(&tk->tkr); nsecs = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq)); } while (read_seqcount_retry(&tk_core.seq, seq));
...@@ -645,8 +761,8 @@ ktime_t ktime_get_raw(void) ...@@ -645,8 +761,8 @@ ktime_t ktime_get_raw(void)
do { do {
seq = read_seqcount_begin(&tk_core.seq); seq = read_seqcount_begin(&tk_core.seq);
base = tk->base_raw; base = tk->tkr_raw.base;
nsecs = timekeeping_get_ns_raw(tk); nsecs = timekeeping_get_ns(&tk->tkr_raw);
} while (read_seqcount_retry(&tk_core.seq, seq)); } while (read_seqcount_retry(&tk_core.seq, seq));
...@@ -674,7 +790,7 @@ void ktime_get_ts64(struct timespec64 *ts) ...@@ -674,7 +790,7 @@ void ktime_get_ts64(struct timespec64 *ts)
do { do {
seq = read_seqcount_begin(&tk_core.seq); seq = read_seqcount_begin(&tk_core.seq);
ts->tv_sec = tk->xtime_sec; ts->tv_sec = tk->xtime_sec;
nsec = timekeeping_get_ns(&tk->tkr); nsec = timekeeping_get_ns(&tk->tkr_mono);
tomono = tk->wall_to_monotonic; tomono = tk->wall_to_monotonic;
} while (read_seqcount_retry(&tk_core.seq, seq)); } while (read_seqcount_retry(&tk_core.seq, seq));
...@@ -759,8 +875,8 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) ...@@ -759,8 +875,8 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
ts_real->tv_sec = tk->xtime_sec; ts_real->tv_sec = tk->xtime_sec;
ts_real->tv_nsec = 0; ts_real->tv_nsec = 0;
nsecs_raw = timekeeping_get_ns_raw(tk); nsecs_raw = timekeeping_get_ns(&tk->tkr_raw);
nsecs_real = timekeeping_get_ns(&tk->tkr); nsecs_real = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq)); } while (read_seqcount_retry(&tk_core.seq, seq));
...@@ -943,7 +1059,7 @@ static int change_clocksource(void *data) ...@@ -943,7 +1059,7 @@ static int change_clocksource(void *data)
*/ */
if (try_module_get(new->owner)) { if (try_module_get(new->owner)) {
if (!new->enable || new->enable(new) == 0) { if (!new->enable || new->enable(new) == 0) {
old = tk->tkr.clock; old = tk->tkr_mono.clock;
tk_setup_internals(tk, new); tk_setup_internals(tk, new);
if (old->disable) if (old->disable)
old->disable(old); old->disable(old);
...@@ -971,11 +1087,11 @@ int timekeeping_notify(struct clocksource *clock) ...@@ -971,11 +1087,11 @@ int timekeeping_notify(struct clocksource *clock)
{ {
struct timekeeper *tk = &tk_core.timekeeper; struct timekeeper *tk = &tk_core.timekeeper;
if (tk->tkr.clock == clock) if (tk->tkr_mono.clock == clock)
return 0; return 0;
stop_machine(change_clocksource, clock, NULL); stop_machine(change_clocksource, clock, NULL);
tick_clock_notify(); tick_clock_notify();
return tk->tkr.clock == clock ? 0 : -1; return tk->tkr_mono.clock == clock ? 0 : -1;
} }
/** /**
...@@ -993,7 +1109,7 @@ void getrawmonotonic64(struct timespec64 *ts) ...@@ -993,7 +1109,7 @@ void getrawmonotonic64(struct timespec64 *ts)
do { do {
seq = read_seqcount_begin(&tk_core.seq); seq = read_seqcount_begin(&tk_core.seq);
nsecs = timekeeping_get_ns_raw(tk); nsecs = timekeeping_get_ns(&tk->tkr_raw);
ts64 = tk->raw_time; ts64 = tk->raw_time;
} while (read_seqcount_retry(&tk_core.seq, seq)); } while (read_seqcount_retry(&tk_core.seq, seq));
...@@ -1016,7 +1132,7 @@ int timekeeping_valid_for_hres(void) ...@@ -1016,7 +1132,7 @@ int timekeeping_valid_for_hres(void)
do { do {
seq = read_seqcount_begin(&tk_core.seq); seq = read_seqcount_begin(&tk_core.seq);
ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
} while (read_seqcount_retry(&tk_core.seq, seq)); } while (read_seqcount_retry(&tk_core.seq, seq));
...@@ -1035,7 +1151,7 @@ u64 timekeeping_max_deferment(void) ...@@ -1035,7 +1151,7 @@ u64 timekeeping_max_deferment(void)
do { do {
seq = read_seqcount_begin(&tk_core.seq); seq = read_seqcount_begin(&tk_core.seq);
ret = tk->tkr.clock->max_idle_ns; ret = tk->tkr_mono.clock->max_idle_ns;
} while (read_seqcount_retry(&tk_core.seq, seq)); } while (read_seqcount_retry(&tk_core.seq, seq));
...@@ -1114,7 +1230,6 @@ void __init timekeeping_init(void) ...@@ -1114,7 +1230,6 @@ void __init timekeeping_init(void)
tk_set_xtime(tk, &now); tk_set_xtime(tk, &now);
tk->raw_time.tv_sec = 0; tk->raw_time.tv_sec = 0;
tk->raw_time.tv_nsec = 0; tk->raw_time.tv_nsec = 0;
tk->base_raw.tv64 = 0;
if (boot.tv_sec == 0 && boot.tv_nsec == 0) if (boot.tv_sec == 0 && boot.tv_nsec == 0)
boot = tk_xtime(tk); boot = tk_xtime(tk);
...@@ -1200,7 +1315,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta) ...@@ -1200,7 +1315,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta)
void timekeeping_resume(void) void timekeeping_resume(void)
{ {
struct timekeeper *tk = &tk_core.timekeeper; struct timekeeper *tk = &tk_core.timekeeper;
struct clocksource *clock = tk->tkr.clock; struct clocksource *clock = tk->tkr_mono.clock;
unsigned long flags; unsigned long flags;
struct timespec64 ts_new, ts_delta; struct timespec64 ts_new, ts_delta;
struct timespec tmp; struct timespec tmp;
...@@ -1228,16 +1343,16 @@ void timekeeping_resume(void) ...@@ -1228,16 +1343,16 @@ void timekeeping_resume(void)
* The less preferred source will only be tried if there is no better * The less preferred source will only be tried if there is no better
* usable source. The rtc part is handled separately in rtc core code. * usable source. The rtc part is handled separately in rtc core code.
*/ */
cycle_now = tk->tkr.read(clock); cycle_now = tk->tkr_mono.read(clock);
if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
cycle_now > tk->tkr.cycle_last) { cycle_now > tk->tkr_mono.cycle_last) {
u64 num, max = ULLONG_MAX; u64 num, max = ULLONG_MAX;
u32 mult = clock->mult; u32 mult = clock->mult;
u32 shift = clock->shift; u32 shift = clock->shift;
s64 nsec = 0; s64 nsec = 0;
cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
tk->tkr.mask); tk->tkr_mono.mask);
/* /*
* "cycle_delta * mutl" may cause 64 bits overflow, if the * "cycle_delta * mutl" may cause 64 bits overflow, if the
...@@ -1263,7 +1378,9 @@ void timekeeping_resume(void) ...@@ -1263,7 +1378,9 @@ void timekeeping_resume(void)
__timekeeping_inject_sleeptime(tk, &ts_delta); __timekeeping_inject_sleeptime(tk, &ts_delta);
/* Re-base the last cycle value */ /* Re-base the last cycle value */
tk->tkr.cycle_last = cycle_now; tk->tkr_mono.cycle_last = cycle_now;
tk->tkr_raw.cycle_last = cycle_now;
tk->ntp_error = 0; tk->ntp_error = 0;
timekeeping_suspended = 0; timekeeping_suspended = 0;
timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
...@@ -1416,15 +1533,15 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk, ...@@ -1416,15 +1533,15 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
* *
* XXX - TODO: Doc ntp_error calculation. * XXX - TODO: Doc ntp_error calculation.
*/ */
if ((mult_adj > 0) && (tk->tkr.mult + mult_adj < mult_adj)) { if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {
/* NTP adjustment caused clocksource mult overflow */ /* NTP adjustment caused clocksource mult overflow */
WARN_ON_ONCE(1); WARN_ON_ONCE(1);
return; return;
} }
tk->tkr.mult += mult_adj; tk->tkr_mono.mult += mult_adj;
tk->xtime_interval += interval; tk->xtime_interval += interval;
tk->tkr.xtime_nsec -= offset; tk->tkr_mono.xtime_nsec -= offset;
tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
} }
...@@ -1486,13 +1603,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) ...@@ -1486,13 +1603,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
tk->ntp_err_mult = 0; tk->ntp_err_mult = 0;
} }
if (unlikely(tk->tkr.clock->maxadj && if (unlikely(tk->tkr_mono.clock->maxadj &&
(abs(tk->tkr.mult - tk->tkr.clock->mult) (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
> tk->tkr.clock->maxadj))) { > tk->tkr_mono.clock->maxadj))) {
printk_once(KERN_WARNING printk_once(KERN_WARNING
"Adjusting %s more than 11%% (%ld vs %ld)\n", "Adjusting %s more than 11%% (%ld vs %ld)\n",
tk->tkr.clock->name, (long)tk->tkr.mult, tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
(long)tk->tkr.clock->mult + tk->tkr.clock->maxadj); (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
} }
/* /*
...@@ -1509,9 +1626,9 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) ...@@ -1509,9 +1626,9 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
* We'll correct this error next time through this function, when * We'll correct this error next time through this function, when
* xtime_nsec is not as small. * xtime_nsec is not as small.
*/ */
if (unlikely((s64)tk->tkr.xtime_nsec < 0)) { if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
s64 neg = -(s64)tk->tkr.xtime_nsec; s64 neg = -(s64)tk->tkr_mono.xtime_nsec;
tk->tkr.xtime_nsec = 0; tk->tkr_mono.xtime_nsec = 0;
tk->ntp_error += neg << tk->ntp_error_shift; tk->ntp_error += neg << tk->ntp_error_shift;
} }
} }
...@@ -1526,13 +1643,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) ...@@ -1526,13 +1643,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
*/ */
static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
{ {
u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift; u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
unsigned int clock_set = 0; unsigned int clock_set = 0;
while (tk->tkr.xtime_nsec >= nsecps) { while (tk->tkr_mono.xtime_nsec >= nsecps) {
int leap; int leap;
tk->tkr.xtime_nsec -= nsecps; tk->tkr_mono.xtime_nsec -= nsecps;
tk->xtime_sec++; tk->xtime_sec++;
/* Figure out if its a leap sec and apply if needed */ /* Figure out if its a leap sec and apply if needed */
...@@ -1577,9 +1694,10 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, ...@@ -1577,9 +1694,10 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
/* Accumulate one shifted interval */ /* Accumulate one shifted interval */
offset -= interval; offset -= interval;
tk->tkr.cycle_last += interval; tk->tkr_mono.cycle_last += interval;
tk->tkr_raw.cycle_last += interval;
tk->tkr.xtime_nsec += tk->xtime_interval << shift; tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
*clock_set |= accumulate_nsecs_to_secs(tk); *clock_set |= accumulate_nsecs_to_secs(tk);
/* Accumulate raw time */ /* Accumulate raw time */
...@@ -1622,14 +1740,17 @@ void update_wall_time(void) ...@@ -1622,14 +1740,17 @@ void update_wall_time(void)
#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
offset = real_tk->cycle_interval; offset = real_tk->cycle_interval;
#else #else
offset = clocksource_delta(tk->tkr.read(tk->tkr.clock), offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
tk->tkr.cycle_last, tk->tkr.mask); tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
#endif #endif
/* Check if there's really nothing to do */ /* Check if there's really nothing to do */
if (offset < real_tk->cycle_interval) if (offset < real_tk->cycle_interval)
goto out; goto out;
/* Do some additional sanity checking */
timekeeping_check_update(real_tk, offset);
/* /*
* With NO_HZ we may have to accumulate many cycle_intervals * With NO_HZ we may have to accumulate many cycle_intervals
* (think "ticks") worth of time at once. To do this efficiently, * (think "ticks") worth of time at once. To do this efficiently,
...@@ -1784,8 +1905,8 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot, ...@@ -1784,8 +1905,8 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot,
do { do {
seq = read_seqcount_begin(&tk_core.seq); seq = read_seqcount_begin(&tk_core.seq);
base = tk->tkr.base_mono; base = tk->tkr_mono.base;
nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift; nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
*offs_real = tk->offs_real; *offs_real = tk->offs_real;
*offs_boot = tk->offs_boot; *offs_boot = tk->offs_boot;
...@@ -1816,8 +1937,8 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot, ...@@ -1816,8 +1937,8 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot,
do { do {
seq = read_seqcount_begin(&tk_core.seq); seq = read_seqcount_begin(&tk_core.seq);
base = tk->tkr.base_mono; base = tk->tkr_mono.base;
nsecs = timekeeping_get_ns(&tk->tkr); nsecs = timekeeping_get_ns(&tk->tkr_mono);
*offs_real = tk->offs_real; *offs_real = tk->offs_real;
*offs_boot = tk->offs_boot; *offs_boot = tk->offs_boot;
......
...@@ -228,9 +228,35 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) ...@@ -228,9 +228,35 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
print_name_offset(m, dev->set_next_event); print_name_offset(m, dev->set_next_event);
SEQ_printf(m, "\n"); SEQ_printf(m, "\n");
if (dev->set_mode) {
SEQ_printf(m, " set_mode: "); SEQ_printf(m, " set_mode: ");
print_name_offset(m, dev->set_mode); print_name_offset(m, dev->set_mode);
SEQ_printf(m, "\n"); SEQ_printf(m, "\n");
} else {
if (dev->set_mode_shutdown) {
SEQ_printf(m, " shutdown: ");
print_name_offset(m, dev->set_mode_shutdown);
SEQ_printf(m, "\n");
}
if (dev->set_mode_periodic) {
SEQ_printf(m, " periodic: ");
print_name_offset(m, dev->set_mode_periodic);
SEQ_printf(m, "\n");
}
if (dev->set_mode_oneshot) {
SEQ_printf(m, " oneshot: ");
print_name_offset(m, dev->set_mode_oneshot);
SEQ_printf(m, "\n");
}
if (dev->set_mode_resume) {
SEQ_printf(m, " resume: ");
print_name_offset(m, dev->set_mode_resume);
SEQ_printf(m, "\n");
}
}
SEQ_printf(m, " event_handler: "); SEQ_printf(m, " event_handler: ");
print_name_offset(m, dev->event_handler); print_name_offset(m, dev->event_handler);
......
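With the per-state callbacks, a tick device entry in /proc/timer_list now shows each populated hook on its own line instead of a single set_mode line. The fragment below is illustrative only; the callback names are hypothetical and depend on the clock event driver:

 shutdown:       foo_timer_shutdown
 periodic:       foo_timer_set_periodic
 oneshot:        foo_timer_set_oneshot
 event_handler:  hrtimer_interrupt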
...@@ -865,6 +865,19 @@ config SCHED_STACK_END_CHECK ...@@ -865,6 +865,19 @@ config SCHED_STACK_END_CHECK
data corruption or a sporadic crash at a later stage once the region data corruption or a sporadic crash at a later stage once the region
is examined. The runtime overhead introduced is minimal. is examined. The runtime overhead introduced is minimal.
config DEBUG_TIMEKEEPING
bool "Enable extra timekeeping sanity checking"
help
This option will enable additional timekeeping sanity checks
which may be helpful when diagnosing issues where timekeeping
problems are suspected.
This may include checks in the timekeeping hotpaths, so this
option may have a (very small) performance impact on some
workloads.
If unsure, say N.
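In practice, building with CONFIG_DEBUG_TIMEKEEPING=y enables the rate-limited warnings added in timekeeping_check_update() above, which surface in the kernel log via printk_deferred(); with the option disabled, timekeeping_get_delta() compiles down to the plain clocksource read shown in the #else branch.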
config TIMER_STATS config TIMER_STATS
bool "Collect kernel timers statistics" bool "Collect kernel timers statistics"
depends on DEBUG_KERNEL && PROC_FS depends on DEBUG_KERNEL && PROC_FS
......