Commit 84c2e179 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'timers-core-2022-03-21' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer and timekeeping updates from Thomas Gleixner:
 "Core code:

   - Make the NOHZ handling of the timekeeping/tick core more robust to
     prevent a rare jiffies update stall.

   - Handle softirqs in the NOHZ/idle case correctly

  Drivers:

   - Add support for event stream scaling of the 1GHz counter on ARM(64)

   - Correct an error code check in the timer-of layer

   - The usual cleanups and improvements all over the place"

* tag 'timers-core-2022-03-21' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits)
  lib/irq_poll: Declare IRQ_POLL softirq vector as ksoftirqd-parking safe
  tick/rcu: Stop allowing RCU_SOFTIRQ in idle
  tick/rcu: Remove obsolete rcu_needs_cpu() parameters
  tick: Detect and fix jiffies update stall
  clocksource/drivers/timer-of: Check return value of of_iomap in timer_of_base_init()
  clocksource/drivers/timer-microchip-pit64b: Use 5MHz for clockevent
  clocksource/drivers/timer-microchip-pit64b: Use notrace
  clocksource/drivers/timer-microchip-pit64b: Remove mmio selection
  dt-bindings: timer: Tegra: Convert text bindings to yaml
  clocksource/drivers/imx-tpm: Move tpm_read_sched_clock() under CONFIG_ARM
  clocksource/drivers/arm_arch_timer: Use event stream scaling when available
  clocksource/drivers/exynos_mct: Increase the size of name array
  clocksource/drivers/exynos_mct: Bump up mct max irq number
  clocksource/drivers/exynos_mct: Remove mct interrupt index enum
  clocksource/drivers/exynos_mct: Handle DTS with higher number of interrupts
  clocksource/drivers/timer-ti-dm: Fix regression from errata i940 fix
  clocksource/drivers/imx-tpm: Exclude sched clock for ARM64
  clocksource: Add a Kconfig option for WATCHDOG_MAX_SKEW
  clocksource/drivers/imx-tpm: Update name of clkevt
  clocksource/drivers/imx-tpm: Add CLOCK_EVT_FEAT_DYNIRQ
  ...
parents bba90e09 b166e525
# SPDX-License-Identifier: GPL-2.0-only
%YAML 1.2
---
$id: "http://devicetree.org/schemas/timer/nvidia,tegra-timer.yaml#"
$schema: "http://devicetree.org/meta-schemas/core.yaml#"
title: NVIDIA Tegra timer
maintainers:
- Stephen Warren <swarren@nvidia.com>
allOf:
- if:
properties:
compatible:
contains:
const: nvidia,tegra210-timer
then:
properties:
interrupts:
# Either a single combined interrupt or up to 14 individual interrupts
minItems: 1
maxItems: 14
description: >
A list of 14 interrupts; one per each timer channels 0 through 13
- if:
properties:
compatible:
oneOf:
- items:
- enum:
- nvidia,tegra114-timer
- nvidia,tegra124-timer
- nvidia,tegra132-timer
- const: nvidia,tegra30-timer
- items:
- const: nvidia,tegra30-timer
- const: nvidia,tegra20-timer
then:
properties:
interrupts:
# Either a single combined interrupt or up to 6 individual interrupts
minItems: 1
maxItems: 6
description: >
A list of 6 interrupts; one per each of timer channels 1 through 5,
and one for the shared interrupt for the remaining channels.
- if:
properties:
compatible:
const: nvidia,tegra20-timer
then:
properties:
interrupts:
# Either a single combined interrupt or up to 4 individual interrupts
minItems: 1
maxItems: 4
description: |
A list of 4 interrupts; one per timer channel.
properties:
compatible:
oneOf:
- const: nvidia,tegra210-timer
description: >
The Tegra210 timer provides fourteen 29-bit timer counters and one 32-bit
timestamp counter. The TMRs run at either a fixed 1 MHz clock rate derived
from the oscillator clock (TMR0-TMR9) or directly at the oscillator clock
(TMR10-TMR13). Each TMR can be programmed to generate one-shot, periodic,
or watchdog interrupts.
- items:
- enum:
- nvidia,tegra114-timer
- nvidia,tegra124-timer
- nvidia,tegra132-timer
- const: nvidia,tegra30-timer
- items:
- const: nvidia,tegra30-timer
- const: nvidia,tegra20-timer
description: >
The Tegra30 timer provides ten 29-bit timer channels, a single 32-bit free
running counter, and 5 watchdog modules. The first two channels may also
trigger a legacy watchdog reset.
- const: nvidia,tegra20-timer
description: >
The Tegra20 timer provides four 29-bit timer channels and a single 32-bit free
running counter. The first two channels may also trigger a watchdog reset.
reg:
maxItems: 1
interrupts: true
clocks:
maxItems: 1
clock-names:
items:
- const: timer
required:
- compatible
- reg
- interrupts
- clocks
additionalProperties: false
examples:
- |
#include <dt-bindings/interrupt-controller/irq.h>
timer@60005000 {
compatible = "nvidia,tegra30-timer", "nvidia,tegra20-timer";
reg = <0x60005000 0x400>;
interrupts = <0 0 IRQ_TYPE_LEVEL_HIGH>,
<0 1 IRQ_TYPE_LEVEL_HIGH>,
<0 41 IRQ_TYPE_LEVEL_HIGH>,
<0 42 IRQ_TYPE_LEVEL_HIGH>,
<0 121 IRQ_TYPE_LEVEL_HIGH>,
<0 122 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&tegra_car 214>;
};
- |
#include <dt-bindings/clock/tegra210-car.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/interrupt-controller/irq.h>
timer@60005000 {
compatible = "nvidia,tegra210-timer";
reg = <0x60005000 0x400>;
interrupts = <GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 176 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 178 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 179 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&tegra_car TEGRA210_CLK_TIMER>;
clock-names = "timer";
};
NVIDIA Tegra20 timer
The Tegra20 timer provides four 29-bit timer channels and a single 32-bit free
running counter. The first two channels may also trigger a watchdog reset.
Required properties:
- compatible : should be "nvidia,tegra20-timer".
- reg : Specifies base physical address and size of the registers.
- interrupts : A list of 4 interrupts; one per timer channel.
- clocks : Must contain one entry, for the module clock.
See ../clocks/clock-bindings.txt for details.
Example:
timer {
compatible = "nvidia,tegra20-timer";
reg = <0x60005000 0x60>;
interrupts = <0 0 0x04
0 1 0x04
0 41 0x04
0 42 0x04>;
clocks = <&tegra_car 132>;
};
NVIDIA Tegra210 timer
The Tegra210 timer provides fourteen 29-bit timer counters and one 32-bit
timestamp counter. The TMRs run at either a fixed 1 MHz clock rate derived
from the oscillator clock (TMR0-TMR9) or directly at the oscillator clock
(TMR10-TMR13). Each TMR can be programmed to generate one-shot, periodic,
or watchdog interrupts.
Required properties:
- compatible : "nvidia,tegra210-timer".
- reg : Specifies base physical address and size of the registers.
- interrupts : A list of 14 interrupts; one per each timer channels 0 through
13.
- clocks : Must contain one entry, for the module clock.
See ../clocks/clock-bindings.txt for details.
timer@60005000 {
compatible = "nvidia,tegra210-timer";
reg = <0x0 0x60005000 0x0 0x400>;
interrupts = <GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 176 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 178 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 179 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&tegra_car TEGRA210_CLK_TIMER>;
clock-names = "timer";
};
NVIDIA Tegra30 timer
The Tegra30 timer provides ten 29-bit timer channels, a single 32-bit free
running counter, and 5 watchdog modules. The first two channels may also
trigger a legacy watchdog reset.
Required properties:
- compatible : For Tegra30, must contain "nvidia,tegra30-timer". Otherwise,
must contain '"nvidia,<chip>-timer", "nvidia,tegra30-timer"' where
<chip> is tegra124 or tegra132.
- reg : Specifies base physical address and size of the registers.
- interrupts : A list of 6 interrupts; one per each of timer channels 1
through 5, and one for the shared interrupt for the remaining channels.
- clocks : Must contain one entry, for the module clock.
See ../clocks/clock-bindings.txt for details.
timer {
compatible = "nvidia,tegra30-timer", "nvidia,tegra20-timer";
reg = <0x60005000 0x400>;
interrupts = <0 0 0x04
0 1 0x04
0 41 0x04
0 42 0x04
0 121 0x04
0 122 0x04>;
clocks = <&tegra_car 214>;
};
......@@ -3482,8 +3482,7 @@ timer14: timer@0 {
ti,timer-pwm;
};
};
target-module@2c000 { /* 0x4882c000, ap 17 02.0 */
timer15_target: target-module@2c000 { /* 0x4882c000, ap 17 02.0 */
compatible = "ti,sysc-omap4-timer", "ti,sysc";
reg = <0x2c000 0x4>,
<0x2c010 0x4>;
......@@ -3511,7 +3510,7 @@ timer15: timer@0 {
};
};
target-module@2e000 { /* 0x4882e000, ap 19 14.0 */
timer16_target: target-module@2e000 { /* 0x4882e000, ap 19 14.0 */
compatible = "ti,sysc-omap4-timer", "ti,sysc";
reg = <0x2e000 0x4>,
<0x2e010 0x4>;
......
......@@ -1339,20 +1339,20 @@ timer@0 {
};
/* Local timers, see ARM architected timer wrap erratum i940 */
&timer3_target {
&timer15_target {
ti,no-reset-on-init;
ti,no-idle;
timer@0 {
assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER3_CLKCTRL 24>;
assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER15_CLKCTRL 24>;
assigned-clock-parents = <&timer_sys_clk_div>;
};
};
&timer4_target {
&timer16_target {
ti,no-reset-on-init;
ti,no-idle;
timer@0 {
assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER4_CLKCTRL 24>;
assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER16_CLKCTRL 24>;
assigned-clock-parents = <&timer_sys_clk_div>;
};
};
......@@ -713,7 +713,6 @@ config INGENIC_OST
config MICROCHIP_PIT64B
bool "Microchip PIT64B support"
depends on OF || COMPILE_TEST
select CLKSRC_MMIO
select TIMER_OF
help
This option enables Microchip PIT64B timer for Atmel
......
......@@ -880,10 +880,19 @@ static void __arch_timer_setup(unsigned type,
clockevents_config_and_register(clk, arch_timer_rate, 0xf, max_delta);
}
static void arch_timer_evtstrm_enable(int divider)
static void arch_timer_evtstrm_enable(unsigned int divider)
{
u32 cntkctl = arch_timer_get_cntkctl();
#ifdef CONFIG_ARM64
/* ECV is likely to require a large divider. Use the EVNTIS flag. */
if (cpus_have_const_cap(ARM64_HAS_ECV) && divider > 15) {
cntkctl |= ARCH_TIMER_EVT_INTERVAL_SCALE;
divider -= 8;
}
#endif
divider = min(divider, 15U);
cntkctl &= ~ARCH_TIMER_EVT_TRIGGER_MASK;
/* Set the divider and enable virtual event stream */
cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT)
......@@ -912,7 +921,7 @@ static void arch_timer_configure_evtstream(void)
lsb++;
/* enable event stream */
arch_timer_evtstrm_enable(max(0, min(lsb, 15)));
arch_timer_evtstrm_enable(max(0, lsb));
}
static void arch_counter_set_user_access(void)
......
......@@ -60,27 +60,18 @@
#define MCT_CLKEVENTS_RATING 350
#endif
/* There are four Global timers starting with 0 offset */
#define MCT_G0_IRQ 0
/* Local timers count starts after global timer count */
#define MCT_L0_IRQ 4
/* Max number of IRQ as per DT binding document */
#define MCT_NR_IRQS 20
enum {
MCT_INT_SPI,
MCT_INT_PPI
};
enum {
MCT_G0_IRQ,
MCT_G1_IRQ,
MCT_G2_IRQ,
MCT_G3_IRQ,
MCT_L0_IRQ,
MCT_L1_IRQ,
MCT_L2_IRQ,
MCT_L3_IRQ,
MCT_L4_IRQ,
MCT_L5_IRQ,
MCT_L6_IRQ,
MCT_L7_IRQ,
MCT_NR_IRQS,
};
static void __iomem *reg_base;
static unsigned long clk_rate;
static unsigned int mct_int_type;
......@@ -89,7 +80,11 @@ static int mct_irqs[MCT_NR_IRQS];
struct mct_clock_event_device {
struct clock_event_device evt;
unsigned long base;
char name[10];
/**
* The length of the name must be adjusted if number of
* local timer interrupts grow over two digits
*/
char name[11];
};
static void exynos4_mct_write(unsigned int value, unsigned long offset)
......@@ -541,6 +536,11 @@ static int __init exynos4_timer_interrupts(struct device_node *np,
* irqs are specified.
*/
nr_irqs = of_irq_count(np);
if (nr_irqs > ARRAY_SIZE(mct_irqs)) {
pr_err("exynos-mct: too many (%d) interrupts configured in DT\n",
nr_irqs);
nr_irqs = ARRAY_SIZE(mct_irqs);
}
for (i = MCT_L0_IRQ; i < nr_irqs; i++)
mct_irqs[i] = irq_of_parse_and_map(np, i);
......@@ -553,11 +553,14 @@ static int __init exynos4_timer_interrupts(struct device_node *np,
mct_irqs[MCT_L0_IRQ], err);
} else {
for_each_possible_cpu(cpu) {
int mct_irq = mct_irqs[MCT_L0_IRQ + cpu];
int mct_irq;
struct mct_clock_event_device *pcpu_mevt =
per_cpu_ptr(&percpu_mct_tick, cpu);
pcpu_mevt->evt.irq = -1;
if (MCT_L0_IRQ + cpu >= ARRAY_SIZE(mct_irqs))
break;
mct_irq = mct_irqs[MCT_L0_IRQ + cpu];
irq_set_status_flags(mct_irq, IRQ_NOAUTOEN);
if (request_irq(mct_irq,
......
......@@ -110,7 +110,7 @@ static struct timer_of to_sysctr = {
},
.of_irq = {
.handler = sysctr_timer_interrupt,
.flags = IRQF_TIMER | IRQF_IRQPOLL,
.flags = IRQF_TIMER,
},
.of_clk = {
.name = "per",
......
......@@ -32,8 +32,8 @@
#define TPM_C0SC_CHF_MASK (0x1 << 7)
#define TPM_C0V 0x24
static int counter_width;
static void __iomem *timer_base;
static int counter_width __ro_after_init;
static void __iomem *timer_base __ro_after_init;
static inline void tpm_timer_disable(void)
{
......@@ -73,12 +73,12 @@ static unsigned long tpm_read_current_timer(void)
{
return tpm_read_counter();
}
#endif
static u64 notrace tpm_read_sched_clock(void)
{
return tpm_read_counter();
}
#endif
static int tpm_set_next_event(unsigned long delta,
struct clock_event_device *evt)
......@@ -127,9 +127,9 @@ static irqreturn_t tpm_timer_interrupt(int irq, void *dev_id)
static struct timer_of to_tpm = {
.flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK,
.clkevt = {
.name = "i.MX7ULP TPM Timer",
.name = "i.MX TPM Timer",
.rating = 200,
.features = CLOCK_EVT_FEAT_ONESHOT,
.features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_DYNIRQ,
.set_state_shutdown = tpm_set_state_shutdown,
.set_state_oneshot = tpm_set_state_oneshot,
.set_next_event = tpm_set_next_event,
......@@ -137,7 +137,7 @@ static struct timer_of to_tpm = {
},
.of_irq = {
.handler = tpm_timer_interrupt,
.flags = IRQF_TIMER | IRQF_IRQPOLL,
.flags = IRQF_TIMER,
},
.of_clk = {
.name = "per",
......@@ -150,10 +150,10 @@ static int __init tpm_clocksource_init(void)
tpm_delay_timer.read_current_timer = &tpm_read_current_timer;
tpm_delay_timer.freq = timer_of_rate(&to_tpm) >> 3;
register_current_timer_delay(&tpm_delay_timer);
#endif
sched_clock_register(tpm_read_sched_clock, counter_width,
timer_of_rate(&to_tpm) >> 3);
#endif
return clocksource_mmio_init(timer_base + TPM_CNT,
"imx-tpm",
......
......@@ -42,8 +42,7 @@
#define MCHP_PIT64B_LSBMASK GENMASK_ULL(31, 0)
#define MCHP_PIT64B_PRES_TO_MODE(p) (MCHP_PIT64B_MR_PRES & ((p) << 8))
#define MCHP_PIT64B_MODE_TO_PRES(m) ((MCHP_PIT64B_MR_PRES & (m)) >> 8)
#define MCHP_PIT64B_DEF_CS_FREQ 5000000UL /* 5 MHz */
#define MCHP_PIT64B_DEF_CE_FREQ 32768 /* 32 KHz */
#define MCHP_PIT64B_DEF_FREQ 5000000UL /* 5 MHz */
#define MCHP_PIT64B_NAME "pit64b"
......@@ -165,7 +164,7 @@ static u64 mchp_pit64b_clksrc_read(struct clocksource *cs)
return mchp_pit64b_cnt_read(mchp_pit64b_cs_base);
}
static u64 mchp_pit64b_sched_read_clk(void)
static u64 notrace mchp_pit64b_sched_read_clk(void)
{
return mchp_pit64b_cnt_read(mchp_pit64b_cs_base);
}
......@@ -418,7 +417,6 @@ static int __init mchp_pit64b_init_clkevt(struct mchp_pit64b_timer *timer,
static int __init mchp_pit64b_dt_init_timer(struct device_node *node,
bool clkevt)
{
u32 freq = clkevt ? MCHP_PIT64B_DEF_CE_FREQ : MCHP_PIT64B_DEF_CS_FREQ;
struct mchp_pit64b_timer timer;
unsigned long clk_rate;
u32 irq = 0;
......@@ -446,7 +444,7 @@ static int __init mchp_pit64b_dt_init_timer(struct device_node *node,
}
/* Initialize mode (prescaler + SGCK bit). To be used at runtime. */
ret = mchp_pit64b_init_mode(&timer, freq);
ret = mchp_pit64b_init_mode(&timer, MCHP_PIT64B_DEF_FREQ);
if (ret)
goto irq_unmap;
......
......@@ -157,9 +157,9 @@ static __init int timer_of_base_init(struct device_node *np,
of_base->base = of_base->name ?
of_io_request_and_map(np, of_base->index, of_base->name) :
of_iomap(np, of_base->index);
if (IS_ERR(of_base->base)) {
pr_err("Failed to iomap (%s)\n", of_base->name);
return PTR_ERR(of_base->base);
if (IS_ERR_OR_NULL(of_base->base)) {
pr_err("Failed to iomap (%s:%s)\n", np->name, of_base->name);
return of_base->base ? PTR_ERR(of_base->base) : -ENOMEM;
}
return 0;
......
......@@ -694,9 +694,9 @@ static int __init dmtimer_percpu_quirk_init(struct device_node *np, u32 pa)
return 0;
}
if (pa == 0x48034000) /* dra7 dmtimer3 */
if (pa == 0x4882c000) /* dra7 dmtimer15 */
return dmtimer_percpu_timer_init(np, 0);
else if (pa == 0x48036000) /* dra7 dmtimer4 */
else if (pa == 0x4882e000) /* dra7 dmtimer16 */
return dmtimer_percpu_timer_init(np, 1);
return 0;
......
......@@ -56,6 +56,7 @@ enum arch_timer_spi_nr {
#define ARCH_TIMER_EVT_TRIGGER_MASK (0xF << ARCH_TIMER_EVT_TRIGGER_SHIFT)
#define ARCH_TIMER_USR_VT_ACCESS_EN (1 << 8) /* virtual timer registers */
#define ARCH_TIMER_USR_PT_ACCESS_EN (1 << 9) /* physical timer registers */
#define ARCH_TIMER_EVT_INTERVAL_SCALE (1 << 17) /* EVNTIS in the ARMv8 ARM */
#define ARCH_TIMER_EVT_STREAM_PERIOD_US 100
#define ARCH_TIMER_EVT_STREAM_FREQ \
......
......@@ -579,7 +579,16 @@ enum
NR_SOFTIRQS
};
#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ))
/*
* The following vectors can be safely ignored after ksoftirqd is parked:
*
* _ RCU:
* 1) rcutree_migrate_callbacks() migrates the queue.
* 2) rcu_report_dead() reports the final quiescent states.
*
* _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue
*/
#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ))
/* map softirq index to softirq name. update 'softirq_to_name' in
* kernel/softirq.c when adding a new softirq.
......
......@@ -64,9 +64,8 @@ static inline void rcu_softirq_qs(void)
rcu_tasks_qs(current, (preempt)); \
} while (0)
static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
static inline int rcu_needs_cpu(void)
{
*nextevt = KTIME_MAX;
return 0;
}
......
......@@ -19,7 +19,7 @@
void rcu_softirq_qs(void);
void rcu_note_context_switch(bool preempt);
int rcu_needs_cpu(u64 basem, u64 *nextevt);
int rcu_needs_cpu(void);
void rcu_cpu_stall_reset(void);
/*
......
......@@ -1086,9 +1086,8 @@ void rcu_irq_enter_irqson(void)
* Just check whether or not this CPU has non-offloaded RCU callbacks
* queued.
*/
int rcu_needs_cpu(u64 basemono, u64 *nextevt)
int rcu_needs_cpu(void)
{
*nextevt = KTIME_MAX;
return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
!rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
}
......
......@@ -181,5 +181,14 @@ config HIGH_RES_TIMERS
hardware is not capable then this option only increases
the size of the kernel image.
config CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
int "Clocksource watchdog maximum allowable skew (in μs)"
depends on CLOCKSOURCE_WATCHDOG
range 50 1000
default 100
help
Specify the maximum amount of allowable watchdog skew in
microseconds before reporting the clocksource to be unstable.
endmenu
endif
......@@ -107,7 +107,13 @@ static u64 suspend_start;
* This delay could be due to SMIs, NMIs, or to VCPU preemptions. Used as
* a lower bound for cs->uncertainty_margin values when registering clocks.
*/
#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC)
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
#define MAX_SKEW_USEC CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
#else
#define MAX_SKEW_USEC 100
#endif
#define WATCHDOG_MAX_SKEW (MAX_SKEW_USEC * NSEC_PER_USEC)
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
......
......@@ -169,6 +169,8 @@ static ktime_t tick_init_jiffy_update(void)
return period;
}
#define MAX_STALLED_JIFFIES 5
static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
int cpu = smp_processor_id();
......@@ -196,6 +198,21 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
if (tick_do_timer_cpu == cpu)
tick_do_update_jiffies64(now);
/*
* If jiffies update stalled for too long (timekeeper in stop_machine()
* or VMEXIT'ed for several msecs), force an update.
*/
if (ts->last_tick_jiffies != jiffies) {
ts->stalled_jiffies = 0;
ts->last_tick_jiffies = READ_ONCE(jiffies);
} else {
if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) {
tick_do_update_jiffies64(now);
ts->stalled_jiffies = 0;
ts->last_tick_jiffies = READ_ONCE(jiffies);
}
}
if (ts->inidle)
ts->got_idle_tick = 1;
}
......@@ -768,7 +785,7 @@ static inline bool local_timer_softirq_pending(void)
static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
{
u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
u64 basemono, next_tick, delta, expires;
unsigned long basejiff;
unsigned int seq;
......@@ -791,7 +808,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
* minimal delta which brings us back to this place
* immediately. Lather, rinse and repeat...
*/
if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
if (rcu_needs_cpu() || arch_needs_cpu() ||
irq_work_needs_cpu() || local_timer_softirq_pending()) {
next_tick = basemono + TICK_NSEC;
} else {
......@@ -802,10 +819,8 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
* disabled this also looks at the next expiring
* hrtimer.
*/
next_tmr = get_next_timer_interrupt(basejiff, basemono);
ts->next_timer = next_tmr;
/* Take the next rcu event into account */
next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
next_tick = get_next_timer_interrupt(basejiff, basemono);
ts->next_timer = next_tick;
}
/*
......@@ -984,6 +999,45 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
__tick_nohz_full_update_tick(ts, ktime_get());
}
/*
* A pending softirq outside an IRQ (or softirq disabled section) context
* should be waiting for ksoftirqd to handle it. Therefore we shouldn't
* reach here due to the need_resched() early check in can_stop_idle_tick().
*
* However if we are between CPUHP_AP_SMPBOOT_THREADS and CPU_TEARDOWN_CPU on the
* cpu_down() process, softirqs can still be raised while ksoftirqd is parked,
* triggering the below since wakep_softirqd() is ignored.
*
*/
static bool report_idle_softirq(void)
{
static int ratelimit;
unsigned int pending = local_softirq_pending();
if (likely(!pending))
return false;
/* Some softirqs claim to be safe against hotplug and ksoftirqd parking */
if (!cpu_active(smp_processor_id())) {
pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK;
if (!pending)
return false;
}
if (ratelimit < 10)
return false;
/* On RT, softirqs handling may be waiting on some lock */
if (!local_bh_blocked())
return false;
pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n",
pending);
ratelimit++;
return true;
}
static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
{
/*
......@@ -1010,17 +1064,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
if (need_resched())
return false;
if (unlikely(local_softirq_pending())) {
static int ratelimit;
if (ratelimit < 10 && !local_bh_blocked() &&
(local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n",
(unsigned int) local_softirq_pending());
ratelimit++;
}
if (unlikely(report_idle_softirq()))
return false;
}
if (tick_nohz_full_enabled()) {
/*
......
......@@ -49,6 +49,8 @@ enum tick_nohz_mode {
* @timer_expires_base: Base time clock monotonic for @timer_expires
* @next_timer: Expiry time of next expiring timer for debugging purpose only
* @tick_dep_mask: Tick dependency mask - is set, if someone needs the tick
* @last_tick_jiffies: Value of jiffies seen on last tick
* @stalled_jiffies: Number of stalled jiffies detected across ticks
*/
struct tick_sched {
struct hrtimer sched_timer;
......@@ -77,6 +79,8 @@ struct tick_sched {
u64 next_timer;
ktime_t idle_expires;
atomic_t tick_dep_mask;
unsigned long last_tick_jiffies;
unsigned int stalled_jiffies;
};
extern struct tick_sched *tick_get_tick_sched(int cpu);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment