Commit 9269d27e authored by Linus Torvalds

Merge tag 'timers-nohz-2021-06-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timers/nohz updates from Ingo Molnar:

 - Micro-optimize tick_nohz_full_cpu() (see the sketch after the summary below)

 - Optimize idle exit tick restarts to be less eager

 - Optimize tick_nohz_dep_set_task() to only wake up a single CPU.
   This reduces IPIs and interruptions on nohz_full CPUs.

 - Optimize tick_nohz_dep_set_signal() in a similar fashion.

 - Skip IPIs in tick_nohz_kick_task() when trying to kick a
   non-running task.

 - Micro-optimize tick_nohz_task_switch() IRQ flags handling to
   reduce context switching costs.

 - Misc cleanups and fixes

* tag 'timers-nohz-2021-06-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  MAINTAINERS: Add myself as context tracking maintainer
  tick/nohz: Call tick_nohz_task_switch() with interrupts disabled
  tick/nohz: Kick only _queued_ task whose tick dependency is updated
  tick/nohz: Change signal tick dependency to wake up CPUs of member tasks
  tick/nohz: Only wake up a single target cpu when kicking a task
  tick/nohz: Update nohz_full Kconfig help
  tick/nohz: Update idle_exittime on actual idle exit
  tick/nohz: Remove superflous check for CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
  tick/nohz: Conditionally restart tick on idle exit
  tick/nohz: Evaluate the CPU expression after the static key
parents 54a728dc 09fe880e
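
The first bullet above refers to the new tick_nohz_full_cpu() macro in the include/linux/tick.h hunk below, which defers evaluation of its cpu argument (typically smp_processor_id()) until after the static key has been checked. The following is a minimal, standalone sketch of that evaluation-order pattern only; it is not kernel code, and the plain bool guard and made-up names stand in for the kernel's static key and cpumask machinery:

/*
 * Standalone illustration (assumption: user-space demo, hypothetical names)
 * of a GNU statement-expression macro that evaluates its argument only
 * when a guard is enabled, mirroring the tick_nohz_full_cpu() change below.
 * Build with gcc or clang (statement expressions are a GNU extension).
 */
#include <stdbool.h>
#include <stdio.h>

static bool guard_enabled;     /* stand-in for tick_nohz_full_enabled() */
static int eval_count;         /* counts how often the argument runs */

static int expensive_cpu_lookup(void)  /* stand-in for smp_processor_id() */
{
        eval_count++;
        return 0;
}

/* The argument (_cpu) is only evaluated inside the guarded branch. */
#define guarded_test(_cpu) ({ \
        bool __ret = false; \
        if (guard_enabled) \
                __ret = ((_cpu) == 0); \
        __ret; \
})

int main(void)
{
        guard_enabled = false;
        (void)guarded_test(expensive_cpu_lookup());
        printf("guard off: %d evaluations\n", eval_count);  /* prints 0 */

        guard_enabled = true;
        (void)guarded_test(expensive_cpu_lookup());
        printf("guard on:  %d evaluations\n", eval_count);  /* prints 1 */
        return 0;
}

With the guard off, the argument is never evaluated, which is exactly the cost the micro-optimization removes from the common case where nohz_full is not in use.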
File: MAINTAINERS

@@ -4610,6 +4610,12 @@ S: Supported
 F:      drivers/video/console/
 F:      include/linux/console*
 
+CONTEXT TRACKING
+M:      Frederic Weisbecker <frederic@kernel.org>
+S:      Maintained
+F:      kernel/context_tracking.c
+F:      include/linux/context_tracking*
+
 CONTROL GROUP (CGROUP)
 M:      Tejun Heo <tj@kernel.org>
 M:      Zefan Li <lizefan.x@bytedance.com>
File: include/linux/sched.h

@@ -2028,6 +2028,8 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 #endif /* CONFIG_SMP */
 
+extern bool sched_task_on_rq(struct task_struct *p);
+
 /*
  * In order to reduce various lock holder preemption latencies provide an
  * interface to see if a vCPU is currently running or not.
File: include/linux/tick.h

@@ -186,13 +186,17 @@ static inline bool tick_nohz_full_enabled(void)
         return tick_nohz_full_running;
 }
 
-static inline bool tick_nohz_full_cpu(int cpu)
-{
-        if (!tick_nohz_full_enabled())
-                return false;
-
-        return cpumask_test_cpu(cpu, tick_nohz_full_mask);
-}
+/*
+ * Check if a CPU is part of the nohz_full subset. Arrange for evaluating
+ * the cpu expression (typically smp_processor_id()) _after_ the static
+ * key.
+ */
+#define tick_nohz_full_cpu(_cpu) ({ \
+        bool __ret = false; \
+        if (tick_nohz_full_enabled()) \
+                __ret = cpumask_test_cpu((_cpu), tick_nohz_full_mask); \
+        __ret; \
+})
 
 static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask)
 {
@@ -208,7 +212,7 @@ extern void tick_nohz_dep_set_task(struct task_struct *tsk,
                                    enum tick_dep_bits bit);
 extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
                                      enum tick_dep_bits bit);
-extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
+extern void tick_nohz_dep_set_signal(struct task_struct *tsk,
                                      enum tick_dep_bits bit);
 extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
                                        enum tick_dep_bits bit);
@@ -253,11 +257,11 @@ static inline void tick_dep_clear_task(struct task_struct *tsk,
         if (tick_nohz_full_enabled())
                 tick_nohz_dep_clear_task(tsk, bit);
 }
 
-static inline void tick_dep_set_signal(struct signal_struct *signal,
+static inline void tick_dep_set_signal(struct task_struct *tsk,
                                        enum tick_dep_bits bit)
 {
         if (tick_nohz_full_enabled())
-                tick_nohz_dep_set_signal(signal, bit);
+                tick_nohz_dep_set_signal(tsk, bit);
 }
 
 static inline void tick_dep_clear_signal(struct signal_struct *signal,
                                          enum tick_dep_bits bit)
@@ -285,7 +289,7 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
                                      enum tick_dep_bits bit) { }
 static inline void tick_dep_clear_task(struct task_struct *tsk,
                                        enum tick_dep_bits bit) { }
-static inline void tick_dep_set_signal(struct signal_struct *signal,
+static inline void tick_dep_set_signal(struct task_struct *tsk,
                                        enum tick_dep_bits bit) { }
 static inline void tick_dep_clear_signal(struct signal_struct *signal,
                                          enum tick_dep_bits bit) { }
File: kernel/sched/core.c

@@ -1928,6 +1928,11 @@ static inline void uclamp_post_fork(struct task_struct *p) { }
 static inline void init_uclamp(void) { }
 #endif /* CONFIG_UCLAMP_TASK */
 
+bool sched_task_on_rq(struct task_struct *p)
+{
+        return task_on_rq_queued(p);
+}
+
 static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
 {
         if (!(flags & ENQUEUE_NOCLOCK))
@@ -4546,6 +4551,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
         vtime_task_switch(prev);
         perf_event_task_sched_in(prev, current);
         finish_task(prev);
+        tick_nohz_task_switch();
         finish_lock_switch(rq);
         finish_arch_post_lock_switch();
         kcov_finish_switch(current);
@@ -4591,7 +4597,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
                 put_task_struct_rcu_user(prev);
         }
 
-        tick_nohz_task_switch();
         return rq;
 }
File: kernel/time/Kconfig

@@ -117,13 +117,14 @@ config NO_HZ_FULL
          the task mostly runs in userspace and has few kernel activity.
 
          You need to fill up the nohz_full boot parameter with the
-         desired range of dynticks CPUs.
+         desired range of dynticks CPUs to use it. This is implemented at
+         the expense of some overhead in user <-> kernel transitions:
+         syscalls, exceptions and interrupts.
 
-         This is implemented at the expense of some overhead in user <-> kernel
-         transitions: syscalls, exceptions and interrupts. Even when it's
-         dynamically off.
+         By default, without passing the nohz_full parameter, this behaves just
+         like NO_HZ_IDLE.
 
-         Say N.
+         If you're a distro say Y.
 
 endchoice
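
As a usage note (the CPU list here is an example only, not taken from the patch): nohz_full takes a CPU range on the kernel command line, e.g. booting an 8-CPU machine with nohz_full=1-7 runs CPUs 1-7 in full dynticks mode while at least one CPU (here CPU 0) stays in periodic housekeeping mode; booting without the parameter behaves like NO_HZ_IDLE, as the updated help text above states.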
File: kernel/time/posix-cpu-timers.c

@@ -523,7 +523,7 @@ static void arm_timer(struct k_itimer *timer, struct task_struct *p)
         if (CPUCLOCK_PERTHREAD(timer->it_clock))
                 tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
         else
-                tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
+                tick_dep_set_signal(p, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 /*
@@ -1358,7 +1358,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid,
                 if (*newval < *nextevt)
                         *nextevt = *newval;
 
-        tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
+        tick_dep_set_signal(tsk, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
File: kernel/time/tick-sched.c

@@ -323,6 +323,46 @@ void tick_nohz_full_kick_cpu(int cpu)
         irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
 }
 
+static void tick_nohz_kick_task(struct task_struct *tsk)
+{
+        int cpu;
+
+        /*
+         * If the task is not running, run_posix_cpu_timers()
+         * has nothing to elapse, IPI can then be spared.
+         *
+         * activate_task()                      STORE p->tick_dep_mask
+         *   STORE p->on_rq
+         * __schedule() (switch to task 'p')    smp_mb() (atomic_fetch_or())
+         *   LOCK rq->lock                      LOAD p->on_rq
+         *   smp_mb__after_spin_lock()
+         *   tick_nohz_task_switch()
+         *     LOAD p->tick_dep_mask
+         */
+        if (!sched_task_on_rq(tsk))
+                return;
+
+        /*
+         * If the task concurrently migrates to another CPU,
+         * we guarantee it sees the new tick dependency upon
+         * schedule.
+         *
+         * set_task_cpu(p, cpu);
+         *   STORE p->cpu = @cpu
+         * __schedule() (switch to task 'p')
+         *   LOCK rq->lock
+         *   smp_mb__after_spin_lock()          STORE p->tick_dep_mask
+         *   tick_nohz_task_switch()            smp_mb() (atomic_fetch_or())
+         *     LOAD p->tick_dep_mask            LOAD p->cpu
+         */
+        cpu = task_cpu(tsk);
+
+        preempt_disable();
+        if (cpu_online(cpu))
+                tick_nohz_full_kick_cpu(cpu);
+        preempt_enable();
+}
+
 /*
  * Kick all full dynticks CPUs in order to force these to re-evaluate
  * their dependency on the tick and restart it if necessary.
@@ -405,19 +445,8 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
  */
 void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
 {
-        if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) {
-                if (tsk == current) {
-                        preempt_disable();
-                        tick_nohz_full_kick();
-                        preempt_enable();
-                } else {
-                        /*
-                         * Some future tick_nohz_full_kick_task()
-                         * should optimize this.
-                         */
-                        tick_nohz_full_kick_all();
-                }
-        }
+        if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask))
+                tick_nohz_kick_task(tsk);
 }
 EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);
 
@@ -431,9 +460,20 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task);
  * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
  * per process timers.
  */
-void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+void tick_nohz_dep_set_signal(struct task_struct *tsk,
+                              enum tick_dep_bits bit)
 {
-        tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
+        int prev;
+        struct signal_struct *sig = tsk->signal;
+
+        prev = atomic_fetch_or(BIT(bit), &sig->tick_dep_mask);
+        if (!prev) {
+                struct task_struct *t;
+
+                lockdep_assert_held(&tsk->sighand->siglock);
+                __for_each_thread(sig, t)
+                        tick_nohz_kick_task(t);
+        }
 }
 
 void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
@@ -448,13 +488,10 @@ void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
  */
 void __tick_nohz_task_switch(void)
 {
-        unsigned long flags;
         struct tick_sched *ts;
 
-        local_irq_save(flags);
-
         if (!tick_nohz_full_cpu(smp_processor_id()))
-                goto out;
+                return;
 
         ts = this_cpu_ptr(&tick_cpu_sched);
@@ -463,8 +500,6 @@ void __tick_nohz_task_switch(void)
                     atomic_read(&current->signal->tick_dep_mask))
                         tick_nohz_full_kick();
         }
-out:
-        local_irq_restore(flags);
 }
 
 /* Get the boot-time nohz CPU list from the kernel parameters. */
@@ -922,27 +957,31 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
          * Cancel the scheduled timer and restore the tick
          */
         ts->tick_stopped = 0;
-        ts->idle_exittime = now;
         tick_nohz_restart(ts, now);
 }
 
-static void tick_nohz_full_update_tick(struct tick_sched *ts)
+static void __tick_nohz_full_update_tick(struct tick_sched *ts,
+                                         ktime_t now)
 {
 #ifdef CONFIG_NO_HZ_FULL
         int cpu = smp_processor_id();
 
-        if (!tick_nohz_full_cpu(cpu))
+        if (can_stop_full_tick(cpu, ts))
+                tick_nohz_stop_sched_tick(ts, cpu);
+        else if (ts->tick_stopped)
+                tick_nohz_restart_sched_tick(ts, now);
+#endif
+}
+
+static void tick_nohz_full_update_tick(struct tick_sched *ts)
+{
+        if (!tick_nohz_full_cpu(smp_processor_id()))
                 return;
 
         if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
                 return;
 
-        if (can_stop_full_tick(cpu, ts))
-                tick_nohz_stop_sched_tick(ts, cpu);
-        else if (ts->tick_stopped)
-                tick_nohz_restart_sched_tick(ts, ktime_get());
-#endif
+        __tick_nohz_full_update_tick(ts, ktime_get());
 }
 
 static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
@@ -1189,11 +1228,13 @@ unsigned long tick_nohz_get_idle_calls(void)
         return ts->idle_calls;
 }
 
-static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
+static void tick_nohz_account_idle_time(struct tick_sched *ts,
+                                        ktime_t now)
 {
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
         unsigned long ticks;
 
+        ts->idle_exittime = now;
+
         if (vtime_accounting_enabled_this_cpu())
                 return;
         /*
@@ -1207,21 +1248,27 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
          */
         if (ticks && ticks < LONG_MAX)
                 account_idle_ticks(ticks);
-#endif
 }
 
-static void __tick_nohz_idle_restart_tick(struct tick_sched *ts, ktime_t now)
+void tick_nohz_idle_restart_tick(void)
 {
-        tick_nohz_restart_sched_tick(ts, now);
-        tick_nohz_account_idle_ticks(ts);
-}
+        struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+
+        if (ts->tick_stopped) {
+                ktime_t now = ktime_get();
+                tick_nohz_restart_sched_tick(ts, now);
+                tick_nohz_account_idle_time(ts, now);
+        }
+}
 
-void tick_nohz_idle_restart_tick(void)
+static void tick_nohz_idle_update_tick(struct tick_sched *ts, ktime_t now)
 {
-        struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+        if (tick_nohz_full_cpu(smp_processor_id()))
+                __tick_nohz_full_update_tick(ts, now);
+        else
+                tick_nohz_restart_sched_tick(ts, now);
 
-        if (ts->tick_stopped)
-                __tick_nohz_idle_restart_tick(ts, ktime_get());
+        tick_nohz_account_idle_time(ts, now);
 }
 
 /**
@@ -1253,7 +1300,7 @@ void tick_nohz_idle_exit(void)
                 tick_nohz_stop_idle(ts, now);
 
         if (tick_stopped)
-                __tick_nohz_idle_restart_tick(ts, now);
+                tick_nohz_idle_update_tick(ts, now);
 
         local_irq_enable();
 }