Commit d490b3e2 authored by Ingo Molnar

Merge branch 'timers/nohz-irq-work-v7' of...

Merge branch 'timers/nohz-irq-work-v7' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into timers/nohz

Pull nohz updates from Frederic Weisbecker:

 " This set moves the nohz kick, used to notify a full dynticks CPU when
   events require tick rescheduling, out of the scheduler tick to a
   dedicated IPI.

   This debloats the scheduler IPI a bit by removing off-topic work that
   was abusing that scheduler fast path for its convenient asynchronous
   properties. Now the nohz kick uses irq-work for its own needs.

   Of course this implied quite some background infrastructure rework,
   including:

     * Clean up some irq-work internals
     * Implement remote irq-work
     * Implement nohz kick on top of remote irq-work
     * Move full dynticks timer enqueue notification to new kick
     * Move multi-task notification to new kick
     * Remove unnecessary barriers on multi-task notification
 "
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents ebe06187 3882ec64
......@@ -33,6 +33,11 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
#define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { .func = (_f), }
bool irq_work_queue(struct irq_work *work);
#ifdef CONFIG_SMP
bool irq_work_queue_on(struct irq_work *work, int cpu);
#endif
void irq_work_run(void);
void irq_work_sync(struct irq_work *work);
......
......@@ -181,7 +181,13 @@ static inline bool tick_nohz_full_cpu(int cpu)
extern void tick_nohz_init(void);
extern void __tick_nohz_full_check(void);
extern void tick_nohz_full_kick(void);
extern void tick_nohz_full_kick_cpu(int cpu);
/*
 * Kick the CPU this code is currently executing on: looks up the
 * current CPU with smp_processor_id() and forwards it to
 * tick_nohz_full_kick_cpu().
 */
static inline void tick_nohz_full_kick(void)
{
	int this_cpu = smp_processor_id();

	tick_nohz_full_kick_cpu(this_cpu);
}
extern void tick_nohz_full_kick_all(void);
extern void __tick_nohz_task_switch(struct task_struct *tsk);
#else
......@@ -189,6 +195,7 @@ static inline void tick_nohz_init(void) { }
static inline bool tick_nohz_full_enabled(void) { return false; }
static inline bool tick_nohz_full_cpu(int cpu) { return false; }
static inline void __tick_nohz_full_check(void) { }
static inline void tick_nohz_full_kick_cpu(int cpu) { }
static inline void tick_nohz_full_kick(void) { }
static inline void tick_nohz_full_kick_all(void) { }
static inline void __tick_nohz_task_switch(struct task_struct *tsk) { }
......
......@@ -16,11 +16,12 @@
#include <linux/tick.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <asm/processor.h>
static DEFINE_PER_CPU(struct llist_head, irq_work_list);
static DEFINE_PER_CPU(int, irq_work_raised);
static DEFINE_PER_CPU(struct llist_head, raised_list);
static DEFINE_PER_CPU(struct llist_head, lazy_list);
/*
* Claim the entry so that no one else will poke at it.
......@@ -55,12 +56,34 @@ void __weak arch_irq_work_raise(void)
*/
}
#ifdef CONFIG_SMP
/*
 * Enqueue the irq_work @work on @cpu unless it's already pending
 * somewhere.
 *
 * Can be re-enqueued while the callback is still in progress.
 *
 * Returns false when @work could not be claimed (it is already
 * pending), true once it has been added to @cpu's raised_list.
 */
bool irq_work_queue_on(struct irq_work *work, int cpu)
{
	struct llist_head *target;
	bool need_ipi;

	/* All work should have been flushed before going offline */
	WARN_ON_ONCE(cpu_is_offline(cpu));

	/* The arch remote IPI send/receive backends aren't NMI safe */
	WARN_ON_ONCE(in_nmi());

	/* Already pending somewhere: leave it alone */
	if (!irq_work_claim(work))
		return false;

	/* The claim must succeed before the node is published on the list */
	target = &per_cpu(raised_list, cpu);
	need_ipi = llist_add(&work->llnode, target);

	/*
	 * Only notify the target when llist_add() returns true
	 * (presumably: the list was empty before this add, so no IPI is
	 * in flight yet -- confirm against the llist API).
	 */
	if (need_ipi)
		arch_send_call_function_single_ipi(cpu);

	return true;
}
EXPORT_SYMBOL_GPL(irq_work_queue_on);
#endif
/* Enqueue the irq work @work on the current CPU */
bool irq_work_queue(struct irq_work *work)
{
/* Only queue if not already pending */
......@@ -70,15 +93,13 @@ bool irq_work_queue(struct irq_work *work)
/* Queue the entry and raise the IPI if needed. */
preempt_disable();
llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
/*
* If the work is not "lazy" or the tick is stopped, raise the irq
* work interrupt (if supported by the arch), otherwise, just wait
* for the next tick.
*/
if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) {
if (!this_cpu_cmpxchg(irq_work_raised, 0, 1))
/* If the work is "lazy", handle it from next tick if any */
if (work->flags & IRQ_WORK_LAZY) {
if (llist_add(&work->llnode, &__get_cpu_var(lazy_list)) &&
tick_nohz_tick_stopped())
arch_irq_work_raise();
} else {
if (llist_add(&work->llnode, &__get_cpu_var(raised_list)))
arch_irq_work_raise();
}
......@@ -90,10 +111,11 @@ EXPORT_SYMBOL_GPL(irq_work_queue);
bool irq_work_needs_cpu(void)
{
struct llist_head *this_list;
struct llist_head *raised, *lazy;
this_list = &__get_cpu_var(irq_work_list);
if (llist_empty(this_list))
raised = &__get_cpu_var(raised_list);
lazy = &__get_cpu_var(lazy_list);
if (llist_empty(raised) && llist_empty(lazy))
return false;
/* All work should have been flushed before going offline */
......@@ -102,28 +124,18 @@ bool irq_work_needs_cpu(void)
return true;
}
static void __irq_work_run(void)
static void irq_work_run_list(struct llist_head *list)
{
unsigned long flags;
struct irq_work *work;
struct llist_head *this_list;
struct llist_node *llnode;
BUG_ON(!irqs_disabled());
/*
* Reset the "raised" state right before we check the list because
* an NMI may enqueue after we find the list empty from the runner.
*/
__this_cpu_write(irq_work_raised, 0);
barrier();
this_list = &__get_cpu_var(irq_work_list);
if (llist_empty(this_list))
if (llist_empty(list))
return;
BUG_ON(!irqs_disabled());
llnode = llist_del_all(this_list);
llnode = llist_del_all(list);
while (llnode != NULL) {
work = llist_entry(llnode, struct irq_work, llnode);
......@@ -148,6 +160,12 @@ static void __irq_work_run(void)
}
}
/*
 * Process both per-cpu irq_work lists of the current CPU: first the
 * works queued on raised_list, then those queued on lazy_list.
 */
static void __irq_work_run(void)
{
	struct llist_head *pending;

	pending = &__get_cpu_var(raised_list);
	irq_work_run_list(pending);

	pending = &__get_cpu_var(lazy_list);
	irq_work_run_list(pending);
}
/*
* Run the irq_work entries on this cpu. Must be run from hardirq
* context with local IRQs disabled.
......
......@@ -684,10 +684,16 @@ static void wake_up_idle_cpu(int cpu)
static bool wake_up_full_nohz_cpu(int cpu)
{
/*
* We just need the target to call irq_exit() and re-evaluate
* the next tick. The nohz full kick at least implies that.
* If needed we can still optimize that later with an
* empty IRQ.
*/
if (tick_nohz_full_cpu(cpu)) {
if (cpu != smp_processor_id() ||
tick_nohz_tick_stopped())
smp_send_reschedule(cpu);
tick_nohz_full_kick_cpu(cpu);
return true;
}
......@@ -734,10 +740,11 @@ bool sched_can_stop_tick(void)
rq = this_rq();
/* Make sure rq->nr_running update is visible after the IPI */
smp_rmb();
/* More than one running task need preemption */
/*
* More than one running task need preemption.
* nr_running update is assumed to be visible
* after IPI is sent from wakers.
*/
if (rq->nr_running > 1)
return false;
......@@ -1568,9 +1575,7 @@ void scheduler_ipi(void)
*/
preempt_fold_need_resched();
if (llist_empty(&this_rq()->wake_list)
&& !tick_nohz_full_cpu(smp_processor_id())
&& !got_nohz_idle_kick())
if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
return;
/*
......@@ -1587,7 +1592,6 @@ void scheduler_ipi(void)
* somewhat pessimize the simple resched case.
*/
irq_enter();
tick_nohz_full_check();
sched_ttwu_pending();
/*
......
......@@ -1221,9 +1221,15 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
#ifdef CONFIG_NO_HZ_FULL
if (prev_nr < 2 && rq->nr_running >= 2) {
if (tick_nohz_full_cpu(rq->cpu)) {
/* Order rq->nr_running write against the IPI */
smp_wmb();
smp_send_reschedule(rq->cpu);
/*
* Tick is needed if more than one task runs on a CPU.
* Send the target an IPI to kick it out of nohz mode.
*
* We assume that IPI implies full memory barrier and the
* new value of rq->nr_running is visible on reception
* from the target.
*/
tick_nohz_full_kick_cpu(rq->cpu);
}
}
#endif
......
......@@ -3,6 +3,7 @@
*
* (C) Jens Axboe <jens.axboe@oracle.com> 2008
*/
#include <linux/irq_work.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/kernel.h>
......@@ -210,6 +211,14 @@ void generic_smp_call_function_single_interrupt(void)
csd->func(csd->info);
csd_unlock(csd);
}
/*
* Handle irq works queued remotely by irq_work_queue_on().
* Smp functions above are typically synchronous so they
* better run first since some other CPUs may be busy waiting
* for them.
*/
irq_work_run();
}
/*
......
......@@ -224,13 +224,15 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
};
/*
* Kick the current CPU if it's full dynticks in order to force it to
* Kick the CPU if it's full dynticks in order to force it to
* re-evaluate its dependency on the tick and restart it if necessary.
*/
void tick_nohz_full_kick(void)
void tick_nohz_full_kick_cpu(int cpu)
{
if (tick_nohz_full_cpu(smp_processor_id()))
irq_work_queue(&__get_cpu_var(nohz_full_kick_work));
if (!tick_nohz_full_cpu(cpu))
return;
irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
}
static void nohz_full_kick_ipi(void *info)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment