Commit bf9282dc authored by Peter Zijlstra

cpuidle: Make CPUIDLE_FLAG_TLB_FLUSHED generic

This allows moving the leave_mm() call into generic code before
rcu_idle_enter(). Gets rid of more trace_*_rcuidle() users.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Marco Elver <elver@google.com>
Link: https://lkml.kernel.org/r/20200821085348.369441600@infradead.org
parent 1098582a
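For orientation, here is a condensed sketch (not literal code from this commit; example_idle_entry is a made-up helper name) of the ordering the hunks below arrange: the generic cpuidle core drops the lazy mm before the idle path calls rcu_idle_enter(), which is why switch_mm_irqs_off() can go back to the plain trace_tlb_flush() tracepoint instead of the _rcuidle variant.

#include <linux/cpuidle.h>
#include <linux/mmu_context.h>

/*
 * Illustrative helper, not a kernel function: condensed ordering of
 * generic idle-state entry after this patch.
 */
static void example_idle_entry(struct cpuidle_device *dev,
			       struct cpuidle_state *target_state)
{
	/*
	 * 1) Still in normal RCU context: detach from the user mm if the
	 *    target state flushes the TLB anyway.  Tracepoints reached via
	 *    leave_mm() -> switch_mm_irqs_off() work as usual here.
	 */
	if (target_state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
		leave_mm(dev->cpu);

	/*
	 * 2) Only afterwards does the idle path stop RCU from watching
	 *    (rcu_idle_enter()) and enter the low-level idle state.
	 */
}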
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -59,5 +59,6 @@ typedef struct {
 }
 
 void leave_mm(int cpu);
+#define leave_mm leave_mm
 
 #endif /* _ASM_X86_MMU_H */
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -555,21 +555,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
 		load_new_mm_cr3(next->pgd, new_asid, true);
 
-		/*
-		 * NB: This gets called via leave_mm() in the idle path
-		 * where RCU functions differently. Tracing normally
-		 * uses RCU, so we need to use the _rcuidle variant.
-		 *
-		 * (There is no good reason for this. The idle code should
-		 * be rearranged to call this before rcu_idle_enter().)
-		 */
-		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 	} else {
 		/* The new ASID is already up to date. */
 		load_new_mm_cr3(next->pgd, new_asid, false);
 
-		/* See above wrt _rcuidle. */
-		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
+		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
 	}
 
 	/* Make sure we write CR3 before loaded_mm. */
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -22,6 +22,7 @@
 #include <linux/module.h>
 #include <linux/suspend.h>
 #include <linux/tick.h>
+#include <linux/mmu_context.h>
 #include <trace/events/power.h>
 
 #include "cpuidle.h"
@@ -228,6 +229,9 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 		broadcast = false;
 	}
 
+	if (target_state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
+		leave_mm(dev->cpu);
+
 	/* Take note of the planned idle state. */
 	sched_idle_set_state(target_state);
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -89,14 +89,6 @@ static unsigned int mwait_substates __initdata;
  */
 #define CPUIDLE_FLAG_ALWAYS_ENABLE	BIT(15)
 
-/*
- * Set this flag for states where the HW flushes the TLB for us
- * and so we don't need cross-calls to keep it consistent.
- * If this flag is set, SW flushes the TLB, so even if the
- * HW doesn't do the flushing, this flag is safe to use.
- */
-#define CPUIDLE_FLAG_TLB_FLUSHED	BIT(16)
-
 /*
  * MWAIT takes an 8-bit "hint" in EAX "suggesting"
  * the C-state (top nibble) and sub-state (bottom nibble)
@@ -131,14 +123,6 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev,
 	unsigned long eax = flg2MWAIT(state->flags);
 	unsigned long ecx = 1; /* break on interrupt flag */
 	bool tick;
-	int cpu = smp_processor_id();
-
-	/*
-	 * leave_mm() to avoid costly and often unnecessary wakeups
-	 * for flushing the user TLB's associated with the active mm.
-	 */
-	if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
-		leave_mm(cpu);
 
 	if (!static_cpu_has(X86_FEATURE_ARAT)) {
 		/*
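With the private BIT(16) definition and the open-coded leave_mm() call removed from intel_idle, the driver's state tables can keep tagging deep C-states as before; the name now resolves to the generic BIT(5) flag from <linux/cpuidle.h>, and the cpuidle core performs the leave_mm(). Below is a hypothetical table entry in that style; the hint value and timings are invented for illustration and are not taken from a real table.

static struct cpuidle_state example_cstates[] __initdata = {
	{
		.name = "C6-EXAMPLE",		/* hypothetical state */
		.desc = "MWAIT 0x20 (illustrative only)",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 85,		/* made-up numbers */
		.target_residency = 200,
		.enter = &intel_idle,
	},
	{
		.enter = NULL			/* table terminator */
	}
};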
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -81,6 +81,7 @@ struct cpuidle_state {
 #define CPUIDLE_FLAG_TIMER_STOP		BIT(2) /* timer is stopped on this state */
 #define CPUIDLE_FLAG_UNUSABLE		BIT(3) /* avoid using this state */
 #define CPUIDLE_FLAG_OFF		BIT(4) /* disable this state by default */
+#define CPUIDLE_FLAG_TLB_FLUSHED	BIT(5) /* idle-state flushes TLBs */
 
 struct cpuidle_device_kobj;
 struct cpuidle_state_kobj;
--- a/include/linux/mmu_context.h
+++ b/include/linux/mmu_context.h
@@ -3,10 +3,15 @@
 #define _LINUX_MMU_CONTEXT_H
 
 #include <asm/mmu_context.h>
+#include <asm/mmu.h>
 
 /* Architectures that care about IRQ state in switch_mm can override this. */
 #ifndef switch_mm_irqs_off
 # define switch_mm_irqs_off switch_mm
 #endif
 
+#ifndef leave_mm
+static inline void leave_mm(int cpu) { }
+#endif
+
 #endif
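The two header hunks (asm/mmu.h above and this one) rely on a define-the-symbol-as-itself override pattern. The snippet below only condenses lines from both headers for clarity; it adds nothing beyond the patch.

/* Architecture side (x86): declare the real function and claim the name. */
void leave_mm(int cpu);
#define leave_mm leave_mm

/*
 * Generic side (<linux/mmu_context.h>): if no architecture claimed the
 * name, fall back to a no-op, so the cpuidle core can call leave_mm()
 * unconditionally behind the CPUIDLE_FLAG_TLB_FLUSHED check.
 */
#ifndef leave_mm
static inline void leave_mm(int cpu) { }
#endif

The same macro trick is already used a few lines up in this header for switch_mm_irqs_off.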