Commit bde6f5f5 authored by Venki Pallipadi, committed by Ingo Molnar

x86: voluntary leave_mm before entering ACPI C3

Avoid TLB flush IPIs during C3 states by voluntarily calling leave_mm()
before entering C3.

The performance impact of the TLB flush on C3 entry should be insignificant
relative to C3 wakeup latency. Also, CPUs tend to flush the TLB in hardware while in
C3 anyway.
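To make the mechanism concrete, here is a minimal user-space model (not kernel code; every *_model name is invented for illustration) of why clearing a CPU's bit in mm->cpu_vm_mask stops the IPIs:

/*
 * Minimal user-space model: a TLB shootdown only "IPIs" CPUs whose
 * bit is set in the mm's cpu_vm_mask, so a CPU that clears its bit
 * via leave_mm() before idling in C3 is never woken.
 */
#include <stdio.h>

#define NR_CPUS 8

struct mm_model {
        unsigned long cpu_vm_mask;      /* CPUs that may hold stale TLB entries */
};

/* Models leave_mm(): take this CPU out of the mm's shootdown set. */
static void leave_mm_model(struct mm_model *mm, int cpu)
{
        mm->cpu_vm_mask &= ~(1UL << cpu);
}

/* Models the flush path: "send an IPI" to every CPU still in the set. */
static void flush_tlb_others_model(const struct mm_model *mm)
{
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                if (mm->cpu_vm_mask & (1UL << cpu))
                        printf("tlbflush IPI -> cpu%d\n", cpu);
}

int main(void)
{
        struct mm_model mm = { .cpu_vm_mask = 0xffUL }; /* all 8 CPUs attached */

        leave_mm_model(&mm, 3);         /* cpu3 drops the mm before entering C3 */
        flush_tlb_others_model(&mm);    /* cpu3 no longer receives the IPI */
        return 0;
}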

On an 8-logical-CPU system running make -j2, the number of tlbflush IPIs goes
down from 40 per second to ~0. The total number of interrupts during the run
of this workload was ~1200 per second, so this is a ~3% saving in wakeups
(40 / 1200 ≈ 3%).

There was, however, no measurable performance or power impact.

[ akpm@linux-foundation.org: symbol export fixes. ]
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent 7d409d60
--- a/arch/x86/kernel/smp_32.c
+++ b/arch/x86/kernel/smp_32.c
@@ -256,13 +256,14 @@ static DEFINE_SPINLOCK(tlbstate_lock);
  * We need to reload %cr3 since the page tables may be going
  * away from under us..
  */
-void leave_mm(unsigned long cpu)
+void leave_mm(int cpu)
 {
         if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
                 BUG();
         cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
         load_cr3(swapper_pg_dir);
 }
+EXPORT_SYMBOL_GPL(leave_mm);
 
 /*
  *
...
--- a/arch/x86/kernel/smp_64.c
+++ b/arch/x86/kernel/smp_64.c
@@ -69,13 +69,14 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state);
  * We cannot call mmdrop() because we are in interrupt context,
  * instead update mm->cpu_vm_mask.
  */
-static inline void leave_mm(int cpu)
+void leave_mm(int cpu)
 {
         if (read_pda(mmu_state) == TLBSTATE_OK)
                 BUG();
         cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
         load_cr3(swapper_pg_dir);
 }
+EXPORT_SYMBOL_GPL(leave_mm);
 
 /*
  *
...
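Both leave_mm() implementations BUG() when called with the CPU still in TLBSTATE_OK. A simplified user-space sketch of that invariant (only the TLBSTATE names come from the kernel; the rest is an invented model):

/*
 * leave_mm() is legal only in lazy TLB mode.  A CPU actively running
 * with the mm (TLBSTATE_OK) must keep its cpu_vm_mask bit set, so the
 * kernel BUG()s if the state is wrong; assert() models that here.
 */
#include <assert.h>

enum { TLBSTATE_OK = 1, TLBSTATE_LAZY = 2 };

struct tlbstate_model {
        int state;
        unsigned long *active_mm_mask;
};

static void leave_mm_checked_model(struct tlbstate_model *ts, int cpu)
{
        assert(ts->state != TLBSTATE_OK);       /* models the kernel's BUG() */
        *ts->active_mm_mask &= ~(1UL << cpu);
        /* the real function also reloads %cr3 with swapper_pg_dir */
}

int main(void)
{
        unsigned long mask = 1UL << 2;
        struct tlbstate_model ts = { TLBSTATE_LAZY, &mask };

        leave_mm_checked_model(&ts, 2);         /* ok: CPU 2 was lazy */
        return 0;
}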
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -534,6 +534,7 @@ static void acpi_processor_idle(void)
                 break;
 
         case ACPI_STATE_C3:
+                acpi_unlazy_tlb(smp_processor_id());
                 /*
                  * Must be done before busmaster disable as we might
                  * need to access HPET !
@@ -1423,6 +1424,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
                 return 0;
         }
 
+        acpi_unlazy_tlb(smp_processor_id());
         /*
          * Must be done before busmaster disable as we might need to
          * access HPET !
...
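The ordering in the C3 path matters: the TLB is unlazied first, then the timer access and bus-master disable follow, and only then does the CPU enter C3. A self-contained sketch of that sequence (the *_model stubs are invented stand-ins for the real kernel calls):

/*
 * Sketch of the new C3 entry ordering; each stub just logs the step
 * it stands in for.
 */
#include <stdio.h>

static void acpi_unlazy_tlb_model(int cpu)
{
        printf("cpu%d: leave_mm() - out of the TLB shootdown set\n", cpu);
}

static void bus_master_disable_model(void)
{
        printf("disable bus-master arbitration (after any HPET access)\n");
}

static void enter_c3_model(void)
{
        printf("read P_LVL3, CPU idles in C3\n");
}

int main(void)
{
        /* Unlazy the TLB first: from here until wakeup, no TLB flush
         * IPI needs to target this CPU. */
        acpi_unlazy_tlb_model(0);
        bus_master_disable_model();
        enter_c3_model();
        return 0;
}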
--- a/include/asm-ia64/acpi.h
+++ b/include/asm-ia64/acpi.h
@@ -127,6 +127,8 @@ extern int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS];
 extern int __initdata nid_to_pxm_map[MAX_NUMNODES];
 #endif
 
+#define acpi_unlazy_tlb(x)
+
 #endif /*__KERNEL__*/
 
 #endif /*_ASM_ACPI_H*/
--- a/include/asm-x86/acpi.h
+++ b/include/asm-x86/acpi.h
@@ -27,6 +27,7 @@
 #include <asm/numa.h>
 #include <asm/processor.h>
+#include <asm/mmu.h>
 
 #define COMPILER_DEPENDENT_INT64   long long
 #define COMPILER_DEPENDENT_UINT64  unsigned long long
@@ -167,4 +168,6 @@ static inline void acpi_fake_nodes(const struct bootnode *fake_nodes,
 }
 #endif
 
+#define acpi_unlazy_tlb(x)      leave_mm(x)
+
 #endif /*__X86_ASM_ACPI_H*/
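The x86 header maps acpi_unlazy_tlb() to leave_mm(), while the ia64 header above defines it empty. A minimal model of this per-arch macro-hook pattern (MODEL_X86 is an invented compile-time switch; in the kernel the selection happens by including the arch's <asm/acpi.h>):

/*
 * Per-arch hook modeled with the preprocessor: the call site stays
 * unconditional, the macro expands to a real call or to nothing.
 */
#include <stdio.h>

void leave_mm_model(int cpu)
{
        printf("cpu%d: left lazy mm before C3\n", cpu);
}

#ifdef MODEL_X86
#define acpi_unlazy_tlb(x)      leave_mm_model(x)
#else
#define acpi_unlazy_tlb(x)      do { } while (0)        /* e.g. ia64: no-op */
#endif

int main(void)
{
        acpi_unlazy_tlb(0);     /* a real call on x86, nothing elsewhere */
        return 0;
}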
--- a/include/asm-x86/mmu.h
+++ b/include/asm-x86/mmu.h
@@ -20,4 +20,12 @@ typedef struct {
         void *vdso;
 } mm_context_t;
 
+#ifdef CONFIG_SMP
+void leave_mm(int cpu);
+#else
+static inline void leave_mm(int cpu)
+{
+}
+#endif
+
 #endif /* _ASM_X86_MMU_H */
--- a/include/asm-x86/mmu_context_32.h
+++ b/include/asm-x86/mmu_context_32.h
@@ -32,8 +32,6 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 #endif
 }
 
-void leave_mm(unsigned long cpu);
-
 static inline void switch_mm(struct mm_struct *prev,
                              struct mm_struct *next,
                              struct task_struct *tsk)
...