Commit bfe2a3c3 authored by Ingo Molnar

Merge branch 'core/percpu' into perfcounters/core

Conflicts:
	arch/x86/include/asm/hardirq_32.h
	arch/x86/include/asm/hardirq_64.h

Semantic merge:
	arch/x86/include/asm/hardirq.h
	[ added apic_perf_irqs field. ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parents 77835492 35d266a2
@@ -84,7 +84,7 @@ void build_cpu_to_node_map(void);
 	.child			= NULL,			\
 	.groups			= NULL,			\
 	.min_interval		= 8,			\
-	.max_interval		= 8*(min(num_online_cpus(), 32)),	\
+	.max_interval		= 8*(min(num_online_cpus(), 32U)),	\
 	.busy_factor		= 64,			\
 	.imbalance_pct		= 125,			\
 	.cache_nice_tries	= 2,			\
......
@@ -391,6 +391,13 @@ config X86_RDC321X
 	  as R-8610-(G).
 	  If you don't have one of these chips, you should say N here.
 
+config X86_UV
+	bool "SGI Ultraviolet"
+	depends on X86_64
+	help
+	  This option is needed in order to support SGI Ultraviolet systems.
+	  If you don't have one of these, you should say N here.
+
 config SCHED_OMIT_FRAME_POINTER
 	def_bool y
 	prompt "Single-depth WCHAN output"
@@ -1341,13 +1348,17 @@ config SECCOMP
 	  If unsure, say Y. Only embedded should say N here.
 
+config CC_STACKPROTECTOR_ALL
+	bool
+
 config CC_STACKPROTECTOR
 	bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
-	depends on X86_64 && EXPERIMENTAL && BROKEN
+	depends on X86_64
+	select CC_STACKPROTECTOR_ALL
 	help
 	  This option turns on the -fstack-protector GCC feature. This
-	  feature puts, at the beginning of critical functions, a canary
-	  value on the stack just before the return address, and validates
+	  feature puts, at the beginning of functions, a canary value on
+	  the stack just before the return address, and validates
 	  the value just before actually returning.  Stack based buffer
 	  overflows (that need to overwrite this return address) now also
 	  overwrite the canary, which gets detected and the attack is then
@@ -1355,15 +1366,8 @@ config CC_STACKPROTECTOR
 	  This feature requires gcc version 4.2 or above, or a distribution
 	  gcc with the feature backported. Older versions are automatically
-	  detected and for those versions, this configuration option is ignored.
-
-config CC_STACKPROTECTOR_ALL
-	bool "Use stack-protector for all functions"
-	depends on CC_STACKPROTECTOR
-	help
-	  Normally, GCC only inserts the canary value protection for
-	  functions that use large-ish on-stack buffers. By enabling
-	  this option, GCC will be asked to do this for ALL functions.
+	  detected and for those versions, this configuration option is
+	  ignored. (and a warning is printed during bootup)
 
 source kernel/Kconfig.hz
......
@@ -292,25 +292,23 @@ config X86_CPU
 # Define implied options from the CPU selection here
 config X86_L1_CACHE_BYTES
 	int
-	default "128" if GENERIC_CPU || MPSC
-	default "64" if MK8 || MCORE2
-	depends on X86_64
+	default "128" if MPSC
+	default "64" if GENERIC_CPU || MK8 || MCORE2 || X86_32
 
 config X86_INTERNODE_CACHE_BYTES
 	int
 	default "4096" if X86_VSMP
 	default X86_L1_CACHE_BYTES if !X86_VSMP
-	depends on X86_64
 
 config X86_CMPXCHG
 	def_bool X86_64 || (X86_32 && !M386)
 
 config X86_L1_CACHE_SHIFT
 	int
-	default "7" if MPENTIUM4 || X86_GENERIC || GENERIC_CPU || MPSC
+	default "7" if MPENTIUM4 || MPSC
 	default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
 	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
-	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7
+	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU
 
 config X86_XADD
 	def_bool y
......
@@ -117,6 +117,7 @@ config DEBUG_RODATA
 config DEBUG_RODATA_TEST
 	bool "Testcase for the DEBUG_RODATA feature"
 	depends on DEBUG_RODATA
+	default y
 	help
 	  This option enables a testcase for the DEBUG_RODATA
 	  feature as well as for the change_page_attr() infrastructure.
......
@@ -73,7 +73,7 @@ else
         stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh
         stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \
-                "$(CC)" -fstack-protector )
+                "$(CC)" "-fstack-protector -DGCC_HAS_SP" )
         stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \
                 "$(CC)" -fstack-protector-all )
......
@@ -138,11 +138,4 @@ struct genapic {
 extern struct genapic *genapic;
 extern void es7000_update_genapic_to_cluster(void);
 
-enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
-#define get_uv_system_type()		UV_NONE
-#define is_uv_system()			0
-#define uv_wakeup_secondary(a, b)	1
-#define uv_system_init()		do {} while (0)
-
 #endif /* _ASM_X86_GENAPIC_32_H */
@@ -51,15 +51,9 @@ extern struct genapic apic_x2apic_phys;
 extern int acpi_madt_oem_check(char *, char *);
 extern void apic_send_IPI_self(int vector);
 
-enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
-extern enum uv_system_type get_uv_system_type(void);
-extern int is_uv_system(void);
 extern struct genapic apic_x2apic_uv_x;
 DECLARE_PER_CPU(int, x2apic_extra_bits);
-extern void uv_cpu_init(void);
-extern void uv_system_init(void);
-extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
 
 extern void setup_apic_routing(void);
......
-#ifdef CONFIG_X86_32
-# include "hardirq_32.h"
-#else
-# include "hardirq_64.h"
-#endif
+#ifndef _ASM_X86_HARDIRQ_H
+#define _ASM_X86_HARDIRQ_H
+
+#include <linux/threads.h>
+#include <linux/irq.h>
+
+typedef struct {
+	unsigned int __softirq_pending;
+	unsigned int __nmi_count;	/* arch dependent */
+	unsigned int irq0_irqs;
+#ifdef CONFIG_X86_LOCAL_APIC
+	unsigned int apic_timer_irqs;	/* arch dependent */
+	unsigned int irq_spurious_count;
+#endif
+	unsigned int apic_perf_irqs;
+#ifdef CONFIG_SMP
+	unsigned int irq_resched_count;
+	unsigned int irq_call_count;
+	unsigned int irq_tlb_count;
+#endif
+#ifdef CONFIG_X86_MCE
+	unsigned int irq_thermal_count;
+# ifdef CONFIG_X86_64
+	unsigned int irq_threshold_count;
+# endif
+#endif
+} ____cacheline_aligned irq_cpustat_t;
+
+DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
+
+/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
+#define MAX_HARDIRQS_PER_CPU NR_VECTORS
+
+#define __ARCH_IRQ_STAT
+
+#define inc_irq_stat(member)	percpu_add(irq_stat.member, 1)
+
+#define local_softirq_pending()	percpu_read(irq_stat.__softirq_pending)
+
+#define __ARCH_SET_SOFTIRQ_PENDING
+
+#define set_softirq_pending(x)	percpu_write(irq_stat.__softirq_pending, (x))
+#define or_softirq_pending(x)	percpu_or(irq_stat.__softirq_pending, (x))
+
+extern void ack_bad_irq(unsigned int irq);
 
 extern u64 arch_irq_stat_cpu(unsigned int cpu);
 #define arch_irq_stat_cpu	arch_irq_stat_cpu
 
 extern u64 arch_irq_stat(void);
 #define arch_irq_stat		arch_irq_stat
+
+#endif /* _ASM_X86_HARDIRQ_H */
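An illustrative usage sketch, not part of the patch: handle_perf_irq() below is a hypothetical caller showing how the unified inc_irq_stat() accessor is meant to be used from an interrupt path; it expands to percpu_add(irq_stat.apic_perf_irqs, 1), i.e. a single segment-relative add on the local CPU's irq_cpustat_t.

static void handle_perf_irq(void)
{
	/* bump the per-cpu perf counter IRQ statistic defined above */
	inc_irq_stat(apic_perf_irqs);
}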
#ifndef _ASM_X86_HARDIRQ_32_H
#define _ASM_X86_HARDIRQ_32_H
#include <linux/threads.h>
#include <linux/irq.h>
typedef struct {
unsigned int __softirq_pending;
unsigned long idle_timestamp;
unsigned int __nmi_count; /* arch dependent */
unsigned int apic_timer_irqs; /* arch dependent */
unsigned int apic_perf_irqs; /* arch dependent */
unsigned int irq0_irqs;
unsigned int irq_resched_count;
unsigned int irq_call_count;
unsigned int irq_tlb_count;
unsigned int irq_thermal_count;
unsigned int irq_spurious_count;
} ____cacheline_aligned irq_cpustat_t;
DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
#define MAX_HARDIRQS_PER_CPU NR_VECTORS
#define __ARCH_IRQ_STAT
#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member)
#define inc_irq_stat(member) (__get_cpu_var(irq_stat).member++)
void ack_bad_irq(unsigned int irq);
#include <linux/irq_cpustat.h>
#endif /* _ASM_X86_HARDIRQ_32_H */
#ifndef _ASM_X86_HARDIRQ_64_H
#define _ASM_X86_HARDIRQ_64_H
#include <linux/threads.h>
#include <linux/irq.h>
#include <asm/apic.h>
typedef struct {
unsigned int __softirq_pending;
unsigned int __nmi_count; /* arch dependent */
unsigned int apic_timer_irqs; /* arch dependent */
unsigned int apic_perf_irqs; /* arch dependent */
unsigned int irq0_irqs;
unsigned int irq_resched_count;
unsigned int irq_call_count;
unsigned int irq_tlb_count;
unsigned int irq_thermal_count;
unsigned int irq_spurious_count;
unsigned int irq_threshold_count;
} ____cacheline_aligned irq_cpustat_t;
DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
#define MAX_HARDIRQS_PER_CPU NR_VECTORS
#define __ARCH_IRQ_STAT 1
#define inc_irq_stat(member) percpu_add(irq_stat.member, 1)
#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)
#define __ARCH_SET_SOFTIRQ_PENDING 1
#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x))
#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x))
extern void ack_bad_irq(unsigned int irq);
#endif /* _ASM_X86_HARDIRQ_64_H */
#ifdef CONFIG_X86_32 /*
# include "irq_regs_32.h" * Per-cpu current frame pointer - the location of the last exception frame on
#else * the stack, stored in the per-cpu area.
# include "irq_regs_64.h" *
#endif * Jeremy Fitzhardinge <jeremy@goop.org>
*/
#ifndef _ASM_X86_IRQ_REGS_H
#define _ASM_X86_IRQ_REGS_H
#include <asm/percpu.h>
#define ARCH_HAS_OWN_IRQ_REGS
DECLARE_PER_CPU(struct pt_regs *, irq_regs);
static inline struct pt_regs *get_irq_regs(void)
{
return percpu_read(irq_regs);
}
static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
{
struct pt_regs *old_regs;
old_regs = get_irq_regs();
percpu_write(irq_regs, new_regs);
return old_regs;
}
#endif /* _ASM_X86_IRQ_REGS_32_H */
/*
* Per-cpu current frame pointer - the location of the last exception frame on
* the stack, stored in the per-cpu area.
*
* Jeremy Fitzhardinge <jeremy@goop.org>
*/
#ifndef _ASM_X86_IRQ_REGS_32_H
#define _ASM_X86_IRQ_REGS_32_H
#include <asm/percpu.h>
#define ARCH_HAS_OWN_IRQ_REGS
DECLARE_PER_CPU(struct pt_regs *, irq_regs);
static inline struct pt_regs *get_irq_regs(void)
{
return percpu_read(irq_regs);
}
static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
{
struct pt_regs *old_regs;
old_regs = get_irq_regs();
percpu_write(irq_regs, new_regs);
return old_regs;
}
#endif /* _ASM_X86_IRQ_REGS_32_H */
#include <asm-generic/irq_regs.h>
@@ -49,31 +49,33 @@
  * some of the following vectors are 'rare', they are merged
  * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
  * TLB, reschedule and local APIC vectors are performance-critical.
- *
- *  Vectors 0xf0-0xfa are free (reserved for future Linux use).
  */
 #ifdef CONFIG_X86_32
 # define SPURIOUS_APIC_VECTOR		0xff
 # define ERROR_APIC_VECTOR		0xfe
-# define INVALIDATE_TLB_VECTOR		0xfd
-# define RESCHEDULE_VECTOR		0xfc
-# define CALL_FUNCTION_VECTOR		0xfb
-# define CALL_FUNCTION_SINGLE_VECTOR	0xfa
-# define THERMAL_APIC_VECTOR		0xf0
+# define RESCHEDULE_VECTOR		0xfd
+# define CALL_FUNCTION_VECTOR		0xfc
+# define CALL_FUNCTION_SINGLE_VECTOR	0xfb
+# define THERMAL_APIC_VECTOR		0xfa
+/* 0xf8 - 0xf9 : free */
+# define INVALIDATE_TLB_VECTOR_END	0xf7
+# define INVALIDATE_TLB_VECTOR_START	0xf0	/* f0-f7 used for TLB flush */
+# define NUM_INVALIDATE_TLB_VECTORS	8
 #else
-#define SPURIOUS_APIC_VECTOR		0xff
-#define ERROR_APIC_VECTOR		0xfe
-#define RESCHEDULE_VECTOR		0xfd
-#define CALL_FUNCTION_VECTOR		0xfc
-#define CALL_FUNCTION_SINGLE_VECTOR	0xfb
-#define THERMAL_APIC_VECTOR		0xfa
-#define THRESHOLD_APIC_VECTOR		0xf9
-#define UV_BAU_MESSAGE			0xf8
-#define INVALIDATE_TLB_VECTOR_END	0xf7
-#define INVALIDATE_TLB_VECTOR_START	0xf0	/* f0-f7 used for TLB flush */
+# define SPURIOUS_APIC_VECTOR		0xff
+# define ERROR_APIC_VECTOR		0xfe
+# define RESCHEDULE_VECTOR		0xfd
+# define CALL_FUNCTION_VECTOR		0xfc
+# define CALL_FUNCTION_SINGLE_VECTOR	0xfb
+# define THERMAL_APIC_VECTOR		0xfa
+# define THRESHOLD_APIC_VECTOR		0xf9
+# define UV_BAU_MESSAGE			0xf8
+# define INVALIDATE_TLB_VECTOR_END	0xf7
+# define INVALIDATE_TLB_VECTOR_START	0xf0	/* f0-f7 used for TLB flush */
 
 #define NUM_INVALIDATE_TLB_VECTORS	8
......
...@@ -11,10 +11,26 @@ ...@@ -11,10 +11,26 @@
*/ */
#ifdef CONFIG_X86_SMP #ifdef CONFIG_X86_SMP
BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0,
smp_invalidate_interrupt)
BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1,
smp_invalidate_interrupt)
BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2,
smp_invalidate_interrupt)
BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3,
smp_invalidate_interrupt)
BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4,
smp_invalidate_interrupt)
BUILD_INTERRUPT3(invalidate_interrupt5,INVALIDATE_TLB_VECTOR_START+5,
smp_invalidate_interrupt)
BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6,
smp_invalidate_interrupt)
BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7,
smp_invalidate_interrupt)
#endif #endif
/* /*
......
...@@ -21,11 +21,54 @@ static inline void paravirt_activate_mm(struct mm_struct *prev, ...@@ -21,11 +21,54 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
int init_new_context(struct task_struct *tsk, struct mm_struct *mm); int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
void destroy_context(struct mm_struct *mm); void destroy_context(struct mm_struct *mm);
#ifdef CONFIG_X86_32
# include "mmu_context_32.h" static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
#else {
# include "mmu_context_64.h" #ifdef CONFIG_SMP
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
#endif
}
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
unsigned cpu = smp_processor_id();
if (likely(prev != next)) {
/* stop flush ipis for the previous mm */
cpu_clear(cpu, prev->cpu_vm_mask);
#ifdef CONFIG_SMP
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
percpu_write(cpu_tlbstate.active_mm, next);
#endif #endif
cpu_set(cpu, next->cpu_vm_mask);
/* Re-load page tables */
load_cr3(next->pgd);
/*
* load the LDT, if the LDT is different:
*/
if (unlikely(prev->context.ldt != next->context.ldt))
load_LDT_nolock(&next->context);
}
#ifdef CONFIG_SMP
else {
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
/* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload CR3
* to make sure to use no freed page tables.
*/
load_cr3(next->pgd);
load_LDT_nolock(&next->context);
}
}
#endif
}
#define activate_mm(prev, next) \ #define activate_mm(prev, next) \
do { \ do { \
...@@ -33,5 +76,17 @@ do { \ ...@@ -33,5 +76,17 @@ do { \
switch_mm((prev), (next), NULL); \ switch_mm((prev), (next), NULL); \
} while (0); } while (0);
#ifdef CONFIG_X86_32
#define deactivate_mm(tsk, mm) \
do { \
loadsegment(gs, 0); \
} while (0)
#else
#define deactivate_mm(tsk, mm) \
do { \
load_gs_index(0); \
loadsegment(fs, 0); \
} while (0)
#endif
#endif /* _ASM_X86_MMU_CONTEXT_H */ #endif /* _ASM_X86_MMU_CONTEXT_H */
#ifndef _ASM_X86_MMU_CONTEXT_32_H
#define _ASM_X86_MMU_CONTEXT_32_H
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
#ifdef CONFIG_SMP
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
#endif
}
static inline void switch_mm(struct mm_struct *prev,
struct mm_struct *next,
struct task_struct *tsk)
{
int cpu = smp_processor_id();
if (likely(prev != next)) {
/* stop flush ipis for the previous mm */
cpu_clear(cpu, prev->cpu_vm_mask);
#ifdef CONFIG_SMP
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
percpu_write(cpu_tlbstate.active_mm, next);
#endif
cpu_set(cpu, next->cpu_vm_mask);
/* Re-load page tables */
load_cr3(next->pgd);
/*
* load the LDT, if the LDT is different:
*/
if (unlikely(prev->context.ldt != next->context.ldt))
load_LDT_nolock(&next->context);
}
#ifdef CONFIG_SMP
else {
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
/* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload %cr3.
*/
load_cr3(next->pgd);
load_LDT_nolock(&next->context);
}
}
#endif
}
#define deactivate_mm(tsk, mm) \
asm("movl %0,%%gs": :"r" (0));
#endif /* _ASM_X86_MMU_CONTEXT_32_H */
#ifndef _ASM_X86_MMU_CONTEXT_64_H
#define _ASM_X86_MMU_CONTEXT_64_H
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
#ifdef CONFIG_SMP
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
#endif
}
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
unsigned cpu = smp_processor_id();
if (likely(prev != next)) {
/* stop flush ipis for the previous mm */
cpu_clear(cpu, prev->cpu_vm_mask);
#ifdef CONFIG_SMP
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
percpu_write(cpu_tlbstate.active_mm, next);
#endif
cpu_set(cpu, next->cpu_vm_mask);
load_cr3(next->pgd);
if (unlikely(next->context.ldt != prev->context.ldt))
load_LDT_nolock(&next->context);
}
#ifdef CONFIG_SMP
else {
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
/* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload CR3
* to make sure to use no freed page tables.
*/
load_cr3(next->pgd);
load_LDT_nolock(&next->context);
}
}
#endif
}
#define deactivate_mm(tsk, mm) \
do { \
load_gs_index(0); \
asm volatile("movl %0,%%fs"::"r"(0)); \
} while (0)
#endif /* _ASM_X86_MMU_CONTEXT_64_H */
#ifndef _ASM_X86_PDA_H
#define _ASM_X86_PDA_H
#ifndef __ASSEMBLY__
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/cache.h>
#include <linux/threads.h>
#include <asm/page.h>
#include <asm/percpu.h>
/* Per processor datastructure. %gs points to it while the kernel runs */
struct x8664_pda {
unsigned long unused1;
unsigned long unused2;
unsigned long unused3;
unsigned long unused4;
int unused5;
unsigned int unused6; /* 36 was cpunumber */
#ifdef CONFIG_CC_STACKPROTECTOR
unsigned long stack_canary; /* 40 stack canary value */
/* gcc-ABI: this canary MUST be at
offset 40!!! */
#endif
short in_bootmem; /* pda lives in bootmem */
} ____cacheline_aligned_in_smp;
DECLARE_PER_CPU(struct x8664_pda, __pda);
extern void pda_init(int);
#define cpu_pda(cpu) (&per_cpu(__pda, cpu))
#define read_pda(field) percpu_read(__pda.field)
#define write_pda(field, val) percpu_write(__pda.field, val)
#define add_pda(field, val) percpu_add(__pda.field, val)
#define sub_pda(field, val) percpu_sub(__pda.field, val)
#define or_pda(field, val) percpu_or(__pda.field, val)
/* This is not atomic against other CPUs -- CPU preemption needs to be off */
#define test_and_clear_bit_pda(bit, field) \
x86_test_and_clear_bit_percpu(bit, __pda.field)
#endif
#endif /* _ASM_X86_PDA_H */
@@ -75,7 +75,7 @@ do {						\
 	case 8:					\
 		asm(op "q %1,"__percpu_arg(0)	\
 		    : "+m" (var)		\
-		    : "r" ((T__)val));		\
+		    : "re" ((T__)val));		\
 		break;				\
 	default: __bad_percpu_size();		\
 	}					\
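A hedged aside on the one-character change above: GCC's "e" constraint accepts a 32-bit sign-extended immediate on x86-64, so "re" lets the 64-bit case take either a register or an immediate operand. A minimal user-space sketch (hypothetical names, not kernel code):

/* With "re", GCC may emit "addq $16,(%rdi)"-style forms directly instead of
 * first loading the constant into a scratch register, which is all the plain
 * "r" constraint allowed. */
static inline void add64(unsigned long *p, unsigned long val)
{
	asm("addq %1,%0" : "+m" (*p) : "re" (val));
}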
@@ -133,12 +133,6 @@ do {						\
 /* We can use this directly for local CPU (faster). */
 DECLARE_PER_CPU(unsigned long, this_cpu_off);
 
-#ifdef CONFIG_X86_64
-extern void load_pda_offset(int cpu);
-#else
-static inline void load_pda_offset(int cpu) { }
-#endif
-
 #endif /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_SMP
......
...@@ -11,7 +11,6 @@ ...@@ -11,7 +11,6 @@
#include <asm/processor.h> #include <asm/processor.h>
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/threads.h> #include <linux/threads.h>
#include <asm/pda.h>
extern pud_t level3_kernel_pgt[512]; extern pud_t level3_kernel_pgt[512];
extern pud_t level3_ident_pgt[512]; extern pud_t level3_ident_pgt[512];
......
@@ -379,8 +379,29 @@ union thread_xstate {
 #ifdef CONFIG_X86_64
 DECLARE_PER_CPU(struct orig_ist, orig_ist);
 
-DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack);
+union irq_stack_union {
+	char irq_stack[IRQ_STACK_SIZE];
+	/*
+	 * GCC hardcodes the stack canary as %gs:40.  Since the
+	 * irq_stack is the object at %gs:0, we reserve the bottom
+	 * 48 bytes of the irq stack for the canary.
+	 */
+	struct {
+		char gs_base[40];
+		unsigned long stack_canary;
+	};
+};
+
+DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
 DECLARE_PER_CPU(char *, irq_stack_ptr);
+
+static inline void load_gs_base(int cpu)
+{
+	/* Memory clobbers used to order pda/percpu accesses */
+	mb();
+	wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
+	mb();
+}
 #endif
 
 extern void print_cpu_info(struct cpuinfo_x86 *);
......
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
# include <asm/io_apic.h> # include <asm/io_apic.h>
# endif # endif
#endif #endif
#include <asm/pda.h>
#include <asm/thread_info.h> #include <asm/thread_info.h>
#include <asm/cpumask.h> #include <asm/cpumask.h>
......
#ifndef _ASM_STACKPROTECTOR_H
#define _ASM_STACKPROTECTOR_H 1
#include <asm/tsc.h>
#include <asm/processor.h>
/*
* Initialize the stackprotector canary value.
*
* NOTE: this must only be called from functions that never return,
* and it must always be inlined.
*/
static __always_inline void boot_init_stack_canary(void)
{
u64 canary;
u64 tsc;
/*
* Build time only check to make sure the stack_canary is at
* offset 40 in the pda; this is a gcc ABI requirement
*/
BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
/*
* We both use the random pool and the current TSC as a source
* of randomness. The TSC only matters for very early init,
* there it already has some randomness on most systems. Later
* on during the bootup the random pool has true entropy too.
*/
get_random_bytes(&canary, sizeof(canary));
tsc = __native_read_tsc();
canary += tsc + (tsc << 32UL);
current->stack_canary = canary;
percpu_write(irq_stack_union.stack_canary, canary);
}
#endif
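The %gs:40 layout that boot_init_stack_canary() relies on can be sanity-checked outside the kernel as well; the following is a user-space sketch (a hypothetical mirror of irq_stack_union, assuming an 8-byte unsigned long), not kernel code, showing why gs_base is sized to exactly 40 bytes:

#include <assert.h>
#include <stddef.h>

union irq_stack_union_mirror {
	char irq_stack[16384];		/* stand-in for IRQ_STACK_SIZE */
	struct {
		char gs_base[40];
		unsigned long stack_canary;
	};
};

int main(void)
{
	/* the canary must land at the slot gcc addresses as %gs:40 */
	assert(offsetof(union irq_stack_union_mirror, stack_canary) == 40);
	return 0;
}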
...@@ -86,27 +86,44 @@ do { \ ...@@ -86,27 +86,44 @@ do { \
, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
"r12", "r13", "r14", "r15" "r12", "r13", "r14", "r15"
#ifdef CONFIG_CC_STACKPROTECTOR
#define __switch_canary \
"movq %P[task_canary](%%rsi),%%r8\n\t" \
"movq %%r8,"__percpu_arg([gs_canary])"\n\t"
#define __switch_canary_oparam \
, [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary))
#define __switch_canary_iparam \
, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
#else /* CC_STACKPROTECTOR */
#define __switch_canary
#define __switch_canary_oparam
#define __switch_canary_iparam
#endif /* CC_STACKPROTECTOR */
/* Save restore flags to clear handle leaking NT */ /* Save restore flags to clear handle leaking NT */
#define switch_to(prev, next, last) \ #define switch_to(prev, next, last) \
asm volatile(SAVE_CONTEXT \ asm volatile(SAVE_CONTEXT \
"movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
"movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
"call __switch_to\n\t" \ "call __switch_to\n\t" \
".globl thread_return\n" \ ".globl thread_return\n" \
"thread_return:\n\t" \ "thread_return:\n\t" \
"movq "__percpu_arg([current_task])",%%rsi\n\t" \ "movq "__percpu_arg([current_task])",%%rsi\n\t" \
__switch_canary \
"movq %P[thread_info](%%rsi),%%r8\n\t" \ "movq %P[thread_info](%%rsi),%%r8\n\t" \
LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
"movq %%rax,%%rdi\n\t" \ "movq %%rax,%%rdi\n\t" \
"jc ret_from_fork\n\t" \ "jc ret_from_fork\n\t" \
RESTORE_CONTEXT \ RESTORE_CONTEXT \
: "=a" (last) \ : "=a" (last) \
__switch_canary_oparam \
: [next] "S" (next), [prev] "D" (prev), \ : [next] "S" (next), [prev] "D" (prev), \
[threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
[ti_flags] "i" (offsetof(struct thread_info, flags)), \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \
[tif_fork] "i" (TIF_FORK), \ [tif_fork] "i" (TIF_FORK), \
[thread_info] "i" (offsetof(struct task_struct, stack)), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \
[current_task] "m" (per_cpu_var(current_task)) \ [current_task] "m" (per_cpu_var(current_task)) \
__switch_canary_iparam \
: "memory", "cc" __EXTRA_CLOBBER) : "memory", "cc" __EXTRA_CLOBBER)
#endif #endif
......
@@ -190,9 +190,20 @@ extern int __node_distance(int, int);
 
 #else /* !CONFIG_NUMA */
 
-#define numa_node_id()		0
-#define cpu_to_node(cpu)	0
-#define early_cpu_to_node(cpu)	0
+static inline int numa_node_id(void)
+{
+	return 0;
+}
+
+static inline int cpu_to_node(int cpu)
+{
+	return 0;
+}
+
+static inline int early_cpu_to_node(int cpu)
+{
+	return 0;
+}
 
 static inline const cpumask_t *cpumask_of_node(int node)
 {
......
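One likely reason for converting the !NUMA stubs above from macros to static inlines (an assumption, not stated in the patch): inline stubs keep type checking and "use" their argument, which #define stubs silently drop. A small sketch of the difference, with hypothetical names:

/* The macro ignores 'cpu' entirely, so type errors and unused-variable
 * warnings at call sites slip through; the inline still type-checks it. */
#define cpu_to_node_stub_macro(cpu)	0

static inline int cpu_to_node_stub_inline(int cpu)
{
	(void)cpu;		/* argument is type-checked, then unused */
	return 0;
}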
#ifndef _ASM_X86_UV_UV_H
#define _ASM_X86_UV_UV_H
enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
#ifdef CONFIG_X86_UV
extern enum uv_system_type get_uv_system_type(void);
extern int is_uv_system(void);
extern void uv_cpu_init(void);
extern void uv_system_init(void);
extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm,
unsigned long va,
unsigned int cpu);
#else /* X86_UV */
static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
static inline int is_uv_system(void) { return 0; }
static inline void uv_cpu_init(void) { }
static inline void uv_system_init(void) { }
static inline int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
{ return 1; }
static inline const struct cpumask *
uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
unsigned long va, unsigned int cpu)
{ return cpumask; }
#endif /* X86_UV */
#endif /* _ASM_X86_UV_UV_H */
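The new asm/uv/uv.h above follows the usual config-stub pattern: real prototypes under CONFIG_X86_UV, empty static inlines otherwise, so call sites need no #ifdefs of their own. A generic sketch of the same pattern (hypothetical CONFIG_FOO/foo_init names, for illustration only):

/* Callers can unconditionally call foo_init(); in a !CONFIG_FOO build the
 * empty inline compiles away to nothing. */
#ifdef CONFIG_FOO
extern void foo_init(void);
#else
static inline void foo_init(void) { }
#endif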
...@@ -325,8 +325,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) ...@@ -325,8 +325,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
#define cpubit_isset(cpu, bau_local_cpumask) \ #define cpubit_isset(cpu, bau_local_cpumask) \
test_bit((cpu), (bau_local_cpumask).bits) test_bit((cpu), (bau_local_cpumask).bits)
extern int uv_flush_tlb_others(struct cpumask *,
struct mm_struct *, unsigned long);
extern void uv_bau_message_intr1(void); extern void uv_bau_message_intr1(void);
extern void uv_bau_timeout_intr1(void); extern void uv_bau_timeout_intr1(void);
......
...@@ -23,6 +23,7 @@ nostackp := $(call cc-option, -fno-stack-protector) ...@@ -23,6 +23,7 @@ nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
CFLAGS_hpet.o := $(nostackp) CFLAGS_hpet.o := $(nostackp)
CFLAGS_tsc.o := $(nostackp) CFLAGS_tsc.o := $(nostackp)
CFLAGS_paravirt.o := $(nostackp)
obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
...@@ -57,7 +58,7 @@ obj-$(CONFIG_PCI) += early-quirks.o ...@@ -57,7 +58,7 @@ obj-$(CONFIG_PCI) += early-quirks.o
apm-y := apm_32.o apm-y := apm_32.o
obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_APM) += apm.o
obj-$(CONFIG_X86_SMP) += smp.o obj-$(CONFIG_X86_SMP) += smp.o
obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o
obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_32_SMP) += smpcommon.o
obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
...@@ -114,10 +115,11 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64 ...@@ -114,10 +115,11 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64
### ###
# 64 bit specific files # 64 bit specific files
ifeq ($(CONFIG_X86_64),y) ifeq ($(CONFIG_X86_64),y)
obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o obj-y += genapic_64.o genapic_flat_64.o
obj-y += bios_uv.o uv_irq.o uv_sysfs.o
obj-y += genx2apic_cluster.o obj-y += genx2apic_cluster.o
obj-y += genx2apic_phys.o obj-y += genx2apic_phys.o
obj-$(CONFIG_X86_UV) += genx2apic_uv_x.o tlb_uv.o
obj-$(CONFIG_X86_UV) += bios_uv.o uv_irq.o uv_sysfs.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
obj-$(CONFIG_AUDIT) += audit_64.o obj-$(CONFIG_AUDIT) += audit_64.o
......
@@ -1132,7 +1132,9 @@ void __cpuinit setup_local_APIC(void)
 	int i, j;
 
 	if (disable_apic) {
+#ifdef CONFIG_X86_IO_APIC
 		disable_ioapic_setup();
+#endif
 		return;
 	}
@@ -1844,6 +1846,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	num_processors++;
 	cpu = cpumask_next_zero(-1, cpu_present_mask);
 
+	if (version != apic_version[boot_cpu_physical_apicid])
+		WARN_ONCE(1,
+			"ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
+			apic_version[boot_cpu_physical_apicid], cpu, version);
+
 	physid_set(apicid, phys_cpu_present_map);
 	if (apicid == boot_cpu_physical_apicid) {
 		/*
......
...@@ -11,7 +11,6 @@ ...@@ -11,7 +11,6 @@
#include <linux/hardirq.h> #include <linux/hardirq.h>
#include <linux/suspend.h> #include <linux/suspend.h>
#include <linux/kbuild.h> #include <linux/kbuild.h>
#include <asm/pda.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/segment.h> #include <asm/segment.h>
#include <asm/thread_info.h> #include <asm/thread_info.h>
...@@ -48,10 +47,6 @@ int main(void) ...@@ -48,10 +47,6 @@ int main(void)
#endif #endif
BLANK(); BLANK();
#undef ENTRY #undef ENTRY
#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
DEFINE(pda_size, sizeof(struct x8664_pda));
BLANK();
#undef ENTRY
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT
BLANK(); BLANK();
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
......
...@@ -29,9 +29,9 @@ ...@@ -29,9 +29,9 @@
#include <asm/apic.h> #include <asm/apic.h>
#include <mach_apic.h> #include <mach_apic.h>
#include <asm/genapic.h> #include <asm/genapic.h>
#include <asm/uv/uv.h>
#endif #endif
#include <asm/pda.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/desc.h> #include <asm/desc.h>
...@@ -65,23 +65,23 @@ cpumask_t cpu_sibling_setup_map; ...@@ -65,23 +65,23 @@ cpumask_t cpu_sibling_setup_map;
static struct cpu_dev *this_cpu __cpuinitdata; static struct cpu_dev *this_cpu __cpuinitdata;
DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
/* We need valid kernel segments for data and code in long mode too /*
* IRET will check the segment types kkeil 2000/10/28 * We need valid kernel segments for data and code in long mode too
* Also sysret mandates a special GDT layout * IRET will check the segment types kkeil 2000/10/28
*/ * Also sysret mandates a special GDT layout
/* The TLS descriptors are currently at a different place compared to i386. *
Hopefully nobody expects them at a fixed place (Wine?) */ * The TLS descriptors are currently at a different place compared to i386.
DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { * Hopefully nobody expects them at a fixed place (Wine?)
*/
[GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
[GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
[GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
} };
#else #else
DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
...@@ -113,9 +113,9 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { ...@@ -113,9 +113,9 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
[GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
} };
#endif #endif
} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
@@ -883,12 +883,13 @@ __setup("clearcpuid=", setup_disablecpuid);
 #ifdef CONFIG_X86_64
 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
 
-DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack);
+DEFINE_PER_CPU_FIRST(union irq_stack_union,
+		     irq_stack_union) __aligned(PAGE_SIZE);
 #ifdef CONFIG_SMP
 DEFINE_PER_CPU(char *, irq_stack_ptr);	/* will be set during per cpu init */
 #else
 DEFINE_PER_CPU(char *, irq_stack_ptr) =
-	per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
+	per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
 #endif
 
 DEFINE_PER_CPU(unsigned long, kernel_stack) =
@@ -897,15 +898,6 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack);
 
 DEFINE_PER_CPU(unsigned int, irq_count) = -1;
 
-void __cpuinit pda_init(int cpu)
-{
-	/* Setup up data that may be needed in __get_free_pages early */
-	loadsegment(fs, 0);
-	loadsegment(gs, 0);
-
-	load_pda_offset(cpu);
-}
-
 static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
 	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
 	__aligned(PAGE_SIZE);
@@ -969,9 +961,9 @@ void __cpuinit cpu_init(void)
 	struct task_struct *me;
 	int i;
 
-	/* CPU 0 is initialised in head64.c */
-	if (cpu != 0)
-		pda_init(cpu);
+	loadsegment(fs, 0);
+	loadsegment(gs, 0);
+	load_gs_base(cpu);
 
 #ifdef CONFIG_NUMA
 	if (cpu != 0 && percpu_read(node_number) == 0 &&
......
...@@ -145,13 +145,14 @@ typedef union { ...@@ -145,13 +145,14 @@ typedef union {
struct drv_cmd { struct drv_cmd {
unsigned int type; unsigned int type;
cpumask_var_t mask; const struct cpumask *mask;
drv_addr_union addr; drv_addr_union addr;
u32 val; u32 val;
}; };
static void do_drv_read(struct drv_cmd *cmd) static long do_drv_read(void *_cmd)
{ {
struct drv_cmd *cmd = _cmd;
u32 h; u32 h;
switch (cmd->type) { switch (cmd->type) {
...@@ -166,10 +167,12 @@ static void do_drv_read(struct drv_cmd *cmd) ...@@ -166,10 +167,12 @@ static void do_drv_read(struct drv_cmd *cmd)
default: default:
break; break;
} }
return 0;
} }
static void do_drv_write(struct drv_cmd *cmd) static long do_drv_write(void *_cmd)
{ {
struct drv_cmd *cmd = _cmd;
u32 lo, hi; u32 lo, hi;
switch (cmd->type) { switch (cmd->type) {
...@@ -186,30 +189,23 @@ static void do_drv_write(struct drv_cmd *cmd) ...@@ -186,30 +189,23 @@ static void do_drv_write(struct drv_cmd *cmd)
default: default:
break; break;
} }
return 0;
} }
static void drv_read(struct drv_cmd *cmd) static void drv_read(struct drv_cmd *cmd)
{ {
cpumask_t saved_mask = current->cpus_allowed;
cmd->val = 0; cmd->val = 0;
set_cpus_allowed_ptr(current, cmd->mask); work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd);
do_drv_read(cmd);
set_cpus_allowed_ptr(current, &saved_mask);
} }
static void drv_write(struct drv_cmd *cmd) static void drv_write(struct drv_cmd *cmd)
{ {
cpumask_t saved_mask = current->cpus_allowed;
unsigned int i; unsigned int i;
for_each_cpu(i, cmd->mask) { for_each_cpu(i, cmd->mask) {
set_cpus_allowed_ptr(current, cpumask_of(i)); work_on_cpu(i, do_drv_write, cmd);
do_drv_write(cmd);
} }
set_cpus_allowed_ptr(current, &saved_mask);
return;
} }
static u32 get_cur_val(const struct cpumask *mask) static u32 get_cur_val(const struct cpumask *mask)
...@@ -235,6 +231,7 @@ static u32 get_cur_val(const struct cpumask *mask) ...@@ -235,6 +231,7 @@ static u32 get_cur_val(const struct cpumask *mask)
return 0; return 0;
} }
cmd.mask = mask;
drv_read(&cmd); drv_read(&cmd);
dprintk("get_cur_val = %u\n", cmd.val); dprintk("get_cur_val = %u\n", cmd.val);
...@@ -366,7 +363,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) ...@@ -366,7 +363,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
return freq; return freq;
} }
static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq, static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
struct acpi_cpufreq_data *data) struct acpi_cpufreq_data *data)
{ {
unsigned int cur_freq; unsigned int cur_freq;
...@@ -401,9 +398,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, ...@@ -401,9 +398,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
return -ENODEV; return -ENODEV;
} }
if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL)))
return -ENOMEM;
perf = data->acpi_data; perf = data->acpi_data;
result = cpufreq_frequency_table_target(policy, result = cpufreq_frequency_table_target(policy,
data->freq_table, data->freq_table,
...@@ -448,9 +442,9 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, ...@@ -448,9 +442,9 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
/* cpufreq holds the hotplug lock, so we are safe from here on */ /* cpufreq holds the hotplug lock, so we are safe from here on */
if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
cpumask_and(cmd.mask, cpu_online_mask, policy->cpus); cmd.mask = policy->cpus;
else else
cpumask_copy(cmd.mask, cpumask_of(policy->cpu)); cmd.mask = cpumask_of(policy->cpu);
freqs.old = perf->states[perf->state].core_frequency * 1000; freqs.old = perf->states[perf->state].core_frequency * 1000;
freqs.new = data->freq_table[next_state].frequency; freqs.new = data->freq_table[next_state].frequency;
...@@ -477,7 +471,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, ...@@ -477,7 +471,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
perf->state = next_perf_state; perf->state = next_perf_state;
out: out:
free_cpumask_var(cmd.mask);
return result; return result;
} }
......
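A hedged note on the drv_read()/drv_write() change above: work_on_cpu() runs a callback on the named CPU via that CPU's workqueue and waits for its return value, so the caller's own cpus_allowed mask is never rewritten as it was in the removed set_cpus_allowed_ptr() dance. A minimal sketch of the calling convention as used in this diff (do_on_target() and run_on() are hypothetical):

/* work_on_cpu() takes a long (*fn)(void *) callback and propagates its
 * return value; the callback executes on the CPU given as the first arg. */
static long do_on_target(void *data)
{
	/* runs on the CPU passed to work_on_cpu() */
	return 0;
}

static long run_on(unsigned int cpu, void *data)
{
	return work_on_cpu(cpu, do_on_target, data);
}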
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/percpu.h> #include <linux/percpu.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/apic.h>
#include <asm/msr.h> #include <asm/msr.h>
#include <asm/mce.h> #include <asm/mce.h>
#include <asm/hw_irq.h> #include <asm/hw_irq.h>
......
...@@ -366,10 +366,12 @@ void __init efi_init(void) ...@@ -366,10 +366,12 @@ void __init efi_init(void)
SMBIOS_TABLE_GUID)) { SMBIOS_TABLE_GUID)) {
efi.smbios = config_tables[i].table; efi.smbios = config_tables[i].table;
printk(" SMBIOS=0x%lx ", config_tables[i].table); printk(" SMBIOS=0x%lx ", config_tables[i].table);
#ifdef CONFIG_X86_UV
} else if (!efi_guidcmp(config_tables[i].guid, } else if (!efi_guidcmp(config_tables[i].guid,
UV_SYSTEM_TABLE_GUID)) { UV_SYSTEM_TABLE_GUID)) {
efi.uv_systab = config_tables[i].table; efi.uv_systab = config_tables[i].table;
printk(" UVsystab=0x%lx ", config_tables[i].table); printk(" UVsystab=0x%lx ", config_tables[i].table);
#endif
} else if (!efi_guidcmp(config_tables[i].guid, } else if (!efi_guidcmp(config_tables[i].guid,
HCDP_TABLE_GUID)) { HCDP_TABLE_GUID)) {
efi.hcdp = config_tables[i].table; efi.hcdp = config_tables[i].table;
......
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include <asm/proto.h> #include <asm/proto.h>
#include <asm/efi.h> #include <asm/efi.h>
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
#include <asm/fixmap.h>
static pgd_t save_pgd __initdata; static pgd_t save_pgd __initdata;
static unsigned long efi_flags __initdata; static unsigned long efi_flags __initdata;
......
@@ -672,7 +672,7 @@ common_interrupt:
 ENDPROC(common_interrupt)
 	CFI_ENDPROC
 
-#define BUILD_INTERRUPT(name, nr)	\
+#define BUILD_INTERRUPT3(name, nr, fn)	\
 ENTRY(name)				\
 	RING0_INT_FRAME;		\
 	pushl $~(nr);			\
@@ -680,11 +680,13 @@ ENTRY(name)				\
 	SAVE_ALL;			\
 	TRACE_IRQS_OFF			\
 	movl %esp,%eax;			\
-	call smp_##name;		\
+	call fn;			\
 	jmp ret_from_intr;		\
 	CFI_ENDPROC;			\
ENDPROC(name)
 
+#define BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(name, nr, smp_##name)
+
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
......
...@@ -982,8 +982,10 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ ...@@ -982,8 +982,10 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
#endif #endif
#ifdef CONFIG_X86_UV
apicinterrupt UV_BAU_MESSAGE \ apicinterrupt UV_BAU_MESSAGE \
uv_bau_message_intr1 uv_bau_message_interrupt uv_bau_message_intr1 uv_bau_message_interrupt
#endif
apicinterrupt LOCAL_TIMER_VECTOR \ apicinterrupt LOCAL_TIMER_VECTOR \
apic_timer_interrupt smp_apic_timer_interrupt apic_timer_interrupt smp_apic_timer_interrupt
......
...@@ -32,7 +32,9 @@ extern struct genapic apic_x2apic_cluster; ...@@ -32,7 +32,9 @@ extern struct genapic apic_x2apic_cluster;
struct genapic __read_mostly *genapic = &apic_flat; struct genapic __read_mostly *genapic = &apic_flat;
static struct genapic *apic_probe[] __initdata = { static struct genapic *apic_probe[] __initdata = {
#ifdef CONFIG_X86_UV
&apic_x2apic_uv_x, &apic_x2apic_uv_x,
#endif
&apic_x2apic_phys, &apic_x2apic_phys,
&apic_x2apic_cluster, &apic_x2apic_cluster,
&apic_physflat, &apic_physflat,
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include <asm/ipi.h> #include <asm/ipi.h>
#include <asm/genapic.h> #include <asm/genapic.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/uv/uv.h>
#include <asm/uv/uv_mmrs.h> #include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h> #include <asm/uv/uv_hub.h>
#include <asm/uv/bios.h> #include <asm/uv/bios.h>
......
...@@ -91,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data) ...@@ -91,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
if (console_loglevel == 10) if (console_loglevel == 10)
early_printk("Kernel alive\n"); early_printk("Kernel alive\n");
pda_init(0);
x86_64_start_reservations(real_mode_data); x86_64_start_reservations(real_mode_data);
} }
......
...@@ -429,12 +429,14 @@ is386: movl $2,%ecx # set MP ...@@ -429,12 +429,14 @@ is386: movl $2,%ecx # set MP
ljmp $(__KERNEL_CS),$1f ljmp $(__KERNEL_CS),$1f
1: movl $(__KERNEL_DS),%eax # reload all the segment registers 1: movl $(__KERNEL_DS),%eax # reload all the segment registers
movl %eax,%ss # after changing gdt. movl %eax,%ss # after changing gdt.
movl %eax,%fs # gets reset once there's real percpu
movl $(__USER_DS),%eax # DS/ES contains default USER segment movl $(__USER_DS),%eax # DS/ES contains default USER segment
movl %eax,%ds movl %eax,%ds
movl %eax,%es movl %eax,%es
movl $(__KERNEL_PERCPU), %eax
movl %eax,%fs # set this cpu's percpu
xorl %eax,%eax # Clear GS and LDT xorl %eax,%eax # Clear GS and LDT
movl %eax,%gs movl %eax,%gs
lldt %ax lldt %ax
...@@ -446,8 +448,6 @@ is386: movl $2,%ecx # set MP ...@@ -446,8 +448,6 @@ is386: movl $2,%ecx # set MP
movb $1, ready movb $1, ready
cmpb $0,%cl # the first CPU calls start_kernel cmpb $0,%cl # the first CPU calls start_kernel
je 1f je 1f
movl $(__KERNEL_PERCPU), %eax
movl %eax,%fs # set this cpu's percpu
movl (stack_start), %esp movl (stack_start), %esp
1: 1:
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
......
@@ -207,19 +207,15 @@ ENTRY(secondary_startup_64)
 #ifdef CONFIG_SMP
 	/*
-	 * early_gdt_base should point to the gdt_page in static percpu init
-	 * data area.  Computing this requires two symbols - __per_cpu_load
-	 * and per_cpu__gdt_page.  As linker can't do no such relocation, do
-	 * it by hand.  As early_gdt_descr is manipulated by C code for
-	 * secondary CPUs, this should be done only once for the boot CPU
-	 * when early_gdt_descr_base contains zero.
+	 * Fix up static pointers that need __per_cpu_load added.  The assembler
+	 * is unable to do this directly.  This is only needed for the boot cpu.
+	 * These values are set up with the correct base addresses by C code for
+	 * secondary cpus.
 	 */
-	movq	early_gdt_descr_base(%rip), %rax
-	testq	%rax, %rax
-	jnz	1f
-	movq	$__per_cpu_load, %rax
-	addq	$per_cpu__gdt_page, %rax
-	movq	%rax, early_gdt_descr_base(%rip)
+	movq	initial_gs(%rip), %rax
+	cmpl	$0, per_cpu__cpu_number(%rax)
+	jne	1f
+	addq	%rax, early_gdt_descr_base(%rip)
 1:
 #endif
 	/*
@@ -246,13 +242,10 @@ ENTRY(secondary_startup_64)
 	/* Set up %gs.
 	 *
-	 * On SMP, %gs should point to the per-cpu area.  For initial
-	 * boot, make %gs point to the init data section.  For a
-	 * secondary CPU, initial_gs should be set to its pda address
-	 * before the CPU runs this code.
-	 *
-	 * On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't
-	 * change.
+	 * The base of %gs always points to the bottom of the irqstack
+	 * union.  If the stack protector canary is enabled, it is
+	 * located at %gs:40.  Note that, on SMP, the boot cpu uses
+	 * init data section till per cpu areas are set up.
 	 */
 	movl	$MSR_GS_BASE,%ecx
 	movq	initial_gs(%rip),%rax
...@@ -285,7 +278,7 @@ ENTRY(secondary_startup_64) ...@@ -285,7 +278,7 @@ ENTRY(secondary_startup_64)
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
.quad __per_cpu_load .quad __per_cpu_load
#else #else
.quad PER_CPU_VAR(__pda) .quad PER_CPU_VAR(irq_stack_union)
#endif #endif
__FINITDATA __FINITDATA
...@@ -431,12 +424,8 @@ NEXT_PAGE(level2_spare_pgt) ...@@ -431,12 +424,8 @@ NEXT_PAGE(level2_spare_pgt)
.globl early_gdt_descr .globl early_gdt_descr
early_gdt_descr: early_gdt_descr:
.word GDT_ENTRIES*8-1 .word GDT_ENTRIES*8-1
#ifdef CONFIG_SMP
early_gdt_descr_base: early_gdt_descr_base:
.quad 0x0000000000000000
#else
.quad per_cpu__gdt_page .quad per_cpu__gdt_page
#endif
ENTRY(phys_base) ENTRY(phys_base)
/* This must match the first entry in level2_kernel_pgt */ /* This must match the first entry in level2_kernel_pgt */
......
...@@ -3765,7 +3765,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) ...@@ -3765,7 +3765,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
} }
#endif /* CONFIG_HT_IRQ */ #endif /* CONFIG_HT_IRQ */
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_UV
/* /*
* Re-target the irq to the specified CPU and enable the specified MMR located * Re-target the irq to the specified CPU and enable the specified MMR located
* on the specified blade to allow the sending of MSIs to the specified CPU. * on the specified blade to allow the sending of MSIs to the specified CPU.
......
...@@ -18,10 +18,14 @@ ...@@ -18,10 +18,14 @@
#include <linux/smp.h> #include <linux/smp.h>
#include <asm/io_apic.h> #include <asm/io_apic.h>
#include <asm/idle.h> #include <asm/idle.h>
#include <asm/apic.h>
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat);
DEFINE_PER_CPU(struct pt_regs *, irq_regs);
EXPORT_PER_CPU_SYMBOL(irq_regs);
/* /*
* Probabilistic stack overflow check: * Probabilistic stack overflow check:
* *
......
@@ -149,8 +149,15 @@ void __init native_init_IRQ(void)
 	 */
 	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
-	/* IPI for invalidation */
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+	/* IPIs for invalidation */
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
 
 	/* IPI for generic function call */
 	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
......
...@@ -108,7 +108,6 @@ void cpu_idle(void) ...@@ -108,7 +108,6 @@ void cpu_idle(void)
play_dead(); play_dead();
local_irq_disable(); local_irq_disable();
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
/* Don't trace irqs off for idle */ /* Don't trace irqs off for idle */
stop_critical_timings(); stop_critical_timings();
pm_idle(); pm_idle();
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <stdarg.h> #include <stdarg.h>
#include <linux/stackprotector.h>
#include <linux/cpu.h> #include <linux/cpu.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/sched.h> #include <linux/sched.h>
...@@ -46,7 +47,6 @@ ...@@ -46,7 +47,6 @@
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/i387.h> #include <asm/i387.h>
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h> #include <asm/prctl.h>
#include <asm/desc.h> #include <asm/desc.h>
#include <asm/proto.h> #include <asm/proto.h>
...@@ -117,6 +117,17 @@ static inline void play_dead(void) ...@@ -117,6 +117,17 @@ static inline void play_dead(void)
void cpu_idle(void) void cpu_idle(void)
{ {
current_thread_info()->status |= TS_POLLING; current_thread_info()->status |= TS_POLLING;
/*
* If we're the non-boot CPU, nothing set the PDA stack
* canary up for us - and if we are the boot CPU we have
* a 0 stack canary. This is a good place for updating
* it, as we wont ever return from this function (so the
* invalid canaries already on the stack wont ever
* trigger):
*/
boot_init_stack_canary();
/* endless idle loop with no priority at all */ /* endless idle loop with no priority at all */
while (1) { while (1) {
tick_nohz_stop_sched_tick(1); tick_nohz_stop_sched_tick(1);
...@@ -626,14 +637,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) ...@@ -626,14 +637,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
percpu_write(kernel_stack, percpu_write(kernel_stack,
(unsigned long)task_stack_page(next_p) + (unsigned long)task_stack_page(next_p) +
THREAD_SIZE - KERNEL_STACK_OFFSET); THREAD_SIZE - KERNEL_STACK_OFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
write_pda(stack_canary, next_p->stack_canary);
/*
* Build time only check to make sure the stack_canary is at
* offset 40 in the pda; this is a gcc ABI requirement
*/
BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif
/* /*
* Now maybe reload the debug registers and handle I/O bitmaps * Now maybe reload the debug registers and handle I/O bitmaps
......
...@@ -77,30 +77,6 @@ static void __init setup_node_to_cpumask_map(void); ...@@ -77,30 +77,6 @@ static void __init setup_node_to_cpumask_map(void);
static inline void setup_node_to_cpumask_map(void) { } static inline void setup_node_to_cpumask_map(void) { }
#endif #endif
/*
* Define load_pda_offset() and per-cpu __pda for x86_64.
* load_pda_offset() is responsible for loading the offset of pda into
* %gs.
*
* On SMP, pda offset also duals as percpu base address and thus it
* should be at the start of per-cpu area. To achieve this, it's
* preallocated in vmlinux_64.lds.S directly instead of using
* DEFINE_PER_CPU().
*/
#ifdef CONFIG_X86_64
void __cpuinit load_pda_offset(int cpu)
{
/* Memory clobbers used to order pda/percpu accesses */
mb();
wrmsrl(MSR_GS_BASE, cpu_pda(cpu));
mb();
}
#ifndef CONFIG_SMP
DEFINE_PER_CPU(struct x8664_pda, __pda);
#endif
EXPORT_PER_CPU_SYMBOL(__pda);
#endif /* CONFIG_SMP && CONFIG_X86_64 */
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
/* correctly size the local cpu masks */ /* correctly size the local cpu masks */
...@@ -207,15 +183,13 @@ void __init setup_per_cpu_areas(void) ...@@ -207,15 +183,13 @@ void __init setup_per_cpu_areas(void)
per_cpu(cpu_number, cpu) = cpu; per_cpu(cpu_number, cpu) = cpu;
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack_ptr, cpu) =
(char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64; per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64;
/* /*
* CPU0 modified pda in the init data area, reload pda * Up to this point, CPU0 has been using .data.init
* offset for CPU0 and clear the area for others. * area. Reload %gs offset for CPU0.
*/ */
if (cpu == 0) if (cpu == 0)
load_pda_offset(0); load_gs_base(cpu);
else
memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
#endif #endif
DBG("PERCPU: cpu %4d %p\n", cpu, ptr); DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
......
...@@ -62,6 +62,7 @@ ...@@ -62,6 +62,7 @@
#include <asm/vmi.h> #include <asm/vmi.h>
#include <asm/genapic.h> #include <asm/genapic.h>
#include <asm/setup.h> #include <asm/setup.h>
#include <asm/uv/uv.h>
#include <linux/mc146818rtc.h> #include <linux/mc146818rtc.h>
#include <mach_apic.h> #include <mach_apic.h>
......
#include <linux/spinlock.h>
#include <linux/cpu.h>
#include <linux/interrupt.h>
#include <asm/tlbflush.h>
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
= { &init_mm, 0, };
/* must come after the send_IPI functions above for inlining */
#include <mach_ipi.h>
/*
* Smarter SMP flushing macros.
* c/o Linus Torvalds.
*
* These mean you can really definitely utterly forget about
* writing to user space from interrupts. (It's not allowed anyway).
*
* Optimizations Manfred Spraul <manfred@colorfullife.com>
*/
static cpumask_var_t flush_cpumask;
static struct mm_struct *flush_mm;
static unsigned long flush_va;
static DEFINE_SPINLOCK(tlbstate_lock);
/*
* We cannot call mmdrop() because we are in interrupt context,
* instead update mm->cpu_vm_mask.
*
* We need to reload %cr3 since the page tables may be going
* away from under us..
*/
void leave_mm(int cpu)
{
BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK);
cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
load_cr3(swapper_pg_dir);
}
EXPORT_SYMBOL_GPL(leave_mm);
/*
*
* The flush IPI assumes that a thread switch happens in this order:
* [cpu0: the cpu that switches]
* 1) switch_mm() either 1a) or 1b)
* 1a) thread switch to a different mm
* 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
* Stop ipi delivery for the old mm. This is not synchronized with
* the other cpus, but smp_invalidate_interrupt ignore flush ipis
* for the wrong mm, and in the worst case we perform a superfluous
* tlb flush.
* 1a2) set cpu_tlbstate to TLBSTATE_OK
* Now the smp_invalidate_interrupt won't call leave_mm if cpu0
* was in lazy tlb mode.
* 1a3) update cpu_tlbstate[].active_mm
* Now cpu0 accepts tlb flushes for the new mm.
* 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
* Now the other cpus will send tlb flush ipis.
* 1a4) change cr3.
* 1b) thread switch without mm change
* cpu_tlbstate[].active_mm is correct, cpu0 already handles
* flush ipis.
* 1b1) set cpu_tlbstate to TLBSTATE_OK
* 1b2) test_and_set the cpu bit in cpu_vm_mask.
* Atomically set the bit [other cpus will start sending flush ipis],
* and test the bit.
* 1b3) if the bit was 0: leave_mm was called, flush the tlb.
* 2) switch %%esp, ie current
*
* The interrupt must handle 2 special cases:
* - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
* - the cpu performs speculative tlb reads, i.e. even if the cpu only
* runs in kernel space, the cpu could load tlb entries for user space
* pages.
*
* The good news is that cpu_tlbstate is local to each cpu, no
* write/read ordering problems.
*/
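
The 1b) path above is the subtle one. A minimal, hedged sketch of that ordering, using the names from the comment (illustration only; not the kernel's switch_mm(), which lives in asm/mmu_context.h):

/* Illustration: thread switch without an mm change, leaving lazy TLB mode. */
static inline void sketch_lazy_tlb_resume(int cpu, struct mm_struct *mm)
{
	percpu_write(cpu_tlbstate.state, TLBSTATE_OK);	/* 1b1 */
	if (!cpu_test_and_set(cpu, mm->cpu_vm_mask))	/* 1b2: atomic set + test */
		load_cr3(mm->pgd);			/* 1b3: bit was 0, leave_mm() ran, flush */
}
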
/*
* TLB flush IPI:
*
* 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
* 2) Leave the mm if we are in the lazy tlb mode.
*/
void smp_invalidate_interrupt(struct pt_regs *regs)
{
unsigned long cpu;
cpu = get_cpu();
if (!cpumask_test_cpu(cpu, flush_cpumask))
goto out;
/*
* This was a BUG() but until someone can quote me the
* line from the intel manual that guarantees an IPI to
* multiple CPUs is retried _only_ on the erroring CPUs,
* it's staying as a return
*
* BUG();
*/
if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
if (flush_va == TLB_FLUSH_ALL)
local_flush_tlb();
else
__flush_tlb_one(flush_va);
} else
leave_mm(cpu);
}
ack_APIC_irq();
smp_mb__before_clear_bit();
cpumask_clear_cpu(cpu, flush_cpumask);
smp_mb__after_clear_bit();
out:
put_cpu_no_resched();
inc_irq_stat(irq_tlb_count);
}
void native_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm, unsigned long va)
{
/*
* - mask must exist :)
*/
BUG_ON(cpumask_empty(cpumask));
BUG_ON(!mm);
/*
* I'm not happy about this global shared spinlock in the
* MM hot path, but we'll see how contended it is.
* AK: x86-64 has a faster method that could be ported.
*/
spin_lock(&tlbstate_lock);
cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id()));
#ifdef CONFIG_HOTPLUG_CPU
/* If a CPU which we ran on has gone down, OK. */
cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask);
if (unlikely(cpumask_empty(flush_cpumask))) {
spin_unlock(&tlbstate_lock);
return;
}
#endif
flush_mm = mm;
flush_va = va;
/*
* Make the above memory operations globally visible before
* sending the IPI.
*/
smp_mb();
/*
* We have to send the IPI only to
* CPUs affected.
*/
send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR);
while (!cpumask_empty(flush_cpumask))
/* nothing. lockup detection does not belong here */
cpu_relax();
flush_mm = NULL;
flush_va = 0;
spin_unlock(&tlbstate_lock);
}
void flush_tlb_current_task(void)
{
struct mm_struct *mm = current->mm;
preempt_disable();
local_flush_tlb();
if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
preempt_enable();
}
void flush_tlb_mm(struct mm_struct *mm)
{
preempt_disable();
if (current->active_mm == mm) {
if (current->mm)
local_flush_tlb();
else
leave_mm(smp_processor_id());
}
if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
preempt_enable();
}
void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
{
struct mm_struct *mm = vma->vm_mm;
preempt_disable();
if (current->active_mm == mm) {
if (current->mm)
__flush_tlb_one(va);
else
leave_mm(smp_processor_id());
}
if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
flush_tlb_others(&mm->cpu_vm_mask, mm, va);
preempt_enable();
}
EXPORT_SYMBOL(flush_tlb_page);
static void do_flush_tlb_all(void *info)
{
unsigned long cpu = smp_processor_id();
__flush_tlb_all();
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
leave_mm(cpu);
}
void flush_tlb_all(void)
{
on_each_cpu(do_flush_tlb_all, NULL, 1);
}
static int init_flush_cpumask(void)
{
alloc_cpumask_var(&flush_cpumask, GFP_KERNEL);
return 0;
}
early_initcall(init_flush_cpumask);
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include <linux/kernel.h> #include <linux/kernel.h>
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
#include <asm/uv/uv.h>
#include <asm/uv/uv_mmrs.h> #include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h> #include <asm/uv/uv_hub.h>
#include <asm/uv/uv_bau.h> #include <asm/uv/uv_bau.h>
...@@ -209,14 +210,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc, ...@@ -209,14 +210,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
* *
* Send a broadcast and wait for a broadcast message to complete. * Send a broadcast and wait for a broadcast message to complete.
* *
* The cpumaskp mask contains the cpus the broadcast was sent to. * The flush_mask contains the cpus the broadcast was sent to.
* *
* Returns 1 if all remote flushing was done. The mask is zeroed. * Returns NULL if all remote flushing was done. The mask is zeroed.
* Returns 0 if some remote flushing remains to be done. The mask will have * Returns @flush_mask if some remote flushing remains to be done. The
* some bits still set. * mask will have some bits still set.
*/ */
int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
struct cpumask *cpumaskp) struct bau_desc *bau_desc,
struct cpumask *flush_mask)
{ {
int completion_status = 0; int completion_status = 0;
int right_shift; int right_shift;
...@@ -263,59 +265,69 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, ...@@ -263,59 +265,69 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
* Success, so clear the remote cpu's from the mask so we don't * Success, so clear the remote cpu's from the mask so we don't
* use the IPI method of shootdown on them. * use the IPI method of shootdown on them.
*/ */
for_each_cpu(bit, cpumaskp) { for_each_cpu(bit, flush_mask) {
blade = uv_cpu_to_blade_id(bit); blade = uv_cpu_to_blade_id(bit);
if (blade == this_blade) if (blade == this_blade)
continue; continue;
cpumask_clear_cpu(bit, cpumaskp); cpumask_clear_cpu(bit, flush_mask);
} }
if (!cpumask_empty(cpumaskp)) if (!cpumask_empty(flush_mask))
return 0; return flush_mask;
return 1; return NULL;
} }
/** /**
* uv_flush_tlb_others - globally purge translation cache of a virtual * uv_flush_tlb_others - globally purge translation cache of a virtual
* address or all TLB's * address or all TLB's
* @cpumaskp: mask of all cpu's in which the address is to be removed * @cpumask: mask of all cpu's in which the address is to be removed
* @mm: mm_struct containing virtual address range * @mm: mm_struct containing virtual address range
* @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
* @cpu: the current cpu
* *
* This is the entry point for initiating any UV global TLB shootdown. * This is the entry point for initiating any UV global TLB shootdown.
* *
* Purges the translation caches of all specified processors of the given * Purges the translation caches of all specified processors of the given
* virtual address, or purges all TLB's on specified processors. * virtual address, or purges all TLB's on specified processors.
* *
* The caller has derived the cpumaskp from the mm_struct and has subtracted * The caller has derived the cpumask from the mm_struct. This function
* the local cpu from the mask. This function is called only if there * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
* are bits set in the mask. (e.g. flush_tlb_page())
* *
* The cpumaskp is converted into a nodemask of the nodes containing * The cpumask is converted into a nodemask of the nodes containing
* the cpus. * the cpus.
* *
* Returns 1 if all remote flushing was done. * Note that this function should be called with preemption disabled.
* Returns 0 if some remote flushing remains to be done. *
* Returns NULL if all remote flushing was done.
* Returns pointer to cpumask if some remote flushing remains to be
* done. The returned pointer is valid till preemption is re-enabled.
*/ */
int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm, const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
unsigned long va) struct mm_struct *mm,
unsigned long va, unsigned int cpu)
{ {
static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask);
int i; int i;
int bit; int bit;
int blade; int blade;
int cpu; int uv_cpu;
int this_blade; int this_blade;
int locals = 0; int locals = 0;
struct bau_desc *bau_desc; struct bau_desc *bau_desc;
cpu = uv_blade_processor_id(); WARN_ON(!in_atomic());
cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
uv_cpu = uv_blade_processor_id();
this_blade = uv_numa_blade_id(); this_blade = uv_numa_blade_id();
bau_desc = __get_cpu_var(bau_control).descriptor_base; bau_desc = __get_cpu_var(bau_control).descriptor_base;
bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu; bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
i = 0; i = 0;
for_each_cpu(bit, cpumaskp) { for_each_cpu(bit, flush_mask) {
blade = uv_cpu_to_blade_id(bit); blade = uv_cpu_to_blade_id(bit);
BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
if (blade == this_blade) { if (blade == this_blade) {
...@@ -330,17 +342,17 @@ int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm, ...@@ -330,17 +342,17 @@ int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm,
* no off_node flushing; return status for local node * no off_node flushing; return status for local node
*/ */
if (locals) if (locals)
return 0; return flush_mask;
else else
return 1; return NULL;
} }
__get_cpu_var(ptcstats).requestor++; __get_cpu_var(ptcstats).requestor++;
__get_cpu_var(ptcstats).ntargeted += i; __get_cpu_var(ptcstats).ntargeted += i;
bau_desc->payload.address = va; bau_desc->payload.address = va;
bau_desc->payload.sending_cpu = smp_processor_id(); bau_desc->payload.sending_cpu = cpu;
return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp); return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
} }
/* /*
......
...@@ -59,7 +59,6 @@ ...@@ -59,7 +59,6 @@
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#include <asm/proto.h> #include <asm/proto.h>
#include <asm/pda.h>
#else #else
#include <asm/processor-flags.h> #include <asm/processor-flags.h>
#include <asm/arch_hooks.h> #include <asm/arch_hooks.h>
......
...@@ -220,8 +220,7 @@ SECTIONS ...@@ -220,8 +220,7 @@ SECTIONS
* so that it can be accessed as a percpu variable. * so that it can be accessed as a percpu variable.
*/ */
. = ALIGN(PAGE_SIZE); . = ALIGN(PAGE_SIZE);
PERCPU_VADDR_PREALLOC(0, :percpu, pda_size) PERCPU_VADDR(0, :percpu)
per_cpu____pda = __per_cpu_start;
#else #else
PERCPU(PAGE_SIZE) PERCPU(PAGE_SIZE)
#endif #endif
...@@ -262,3 +261,8 @@ SECTIONS ...@@ -262,3 +261,8 @@ SECTIONS
*/ */
ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
"kernel image bigger than KERNEL_IMAGE_SIZE") "kernel image bigger than KERNEL_IMAGE_SIZE")
#ifdef CONFIG_SMP
ASSERT((per_cpu__irq_stack_union == 0),
"irq_stack_union is not at start of per-cpu area");
#endif
obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
pat.o pgtable.o gup.o pat.o pgtable.o gup.o
obj-$(CONFIG_X86_SMP) += tlb.o
obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include <linux/kprobes.h> #include <linux/kprobes.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/kdebug.h> #include <linux/kdebug.h>
#include <linux/magic.h>
#include <asm/system.h> #include <asm/system.h>
#include <asm/desc.h> #include <asm/desc.h>
...@@ -91,8 +92,8 @@ static inline int notify_page_fault(struct pt_regs *regs) ...@@ -91,8 +92,8 @@ static inline int notify_page_fault(struct pt_regs *regs)
* *
* Opcode checker based on code by Richard Brunner * Opcode checker based on code by Richard Brunner
*/ */
static int is_prefetch(struct pt_regs *regs, unsigned long addr, static int is_prefetch(struct pt_regs *regs, unsigned long error_code,
unsigned long error_code) unsigned long addr)
{ {
unsigned char *instr; unsigned char *instr;
int scan_more = 1; int scan_more = 1;
...@@ -409,15 +410,15 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, ...@@ -409,15 +410,15 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
} }
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, static noinline void pgtable_bad(struct pt_regs *regs,
unsigned long error_code) unsigned long error_code, unsigned long address)
{ {
unsigned long flags = oops_begin(); unsigned long flags = oops_begin();
int sig = SIGKILL; int sig = SIGKILL;
struct task_struct *tsk; struct task_struct *tsk = current;
printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
current->comm, address); tsk->comm, address);
dump_pagetable(address); dump_pagetable(address);
tsk = current; tsk = current;
tsk->thread.cr2 = address; tsk->thread.cr2 = address;
...@@ -429,6 +430,196 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, ...@@ -429,6 +430,196 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
} }
#endif #endif
static noinline void no_context(struct pt_regs *regs,
unsigned long error_code, unsigned long address)
{
struct task_struct *tsk = current;
unsigned long *stackend;
#ifdef CONFIG_X86_64
unsigned long flags;
int sig;
#endif
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs))
return;
/*
* X86_32
* Valid to do another page fault here, because if this fault
* had been triggered by is_prefetch fixup_exception would have
* handled it.
*
* X86_64
* Hall of shame of CPU/BIOS bugs.
*/
if (is_prefetch(regs, error_code, address))
return;
if (is_errata93(regs, address))
return;
/*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
#ifdef CONFIG_X86_32
bust_spinlocks(1);
#else
flags = oops_begin();
#endif
show_fault_oops(regs, error_code, address);
stackend = end_of_stack(tsk);
if (*stackend != STACK_END_MAGIC)
printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
tsk->thread.cr2 = address;
tsk->thread.trap_no = 14;
tsk->thread.error_code = error_code;
#ifdef CONFIG_X86_32
die("Oops", regs, error_code);
bust_spinlocks(0);
do_exit(SIGKILL);
#else
sig = SIGKILL;
if (__die("Oops", regs, error_code))
sig = 0;
/* Executive summary in case the body of the oops scrolled away */
printk(KERN_EMERG "CR2: %016lx\n", address);
oops_end(flags, regs, sig);
#endif
}
static void __bad_area_nosemaphore(struct pt_regs *regs,
unsigned long error_code, unsigned long address,
int si_code)
{
struct task_struct *tsk = current;
/* User mode accesses just cause a SIGSEGV */
if (error_code & PF_USER) {
/*
* It's possible to have interrupts off here.
*/
local_irq_enable();
/*
* Valid to do another page fault here because this one came
* from user space.
*/
if (is_prefetch(regs, error_code, address))
return;
if (is_errata100(regs, address))
return;
if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
printk_ratelimit()) {
printk(
"%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
tsk->comm, task_pid_nr(tsk), address,
(void *) regs->ip, (void *) regs->sp, error_code);
print_vma_addr(" in ", regs->ip);
printk("\n");
}
tsk->thread.cr2 = address;
/* Kernel addresses are always protection faults */
tsk->thread.error_code = error_code | (address >= TASK_SIZE);
tsk->thread.trap_no = 14;
force_sig_info_fault(SIGSEGV, si_code, address, tsk);
return;
}
if (is_f00f_bug(regs, address))
return;
no_context(regs, error_code, address);
}
static noinline void bad_area_nosemaphore(struct pt_regs *regs,
unsigned long error_code, unsigned long address)
{
__bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
}
static void __bad_area(struct pt_regs *regs,
unsigned long error_code, unsigned long address,
int si_code)
{
struct mm_struct *mm = current->mm;
/*
* Something tried to access memory that isn't in our memory map..
* Fix it, but check if it's kernel or user first..
*/
up_read(&mm->mmap_sem);
__bad_area_nosemaphore(regs, error_code, address, si_code);
}
static noinline void bad_area(struct pt_regs *regs,
unsigned long error_code, unsigned long address)
{
__bad_area(regs, error_code, address, SEGV_MAPERR);
}
static noinline void bad_area_access_error(struct pt_regs *regs,
unsigned long error_code, unsigned long address)
{
__bad_area(regs, error_code, address, SEGV_ACCERR);
}
/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */
static void out_of_memory(struct pt_regs *regs,
unsigned long error_code, unsigned long address)
{
/*
* We ran out of memory, call the OOM killer, and return to userspace
* (which will retry the fault, or kill us if we got oom-killed).
*/
up_read(&current->mm->mmap_sem);
pagefault_out_of_memory();
}
static void do_sigbus(struct pt_regs *regs,
unsigned long error_code, unsigned long address)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
up_read(&mm->mmap_sem);
/* Kernel mode? Handle exceptions or die */
if (!(error_code & PF_USER))
no_context(regs, error_code, address);
#ifdef CONFIG_X86_32
/* User space => ok to do another page fault */
if (is_prefetch(regs, error_code, address))
return;
#endif
tsk->thread.cr2 = address;
tsk->thread.error_code = error_code;
tsk->thread.trap_no = 14;
force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
}
static noinline void mm_fault_error(struct pt_regs *regs,
unsigned long error_code, unsigned long address, unsigned int fault)
{
if (fault & VM_FAULT_OOM)
out_of_memory(regs, error_code, address);
else if (fault & VM_FAULT_SIGBUS)
do_sigbus(regs, error_code, address);
else
BUG();
}
static int spurious_fault_check(unsigned long error_code, pte_t *pte) static int spurious_fault_check(unsigned long error_code, pte_t *pte)
{ {
if ((error_code & PF_WRITE) && !pte_write(*pte)) if ((error_code & PF_WRITE) && !pte_write(*pte))
...@@ -448,8 +639,8 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) ...@@ -448,8 +639,8 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
* There are no security implications to leaving a stale TLB when * There are no security implications to leaving a stale TLB when
* increasing the permissions on a page. * increasing the permissions on a page.
*/ */
static int spurious_fault(unsigned long address, static noinline int spurious_fault(unsigned long error_code,
unsigned long error_code) unsigned long address)
{ {
pgd_t *pgd; pgd_t *pgd;
pud_t *pud; pud_t *pud;
...@@ -494,7 +685,7 @@ static int spurious_fault(unsigned long address, ...@@ -494,7 +685,7 @@ static int spurious_fault(unsigned long address,
* *
* This assumes no large pages in there. * This assumes no large pages in there.
*/ */
static int vmalloc_fault(unsigned long address) static noinline int vmalloc_fault(unsigned long address)
{ {
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
unsigned long pgd_paddr; unsigned long pgd_paddr;
...@@ -573,6 +764,25 @@ static int vmalloc_fault(unsigned long address) ...@@ -573,6 +764,25 @@ static int vmalloc_fault(unsigned long address)
int show_unhandled_signals = 1; int show_unhandled_signals = 1;
static inline int access_error(unsigned long error_code, int write,
struct vm_area_struct *vma)
{
if (write) {
/* write, present and write, not present */
if (unlikely(!(vma->vm_flags & VM_WRITE)))
return 1;
} else if (unlikely(error_code & PF_PROT)) {
/* read, present */
return 1;
} else {
/* read, not present */
if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
return 1;
}
return 0;
}
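
For readers without the rest of fault.c in view, the error_code bits tested in access_error() are defined near the top of this file; their values (quoted here as an aid, not new to this commit) are:

/*
 *	PF_PROT  = 1 << 0	protection violation (page was present)
 *	PF_WRITE = 1 << 1	write access
 *	PF_USER  = 1 << 2	fault taken in user mode
 *	PF_RSVD  = 1 << 3	reserved bit set in a paging structure
 *	PF_INSTR = 1 << 4	instruction fetch
 *
 * access_error() covers exactly the three cases the old
 * switch (error_code & (PF_PROT|PF_WRITE)) below used to handle.
 */
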
/* /*
* This routine handles page faults. It determines the address, * This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate * and the problem, and then passes it off to one of the appropriate
...@@ -583,16 +793,12 @@ asmlinkage ...@@ -583,16 +793,12 @@ asmlinkage
#endif #endif
void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
{ {
unsigned long address;
struct task_struct *tsk; struct task_struct *tsk;
struct mm_struct *mm; struct mm_struct *mm;
struct vm_area_struct *vma; struct vm_area_struct *vma;
unsigned long address; int write;
int write, si_code;
int fault; int fault;
#ifdef CONFIG_X86_64
unsigned long flags;
int sig;
#endif
tsk = current; tsk = current;
mm = tsk->mm; mm = tsk->mm;
...@@ -601,9 +807,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) ...@@ -601,9 +807,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
/* get the address */ /* get the address */
address = read_cr2(); address = read_cr2();
si_code = SEGV_MAPERR; if (unlikely(notify_page_fault(regs)))
if (notify_page_fault(regs))
return; return;
if (unlikely(kmmio_fault(regs, address))) if (unlikely(kmmio_fault(regs, address)))
return; return;
...@@ -631,17 +835,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) ...@@ -631,17 +835,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
return; return;
/* Can handle a stale RO->RW TLB */ /* Can handle a stale RO->RW TLB */
if (spurious_fault(address, error_code)) if (spurious_fault(error_code, address))
return; return;
/* /*
* Don't take the mm semaphore here. If we fixup a prefetch * Don't take the mm semaphore here. If we fixup a prefetch
* fault we could otherwise deadlock. * fault we could otherwise deadlock.
*/ */
goto bad_area_nosemaphore; bad_area_nosemaphore(regs, error_code, address);
return;
} }
/* /*
* It's safe to allow irq's after cr2 has been saved and the * It's safe to allow irq's after cr2 has been saved and the
* vmalloc fault has been handled. * vmalloc fault has been handled.
...@@ -657,15 +861,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) ...@@ -657,15 +861,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
if (unlikely(error_code & PF_RSVD)) if (unlikely(error_code & PF_RSVD))
pgtable_bad(address, regs, error_code); pgtable_bad(regs, error_code, address);
#endif #endif
/* /*
* If we're in an interrupt, have no user context or are running in an * If we're in an interrupt, have no user context or are running in an
* atomic region then we must not take the fault. * atomic region then we must not take the fault.
*/ */
if (unlikely(in_atomic() || !mm)) if (unlikely(in_atomic() || !mm)) {
goto bad_area_nosemaphore; bad_area_nosemaphore(regs, error_code, address);
return;
}
/* /*
* When running in the kernel we expect faults to occur only to * When running in the kernel we expect faults to occur only to
...@@ -683,20 +889,26 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) ...@@ -683,20 +889,26 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
* source. If this is invalid we can skip the address space check, * source. If this is invalid we can skip the address space check,
* thus avoiding the deadlock. * thus avoiding the deadlock.
*/ */
if (!down_read_trylock(&mm->mmap_sem)) { if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
if ((error_code & PF_USER) == 0 && if ((error_code & PF_USER) == 0 &&
!search_exception_tables(regs->ip)) !search_exception_tables(regs->ip)) {
goto bad_area_nosemaphore; bad_area_nosemaphore(regs, error_code, address);
return;
}
down_read(&mm->mmap_sem); down_read(&mm->mmap_sem);
} }
vma = find_vma(mm, address); vma = find_vma(mm, address);
if (!vma) if (unlikely(!vma)) {
goto bad_area; bad_area(regs, error_code, address);
if (vma->vm_start <= address) return;
}
if (likely(vma->vm_start <= address))
goto good_area; goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN)) if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
goto bad_area; bad_area(regs, error_code, address);
return;
}
if (error_code & PF_USER) { if (error_code & PF_USER) {
/* /*
* Accessing the stack below %sp is always a bug. * Accessing the stack below %sp is always a bug.
...@@ -704,31 +916,25 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) ...@@ -704,31 +916,25 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
* and pusha to work. ("enter $65535,$31" pushes * and pusha to work. ("enter $65535,$31" pushes
* 32 pointers and then decrements %sp by 65535.) * 32 pointers and then decrements %sp by 65535.)
*/ */
if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp) if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
goto bad_area; bad_area(regs, error_code, address);
return;
}
} }
if (expand_stack(vma, address)) if (unlikely(expand_stack(vma, address))) {
goto bad_area; bad_area(regs, error_code, address);
/* return;
* Ok, we have a good vm_area for this memory access, so }
* we can handle it..
*/ /*
* Ok, we have a good vm_area for this memory access, so
* we can handle it..
*/
good_area: good_area:
si_code = SEGV_ACCERR; write = error_code & PF_WRITE;
write = 0; if (unlikely(access_error(error_code, write, vma))) {
switch (error_code & (PF_PROT|PF_WRITE)) { bad_area_access_error(regs, error_code, address);
default: /* 3: write, present */ return;
/* fall through */
case PF_WRITE: /* write, not present */
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
write++;
break;
case PF_PROT: /* read, present */
goto bad_area;
case 0: /* read, not present */
if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
goto bad_area;
} }
/* /*
...@@ -738,11 +944,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) ...@@ -738,11 +944,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
*/ */
fault = handle_mm_fault(mm, vma, address, write); fault = handle_mm_fault(mm, vma, address, write);
if (unlikely(fault & VM_FAULT_ERROR)) { if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM) mm_fault_error(regs, error_code, address, fault);
goto out_of_memory; return;
else if (fault & VM_FAULT_SIGBUS)
goto do_sigbus;
BUG();
} }
if (fault & VM_FAULT_MAJOR) if (fault & VM_FAULT_MAJOR)
tsk->maj_flt++; tsk->maj_flt++;
...@@ -760,128 +963,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) ...@@ -760,128 +963,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
} }
#endif #endif
up_read(&mm->mmap_sem); up_read(&mm->mmap_sem);
return;
/*
* Something tried to access memory that isn't in our memory map..
* Fix it, but check if it's kernel or user first..
*/
bad_area:
up_read(&mm->mmap_sem);
bad_area_nosemaphore:
/* User mode accesses just cause a SIGSEGV */
if (error_code & PF_USER) {
/*
* It's possible to have interrupts off here.
*/
local_irq_enable();
/*
* Valid to do another page fault here because this one came
* from user space.
*/
if (is_prefetch(regs, address, error_code))
return;
if (is_errata100(regs, address))
return;
if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
printk_ratelimit()) {
printk(
"%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
tsk->comm, task_pid_nr(tsk), address,
(void *) regs->ip, (void *) regs->sp, error_code);
print_vma_addr(" in ", regs->ip);
printk("\n");
}
tsk->thread.cr2 = address;
/* Kernel addresses are always protection faults */
tsk->thread.error_code = error_code | (address >= TASK_SIZE);
tsk->thread.trap_no = 14;
force_sig_info_fault(SIGSEGV, si_code, address, tsk);
return;
}
if (is_f00f_bug(regs, address))
return;
no_context:
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs))
return;
/*
* X86_32
* Valid to do another page fault here, because if this fault
* had been triggered by is_prefetch fixup_exception would have
* handled it.
*
* X86_64
* Hall of shame of CPU/BIOS bugs.
*/
if (is_prefetch(regs, address, error_code))
return;
if (is_errata93(regs, address))
return;
/*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
#ifdef CONFIG_X86_32
bust_spinlocks(1);
#else
flags = oops_begin();
#endif
show_fault_oops(regs, error_code, address);
tsk->thread.cr2 = address;
tsk->thread.trap_no = 14;
tsk->thread.error_code = error_code;
#ifdef CONFIG_X86_32
die("Oops", regs, error_code);
bust_spinlocks(0);
do_exit(SIGKILL);
#else
sig = SIGKILL;
if (__die("Oops", regs, error_code))
sig = 0;
/* Executive summary in case the body of the oops scrolled away */
printk(KERN_EMERG "CR2: %016lx\n", address);
oops_end(flags, regs, sig);
#endif
out_of_memory:
/*
* We ran out of memory, call the OOM killer, and return to userspace
* (which will retry the fault, or kill us if we got oom-killed).
*/
up_read(&mm->mmap_sem);
pagefault_out_of_memory();
return;
do_sigbus:
up_read(&mm->mmap_sem);
/* Kernel mode? Handle exceptions or die */
if (!(error_code & PF_USER))
goto no_context;
#ifdef CONFIG_X86_32
/* User space => ok to do another page fault */
if (is_prefetch(regs, address, error_code))
return;
#endif
tsk->thread.cr2 = address;
tsk->thread.error_code = error_code;
tsk->thread.trap_no = 14;
force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
} }
DEFINE_SPINLOCK(pgd_lock); DEFINE_SPINLOCK(pgd_lock);
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <asm/numa.h> #include <asm/numa.h>
#include <asm/e820.h> #include <asm/e820.h>
#include <asm/genapic.h> #include <asm/genapic.h>
#include <asm/uv/uv.h>
int acpi_numa __initdata; int acpi_numa __initdata;
......
#include <linux/init.h> #include <linux/init.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/delay.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/module.h>
#include <asm/mtrr.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
#include <asm/proto.h> #include <asm/apic.h>
#include <asm/apicdef.h> #include <asm/uv/uv.h>
#include <asm/idle.h>
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_bau.h>
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
= { &init_mm, 0, }; = { &init_mm, 0, };
...@@ -36,7 +29,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) ...@@ -36,7 +29,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
* To avoid global state use 8 different call vectors. * To avoid global state use 8 different call vectors.
* Each CPU uses a specific vector to trigger flushes on other * Each CPU uses a specific vector to trigger flushes on other
* CPUs. Depending on the received vector the target CPUs look into * CPUs. Depending on the received vector the target CPUs look into
* the right per cpu variable for the flush data. * the right array slot for the flush data.
* *
* With more than 8 CPUs they are hashed to the 8 available * With more than 8 CPUs they are hashed to the 8 available
* vectors. The limited global vector space forces us to this right now. * vectors. The limited global vector space forces us to this right now.
...@@ -51,13 +44,13 @@ union smp_flush_state { ...@@ -51,13 +44,13 @@ union smp_flush_state {
spinlock_t tlbstate_lock; spinlock_t tlbstate_lock;
DECLARE_BITMAP(flush_cpumask, NR_CPUS); DECLARE_BITMAP(flush_cpumask, NR_CPUS);
}; };
char pad[SMP_CACHE_BYTES]; char pad[CONFIG_X86_INTERNODE_CACHE_BYTES];
} ____cacheline_aligned; } ____cacheline_internodealigned_in_smp;
/* State is put into the per CPU data section, but padded /* State is put into the per CPU data section, but padded
to a full cache line because other CPUs can access it and we don't to a full cache line because other CPUs can access it and we don't
want false sharing in the per cpu data segment. */ want false sharing in the per cpu data segment. */
static DEFINE_PER_CPU(union smp_flush_state, flush_state); static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
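
A hedged illustration of the sender hashing described above, reusing the names from this file; the arithmetic mirrors flush_tlb_others_ipi() further down and adds nothing to the commit itself:

/*
 * Example: with NUM_INVALIDATE_TLB_VECTORS == 8, CPU 11 hashes to
 * slot 3 and raises INVALIDATE_TLB_VECTOR_START + 3. The receiver
 * recovers the slot from ~regs->orig_ax in smp_invalidate_interrupt().
 */
unsigned int sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
union smp_flush_state *f = &flush_state[sender];

send_IPI_mask(to_cpumask(f->flush_cpumask),
	      INVALIDATE_TLB_VECTOR_START + sender);
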
/* /*
* We cannot call mmdrop() because we are in interrupt context, * We cannot call mmdrop() because we are in interrupt context,
...@@ -120,10 +113,20 @@ EXPORT_SYMBOL_GPL(leave_mm); ...@@ -120,10 +113,20 @@ EXPORT_SYMBOL_GPL(leave_mm);
* Interrupts are disabled. * Interrupts are disabled.
*/ */
asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) /*
* FIXME: use of asmlinkage is not consistent. On x86_64 it's a noop,
* still used for documentation purposes, but the usage is slightly
* inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt
* entry calls in with the first parameter in %eax. Maybe define
* intrlinkage?
*/
#ifdef CONFIG_X86_64
asmlinkage
#endif
void smp_invalidate_interrupt(struct pt_regs *regs)
{ {
int cpu; unsigned int cpu;
int sender; unsigned int sender;
union smp_flush_state *f; union smp_flush_state *f;
cpu = smp_processor_id(); cpu = smp_processor_id();
...@@ -132,7 +135,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) ...@@ -132,7 +135,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
* Use that to determine where the sender put the data. * Use that to determine where the sender put the data.
*/ */
sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
f = &per_cpu(flush_state, sender); f = &flush_state[sender];
if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask))) if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
goto out; goto out;
...@@ -156,19 +159,21 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) ...@@ -156,19 +159,21 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
} }
out: out:
ack_APIC_irq(); ack_APIC_irq();
smp_mb__before_clear_bit();
cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask)); cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
smp_mb__after_clear_bit();
inc_irq_stat(irq_tlb_count); inc_irq_stat(irq_tlb_count);
} }
static void flush_tlb_others_ipi(const struct cpumask *cpumask, static void flush_tlb_others_ipi(const struct cpumask *cpumask,
struct mm_struct *mm, unsigned long va) struct mm_struct *mm, unsigned long va)
{ {
int sender; unsigned int sender;
union smp_flush_state *f; union smp_flush_state *f;
/* Caller has disabled preemption */ /* Caller has disabled preemption */
sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
f = &per_cpu(flush_state, sender); f = &flush_state[sender];
/* /*
* Could avoid this lock when * Could avoid this lock when
...@@ -206,16 +211,13 @@ void native_flush_tlb_others(const struct cpumask *cpumask, ...@@ -206,16 +211,13 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm, unsigned long va) struct mm_struct *mm, unsigned long va)
{ {
if (is_uv_system()) { if (is_uv_system()) {
/* FIXME: could be an percpu_alloc'd thing */ unsigned int cpu;
static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
struct cpumask *after_uv_flush = &get_cpu_var(flush_tlb_mask);
cpumask_andnot(after_uv_flush, cpumask,
cpumask_of(smp_processor_id()));
if (!uv_flush_tlb_others(after_uv_flush, mm, va))
flush_tlb_others_ipi(after_uv_flush, mm, va);
put_cpu_var(flush_tlb_uv_cpumask); cpu = get_cpu();
cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
if (cpumask)
flush_tlb_others_ipi(cpumask, mm, va);
put_cpu();
return; return;
} }
flush_tlb_others_ipi(cpumask, mm, va); flush_tlb_others_ipi(cpumask, mm, va);
...@@ -225,8 +227,8 @@ static int __cpuinit init_smp_flush(void) ...@@ -225,8 +227,8 @@ static int __cpuinit init_smp_flush(void)
{ {
int i; int i;
for_each_possible_cpu(i) for (i = 0; i < ARRAY_SIZE(flush_state); i++)
spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); spin_lock_init(&flush_state[i].tlbstate_lock);
return 0; return 0;
} }
......
...@@ -1645,7 +1645,6 @@ asmlinkage void __init xen_start_kernel(void) ...@@ -1645,7 +1645,6 @@ asmlinkage void __init xen_start_kernel(void)
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
/* Disable until direct per-cpu data access. */ /* Disable until direct per-cpu data access. */
have_vcpu_info_placement = 0; have_vcpu_info_placement = 0;
pda_init(0);
#endif #endif
xen_smp_init(); xen_smp_init();
......
...@@ -170,7 +170,7 @@ config ENCLOSURE_SERVICES ...@@ -170,7 +170,7 @@ config ENCLOSURE_SERVICES
config SGI_XP config SGI_XP
tristate "Support communication between SGI SSIs" tristate "Support communication between SGI SSIs"
depends on NET depends on NET
depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_64) && SMP depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP
select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
select SGI_GRU if (IA64_GENERIC || IA64_SGI_UV || X86_64) && SMP select SGI_GRU if (IA64_GENERIC || IA64_SGI_UV || X86_64) && SMP
...@@ -197,7 +197,7 @@ config HP_ILO ...@@ -197,7 +197,7 @@ config HP_ILO
config SGI_GRU config SGI_GRU
tristate "SGI GRU driver" tristate "SGI GRU driver"
depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP depends on (X86_UV || IA64_SGI_UV || IA64_GENERIC) && SMP
default n default n
select MMU_NOTIFIER select MMU_NOTIFIER
---help--- ---help---
......
...@@ -19,6 +19,8 @@ ...@@ -19,6 +19,8 @@
#ifndef __GRU_H__ #ifndef __GRU_H__
#define __GRU_H__ #define __GRU_H__
#include <asm/uv/uv.h>
/* /*
* GRU architectural definitions * GRU architectural definitions
*/ */
......
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
#include <linux/mutex.h> #include <linux/mutex.h>
#include <asm/uv/uv.h>
#ifdef CONFIG_IA64 #ifdef CONFIG_IA64
#include <asm/system.h> #include <asm/system.h>
#include <asm/sn/arch.h> /* defines is_shub1() and is_shub2() */ #include <asm/sn/arch.h> /* defines is_shub1() and is_shub2() */
......
...@@ -430,22 +430,10 @@ ...@@ -430,22 +430,10 @@
*(.initcall7.init) \ *(.initcall7.init) \
*(.initcall7s.init) *(.initcall7s.init)
#define PERCPU_PROLOG(vaddr) \
VMLINUX_SYMBOL(__per_cpu_load) = .; \
.data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \
- LOAD_OFFSET) { \
VMLINUX_SYMBOL(__per_cpu_start) = .;
#define PERCPU_EPILOG(phdr) \
VMLINUX_SYMBOL(__per_cpu_end) = .; \
} phdr \
. = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu);
/** /**
* PERCPU_VADDR_PREALLOC - define output section for percpu area with prealloc * PERCPU_VADDR - define output section for percpu area
* @vaddr: explicit base address (optional) * @vaddr: explicit base address (optional)
* @phdr: destination PHDR (optional) * @phdr: destination PHDR (optional)
* @prealloc: the size of prealloc area
* *
* Macro which expands to output section for percpu area. If @vaddr * Macro which expands to output section for percpu area. If @vaddr
* is not blank, it specifies explicit base address and all percpu * is not blank, it specifies explicit base address and all percpu
...@@ -457,39 +445,23 @@ ...@@ -457,39 +445,23 @@
* section in the linker script will go there too. @phdr should have * section in the linker script will go there too. @phdr should have
* a leading colon. * a leading colon.
* *
* If @prealloc is non-zero, the specified number of bytes will be
* reserved at the start of percpu area. As the prealloc area is
* likely to break alignment, this macro puts areas in increasing
* alignment order.
*
* This macro defines three symbols, __per_cpu_load, __per_cpu_start * This macro defines three symbols, __per_cpu_load, __per_cpu_start
* and __per_cpu_end. The first one is the vaddr of loaded percpu * and __per_cpu_end. The first one is the vaddr of loaded percpu
* init data. __per_cpu_start equals @vaddr and __per_cpu_end is the * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the
* end offset. * end offset.
*/ */
#define PERCPU_VADDR_PREALLOC(vaddr, segment, prealloc) \
PERCPU_PROLOG(vaddr) \
. += prealloc; \
*(.data.percpu) \
*(.data.percpu.shared_aligned) \
*(.data.percpu.page_aligned) \
PERCPU_EPILOG(segment)
/**
* PERCPU_VADDR - define output section for percpu area
* @vaddr: explicit base address (optional)
* @phdr: destination PHDR (optional)
*
* Macro which expands to output section for percpu area. Mostly
* identical to PERCPU_VADDR_PREALLOC(@vaddr, @phdr, 0) other than
* using a slightly different layout.
*/
#define PERCPU_VADDR(vaddr, phdr) \ #define PERCPU_VADDR(vaddr, phdr) \
PERCPU_PROLOG(vaddr) \ VMLINUX_SYMBOL(__per_cpu_load) = .; \
.data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \
- LOAD_OFFSET) { \
VMLINUX_SYMBOL(__per_cpu_start) = .; \
*(.data.percpu.first) \
*(.data.percpu.page_aligned) \ *(.data.percpu.page_aligned) \
*(.data.percpu) \ *(.data.percpu) \
*(.data.percpu.shared_aligned) \ *(.data.percpu.shared_aligned) \
PERCPU_EPILOG(phdr) VMLINUX_SYMBOL(__per_cpu_end) = .; \
} phdr \
. = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu);
/** /**
* PERCPU - define output section for percpu area, simple version * PERCPU - define output section for percpu area, simple version
......
...@@ -49,4 +49,5 @@ ...@@ -49,4 +49,5 @@
#define FUTEXFS_SUPER_MAGIC 0xBAD1DEA #define FUTEXFS_SUPER_MAGIC 0xBAD1DEA
#define INOTIFYFS_SUPER_MAGIC 0x2BAD1DEA #define INOTIFYFS_SUPER_MAGIC 0x2BAD1DEA
#define STACK_END_MAGIC 0x57AC6E9D
#endif /* __LINUX_MAGIC_H__ */ #endif /* __LINUX_MAGIC_H__ */
...@@ -9,34 +9,39 @@ ...@@ -9,34 +9,39 @@
#include <asm/percpu.h> #include <asm/percpu.h>
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
#define DEFINE_PER_CPU(type, name) \ #define PER_CPU_BASE_SECTION ".data.percpu"
__attribute__((__section__(".data.percpu"))) \
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
#ifdef MODULE #ifdef MODULE
#define SHARED_ALIGNED_SECTION ".data.percpu" #define PER_CPU_SHARED_ALIGNED_SECTION ""
#else #else
#define SHARED_ALIGNED_SECTION ".data.percpu.shared_aligned" #define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned"
#endif #endif
#define PER_CPU_FIRST_SECTION ".first"
#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ #else
__attribute__((__section__(SHARED_ALIGNED_SECTION))) \
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ #define PER_CPU_BASE_SECTION ".data"
____cacheline_aligned_in_smp #define PER_CPU_SHARED_ALIGNED_SECTION ""
#define PER_CPU_FIRST_SECTION ""
#endif
#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ #define DEFINE_PER_CPU_SECTION(type, name, section) \
__attribute__((__section__(".data.percpu.page_aligned"))) \ __attribute__((__section__(PER_CPU_BASE_SECTION section))) \
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
#else
#define DEFINE_PER_CPU(type, name) \ #define DEFINE_PER_CPU(type, name) \
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name DEFINE_PER_CPU_SECTION(type, name, "")
#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
DEFINE_PER_CPU(type, name) DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
____cacheline_aligned_in_smp
#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
DEFINE_PER_CPU(type, name) DEFINE_PER_CPU_SECTION(type, name, ".page_aligned")
#endif
#define DEFINE_PER_CPU_FIRST(type, name) \
DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION)
#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
......
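
A hedged example of the new ".first" placement, tying it to the irq_stack_union ASSERT added to vmlinux_64.lds.S earlier in this commit (the arch-side definition may differ slightly; this is the intended shape):

/*
 * Sketch: the per-cpu object that must sit at offset 0 of the per-cpu
 * area, so that %gs-relative accesses to the irq stack and the stack
 * canary (gcc expects it at %gs:40) work without an extra indirection.
 */
union irq_stack_union {
	char irq_stack[IRQ_STACK_SIZE];
	struct {
		char gs_base[40];		/* fixed 40-byte pad */
		unsigned long stack_canary;	/* lands at offset 40 */
	};
};

DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE);
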
...@@ -1159,10 +1159,9 @@ struct task_struct { ...@@ -1159,10 +1159,9 @@ struct task_struct {
pid_t pid; pid_t pid;
pid_t tgid; pid_t tgid;
#ifdef CONFIG_CC_STACKPROTECTOR
/* Canary value for the -fstack-protector gcc feature */ /* Canary value for the -fstack-protector gcc feature */
unsigned long stack_canary; unsigned long stack_canary;
#endif
/* /*
* pointers to (original) parent process, youngest child, younger sibling, * pointers to (original) parent process, youngest child, younger sibling,
* older sibling, respectively. (p->father can be replaced with * older sibling, respectively. (p->father can be replaced with
...@@ -2069,6 +2068,19 @@ static inline int object_is_on_stack(void *obj) ...@@ -2069,6 +2068,19 @@ static inline int object_is_on_stack(void *obj)
extern void thread_info_cache_init(void); extern void thread_info_cache_init(void);
#ifdef CONFIG_DEBUG_STACK_USAGE
static inline unsigned long stack_not_used(struct task_struct *p)
{
unsigned long *n = end_of_stack(p);
do { /* Skip over canary */
n++;
} while (!*n);
return (unsigned long)n - (unsigned long)end_of_stack(p);
}
#endif
/* set thread flags in other task's structures /* set thread flags in other task's structures
* - see asm/thread_info.h for TIF_xxxx flags available * - see asm/thread_info.h for TIF_xxxx flags available
*/ */
......
#ifndef _LINUX_STACKPROTECTOR_H
#define _LINUX_STACKPROTECTOR_H 1
#include <linux/compiler.h>
#include <linux/sched.h>
#include <linux/random.h>
#ifdef CONFIG_CC_STACKPROTECTOR
# include <asm/stackprotector.h>
#else
static inline void boot_init_stack_canary(void)
{
}
#endif
#endif
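
For reference, a sketch of the shape the x86-64 <asm/stackprotector.h> side takes (assuming get_random_bytes() and a TSC read are available this early; the real header may differ in detail). This is what the boot_init_stack_canary() calls in cpu_idle() and start_kernel() resolve to when CONFIG_CC_STACKPROTECTOR is set:

static __always_inline void boot_init_stack_canary(void)
{
	u64 canary;
	u64 tsc;

	/* gcc hard-codes the canary load as %gs:40, hence the fixed
	 * 40-byte gs_base pad in irq_stack_union. */
	BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);

	/* Mix whatever entropy is available with the TSC; this runs
	 * before the RNG is fully seeded. */
	get_random_bytes(&canary, sizeof(canary));
	tsc = native_read_tsc();
	canary += tsc + (tsc << 32UL);

	current->stack_canary = canary;
	percpu_write(irq_stack_union.stack_canary, canary);
}
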
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/syscalls.h> #include <linux/syscalls.h>
#include <linux/stackprotector.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/ctype.h> #include <linux/ctype.h>
#include <linux/delay.h> #include <linux/delay.h>
...@@ -539,6 +540,12 @@ asmlinkage void __init start_kernel(void) ...@@ -539,6 +540,12 @@ asmlinkage void __init start_kernel(void)
*/ */
lockdep_init(); lockdep_init();
debug_objects_early_init(); debug_objects_early_init();
/*
* Set up the initial canary ASAP:
*/
boot_init_stack_canary();
cgroup_init_early(); cgroup_init_early();
local_irq_disable(); local_irq_disable();
......
...@@ -980,12 +980,9 @@ static void check_stack_usage(void) ...@@ -980,12 +980,9 @@ static void check_stack_usage(void)
{ {
static DEFINE_SPINLOCK(low_water_lock); static DEFINE_SPINLOCK(low_water_lock);
static int lowest_to_date = THREAD_SIZE; static int lowest_to_date = THREAD_SIZE;
unsigned long *n = end_of_stack(current);
unsigned long free; unsigned long free;
while (*n == 0) free = stack_not_used(current);
n++;
free = (unsigned long)n - (unsigned long)end_of_stack(current);
if (free >= lowest_to_date) if (free >= lowest_to_date)
return; return;
......
...@@ -61,6 +61,7 @@ ...@@ -61,6 +61,7 @@
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <trace/sched.h> #include <trace/sched.h>
#include <linux/magic.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
...@@ -212,6 +213,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) ...@@ -212,6 +213,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
{ {
struct task_struct *tsk; struct task_struct *tsk;
struct thread_info *ti; struct thread_info *ti;
unsigned long *stackend;
int err; int err;
prepare_to_copy(orig); prepare_to_copy(orig);
...@@ -237,6 +240,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) ...@@ -237,6 +240,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
goto out; goto out;
setup_thread_stack(tsk, orig); setup_thread_stack(tsk, orig);
stackend = end_of_stack(tsk);
*stackend = STACK_END_MAGIC; /* for overflow detection */
#ifdef CONFIG_CC_STACKPROTECTOR #ifdef CONFIG_CC_STACKPROTECTOR
tsk->stack_canary = get_random_int(); tsk->stack_canary = get_random_int();
......
...@@ -74,6 +74,9 @@ NORET_TYPE void panic(const char * fmt, ...) ...@@ -74,6 +74,9 @@ NORET_TYPE void panic(const char * fmt, ...)
vsnprintf(buf, sizeof(buf), fmt, args); vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args); va_end(args);
printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf); printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
#ifdef CONFIG_DEBUG_BUGVERBOSE
dump_stack();
#endif
bust_spinlocks(0); bust_spinlocks(0);
/* /*
...@@ -355,15 +358,22 @@ EXPORT_SYMBOL(warn_slowpath); ...@@ -355,15 +358,22 @@ EXPORT_SYMBOL(warn_slowpath);
#endif #endif
#ifdef CONFIG_CC_STACKPROTECTOR #ifdef CONFIG_CC_STACKPROTECTOR
#ifndef GCC_HAS_SP
#warning You have selected the CONFIG_CC_STACKPROTECTOR option, but the gcc used does not support this.
#endif
/* /*
* Called when gcc's -fstack-protector feature is used, and * Called when gcc's -fstack-protector feature is used, and
* gcc detects corruption of the on-stack canary value * gcc detects corruption of the on-stack canary value
*/ */
void __stack_chk_fail(void) void __stack_chk_fail(void)
{ {
panic("stack-protector: Kernel stack is corrupted"); panic("stack-protector: Kernel stack is corrupted in: %p\n",
__builtin_return_address(0));
} }
EXPORT_SYMBOL(__stack_chk_fail); EXPORT_SYMBOL(__stack_chk_fail);
#endif #endif
core_param(panic, panic_timeout, int, 0644); core_param(panic, panic_timeout, int, 0644);
......
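
To make the __stack_chk_fail() hook above concrete, here is a conceptual sketch (plain C, not kernel code) of what -fstack-protector wraps around a function with an on-stack buffer; __guard_value() is a hypothetical stand-in for the canary load gcc actually emits, which with the layout in this commit is a read of %gs:40 on x86-64:

extern void __stack_chk_fail(void);
extern unsigned long __guard_value(void);	/* hypothetical helper */

void copy_name(char *dst, const char *src)
{
	unsigned long canary = __guard_value();	/* prologue: stash canary */
	char buf[64];

	__builtin_strncpy(buf, src, sizeof(buf) - 1);
	buf[sizeof(buf) - 1] = '\0';
	__builtin_strcpy(dst, buf);

	if (canary != __guard_value())		/* epilogue: recheck */
		__stack_chk_fail();		/* panics, never returns */
}
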
...@@ -6009,12 +6009,7 @@ void sched_show_task(struct task_struct *p) ...@@ -6009,12 +6009,7 @@ void sched_show_task(struct task_struct *p)
printk(KERN_CONT " %016lx ", thread_saved_pc(p)); printk(KERN_CONT " %016lx ", thread_saved_pc(p));
#endif #endif
#ifdef CONFIG_DEBUG_STACK_USAGE #ifdef CONFIG_DEBUG_STACK_USAGE
{ free = stack_not_used(p);
unsigned long *n = end_of_stack(p);
while (!*n)
n++;
free = (unsigned long)n - (unsigned long)end_of_stack(p);
}
#endif #endif
printk(KERN_CONT "%5lu %5d %6d\n", free, printk(KERN_CONT "%5lu %5d %6d\n", free,
task_pid_nr(p), task_pid_nr(p->real_parent)); task_pid_nr(p), task_pid_nr(p->real_parent));
......
...@@ -971,6 +971,8 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, ...@@ -971,6 +971,8 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
} }
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
static struct workqueue_struct *work_on_cpu_wq __read_mostly;
struct work_for_cpu { struct work_for_cpu {
struct work_struct work; struct work_struct work;
long (*fn)(void *); long (*fn)(void *);
...@@ -991,8 +993,8 @@ static void do_work_for_cpu(struct work_struct *w) ...@@ -991,8 +993,8 @@ static void do_work_for_cpu(struct work_struct *w)
* @fn: the function to run * @fn: the function to run
* @arg: the function arg * @arg: the function arg
* *
* This will return -EINVAL in the cpu is not online, or the return value * This will return the value @fn returns.
* of @fn otherwise. * It is up to the caller to ensure that the cpu doesn't go offline.
*/ */
long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
{ {
...@@ -1001,14 +1003,8 @@ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) ...@@ -1001,14 +1003,8 @@ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
INIT_WORK(&wfc.work, do_work_for_cpu); INIT_WORK(&wfc.work, do_work_for_cpu);
wfc.fn = fn; wfc.fn = fn;
wfc.arg = arg; wfc.arg = arg;
get_online_cpus(); queue_work_on(cpu, work_on_cpu_wq, &wfc.work);
if (unlikely(!cpu_online(cpu))) flush_work(&wfc.work);
wfc.ret = -EINVAL;
else {
schedule_work_on(cpu, &wfc.work);
flush_work(&wfc.work);
}
put_online_cpus();
return wfc.ret; return wfc.ret;
} }
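
A hedged usage sketch under the new work_on_cpu() contract (the caller, not work_on_cpu(), keeps the CPU online; the callback and wrapper names below are illustrative):

static long say_where(void *arg)		/* illustrative callback */
{
	pr_info("work_on_cpu ran on CPU %d\n", smp_processor_id());
	return 0;
}

static long run_pinned(unsigned int cpu)
{
	long ret = -ENODEV;

	get_online_cpus();			/* caller pins CPU hotplug */
	if (cpu_online(cpu))
		ret = work_on_cpu(cpu, say_where, NULL);
	put_online_cpus();

	return ret;
}
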
...@@ -1025,4 +1021,8 @@ void __init init_workqueues(void) ...@@ -1025,4 +1021,8 @@ void __init init_workqueues(void)
hotcpu_notifier(workqueue_cpu_callback, 0); hotcpu_notifier(workqueue_cpu_callback, 0);
keventd_wq = create_workqueue("events"); keventd_wq = create_workqueue("events");
BUG_ON(!keventd_wq); BUG_ON(!keventd_wq);
#ifdef CONFIG_SMP
work_on_cpu_wq = create_workqueue("work_on_cpu");
BUG_ON(!work_on_cpu_wq);
#endif
} }