Commit e33c0197 authored by Linus Torvalds

Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (36 commits)
  x86, mm: Correct the implementation of is_untracked_pat_range()
  x86/pat: Trivial: don't create debugfs for memtype if pat is disabled
  x86, mtrr: Fix sorting of mtrr after subtracting
  x86: Move find_smp_config() earlier and avoid bootmem usage
  x86, platform: Change is_untracked_pat_range() to bool; cleanup init
  x86: Change is_ISA_range() into an inline function
  x86, mm: is_untracked_pat_range() takes a normal semiclosed range
  x86, mm: Call is_untracked_pat_range() rather than is_ISA_range()
  x86: UV SGI: Don't track GRU space in PAT
  x86: SGI UV: Fix BAU initialization
  x86, numa: Use near(er) online node instead of roundrobin for NUMA
  x86, numa, bootmem: Only free bootmem on NUMA failure path
  x86: Change crash kernel to reserve via reserve_early()
  x86: Eliminate redundant/contradicting cache line size config options
  x86: When cleaning MTRRs, do not fold WP into UC
  x86: remove "extern" from function prototypes in <asm/proto.h>
  x86, mm: Report state of NX protections during boot
  x86, mm: Clean up and simplify NX enablement
  x86, pageattr: Make set_memory_(x|nx) aware of NX support
  x86, sleep: Always save the value of EFER
  ...

Fix up conflicts (added both iommu_shutdown and is_untracked_pat_range
to 'struct x86_platform_ops') in
	arch/x86/include/asm/x86_init.h
	arch/x86/kernel/x86_init.c
parents 343036ce ccef0864
...@@ -301,15 +301,11 @@ config X86_CPU ...@@ -301,15 +301,11 @@ config X86_CPU
# #
# Define implied options from the CPU selection here # Define implied options from the CPU selection here
config X86_L1_CACHE_BYTES config X86_INTERNODE_CACHE_SHIFT
int int
default "128" if MPSC default "12" if X86_VSMP
default "64" if GENERIC_CPU || MK8 || MCORE2 || MATOM || X86_32 default "7" if NUMA
default X86_L1_CACHE_SHIFT
config X86_INTERNODE_CACHE_BYTES
int
default "4096" if X86_VSMP
default X86_L1_CACHE_BYTES if !X86_VSMP
config X86_CMPXCHG config X86_CMPXCHG
def_bool X86_64 || (X86_32 && !M386) def_bool X86_64 || (X86_32 && !M386)
...@@ -317,9 +313,9 @@ config X86_CMPXCHG ...@@ -317,9 +313,9 @@ config X86_CMPXCHG
config X86_L1_CACHE_SHIFT config X86_L1_CACHE_SHIFT
int int
default "7" if MPENTIUM4 || MPSC default "7" if MPENTIUM4 || MPSC
default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
config X86_XADD config X86_XADD
def_bool y def_bool y
......
...@@ -107,8 +107,7 @@ ENTRY(startup_32) ...@@ -107,8 +107,7 @@ ENTRY(startup_32)
lgdt gdt(%ebp) lgdt gdt(%ebp)
/* Enable PAE mode */ /* Enable PAE mode */
xorl %eax, %eax movl $(X86_CR4_PAE), %eax
orl $(X86_CR4_PAE), %eax
movl %eax, %cr4 movl %eax, %cr4
/* /*
......
...@@ -4,6 +4,7 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) ...@@ -4,6 +4,7 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
#undef i386 #undef i386
#include <asm/cache.h>
#include <asm/page_types.h> #include <asm/page_types.h>
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
...@@ -46,7 +47,7 @@ SECTIONS ...@@ -46,7 +47,7 @@ SECTIONS
*(.data.*) *(.data.*)
_edata = . ; _edata = . ;
} }
. = ALIGN(CONFIG_X86_L1_CACHE_BYTES); . = ALIGN(L1_CACHE_BYTES);
.bss : { .bss : {
_bss = . ; _bss = . ;
*(.bss) *(.bss)
......
...@@ -118,7 +118,7 @@ extern void acpi_restore_state_mem(void); ...@@ -118,7 +118,7 @@ extern void acpi_restore_state_mem(void);
extern unsigned long acpi_wakeup_address; extern unsigned long acpi_wakeup_address;
/* early initialization routine */ /* early initialization routine */
extern void acpi_reserve_bootmem(void); extern void acpi_reserve_wakeup_memory(void);
/* /*
* Check if the CPU can handle C2 and deeper * Check if the CPU can handle C2 and deeper
...@@ -158,6 +158,7 @@ struct bootnode; ...@@ -158,6 +158,7 @@ struct bootnode;
#ifdef CONFIG_ACPI_NUMA #ifdef CONFIG_ACPI_NUMA
extern int acpi_numa; extern int acpi_numa;
extern int acpi_get_nodes(struct bootnode *physnodes);
extern int acpi_scan_nodes(unsigned long start, unsigned long end); extern int acpi_scan_nodes(unsigned long start, unsigned long end);
#define NR_NODE_MEMBLKS (MAX_NUMNODES*2) #define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
extern void acpi_fake_nodes(const struct bootnode *fake_nodes, extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
......
...@@ -9,12 +9,13 @@ ...@@ -9,12 +9,13 @@
#define __read_mostly __attribute__((__section__(".data.read_mostly"))) #define __read_mostly __attribute__((__section__(".data.read_mostly")))
#define INTERNODE_CACHE_SHIFT CONFIG_X86_INTERNODE_CACHE_SHIFT
#define INTERNODE_CACHE_BYTES (1 << INTERNODE_CACHE_SHIFT)
#ifdef CONFIG_X86_VSMP #ifdef CONFIG_X86_VSMP
/* vSMP Internode cacheline shift */
#define INTERNODE_CACHE_SHIFT (12)
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
#define __cacheline_aligned_in_smp \ #define __cacheline_aligned_in_smp \
__attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT)))) \ __attribute__((__aligned__(INTERNODE_CACHE_BYTES))) \
__page_aligned_data __page_aligned_data
#endif #endif
#endif #endif
......
...@@ -177,6 +177,7 @@ void clflush_cache_range(void *addr, unsigned int size); ...@@ -177,6 +177,7 @@ void clflush_cache_range(void *addr, unsigned int size);
#ifdef CONFIG_DEBUG_RODATA #ifdef CONFIG_DEBUG_RODATA
void mark_rodata_ro(void); void mark_rodata_ro(void);
extern const int rodata_test_data; extern const int rodata_test_data;
extern int kernel_set_to_readonly;
void set_kernel_text_rw(void); void set_kernel_text_rw(void);
void set_kernel_text_ro(void); void set_kernel_text_ro(void);
#else #else
......
...@@ -61,6 +61,12 @@ struct e820map { ...@@ -61,6 +61,12 @@ struct e820map {
struct e820entry map[E820_X_MAX]; struct e820entry map[E820_X_MAX];
}; };
#define ISA_START_ADDRESS 0xa0000
#define ISA_END_ADDRESS 0x100000
#define BIOS_BEGIN 0x000a0000
#define BIOS_END 0x00100000
#ifdef __KERNEL__ #ifdef __KERNEL__
/* see comment in arch/x86/kernel/e820.c */ /* see comment in arch/x86/kernel/e820.c */
extern struct e820map e820; extern struct e820map e820;
...@@ -126,15 +132,18 @@ extern void e820_reserve_resources(void); ...@@ -126,15 +132,18 @@ extern void e820_reserve_resources(void);
extern void e820_reserve_resources_late(void); extern void e820_reserve_resources_late(void);
extern void setup_memory_map(void); extern void setup_memory_map(void);
extern char *default_machine_specific_memory_setup(void); extern char *default_machine_specific_memory_setup(void);
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
#define ISA_START_ADDRESS 0xa0000 /*
#define ISA_END_ADDRESS 0x100000 * Returns true iff the specified range [s,e) is completely contained inside
#define is_ISA_range(s, e) ((s) >= ISA_START_ADDRESS && (e) < ISA_END_ADDRESS) * the ISA region.
*/
static inline bool is_ISA_range(u64 s, u64 e)
{
return s >= ISA_START_ADDRESS && e <= ISA_END_ADDRESS;
}
#define BIOS_BEGIN 0x000a0000 #endif /* __KERNEL__ */
#define BIOS_END 0x00100000 #endif /* __ASSEMBLY__ */
#ifdef __KERNEL__ #ifdef __KERNEL__
#include <linux/ioport.h> #include <linux/ioport.h>
......
...@@ -4,13 +4,16 @@ ...@@ -4,13 +4,16 @@
#include <linux/pci.h> #include <linux/pci.h>
extern struct pci_device_id k8_nb_ids[]; extern struct pci_device_id k8_nb_ids[];
struct bootnode;
extern int early_is_k8_nb(u32 value); extern int early_is_k8_nb(u32 value);
extern struct pci_dev **k8_northbridges; extern struct pci_dev **k8_northbridges;
extern int num_k8_northbridges; extern int num_k8_northbridges;
extern int cache_k8_northbridges(void); extern int cache_k8_northbridges(void);
extern void k8_flush_garts(void); extern void k8_flush_garts(void);
extern int k8_scan_nodes(unsigned long start, unsigned long end); extern int k8_get_nodes(struct bootnode *nodes);
extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn);
extern int k8_scan_nodes(void);
#ifdef CONFIG_K8_NB #ifdef CONFIG_K8_NB
static inline struct pci_dev *node_to_k8_nb_misc(int node) static inline struct pci_dev *node_to_k8_nb_misc(int node)
......
...@@ -71,12 +71,7 @@ static inline void early_get_smp_config(void) ...@@ -71,12 +71,7 @@ static inline void early_get_smp_config(void)
static inline void find_smp_config(void) static inline void find_smp_config(void)
{ {
x86_init.mpparse.find_smp_config(1); x86_init.mpparse.find_smp_config();
}
static inline void early_find_smp_config(void)
{
x86_init.mpparse.find_smp_config(0);
} }
#ifdef CONFIG_X86_MPPARSE #ifdef CONFIG_X86_MPPARSE
...@@ -89,7 +84,7 @@ extern void default_mpc_oem_bus_info(struct mpc_bus *m, char *str); ...@@ -89,7 +84,7 @@ extern void default_mpc_oem_bus_info(struct mpc_bus *m, char *str);
# else # else
# define default_mpc_oem_bus_info NULL # define default_mpc_oem_bus_info NULL
# endif # endif
extern void default_find_smp_config(unsigned int reserve); extern void default_find_smp_config(void);
extern void default_get_smp_config(unsigned int early); extern void default_get_smp_config(unsigned int early);
#else #else
static inline void early_reserve_e820_mpc_new(void) { } static inline void early_reserve_e820_mpc_new(void) { }
...@@ -97,7 +92,7 @@ static inline void early_reserve_e820_mpc_new(void) { } ...@@ -97,7 +92,7 @@ static inline void early_reserve_e820_mpc_new(void) { }
#define default_mpc_apic_id NULL #define default_mpc_apic_id NULL
#define default_smp_read_mpc_oem NULL #define default_smp_read_mpc_oem NULL
#define default_mpc_oem_bus_info NULL #define default_mpc_oem_bus_info NULL
#define default_find_smp_config x86_init_uint_noop #define default_find_smp_config x86_init_noop
#define default_get_smp_config x86_init_uint_noop #define default_get_smp_config x86_init_uint_noop
#endif #endif
......
...@@ -49,7 +49,8 @@ extern unsigned long max_pfn_mapped; ...@@ -49,7 +49,8 @@ extern unsigned long max_pfn_mapped;
extern unsigned long init_memory_mapping(unsigned long start, extern unsigned long init_memory_mapping(unsigned long start,
unsigned long end); unsigned long end);
extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn,
int acpi, int k8);
extern void free_initmem(void); extern void free_initmem(void);
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
......
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#include <asm/x86_init.h>
/* /*
* ZERO_PAGE is a global shared page that is always zero: used * ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc.. * for zero-mapped memory areas etc..
...@@ -270,9 +272,9 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, ...@@ -270,9 +272,9 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
unsigned long new_flags) unsigned long new_flags)
{ {
/* /*
* PAT type is always WB for ISA. So no need to check. * PAT type is always WB for untracked ranges, so no need to check.
*/ */
if (is_ISA_range(paddr, paddr + size - 1)) if (x86_platform.is_untracked_pat_range(paddr, paddr + size))
return 1; return 1;
/* /*
......
...@@ -5,18 +5,19 @@ ...@@ -5,18 +5,19 @@
/* misc architecture specific prototypes */ /* misc architecture specific prototypes */
extern void early_idt_handler(void); void early_idt_handler(void);
extern void system_call(void); void system_call(void);
extern void syscall_init(void); void syscall_init(void);
extern void ia32_syscall(void); void ia32_syscall(void);
extern void ia32_cstar_target(void); void ia32_cstar_target(void);
extern void ia32_sysenter_target(void); void ia32_sysenter_target(void);
extern void syscall32_cpu_init(void); void syscall32_cpu_init(void);
extern void check_efer(void); void x86_configure_nx(void);
void x86_report_nx(void);
extern int reboot_force; extern int reboot_force;
......
...@@ -2,7 +2,13 @@ ...@@ -2,7 +2,13 @@
#define _ASM_X86_SECTIONS_H #define _ASM_X86_SECTIONS_H
#include <asm-generic/sections.h> #include <asm-generic/sections.h>
#include <asm/uaccess.h>
extern char __brk_base[], __brk_limit[]; extern char __brk_base[], __brk_limit[];
extern struct exception_table_entry __stop___ex_table[];
#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
extern char __end_rodata_hpage_align[];
#endif
#endif /* _ASM_X86_SECTIONS_H */ #endif /* _ASM_X86_SECTIONS_H */
...@@ -26,7 +26,7 @@ struct x86_init_mpparse { ...@@ -26,7 +26,7 @@ struct x86_init_mpparse {
void (*smp_read_mpc_oem)(struct mpc_table *mpc); void (*smp_read_mpc_oem)(struct mpc_table *mpc);
void (*mpc_oem_pci_bus)(struct mpc_bus *m); void (*mpc_oem_pci_bus)(struct mpc_bus *m);
void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name);
void (*find_smp_config)(unsigned int reserve); void (*find_smp_config)(void);
void (*get_smp_config)(unsigned int early); void (*get_smp_config)(unsigned int early);
}; };
...@@ -125,12 +125,14 @@ struct x86_cpuinit_ops { ...@@ -125,12 +125,14 @@ struct x86_cpuinit_ops {
* @calibrate_tsc: calibrate TSC * @calibrate_tsc: calibrate TSC
* @get_wallclock: get time from HW clock like RTC etc. * @get_wallclock: get time from HW clock like RTC etc.
* @set_wallclock: set time back to HW clock * @set_wallclock: set time back to HW clock
* @is_untracked_pat_range: exclude from PAT logic
*/ */
struct x86_platform_ops { struct x86_platform_ops {
unsigned long (*calibrate_tsc)(void); unsigned long (*calibrate_tsc)(void);
unsigned long (*get_wallclock)(void); unsigned long (*get_wallclock)(void);
int (*set_wallclock)(unsigned long nowtime); int (*set_wallclock)(unsigned long nowtime);
void (*iommu_shutdown)(void); void (*iommu_shutdown)(void);
bool (*is_untracked_pat_range)(u64 start, u64 end);
}; };
extern struct x86_init_ops x86_init; extern struct x86_init_ops x86_init;
......
...@@ -78,12 +78,9 @@ int acpi_save_state_mem(void) ...@@ -78,12 +78,9 @@ int acpi_save_state_mem(void)
#ifndef CONFIG_64BIT #ifndef CONFIG_64BIT
store_gdt((struct desc_ptr *)&header->pmode_gdt); store_gdt((struct desc_ptr *)&header->pmode_gdt);
header->pmode_efer_low = nx_enabled; if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low,
if (header->pmode_efer_low & 1) { &header->pmode_efer_high))
/* This is strange, why not save efer, always? */ header->pmode_efer_low = header->pmode_efer_high = 0;
rdmsr(MSR_EFER, header->pmode_efer_low,
header->pmode_efer_high);
}
#endif /* !CONFIG_64BIT */ #endif /* !CONFIG_64BIT */
header->pmode_cr0 = read_cr0(); header->pmode_cr0 = read_cr0();
...@@ -119,29 +116,32 @@ void acpi_restore_state_mem(void) ...@@ -119,29 +116,32 @@ void acpi_restore_state_mem(void)
/** /**
* acpi_reserve_bootmem - do _very_ early ACPI initialisation * acpi_reserve_wakeup_memory - do _very_ early ACPI initialisation
* *
* We allocate a page from the first 1MB of memory for the wakeup * We allocate a page from the first 1MB of memory for the wakeup
* routine for when we come back from a sleep state. The * routine for when we come back from a sleep state. The
* runtime allocator allows specification of <16MB pages, but not * runtime allocator allows specification of <16MB pages, but not
* <1MB pages. * <1MB pages.
*/ */
void __init acpi_reserve_bootmem(void) void __init acpi_reserve_wakeup_memory(void)
{ {
unsigned long mem;
if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) { if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) {
printk(KERN_ERR printk(KERN_ERR
"ACPI: Wakeup code way too big, S3 disabled.\n"); "ACPI: Wakeup code way too big, S3 disabled.\n");
return; return;
} }
acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE); mem = find_e820_area(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE);
if (!acpi_realmode) { if (mem == -1L) {
printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
return; return;
} }
acpi_realmode = (unsigned long) phys_to_virt(mem);
acpi_wakeup_address = virt_to_phys((void *)acpi_realmode); acpi_wakeup_address = mem;
reserve_early(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP");
} }
......
...@@ -263,11 +263,6 @@ static void __init smp_read_mpc_oem(struct mpc_table *mpc) ...@@ -263,11 +263,6 @@ static void __init smp_read_mpc_oem(struct mpc_table *mpc)
static __init void early_check_numaq(void) static __init void early_check_numaq(void)
{ {
/*
* Find possible boot-time SMP configuration:
*/
early_find_smp_config();
/* /*
* get boot-time SMP configuration: * get boot-time SMP configuration:
*/ */
......
...@@ -30,10 +30,22 @@ ...@@ -30,10 +30,22 @@
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/ipi.h> #include <asm/ipi.h>
#include <asm/smp.h> #include <asm/smp.h>
#include <asm/x86_init.h>
DEFINE_PER_CPU(int, x2apic_extra_bits); DEFINE_PER_CPU(int, x2apic_extra_bits);
static enum uv_system_type uv_system_type; static enum uv_system_type uv_system_type;
static u64 gru_start_paddr, gru_end_paddr;
/*
 * Report whether [start, end) lies entirely inside the GRU window recorded
 * in gru_start_paddr / gru_end_paddr.
 */
static inline bool is_GRU_range(u64 start, u64 end)
{
	/* outside if it begins before the window or runs past its end */
	return !(start < gru_start_paddr || end > gru_end_paddr);
}
/*
 * UV flavour of the is_untracked_pat_range platform hook: a range is
 * untracked when it is an ISA range or a GRU range.
 */
static bool uv_is_untracked_pat_range(u64 start, u64 end)
{
	bool untracked = is_ISA_range(start, end);

	/* only consult the GRU window when the ISA check did not match */
	if (!untracked)
		untracked = is_GRU_range(start, end);

	return untracked;
}
static int early_get_nodeid(void) static int early_get_nodeid(void)
{ {
...@@ -49,6 +61,7 @@ static int early_get_nodeid(void) ...@@ -49,6 +61,7 @@ static int early_get_nodeid(void)
static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{ {
if (!strcmp(oem_id, "SGI")) { if (!strcmp(oem_id, "SGI")) {
x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
if (!strcmp(oem_table_id, "UVL")) if (!strcmp(oem_table_id, "UVL"))
uv_system_type = UV_LEGACY_APIC; uv_system_type = UV_LEGACY_APIC;
else if (!strcmp(oem_table_id, "UVX")) else if (!strcmp(oem_table_id, "UVX"))
...@@ -385,8 +398,12 @@ static __init void map_gru_high(int max_pnode) ...@@ -385,8 +398,12 @@ static __init void map_gru_high(int max_pnode)
int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
if (gru.s.enable) if (gru.s.enable) {
map_high("GRU", gru.s.base, shift, max_pnode, map_wb); map_high("GRU", gru.s.base, shift, max_pnode, map_wb);
gru_start_paddr = ((u64)gru.s.base << shift);
gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
}
} }
static __init void map_mmr_high(int max_pnode) static __init void map_mmr_high(int max_pnode)
......
...@@ -1136,7 +1136,7 @@ void __cpuinit cpu_init(void) ...@@ -1136,7 +1136,7 @@ void __cpuinit cpu_init(void)
wrmsrl(MSR_KERNEL_GS_BASE, 0); wrmsrl(MSR_KERNEL_GS_BASE, 0);
barrier(); barrier();
check_efer(); x86_configure_nx();
if (cpu != 0) if (cpu != 0)
enable_x2apic(); enable_x2apic();
......
...@@ -263,8 +263,12 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) ...@@ -263,8 +263,12 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
/* Don't do the funky fallback heuristics the AMD version employs /* Don't do the funky fallback heuristics the AMD version employs
for now. */ for now. */
node = apicid_to_node[apicid]; node = apicid_to_node[apicid];
if (node == NUMA_NO_NODE || !node_online(node)) if (node == NUMA_NO_NODE)
node = first_node(node_online_map); node = first_node(node_online_map);
else if (!node_online(node)) {
/* reuse the value from init_cpu_to_node() */
node = cpu_to_node(cpu);
}
numa_set_node(cpu, node); numa_set_node(cpu, node);
printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
......
...@@ -170,6 +170,41 @@ static int __init cmp_range(const void *x1, const void *x2) ...@@ -170,6 +170,41 @@ static int __init cmp_range(const void *x1, const void *x2)
return start1 - start2; return start1 - start2;
} }
/*
 * Compact the non-empty entries of range[] to the front of the array, then
 * sort that leading run with cmp_range.
 *
 * An entry is treated as empty when its .end is 0.
 *
 * @range: array of az entries, modified in place
 * @az:    number of entries in range[]
 *
 * Returns the number of non-empty entries, i.e. the length of the sorted
 * leading run.
 */
static int __init clean_sort_range(struct res_range *range, int az)
{
	/*
	 * nr_range starts at az so that a completely full array is reported
	 * as az entries: the counting loop below only overwrites it when it
	 * finds an empty slot.
	 */
	int i, j, k = az - 1, nr_range = az;

	for (i = 0; i < k; i++) {
		if (range[i].end)
			continue;
		/* slot i is empty: look for the last non-empty slot above it */
		for (j = k; j > i; j--) {
			if (range[j].end) {
				k = j;
				break;
			}
		}
		/* nothing non-empty remains above i, compaction is done */
		if (j == i)
			break;
		/* move entry k down into the hole at i */
		range[i].start = range[k].start;
		range[i].end   = range[k].end;
		range[k].start = 0;
		range[k].end   = 0;
		k--;
	}
	/* count it: the first empty slot marks the end of the used entries */
	for (i = 0; i < az; i++) {
		if (!range[i].end) {
			nr_range = i;
			break;
		}
	}

	/* sort them */
	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);

	return nr_range;
}
#define BIOS_BUG_MSG KERN_WARNING \ #define BIOS_BUG_MSG KERN_WARNING \
"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
...@@ -223,22 +258,18 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, ...@@ -223,22 +258,18 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
subtract_range(range, extra_remove_base, subtract_range(range, extra_remove_base,
extra_remove_base + extra_remove_size - 1); extra_remove_base + extra_remove_size - 1);
/* get new range num */
nr_range = 0;
for (i = 0; i < RANGE_NUM; i++) {
if (!range[i].end)
continue;
nr_range++;
}
if (debug_print) { if (debug_print) {
printk(KERN_DEBUG "After UC checking\n"); printk(KERN_DEBUG "After UC checking\n");
for (i = 0; i < nr_range; i++) for (i = 0; i < RANGE_NUM; i++) {
if (!range[i].end)
continue;
printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
range[i].start, range[i].end + 1); range[i].start, range[i].end + 1);
}
} }
/* sort the ranges */ /* sort the ranges */
sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); nr_range = clean_sort_range(range, RANGE_NUM);
if (debug_print) { if (debug_print) {
printk(KERN_DEBUG "After sorting\n"); printk(KERN_DEBUG "After sorting\n");
for (i = 0; i < nr_range; i++) for (i = 0; i < nr_range; i++)
...@@ -689,8 +720,6 @@ static int __init mtrr_need_cleanup(void) ...@@ -689,8 +720,6 @@ static int __init mtrr_need_cleanup(void)
continue; continue;
if (!size) if (!size)
type = MTRR_NUM_TYPES; type = MTRR_NUM_TYPES;
if (type == MTRR_TYPE_WRPROT)
type = MTRR_TYPE_UNCACHABLE;
num[type]++; num[type]++;
} }
......
...@@ -189,9 +189,26 @@ static void wait_for_nmi(void) ...@@ -189,9 +189,26 @@ static void wait_for_nmi(void)
nmi_wait_count++; nmi_wait_count++;
} }
/*
 * Return 1 when addr falls in the half-open interval [start, end),
 * 0 otherwise.
 */
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
	if (addr < start)
		return 0;

	return addr < end;
}
static int static int
do_ftrace_mod_code(unsigned long ip, void *new_code) do_ftrace_mod_code(unsigned long ip, void *new_code)
{ {
/*
* On x86_64, kernel text mappings are mapped read-only with
* CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
* of the kernel text mapping to modify the kernel text.
*
* For 32bit kernels, these mappings are same and we can use
* kernel identity mapping to modify code.
*/
if (within(ip, (unsigned long)_text, (unsigned long)_etext))
ip = (unsigned long)__va(__pa(ip));
mod_code_ip = (void *)ip; mod_code_ip = (void *)ip;
mod_code_newcode = new_code; mod_code_newcode = new_code;
......
...@@ -18,6 +18,8 @@ ...@@ -18,6 +18,8 @@
#include <asm/asm-offsets.h> #include <asm/asm-offsets.h>
#include <asm/setup.h> #include <asm/setup.h>
#include <asm/processor-flags.h> #include <asm/processor-flags.h>
#include <asm/msr-index.h>
#include <asm/cpufeature.h>
#include <asm/percpu.h> #include <asm/percpu.h>
/* Physical address */ /* Physical address */
...@@ -297,25 +299,27 @@ ENTRY(startup_32_smp) ...@@ -297,25 +299,27 @@ ENTRY(startup_32_smp)
orl %edx,%eax orl %edx,%eax
movl %eax,%cr4 movl %eax,%cr4
btl $5, %eax # check if PAE is enabled testb $X86_CR4_PAE, %al # check if PAE is enabled
jnc 6f jz 6f
/* Check if extended functions are implemented */ /* Check if extended functions are implemented */
movl $0x80000000, %eax movl $0x80000000, %eax
cpuid cpuid
cmpl $0x80000000, %eax /* Value must be in the range 0x80000001 to 0x8000ffff */
jbe 6f subl $0x80000001, %eax
cmpl $(0x8000ffff-0x80000001), %eax
ja 6f
mov $0x80000001, %eax mov $0x80000001, %eax
cpuid cpuid
/* Execute Disable bit supported? */ /* Execute Disable bit supported? */
btl $20, %edx btl $(X86_FEATURE_NX & 31), %edx
jnc 6f jnc 6f
/* Setup EFER (Extended Feature Enable Register) */ /* Setup EFER (Extended Feature Enable Register) */
movl $0xc0000080, %ecx movl $MSR_EFER, %ecx
rdmsr rdmsr
btsl $11, %eax btsl $_EFER_NX, %eax
/* Make changes effective */ /* Make changes effective */
wrmsr wrmsr
......
...@@ -262,11 +262,11 @@ ENTRY(secondary_startup_64) ...@@ -262,11 +262,11 @@ ENTRY(secondary_startup_64)
.quad x86_64_start_kernel .quad x86_64_start_kernel
ENTRY(initial_gs) ENTRY(initial_gs)
.quad INIT_PER_CPU_VAR(irq_stack_union) .quad INIT_PER_CPU_VAR(irq_stack_union)
__FINITDATA
ENTRY(stack_start) ENTRY(stack_start)
.quad init_thread_union+THREAD_SIZE-8 .quad init_thread_union+THREAD_SIZE-8
.word 0 .word 0
__FINITDATA
bad_address: bad_address:
jmp bad_address jmp bad_address
...@@ -340,6 +340,7 @@ ENTRY(name) ...@@ -340,6 +340,7 @@ ENTRY(name)
i = i + 1 ; \ i = i + 1 ; \
.endr .endr
.data
/* /*
* This default setting generates an ident mapping at address 0x100000 * This default setting generates an ident mapping at address 0x100000
* and a mapping for the kernel that precisely maps virtual address * and a mapping for the kernel that precisely maps virtual address
......
...@@ -158,8 +158,7 @@ int machine_kexec_prepare(struct kimage *image) ...@@ -158,8 +158,7 @@ int machine_kexec_prepare(struct kimage *image)
{ {
int error; int error;
if (nx_enabled) set_pages_x(image->control_code_page, 1);
set_pages_x(image->control_code_page, 1);
error = machine_kexec_alloc_page_tables(image); error = machine_kexec_alloc_page_tables(image);
if (error) if (error)
return error; return error;
...@@ -173,8 +172,7 @@ int machine_kexec_prepare(struct kimage *image) ...@@ -173,8 +172,7 @@ int machine_kexec_prepare(struct kimage *image)
*/ */
void machine_kexec_cleanup(struct kimage *image) void machine_kexec_cleanup(struct kimage *image)
{ {
if (nx_enabled) set_pages_nx(image->control_code_page, 1);
set_pages_nx(image->control_code_page, 1);
machine_kexec_free_page_tables(image); machine_kexec_free_page_tables(image);
} }
......
...@@ -667,36 +667,18 @@ void __init default_get_smp_config(unsigned int early) ...@@ -667,36 +667,18 @@ void __init default_get_smp_config(unsigned int early)
*/ */
} }
static void __init smp_reserve_bootmem(struct mpf_intel *mpf) static void __init smp_reserve_memory(struct mpf_intel *mpf)
{ {
unsigned long size = get_mpc_size(mpf->physptr); unsigned long size = get_mpc_size(mpf->physptr);
#ifdef CONFIG_X86_32
/*
* We cannot access to MPC table to compute table size yet,
* as only few megabytes from the bottom is mapped now.
* PC-9800's MPC table places on the very last of physical
* memory; so that simply reserving PAGE_SIZE from mpf->physptr
* yields BUG() in reserve_bootmem.
* also need to make sure physptr is below than max_low_pfn
* we don't need reserve the area above max_low_pfn
*/
unsigned long end = max_low_pfn * PAGE_SIZE;
if (mpf->physptr < end) { reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc");
if (mpf->physptr + size > end)
size = end - mpf->physptr;
reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
}
#else
reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
#endif
} }
static int __init smp_scan_config(unsigned long base, unsigned long length, static int __init smp_scan_config(unsigned long base, unsigned long length)
unsigned reserve)
{ {
unsigned int *bp = phys_to_virt(base); unsigned int *bp = phys_to_virt(base);
struct mpf_intel *mpf; struct mpf_intel *mpf;
unsigned long mem;
apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
bp, length); bp, length);
...@@ -717,12 +699,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, ...@@ -717,12 +699,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", printk(KERN_INFO "found SMP MP-table at [%p] %llx\n",
mpf, (u64)virt_to_phys(mpf)); mpf, (u64)virt_to_phys(mpf));
if (!reserve) mem = virt_to_phys(mpf);
return 1; reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf");
reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf),
BOOTMEM_DEFAULT);
if (mpf->physptr) if (mpf->physptr)
smp_reserve_bootmem(mpf); smp_reserve_memory(mpf);
return 1; return 1;
} }
...@@ -732,7 +712,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, ...@@ -732,7 +712,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
return 0; return 0;
} }
void __init default_find_smp_config(unsigned int reserve) void __init default_find_smp_config(void)
{ {
unsigned int address; unsigned int address;
...@@ -744,9 +724,9 @@ void __init default_find_smp_config(unsigned int reserve) ...@@ -744,9 +724,9 @@ void __init default_find_smp_config(unsigned int reserve)
* 2) Scan the top 1K of base RAM * 2) Scan the top 1K of base RAM
* 3) Scan the 64K of bios * 3) Scan the 64K of bios
*/ */
if (smp_scan_config(0x0, 0x400, reserve) || if (smp_scan_config(0x0, 0x400) ||
smp_scan_config(639 * 0x400, 0x400, reserve) || smp_scan_config(639 * 0x400, 0x400) ||
smp_scan_config(0xF0000, 0x10000, reserve)) smp_scan_config(0xF0000, 0x10000))
return; return;
/* /*
* If it is an SMP machine we should know now, unless the * If it is an SMP machine we should know now, unless the
...@@ -767,7 +747,7 @@ void __init default_find_smp_config(unsigned int reserve) ...@@ -767,7 +747,7 @@ void __init default_find_smp_config(unsigned int reserve)
address = get_bios_ebda(); address = get_bios_ebda();
if (address) if (address)
smp_scan_config(address, 0x400, reserve); smp_scan_config(address, 0x400);
} }
#ifdef CONFIG_X86_IO_APIC #ifdef CONFIG_X86_IO_APIC
......
...@@ -106,6 +106,7 @@ ...@@ -106,6 +106,7 @@
#include <asm/percpu.h> #include <asm/percpu.h>
#include <asm/topology.h> #include <asm/topology.h>
#include <asm/apicdef.h> #include <asm/apicdef.h>
#include <asm/k8.h>
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
#include <asm/numa_64.h> #include <asm/numa_64.h>
#endif #endif
...@@ -487,42 +488,11 @@ static void __init reserve_early_setup_data(void) ...@@ -487,42 +488,11 @@ static void __init reserve_early_setup_data(void)
#ifdef CONFIG_KEXEC #ifdef CONFIG_KEXEC
/**
* Reserve @size bytes of crashkernel memory at any suitable offset.
*
* @size: Size of the crashkernel memory to reserve.
* Returns the base address on success, and -1ULL on failure.
*/
static
unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
{
const unsigned long long alignment = 16<<20; /* 16M */
unsigned long long start = 0LL;
while (1) {
int ret;
start = find_e820_area(start, ULONG_MAX, size, alignment);
if (start == -1ULL)
return start;
/* try to reserve it */
ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE);
if (ret >= 0)
return start;
start += alignment;
}
}
static inline unsigned long long get_total_mem(void) static inline unsigned long long get_total_mem(void)
{ {
unsigned long long total; unsigned long long total;
total = max_low_pfn - min_low_pfn; total = max_pfn - min_low_pfn;
#ifdef CONFIG_HIGHMEM
total += highend_pfn - highstart_pfn;
#endif
return total << PAGE_SHIFT; return total << PAGE_SHIFT;
} }
...@@ -542,21 +512,25 @@ static void __init reserve_crashkernel(void) ...@@ -542,21 +512,25 @@ static void __init reserve_crashkernel(void)
/* 0 means: find the address automatically */ /* 0 means: find the address automatically */
if (crash_base <= 0) { if (crash_base <= 0) {
crash_base = find_and_reserve_crashkernel(crash_size); const unsigned long long alignment = 16<<20; /* 16M */
crash_base = find_e820_area(alignment, ULONG_MAX, crash_size,
alignment);
if (crash_base == -1ULL) { if (crash_base == -1ULL) {
pr_info("crashkernel reservation failed. " pr_info("crashkernel reservation failed - No suitable area found.\n");
"No suitable area found.\n");
return; return;
} }
} else { } else {
ret = reserve_bootmem_generic(crash_base, crash_size, unsigned long long start;
BOOTMEM_EXCLUSIVE);
if (ret < 0) { start = find_e820_area(crash_base, ULONG_MAX, crash_size,
pr_info("crashkernel reservation failed - " 1<<20);
"memory is in use\n"); if (start != crash_base) {
pr_info("crashkernel reservation failed - memory is in use.\n");
return; return;
} }
} }
reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL");
printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
"for crashkernel (System RAM: %ldMB)\n", "for crashkernel (System RAM: %ldMB)\n",
...@@ -699,6 +673,9 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { ...@@ -699,6 +673,9 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
void __init setup_arch(char **cmdline_p) void __init setup_arch(char **cmdline_p)
{ {
int acpi = 0;
int k8 = 0;
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
visws_early_detect(); visws_early_detect();
...@@ -791,21 +768,18 @@ void __init setup_arch(char **cmdline_p) ...@@ -791,21 +768,18 @@ void __init setup_arch(char **cmdline_p)
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line; *cmdline_p = command_line;
#ifdef CONFIG_X86_64
/* /*
* Must call this twice: Once just to detect whether hardware doesn't * x86_configure_nx() is called before parse_early_param() to detect
* support NX (so that the early EHCI debug console setup can safely * whether hardware doesn't support NX (so that the early EHCI debug
* call set_fixmap(), and then again after parsing early parameters to * console setup can safely call set_fixmap()). It may then be called
* honor the respective command line option. * again from within noexec_setup() during parsing early parameters
* to honor the respective command line option.
*/ */
check_efer(); x86_configure_nx();
#endif
parse_early_param(); parse_early_param();
#ifdef CONFIG_X86_64 x86_report_nx();
check_efer();
#endif
/* Must be before kernel pagetables are setup */ /* Must be before kernel pagetables are setup */
vmi_activate(); vmi_activate();
...@@ -901,6 +875,13 @@ void __init setup_arch(char **cmdline_p) ...@@ -901,6 +875,13 @@ void __init setup_arch(char **cmdline_p)
reserve_brk(); reserve_brk();
#ifdef CONFIG_ACPI_SLEEP
/*
* Reserve low memory region for sleep support.
* even before init_memory_mapping
*/
acpi_reserve_wakeup_memory();
#endif
init_gbpages(); init_gbpages();
/* max_pfn_mapped is updated here */ /* max_pfn_mapped is updated here */
...@@ -927,6 +908,8 @@ void __init setup_arch(char **cmdline_p) ...@@ -927,6 +908,8 @@ void __init setup_arch(char **cmdline_p)
reserve_initrd(); reserve_initrd();
reserve_crashkernel();
vsmp_init(); vsmp_init();
io_delay_init(); io_delay_init();
...@@ -938,27 +921,24 @@ void __init setup_arch(char **cmdline_p) ...@@ -938,27 +921,24 @@ void __init setup_arch(char **cmdline_p)
early_acpi_boot_init(); early_acpi_boot_init();
/*
* Find and reserve possible boot-time SMP configuration:
*/
find_smp_config();
#ifdef CONFIG_ACPI_NUMA #ifdef CONFIG_ACPI_NUMA
/* /*
* Parse SRAT to discover nodes. * Parse SRAT to discover nodes.
*/ */
acpi_numa_init(); acpi = acpi_numa_init();
#endif #endif
initmem_init(0, max_pfn); #ifdef CONFIG_K8_NUMA
if (!acpi)
#ifdef CONFIG_ACPI_SLEEP k8 = !k8_numa_init(0, max_pfn);
/*
* Reserve low memory region for sleep support.
*/
acpi_reserve_bootmem();
#endif #endif
/*
* Find and reserve possible boot-time SMP configuration:
*/
find_smp_config();
reserve_crashkernel(); initmem_init(0, max_pfn, acpi, k8);
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
/* /*
......
...@@ -817,10 +817,8 @@ static int __init uv_init_blade(int blade) ...@@ -817,10 +817,8 @@ static int __init uv_init_blade(int blade)
*/ */
apicid = blade_to_first_apicid(blade); apicid = blade_to_first_apicid(blade);
pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
if ((pa & 0xff) != UV_BAU_MESSAGE) { uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
((apicid << 32) | UV_BAU_MESSAGE)); ((apicid << 32) | UV_BAU_MESSAGE));
}
return 0; return 0;
} }
......
...@@ -197,7 +197,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) ...@@ -197,7 +197,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
apic_version[m->apicid] = ver; apic_version[m->apicid] = ver;
} }
static void __init visws_find_smp_config(unsigned int reserve) static void __init visws_find_smp_config(void)
{ {
struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS);
unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
......
...@@ -41,6 +41,32 @@ ENTRY(phys_startup_64) ...@@ -41,6 +41,32 @@ ENTRY(phys_startup_64)
jiffies_64 = jiffies; jiffies_64 = jiffies;
#endif #endif
#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
/*
* On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA
* we retain large page mappings for boundaries spanning kernel text, rodata
* and data sections.
*
* However, kernel identity mappings will have different RWX permissions
* to the pages mapping to text and to the pages padding (which are freed) the
* text section. Hence kernel identity mappings will be broken to smaller
* pages. For 64-bit, kernel text and kernel identity mappings are different,
* so we can enable protection checks that come with CONFIG_DEBUG_RODATA,
* as well as retain 2MB large page mappings for kernel text.
*/
#define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE);
#define X64_ALIGN_DEBUG_RODATA_END \
. = ALIGN(HPAGE_SIZE); \
__end_rodata_hpage_align = .;
#else
#define X64_ALIGN_DEBUG_RODATA_BEGIN
#define X64_ALIGN_DEBUG_RODATA_END
#endif
PHDRS { PHDRS {
text PT_LOAD FLAGS(5); /* R_E */ text PT_LOAD FLAGS(5); /* R_E */
data PT_LOAD FLAGS(7); /* RWE */ data PT_LOAD FLAGS(7); /* RWE */
...@@ -90,7 +116,9 @@ SECTIONS ...@@ -90,7 +116,9 @@ SECTIONS
EXCEPTION_TABLE(16) :text = 0x9090 EXCEPTION_TABLE(16) :text = 0x9090
X64_ALIGN_DEBUG_RODATA_BEGIN
RO_DATA(PAGE_SIZE) RO_DATA(PAGE_SIZE)
X64_ALIGN_DEBUG_RODATA_END
/* Data */ /* Data */
.data : AT(ADDR(.data) - LOAD_OFFSET) { .data : AT(ADDR(.data) - LOAD_OFFSET) {
...@@ -107,13 +135,13 @@ SECTIONS ...@@ -107,13 +135,13 @@ SECTIONS
PAGE_ALIGNED_DATA(PAGE_SIZE) PAGE_ALIGNED_DATA(PAGE_SIZE)
CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES) CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
DATA_DATA DATA_DATA
CONSTRUCTORS CONSTRUCTORS
/* rarely changed data like cpu maps */ /* rarely changed data like cpu maps */
READ_MOSTLY_DATA(CONFIG_X86_INTERNODE_CACHE_BYTES) READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES)
/* End of data section */ /* End of data section */
_edata = .; _edata = .;
...@@ -137,12 +165,12 @@ SECTIONS ...@@ -137,12 +165,12 @@ SECTIONS
*(.vsyscall_0) *(.vsyscall_0)
} :user } :user
. = ALIGN(CONFIG_X86_L1_CACHE_BYTES); . = ALIGN(L1_CACHE_BYTES);
.vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
*(.vsyscall_fn) *(.vsyscall_fn)
} }
. = ALIGN(CONFIG_X86_L1_CACHE_BYTES); . = ALIGN(L1_CACHE_BYTES);
.vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
*(.vsyscall_gtod_data) *(.vsyscall_gtod_data)
} }
...@@ -166,7 +194,7 @@ SECTIONS ...@@ -166,7 +194,7 @@ SECTIONS
} }
vgetcpu_mode = VVIRT(.vgetcpu_mode); vgetcpu_mode = VVIRT(.vgetcpu_mode);
. = ALIGN(CONFIG_X86_L1_CACHE_BYTES); . = ALIGN(L1_CACHE_BYTES);
.jiffies : AT(VLOAD(.jiffies)) { .jiffies : AT(VLOAD(.jiffies)) {
*(.jiffies) *(.jiffies)
} }
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <asm/e820.h> #include <asm/e820.h>
#include <asm/time.h> #include <asm/time.h>
#include <asm/irq.h> #include <asm/irq.h>
#include <asm/pat.h>
#include <asm/tsc.h> #include <asm/tsc.h>
#include <asm/iommu.h> #include <asm/iommu.h>
...@@ -80,4 +81,5 @@ struct x86_platform_ops x86_platform = { ...@@ -80,4 +81,5 @@ struct x86_platform_ops x86_platform = {
.get_wallclock = mach_get_cmos_time, .get_wallclock = mach_get_cmos_time,
.set_wallclock = mach_set_rtc_mmss, .set_wallclock = mach_set_rtc_mmss,
.iommu_shutdown = iommu_shutdown_noop, .iommu_shutdown = iommu_shutdown_noop,
.is_untracked_pat_range = is_ISA_range,
}; };
...@@ -146,10 +146,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, ...@@ -146,10 +146,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
use_gbpages = direct_gbpages; use_gbpages = direct_gbpages;
#endif #endif
set_nx();
if (nx_enabled)
printk(KERN_INFO "NX (Execute Disable) protection: active\n");
/* Enable PSE if available */ /* Enable PSE if available */
if (cpu_has_pse) if (cpu_has_pse)
set_in_cr4(X86_CR4_PSE); set_in_cr4(X86_CR4_PSE);
......
...@@ -412,7 +412,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) ...@@ -412,7 +412,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
pkmap_page_table = pte; pkmap_page_table = pte;
} }
static void __init add_one_highpage_init(struct page *page, int pfn) static void __init add_one_highpage_init(struct page *page)
{ {
ClearPageReserved(page); ClearPageReserved(page);
init_page_count(page); init_page_count(page);
...@@ -445,7 +445,7 @@ static int __init add_highpages_work_fn(unsigned long start_pfn, ...@@ -445,7 +445,7 @@ static int __init add_highpages_work_fn(unsigned long start_pfn,
if (!pfn_valid(node_pfn)) if (!pfn_valid(node_pfn))
continue; continue;
page = pfn_to_page(node_pfn); page = pfn_to_page(node_pfn);
add_one_highpage_init(page, node_pfn); add_one_highpage_init(page);
} }
return 0; return 0;
...@@ -703,8 +703,8 @@ void __init find_low_pfn_range(void) ...@@ -703,8 +703,8 @@ void __init find_low_pfn_range(void)
} }
#ifndef CONFIG_NEED_MULTIPLE_NODES #ifndef CONFIG_NEED_MULTIPLE_NODES
void __init initmem_init(unsigned long start_pfn, void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
unsigned long end_pfn) int acpi, int k8)
{ {
#ifdef CONFIG_HIGHMEM #ifdef CONFIG_HIGHMEM
highstart_pfn = highend_pfn = max_pfn; highstart_pfn = highend_pfn = max_pfn;
...@@ -997,7 +997,7 @@ static noinline int do_test_wp_bit(void) ...@@ -997,7 +997,7 @@ static noinline int do_test_wp_bit(void)
const int rodata_test_data = 0xC3; const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data); EXPORT_SYMBOL_GPL(rodata_test_data);
static int kernel_set_to_readonly; int kernel_set_to_readonly __read_mostly;
void set_kernel_text_rw(void) void set_kernel_text_rw(void)
{ {
......
...@@ -568,7 +568,8 @@ kernel_physical_mapping_init(unsigned long start, ...@@ -568,7 +568,8 @@ kernel_physical_mapping_init(unsigned long start,
} }
#ifndef CONFIG_NUMA #ifndef CONFIG_NUMA
void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
int acpi, int k8)
{ {
unsigned long bootmap_size, bootmap; unsigned long bootmap_size, bootmap;
...@@ -694,12 +695,12 @@ void __init mem_init(void) ...@@ -694,12 +695,12 @@ void __init mem_init(void)
const int rodata_test_data = 0xC3; const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data); EXPORT_SYMBOL_GPL(rodata_test_data);
static int kernel_set_to_readonly; int kernel_set_to_readonly;
void set_kernel_text_rw(void) void set_kernel_text_rw(void)
{ {
unsigned long start = PFN_ALIGN(_stext); unsigned long start = PFN_ALIGN(_text);
unsigned long end = PFN_ALIGN(__start_rodata); unsigned long end = PFN_ALIGN(__stop___ex_table);
if (!kernel_set_to_readonly) if (!kernel_set_to_readonly)
return; return;
...@@ -707,13 +708,18 @@ void set_kernel_text_rw(void) ...@@ -707,13 +708,18 @@ void set_kernel_text_rw(void)
pr_debug("Set kernel text: %lx - %lx for read write\n", pr_debug("Set kernel text: %lx - %lx for read write\n",
start, end); start, end);
/*
* Make the kernel identity mapping for text RW. Kernel text
* mapping will always be RO. Refer to the comment in
* static_protections() in pageattr.c
*/
set_memory_rw(start, (end - start) >> PAGE_SHIFT); set_memory_rw(start, (end - start) >> PAGE_SHIFT);
} }
void set_kernel_text_ro(void) void set_kernel_text_ro(void)
{ {
unsigned long start = PFN_ALIGN(_stext); unsigned long start = PFN_ALIGN(_text);
unsigned long end = PFN_ALIGN(__start_rodata); unsigned long end = PFN_ALIGN(__stop___ex_table);
if (!kernel_set_to_readonly) if (!kernel_set_to_readonly)
return; return;
...@@ -721,14 +727,21 @@ void set_kernel_text_ro(void) ...@@ -721,14 +727,21 @@ void set_kernel_text_ro(void)
pr_debug("Set kernel text: %lx - %lx for read only\n", pr_debug("Set kernel text: %lx - %lx for read only\n",
start, end); start, end);
/*
* Set the kernel identity mapping for text RO.
*/
set_memory_ro(start, (end - start) >> PAGE_SHIFT); set_memory_ro(start, (end - start) >> PAGE_SHIFT);
} }
void mark_rodata_ro(void) void mark_rodata_ro(void)
{ {
unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); unsigned long start = PFN_ALIGN(_text);
unsigned long rodata_start = unsigned long rodata_start =
((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
unsigned long end = (unsigned long) &__end_rodata_hpage_align;
unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table);
unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata);
unsigned long data_start = (unsigned long) &_sdata;
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
(end - start) >> 10); (end - start) >> 10);
...@@ -751,6 +764,14 @@ void mark_rodata_ro(void) ...@@ -751,6 +764,14 @@ void mark_rodata_ro(void)
printk(KERN_INFO "Testing CPA: again\n"); printk(KERN_INFO "Testing CPA: again\n");
set_memory_ro(start, (end-start) >> PAGE_SHIFT); set_memory_ro(start, (end-start) >> PAGE_SHIFT);
#endif #endif
free_init_pages("unused kernel memory",
(unsigned long) page_address(virt_to_page(text_end)),
(unsigned long)
page_address(virt_to_page(rodata_start)));
free_init_pages("unused kernel memory",
(unsigned long) page_address(virt_to_page(rodata_end)),
(unsigned long) page_address(virt_to_page(data_start)));
} }
#endif #endif
......
...@@ -24,6 +24,9 @@ ...@@ -24,6 +24,9 @@
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/k8.h> #include <asm/k8.h>
static struct bootnode __initdata nodes[8];
static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
static __init int find_northbridge(void) static __init int find_northbridge(void)
{ {
int num; int num;
...@@ -54,18 +57,6 @@ static __init void early_get_boot_cpu_id(void) ...@@ -54,18 +57,6 @@ static __init void early_get_boot_cpu_id(void)
* need to get boot_cpu_id so can use that to create apicid_to_node * need to get boot_cpu_id so can use that to create apicid_to_node
* in k8_scan_nodes() * in k8_scan_nodes()
*/ */
/*
* Find possible boot-time SMP configuration:
*/
#ifdef CONFIG_X86_MPPARSE
early_find_smp_config();
#endif
#ifdef CONFIG_ACPI
/*
* Read APIC information from ACPI tables.
*/
early_acpi_boot_init();
#endif
#ifdef CONFIG_X86_MPPARSE #ifdef CONFIG_X86_MPPARSE
/* /*
* get boot-time SMP configuration: * get boot-time SMP configuration:
...@@ -76,12 +67,26 @@ static __init void early_get_boot_cpu_id(void) ...@@ -76,12 +67,26 @@ static __init void early_get_boot_cpu_id(void)
early_init_lapic_mapping(); early_init_lapic_mapping();
} }
int __init k8_scan_nodes(unsigned long start, unsigned long end) int __init k8_get_nodes(struct bootnode *physnodes)
{ {
unsigned numnodes, cores, bits, apicid_base; int i;
int ret = 0;
for_each_node_mask(i, nodes_parsed) {
physnodes[ret].start = nodes[i].start;
physnodes[ret].end = nodes[i].end;
ret++;
}
return ret;
}
int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn)
{
unsigned long start = PFN_PHYS(start_pfn);
unsigned long end = PFN_PHYS(end_pfn);
unsigned numnodes;
unsigned long prevbase; unsigned long prevbase;
struct bootnode nodes[8]; int i, nb, found = 0;
int i, j, nb, found = 0;
u32 nodeid, reg; u32 nodeid, reg;
if (!early_pci_allowed()) if (!early_pci_allowed())
...@@ -91,16 +96,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) ...@@ -91,16 +96,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
if (nb < 0) if (nb < 0)
return nb; return nb;
printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb); pr_info("Scanning NUMA topology in Northbridge %d\n", nb);
reg = read_pci_config(0, nb, 0, 0x60); reg = read_pci_config(0, nb, 0, 0x60);
numnodes = ((reg >> 4) & 0xF) + 1; numnodes = ((reg >> 4) & 0xF) + 1;
if (numnodes <= 1) if (numnodes <= 1)
return -1; return -1;
printk(KERN_INFO "Number of nodes %d\n", numnodes); pr_info("Number of physical nodes %d\n", numnodes);
memset(&nodes, 0, sizeof(nodes));
prevbase = 0; prevbase = 0;
for (i = 0; i < 8; i++) { for (i = 0; i < 8; i++) {
unsigned long base, limit; unsigned long base, limit;
...@@ -111,28 +115,28 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) ...@@ -111,28 +115,28 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
nodeid = limit & 7; nodeid = limit & 7;
if ((base & 3) == 0) { if ((base & 3) == 0) {
if (i < numnodes) if (i < numnodes)
printk("Skipping disabled node %d\n", i); pr_info("Skipping disabled node %d\n", i);
continue; continue;
} }
if (nodeid >= numnodes) { if (nodeid >= numnodes) {
printk("Ignoring excess node %d (%lx:%lx)\n", nodeid, pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid,
base, limit); base, limit);
continue; continue;
} }
if (!limit) { if (!limit) {
printk(KERN_INFO "Skipping node entry %d (base %lx)\n", pr_info("Skipping node entry %d (base %lx)\n",
i, base); i, base);
continue; continue;
} }
if ((base >> 8) & 3 || (limit >> 8) & 3) { if ((base >> 8) & 3 || (limit >> 8) & 3) {
printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n", pr_err("Node %d using interleaving mode %lx/%lx\n",
nodeid, (base>>8)&3, (limit>>8) & 3); nodeid, (base >> 8) & 3, (limit >> 8) & 3);
return -1; return -1;
} }
if (node_isset(nodeid, node_possible_map)) { if (node_isset(nodeid, nodes_parsed)) {
printk(KERN_INFO "Node %d already present. Skipping\n", pr_info("Node %d already present, skipping\n",
nodeid); nodeid);
continue; continue;
} }
...@@ -141,8 +145,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) ...@@ -141,8 +145,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
limit |= (1<<24)-1; limit |= (1<<24)-1;
limit++; limit++;
if (limit > max_pfn << PAGE_SHIFT) if (limit > end)
limit = max_pfn << PAGE_SHIFT; limit = end;
if (limit <= base) if (limit <= base)
continue; continue;
...@@ -154,24 +158,24 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) ...@@ -154,24 +158,24 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
if (limit > end) if (limit > end)
limit = end; limit = end;
if (limit == base) { if (limit == base) {
printk(KERN_ERR "Empty node %d\n", nodeid); pr_err("Empty node %d\n", nodeid);
continue; continue;
} }
if (limit < base) { if (limit < base) {
printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n", pr_err("Node %d bogus settings %lx-%lx.\n",
nodeid, base, limit); nodeid, base, limit);
continue; continue;
} }
/* Could sort here, but pun for now. Should not happen anyroads. */ /* Could sort here, but pun for now. Should not happen anyroads. */
if (prevbase > base) { if (prevbase > base) {
printk(KERN_ERR "Node map not sorted %lx,%lx\n", pr_err("Node map not sorted %lx,%lx\n",
prevbase, base); prevbase, base);
return -1; return -1;
} }
printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n", pr_info("Node %d MemBase %016lx Limit %016lx\n",
nodeid, base, limit); nodeid, base, limit);
found++; found++;
...@@ -180,18 +184,29 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) ...@@ -180,18 +184,29 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
prevbase = base; prevbase = base;
node_set(nodeid, node_possible_map); node_set(nodeid, nodes_parsed);
} }
if (!found) if (!found)
return -1; return -1;
return 0;
}
int __init k8_scan_nodes(void)
{
unsigned int bits;
unsigned int cores;
unsigned int apicid_base;
int i;
BUG_ON(nodes_empty(nodes_parsed));
node_possible_map = nodes_parsed;
memnode_shift = compute_hash_shift(nodes, 8, NULL); memnode_shift = compute_hash_shift(nodes, 8, NULL);
if (memnode_shift < 0) { if (memnode_shift < 0) {
printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); pr_err("No NUMA node hash function found. Contact maintainer\n");
return -1; return -1;
} }
printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift); pr_info("Using node hash shift of %d\n", memnode_shift);
/* use the coreid bits from early_identify_cpu */ /* use the coreid bits from early_identify_cpu */
bits = boot_cpu_data.x86_coreid_bits; bits = boot_cpu_data.x86_coreid_bits;
...@@ -200,14 +215,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) ...@@ -200,14 +215,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
/* need to get boot_cpu_id early for system with apicid lifting */ /* need to get boot_cpu_id early for system with apicid lifting */
early_get_boot_cpu_id(); early_get_boot_cpu_id();
if (boot_cpu_physical_apicid > 0) { if (boot_cpu_physical_apicid > 0) {
printk(KERN_INFO "BSP APIC ID: %02x\n", pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);
boot_cpu_physical_apicid);
apicid_base = boot_cpu_physical_apicid; apicid_base = boot_cpu_physical_apicid;
} }
for (i = 0; i < 8; i++) { for_each_node_mask(i, node_possible_map) {
if (nodes[i].start == nodes[i].end) int j;
continue;
e820_register_active_regions(i, e820_register_active_regions(i,
nodes[i].start >> PAGE_SHIFT, nodes[i].start >> PAGE_SHIFT,
......
...@@ -347,8 +347,8 @@ static void init_remap_allocator(int nid) ...@@ -347,8 +347,8 @@ static void init_remap_allocator(int nid)
(ulong) node_remap_end_vaddr[nid]); (ulong) node_remap_end_vaddr[nid]);
} }
void __init initmem_init(unsigned long start_pfn, void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
unsigned long end_pfn) int acpi, int k8)
{ {
int nid; int nid;
long kva_target_pfn; long kva_target_pfn;
......
This diff is collapsed.
...@@ -279,6 +279,22 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, ...@@ -279,6 +279,22 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
__pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
pgprot_val(forbidden) |= _PAGE_RW; pgprot_val(forbidden) |= _PAGE_RW;
#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
/*
* Once the kernel maps the text as RO (kernel_set_to_readonly is set),
* kernel text mappings for the large page aligned text, rodata sections
* will be always read-only. For the kernel identity mappings covering
* the holes caused by this alignment can be anything that user asks.
*
* This will preserve the large page mappings for kernel text/data
* at no extra cost.
*/
if (kernel_set_to_readonly &&
within(address, (unsigned long)_text,
(unsigned long)__end_rodata_hpage_align))
pgprot_val(forbidden) |= _PAGE_RW;
#endif
prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
return prot; return prot;
...@@ -1069,12 +1085,18 @@ EXPORT_SYMBOL(set_memory_array_wb); ...@@ -1069,12 +1085,18 @@ EXPORT_SYMBOL(set_memory_array_wb);
int set_memory_x(unsigned long addr, int numpages) int set_memory_x(unsigned long addr, int numpages)
{ {
if (!(__supported_pte_mask & _PAGE_NX))
return 0;
return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0); return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
} }
EXPORT_SYMBOL(set_memory_x); EXPORT_SYMBOL(set_memory_x);
int set_memory_nx(unsigned long addr, int numpages) int set_memory_nx(unsigned long addr, int numpages)
{ {
if (!(__supported_pte_mask & _PAGE_NX))
return 0;
return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0); return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
} }
EXPORT_SYMBOL(set_memory_nx); EXPORT_SYMBOL(set_memory_nx);
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/x86_init.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/fcntl.h> #include <asm/fcntl.h>
#include <asm/e820.h> #include <asm/e820.h>
...@@ -388,7 +389,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, ...@@ -388,7 +389,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
} }
/* Low ISA region is always mapped WB in page table. No need to track */ /* Low ISA region is always mapped WB in page table. No need to track */
if (is_ISA_range(start, end - 1)) { if (x86_platform.is_untracked_pat_range(start, end)) {
if (new_type) if (new_type)
*new_type = _PAGE_CACHE_WB; *new_type = _PAGE_CACHE_WB;
return 0; return 0;
...@@ -499,7 +500,7 @@ int free_memtype(u64 start, u64 end) ...@@ -499,7 +500,7 @@ int free_memtype(u64 start, u64 end)
return 0; return 0;
/* Low ISA region is always mapped WB. No need to track */ /* Low ISA region is always mapped WB. No need to track */
if (is_ISA_range(start, end - 1)) if (x86_platform.is_untracked_pat_range(start, end))
return 0; return 0;
is_range_ram = pat_pagerange_is_ram(start, end); is_range_ram = pat_pagerange_is_ram(start, end);
...@@ -582,7 +583,7 @@ static unsigned long lookup_memtype(u64 paddr) ...@@ -582,7 +583,7 @@ static unsigned long lookup_memtype(u64 paddr)
int rettype = _PAGE_CACHE_WB; int rettype = _PAGE_CACHE_WB;
struct memtype *entry; struct memtype *entry;
if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1)) if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE))
return rettype; return rettype;
if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
...@@ -1018,8 +1019,10 @@ static const struct file_operations memtype_fops = { ...@@ -1018,8 +1019,10 @@ static const struct file_operations memtype_fops = {
static int __init pat_memtype_list_init(void) static int __init pat_memtype_list_init(void)
{ {
debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir, if (pat_enabled) {
NULL, &memtype_fops); debugfs_create_file("pat_memtype_list", S_IRUSR,
arch_debugfs_dir, NULL, &memtype_fops);
}
return 0; return 0;
} }
......
...@@ -3,10 +3,8 @@ ...@@ -3,10 +3,8 @@
#include <linux/init.h> #include <linux/init.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/proto.h>
int nx_enabled;
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
static int disable_nx __cpuinitdata; static int disable_nx __cpuinitdata;
/* /*
...@@ -22,48 +20,41 @@ static int __init noexec_setup(char *str) ...@@ -22,48 +20,41 @@ static int __init noexec_setup(char *str)
if (!str) if (!str)
return -EINVAL; return -EINVAL;
if (!strncmp(str, "on", 2)) { if (!strncmp(str, "on", 2)) {
__supported_pte_mask |= _PAGE_NX;
disable_nx = 0; disable_nx = 0;
} else if (!strncmp(str, "off", 3)) { } else if (!strncmp(str, "off", 3)) {
disable_nx = 1; disable_nx = 1;
__supported_pte_mask &= ~_PAGE_NX;
} }
x86_configure_nx();
return 0; return 0;
} }
early_param("noexec", noexec_setup); early_param("noexec", noexec_setup);
#endif
#ifdef CONFIG_X86_PAE void __cpuinit x86_configure_nx(void)
void __init set_nx(void)
{ {
unsigned int v[4], l, h; if (cpu_has_nx && !disable_nx)
__supported_pte_mask |= _PAGE_NX;
if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { else
cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); __supported_pte_mask &= ~_PAGE_NX;
}
if ((v[3] & (1 << 20)) && !disable_nx) { void __init x86_report_nx(void)
rdmsr(MSR_EFER, l, h); {
l |= EFER_NX; if (!cpu_has_nx) {
wrmsr(MSR_EFER, l, h); printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
nx_enabled = 1; "missing in CPU or disabled in BIOS!\n");
__supported_pte_mask |= _PAGE_NX; } else {
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
if (disable_nx) {
printk(KERN_INFO "NX (Execute Disable) protection: "
"disabled by kernel command line option\n");
} else {
printk(KERN_INFO "NX (Execute Disable) protection: "
"active\n");
} }
}
}
#else #else
void set_nx(void) /* 32bit non-PAE kernel, NX cannot be used */
{ printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
} "cannot be enabled: non-PAE kernel!\n");
#endif #endif
}
#ifdef CONFIG_X86_64
void __cpuinit check_efer(void)
{
unsigned long efer;
rdmsrl(MSR_EFER, efer);
if (!(efer & EFER_NX) || disable_nx)
__supported_pte_mask &= ~_PAGE_NX;
} }
#endif
...@@ -290,8 +290,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) ...@@ -290,8 +290,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
start, end); start, end);
e820_register_active_regions(node, start >> PAGE_SHIFT,
end >> PAGE_SHIFT);
if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
update_nodes_add(node, start, end); update_nodes_add(node, start, end);
...@@ -338,6 +336,19 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) ...@@ -338,6 +336,19 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
void __init acpi_numa_arch_fixup(void) {} void __init acpi_numa_arch_fixup(void) {}
int __init acpi_get_nodes(struct bootnode *physnodes)
{
int i;
int ret = 0;
for_each_node_mask(i, nodes_parsed) {
physnodes[ret].start = nodes[i].start;
physnodes[ret].end = nodes[i].end;
ret++;
}
return ret;
}
/* Use the information discovered above to actually set up the nodes. */ /* Use the information discovered above to actually set up the nodes. */
int __init acpi_scan_nodes(unsigned long start, unsigned long end) int __init acpi_scan_nodes(unsigned long start, unsigned long end)
{ {
...@@ -350,11 +361,6 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) ...@@ -350,11 +361,6 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
for (i = 0; i < MAX_NUMNODES; i++) for (i = 0; i < MAX_NUMNODES; i++)
cutoff_node(i, start, end); cutoff_node(i, start, end);
if (!nodes_cover_memory(nodes)) {
bad_srat();
return -1;
}
memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
memblk_nodeid); memblk_nodeid);
if (memnode_shift < 0) { if (memnode_shift < 0) {
...@@ -364,6 +370,14 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) ...@@ -364,6 +370,14 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
return -1; return -1;
} }
for_each_node_mask(i, nodes_parsed)
e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
nodes[i].end >> PAGE_SHIFT);
if (!nodes_cover_memory(nodes)) {
bad_srat();
return -1;
}
/* Account for nodes with cpus and no memory */ /* Account for nodes with cpus and no memory */
nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);
...@@ -454,7 +468,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) ...@@ -454,7 +468,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
for (i = 0; i < num_nodes; i++) for (i = 0; i < num_nodes; i++)
if (fake_nodes[i].start != fake_nodes[i].end) if (fake_nodes[i].start != fake_nodes[i].end)
node_set(i, nodes_parsed); node_set(i, nodes_parsed);
WARN_ON(!nodes_cover_memory(fake_nodes));
} }
static int null_slit_node_compare(int a, int b) static int null_slit_node_compare(int a, int b)
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
#include <asm/cache.h>
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/uv/uv.h> #include <asm/uv/uv.h>
...@@ -43,7 +44,7 @@ union smp_flush_state { ...@@ -43,7 +44,7 @@ union smp_flush_state {
spinlock_t tlbstate_lock; spinlock_t tlbstate_lock;
DECLARE_BITMAP(flush_cpumask, NR_CPUS); DECLARE_BITMAP(flush_cpumask, NR_CPUS);
}; };
char pad[CONFIG_X86_INTERNODE_CACHE_BYTES]; char pad[INTERNODE_CACHE_BYTES];
} ____cacheline_internodealigned_in_smp; } ____cacheline_internodealigned_in_smp;
/* State is put into the per CPU data section, but padded /* State is put into the per CPU data section, but padded
......
...@@ -1093,10 +1093,8 @@ asmlinkage void __init xen_start_kernel(void) ...@@ -1093,10 +1093,8 @@ asmlinkage void __init xen_start_kernel(void)
__supported_pte_mask |= _PAGE_IOMAP; __supported_pte_mask |= _PAGE_IOMAP;
#ifdef CONFIG_X86_64
/* Work out if we support NX */ /* Work out if we support NX */
check_efer(); x86_configure_nx();
#endif
xen_setup_features(); xen_setup_features();
......
...@@ -283,22 +283,24 @@ acpi_table_parse_srat(enum acpi_srat_type id, ...@@ -283,22 +283,24 @@ acpi_table_parse_srat(enum acpi_srat_type id,
int __init acpi_numa_init(void) int __init acpi_numa_init(void)
{ {
int ret = 0;
/* SRAT: Static Resource Affinity Table */ /* SRAT: Static Resource Affinity Table */
if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY, acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
acpi_parse_x2apic_affinity, NR_CPUS); acpi_parse_x2apic_affinity, NR_CPUS);
acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY, acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
acpi_parse_processor_affinity, NR_CPUS); acpi_parse_processor_affinity, NR_CPUS);
acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, ret = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
acpi_parse_memory_affinity, acpi_parse_memory_affinity,
NR_NODE_MEMBLKS); NR_NODE_MEMBLKS);
} }
/* SLIT: System Locality Information Table */ /* SLIT: System Locality Information Table */
acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit); acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit);
acpi_numa_arch_fixup(); acpi_numa_arch_fixup();
return 0; return ret;
} }
int acpi_get_pxm(acpi_handle h) int acpi_get_pxm(acpi_handle h)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment