Commit d3cc42fb authored by Ingo Molnar, committed by Linus Torvalds

[PATCH] enable SMP Opterons to boot an NX-enabled x86 kernel

This fixes a corner-case NX bug: the x86 SMP kernel doesn't boot on SMP
Opterons if NX is enabled [and mem=nopentium is specified], because
kernel-space NX protection prevents the SMP trampoline from being
executable.

Since the SMP trampoline is a rare case of 'dynamic code' executed by
the kernel (it has to be below 640K, so it cannot be part of the kernel
text itself), I've added the necessary infrastructure to enable/disable
executability of specific kernel pages.
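
As background, the trampoline setup boils down to a runtime copy into
low memory, roughly like this (a simplified sketch of the i386 SMP
bootup code, not part of this patch; exact names may differ):

	static unsigned long __init setup_trampoline(void)
	{
		/* copy the real-mode startup blob into the low page: */
		memcpy(trampoline_base, trampoline_data,
		       trampoline_end - trampoline_data);
		/* the AP starts executing at this physical address: */
		return virt_to_phys(trampoline_base);
	}

Because that page is allocated as data, the NX-protected kernel
pagetables map it non-executable by default.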

We cannot simply disable NX via the MSR because we've got the NX bits in
the kernel pagetables, which are set up before we do the SMP bootup. 
The NX bit in the pagetables is undefined if EFER.NXE is 0, so we cannot
count on NX-capable CPUs not faulting when they encounter them. 
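
To make the rejected alternative concrete, here is a rough sketch
(illustration only; MSR_EFER/EFER_NX as used by the set_nx() code
touched below) of what disabling NX via the MSR would look like, and
why it is unsafe:

	unsigned int l, h;

	rdmsr(MSR_EFER, l, h);
	l &= ~EFER_NX;			/* clear EFER.NXE on this CPU */
	wrmsr(MSR_EFER, l, h);
	/*
	 * Wrong: the _PAGE_NX bits already set in the kernel
	 * pagetables are now undefined to the hardware, so an
	 * NX-capable CPU can still fault on them.
	 */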

I've tested the x86 kernel on a non-NX SMP x86 box and on an NX UP box,
on which I've also tested a simulated SMP trampoline; it all works fine.

- add infrastructure to enable/disable executability of kernel pages

- make the SMP trampoline page executable (see the bit-arithmetic note below).
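
The bit arithmetic behind set_kernel_exec() in the hunks below, spelled
out (illustrative, not part of the diff): with PAE, the NX bit is bit 63
of the 64-bit pte, i.e. bit _PAGE_BIT_NX - 32 == 31 of the pte_high word:

	pte->pte_high |=   1 << (_PAGE_BIT_NX - 32);	/* set NX: no-exec  */
	pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));	/* clear NX: exec   */

This is also why the non-PAE stub of set_kernel_exec() can be a no-op:
without PAE there is no NX bit to manage.
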
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent ed1fc67c
@@ -82,6 +82,7 @@ int smp_threads_ready;
 extern unsigned char trampoline_data [];
 extern unsigned char trampoline_end [];
 static unsigned char *trampoline_base;
+static int trampoline_exec;

 /*
  * Currently trivial. Write the real->protected mode
@@ -108,6 +109,10 @@ void __init smp_alloc_memory(void)
	 */
	if (__pa(trampoline_base) >= 0x9F000)
		BUG();
+	/*
+	 * Make the SMP trampoline executable:
+	 */
+	trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
 }

 /*
@@ -1375,6 +1380,10 @@ void __init smp_cpus_done(unsigned int max_cpus)
	setup_ioapic_dest();
 #endif
	zap_low_mappings();
+	/*
+	 * Disable executability of the SMP trampoline:
+	 */
+	set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
 }

 void __init smp_intr_init(void)
...
@@ -455,6 +455,33 @@ static void __init set_nx(void)
	}
 }

+/*
+ * Enables/disables executability of a given kernel page and
+ * returns the previous setting.
+ */
+int __init set_kernel_exec(unsigned long vaddr, int enable)
+{
+	pte_t *pte;
+	int ret = 1;
+
+	if (!nx_enabled)
+		goto out;
+
+	pte = lookup_address(vaddr);
+	BUG_ON(!pte);
+
+	if (pte_val(*pte) & _PAGE_NX)
+		ret = 0;
+
+	if (enable)
+		pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
+	else
+		pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
+	__flush_tlb_all();
+out:
+	return ret;
+}
+
 #endif

 /*
...
@@ -17,7 +17,7 @@ static spinlock_t cpa_lock = SPIN_LOCK_UNLOCKED;
 static struct list_head df_list = LIST_HEAD_INIT(df_list);

-static inline pte_t *lookup_address(unsigned long address)
+pte_t *lookup_address(unsigned long address)
 {
	pgd_t *pgd = pgd_offset_k(address);
	pmd_t *pmd;
...
@@ -285,7 +285,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
	 * Chop off the NX bit (if present), and add the NX portion of
	 * the newprot (if present):
	 */
-	pte.pte_high &= -1 ^ (1 << (_PAGE_BIT_NX - 32));
+	pte.pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
	pte.pte_high |= (pgprot_val(newprot) >> 32) & \
			(__supported_pte_mask >> 32);
 #endif
@@ -344,6 +344,26 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #define pte_offset_kernel(dir, address) \
	((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))

+/*
+ * Helper function that returns the kernel pagetable entry controlling
+ * the virtual address 'address'. NULL means no pagetable entry present.
+ * NOTE: the return type is pte_t but if the pmd is PSE then we return it
+ * as a pte too.
+ */
+extern pte_t *lookup_address(unsigned long address);
+
+/*
+ * Make a given kernel text page executable/non-executable.
+ * Returns the previous executability setting of that page (which
+ * is used to restore the previous state). Used by the SMP bootup code.
+ * NOTE: this is an __init function for security reasons.
+ */
+#ifdef CONFIG_X86_PAE
+extern int set_kernel_exec(unsigned long vaddr, int enable);
+#else
+static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;}
+#endif
+
 #if defined(CONFIG_HIGHPTE)
 #define pte_offset_map(dir, address) \
	((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
...