Commit 855caa37 authored by Ingo Molnar

Merge branch 'x86/crashdump' into cpus4096

Conflicts:
	arch/x86/kernel/crash.c

Merged for semantic conflict:
	arch/x86/kernel/reboot.c
parents 948a7b2b c415b3dc
......@@ -5,21 +5,8 @@
# define PA_CONTROL_PAGE 0
# define VA_CONTROL_PAGE 1
# define PA_PGD 2
# define VA_PGD 3
# define PA_PTE_0 4
# define VA_PTE_0 5
# define PA_PTE_1 6
# define VA_PTE_1 7
# define PA_SWAP_PAGE 8
# ifdef CONFIG_X86_PAE
# define PA_PMD_0 9
# define VA_PMD_0 10
# define PA_PMD_1 11
# define VA_PMD_1 12
# define PAGES_NR 13
# else
# define PAGES_NR 9
# endif
# define PA_SWAP_PAGE 3
# define PAGES_NR 4
#else
# define PA_CONTROL_PAGE 0
# define VA_CONTROL_PAGE 1
......@@ -170,6 +157,20 @@ relocate_kernel(unsigned long indirection_page,
unsigned long start_address) ATTRIB_NORET;
#endif
#ifdef CONFIG_X86_32
#define ARCH_HAS_KIMAGE_ARCH
/*
 * Per-image page-table pages used by the 32-bit kexec code.
 *
 * Each member points to a single page that machine_kexec_alloc_page_tables()
 * obtains with get_zeroed_page() and machine_kexec_free_page_tables()
 * releases with free_page().
 */
struct kimage_arch {
/* Page directory; its physical address is passed on as page_list[PA_PGD]. */
pgd_t *pgd;
#ifdef CONFIG_X86_PAE
/* PAE only: pmd page used when mapping the control page at its virtual address. */
pmd_t *pmd0;
/* PAE only: pmd page used when identity-mapping the control page. */
pmd_t *pmd1;
#endif
/* Page table used for the control page's virtual-address mapping. */
pte_t *pte0;
/* Page table used for the control page's identity (physical-address) mapping. */
pte_t *pte1;
};
#endif
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_KEXEC_H */
#ifndef _ASM_X86_REBOOT_H
#define _ASM_X86_REBOOT_H
#include <linux/kdebug.h>
struct pt_regs;
struct machine_ops {
......@@ -18,4 +20,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs);
void native_machine_shutdown(void);
void machine_real_restart(const unsigned char *code, int length);
/*
 * Callback type accepted by nmi_shootdown_cpus(). It receives the number of
 * the CPU it is running on and the struct die_args of the NMI being handled.
 */
typedef void (*nmi_shootdown_cb)(int, struct die_args*);
/* Halt all other CPUs, calling @callback on each of them from an NMI handler. */
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
#endif /* _ASM_X86_REBOOT_H */
......@@ -29,34 +29,17 @@
#include <mach_ipi.h>
/* This keeps a track of which one is crashing cpu. */
static int crashing_cpu;
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
static atomic_t waiting_for_crash_ipi;
static int crash_nmi_callback(struct notifier_block *self,
unsigned long val, void *data)
static void kdump_nmi_callback(int cpu, struct die_args *args)
{
struct pt_regs *regs;
#ifdef CONFIG_X86_32
struct pt_regs fixed_regs;
#endif
int cpu;
if (val != DIE_NMI_IPI)
return NOTIFY_OK;
regs = ((struct die_args *)data)->regs;
cpu = raw_smp_processor_id();
/* Don't do anything if this handler is invoked on crashing cpu.
* Otherwise, system will completely hang. Crashing cpu can get
* an NMI if system was initially booted with nmi_watchdog parameter.
*/
if (cpu == crashing_cpu)
return NOTIFY_STOP;
local_irq_disable();
regs = args->regs;
#ifdef CONFIG_X86_32
if (!user_mode_vm(regs)) {
......@@ -65,51 +48,19 @@ static int crash_nmi_callback(struct notifier_block *self,
}
#endif
crash_save_cpu(regs, cpu);
disable_local_APIC();
atomic_dec(&waiting_for_crash_ipi);
/* Assume hlt works */
halt();
for (;;)
cpu_relax();
return 1;
}
static void smp_send_nmi_allbutself(void)
{
send_IPI_allbutself(NMI_VECTOR);
disable_local_APIC();
}
static struct notifier_block crash_nmi_nb = {
.notifier_call = crash_nmi_callback,
};
static void nmi_shootdown_cpus(void)
static void kdump_nmi_shootdown_cpus(void)
{
unsigned long msecs;
atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
/* Would it be better to replace the trap vector here? */
if (register_die_notifier(&crash_nmi_nb))
return; /* return what? */
/* Ensure the new callback function is set before sending
* out the NMI
*/
wmb();
nmi_shootdown_cpus(kdump_nmi_callback);
smp_send_nmi_allbutself();
msecs = 1000; /* Wait at most a second for the other cpus to stop */
while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
mdelay(1);
msecs--;
}
/* Leave the nmi callback set */
disable_local_APIC();
}
#else
static void nmi_shootdown_cpus(void)
static void kdump_nmi_shootdown_cpus(void)
{
/* There are no cpus to shootdown */
}
......@@ -128,9 +79,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
/* The kernel is broken so disable interrupts */
local_irq_disable();
/* Make a note of crashing cpu. Will be used in NMI callback.*/
crashing_cpu = safe_smp_processor_id();
nmi_shootdown_cpus();
kdump_nmi_shootdown_cpus();
lapic_shutdown();
#if defined(CONFIG_X86_IO_APIC)
disable_IO_APIC();
......
......@@ -13,6 +13,7 @@
#include <linux/numa.h>
#include <linux/ftrace.h>
#include <linux/suspend.h>
#include <linux/gfp.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
......@@ -25,15 +26,6 @@
#include <asm/system.h>
#include <asm/cacheflush.h>
#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
static u32 kexec_pgd[1024] PAGE_ALIGNED;
#ifdef CONFIG_X86_PAE
static u32 kexec_pmd0[1024] PAGE_ALIGNED;
static u32 kexec_pmd1[1024] PAGE_ALIGNED;
#endif
static u32 kexec_pte0[1024] PAGE_ALIGNED;
static u32 kexec_pte1[1024] PAGE_ALIGNED;
static void set_idt(void *newidt, __u16 limit)
{
struct desc_ptr curidt;
......@@ -76,6 +68,76 @@ static void load_segments(void)
#undef __STR
}
/*
 * Release the page-table pages recorded in image->arch.
 *
 * Every pointer is reset to NULL once its page has been freed.  This
 * function is called from the failure path of
 * machine_kexec_alloc_page_tables() and again from machine_kexec_cleanup();
 * without the reset, a second call for the same image would hand the same
 * pages to free_page() twice.
 *
 * Pointers that are already NULL (pages that were never allocated) are
 * passed to free_page() as before; this relies on free_page() ignoring a
 * zero address, which the allocation failure path already depends on.
 */
static void machine_kexec_free_page_tables(struct kimage *image)
{
	free_page((unsigned long)image->arch.pgd);
	image->arch.pgd = NULL;
#ifdef CONFIG_X86_PAE
	free_page((unsigned long)image->arch.pmd0);
	image->arch.pmd0 = NULL;
	free_page((unsigned long)image->arch.pmd1);
	image->arch.pmd1 = NULL;
#endif
	free_page((unsigned long)image->arch.pte0);
	image->arch.pte0 = NULL;
	free_page((unsigned long)image->arch.pte1);
	image->arch.pte1 = NULL;
}
static int machine_kexec_alloc_page_tables(struct kimage *image)
{
image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
#ifdef CONFIG_X86_PAE
image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
#endif
image->arch.pte0 = (pte_t *)get_zeroed_page(GFP_KERNEL);
image->arch.pte1 = (pte_t *)get_zeroed_page(GFP_KERNEL);
if (!image->arch.pgd ||
#ifdef CONFIG_X86_PAE
!image->arch.pmd0 || !image->arch.pmd1 ||
#endif
!image->arch.pte0 || !image->arch.pte1) {
machine_kexec_free_page_tables(image);
return -ENOMEM;
}
return 0;
}
/*
 * Install a single mapping vaddr -> paddr in the page tables rooted at @pgd.
 *
 * @pgd:   base of the page directory to modify
 * @pmd:   page to use as the pmd level; only read under CONFIG_X86_PAE,
 *         and overwritten by the table walk below in either configuration
 * @pte:   page to use as the page table if the pmd entry is not yet present
 * @vaddr: virtual address to map
 * @paddr: physical address the mapping should point to
 *
 * Entries that are already marked _PAGE_PRESENT at the pgd (PAE only) and
 * pmd levels are left alone; the final pte is always written.
 */
static void machine_kexec_page_table_set_one(
pgd_t *pgd, pmd_t *pmd, pte_t *pte,
unsigned long vaddr, unsigned long paddr)
{
pud_t *pud;
/* Step to the directory entry that covers vaddr. */
pgd += pgd_index(vaddr);
#ifdef CONFIG_X86_PAE
/* PAE: hook the supplied pmd page into the directory if the slot is empty. */
if (!(pgd_val(*pgd) & _PAGE_PRESENT))
set_pgd(pgd, __pgd(__pa(pmd) | _PAGE_PRESENT));
#endif
/* Walk down to the pmd entry for vaddr; from here on pmd is that entry. */
pud = pud_offset(pgd, vaddr);
pmd = pmd_offset(pud, vaddr);
/* Hook the supplied pte page into the pmd entry if it is empty. */
if (!(pmd_val(*pmd) & _PAGE_PRESENT))
set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
/* Write the pte for vaddr, mapping paddr's page frame with PAGE_KERNEL_EXEC. */
pte = pte_offset_kernel(pmd, vaddr);
set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
}
/*
 * Fill in the page tables of @image so that the control page is mapped
 * twice: once at its kernel virtual address (using pmd0/pte0) and once
 * identity-mapped at its physical address (using pmd1/pte1).
 *
 * Without CONFIG_X86_PAE there are no pmd pages in image->arch, so a NULL
 * pmd is passed to machine_kexec_page_table_set_one().
 */
static void machine_kexec_prepare_page_tables(struct kimage *image)
{
	void *control_page;
	pmd_t *pmd = NULL;

	control_page = page_address(image->control_code_page);

	/* Mapping at the control page's virtual address. */
#ifdef CONFIG_X86_PAE
	pmd = image->arch.pmd0;
#endif
	machine_kexec_page_table_set_one(
		image->arch.pgd, pmd, image->arch.pte0,
		(unsigned long)control_page, __pa(control_page));

	/* Identity mapping at the control page's physical address. */
#ifdef CONFIG_X86_PAE
	pmd = image->arch.pmd1;
#endif
	machine_kexec_page_table_set_one(
		image->arch.pgd, pmd, image->arch.pte1,
		__pa(control_page), __pa(control_page));
}
/*
* An architecture hook called to validate the
* proposed image and prepare the control pages
......@@ -87,12 +149,20 @@ static void load_segments(void)
* reboot code buffer to allow us to avoid allocations
* later.
*
* Make control page executable.
* - Make control page executable.
* - Allocate page tables
* - Setup page tables
*/
/*
 * Prepare @image: make the control page executable when NX is enabled,
 * then allocate and populate its page tables.
 *
 * Returns 0 on success, or the error reported by
 * machine_kexec_alloc_page_tables().
 */
int machine_kexec_prepare(struct kimage *image)
{
	int rc;

	if (nx_enabled)
		set_pages_x(image->control_code_page, 1);

	rc = machine_kexec_alloc_page_tables(image);
	if (!rc)
		machine_kexec_prepare_page_tables(image);

	return rc;
}
......@@ -104,6 +174,7 @@ void machine_kexec_cleanup(struct kimage *image)
{
if (nx_enabled)
set_pages_nx(image->control_code_page, 1);
machine_kexec_free_page_tables(image);
}
/*
......@@ -150,18 +221,7 @@ void machine_kexec(struct kimage *image)
relocate_kernel_ptr = control_page;
page_list[PA_CONTROL_PAGE] = __pa(control_page);
page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
page_list[PA_PGD] = __pa(kexec_pgd);
page_list[VA_PGD] = (unsigned long)kexec_pgd;
#ifdef CONFIG_X86_PAE
page_list[PA_PMD_0] = __pa(kexec_pmd0);
page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
page_list[PA_PMD_1] = __pa(kexec_pmd1);
page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
#endif
page_list[PA_PTE_0] = __pa(kexec_pte0);
page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
page_list[PA_PTE_1] = __pa(kexec_pte1);
page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
page_list[PA_PGD] = __pa(image->arch.pgd);
if (image->type == KEXEC_TYPE_DEFAULT)
page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
......
......@@ -21,6 +21,9 @@
# include <asm/iommu.h>
#endif
#include <mach_ipi.h>
/*
* Power off function, if any
*/
......@@ -548,3 +551,92 @@ void machine_crash_shutdown(struct pt_regs *regs)
machine_ops.crash_shutdown(regs);
}
#endif
#if defined(CONFIG_SMP)
/* CPU that started the shootdown; the NMI callback does nothing on this CPU. */
static int crashing_cpu;
/* Function to run on every other CPU; set by nmi_shootdown_cpus(). */
static nmi_shootdown_cb shootdown_callback;
/* Count of other CPUs that have not yet finished running the callback. */
static atomic_t waiting_for_crash_ipi;
/*
 * Die-notifier callback that stops the CPU it runs on.
 *
 * Events other than DIE_NMI_IPI are ignored (NOTIFY_OK).  On the crashing
 * CPU the function returns NOTIFY_STOP without doing anything else.  On any
 * other CPU it disables interrupts, runs shootdown_callback with the CPU
 * number and the die_args passed in @data, decrements
 * waiting_for_crash_ipi and then halts; it does not return in that case.
 */
static int crash_nmi_callback(struct notifier_block *self,
unsigned long val, void *data)
{
int cpu;
if (val != DIE_NMI_IPI)
return NOTIFY_OK;
cpu = raw_smp_processor_id();
/* Don't do anything if this handler is invoked on crashing cpu.
* Otherwise, system will completely hang. Crashing cpu can get
* an NMI if system was initially booted with nmi_watchdog parameter.
*/
if (cpu == crashing_cpu)
return NOTIFY_STOP;
local_irq_disable();
shootdown_callback(cpu, (struct die_args *)data);
/* Tell nmi_shootdown_cpus() that this CPU is done. */
atomic_dec(&waiting_for_crash_ipi);
/* Assume hlt works */
halt();
/* Spin forever in case execution continues past halt(). */
for (;;)
cpu_relax();
/* Not reached: the loop above never exits. */
return 1;
}
/*
 * Send NMI_VECTOR through send_IPI_allbutself() (going by its name, to
 * every CPU except the one calling it).
 */
static void smp_send_nmi_allbutself(void)
{
send_IPI_allbutself(NMI_VECTOR);
}
/* Notifier block that nmi_shootdown_cpus() registers on the die notifier chain. */
static struct notifier_block crash_nmi_nb = {
.notifier_call = crash_nmi_callback,
};
/*
 * nmi_shootdown_cpus - halt all other CPUs, running @callback on each
 *
 * Meant for crash or emergency-reboot time.  @callback is invoked from
 * inside an NMI handler on every CPU other than the caller.  Interrupts
 * are disabled on the calling CPU and stay disabled on return.
 */
void nmi_shootdown_cpus(nmi_shootdown_cb callback)
{
	unsigned long remaining_ms;

	local_irq_disable();

	/* Record which CPU we are so the NMI callback leaves it alone. */
	crashing_cpu = safe_smp_processor_id();

	shootdown_callback = callback;
	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);

	/* Would it be better to replace the trap vector here? */
	if (register_die_notifier(&crash_nmi_nb))
		return;		/* return what? */

	/* Make sure the state set up above is visible before the NMI goes out. */
	wmb();

	smp_send_nmi_allbutself();

	/* Wait at most a second for the other cpus to stop. */
	for (remaining_ms = 1000;
	     (atomic_read(&waiting_for_crash_ipi) > 0) && remaining_ms;
	     remaining_ms--)
		mdelay(1);

	/* Leave the nmi callback set */
}
#else /* !CONFIG_SMP */
/*
 * Variant built when CONFIG_SMP is not defined: does nothing, and
 * @callback is never invoked.
 */
void nmi_shootdown_cpus(nmi_shootdown_cb callback)
{
/* No other CPUs to shoot down */
}
#endif
......@@ -10,15 +10,12 @@
#include <asm/page.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
#include <asm/pgtable.h>
/*
* Must be relocatable PIC code callable as a C function
*/
#define PTR(x) (x << 2)
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define PAE_PGD_ATTR (_PAGE_PRESENT)
/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
* ~ control_page + PAGE_SIZE are used as data storage and stack for
......@@ -39,7 +36,6 @@
#define CP_PA_BACKUP_PAGES_MAP DATA(0x1c)
.text
.align PAGE_SIZE
.globl relocate_kernel
relocate_kernel:
/* Save the CPU context, used for jumping back */
......@@ -60,117 +56,6 @@ relocate_kernel:
movl %cr4, %eax
movl %eax, CR4(%edi)
#ifdef CONFIG_X86_PAE
/* map the control page at its virtual address */
movl PTR(VA_PGD)(%ebp), %edi
movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
andl $0xc0000000, %eax
shrl $27, %eax
addl %edi, %eax
movl PTR(PA_PMD_0)(%ebp), %edx
orl $PAE_PGD_ATTR, %edx
movl %edx, (%eax)
movl PTR(VA_PMD_0)(%ebp), %edi
movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
andl $0x3fe00000, %eax
shrl $18, %eax
addl %edi, %eax
movl PTR(PA_PTE_0)(%ebp), %edx
orl $PAGE_ATTR, %edx
movl %edx, (%eax)
movl PTR(VA_PTE_0)(%ebp), %edi
movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
andl $0x001ff000, %eax
shrl $9, %eax
addl %edi, %eax
movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
orl $PAGE_ATTR, %edx
movl %edx, (%eax)
/* identity map the control page at its physical address */
movl PTR(VA_PGD)(%ebp), %edi
movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
andl $0xc0000000, %eax
shrl $27, %eax
addl %edi, %eax
movl PTR(PA_PMD_1)(%ebp), %edx
orl $PAE_PGD_ATTR, %edx
movl %edx, (%eax)
movl PTR(VA_PMD_1)(%ebp), %edi
movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
andl $0x3fe00000, %eax
shrl $18, %eax
addl %edi, %eax
movl PTR(PA_PTE_1)(%ebp), %edx
orl $PAGE_ATTR, %edx
movl %edx, (%eax)
movl PTR(VA_PTE_1)(%ebp), %edi
movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
andl $0x001ff000, %eax
shrl $9, %eax
addl %edi, %eax
movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
orl $PAGE_ATTR, %edx
movl %edx, (%eax)
#else
/* map the control page at its virtual address */
movl PTR(VA_PGD)(%ebp), %edi
movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
andl $0xffc00000, %eax
shrl $20, %eax
addl %edi, %eax
movl PTR(PA_PTE_0)(%ebp), %edx
orl $PAGE_ATTR, %edx
movl %edx, (%eax)
movl PTR(VA_PTE_0)(%ebp), %edi
movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
andl $0x003ff000, %eax
shrl $10, %eax
addl %edi, %eax
movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
orl $PAGE_ATTR, %edx
movl %edx, (%eax)
/* identity map the control page at its physical address */
movl PTR(VA_PGD)(%ebp), %edi
movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
andl $0xffc00000, %eax
shrl $20, %eax
addl %edi, %eax
movl PTR(PA_PTE_1)(%ebp), %edx
orl $PAGE_ATTR, %edx
movl %edx, (%eax)
movl PTR(VA_PTE_1)(%ebp), %edi
movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
andl $0x003ff000, %eax
shrl $10, %eax
addl %edi, %eax
movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
orl $PAGE_ATTR, %edx
movl %edx, (%eax)
#endif
relocate_new_kernel:
/* read the arguments and say goodbye to the stack */
movl 20+4(%esp), %ebx /* page_list */
movl 20+8(%esp), %ebp /* list of pages */
......
......@@ -100,6 +100,10 @@ struct kimage {
#define KEXEC_TYPE_DEFAULT 0
#define KEXEC_TYPE_CRASH 1
unsigned int preserve_context : 1;
#ifdef ARCH_HAS_KIMAGE_ARCH
struct kimage_arch arch;
#endif
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment