Commit c4c5ab30 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'x86-fixes-for-linus' of...

Merge branch 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (45 commits)
  x86, mce: fix error path in mce_create_device()
  x86: use zalloc_cpumask_var for mce_dev_initialized
  x86: fix duplicated sysfs attribute
  x86: de-assembler-ize asm/desc.h
  i386: fix/simplify espfix stack switching, move it into assembly
  i386: fix return to 16-bit stack from NMI handler
  x86, ioapic: Don't call disconnect_bsp_APIC if no APIC present
  x86: Remove duplicated #include's
  x86: msr.h linux/types.h is only required for __KERNEL__
  x86: nmi: Add Intel processor 0x6f4 to NMI perfctr1 workaround
  x86, mce: mce_intel.c needs <asm/apic.h>
  x86: apic/io_apic.c: dmar_msi_type should be static
  x86, io_apic.c: Work around compiler warning
  x86: mce: Don't touch THERMAL_APIC_VECTOR if no active APIC present
  x86: mce: Handle banks == 0 case in K7 quirk
  x86, boot: use .code16gcc instead of .code16
  x86: correct the conversion of EFI memory types
  x86: cap iomem_resource to addressable physical memory
  x86, mce: rename _64.c files which are no longer 64-bit-specific
  x86, mce: mce.h cleanup
  ...

Manually fix up trivial conflict in arch/x86/mm/fault.c
parents 7fd5b632 1d991001
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
* touching registers they shouldn't be. * touching registers they shouldn't be.
*/ */
.code16 .code16gcc
.text .text
.globl intcall .globl intcall
.type intcall, @function .type intcall, @function
......
...@@ -29,9 +29,11 @@ extern void amd_iommu_detect(void); ...@@ -29,9 +29,11 @@ extern void amd_iommu_detect(void);
extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
extern void amd_iommu_flush_all_domains(void); extern void amd_iommu_flush_all_domains(void);
extern void amd_iommu_flush_all_devices(void); extern void amd_iommu_flush_all_devices(void);
extern void amd_iommu_shutdown(void);
#else #else
static inline int amd_iommu_init(void) { return -ENODEV; } static inline int amd_iommu_init(void) { return -ENODEV; }
static inline void amd_iommu_detect(void) { } static inline void amd_iommu_detect(void) { }
static inline void amd_iommu_shutdown(void) { }
#endif #endif
#endif /* _ASM_X86_AMD_IOMMU_H */ #endif /* _ASM_X86_AMD_IOMMU_H */
...@@ -257,7 +257,7 @@ typedef struct { ...@@ -257,7 +257,7 @@ typedef struct {
/** /**
* atomic64_read - read atomic64 variable * atomic64_read - read atomic64 variable
* @v: pointer of type atomic64_t * @ptr: pointer of type atomic64_t
* *
* Atomically reads the value of @v. * Atomically reads the value of @v.
* Doesn't imply a read memory barrier. * Doesn't imply a read memory barrier.
...@@ -294,7 +294,6 @@ atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val, ...@@ -294,7 +294,6 @@ atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val,
* atomic64_xchg - xchg atomic64 variable * atomic64_xchg - xchg atomic64 variable
* @ptr: pointer to type atomic64_t * @ptr: pointer to type atomic64_t
* @new_val: value to assign * @new_val: value to assign
* @old_val: old value that was there
* *
* Atomically xchgs the value of @ptr to @new_val and returns * Atomically xchgs the value of @ptr to @new_val and returns
* the old value. * the old value.
......
#ifndef _ASM_X86_DESC_H #ifndef _ASM_X86_DESC_H
#define _ASM_X86_DESC_H #define _ASM_X86_DESC_H
#ifndef __ASSEMBLY__
#include <asm/desc_defs.h> #include <asm/desc_defs.h>
#include <asm/ldt.h> #include <asm/ldt.h>
#include <asm/mmu.h> #include <asm/mmu.h>
...@@ -380,29 +379,4 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist) ...@@ -380,29 +379,4 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
} }
#else
/*
* GET_DESC_BASE reads the descriptor base of the specified segment.
*
* Args:
* idx - descriptor index
* gdt - GDT pointer
* base - 32bit register to which the base will be written
* lo_w - lo word of the "base" register
* lo_b - lo byte of the "base" register
* hi_b - hi byte of the low word of the "base" register
*
* Example:
* GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
* Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
*/
#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
movb idx * 8 + 4(gdt), lo_b; \
movb idx * 8 + 7(gdt), hi_b; \
shll $16, base; \
movw idx * 8 + 2(gdt), lo_w;
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_DESC_H */ #endif /* _ASM_X86_DESC_H */
...@@ -102,15 +102,39 @@ struct mce_log { ...@@ -102,15 +102,39 @@ struct mce_log {
#ifdef __KERNEL__ #ifdef __KERNEL__
#include <linux/percpu.h>
#include <linux/init.h>
#include <asm/atomic.h>
extern int mce_disabled; extern int mce_disabled;
extern int mce_p5_enabled;
#include <asm/atomic.h> #ifdef CONFIG_X86_MCE
#include <linux/percpu.h> void mcheck_init(struct cpuinfo_x86 *c);
#else
static inline void mcheck_init(struct cpuinfo_x86 *c) {}
#endif
#ifdef CONFIG_X86_OLD_MCE
extern int nr_mce_banks;
void amd_mcheck_init(struct cpuinfo_x86 *c);
void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
#endif
#ifdef CONFIG_X86_ANCIENT_MCE
void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
void winchip_mcheck_init(struct cpuinfo_x86 *c);
static inline void enable_p5_mce(void) { mce_p5_enabled = 1; }
#else
static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
static inline void enable_p5_mce(void) {}
#endif
void mce_setup(struct mce *m); void mce_setup(struct mce *m);
void mce_log(struct mce *m); void mce_log(struct mce *m);
DECLARE_PER_CPU(struct sys_device, mce_dev); DECLARE_PER_CPU(struct sys_device, mce_dev);
extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
/* /*
* To support more than 128 would need to escape the predefined * To support more than 128 would need to escape the predefined
...@@ -145,12 +169,8 @@ int mce_available(struct cpuinfo_x86 *c); ...@@ -145,12 +169,8 @@ int mce_available(struct cpuinfo_x86 *c);
DECLARE_PER_CPU(unsigned, mce_exception_count); DECLARE_PER_CPU(unsigned, mce_exception_count);
DECLARE_PER_CPU(unsigned, mce_poll_count); DECLARE_PER_CPU(unsigned, mce_poll_count);
void mce_log_therm_throt_event(__u64 status);
extern atomic_t mce_entry; extern atomic_t mce_entry;
void do_machine_check(struct pt_regs *, long);
typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
...@@ -167,13 +187,32 @@ void mce_notify_process(void); ...@@ -167,13 +187,32 @@ void mce_notify_process(void);
DECLARE_PER_CPU(struct mce, injectm); DECLARE_PER_CPU(struct mce, injectm);
extern struct file_operations mce_chrdev_ops; extern struct file_operations mce_chrdev_ops;
#ifdef CONFIG_X86_MCE /*
void mcheck_init(struct cpuinfo_x86 *c); * Exception handler
#else */
#define mcheck_init(c) do { } while (0)
#endif /* Call the installed machine check handler for this CPU setup. */
extern void (*machine_check_vector)(struct pt_regs *, long error_code);
void do_machine_check(struct pt_regs *, long);
/*
* Threshold handler
*/
extern void (*mce_threshold_vector)(void); extern void (*mce_threshold_vector)(void);
extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
/*
* Thermal handler
*/
void intel_init_thermal(struct cpuinfo_x86 *c);
#ifdef CONFIG_X86_NEW_MCE
void mce_log_therm_throt_event(__u64 status);
#else
static inline void mce_log_therm_throt_event(__u64 status) {}
#endif
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _ASM_X86_MCE_H */ #endif /* _ASM_X86_MCE_H */
...@@ -3,13 +3,10 @@ ...@@ -3,13 +3,10 @@
#include <asm/msr-index.h> #include <asm/msr-index.h>
#ifndef __ASSEMBLY__
# include <linux/types.h>
#endif
#ifdef __KERNEL__ #ifdef __KERNEL__
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#include <linux/types.h>
#include <asm/asm.h> #include <asm/asm.h>
#include <asm/errno.h> #include <asm/errno.h>
#include <asm/cpumask.h> #include <asm/cpumask.h>
...@@ -264,6 +261,4 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) ...@@ -264,6 +261,4 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _ASM_X86_MSR_H */ #endif /* _ASM_X86_MSR_H */
#ifndef _ASM_X86_THERM_THROT_H
#define _ASM_X86_THERM_THROT_H
#include <asm/atomic.h>
extern atomic_t therm_throt_en;
int therm_throt_process(int curr);
#endif /* _ASM_X86_THERM_THROT_H */
...@@ -434,6 +434,16 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) ...@@ -434,6 +434,16 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
} }
/* Flush the whole IO/TLB for a given protection domain - including PDE */
static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid)
{
u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
INC_STATS_COUNTER(domain_flush_single);
iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1);
}
/* /*
* This function is used to flush the IO/TLB for a given protection domain * This function is used to flush the IO/TLB for a given protection domain
* on every IOMMU in the system * on every IOMMU in the system
...@@ -1078,7 +1088,13 @@ static void attach_device(struct amd_iommu *iommu, ...@@ -1078,7 +1088,13 @@ static void attach_device(struct amd_iommu *iommu,
amd_iommu_pd_table[devid] = domain; amd_iommu_pd_table[devid] = domain;
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
/*
* We might boot into a crash-kernel here. The crashed kernel
* left the caches in the IOMMU dirty. So we have to flush
* here to evict all dirty stuff.
*/
iommu_queue_inv_dev_entry(iommu, devid); iommu_queue_inv_dev_entry(iommu, devid);
iommu_flush_tlb_pde(iommu, domain->id);
} }
/* /*
......
...@@ -260,6 +260,14 @@ static void iommu_enable(struct amd_iommu *iommu) ...@@ -260,6 +260,14 @@ static void iommu_enable(struct amd_iommu *iommu)
static void iommu_disable(struct amd_iommu *iommu) static void iommu_disable(struct amd_iommu *iommu)
{ {
/* Disable command buffer */
iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
/* Disable event logging and event interrupts */
iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
/* Disable IOMMU hardware itself */
iommu_feature_disable(iommu, CONTROL_IOMMU_EN); iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
} }
...@@ -478,6 +486,10 @@ static void iommu_enable_event_buffer(struct amd_iommu *iommu) ...@@ -478,6 +486,10 @@ static void iommu_enable_event_buffer(struct amd_iommu *iommu)
memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
&entry, sizeof(entry)); &entry, sizeof(entry));
/* set head and tail to zero manually */
writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
} }
...@@ -1042,6 +1054,7 @@ static void enable_iommus(void) ...@@ -1042,6 +1054,7 @@ static void enable_iommus(void)
struct amd_iommu *iommu; struct amd_iommu *iommu;
for_each_iommu(iommu) { for_each_iommu(iommu) {
iommu_disable(iommu);
iommu_set_device_table(iommu); iommu_set_device_table(iommu);
iommu_enable_command_buffer(iommu); iommu_enable_command_buffer(iommu);
iommu_enable_event_buffer(iommu); iommu_enable_event_buffer(iommu);
...@@ -1066,12 +1079,6 @@ static void disable_iommus(void) ...@@ -1066,12 +1079,6 @@ static void disable_iommus(void)
static int amd_iommu_resume(struct sys_device *dev) static int amd_iommu_resume(struct sys_device *dev)
{ {
/*
* Disable IOMMUs before reprogramming the hardware registers.
* IOMMU is still enabled from the resume kernel.
*/
disable_iommus();
/* re-load the hardware */ /* re-load the hardware */
enable_iommus(); enable_iommus();
...@@ -1079,8 +1086,8 @@ static int amd_iommu_resume(struct sys_device *dev) ...@@ -1079,8 +1086,8 @@ static int amd_iommu_resume(struct sys_device *dev)
* we have to flush after the IOMMUs are enabled because a * we have to flush after the IOMMUs are enabled because a
* disabled IOMMU will never execute the commands we send * disabled IOMMU will never execute the commands we send
*/ */
amd_iommu_flush_all_domains();
amd_iommu_flush_all_devices(); amd_iommu_flush_all_devices();
amd_iommu_flush_all_domains();
return 0; return 0;
} }
...@@ -1273,6 +1280,11 @@ int __init amd_iommu_init(void) ...@@ -1273,6 +1280,11 @@ int __init amd_iommu_init(void)
goto out; goto out;
} }
void amd_iommu_shutdown(void)
{
disable_iommus();
}
/**************************************************************************** /****************************************************************************
* *
* Early detect code. This code runs at IOMMU detection time in the DMA * Early detect code. This code runs at IOMMU detection time in the DMA
......
...@@ -462,7 +462,8 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) ...@@ -462,7 +462,8 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
static void static void
__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{ {
union entry_union eu; union entry_union eu = {{0, 0}};
eu.entry = e; eu.entry = e;
io_apic_write(apic, 0x11 + 2*pin, eu.w2); io_apic_write(apic, 0x11 + 2*pin, eu.w2);
io_apic_write(apic, 0x10 + 2*pin, eu.w1); io_apic_write(apic, 0x10 + 2*pin, eu.w1);
...@@ -2003,7 +2004,9 @@ void disable_IO_APIC(void) ...@@ -2003,7 +2004,9 @@ void disable_IO_APIC(void)
/* /*
* Use virtual wire A mode when interrupt remapping is enabled. * Use virtual wire A mode when interrupt remapping is enabled.
*/ */
disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1); if (cpu_has_apic)
disconnect_bsp_APIC(!intr_remapping_enabled &&
ioapic_i8259.pin != -1);
} }
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
...@@ -3567,7 +3570,7 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) ...@@ -3567,7 +3570,7 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
struct irq_chip dmar_msi_type = { static struct irq_chip dmar_msi_type = {
.name = "DMAR_MSI", .name = "DMAR_MSI",
.unmask = dmar_msi_unmask, .unmask = dmar_msi_unmask,
.mask = dmar_msi_mask, .mask = dmar_msi_mask,
......
...@@ -20,23 +20,12 @@ ...@@ -20,23 +20,12 @@
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/setup.h> #include <asm/setup.h>
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <asm/mpspec.h>
#include <asm/fixmap.h>
#include <asm/apicdef.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <linux/init.h>
#include <asm/ipi.h> #include <asm/ipi.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <asm/acpi.h> #include <asm/acpi.h>
#include <asm/e820.h> #include <asm/e820.h>
#include <asm/setup.h>
#ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_HOTPLUG_CPU
#define DEFAULT_SEND_IPI (1) #define DEFAULT_SEND_IPI (1)
......
...@@ -44,7 +44,6 @@ ...@@ -44,7 +44,6 @@
#include <asm/ipi.h> #include <asm/ipi.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/init.h>
#include <linux/gfp.h> #include <linux/gfp.h>
#include <linux/smp.h> #include <linux/smp.h>
......
...@@ -108,7 +108,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { ...@@ -108,7 +108,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
/* data */ /* data */
[GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, [GDT_ENTRY_ESPFIX_SS] = { { { 0x0000ffff, 0x00cf9200 } } },
[GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
GDT_STACK_CANARY_INIT GDT_STACK_CANARY_INIT
#endif #endif
...@@ -848,6 +848,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) ...@@ -848,6 +848,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
numa_add_cpu(smp_processor_id()); numa_add_cpu(smp_processor_id());
#endif #endif
/* Cap the iomem address space to what is addressable on all CPUs */
iomem_resource.end &= (1ULL << c->x86_phys_bits) - 1;
} }
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
......
obj-y = mce.o therm_throt.o obj-y = mce.o
obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o
obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o
obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o mce_intel.o obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o
obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
...@@ -10,10 +10,9 @@ ...@@ -10,10 +10,9 @@
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/system.h> #include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h> #include <asm/msr.h>
#include "mce.h"
/* Machine Check Handler For AMD Athlon/Duron: */ /* Machine Check Handler For AMD Athlon/Duron: */
static void k7_machine_check(struct pt_regs *regs, long error_code) static void k7_machine_check(struct pt_regs *regs, long error_code)
{ {
......
...@@ -44,7 +44,6 @@ ...@@ -44,7 +44,6 @@
#include <asm/msr.h> #include <asm/msr.h>
#include "mce-internal.h" #include "mce-internal.h"
#include "mce.h"
/* Handle unconfigured int18 (should never happen) */ /* Handle unconfigured int18 (should never happen) */
static void unexpected_machine_check(struct pt_regs *regs, long error_code) static void unexpected_machine_check(struct pt_regs *regs, long error_code)
...@@ -57,7 +56,7 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code) ...@@ -57,7 +56,7 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
void (*machine_check_vector)(struct pt_regs *, long error_code) = void (*machine_check_vector)(struct pt_regs *, long error_code) =
unexpected_machine_check; unexpected_machine_check;
int mce_disabled; int mce_disabled __read_mostly;
#ifdef CONFIG_X86_NEW_MCE #ifdef CONFIG_X86_NEW_MCE
...@@ -76,21 +75,22 @@ DEFINE_PER_CPU(unsigned, mce_exception_count); ...@@ -76,21 +75,22 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
* 2: SIGBUS or log uncorrected errors (if possible), log corrected errors * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
* 3: never panic or SIGBUS, log all errors (for testing only) * 3: never panic or SIGBUS, log all errors (for testing only)
*/ */
static int tolerant = 1; static int tolerant __read_mostly = 1;
static int banks; static int banks __read_mostly;
static u64 *bank; static u64 *bank __read_mostly;
static unsigned long notify_user; static int rip_msr __read_mostly;
static int rip_msr; static int mce_bootlog __read_mostly = -1;
static int mce_bootlog = -1; static int monarch_timeout __read_mostly = -1;
static int monarch_timeout = -1; static int mce_panic_timeout __read_mostly;
static int mce_panic_timeout; static int mce_dont_log_ce __read_mostly;
static int mce_dont_log_ce; int mce_cmci_disabled __read_mostly;
int mce_cmci_disabled; int mce_ignore_ce __read_mostly;
int mce_ignore_ce; int mce_ser __read_mostly;
int mce_ser;
/* User mode helper program triggered by machine check event */
static char trigger[128]; static unsigned long mce_need_notify;
static char *trigger_argv[2] = { trigger, NULL }; static char mce_helper[128];
static char *mce_helper_argv[2] = { mce_helper, NULL };
static unsigned long dont_init_banks; static unsigned long dont_init_banks;
...@@ -180,7 +180,7 @@ void mce_log(struct mce *mce) ...@@ -180,7 +180,7 @@ void mce_log(struct mce *mce)
wmb(); wmb();
mce->finished = 1; mce->finished = 1;
set_bit(0, &notify_user); set_bit(0, &mce_need_notify);
} }
static void print_mce(struct mce *m) static void print_mce(struct mce *m)
...@@ -691,18 +691,21 @@ static atomic_t global_nwo; ...@@ -691,18 +691,21 @@ static atomic_t global_nwo;
* in the entry order. * in the entry order.
* TBD double check parallel CPU hotunplug * TBD double check parallel CPU hotunplug
*/ */
static int mce_start(int no_way_out, int *order) static int mce_start(int *no_way_out)
{ {
int nwo; int order;
int cpus = num_online_cpus(); int cpus = num_online_cpus();
u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
if (!timeout) { if (!timeout)
*order = -1; return -1;
return no_way_out;
}
atomic_add(no_way_out, &global_nwo); atomic_add(*no_way_out, &global_nwo);
/*
* global_nwo should be updated before mce_callin
*/
smp_wmb();
order = atomic_add_return(1, &mce_callin);
/* /*
* Wait for everyone. * Wait for everyone.
...@@ -710,40 +713,43 @@ static int mce_start(int no_way_out, int *order) ...@@ -710,40 +713,43 @@ static int mce_start(int no_way_out, int *order)
while (atomic_read(&mce_callin) != cpus) { while (atomic_read(&mce_callin) != cpus) {
if (mce_timed_out(&timeout)) { if (mce_timed_out(&timeout)) {
atomic_set(&global_nwo, 0); atomic_set(&global_nwo, 0);
*order = -1; return -1;
return no_way_out;
} }
ndelay(SPINUNIT); ndelay(SPINUNIT);
} }
/* /*
* Cache the global no_way_out state. * mce_callin should be read before global_nwo
*/ */
nwo = atomic_read(&global_nwo); smp_rmb();
/* if (order == 1) {
* Monarch starts executing now, the others wait. /*
*/ * Monarch: Starts executing now, the others wait.
if (*order == 1) { */
atomic_set(&mce_executing, 1); atomic_set(&mce_executing, 1);
return nwo; } else {
/*
* Subject: Now start the scanning loop one by one in
* the original callin order.
* This way when there are any shared banks it will be
* only seen by one CPU before cleared, avoiding duplicates.
*/
while (atomic_read(&mce_executing) < order) {
if (mce_timed_out(&timeout)) {
atomic_set(&global_nwo, 0);
return -1;
}
ndelay(SPINUNIT);
}
} }
/* /*
* Now start the scanning loop one by one * Cache the global no_way_out state.
* in the original callin order.
* This way when there are any shared banks it will
* be only seen by one CPU before cleared, avoiding duplicates.
*/ */
while (atomic_read(&mce_executing) < *order) { *no_way_out = atomic_read(&global_nwo);
if (mce_timed_out(&timeout)) {
atomic_set(&global_nwo, 0); return order;
*order = -1;
return no_way_out;
}
ndelay(SPINUNIT);
}
return nwo;
} }
/* /*
...@@ -863,7 +869,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) ...@@ -863,7 +869,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* check handler. * check handler.
*/ */
int order; int order;
/* /*
* If no_way_out gets set, there is no safe way to recover from this * If no_way_out gets set, there is no safe way to recover from this
* MCE. If tolerant is cranked up, we'll try anyway. * MCE. If tolerant is cranked up, we'll try anyway.
...@@ -887,7 +892,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) ...@@ -887,7 +892,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
if (!banks) if (!banks)
goto out; goto out;
order = atomic_add_return(1, &mce_callin);
mce_setup(&m); mce_setup(&m);
m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
...@@ -909,7 +913,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) ...@@ -909,7 +913,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* This way we don't report duplicated events on shared banks * This way we don't report duplicated events on shared banks
* because the first one to see it will clear it. * because the first one to see it will clear it.
*/ */
no_way_out = mce_start(no_way_out, &order); order = mce_start(&no_way_out);
for (i = 0; i < banks; i++) { for (i = 0; i < banks; i++) {
__clear_bit(i, toclear); __clear_bit(i, toclear);
if (!bank[i]) if (!bank[i])
...@@ -1118,7 +1122,7 @@ static void mcheck_timer(unsigned long data) ...@@ -1118,7 +1122,7 @@ static void mcheck_timer(unsigned long data)
static void mce_do_trigger(struct work_struct *work) static void mce_do_trigger(struct work_struct *work)
{ {
call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
} }
static DECLARE_WORK(mce_trigger_work, mce_do_trigger); static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
...@@ -1135,7 +1139,7 @@ int mce_notify_irq(void) ...@@ -1135,7 +1139,7 @@ int mce_notify_irq(void)
clear_thread_flag(TIF_MCE_NOTIFY); clear_thread_flag(TIF_MCE_NOTIFY);
if (test_and_clear_bit(0, &notify_user)) { if (test_and_clear_bit(0, &mce_need_notify)) {
wake_up_interruptible(&mce_wait); wake_up_interruptible(&mce_wait);
/* /*
...@@ -1143,7 +1147,7 @@ int mce_notify_irq(void) ...@@ -1143,7 +1147,7 @@ int mce_notify_irq(void)
* work_pending is always cleared before the function is * work_pending is always cleared before the function is
* executed. * executed.
*/ */
if (trigger[0] && !work_pending(&mce_trigger_work)) if (mce_helper[0] && !work_pending(&mce_trigger_work))
schedule_work(&mce_trigger_work); schedule_work(&mce_trigger_work);
if (__ratelimit(&ratelimit)) if (__ratelimit(&ratelimit))
...@@ -1245,7 +1249,7 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) ...@@ -1245,7 +1249,7 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
* Various K7s with broken bank 0 around. Always disable * Various K7s with broken bank 0 around. Always disable
* by default. * by default.
*/ */
if (c->x86 == 6) if (c->x86 == 6 && banks > 0)
bank[0] = 0; bank[0] = 0;
} }
...@@ -1282,8 +1286,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) ...@@ -1282,8 +1286,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
return; return;
switch (c->x86_vendor) { switch (c->x86_vendor) {
case X86_VENDOR_INTEL: case X86_VENDOR_INTEL:
if (mce_p5_enabled()) intel_p5_mcheck_init(c);
intel_p5_mcheck_init(c);
break; break;
case X86_VENDOR_CENTAUR: case X86_VENDOR_CENTAUR:
winchip_mcheck_init(c); winchip_mcheck_init(c);
...@@ -1609,8 +1612,9 @@ static int mce_resume(struct sys_device *dev) ...@@ -1609,8 +1612,9 @@ static int mce_resume(struct sys_device *dev)
static void mce_cpu_restart(void *data) static void mce_cpu_restart(void *data)
{ {
del_timer_sync(&__get_cpu_var(mce_timer)); del_timer_sync(&__get_cpu_var(mce_timer));
if (mce_available(&current_cpu_data)) if (!mce_available(&current_cpu_data))
mce_init(); return;
mce_init();
mce_init_timer(); mce_init_timer();
} }
...@@ -1620,6 +1624,26 @@ static void mce_restart(void) ...@@ -1620,6 +1624,26 @@ static void mce_restart(void)
on_each_cpu(mce_cpu_restart, NULL, 1); on_each_cpu(mce_cpu_restart, NULL, 1);
} }
/* Toggle features for corrected errors */
static void mce_disable_ce(void *all)
{
if (!mce_available(&current_cpu_data))
return;
if (all)
del_timer_sync(&__get_cpu_var(mce_timer));
cmci_clear();
}
static void mce_enable_ce(void *all)
{
if (!mce_available(&current_cpu_data))
return;
cmci_reenable();
cmci_recheck();
if (all)
mce_init_timer();
}
static struct sysdev_class mce_sysclass = { static struct sysdev_class mce_sysclass = {
.suspend = mce_suspend, .suspend = mce_suspend,
.shutdown = mce_shutdown, .shutdown = mce_shutdown,
...@@ -1659,9 +1683,9 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, ...@@ -1659,9 +1683,9 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
static ssize_t static ssize_t
show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf)
{ {
strcpy(buf, trigger); strcpy(buf, mce_helper);
strcat(buf, "\n"); strcat(buf, "\n");
return strlen(trigger) + 1; return strlen(mce_helper) + 1;
} }
static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
...@@ -1670,10 +1694,10 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, ...@@ -1670,10 +1694,10 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
char *p; char *p;
int len; int len;
strncpy(trigger, buf, sizeof(trigger)); strncpy(mce_helper, buf, sizeof(mce_helper));
trigger[sizeof(trigger)-1] = 0; mce_helper[sizeof(mce_helper)-1] = 0;
len = strlen(trigger); len = strlen(mce_helper);
p = strchr(trigger, '\n'); p = strchr(mce_helper, '\n');
if (*p) if (*p)
*p = 0; *p = 0;
...@@ -1681,6 +1705,52 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, ...@@ -1681,6 +1705,52 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
return len; return len;
} }
static ssize_t set_ignore_ce(struct sys_device *s,
struct sysdev_attribute *attr,
const char *buf, size_t size)
{
u64 new;
if (strict_strtoull(buf, 0, &new) < 0)
return -EINVAL;
if (mce_ignore_ce ^ !!new) {
if (new) {
/* disable ce features */
on_each_cpu(mce_disable_ce, (void *)1, 1);
mce_ignore_ce = 1;
} else {
/* enable ce features */
mce_ignore_ce = 0;
on_each_cpu(mce_enable_ce, (void *)1, 1);
}
}
return size;
}
static ssize_t set_cmci_disabled(struct sys_device *s,
struct sysdev_attribute *attr,
const char *buf, size_t size)
{
u64 new;
if (strict_strtoull(buf, 0, &new) < 0)
return -EINVAL;
if (mce_cmci_disabled ^ !!new) {
if (new) {
/* disable cmci */
on_each_cpu(mce_disable_ce, NULL, 1);
mce_cmci_disabled = 1;
} else {
/* enable cmci */
mce_cmci_disabled = 0;
on_each_cpu(mce_enable_ce, NULL, 1);
}
}
return size;
}
static ssize_t store_int_with_restart(struct sys_device *s, static ssize_t store_int_with_restart(struct sys_device *s,
struct sysdev_attribute *attr, struct sysdev_attribute *attr,
const char *buf, size_t size) const char *buf, size_t size)
...@@ -1693,6 +1763,7 @@ static ssize_t store_int_with_restart(struct sys_device *s, ...@@ -1693,6 +1763,7 @@ static ssize_t store_int_with_restart(struct sys_device *s,
static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce);
static struct sysdev_ext_attribute attr_check_interval = { static struct sysdev_ext_attribute attr_check_interval = {
_SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int,
...@@ -1700,9 +1771,24 @@ static struct sysdev_ext_attribute attr_check_interval = { ...@@ -1700,9 +1771,24 @@ static struct sysdev_ext_attribute attr_check_interval = {
&check_interval &check_interval
}; };
static struct sysdev_ext_attribute attr_ignore_ce = {
_SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce),
&mce_ignore_ce
};
static struct sysdev_ext_attribute attr_cmci_disabled = {
_SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled),
&mce_cmci_disabled
};
static struct sysdev_attribute *mce_attrs[] = { static struct sysdev_attribute *mce_attrs[] = {
&attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger, &attr_tolerant.attr,
&attr_check_interval.attr,
&attr_trigger,
&attr_monarch_timeout.attr, &attr_monarch_timeout.attr,
&attr_dont_log_ce.attr,
&attr_ignore_ce.attr,
&attr_cmci_disabled.attr,
NULL NULL
}; };
...@@ -1712,7 +1798,7 @@ static cpumask_var_t mce_dev_initialized; ...@@ -1712,7 +1798,7 @@ static cpumask_var_t mce_dev_initialized;
static __cpuinit int mce_create_device(unsigned int cpu) static __cpuinit int mce_create_device(unsigned int cpu)
{ {
int err; int err;
int i; int i, j;
if (!mce_available(&boot_cpu_data)) if (!mce_available(&boot_cpu_data))
return -EIO; return -EIO;
...@@ -1730,9 +1816,9 @@ static __cpuinit int mce_create_device(unsigned int cpu) ...@@ -1730,9 +1816,9 @@ static __cpuinit int mce_create_device(unsigned int cpu)
if (err) if (err)
goto error; goto error;
} }
for (i = 0; i < banks; i++) { for (j = 0; j < banks; j++) {
err = sysdev_create_file(&per_cpu(mce_dev, cpu), err = sysdev_create_file(&per_cpu(mce_dev, cpu),
&bank_attrs[i]); &bank_attrs[j]);
if (err) if (err)
goto error2; goto error2;
} }
...@@ -1740,8 +1826,8 @@ static __cpuinit int mce_create_device(unsigned int cpu) ...@@ -1740,8 +1826,8 @@ static __cpuinit int mce_create_device(unsigned int cpu)
return 0; return 0;
error2: error2:
while (--i >= 0) while (--j >= 0)
sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]);
error: error:
while (--i >= 0) while (--i >= 0)
sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
...@@ -1883,7 +1969,7 @@ static __init int mce_init_device(void) ...@@ -1883,7 +1969,7 @@ static __init int mce_init_device(void)
if (!mce_available(&boot_cpu_data)) if (!mce_available(&boot_cpu_data))
return -EIO; return -EIO;
alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
err = mce_init_banks(); err = mce_init_banks();
if (err) if (err)
...@@ -1915,7 +2001,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ ...@@ -1915,7 +2001,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
/* This has to be run for each processor */ /* This has to be run for each processor */
void mcheck_init(struct cpuinfo_x86 *c) void mcheck_init(struct cpuinfo_x86 *c)
{ {
if (mce_disabled == 1) if (mce_disabled)
return; return;
switch (c->x86_vendor) { switch (c->x86_vendor) {
...@@ -1945,10 +2031,9 @@ void mcheck_init(struct cpuinfo_x86 *c) ...@@ -1945,10 +2031,9 @@ void mcheck_init(struct cpuinfo_x86 *c)
static int __init mcheck_enable(char *str) static int __init mcheck_enable(char *str)
{ {
mce_disabled = -1; mce_p5_enabled = 1;
return 1; return 1;
} }
__setup("mce", mcheck_enable); __setup("mce", mcheck_enable);
#endif /* CONFIG_X86_OLD_MCE */ #endif /* CONFIG_X86_OLD_MCE */
......
/*
 * Private declarations shared between the machine check (mcheck) files.
 */
#include <linux/init.h>
#include <asm/mce.h>

#ifdef CONFIG_X86_OLD_MCE
/* Per-vendor initializers for the legacy 32-bit machine check code. */
void amd_mcheck_init(struct cpuinfo_x86 *c);
void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
#endif

#ifdef CONFIG_X86_ANCIENT_MCE
/* Initializers for pre-P6 (Pentium / WinChip) machine check support. */
void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
void winchip_mcheck_init(struct cpuinfo_x86 *c);
/* Non-zero once P5-style MCE reporting has been requested. */
extern int mce_p5_enable;
static inline int mce_p5_enabled(void) { return mce_p5_enable; }
static inline void enable_p5_mce(void) { mce_p5_enable = 1; }
#else
/* Stubs so callers need no #ifdefs when ancient MCE support is off. */
static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
static inline int mce_p5_enabled(void) { return 0; }
static inline void enable_p5_mce(void) { }
#endif

/* Call the installed machine check handler for this CPU setup. */
extern void (*machine_check_vector)(struct pt_regs *, long error_code);

#ifdef CONFIG_X86_OLD_MCE
extern int nr_mce_banks;
void intel_set_thermal_handler(void);
#else
/* No-op when the old MCE code is not built. */
static inline void intel_set_thermal_handler(void) { }
#endif

void intel_init_thermal(struct cpuinfo_x86 *c);
/* /*
* Common code for Intel machine checks * Intel specific MCE features.
* Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
* Copyright (C) 2008, 2009 Intel Corporation
* Author: Andi Kleen
*/ */
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <asm/therm_throt.h> #include <linux/init.h>
#include <asm/processor.h> #include <linux/interrupt.h>
#include <asm/system.h> #include <linux/percpu.h>
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/processor.h>
#include <asm/msr.h> #include <asm/msr.h>
#include <asm/mce.h>
/*
* Support for Intel Correct Machine Check Interrupts. This allows
* the CPU to raise an interrupt when a corrected machine check happened.
* Normally we pick those up using a regular polling timer.
* Also supports reliable discovery of shared banks.
*/
#include "mce.h" static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
void intel_init_thermal(struct cpuinfo_x86 *c) /*
* cmci_discover_lock protects against parallel discovery attempts
* which could race against each other.
*/
static DEFINE_SPINLOCK(cmci_discover_lock);
#define CMCI_THRESHOLD 1
static int cmci_supported(int *banks)
{ {
unsigned int cpu = smp_processor_id(); u64 cap;
int tm2 = 0;
u32 l, h;
/* Thermal monitoring depends on ACPI and clock modulation*/ if (mce_cmci_disabled || mce_ignore_ce)
if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) return 0;
return;
/* /*
* First check if its enabled already, in which case there might * Vendor check is not strictly needed, but the initial
* be some SMM goo which handles it, so we can't even put a handler * initialization is vendor keyed and this
* since it might be delivered via SMI already: * makes sure none of the backdoors are entered otherwise.
*/ */
rdmsr(MSR_IA32_MISC_ENABLE, l, h); if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
h = apic_read(APIC_LVTTHMR); return 0;
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { if (!cpu_has_apic || lapic_get_maxlvt() < 6)
printk(KERN_DEBUG return 0;
"CPU%d: Thermal monitoring handled by SMI\n", cpu); rdmsrl(MSR_IA32_MCG_CAP, cap);
return; *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
return !!(cap & MCG_CMCI_P);
}
/*
* The interrupt handler. This is called on every event.
* Just call the poller directly to log any events.
* This could in theory increase the threshold under high load,
* but doesn't for now.
*/
static void intel_threshold_interrupt(void)
{
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
mce_notify_irq();
}
static void print_update(char *type, int *hdr, int num)
{
if (*hdr == 0)
printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
*hdr = 1;
printk(KERN_CONT " %s:%d", type, num);
}
/*
* Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
* on this CPU. Use the algorithm recommended in the SDM to discover shared
* banks.
*/
static void cmci_discover(int banks, int boot)
{
unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
unsigned long flags;
int hdr = 0;
int i;
spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++) {
u64 val;
if (test_bit(i, owned))
continue;
rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
/* Already owned by someone else? */
if (val & CMCI_EN) {
if (test_and_clear_bit(i, owned) || boot)
print_update("SHD", &hdr, i);
__clear_bit(i, __get_cpu_var(mce_poll_banks));
continue;
}
val |= CMCI_EN | CMCI_THRESHOLD;
wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
/* Did the enable bit stick? -- the bank supports CMCI */
if (val & CMCI_EN) {
if (!test_and_set_bit(i, owned) || boot)
print_update("CMCI", &hdr, i);
__clear_bit(i, __get_cpu_var(mce_poll_banks));
} else {
WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
}
} }
spin_unlock_irqrestore(&cmci_discover_lock, flags);
if (hdr)
printk(KERN_CONT "\n");
}
/*
* Just in case we missed an event during initialization check
* all the CMCI owned banks.
*/
void cmci_recheck(void)
{
unsigned long flags;
int banks;
if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
return;
local_irq_save(flags);
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
local_irq_restore(flags);
}
if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) /*
tm2 = 1; * Disable CMCI on this CPU for all banks it owns when it goes down.
* This allows other CPUs to claim the banks on rediscovery.
*/
void cmci_clear(void)
{
unsigned long flags;
int i;
int banks;
u64 val;
/* Check whether a vector already exists */ if (!cmci_supported(&banks))
if (h & APIC_VECTOR_MASK) {
printk(KERN_DEBUG
"CPU%d: Thermal LVT vector (%#x) already installed\n",
cpu, (h & APIC_VECTOR_MASK));
return; return;
spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++) {
if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
continue;
/* Disable CMCI */
rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
__clear_bit(i, __get_cpu_var(mce_banks_owned));
} }
spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
/*
* After a CPU went down cycle through all the others and rediscover
* Must run in process context.
*/
void cmci_rediscover(int dying)
{
int banks;
int cpu;
cpumask_var_t old;
if (!cmci_supported(&banks))
return;
if (!alloc_cpumask_var(&old, GFP_KERNEL))
return;
cpumask_copy(old, &current->cpus_allowed);
/* We'll mask the thermal vector in the lapic till we're ready: */ for_each_online_cpu(cpu) {
h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; if (cpu == dying)
apic_write(APIC_LVTTHMR, h); continue;
if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
continue;
/* Recheck banks in case CPUs don't all have the same */
if (cmci_supported(&banks))
cmci_discover(banks, 0);
}
rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); set_cpus_allowed_ptr(current, old);
wrmsr(MSR_IA32_THERM_INTERRUPT, free_cpumask_var(old);
l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); }
intel_set_thermal_handler(); /*
* Reenable CMCI on this CPU in case a CPU down failed.
*/
void cmci_reenable(void)
{
int banks;
if (cmci_supported(&banks))
cmci_discover(banks, 0);
}
rdmsr(MSR_IA32_MISC_ENABLE, l, h); static void intel_init_cmci(void)
wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); {
int banks;
/* Unmask the thermal vector: */ if (!cmci_supported(&banks))
l = apic_read(APIC_LVTTHMR); return;
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", mce_threshold_vector = intel_threshold_interrupt;
cpu, tm2 ? "TM2" : "TM1"); cmci_discover(banks, 1);
/*
* For CPU #0 this runs with still disabled APIC, but that's
* ok because only the vector is set up. We still do another
* check for the banks later for CPU #0 just to make sure
* to not miss any events.
*/
apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
cmci_recheck();
}
/* enable thermal throttle processing */ void mce_intel_feature_init(struct cpuinfo_x86 *c)
atomic_set(&therm_throt_en, 1); {
intel_init_thermal(c);
intel_init_cmci();
} }
/*
* Intel specific MCE features.
* Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
* Copyright (C) 2008, 2009 Intel Corporation
* Author: Andi Kleen
*/
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/msr.h>
#include <asm/mce.h>
#include <asm/hw_irq.h>
#include <asm/idle.h>
#include <asm/therm_throt.h>
#include "mce.h"
/*
 * Thermal LVT interrupt entry point: read IA32_THERM_STATUS and log a
 * machine check event when the throttling code deems it reportable.
 */
asmlinkage void smp_thermal_interrupt(void)
{
	__u64 msr_val;

	ack_APIC_irq();
	exit_idle();
	irq_enter();

	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
	/* Only log when therm_throt_process() says the event is reportable. */
	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
		mce_log_therm_throt_event(msr_val);

	inc_irq_stat(irq_thermal_count);
	irq_exit();
}
/*
* Support for Intel Correct Machine Check Interrupts. This allows
* the CPU to raise an interrupt when a corrected machine check happened.
* Normally we pick those up using a regular polling timer.
* Also supports reliable discovery of shared banks.
*/
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
/*
* cmci_discover_lock protects against parallel discovery attempts
* which could race against each other.
*/
static DEFINE_SPINLOCK(cmci_discover_lock);
#define CMCI_THRESHOLD 1
/*
 * Check whether this CPU can use CMCI.  On success the number of
 * usable MCA banks (capped at MAX_NR_BANKS) is stored in *banks and
 * non-zero is returned; otherwise returns 0.
 */
static int cmci_supported(int *banks)
{
	u64 mcg_cap;

	/* CMCI may be administratively turned off. */
	if (mce_cmci_disabled || mce_ignore_ce)
		return 0;

	/*
	 * Vendor check is not strictly needed, but the initial
	 * initialization is vendor keyed and this
	 * makes sure none of the backdoors are entered otherwise.
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return 0;

	/* A local APIC exposing the CMCI LVT entry (LVT #6) is required. */
	if (!cpu_has_apic || lapic_get_maxlvt() < 6)
		return 0;

	rdmsrl(MSR_IA32_MCG_CAP, mcg_cap);

	/* Low byte of MCG_CAP holds the bank count. */
	*banks = min_t(unsigned, MAX_NR_BANKS, mcg_cap & 0xff);

	return !!(mcg_cap & MCG_CMCI_P);
}
/*
* The interrupt handler. This is called on every event.
* Just call the poller directly to log any events.
* This could in theory increase the threshold under high load,
* but doesn't for now.
*/
static void intel_threshold_interrupt(void)
{
	/* Poll only the banks this CPU owns via CMCI and log what is found. */
	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
	/* Let interested parties know that new events may be available. */
	mce_notify_irq();
}
/*
 * Print one " type:banknum" entry on the discovery line, emitting the
 * per-CPU header exactly once (tracked via *hdr).
 */
static void print_update(char *type, int *hdr, int num)
{
	if (*hdr == 0)
		printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
	*hdr = 1;
	printk(KERN_CONT " %s:%d", type, num);
}
/*
* Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
* on this CPU. Use the algorithm recommended in the SDM to discover shared
* banks.
*/
static void cmci_discover(int banks, int boot)
{
	unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
	unsigned long flags;
	int hdr = 0;
	int i;

	/* Serialize against discovery running concurrently on other CPUs. */
	spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		u64 val;

		/* Skip banks this CPU has already claimed. */
		if (test_bit(i, owned))
			continue;

		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);

		/* Already owned by someone else? */
		if (val & CMCI_EN) {
			/* Drop any stale local claim; the bank is shared. */
			if (test_and_clear_bit(i, owned) || boot)
				print_update("SHD", &hdr, i);
			/* The owning CPU gets CMCI; stop polling it here. */
			__clear_bit(i, __get_cpu_var(mce_poll_banks));
			continue;
		}

		/* Try to claim the bank: enable CMCI with our threshold. */
		val |= CMCI_EN | CMCI_THRESHOLD;
		wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);

		/* Did the enable bit stick? -- the bank supports CMCI */
		if (val & CMCI_EN) {
			if (!test_and_set_bit(i, owned) || boot)
				print_update("CMCI", &hdr, i);
			__clear_bit(i, __get_cpu_var(mce_poll_banks));
		} else {
			/* No CMCI support: the bank must stay on the poll list. */
			WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
		}
	}
	spin_unlock_irqrestore(&cmci_discover_lock, flags);
	/* Terminate the KERN_CONT discovery line if anything was printed. */
	if (hdr)
		printk(KERN_CONT "\n");
}
/*
* Just in case we missed an event during initialization check
* all the CMCI owned banks.
*/
void cmci_recheck(void)
{
	unsigned long flags;
	int banks;

	if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
		return;

	/* Poll the CMCI-owned banks with interrupts disabled. */
	local_irq_save(flags);
	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
	local_irq_restore(flags);
}
/*
* Disable CMCI on this CPU for all banks it owns when it goes down.
* This allows other CPUs to claim the banks on rediscovery.
*/
void cmci_clear(void)
{
	unsigned long flags;
	int i;
	int banks;
	u64 val;

	if (!cmci_supported(&banks))
		return;
	/* Hold the discovery lock so no other CPU claims banks mid-update. */
	spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		/* Only touch banks this CPU actually owns. */
		if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
			continue;
		/* Disable CMCI */
		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
		val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
		wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
		/* Drop ownership so another CPU may claim the bank later. */
		__clear_bit(i, __get_cpu_var(mce_banks_owned));
	}
	spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
/*
* After a CPU went down cycle through all the others and rediscover
* Must run in process context.
*/
void cmci_rediscover(int dying)
{
	int banks;
	int cpu;
	cpumask_var_t old;

	if (!cmci_supported(&banks))
		return;
	/* Best effort: silently skip rediscovery if the mask allocation fails. */
	if (!alloc_cpumask_var(&old, GFP_KERNEL))
		return;
	/* Remember our affinity so it can be restored afterwards. */
	cpumask_copy(old, &current->cpus_allowed);

	for_each_online_cpu(cpu) {
		if (cpu == dying)
			continue;
		/* Hop onto the target CPU; skip it if migration fails. */
		if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
			continue;
		/* Recheck banks in case CPUs don't all have the same */
		if (cmci_supported(&banks))
			cmci_discover(banks, 0);
	}

	set_cpus_allowed_ptr(current, old);
	free_cpumask_var(old);
}
/*
* Reenable CMCI on this CPU in case a CPU down failed.
*/
/*
 * Reenable CMCI on this CPU in case a CPU down failed: rerun discovery
 * (non-boot mode) so this CPU reclaims its banks.
 */
void cmci_reenable(void)
{
	int banks;

	/* Nothing to do when CMCI is unsupported or disabled. */
	if (!cmci_supported(&banks))
		return;

	cmci_discover(banks, 0);
}
static void intel_init_cmci(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	/* Install the threshold handler before any bank can raise CMCI. */
	mce_threshold_vector = intel_threshold_interrupt;
	cmci_discover(banks, 1);
	/*
	 * For CPU #0 this runs with still disabled APIC, but that's
	 * ok because only the vector is set up. We still do another
	 * check for the banks later for CPU #0 just to make sure
	 * to not miss any events.
	 */
	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
	cmci_recheck();
}
/* Set up the Intel-specific MCE features (thermal + CMCI) for this CPU. */
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_thermal(c);
	intel_init_cmci();
}
...@@ -17,10 +17,9 @@ ...@@ -17,10 +17,9 @@
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/system.h> #include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h> #include <asm/msr.h>
#include "mce.h"
static int firstbank; static int firstbank;
#define MCE_RATE (15*HZ) /* timer rate is 15s */ #define MCE_RATE (15*HZ) /* timer rate is 15s */
......
/* /*
* P4 specific Machine Check Exception Reporting * P4 specific Machine Check Exception Reporting
*/ */
#include <linux/interrupt.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <asm/therm_throt.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/system.h> #include <asm/mce.h>
#include <asm/apic.h>
#include <asm/msr.h> #include <asm/msr.h>
#include "mce.h"
/* as supported by the P4/Xeon family */ /* as supported by the P4/Xeon family */
struct intel_mce_extended_msrs { struct intel_mce_extended_msrs {
u32 eax; u32 eax;
...@@ -33,46 +27,6 @@ struct intel_mce_extended_msrs { ...@@ -33,46 +27,6 @@ struct intel_mce_extended_msrs {
static int mce_num_extended_msrs; static int mce_num_extended_msrs;
#ifdef CONFIG_X86_MCE_P4THERMAL
static void unexpected_thermal_interrupt(struct pt_regs *regs)
{
printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
smp_processor_id());
add_taint(TAINT_MACHINE_CHECK);
}
/* P4/Xeon Thermal transition interrupt handler: */
static void intel_thermal_interrupt(struct pt_regs *regs)
{
__u64 msr_val;
ack_APIC_irq();
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
therm_throt_process(msr_val & THERM_STATUS_PROCHOT);
}
/* Thermal interrupt handler for this CPU setup: */
static void (*vendor_thermal_interrupt)(struct pt_regs *regs) =
unexpected_thermal_interrupt;
void smp_thermal_interrupt(struct pt_regs *regs)
{
irq_enter();
vendor_thermal_interrupt(regs);
__get_cpu_var(irq_stat).irq_thermal_count++;
irq_exit();
}
void intel_set_thermal_handler(void)
{
vendor_thermal_interrupt = intel_thermal_interrupt;
}
#endif /* CONFIG_X86_MCE_P4THERMAL */
/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
{ {
......
...@@ -10,12 +10,11 @@ ...@@ -10,12 +10,11 @@
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/system.h> #include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h> #include <asm/msr.h>
#include "mce.h"
/* By default disabled */ /* By default disabled */
int mce_p5_enable; int mce_p5_enabled __read_mostly;
/* Machine check handler for Pentium class Intel CPUs: */ /* Machine check handler for Pentium class Intel CPUs: */
static void pentium_machine_check(struct pt_regs *regs, long error_code) static void pentium_machine_check(struct pt_regs *regs, long error_code)
...@@ -43,15 +42,13 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c) ...@@ -43,15 +42,13 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
{ {
u32 l, h; u32 l, h;
/* Check for MCE support: */ /* Default P5 to off as its often misconnected: */
if (!cpu_has(c, X86_FEATURE_MCE)) if (!mce_p5_enabled)
return; return;
#ifdef CONFIG_X86_OLD_MCE /* Check for MCE support: */
/* Default P5 to off as its often misconnected: */ if (!cpu_has(c, X86_FEATURE_MCE))
if (mce_disabled != -1)
return; return;
#endif
machine_check_vector = pentium_machine_check; machine_check_vector = pentium_machine_check;
/* Make sure the vector pointer is visible before we enable MCEs: */ /* Make sure the vector pointer is visible before we enable MCEs: */
......
...@@ -10,10 +10,9 @@ ...@@ -10,10 +10,9 @@
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/system.h> #include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h> #include <asm/msr.h>
#include "mce.h"
/* Machine Check Handler For PII/PIII */ /* Machine Check Handler For PII/PIII */
static void intel_machine_check(struct pt_regs *regs, long error_code) static void intel_machine_check(struct pt_regs *regs, long error_code)
{ {
......
...@@ -13,13 +13,23 @@ ...@@ -13,13 +13,23 @@
* Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
* Inspired by Ross Biro's and Al Borchers' counter code. * Inspired by Ross Biro's and Al Borchers' counter code.
*/ */
#include <linux/interrupt.h>
#include <linux/notifier.h> #include <linux/notifier.h>
#include <linux/jiffies.h> #include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/sysdev.h> #include <linux/sysdev.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/cpu.h> #include <linux/cpu.h>
#include <asm/therm_throt.h> #include <asm/processor.h>
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
/* How long to wait between reporting thermal events */ /* How long to wait between reporting thermal events */
#define CHECK_INTERVAL (300 * HZ) #define CHECK_INTERVAL (300 * HZ)
...@@ -27,7 +37,7 @@ ...@@ -27,7 +37,7 @@
static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
atomic_t therm_throt_en = ATOMIC_INIT(0); static atomic_t therm_throt_en = ATOMIC_INIT(0);
#ifdef CONFIG_SYSFS #ifdef CONFIG_SYSFS
#define define_therm_throt_sysdev_one_ro(_name) \ #define define_therm_throt_sysdev_one_ro(_name) \
...@@ -82,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = { ...@@ -82,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = {
* 1 : Event should be logged further, and a message has been * 1 : Event should be logged further, and a message has been
* printed to the syslog. * printed to the syslog.
*/ */
int therm_throt_process(int curr) static int therm_throt_process(int curr)
{ {
unsigned int cpu = smp_processor_id(); unsigned int cpu = smp_processor_id();
__u64 tmp_jiffs = get_jiffies_64(); __u64 tmp_jiffs = get_jiffies_64();
...@@ -186,6 +196,94 @@ static __init int thermal_throttle_init_device(void) ...@@ -186,6 +196,94 @@ static __init int thermal_throttle_init_device(void)
return 0; return 0;
} }
device_initcall(thermal_throttle_init_device); device_initcall(thermal_throttle_init_device);
#endif /* CONFIG_SYSFS */ #endif /* CONFIG_SYSFS */
/* Thermal transition interrupt handler */
static void intel_thermal_interrupt(void)
{
	__u64 msr_val;

	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
	/* Only log when therm_throt_process() says the event is reportable. */
	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
		mce_log_therm_throt_event(msr_val);
}
/* Default handler: a thermal LVT interrupt arrived before setup completed. */
static void unexpected_thermal_interrupt(void)
{
	printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
		smp_processor_id());
	add_taint(TAINT_MACHINE_CHECK);
}
static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
/* Common thermal interrupt entry point; dispatches to the active vector. */
asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
{
	exit_idle();
	irq_enter();
	inc_irq_stat(irq_thermal_count);
	smp_thermal_vector();
	irq_exit();
	/* Ack only at the end to avoid potential reentry */
	ack_APIC_irq();
}
/*
 * Probe and enable the thermal monitoring LVT for this CPU, unless SMM
 * firmware already owns it or a vector is already installed.
 */
void intel_init_thermal(struct cpuinfo_x86 *c)
{
	unsigned int cpu = smp_processor_id();
	int tm2 = 0;
	u32 l, h;

	/* Thermal monitoring depends on ACPI and clock modulation*/
	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
		return;

	/*
	 * First check if its enabled already, in which case there might
	 * be some SMM goo which handles it, so we can't even put a handler
	 * since it might be delivered via SMI already:
	 */
	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
	h = apic_read(APIC_LVTTHMR);
	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
		printk(KERN_DEBUG
		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
		return;
	}

	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
		tm2 = 1;

	/* Check whether a vector already exists */
	if (h & APIC_VECTOR_MASK) {
		printk(KERN_DEBUG
		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
		       cpu, (h & APIC_VECTOR_MASK));
		return;
	}

	/* We'll mask the thermal vector in the lapic till we're ready: */
	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
	apic_write(APIC_LVTTHMR, h);

	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
	wrmsr(MSR_IA32_THERM_INTERRUPT,
	      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);

	/* Install the real handler before interrupts can be delivered. */
	smp_thermal_vector = intel_thermal_interrupt;

	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);

	/* Unmask the thermal vector: */
	l = apic_read(APIC_LVTTHMR);
	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);

	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
	       cpu, tm2 ? "TM2" : "TM1");

	/* enable thermal throttle processing */
	atomic_set(&therm_throt_en, 1);
}
...@@ -9,10 +9,9 @@ ...@@ -9,10 +9,9 @@
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/system.h> #include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h> #include <asm/msr.h>
#include "mce.h"
/* Machine check handler for WinChip C6: */ /* Machine check handler for WinChip C6: */
static void winchip_machine_check(struct pt_regs *regs, long error_code) static void winchip_machine_check(struct pt_regs *regs, long error_code)
{ {
......
...@@ -716,11 +716,15 @@ static void probe_nmi_watchdog(void) ...@@ -716,11 +716,15 @@ static void probe_nmi_watchdog(void)
wd_ops = &k7_wd_ops; wd_ops = &k7_wd_ops;
break; break;
case X86_VENDOR_INTEL: case X86_VENDOR_INTEL:
/* /* Work around where perfctr1 doesn't have a working enable
* Work around Core Duo (Yonah) errata AE49 where perfctr1 * bit as described in the following errata:
* doesn't have a working enable bit. * AE49 Core Duo and Intel Core Solo 65 nm
* AN49 Intel Pentium Dual-Core
* AF49 Dual-Core Intel Xeon Processor LV
*/ */
if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
boot_cpu_data.x86_mask == 4))) {
intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
} }
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <asm/cpu.h> #include <asm/cpu.h>
#include <asm/reboot.h> #include <asm/reboot.h>
#include <asm/virtext.h> #include <asm/virtext.h>
#include <asm/iommu.h>
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
...@@ -103,5 +104,10 @@ void native_machine_crash_shutdown(struct pt_regs *regs) ...@@ -103,5 +104,10 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
#ifdef CONFIG_HPET_TIMER #ifdef CONFIG_HPET_TIMER
hpet_disable(); hpet_disable();
#endif #endif
#ifdef CONFIG_X86_64
pci_iommu_shutdown();
#endif
crash_save_cpu(regs, safe_smp_processor_id()); crash_save_cpu(regs, safe_smp_processor_id());
} }
...@@ -240,10 +240,35 @@ static void __init do_add_efi_memmap(void) ...@@ -240,10 +240,35 @@ static void __init do_add_efi_memmap(void)
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
int e820_type; int e820_type;
if (md->attribute & EFI_MEMORY_WB) switch (md->type) {
e820_type = E820_RAM; case EFI_LOADER_CODE:
else case EFI_LOADER_DATA:
case EFI_BOOT_SERVICES_CODE:
case EFI_BOOT_SERVICES_DATA:
case EFI_CONVENTIONAL_MEMORY:
if (md->attribute & EFI_MEMORY_WB)
e820_type = E820_RAM;
else
e820_type = E820_RESERVED;
break;
case EFI_ACPI_RECLAIM_MEMORY:
e820_type = E820_ACPI;
break;
case EFI_ACPI_MEMORY_NVS:
e820_type = E820_NVS;
break;
case EFI_UNUSABLE_MEMORY:
e820_type = E820_UNUSABLE;
break;
default:
/*
* EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
* EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
* EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
*/
e820_type = E820_RESERVED; e820_type = E820_RESERVED;
break;
}
e820_add_region(start, size, e820_type); e820_add_region(start, size, e820_type);
} }
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
......
...@@ -48,7 +48,6 @@ ...@@ -48,7 +48,6 @@
#include <asm/segment.h> #include <asm/segment.h>
#include <asm/smp.h> #include <asm/smp.h>
#include <asm/page_types.h> #include <asm/page_types.h>
#include <asm/desc.h>
#include <asm/percpu.h> #include <asm/percpu.h>
#include <asm/dwarf2.h> #include <asm/dwarf2.h>
#include <asm/processor-flags.h> #include <asm/processor-flags.h>
...@@ -84,7 +83,7 @@ ...@@ -84,7 +83,7 @@
#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else #else
#define preempt_stop(clobbers) #define preempt_stop(clobbers)
#define resume_kernel restore_nocheck #define resume_kernel restore_all
#endif #endif
.macro TRACE_IRQS_IRET .macro TRACE_IRQS_IRET
...@@ -372,7 +371,7 @@ END(ret_from_exception) ...@@ -372,7 +371,7 @@ END(ret_from_exception)
ENTRY(resume_kernel) ENTRY(resume_kernel)
DISABLE_INTERRUPTS(CLBR_ANY) DISABLE_INTERRUPTS(CLBR_ANY)
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
jnz restore_nocheck jnz restore_all
need_resched: need_resched:
movl TI_flags(%ebp), %ecx # need_resched set ? movl TI_flags(%ebp), %ecx # need_resched set ?
testb $_TIF_NEED_RESCHED, %cl testb $_TIF_NEED_RESCHED, %cl
...@@ -540,6 +539,8 @@ syscall_exit: ...@@ -540,6 +539,8 @@ syscall_exit:
jne syscall_exit_work jne syscall_exit_work
restore_all: restore_all:
TRACE_IRQS_IRET
restore_all_notrace:
movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
# Warning: PT_OLDSS(%esp) contains the wrong/random values if we # Warning: PT_OLDSS(%esp) contains the wrong/random values if we
# are returning to the kernel. # are returning to the kernel.
...@@ -551,8 +552,6 @@ restore_all: ...@@ -551,8 +552,6 @@ restore_all:
CFI_REMEMBER_STATE CFI_REMEMBER_STATE
je ldt_ss # returning to user-space with LDT SS je ldt_ss # returning to user-space with LDT SS
restore_nocheck: restore_nocheck:
TRACE_IRQS_IRET
restore_nocheck_notrace:
RESTORE_REGS 4 # skip orig_eax/error_code RESTORE_REGS 4 # skip orig_eax/error_code
CFI_ADJUST_CFA_OFFSET -4 CFI_ADJUST_CFA_OFFSET -4
irq_return: irq_return:
...@@ -588,22 +587,34 @@ ldt_ss: ...@@ -588,22 +587,34 @@ ldt_ss:
jne restore_nocheck jne restore_nocheck
#endif #endif
/* If returning to userspace with 16bit stack, /*
* try to fix the higher word of ESP, as the CPU * Setup and switch to ESPFIX stack
* won't restore it. *
* This is an "official" bug of all the x86-compatible * We're returning to userspace with a 16 bit stack. The CPU will not
* CPUs, which we can try to work around to make * restore the high word of ESP for us on executing iret... This is an
* dosemu and wine happy. */ * "official" bug of all the x86-compatible CPUs, which we can work
movl PT_OLDESP(%esp), %eax * around to make dosemu and wine happy. We do this by preloading the
movl %esp, %edx * high word of ESP with the high word of the userspace ESP while
call patch_espfix_desc * compensating for the offset by changing to the ESPFIX segment with
* a base address that matches for the difference.
*/
mov %esp, %edx /* load kernel esp */
mov PT_OLDESP(%esp), %eax /* load userspace esp */
mov %dx, %ax /* eax: new kernel esp */
sub %eax, %edx /* offset (low word is 0) */
PER_CPU(gdt_page, %ebx)
shr $16, %edx
mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */
mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */
pushl $__ESPFIX_SS pushl $__ESPFIX_SS
CFI_ADJUST_CFA_OFFSET 4 CFI_ADJUST_CFA_OFFSET 4
pushl %eax push %eax /* new kernel esp */
CFI_ADJUST_CFA_OFFSET 4 CFI_ADJUST_CFA_OFFSET 4
/* Disable interrupts, but do not irqtrace this section: we
* will soon execute iret and the tracer was already set to
* the irqstate after the iret */
DISABLE_INTERRUPTS(CLBR_EAX) DISABLE_INTERRUPTS(CLBR_EAX)
TRACE_IRQS_OFF lss (%esp), %esp /* switch to espfix segment */
lss (%esp), %esp
CFI_ADJUST_CFA_OFFSET -8 CFI_ADJUST_CFA_OFFSET -8
jmp restore_nocheck jmp restore_nocheck
CFI_ENDPROC CFI_ENDPROC
...@@ -716,15 +727,24 @@ PTREGSCALL(vm86) ...@@ -716,15 +727,24 @@ PTREGSCALL(vm86)
PTREGSCALL(vm86old) PTREGSCALL(vm86old)
.macro FIXUP_ESPFIX_STACK .macro FIXUP_ESPFIX_STACK
/* since we are on a wrong stack, we cant make it a C code :( */ /*
* Switch back for ESPFIX stack to the normal zerobased stack
*
* We can't call C functions using the ESPFIX stack. This code reads
* the high word of the segment base from the GDT and swiches to the
* normal stack and adjusts ESP with the matching offset.
*/
/* fixup the stack */
PER_CPU(gdt_page, %ebx) PER_CPU(gdt_page, %ebx)
GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */
addl %esp, %eax mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */
shl $16, %eax
addl %esp, %eax /* the adjusted stack pointer */
pushl $__KERNEL_DS pushl $__KERNEL_DS
CFI_ADJUST_CFA_OFFSET 4 CFI_ADJUST_CFA_OFFSET 4
pushl %eax pushl %eax
CFI_ADJUST_CFA_OFFSET 4 CFI_ADJUST_CFA_OFFSET 4
lss (%esp), %esp lss (%esp), %esp /* switch to the normal stack segment */
CFI_ADJUST_CFA_OFFSET -8 CFI_ADJUST_CFA_OFFSET -8
.endm .endm
.macro UNWIND_ESPFIX_STACK .macro UNWIND_ESPFIX_STACK
...@@ -1329,7 +1349,7 @@ nmi_stack_correct: ...@@ -1329,7 +1349,7 @@ nmi_stack_correct:
xorl %edx,%edx # zero error code xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer movl %esp,%eax # pt_regs pointer
call do_nmi call do_nmi
jmp restore_nocheck_notrace jmp restore_all_notrace
CFI_ENDPROC CFI_ENDPROC
nmi_stack_fixup: nmi_stack_fixup:
......
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
#include <asm/segment.h> #include <asm/segment.h>
#include <asm/page_types.h> #include <asm/page_types.h>
#include <asm/pgtable_types.h> #include <asm/pgtable_types.h>
#include <asm/desc.h>
#include <asm/cache.h> #include <asm/cache.h>
#include <asm/thread_info.h> #include <asm/thread_info.h>
#include <asm/asm-offsets.h> #include <asm/asm-offsets.h>
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
#include <linux/linkage.h> #include <linux/linkage.h>
#include <linux/threads.h> #include <linux/threads.h>
#include <linux/init.h> #include <linux/init.h>
#include <asm/desc.h>
#include <asm/segment.h> #include <asm/segment.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/page.h> #include <asm/page.h>
......
...@@ -510,7 +510,8 @@ static int hpet_setup_irq(struct hpet_dev *dev) ...@@ -510,7 +510,8 @@ static int hpet_setup_irq(struct hpet_dev *dev)
{ {
if (request_irq(dev->irq, hpet_interrupt_handler, if (request_irq(dev->irq, hpet_interrupt_handler,
IRQF_DISABLED|IRQF_NOBALANCING, dev->name, dev)) IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
dev->name, dev))
return -1; return -1;
disable_irq(dev->irq); disable_irq(dev->irq);
......
...@@ -290,6 +290,8 @@ static int __init pci_iommu_init(void) ...@@ -290,6 +290,8 @@ static int __init pci_iommu_init(void)
void pci_iommu_shutdown(void) void pci_iommu_shutdown(void)
{ {
gart_iommu_shutdown(); gart_iommu_shutdown();
amd_iommu_shutdown();
} }
/* Must execute after PCI subsystem */ /* Must execute after PCI subsystem */
fs_initcall(pci_iommu_init); fs_initcall(pci_iommu_init);
......
...@@ -54,6 +54,7 @@ ...@@ -54,6 +54,7 @@
#include <asm/traps.h> #include <asm/traps.h>
#include <asm/desc.h> #include <asm/desc.h>
#include <asm/i387.h> #include <asm/i387.h>
#include <asm/mce.h>
#include <asm/mach_traps.h> #include <asm/mach_traps.h>
...@@ -65,8 +66,6 @@ ...@@ -65,8 +66,6 @@
#include <asm/setup.h> #include <asm/setup.h>
#include <asm/traps.h> #include <asm/traps.h>
#include "cpu/mcheck/mce.h"
asmlinkage int system_call(void); asmlinkage int system_call(void);
/* Do we ignore FPU interrupts ? */ /* Do we ignore FPU interrupts ? */
......
...@@ -952,8 +952,6 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) ...@@ -952,8 +952,6 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
tsk = current; tsk = current;
mm = tsk->mm; mm = tsk->mm;
prefetchw(&mm->mmap_sem);
/* Get the faulting address: */ /* Get the faulting address: */
address = read_cr2(); address = read_cr2();
...@@ -963,6 +961,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) ...@@ -963,6 +961,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
*/ */
if (kmemcheck_active(regs)) if (kmemcheck_active(regs))
kmemcheck_hide(regs); kmemcheck_hide(regs);
prefetchw(&mm->mmap_sem);
if (unlikely(kmmio_fault(regs, address))) if (unlikely(kmmio_fault(regs, address)))
return; return;
......
...@@ -527,7 +527,7 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end, ...@@ -527,7 +527,7 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
return phys_pud_init(pud, addr, end, page_size_mask); return phys_pud_init(pud, addr, end, page_size_mask);
} }
unsigned long __init unsigned long __meminit
kernel_physical_mapping_init(unsigned long start, kernel_physical_mapping_init(unsigned long start,
unsigned long end, unsigned long end,
unsigned long page_size_mask) unsigned long page_size_mask)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment