Commit 1d44e828, authored by Jack Steiner, committed by Ingo Molnar

x86, UV: Fix NMI handler for UV platforms

This fixes problems seen on UV systems handling NMIs from the
node controller.

I isolated the "dazed..." messages that I saw earlier to a bug in
the BMC on our platform. It was sending NMIs w/o properly setting
a register that indicated the source of NMI.

So rather than _assuming_ any unhandled NMI came from the UV system
maintenance console (SMC), add a check to verify that the SMC actually
sent the NMI.
Signed-off-by: Jack Steiner <steiner@sgi.com>
Cc: gorcunov@gmail.com
Cc: dzickus@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 693d92a1
...@@ -398,6 +398,8 @@ struct uv_blade_info { ...@@ -398,6 +398,8 @@ struct uv_blade_info {
unsigned short nr_online_cpus; unsigned short nr_online_cpus;
unsigned short pnode; unsigned short pnode;
short memory_nid; short memory_nid;
spinlock_t nmi_lock;
unsigned long nmi_count;
}; };
extern struct uv_blade_info *uv_blade_info; extern struct uv_blade_info *uv_blade_info;
extern short *uv_node_to_blade; extern short *uv_node_to_blade;
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* *
* SGI UV MMR definitions * SGI UV MMR definitions
* *
* Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved.
*/ */
#ifndef _ASM_X86_UV_UV_MMRS_H #ifndef _ASM_X86_UV_UV_MMRS_H
...@@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u { ...@@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u {
} s; } s;
}; };
/* ========================================================================= */
/* UVH_SCRATCH5 */
/* ========================================================================= */
#define UVH_SCRATCH5 0x2d0200UL
#define UVH_SCRATCH5_32 0x00778
#define UVH_SCRATCH5_SCRATCH5_SHFT 0
#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
/*
 * Overlay for a UVH_SCRATCH5 register value: `v` accesses the whole value
 * at once, while `s` views the same storage as one 64-bit-wide field.
 */
union uvh_scratch5_u {
unsigned long v; /* whole register value */
struct uvh_scratch5_s {
unsigned long scratch5 : 64; /* RW, W1CS */
} s;
};
#endif /* __ASM_UV_MMRS_X86_H__ */ #endif /* __ASM_UV_MMRS_X86_H__ */
...@@ -37,6 +37,13 @@ ...@@ -37,6 +37,13 @@
#include <asm/smp.h> #include <asm/smp.h>
#include <asm/x86_init.h> #include <asm/x86_init.h>
#include <asm/emergency-restart.h> #include <asm/emergency-restart.h>
#include <asm/nmi.h>
/* The BMC sets a bit in this MMR before sending an NMI */
#define UVH_NMI_MMR UVH_SCRATCH5
#define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8)
#define UV_NMI_PENDING_MASK (1UL << 63)
DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
DEFINE_PER_CPU(int, x2apic_extra_bits); DEFINE_PER_CPU(int, x2apic_extra_bits);
...@@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void) ...@@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void)
*/ */
int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
{ {
unsigned long real_uv_nmi;
int bid;
if (reason != DIE_NMIUNKNOWN) if (reason != DIE_NMIUNKNOWN)
return NOTIFY_OK; return NOTIFY_OK;
if (in_crash_kexec) if (in_crash_kexec)
/* do nothing if entering the crash kernel */ /* do nothing if entering the crash kernel */
return NOTIFY_OK; return NOTIFY_OK;
/* /*
* Use a lock so only one cpu prints at a time * Each blade has an MMR that indicates when an NMI has been sent
* to prevent intermixed output. * to cpus on the blade. If an NMI is detected, atomically
* clear the MMR and update a per-blade NMI count used to
* cause each cpu on the blade to notice a new NMI.
*/
bid = uv_numa_blade_id();
real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
if (unlikely(real_uv_nmi)) {
spin_lock(&uv_blade_info[bid].nmi_lock);
real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
if (real_uv_nmi) {
uv_blade_info[bid].nmi_count++;
uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
}
spin_unlock(&uv_blade_info[bid].nmi_lock);
}
if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
return NOTIFY_DONE;
__get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
/*
* Use a lock so only one cpu prints at a time.
* This prevents intermixed output.
*/ */
spin_lock(&uv_nmi_lock); spin_lock(&uv_nmi_lock);
pr_info("NMI stack dump cpu %u:\n", smp_processor_id()); pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
dump_stack(); dump_stack();
spin_unlock(&uv_nmi_lock); spin_unlock(&uv_nmi_lock);
...@@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) ...@@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
} }
static struct notifier_block uv_dump_stack_nmi_nb = { static struct notifier_block uv_dump_stack_nmi_nb = {
.notifier_call = uv_handle_nmi .notifier_call = uv_handle_nmi,
.priority = NMI_LOCAL_LOW_PRIOR - 1,
}; };
void uv_register_nmi_notifier(void) void uv_register_nmi_notifier(void)
...@@ -720,8 +756,9 @@ void __init uv_system_init(void) ...@@ -720,8 +756,9 @@ void __init uv_system_init(void)
printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
uv_blade_info = kmalloc(bytes, GFP_KERNEL); uv_blade_info = kzalloc(bytes, GFP_KERNEL);
BUG_ON(!uv_blade_info); BUG_ON(!uv_blade_info);
for (blade = 0; blade < uv_num_possible_blades(); blade++) for (blade = 0; blade < uv_num_possible_blades(); blade++)
uv_blade_info[blade].memory_nid = -1; uv_blade_info[blade].memory_nid = -1;
...@@ -747,6 +784,7 @@ void __init uv_system_init(void) ...@@ -747,6 +784,7 @@ void __init uv_system_init(void)
uv_blade_info[blade].pnode = pnode; uv_blade_info[blade].pnode = pnode;
uv_blade_info[blade].nr_possible_cpus = 0; uv_blade_info[blade].nr_possible_cpus = 0;
uv_blade_info[blade].nr_online_cpus = 0; uv_blade_info[blade].nr_online_cpus = 0;
spin_lock_init(&uv_blade_info[blade].nmi_lock);
max_pnode = max(pnode, max_pnode); max_pnode = max(pnode, max_pnode);
blade++; blade++;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment