non-fatal.c 2.39 KB
Newer Older
1
/*
 * Non Fatal Machine Check Exception Reporting
 *
 * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk>
 *
 * This file contains routines to check for non-fatal MCEs every 15s
 *
 */

#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/config.h>
#include <linux/irq.h>
#include <linux/workqueue.h>
#include <linux/interrupt.h>
#include <linux/smp.h>
19
#include <linux/module.h>
20 21 22 23 24 25 26 27 28

#include <asm/processor.h> 
#include <asm/system.h>
#include <asm/msr.h>

#include "mce.h"

/* Timer whose callback (mce_timerfunc) polls for non-fatal machine checks. */
static struct timer_list mce_timer;
/* Non-zero once init_nonfatal_mce_checker() has armed mce_timer. */
static int timerset;
29
/* Index of the first bank scanned by mce_checkregs(); chosen at init time. */
static int firstbank;
30 31 32 33 34 35 36 37 38

/*
 * Polling interval in jiffies (15 seconds).  The expansion is parenthesized
 * so the macro behaves as a single value inside larger expressions such as
 * division or modulo.
 */
#define MCE_RATE	(15*HZ)

static void mce_checkregs (void *info)
{
	u32 low, high;
	int i;

	preempt_disable(); 
39
	for (i=firstbank; i<nr_mce_banks; i++) {
40 41 42
		rdmsr (MSR_IA32_MC0_STATUS+i*4, low, high);

		if (high & (1<<31)) {
43
			printk (KERN_EMERG "MCE: The hardware reports a non fatal, correctable incident occurred on CPU %d.\n",
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
				smp_processor_id());
			printk (KERN_EMERG "Bank %d: %08x%08x\n", i, high, low);

			/* Scrub the error so we don't pick it up in MCE_RATE seconds time. */
			wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);

			/* Serialize */
			wmb();
		}
	}
	preempt_enable();
}

/*
 * Work handler: run mce_checkregs() on other CPUs via smp_call_function().
 *
 * The timer callback has already checked the CPU it ran on; this handler is
 * only scheduled when more than one CPU is online.
 * NOTE(review): presumably deferred to a workqueue because the call is made
 * with its last (wait) argument set to 1 - confirm against the
 * smp_call_function() contract for this kernel version.
 *
 * @data: work item argument, unused (the work is declared with NULL).
 */
static void do_mce_timer(void *data)
{
	smp_call_function(mce_checkregs, NULL, 1, 1);
}

/* Work item that invokes do_mce_timer() with a NULL argument when scheduled. */
static DECLARE_WORK(mce_work, do_mce_timer, NULL);

static void mce_timerfunc (unsigned long data)
{
66
	mce_checkregs (NULL);
67 68 69 70 71 72 73 74
#ifdef CONFIG_SMP
	if (num_online_cpus() > 1) 
		schedule_work (&mce_work); 
#endif
	mce_timer.expires = jiffies + MCE_RATE;
	add_timer (&mce_timer);
}	

75
/*
 * Set up periodic polling for non-fatal machine check events.
 *
 * Requires the boot CPU to advertise both X86_FEATURE_MCE and
 * X86_FEATURE_MCA.  Picks the first bank to scan, then arms mce_timer
 * (once only, guarded by timerset) to fire MCE_RATE jiffies from now.
 *
 * Returns 0 on success, or -ENODEV when either CPU feature is missing.
 */
static int __init init_nonfatal_mce_checker(void)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;

	/* Need basic MCE support as well as PPro style MCA */
	if (!cpu_has(c, X86_FEATURE_MCE) || !cpu_has(c, X86_FEATURE_MCA))
		return -ENODEV;

	/* Some Athlons misbehave when we frob bank 0 */
	firstbank = (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 6) ? 1 : 0;

	/* Timer already armed: nothing more to do. */
	if (timerset)
		return 0;

	/* Set the timer to check for non-fatal errors every MCE_RATE jiffies */
	init_timer(&mce_timer);
	mce_timer.function = &mce_timerfunc;
	mce_timer.data = 0;
	mce_timer.expires = jiffies + MCE_RATE;
	add_timer(&mce_timer);
	timerset = 1;
	printk(KERN_INFO "Machine check exception polling timer started.\n");

	return 0;
}
107
/* Register init_nonfatal_mce_checker() as this file's initialization routine. */
module_init(init_nonfatal_mce_checker);