Kirill Smelkov / linux / Commits

Commit 1d991001, authored Jun 20, 2009 by Ingo Molnar

    Merge branch 'x86/mce3' into x86/urgent

Parents: bc3f5d3d, b1f49f95

Showing 16 changed files with 528 additions and 508 deletions (+528 / -508).
    arch/x86/include/asm/mce.h                  +51   -12
    arch/x86/include/asm/therm_throt.h           +0    -9   (deleted)
    arch/x86/kernel/cpu/mcheck/Makefile          +5    -4
    arch/x86/kernel/cpu/mcheck/k7.c              +1    -2
    arch/x86/kernel/cpu/mcheck/mce.c           +158   -73
    arch/x86/kernel/cpu/mcheck/mce.h             +0   -38   (deleted)
    arch/x86/kernel/cpu/mcheck/mce_amd.c         +0    -0   (moved)
    arch/x86/kernel/cpu/mcheck/mce_intel.c     +200   -54
    arch/x86/kernel/cpu/mcheck/mce_intel_64.c    +0  -248   (deleted)
    arch/x86/kernel/cpu/mcheck/non-fatal.c       +1    -2
    arch/x86/kernel/cpu/mcheck/p4.c              +1   -47
    arch/x86/kernel/cpu/mcheck/p5.c              +6    -9
    arch/x86/kernel/cpu/mcheck/p6.c              +1    -2
    arch/x86/kernel/cpu/mcheck/therm_throt.c   +102    -4
    arch/x86/kernel/cpu/mcheck/winchip.c         +1    -2
    arch/x86/kernel/traps.c                      +1    -2
arch/x86/include/asm/mce.h

```diff
@@ -102,15 +102,39 @@ struct mce_log {
 #ifdef __KERNEL__
 
-#include <linux/percpu.h>
-#include <linux/init.h>
-#include <asm/atomic.h>
+extern int mce_disabled;
+extern int mce_p5_enabled;
+
+#include <asm/atomic.h>
+#include <linux/percpu.h>
+
+#ifdef CONFIG_X86_MCE
+void mcheck_init(struct cpuinfo_x86 *c);
+#else
+static inline void mcheck_init(struct cpuinfo_x86 *c) {}
+#endif
+
+#ifdef CONFIG_X86_OLD_MCE
+extern int nr_mce_banks;
+void amd_mcheck_init(struct cpuinfo_x86 *c);
+void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
+void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
+#endif
+
+#ifdef CONFIG_X86_ANCIENT_MCE
+void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
+void winchip_mcheck_init(struct cpuinfo_x86 *c);
+static inline void enable_p5_mce(void) { mce_p5_enabled = 1; }
+#else
+static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline void enable_p5_mce(void) {}
+#endif
 
 void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
 DECLARE_PER_CPU(struct sys_device, mce_dev);
-extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 
 /*
  * To support more than 128 would need to escape the predefined
@@ -145,12 +169,8 @@ int mce_available(struct cpuinfo_x86 *c);
 DECLARE_PER_CPU(unsigned, mce_exception_count);
 DECLARE_PER_CPU(unsigned, mce_poll_count);
 
-void mce_log_therm_throt_event(__u64 status);
-
 extern atomic_t mce_entry;
 
-void do_machine_check(struct pt_regs *, long);
-
 typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
 DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
@@ -167,13 +187,32 @@ void mce_notify_process(void);
 DECLARE_PER_CPU(struct mce, injectm);
 extern struct file_operations mce_chrdev_ops;
 
-#ifdef CONFIG_X86_MCE
-void mcheck_init(struct cpuinfo_x86 *c);
-#else
-#define mcheck_init(c) do { } while (0)
-#endif
+/*
+ * Exception handler
+ */
+
+/* Call the installed machine check handler for this CPU setup. */
+extern void (*machine_check_vector)(struct pt_regs *, long error_code);
+void do_machine_check(struct pt_regs *, long);
+
+/*
+ * Threshold handler
+ */
+extern void (*mce_threshold_vector)(void);
+extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
+
+/*
+ * Thermal handler
+ */
+void intel_init_thermal(struct cpuinfo_x86 *c);
+
+#ifdef CONFIG_X86_NEW_MCE
+void mce_log_therm_throt_event(__u64 status);
+#else
+static inline void mce_log_therm_throt_event(__u64 status) {}
+#endif
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_X86_MCE_H */
```
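The header routes every machine check through one function pointer, `machine_check_vector`, so the exception entry code never needs to know which vendor handler is active. A minimal userspace sketch of that indirection (illustrative only; `struct pt_regs` is left opaque and both handlers here are stand-ins, not the kernel's):

```c
#include <stdio.h>
#include <stddef.h>

struct pt_regs;			/* opaque, as in the kernel */

/* Mirrors unexpected_machine_check() in mce.c: the boot-time default. */
static void default_handler(struct pt_regs *regs, long error_code)
{
	printf("unexpected machine check, code %ld\n", error_code);
}

/* The single indirection point declared in asm/mce.h. */
void (*machine_check_vector)(struct pt_regs *, long) = default_handler;

static void k7_machine_check(struct pt_regs *regs, long error_code)
{
	printf("K7 handler, code %ld\n", error_code);
}

int main(void)
{
	/* CPU setup picks the right handler once at boot... */
	machine_check_vector = k7_machine_check;
	/* ...and the #MC entry just calls through the pointer. */
	machine_check_vector(NULL, 0);
	return 0;
}
```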
arch/x86/include/asm/therm_throt.h  (deleted)

```diff
-#ifndef _ASM_X86_THERM_THROT_H
-#define _ASM_X86_THERM_THROT_H
-
-#include <asm/atomic.h>
-
-extern atomic_t therm_throt_en;
-int therm_throt_process(int curr);
-
-#endif /* _ASM_X86_THERM_THROT_H */
```
arch/x86/kernel/cpu/mcheck/Makefile

```diff
-obj-y				=  mce.o therm_throt.o
+obj-y				=  mce.o
 
+obj-$(CONFIG_X86_NEW_MCE)	+= mce-severity.o
 obj-$(CONFIG_X86_OLD_MCE)	+= k7.o p4.o p6.o
 obj-$(CONFIG_X86_ANCIENT_MCE)	+= winchip.o p5.o
-obj-$(CONFIG_X86_MCE_P4THERMAL)	+= mce_intel.o
-obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel_64.o mce_intel.o
-obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd_64.o
+obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o
+obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd.o
 obj-$(CONFIG_X86_MCE_NONFATAL)	+= non-fatal.o
 obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
 obj-$(CONFIG_X86_MCE_INJECT)	+= mce-inject.o
+obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
```
arch/x86/kernel/cpu/mcheck/k7.c

```diff
@@ -10,10 +10,9 @@
 #include <asm/processor.h>
 #include <asm/system.h>
+#include <asm/mce.h>
 #include <asm/msr.h>
 
-#include "mce.h"
-
 /* Machine Check Handler For AMD Athlon/Duron: */
 static void k7_machine_check(struct pt_regs *regs, long error_code)
 {
```
arch/x86/kernel/cpu/mcheck/mce.c

```diff
@@ -44,7 +44,6 @@
 #include <asm/msr.h>
 
 #include "mce-internal.h"
-#include "mce.h"
 
 /* Handle unconfigured int18 (should never happen) */
 static void unexpected_machine_check(struct pt_regs *regs, long error_code)
@@ -57,7 +56,7 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
 void (*machine_check_vector)(struct pt_regs *, long error_code) =
 						unexpected_machine_check;
 
-int mce_disabled;
+int mce_disabled __read_mostly;
 
 #ifdef CONFIG_X86_NEW_MCE
@@ -76,21 +75,22 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
  * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
  * 3: never panic or SIGBUS, log all errors (for testing only)
  */
-static int tolerant = 1;
-static int banks;
-static u64 *bank;
-static unsigned long notify_user;
-static int rip_msr;
-static int mce_bootlog = -1;
-static int monarch_timeout = -1;
-static int mce_panic_timeout;
-static int mce_dont_log_ce;
-int mce_cmci_disabled;
-int mce_ignore_ce;
-int mce_ser;
-
-static char trigger[128];
-static char *trigger_argv[2] = { trigger, NULL };
+static int tolerant		__read_mostly = 1;
+static int banks		__read_mostly;
+static u64 *bank		__read_mostly;
+static int rip_msr		__read_mostly;
+static int mce_bootlog		__read_mostly = -1;
+static int monarch_timeout	__read_mostly = -1;
+static int mce_panic_timeout	__read_mostly;
+static int mce_dont_log_ce	__read_mostly;
+int mce_cmci_disabled		__read_mostly;
+int mce_ignore_ce		__read_mostly;
+int mce_ser			__read_mostly;
+
+/* User mode helper program triggered by machine check event */
+static unsigned long		mce_need_notify;
+static char			mce_helper[128];
+static char			*mce_helper_argv[2] = { mce_helper, NULL };
 
 static unsigned long		dont_init_banks;
```
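Nearly every tunable in this hunk picks up `__read_mostly`. On x86 of this era the annotation is just a section attribute (defined in arch/x86/include/asm/cache.h); the linker then groups all such objects together so that data written once at boot does not share cache lines with hot, frequently written data. A sketch of the mechanism (the section name matches 2.6.30-era x86; later kernels renamed it):

```c
/* What the annotation boils down to (sketch of the kernel's definition): */
#define __read_mostly __attribute__((__section__(".data.read_mostly")))

/* All __read_mostly objects end up laid out next to each other: */
static int tolerant __read_mostly = 1;
static int banks    __read_mostly;
```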
```diff
@@ -180,7 +180,7 @@ void mce_log(struct mce *mce)
 	wmb();
 
 	mce->finished = 1;
-	set_bit(0, &notify_user);
+	set_bit(0, &mce_need_notify);
 }
 
 static void print_mce(struct mce *m)
@@ -691,18 +691,21 @@ static atomic_t global_nwo;
  * in the entry order.
  * TBD double check parallel CPU hotunplug
  */
-static int mce_start(int no_way_out, int *order)
+static int mce_start(int *no_way_out)
 {
-	int nwo;
+	int order;
 	int cpus = num_online_cpus();
 	u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
 
-	if (!timeout) {
-		*order = -1;
-		return no_way_out;
-	}
+	if (!timeout)
+		return -1;
 
-	atomic_add(no_way_out, &global_nwo);
+	atomic_add(*no_way_out, &global_nwo);
+	/*
+	 * global_nwo should be updated before mce_callin
+	 */
+	smp_wmb();
+	order = atomic_add_return(1, &mce_callin);
 
 	/*
 	 * Wait for everyone.
@@ -710,40 +713,43 @@ static int mce_start(int no_way_out, int *order)
 	while (atomic_read(&mce_callin) != cpus) {
 		if (mce_timed_out(&timeout)) {
 			atomic_set(&global_nwo, 0);
-			*order = -1;
-			return no_way_out;
+			return -1;
 		}
 		ndelay(SPINUNIT);
 	}
 
 	/*
-	 * Cache the global no_way_out state.
+	 * mce_callin should be read before global_nwo
 	 */
-	nwo = atomic_read(&global_nwo);
+	smp_rmb();
 
-	/*
-	 * Monarch starts executing now, the others wait.
-	 */
-	if (*order == 1) {
+	if (order == 1) {
+		/*
+		 * Monarch: Starts executing now, the others wait.
+		 */
 		atomic_set(&mce_executing, 1);
-		return nwo;
+	} else {
+		/*
+		 * Subject: Now start the scanning loop one by one in
+		 * the original callin order.
+		 * This way when there are any shared banks it will be
+		 * only seen by one CPU before cleared, avoiding duplicates.
+		 */
+		while (atomic_read(&mce_executing) < order) {
+			if (mce_timed_out(&timeout)) {
+				atomic_set(&global_nwo, 0);
+				return -1;
+			}
+			ndelay(SPINUNIT);
+		}
 	}
 
 	/*
-	 * Now start the scanning loop one by one
-	 * in the original callin order.
-	 * This way when there are any shared banks it will
-	 * be only seen by one CPU before cleared, avoiding duplicates.
+	 * Cache the global no_way_out state.
 	 */
-	while (atomic_read(&mce_executing) < *order) {
-		if (mce_timed_out(&timeout)) {
-			atomic_set(&global_nwo, 0);
-			*order = -1;
-			return no_way_out;
-		}
-		ndelay(SPINUNIT);
-	}
-	return nwo;
+	*no_way_out = atomic_read(&global_nwo);
+
+	return order;
 }
```
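The rewritten mce_start() now derives the callin order itself and hands the aggregated no-way-out state back through its pointer argument, so do_machine_check() no longer juggles both values. The protocol: every CPU entering the exception increments mce_callin; the CPU that drew order 1 becomes the monarch and scans its banks first, and each remaining "subject" proceeds strictly in callin order, so shared banks are seen and cleared by exactly one CPU. A compressed userspace model of the rendezvous, with the timeout handling dropped (illustrative only, not kernel code):

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4

static atomic_int callin, executing, global_nwo;

static int model_mce_start(int *no_way_out)
{
	int order;

	atomic_fetch_add(&global_nwo, *no_way_out);
	order = atomic_fetch_add(&callin, 1) + 1;

	while (atomic_load(&callin) != NCPUS)
		;				/* wait for everyone to call in */

	if (order == 1)
		atomic_store(&executing, 1);	/* monarch scans first */
	else
		while (atomic_load(&executing) < order)
			;			/* subjects wait their turn */

	*no_way_out = atomic_load(&global_nwo);	/* cache the global state */
	return order;
}

static void *cpu(void *arg)
{
	int nwo = 0;
	int order = model_mce_start(&nwo);

	printf("slot %d scans banks (nwo=%d)\n", order, nwo);
	atomic_fetch_add(&executing, 1);	/* done: release the next CPU */
	return NULL;
}

int main(void)
{
	pthread_t t[NCPUS];
	int i;

	for (i = 0; i < NCPUS; i++)
		pthread_create(&t[i], NULL, cpu, NULL);
	for (i = 0; i < NCPUS; i++)
		pthread_join(t[i], NULL);
	return 0;
}
```

Compile with `-pthread`; the busy-wait loops stand in for the kernel's ndelay()/timeout polling.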
```diff
@@ -863,7 +869,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	 * check handler.
 	 */
-	int order;
 
 	/*
 	 * If no_way_out gets set, there is no safe way to recover from this
 	 * MCE. If tolerant is cranked up, we'll try anyway.
@@ -887,7 +892,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	if (!banks)
 		goto out;
 
-	order = atomic_add_return(1, &mce_callin);
-
 	mce_setup(&m);
 
 	m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
@@ -909,7 +913,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	 * This way we don't report duplicated events on shared banks
 	 * because the first one to see it will clear it.
 	 */
-	no_way_out = mce_start(no_way_out, &order);
+	order = mce_start(&no_way_out);
 	for (i = 0; i < banks; i++) {
 		__clear_bit(i, toclear);
 		if (!bank[i])
@@ -1118,7 +1122,7 @@ static void mcheck_timer(unsigned long data)
 
 static void mce_do_trigger(struct work_struct *work)
 {
-	call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
+	call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
 }
 
 static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
@@ -1135,7 +1139,7 @@ int mce_notify_irq(void)
 	clear_thread_flag(TIF_MCE_NOTIFY);
 
-	if (test_and_clear_bit(0, &notify_user)) {
+	if (test_and_clear_bit(0, &mce_need_notify)) {
 		wake_up_interruptible(&mce_wait);
 
 	/*
@@ -1143,7 +1147,7 @@ int mce_notify_irq(void)
 		 * work_pending is always cleared before the function is
 		 * executed.
 		 */
-		if (trigger[0] && !work_pending(&mce_trigger_work))
+		if (mce_helper[0] && !work_pending(&mce_trigger_work))
 			schedule_work(&mce_trigger_work);
 
 		if (__ratelimit(&ratelimit))
@@ -1282,8 +1286,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
 		return;
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
-		if (mce_p5_enabled())
-			intel_p5_mcheck_init(c);
+		intel_p5_mcheck_init(c);
 		break;
 	case X86_VENDOR_CENTAUR:
 		winchip_mcheck_init(c);
@@ -1609,8 +1612,9 @@ static int mce_resume(struct sys_device *dev)
 static void mce_cpu_restart(void *data)
 {
 	del_timer_sync(&__get_cpu_var(mce_timer));
-	if (mce_available(&current_cpu_data))
-		mce_init();
+	if (!mce_available(&current_cpu_data))
+		return;
+	mce_init();
 	mce_init_timer();
 }
@@ -1620,6 +1624,26 @@ static void mce_restart(void)
 	on_each_cpu(mce_cpu_restart, NULL, 1);
 }
 
+/* Toggle features for corrected errors */
+static void mce_disable_ce(void *all)
+{
+	if (!mce_available(&current_cpu_data))
+		return;
+	if (all)
+		del_timer_sync(&__get_cpu_var(mce_timer));
+	cmci_clear();
+}
+
+static void mce_enable_ce(void *all)
+{
+	if (!mce_available(&current_cpu_data))
+		return;
+	cmci_reenable();
+	cmci_recheck();
+	if (all)
+		mce_init_timer();
+}
+
 static struct sysdev_class mce_sysclass = {
 	.suspend	= mce_suspend,
 	.shutdown	= mce_shutdown,
@@ -1659,9 +1683,9 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
 static ssize_t
 show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf)
 {
-	strcpy(buf, trigger);
+	strcpy(buf, mce_helper);
 	strcat(buf, "\n");
-	return strlen(trigger) + 1;
+	return strlen(mce_helper) + 1;
 }
 
 static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
@@ -1670,10 +1694,10 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
 	char *p;
 	int len;
 
-	strncpy(trigger, buf, sizeof(trigger));
-	trigger[sizeof(trigger)-1] = 0;
-	len = strlen(trigger);
-	p = strchr(trigger, '\n');
+	strncpy(mce_helper, buf, sizeof(mce_helper));
+	mce_helper[sizeof(mce_helper)-1] = 0;
+	len = strlen(mce_helper);
+	p = strchr(mce_helper, '\n');
 
 	if (*p)
 		*p = 0;
@@ -1681,6 +1705,52 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
 	return len;
 }
 
+static ssize_t set_ignore_ce(struct sys_device *s,
+			     struct sysdev_attribute *attr,
+			     const char *buf, size_t size)
+{
+	u64 new;
+
+	if (strict_strtoull(buf, 0, &new) < 0)
+		return -EINVAL;
+
+	if (mce_ignore_ce ^ !!new) {
+		if (new) {
+			/* disable ce features */
+			on_each_cpu(mce_disable_ce, (void *)1, 1);
+			mce_ignore_ce = 1;
+		} else {
+			/* enable ce features */
+			mce_ignore_ce = 0;
+			on_each_cpu(mce_enable_ce, (void *)1, 1);
+		}
+	}
+	return size;
+}
+
+static ssize_t set_cmci_disabled(struct sys_device *s,
+				 struct sysdev_attribute *attr,
+				 const char *buf, size_t size)
+{
+	u64 new;
+
+	if (strict_strtoull(buf, 0, &new) < 0)
+		return -EINVAL;
+
+	if (mce_cmci_disabled ^ !!new) {
+		if (new) {
+			/* disable cmci */
+			on_each_cpu(mce_disable_ce, NULL, 1);
+			mce_cmci_disabled = 1;
+		} else {
+			/* enable cmci */
+			mce_cmci_disabled = 0;
+			on_each_cpu(mce_enable_ce, NULL, 1);
+		}
+	}
+	return size;
+}
```
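Both new attributes are registered below under the machinecheck sysdev class, which appears per CPU under /sys/devices/system/machinecheck/. A minimal userspace sketch of flipping the new knob (the path is an assumption based on that class name and may vary by kernel and config):

```c
#include <stdio.h>

int main(void)
{
	/* assumed sysfs location of the attribute added above */
	FILE *f = fopen("/sys/devices/system/machinecheck/machinecheck0/ignore_ce", "w");

	if (!f) {
		perror("ignore_ce");
		return 1;
	}
	fputs("1\n", f);	/* stop logging/polling corrected errors */
	return fclose(f) ? 1 : 0;
}
```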
```diff
@@ -1693,6 +1763,7 @@ static ssize_t store_int_with_restart(struct sys_device *s,
 static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
 static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
 static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
+static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce);
 
 static struct sysdev_ext_attribute attr_check_interval = {
 	_SYSDEV_ATTR(check_interval, 0644, sysdev_show_int,
@@ -1700,9 +1771,24 @@ static struct sysdev_ext_attribute attr_check_interval = {
 	&check_interval
 };
 
+static struct sysdev_ext_attribute attr_ignore_ce = {
+	_SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce),
+	&mce_ignore_ce
+};
+
+static struct sysdev_ext_attribute attr_cmci_disabled = {
+	_SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled),
+	&mce_cmci_disabled
+};
+
 static struct sysdev_attribute *mce_attrs[] = {
-	&attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger,
+	&attr_tolerant.attr,
+	&attr_check_interval.attr,
+	&attr_trigger,
 	&attr_monarch_timeout.attr,
+	&attr_dont_log_ce.attr,
+	&attr_ignore_ce.attr,
+	&attr_cmci_disabled.attr,
 	NULL
 };
@@ -1712,7 +1798,7 @@ static cpumask_var_t mce_dev_initialized;
 static __cpuinit int mce_create_device(unsigned int cpu)
 {
 	int err;
-	int i;
+	int i, j;
 
 	if (!mce_available(&boot_cpu_data))
 		return -EIO;
@@ -1730,9 +1816,9 @@ static __cpuinit int mce_create_device(unsigned int cpu)
 		if (err)
 			goto error;
 	}
-	for (i = 0; i < banks; i++) {
+	for (j = 0; j < banks; j++) {
 		err = sysdev_create_file(&per_cpu(mce_dev, cpu),
-					&bank_attrs[i]);
+					&bank_attrs[j]);
 		if (err)
 			goto error2;
 	}
@@ -1740,8 +1826,8 @@ static __cpuinit int mce_create_device(unsigned int cpu)
 	return 0;
 error2:
-	while (--i >= 0)
-		sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
+	while (--j >= 0)
+		sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]);
 error:
 	while (--i >= 0)
 		sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
@@ -1883,7 +1969,7 @@ static __init int mce_init_device(void)
 	if (!mce_available(&boot_cpu_data))
 		return -EIO;
 
-	alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
+	zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
 
 	err = mce_init_banks();
 	if (err)
@@ -1915,7 +2001,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
 /* This has to be run for each processor */
 void mcheck_init(struct cpuinfo_x86 *c)
 {
-	if (mce_disabled == 1)
+	if (mce_disabled)
 		return;
 
 	switch (c->x86_vendor) {
@@ -1945,10 +2031,9 @@ void mcheck_init(struct cpuinfo_x86 *c)
 static int __init mcheck_enable(char *str)
 {
-	mce_disabled = -1;
+	mce_p5_enabled = 1;
 	return 1;
 }
 __setup("mce", mcheck_enable);
 
 #endif /* CONFIG_X86_OLD_MCE */
```
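A quiet fix in mce_create_device() above: the bank loop now uses its own index `j`. Previously it reused `i`, so when creating a bank file failed, the `error2:` unwind counted `i` down below zero and the subsequent `error:` loop never removed the per-CPU attribute files created earlier. With separate indices, each error label unwinds exactly its own loop and then falls through. A userspace sketch of the same unwind shape (create/destroy are stand-ins):

```c
#include <stdio.h>

static int create(const char *what, int idx, int fail_at)
{
	if (idx == fail_at)
		return -1;
	printf("created %s[%d]\n", what, idx);
	return 0;
}

static void destroy(const char *what, int idx)
{
	printf("removed %s[%d]\n", what, idx);
}

/* Mirrors mce_create_device(): attrs use i, banks use j. */
static int create_device(int nattrs, int nbanks, int fail_at_bank)
{
	int i, j;

	for (i = 0; i < nattrs; i++)
		if (create("attr", i, -1))
			goto error;
	for (j = 0; j < nbanks; j++)
		if (create("bank", j, fail_at_bank))
			goto error2;
	return 0;
error2:
	while (--j >= 0)
		destroy("bank", j);	/* unwind banks... */
error:
	while (--i >= 0)
		destroy("attr", i);	/* ...then all attrs (i == nattrs here) */
	return -1;
}

int main(void)
{
	return create_device(2, 3, 1) ? 1 : 0;
}
```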
arch/x86/kernel/cpu/mcheck/mce.h  (deleted)

```diff
-#include <linux/init.h>
-#include <asm/mce.h>
-
-#ifdef CONFIG_X86_OLD_MCE
-void amd_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
-#endif
-
-#ifdef CONFIG_X86_ANCIENT_MCE
-void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
-void winchip_mcheck_init(struct cpuinfo_x86 *c);
-extern int mce_p5_enable;
-static inline int mce_p5_enabled(void) { return mce_p5_enable; }
-static inline void enable_p5_mce(void) { mce_p5_enable = 1; }
-#else
-static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
-static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
-static inline int mce_p5_enabled(void) { return 0; }
-static inline void enable_p5_mce(void) { }
-#endif
-
-/* Call the installed machine check handler for this CPU setup. */
-extern void (*machine_check_vector)(struct pt_regs *, long error_code);
-
-#ifdef CONFIG_X86_OLD_MCE
-extern int nr_mce_banks;
-void intel_set_thermal_handler(void);
-#else
-static inline void intel_set_thermal_handler(void) { }
-#endif
-
-void intel_init_thermal(struct cpuinfo_x86 *c);
```
arch/x86/kernel/cpu/mcheck/mce_amd_64.c → arch/x86/kernel/cpu/mcheck/mce_amd.c

File moved, no content changes.
arch/x86/kernel/cpu/mcheck/mce_intel.c

This file is effectively rewritten (+200 / -54): the 32-bit-only intel_init_thermal() moves out (a unified copy now lives in therm_throt.c, below), and the CMCI support moves in from the deleted mce_intel_64.c.

```diff
 /*
- * Common code for Intel machine checks
+ * Intel specific MCE features.
  * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
  * Copyright (C) 2008, 2009 Intel Corporation
  * Author: Andi Kleen
  */
 
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
 #include <linux/init.h>
-#include <linux/smp.h>
-
-#include <asm/therm_throt.h>
-#include <asm/processor.h>
-#include <asm/system.h>
+#include <linux/interrupt.h>
+#include <linux/percpu.h>
 #include <asm/apic.h>
+#include <asm/processor.h>
 #include <asm/msr.h>
+#include <asm/mce.h>
 
-#include "mce.h"
-
-void intel_init_thermal(struct cpuinfo_x86 *c)
-{
-	unsigned int cpu = smp_processor_id();
-	int tm2 = 0;
-	u32 l, h;
-
-	/*
-	 * Thermal monitoring depends on ACPI, clock modulation
-	 * and APIC as well
-	 */
-	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC) ||
-	    !cpu_has(c, X86_FEATURE_APIC)) {
-		pr_debug("Thermal monitoring disabled\n");
-		return;
-	}
-
-	/*
-	 * First check if its enabled already, in which case there might
-	 * be some SMM goo which handles it, so we can't even put a handler
-	 * since it might be delivered via SMI already:
-	 */
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	h = apic_read(APIC_LVTTHMR);
-	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
-		printk(KERN_DEBUG
-		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
-		return;
-	}
-
-	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
-		tm2 = 1;
-
-	/* Check whether a vector already exists */
-	if (h & APIC_VECTOR_MASK) {
-		printk(KERN_DEBUG
-		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
-		       cpu, (h & APIC_VECTOR_MASK));
-		return;
-	}
-
-	/* We'll mask the thermal vector in the lapic till we're ready: */
-	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
-	apic_write(APIC_LVTTHMR, h);
-
-	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
-	wrmsr(MSR_IA32_THERM_INTERRUPT,
-	      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
-
-	intel_set_thermal_handler();
-
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
-
-	/* Unmask the thermal vector: */
-	l = apic_read(APIC_LVTTHMR);
-	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
-
-	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
-	       cpu, tm2 ? "TM2" : "TM1");
-
-	/* enable thermal throttle processing */
-	atomic_set(&therm_throt_en, 1);
-}
+/*
+ * Support for Intel Correct Machine Check Interrupts. This allows
+ * the CPU to raise an interrupt when a corrected machine check happened.
+ * Normally we pick those up using a regular polling timer.
+ * Also supports reliable discovery of shared banks.
+ */
+
+static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
+
+/*
+ * cmci_discover_lock protects against parallel discovery attempts
+ * which could race against each other.
+ */
+static DEFINE_SPINLOCK(cmci_discover_lock);
+
+#define CMCI_THRESHOLD 1
+
+static int cmci_supported(int *banks)
+{
+	u64 cap;
+
+	if (mce_cmci_disabled || mce_ignore_ce)
+		return 0;
+
+	/*
+	 * Vendor check is not strictly needed, but the initial
+	 * initialization is vendor keyed and this
+	 * makes sure none of the backdoors are entered otherwise.
+	 */
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		return 0;
+	if (!cpu_has_apic || lapic_get_maxlvt() < 6)
+		return 0;
+	rdmsrl(MSR_IA32_MCG_CAP, cap);
+	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
+	return !!(cap & MCG_CMCI_P);
+}
+
+/*
+ * The interrupt handler. This is called on every event.
+ * Just call the poller directly to log any events.
+ * This could in theory increase the threshold under high load,
+ * but doesn't for now.
+ */
+static void intel_threshold_interrupt(void)
+{
+	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+	mce_notify_irq();
+}
+
+static void print_update(char *type, int *hdr, int num)
+{
+	if (*hdr == 0)
+		printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
+	*hdr = 1;
+	printk(KERN_CONT " %s:%d", type, num);
+}
+
+/*
+ * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
+ * on this CPU. Use the algorithm recommended in the SDM to discover shared
+ * banks.
+ */
+static void cmci_discover(int banks, int boot)
+{
+	unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
+	unsigned long flags;
+	int hdr = 0;
+	int i;
+
+	spin_lock_irqsave(&cmci_discover_lock, flags);
+	for (i = 0; i < banks; i++) {
+		u64 val;
+
+		if (test_bit(i, owned))
+			continue;
+
+		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+
+		/* Already owned by someone else? */
+		if (val & CMCI_EN) {
+			if (test_and_clear_bit(i, owned) || boot)
+				print_update("SHD", &hdr, i);
+			__clear_bit(i, __get_cpu_var(mce_poll_banks));
+			continue;
+		}
+
+		val |= CMCI_EN | CMCI_THRESHOLD;
+		wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
+		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+
+		/* Did the enable bit stick? -- the bank supports CMCI */
+		if (val & CMCI_EN) {
+			if (!test_and_set_bit(i, owned) || boot)
+				print_update("CMCI", &hdr, i);
+			__clear_bit(i, __get_cpu_var(mce_poll_banks));
+		} else {
+			WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
+		}
+	}
+	spin_unlock_irqrestore(&cmci_discover_lock, flags);
+	if (hdr)
+		printk(KERN_CONT "\n");
+}
+
+/*
+ * Just in case we missed an event during initialization check
+ * all the CMCI owned banks.
+ */
+void cmci_recheck(void)
+{
+	unsigned long flags;
+	int banks;
+
+	if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
+		return;
+	local_irq_save(flags);
+	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+	local_irq_restore(flags);
+}
+
+/*
+ * Disable CMCI on this CPU for all banks it owns when it goes down.
+ * This allows other CPUs to claim the banks on rediscovery.
+ */
+void cmci_clear(void)
+{
+	unsigned long flags;
+	int i;
+	int banks;
+	u64 val;
+
+	if (!cmci_supported(&banks))
+		return;
+	spin_lock_irqsave(&cmci_discover_lock, flags);
+	for (i = 0; i < banks; i++) {
+		if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
+			continue;
+		/* Disable CMCI */
+		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+		val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
+		wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
+		__clear_bit(i, __get_cpu_var(mce_banks_owned));
+	}
+	spin_unlock_irqrestore(&cmci_discover_lock, flags);
+}
+
+/*
+ * After a CPU went down cycle through all the others and rediscover
+ * Must run in process context.
+ */
+void cmci_rediscover(int dying)
+{
+	int banks;
+	int cpu;
+	cpumask_var_t old;
+
+	if (!cmci_supported(&banks))
+		return;
+	if (!alloc_cpumask_var(&old, GFP_KERNEL))
+		return;
+	cpumask_copy(old, &current->cpus_allowed);
+
+	for_each_online_cpu(cpu) {
+		if (cpu == dying)
+			continue;
+		if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
+			continue;
+		/* Recheck banks in case CPUs don't all have the same */
+		if (cmci_supported(&banks))
+			cmci_discover(banks, 0);
+	}
+
+	set_cpus_allowed_ptr(current, old);
+	free_cpumask_var(old);
+}
+
+/*
+ * Reenable CMCI on this CPU in case a CPU down failed.
+ */
+void cmci_reenable(void)
+{
+	int banks;
+	if (cmci_supported(&banks))
+		cmci_discover(banks, 0);
+}
+
+static void intel_init_cmci(void)
+{
+	int banks;
+
+	if (!cmci_supported(&banks))
+		return;
+
+	mce_threshold_vector = intel_threshold_interrupt;
+	cmci_discover(banks, 1);
+	/*
+	 * For CPU #0 this runs with still disabled APIC, but that's
+	 * ok because only the vector is set up. We still do another
+	 * check for the banks later for CPU #0 just to make sure
+	 * to not miss any events.
+	 */
+	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
+	cmci_recheck();
+}
+
+void mce_intel_feature_init(struct cpuinfo_x86 *c)
+{
+	intel_init_thermal(c);
+	intel_init_cmci();
+}
```
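The core of cmci_discover() is the ownership probe recommended by the SDM: read MCi_CTL2; if CMCI_EN is already set, a sibling sharing the bank owns it; otherwise set CMCI_EN and read back, and only if the bit sticks does the bank support CMCI and belong to this CPU (either way it is dropped from the polling set). A userspace model of just that probe, with the MSR bank modelled as an array (the CMCI_EN bit position is an assumption here):

```c
#include <stdio.h>

#define CMCI_EN		(1ULL << 30)	/* assumed bit position */
#define CMCI_THRESHOLD	1
#define NBANKS		4

static unsigned long long ctl2[NBANKS];		/* stands in for MCi_CTL2 */
static const int cmci_capable[NBANKS] = { 1, 1, 0, 1 };

static void wrmsrl_model(int i, unsigned long long v)
{
	if (cmci_capable[i])
		ctl2[i] = v;			/* EN sticks */
	else
		ctl2[i] = v & ~CMCI_EN;		/* EN reads back as zero */
}

static int claim_bank(int i)
{
	unsigned long long val = ctl2[i];

	if (val & CMCI_EN)
		return 0;			/* owned by another CPU */
	wrmsrl_model(i, val | CMCI_EN | CMCI_THRESHOLD);
	return !!(ctl2[i] & CMCI_EN);		/* did the enable bit stick? */
}

int main(void)
{
	int i;

	ctl2[1] = CMCI_EN;			/* bank 1: claimed elsewhere */
	for (i = 0; i < NBANKS; i++)
		printf("bank %d: %s\n", i,
		       claim_bank(i) ? "CMCI, ours" : "shared or poll-only");
	return 0;
}
```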
arch/x86/kernel/cpu/mcheck/mce_intel_64.c  (deleted)

The whole file is removed. Its thermal-interrupt half:

```c
/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/msr.h>
#include <asm/mce.h>
#include <asm/hw_irq.h>
#include <asm/idle.h>
#include <asm/therm_throt.h>

#include "mce.h"

asmlinkage void smp_thermal_interrupt(void)
{
	__u64 msr_val;

	ack_APIC_irq();

	exit_idle();
	irq_enter();

	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
		mce_log_therm_throt_event(msr_val);

	inc_irq_stat(irq_thermal_count);
	irq_exit();
}
```

goes away (a reworked version now lives in therm_throt.c, below), and the rest of the file, the CMCI implementation from `mce_banks_owned`/`cmci_supported()` down through `mce_intel_feature_init()`, moved verbatim into mce_intel.c above.
arch/x86/kernel/cpu/mcheck/non-fatal.c

```diff
@@ -17,10 +17,9 @@
 #include <asm/processor.h>
 #include <asm/system.h>
+#include <asm/mce.h>
 #include <asm/msr.h>
 
-#include "mce.h"
-
 static int firstbank;
 
 #define MCE_RATE	(15*HZ)	/* timer rate is 15s */
```
arch/x86/kernel/cpu/mcheck/p4.c

```diff
 /*
  * P4 specific Machine Check Exception Reporting
  */
-#include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/smp.h>
 
-#include <asm/therm_throt.h>
 #include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/apic.h>
+#include <asm/mce.h>
 #include <asm/msr.h>
 
-#include "mce.h"
-
 /* as supported by the P4/Xeon family */
 struct intel_mce_extended_msrs {
 	u32 eax;
@@ -33,46 +27,6 @@ struct intel_mce_extended_msrs {
 
 static int mce_num_extended_msrs;
 
-
-#ifdef CONFIG_X86_MCE_P4THERMAL
-
-static void unexpected_thermal_interrupt(struct pt_regs *regs)
-{
-	printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
-			smp_processor_id());
-	add_taint(TAINT_MACHINE_CHECK);
-}
-
-/* P4/Xeon Thermal transition interrupt handler: */
-static void intel_thermal_interrupt(struct pt_regs *regs)
-{
-	__u64 msr_val;
-
-	ack_APIC_irq();
-
-	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
-	therm_throt_process(msr_val & THERM_STATUS_PROCHOT);
-}
-
-/* Thermal interrupt handler for this CPU setup: */
-static void (*vendor_thermal_interrupt)(struct pt_regs *regs) =
-						unexpected_thermal_interrupt;
-
-void smp_thermal_interrupt(struct pt_regs *regs)
-{
-	irq_enter();
-	vendor_thermal_interrupt(regs);
-	__get_cpu_var(irq_stat).irq_thermal_count++;
-	irq_exit();
-}
-
-void intel_set_thermal_handler(void)
-{
-	vendor_thermal_interrupt = intel_thermal_interrupt;
-}
-
-#endif /* CONFIG_X86_MCE_P4THERMAL */
-
 /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
 static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
 {
```
arch/x86/kernel/cpu/mcheck/p5.c

```diff
@@ -10,12 +10,11 @@
 #include <asm/processor.h>
 #include <asm/system.h>
+#include <asm/mce.h>
 #include <asm/msr.h>
 
-#include "mce.h"
-
 /* By default disabled */
-int mce_p5_enable;
+int mce_p5_enabled __read_mostly;
 
 /* Machine check handler for Pentium class Intel CPUs: */
 static void pentium_machine_check(struct pt_regs *regs, long error_code)
@@ -43,15 +42,13 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
 
-	/* Check for MCE support: */
-	if (!cpu_has(c, X86_FEATURE_MCE))
+	/* Default P5 to off as its often misconnected: */
+	if (!mce_p5_enabled)
 		return;
 
-#ifdef CONFIG_X86_OLD_MCE
-	/* Default P5 to off as its often misconnected: */
-	if (mce_disabled != -1)
+	/* Check for MCE support: */
+	if (!cpu_has(c, X86_FEATURE_MCE))
 		return;
-#endif
 
 	machine_check_vector = pentium_machine_check;
 	/* Make sure the vector pointer is visible before we enable MCEs: */
```
arch/x86/kernel/cpu/mcheck/p6.c

```diff
@@ -10,10 +10,9 @@
 #include <asm/processor.h>
 #include <asm/system.h>
+#include <asm/mce.h>
 #include <asm/msr.h>
 
-#include "mce.h"
-
 /* Machine Check Handler For PII/PIII */
 static void intel_machine_check(struct pt_regs *regs, long error_code)
 {
```
arch/x86/kernel/cpu/mcheck/therm_throt.c

```diff
@@ -13,13 +13,23 @@
  * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
  * Inspired by Ross Biro's and Al Borchers' counter code.
  */
+#include <linux/interrupt.h>
 #include <linux/notifier.h>
 #include <linux/jiffies.h>
+#include <linux/kernel.h>
 #include <linux/percpu.h>
 #include <linux/sysdev.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/smp.h>
 #include <linux/cpu.h>
 
-#include <asm/therm_throt.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/apic.h>
+#include <asm/idle.h>
+#include <asm/mce.h>
+#include <asm/msr.h>
 
 /* How long to wait between reporting thermal events */
 #define CHECK_INTERVAL		(300 * HZ)
@@ -27,7 +37,7 @@
 static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
 static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
 
-atomic_t therm_throt_en = ATOMIC_INIT(0);
+static atomic_t therm_throt_en = ATOMIC_INIT(0);
 
 #ifdef CONFIG_SYSFS
 #define define_therm_throt_sysdev_one_ro(_name)				\
@@ -82,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = {
  * 1 : Event should be logged further, and a message has been
  *     printed to the syslog.
  */
-int therm_throt_process(int curr)
+static int therm_throt_process(int curr)
 {
 	unsigned int cpu = smp_processor_id();
 	__u64 tmp_jiffs = get_jiffies_64();
@@ -186,6 +196,94 @@ static __init int thermal_throttle_init_device(void)
 	return 0;
 }
 device_initcall(thermal_throttle_init_device);
+
 #endif /* CONFIG_SYSFS */
+
+/* Thermal transition interrupt handler */
+static void intel_thermal_interrupt(void)
+{
+	__u64 msr_val;
+
+	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
+	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
+		mce_log_therm_throt_event(msr_val);
+}
+
+static void unexpected_thermal_interrupt(void)
+{
+	printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
+			smp_processor_id());
+	add_taint(TAINT_MACHINE_CHECK);
+}
+
+static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
+
+asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
+{
+	exit_idle();
+	irq_enter();
+	inc_irq_stat(irq_thermal_count);
+	smp_thermal_vector();
+	irq_exit();
+	/* Ack only at the end to avoid potential reentry */
+	ack_APIC_irq();
+}
+
+void intel_init_thermal(struct cpuinfo_x86 *c)
+{
+	unsigned int cpu = smp_processor_id();
+	int tm2 = 0;
+	u32 l, h;
+
+	/* Thermal monitoring depends on ACPI and clock modulation*/
+	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
+		return;
+
+	/*
+	 * First check if its enabled already, in which case there might
+	 * be some SMM goo which handles it, so we can't even put a handler
+	 * since it might be delivered via SMI already:
+	 */
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+	h = apic_read(APIC_LVTTHMR);
+	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
+		printk(KERN_DEBUG
+		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
+		return;
+	}
+
+	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
+		tm2 = 1;
+
+	/* Check whether a vector already exists */
+	if (h & APIC_VECTOR_MASK) {
+		printk(KERN_DEBUG
+		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
+		       cpu, (h & APIC_VECTOR_MASK));
+		return;
+	}
+
+	/* We'll mask the thermal vector in the lapic till we're ready: */
+	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
+	apic_write(APIC_LVTTHMR, h);
+
+	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
+	wrmsr(MSR_IA32_THERM_INTERRUPT,
+	      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
+
+	smp_thermal_vector = intel_thermal_interrupt;
+
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
+
+	/* Unmask the thermal vector: */
+	l = apic_read(APIC_LVTTHMR);
+	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+
+	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
+	       cpu, tm2 ? "TM2" : "TM1");
+
+	/* enable thermal throttle processing */
+	atomic_set(&therm_throt_en, 1);
+}
```
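With this move, therm_throt.c owns the whole thermal path: the interrupt entry, the handler vector, the LVT setup, and the sysfs counter it has exported all along. The counter can be read per CPU from userspace; a minimal sketch (path derived from the "thermal_throttle" attribute group in this file; treat it as an assumption):

```c
#include <stdio.h>

int main(void)
{
	unsigned long count;
	/* assumed location of the per-CPU throttle-event counter */
	FILE *f = fopen("/sys/devices/system/cpu/cpu0/thermal_throttle/count", "r");

	if (!f) {
		perror("thermal_throttle");
		return 1;
	}
	if (fscanf(f, "%lu", &count) == 1)
		printf("CPU0 thermal throttle events: %lu\n", count);
	fclose(f);
	return 0;
}
```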
arch/x86/kernel/cpu/mcheck/winchip.c

```diff
@@ -9,10 +9,9 @@
 #include <asm/processor.h>
 #include <asm/system.h>
+#include <asm/mce.h>
 #include <asm/msr.h>
 
-#include "mce.h"
-
 /* Machine check handler for WinChip C6: */
 static void winchip_machine_check(struct pt_regs *regs, long error_code)
 {
```
arch/x86/kernel/traps.c

```diff
@@ -53,6 +53,7 @@
 #include <asm/traps.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
+#include <asm/mce.h>
 
 #include <asm/mach_traps.h>
 
@@ -64,8 +65,6 @@
 #include <asm/setup.h>
 #include <asm/traps.h>
 
-#include "cpu/mcheck/mce.h"
-
 asmlinkage int system_call(void);
 
 /* Do we ignore FPU interrupts ? */
```