Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
linux
Commits
f157f596
Commit
f157f596
authored
Jun 18, 2013
by
Zhang Rui
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'cpu-package-thermal' of .git into next
Conflicts: drivers/thermal/Kconfig drivers/thermal/Makefile
parents
30072fb9
23be63f4
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
769 additions
and
4 deletions
+769
-4
Documentation/thermal/x86_pkg_temperature_thermal
Documentation/thermal/x86_pkg_temperature_thermal
+47
-0
arch/x86/include/asm/mce.h
arch/x86/include/asm/mce.h
+7
-0
arch/x86/kernel/cpu/mcheck/therm_throt.c
arch/x86/kernel/cpu/mcheck/therm_throt.c
+59
-4
drivers/thermal/Kconfig
drivers/thermal/Kconfig
+13
-0
drivers/thermal/Makefile
drivers/thermal/Makefile
+1
-0
drivers/thermal/x86_pkg_temp_thermal.c
drivers/thermal/x86_pkg_temp_thermal.c
+642
-0
No files found.
Documentation/thermal/x86_pkg_temperature_thermal
0 → 100644
View file @
f157f596
Kernel driver: x86_pkg_temp_thermal
===================
Supported chips:
* x86: with package level thermal management
(Verify using: CPUID.06H:EAX[bit 6] =1)
Authors: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Reference
---
Intel® 64 and IA-32 Architectures Software Developer’s Manual (Jan, 2013):
Chapter 14.6: PACKAGE LEVEL THERMAL MANAGEMENT
Description
---------
This driver registers the CPU package-level digital temperature sensor as a
thermal zone with a maximum of two user-mode configurable trip points. The
number of trip points depends on the capability of the package. Once a trip
point is violated, user mode can receive a notification via the thermal
notification mechanism and can take any action to control the temperature.
Threshold management
--------------------
Each package will register as a thermal zone under /sys/class/thermal.
Example:
/sys/class/thermal/thermal_zone1
This contains two trip points:
- trip_point_0_temp
- trip_point_1_temp
The user can set any temperature between 0 and the TJ-Max temperature.
Temperature units are milli-degrees Celsius. Refer to
"Documentation/thermal/sysfs-api.txt" for thermal sysfs details.
Any value other than 0 in these trip points can trigger thermal notifications.
Setting a trip point to 0 stops thermal notifications for that trip point.
Thermal notifications: To get kobject-uevent notifications, set the thermal zone
policy to "user_space". For example: echo -n "user_space" > policy
arch/x86/include/asm/mce.h
View file @
f157f596
...
@@ -214,6 +214,13 @@ void mce_log_therm_throt_event(__u64 status);
...
@@ -214,6 +214,13 @@ void mce_log_therm_throt_event(__u64 status);
/* Interrupt Handler for core thermal thresholds */
/* Interrupt Handler for core thermal thresholds */
extern
int
(
*
platform_thermal_notify
)(
__u64
msr_val
);
extern
int
(
*
platform_thermal_notify
)(
__u64
msr_val
);
/* Interrupt Handler for package thermal thresholds */
extern
int
(
*
platform_thermal_package_notify
)(
__u64
msr_val
);
/* Callback support of rate control, return true, if
* callback has rate control */
extern
bool
(
*
platform_thermal_package_rate_control
)(
void
);
#ifdef CONFIG_X86_THERMAL_VECTOR
#ifdef CONFIG_X86_THERMAL_VECTOR
extern
void
mcheck_intel_therm_init
(
void
);
extern
void
mcheck_intel_therm_init
(
void
);
#else
#else
...
...
arch/x86/kernel/cpu/mcheck/therm_throt.c
View file @
f157f596
...
@@ -54,12 +54,24 @@ struct thermal_state {
...
@@ -54,12 +54,24 @@ struct thermal_state {
struct
_thermal_state
package_power_limit
;
struct
_thermal_state
package_power_limit
;
struct
_thermal_state
core_thresh0
;
struct
_thermal_state
core_thresh0
;
struct
_thermal_state
core_thresh1
;
struct
_thermal_state
core_thresh1
;
struct
_thermal_state
pkg_thresh0
;
struct
_thermal_state
pkg_thresh1
;
};
};
/* Callback to handle core threshold interrupts */
/* Callback to handle core threshold interrupts */
int
(
*
platform_thermal_notify
)(
__u64
msr_val
);
int
(
*
platform_thermal_notify
)(
__u64
msr_val
);
EXPORT_SYMBOL
(
platform_thermal_notify
);
EXPORT_SYMBOL
(
platform_thermal_notify
);
/* Callback to handle core package threshold_interrupts */
int
(
*
platform_thermal_package_notify
)(
__u64
msr_val
);
EXPORT_SYMBOL_GPL
(
platform_thermal_package_notify
);
/* Callback support of rate control, return true, if
* callback has rate control */
bool
(
*
platform_thermal_package_rate_control
)(
void
);
EXPORT_SYMBOL_GPL
(
platform_thermal_package_rate_control
);
static
DEFINE_PER_CPU
(
struct
thermal_state
,
thermal_state
);
static
DEFINE_PER_CPU
(
struct
thermal_state
,
thermal_state
);
static
atomic_t
therm_throt_en
=
ATOMIC_INIT
(
0
);
static
atomic_t
therm_throt_en
=
ATOMIC_INIT
(
0
);
...
@@ -203,19 +215,25 @@ static int therm_throt_process(bool new_event, int event, int level)
...
@@ -203,19 +215,25 @@ static int therm_throt_process(bool new_event, int event, int level)
return
0
;
return
0
;
}
}
static
int
thresh_event_valid
(
int
event
)
static
int
thresh_event_valid
(
int
level
,
int
event
)
{
{
struct
_thermal_state
*
state
;
struct
_thermal_state
*
state
;
unsigned
int
this_cpu
=
smp_processor_id
();
unsigned
int
this_cpu
=
smp_processor_id
();
struct
thermal_state
*
pstate
=
&
per_cpu
(
thermal_state
,
this_cpu
);
struct
thermal_state
*
pstate
=
&
per_cpu
(
thermal_state
,
this_cpu
);
u64
now
=
get_jiffies_64
();
u64
now
=
get_jiffies_64
();
state
=
(
event
==
0
)
?
&
pstate
->
core_thresh0
:
&
pstate
->
core_thresh1
;
if
(
level
==
PACKAGE_LEVEL
)
state
=
(
event
==
0
)
?
&
pstate
->
pkg_thresh0
:
&
pstate
->
pkg_thresh1
;
else
state
=
(
event
==
0
)
?
&
pstate
->
core_thresh0
:
&
pstate
->
core_thresh1
;
if
(
time_before64
(
now
,
state
->
next_check
))
if
(
time_before64
(
now
,
state
->
next_check
))
return
0
;
return
0
;
state
->
next_check
=
now
+
CHECK_INTERVAL
;
state
->
next_check
=
now
+
CHECK_INTERVAL
;
return
1
;
return
1
;
}
}
...
@@ -321,6 +339,39 @@ device_initcall(thermal_throttle_init_device);
...
@@ -321,6 +339,39 @@ device_initcall(thermal_throttle_init_device);
#endif
/* CONFIG_SYSFS */
#endif
/* CONFIG_SYSFS */
/*
 * Forward package thermal threshold events to the registered platform
 * callback.
 *
 * Nothing is done when no callback is registered or when neither threshold
 * log bit is set in @msr_val. If the callback owner reports (through
 * platform_thermal_package_rate_control) that it rate-limits on its own,
 * the callback is invoked once and unconditionally. Otherwise each
 * threshold is filtered through thresh_event_valid() before the callback
 * is invoked for it.
 */
static void notify_package_thresholds(__u64 msr_val)
{
	bool low_hit, high_hit;

	if (!platform_thermal_package_notify)
		return;

	low_hit = !!(msr_val & THERM_LOG_THRESHOLD0);	/* lower threshold */
	high_hit = !!(msr_val & THERM_LOG_THRESHOLD1);	/* higher threshold */
	if (!(low_hit || high_hit))
		return;

	if (platform_thermal_package_rate_control &&
	    platform_thermal_package_rate_control()) {
		/* The callback does its own rate control */
		platform_thermal_package_notify(msr_val);
		return;
	}

	if (low_hit && thresh_event_valid(PACKAGE_LEVEL, 0))
		platform_thermal_package_notify(msr_val);

	if (high_hit && thresh_event_valid(PACKAGE_LEVEL, 1))
		platform_thermal_package_notify(msr_val);
}
static
void
notify_thresholds
(
__u64
msr_val
)
static
void
notify_thresholds
(
__u64
msr_val
)
{
{
/* check whether the interrupt handler is defined;
/* check whether the interrupt handler is defined;
...
@@ -330,10 +381,12 @@ static void notify_thresholds(__u64 msr_val)
...
@@ -330,10 +381,12 @@ static void notify_thresholds(__u64 msr_val)
return
;
return
;
/* lower threshold reached */
/* lower threshold reached */
if
((
msr_val
&
THERM_LOG_THRESHOLD0
)
&&
thresh_event_valid
(
0
))
if
((
msr_val
&
THERM_LOG_THRESHOLD0
)
&&
thresh_event_valid
(
CORE_LEVEL
,
0
))
platform_thermal_notify
(
msr_val
);
platform_thermal_notify
(
msr_val
);
/* higher threshold reached */
/* higher threshold reached */
if
((
msr_val
&
THERM_LOG_THRESHOLD1
)
&&
thresh_event_valid
(
1
))
if
((
msr_val
&
THERM_LOG_THRESHOLD1
)
&&
thresh_event_valid
(
CORE_LEVEL
,
1
))
platform_thermal_notify
(
msr_val
);
platform_thermal_notify
(
msr_val
);
}
}
...
@@ -359,6 +412,8 @@ static void intel_thermal_interrupt(void)
...
@@ -359,6 +412,8 @@ static void intel_thermal_interrupt(void)
if
(
this_cpu_has
(
X86_FEATURE_PTS
))
{
if
(
this_cpu_has
(
X86_FEATURE_PTS
))
{
rdmsrl
(
MSR_IA32_PACKAGE_THERM_STATUS
,
msr_val
);
rdmsrl
(
MSR_IA32_PACKAGE_THERM_STATUS
,
msr_val
);
/* check violations of package thermal thresholds */
notify_package_thresholds
(
msr_val
);
therm_throt_process
(
msr_val
&
PACKAGE_THERM_STATUS_PROCHOT
,
therm_throt_process
(
msr_val
&
PACKAGE_THERM_STATUS_PROCHOT
,
THERMAL_THROTTLING_EVENT
,
THERMAL_THROTTLING_EVENT
,
PACKAGE_LEVEL
);
PACKAGE_LEVEL
);
...
...
drivers/thermal/Kconfig
View file @
f157f596
...
@@ -169,7 +169,20 @@ config INTEL_POWERCLAMP
...
@@ -169,7 +169,20 @@ config INTEL_POWERCLAMP
enforce idle time which results in more package C-state residency. The
enforce idle time which results in more package C-state residency. The
user interface is exposed via generic thermal framework.
user interface is exposed via generic thermal framework.
config X86_PKG_TEMP_THERMAL
tristate "X86 package temperature thermal driver"
depends on THERMAL
depends on X86
select THERMAL_GOV_USER_SPACE
default m
help
Enable this to register CPU digital sensor for package temperature as
thermal zone. Each package will have its own thermal zone. There are
two trip points which can be set by user to get notifications via thermal
notification methods.
menu "Texas Instruments thermal drivers"
menu "Texas Instruments thermal drivers"
source "drivers/thermal/ti-soc-thermal/Kconfig"
source "drivers/thermal/ti-soc-thermal/Kconfig"
endmenu
endmenu
endif
endif
drivers/thermal/Makefile
View file @
f157f596
...
@@ -23,4 +23,5 @@ obj-$(CONFIG_DB8500_THERMAL) += db8500_thermal.o
...
@@ -23,4 +23,5 @@ obj-$(CONFIG_DB8500_THERMAL) += db8500_thermal.o
obj-$(CONFIG_ARMADA_THERMAL)
+=
armada_thermal.o
obj-$(CONFIG_ARMADA_THERMAL)
+=
armada_thermal.o
obj-$(CONFIG_DB8500_CPUFREQ_COOLING)
+=
db8500_cpufreq_cooling.o
obj-$(CONFIG_DB8500_CPUFREQ_COOLING)
+=
db8500_cpufreq_cooling.o
obj-$(CONFIG_INTEL_POWERCLAMP)
+=
intel_powerclamp.o
obj-$(CONFIG_INTEL_POWERCLAMP)
+=
intel_powerclamp.o
obj-$(CONFIG_X86_PKG_TEMP_THERMAL)
+=
x86_pkg_temp_thermal.o
obj-$(CONFIG_TI_SOC_THERMAL)
+=
ti-soc-thermal/
obj-$(CONFIG_TI_SOC_THERMAL)
+=
ti-soc-thermal/
drivers/thermal/x86_pkg_temp_thermal.c
0 → 100644
View file @
f157f596
/*
* x86_pkg_temp_thermal driver
* Copyright (c) 2013, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/param.h>
#include <linux/device.h>
#include <linux/platform_device.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/pm.h>
#include <linux/thermal.h>
#include <linux/debugfs.h>
#include <asm/cpu_device_id.h>
#include <asm/mce.h>
/*
 * Rate control delay: the idea is to introduce a debounce effect.
 * The delay should be long enough to avoid a flood of events when a
 * threshold is set to a temperature that is constantly violated, but
 * short enough that user space can still take action in time. The
 * action can be to remove the threshold or to change it to the next
 * interesting setting. Based on experiments, under load a significant
 * temperature change is seen roughly every 5 seconds.
 */
/* Default value of the notify_delay_ms module parameter */
#define PKG_TEMP_THERMAL_NOTIFY_DELAY	5000

/*
 * Delay, in milliseconds, applied before the deferred threshold work runs
 * after a package threshold interrupt. Exposed as a module parameter with
 * mode 0644.
 */
static int notify_delay_ms = PKG_TEMP_THERMAL_NOTIFY_DELAY;
module_param(notify_delay_ms, int, 0644);
MODULE_PARM_DESC(notify_delay_ms,
	"User space notification delay in milli seconds.");
/*
 * Number of trip points in a thermal zone. Currently it cannot be more
 * than 2: the MSR only allows setting and getting notifications for two
 * thresholds. This define enforces that limit in case cpuid returns a
 * wrong value for the number of thresholds.
 */
#define MAX_NUMBER_OF_TRIPS	2

/* Per-package bookkeeping; one entry is kept on phy_dev_list per package */
struct phy_dev_entry {
	struct list_head list;		/* link in phy_dev_list */
	u16 phys_proc_id;		/* physical package id of this entry */
	u16 first_cpu;			/* CPU used for MSR access on this package */
	u32 tj_max;			/* TJ-Max, stored as register value * 1000 */
	int ref_cnt;			/* CPUs of this package currently accounted */
	u32 start_pkg_therm_low;	/* saved package therm interrupt MSR, low word */
	u32 start_pkg_therm_high;	/* saved package therm interrupt MSR, high word */
	struct thermal_zone_device *tzone;	/* registered thermal zone */
};
/* List of package instances (struct phy_dev_entry), guarded by the mutex */
static LIST_HEAD(phy_dev_list);
static DEFINE_MUTEX(phy_dev_list_mutex);

/* Interrupt to work function schedule queue */
static DEFINE_PER_CPU(struct delayed_work, pkg_temp_thermal_threshold_work);

/*
 * To track if the work is already scheduled on a package.
 * Array indexed by physical package id; holds max_phy_id + 1 entries.
 */
static u8 *pkg_work_scheduled;

/* Spin lock to prevent races with pkg_work_scheduled */
static spinlock_t pkg_work_lock;
/* Highest physical package id seen so far; bounds pkg_work_scheduled */
static u16 max_phy_id;

/* Debug counters to show using debugfs */
static struct dentry *debugfs;
/* Incremented on every call of the package notify callback */
static unsigned int pkg_interrupt_cnt;
/* Incremented on every run of the deferred threshold work */
static unsigned int pkg_work_cnt;
/*
 * Create the "pkg_temp_thermal" debugfs directory and expose the two
 * debug counters in it as read-only u32 files.
 *
 * Returns 0 on success, or -ENOENT if the directory or either file could
 * not be created (anything created so far is removed again).
 */
static int pkg_temp_debugfs_init(void)
{
	struct dentry *entry;

	debugfs = debugfs_create_dir("pkg_temp_thermal", NULL);
	if (!debugfs)
		return -ENOENT;

	entry = debugfs_create_u32("pkg_thres_interrupt", S_IRUGO, debugfs,
				   (u32 *)&pkg_interrupt_cnt);
	if (entry)
		entry = debugfs_create_u32("pkg_thres_work", S_IRUGO, debugfs,
					   (u32 *)&pkg_work_cnt);
	if (entry)
		return 0;

	/* One of the files could not be created: tear everything down */
	debugfs_remove_recursive(debugfs);
	return -ENOENT;
}
static
struct
phy_dev_entry
*
pkg_temp_thermal_get_phy_entry
(
unsigned
int
cpu
)
{
u16
phys_proc_id
=
topology_physical_package_id
(
cpu
);
struct
phy_dev_entry
*
phy_ptr
;
mutex_lock
(
&
phy_dev_list_mutex
);
list_for_each_entry
(
phy_ptr
,
&
phy_dev_list
,
list
)
if
(
phy_ptr
->
phys_proc_id
==
phys_proc_id
)
{
mutex_unlock
(
&
phy_dev_list_mutex
);
return
phy_ptr
;
}
mutex_unlock
(
&
phy_dev_list_mutex
);
return
NULL
;
}
/*
 * Read TJ-Max for @cpu. TJ-Max is interesting because the thresholds are
 * programmed relative to this temperature.
 *
 * The value is taken from bits 16-23 of the low word of
 * MSR_IA32_TEMPERATURE_TARGET and stored in *@tj_max multiplied by 1000.
 *
 * Returns 0 on success. If the MSR read fails its error is returned, and
 * if the field reads as zero -EINVAL is returned; in both failure cases
 * *@tj_max is set to 0.
 */
static int get_tj_max(int cpu, u32 *tj_max)
{
	u32 lo, hi;
	u32 target;
	int ret;

	ret = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &lo, &hi);
	if (!ret) {
		target = (lo >> 16) & 0xff;
		if (target) {
			*tj_max = target * 1000;
			return 0;
		}
		ret = -EINVAL;
	}

	*tj_max = 0;
	return ret;
}
/*
 * Thermal zone .get_temp callback: report the current package temperature.
 *
 * Reads MSR_IA32_PACKAGE_THERM_STATUS on the package's first_cpu. The
 * reading is only used when bit 31 of the low word is set; the value in
 * bits 16-22 is then treated as an offset below tj_max and scaled by 1000.
 *
 * Returns 0 and stores the temperature in *@temp on success, or -EINVAL
 * when the MSR read fails or bit 31 is clear.
 */
static int sys_get_curr_temp(struct thermal_zone_device *tzd,
			     unsigned long *temp)
{
	struct phy_dev_entry *phy_dev_entry = tzd->devdata;
	u32 eax, edx;
	int ret;

	/*
	 * Check the read result, as sys_get_trip_temp() does; otherwise eax
	 * would be evaluated uninitialised when the cross-CPU read fails.
	 */
	ret = rdmsr_on_cpu(phy_dev_entry->first_cpu,
			   MSR_IA32_PACKAGE_THERM_STATUS, &eax, &edx);
	if (ret < 0)
		return -EINVAL;

	if (!(eax & 0x80000000))
		return -EINVAL;

	*temp = phy_dev_entry->tj_max - ((eax >> 16) & 0x7f) * 1000;
	pr_debug("sys_get_curr_temp %ld\n", *temp);

	return 0;
}
/*
 * Thermal zone .get_trip_temp callback: report the temperature programmed
 * for trip point @trip.
 *
 * The threshold field for the trip (threshold 1 for a non-zero @trip,
 * threshold 0 otherwise) is extracted from
 * MSR_IA32_PACKAGE_THERM_INTERRUPT, read on the package's first_cpu. A
 * non-zero field is an offset below tj_max, scaled by 1000; a zero field
 * is reported as temperature 0.
 *
 * Returns 0 on success, -EINVAL if @trip is out of range or the MSR read
 * fails.
 */
static int sys_get_trip_temp(struct thermal_zone_device *tzd,
			     int trip, unsigned long *temp)
{
	struct phy_dev_entry *pkg;
	unsigned long offset;
	u32 field_mask, field_shift;
	u32 lo, hi;

	if (trip >= MAX_NUMBER_OF_TRIPS)
		return -EINVAL;

	pkg = tzd->devdata;

	field_mask = trip ? THERM_MASK_THRESHOLD1 : THERM_MASK_THRESHOLD0;
	field_shift = trip ? THERM_SHIFT_THRESHOLD1 : THERM_SHIFT_THRESHOLD0;

	if (rdmsr_on_cpu(pkg->first_cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
			 &lo, &hi) < 0)
		return -EINVAL;

	offset = (lo & field_mask) >> field_shift;
	*temp = offset ? pkg->tj_max - offset * 1000 : 0;
	pr_debug("sys_get_trip_temp %ld\n", *temp);

	return 0;
}
/*
 * Thermal zone .set_trip_temp callback: program trip point @trip.
 *
 * @temp must be below the package's tj_max. A @temp of 0 clears the
 * threshold field and disables the corresponding threshold interrupt;
 * any other value stores (tj_max - temp) / 1000 in the threshold field
 * and enables the interrupt. The register is read and written on the
 * package's first_cpu.
 *
 * Only referenced through tzone_ops in this file, hence static.
 *
 * Returns the result of the MSR write, or -EINVAL if @trip or @temp is
 * out of range or the MSR read fails.
 */
static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip,
			     unsigned long temp)
{
	u32 l, h;
	struct phy_dev_entry *phy_dev_entry;
	u32 mask, shift, intr;
	int ret;

	phy_dev_entry = tzd->devdata;

	if (trip >= MAX_NUMBER_OF_TRIPS || temp >= phy_dev_entry->tj_max)
		return -EINVAL;

	ret = rdmsr_on_cpu(phy_dev_entry->first_cpu,
			   MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h);
	if (ret < 0)
		return -EINVAL;

	if (trip) {
		mask = THERM_MASK_THRESHOLD1;
		shift = THERM_SHIFT_THRESHOLD1;
		intr = THERM_INT_THRESHOLD1_ENABLE;
	} else {
		mask = THERM_MASK_THRESHOLD0;
		shift = THERM_SHIFT_THRESHOLD0;
		intr = THERM_INT_THRESHOLD0_ENABLE;
	}

	l &= ~mask;
	/*
	 * When user space sets a trip temperature of 0, it indicates that
	 * it is no longer interested in receiving notifications.
	 */
	if (!temp) {
		l &= ~intr;
	} else {
		l |= ((phy_dev_entry->tj_max - temp) / 1000) << shift;
		l |= intr;
	}

	return wrmsr_on_cpu(phy_dev_entry->first_cpu,
			    MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
}
/*
 * Thermal zone .get_trip_type callback: every trip point of this driver
 * is reported as THERMAL_TRIP_PASSIVE, regardless of @trip. Always
 * returns 0.
 */
static int sys_get_trip_type(struct thermal_zone_device *thermal,
			     int trip, enum thermal_trip_type *type)
{
	*type = THERMAL_TRIP_PASSIVE;

	return 0;
}
/*
 * Thermal zone callback registry: the operations passed to
 * thermal_zone_device_register() for every package zone.
 */
static struct thermal_zone_device_ops tzone_ops = {
	.get_temp = sys_get_curr_temp,		/* current package temperature */
	.get_trip_temp = sys_get_trip_temp,	/* read a trip threshold */
	.get_trip_type = sys_get_trip_type,	/* always THERMAL_TRIP_PASSIVE */
	.set_trip_temp = sys_set_trip_temp,	/* program a trip threshold */
};
/*
 * Installed as platform_thermal_package_rate_control: always returns true,
 * i.e. this driver reports that it performs rate control in its notify
 * callback.
 */
static bool pkg_temp_thermal_platform_thermal_rate_control(void)
{
	return true;
}
/*
 * Enable the threshold interrupts on the local package/cpu.
 *
 * An interrupt enable bit is only set for a threshold whose value field
 * in MSR_IA32_PACKAGE_THERM_INTERRUPT is non-zero; bits are never cleared
 * here.
 */
static inline void enable_pkg_thres_interrupt(void)
{
	u32 lo, hi;
	u8 thres0_val, thres1_val;

	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, lo, hi);

	/* Sample both threshold fields before touching the enable bits */
	thres0_val = (lo & THERM_MASK_THRESHOLD0) >> THERM_SHIFT_THRESHOLD0;
	thres1_val = (lo & THERM_MASK_THRESHOLD1) >> THERM_SHIFT_THRESHOLD1;

	lo |= thres0_val ? THERM_INT_THRESHOLD0_ENABLE : 0;
	lo |= thres1_val ? THERM_INT_THRESHOLD1_ENABLE : 0;

	wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, lo, hi);
}
/*
 * Disable both threshold interrupts on the local package/cpu by clearing
 * their enable bits in MSR_IA32_PACKAGE_THERM_INTERRUPT. The threshold
 * value fields are left untouched.
 */
static inline void disable_pkg_thres_interrupt(void)
{
	u32 lo, hi;

	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, lo, hi);
	lo &= ~(THERM_INT_THRESHOLD0_ENABLE | THERM_INT_THRESHOLD1_ENABLE);
	wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, lo, hi);
}
/*
 * Deferred work run after a package threshold interrupt.
 *
 * Marks the package as no longer having work scheduled, re-enables the
 * threshold interrupts, clears whichever threshold log bits are set in
 * MSR_IA32_PACKAGE_THERM_STATUS, and, if any was set, asks the thermal
 * core to update the package's zone.
 *
 * pkg_work_lock is also taken by the notify callback, which is invoked
 * from the thermal interrupt handler. It must therefore be taken with
 * interrupts disabled here; otherwise an interrupt arriving on this CPU
 * while the lock is held would spin on it forever.
 */
static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
{
	__u64 msr_val;
	int cpu = smp_processor_id();
	int phy_id = topology_physical_package_id(cpu);
	struct phy_dev_entry *phdev = pkg_temp_thermal_get_phy_entry(cpu);
	bool notify = false;
	unsigned long flags;

	if (!phdev)
		return;

	spin_lock_irqsave(&pkg_work_lock, flags);
	++pkg_work_cnt;
	if (unlikely(phy_id > max_phy_id)) {
		spin_unlock_irqrestore(&pkg_work_lock, flags);
		return;
	}
	pkg_work_scheduled[phy_id] = 0;
	spin_unlock_irqrestore(&pkg_work_lock, flags);

	enable_pkg_thres_interrupt();
	rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
	if (msr_val & THERM_LOG_THRESHOLD0) {
		wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS,
		       msr_val & ~THERM_LOG_THRESHOLD0);
		notify = true;
	}
	if (msr_val & THERM_LOG_THRESHOLD1) {
		wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS,
		       msr_val & ~THERM_LOG_THRESHOLD1);
		notify = true;
	}
	if (notify) {
		pr_debug("thermal_zone_device_update\n");
		thermal_zone_device_update(phdev->tzone);
	}
}
/*
 * Installed as platform_thermal_package_notify.
 *
 * When a package is in interrupted state, all CPUs in that package are in
 * the same interrupt state, so scheduling the work on any one CPU of the
 * package is enough; the call simply returns for the others.
 *
 * Returns 0 after disabling the threshold interrupts and queueing the
 * delayed work on the current CPU. Returns -EINVAL (threshold interrupts
 * disabled, nothing queued) when the package id is out of range, the
 * tracking array is not allocated, or work is already pending for this
 * package.
 */
static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
{
	int this_cpu = smp_processor_id();
	int pkg = topology_physical_package_id(this_cpu);
	unsigned long irq_flags;
	bool reject;

	spin_lock_irqsave(&pkg_work_lock, irq_flags);
	++pkg_interrupt_cnt;
	reject = unlikely(pkg > max_phy_id) ||
		 unlikely(!pkg_work_scheduled) ||
		 pkg_work_scheduled[pkg];
	if (reject)
		disable_pkg_thres_interrupt();
	else
		pkg_work_scheduled[pkg] = 1;
	spin_unlock_irqrestore(&pkg_work_lock, irq_flags);

	if (reject)
		return -EINVAL;

	disable_pkg_thres_interrupt();
	schedule_delayed_work_on(this_cpu,
				 &per_cpu(pkg_temp_thermal_threshold_work,
					  this_cpu),
				 msecs_to_jiffies(notify_delay_ms));

	return 0;
}
static
int
find_siblings_cpu
(
int
cpu
)
{
int
i
;
int
id
=
topology_physical_package_id
(
cpu
);
for_each_online_cpu
(
i
)
if
(
i
!=
cpu
&&
topology_physical_package_id
(
i
)
==
id
)
return
i
;
return
0
;
}
static
int
pkg_temp_thermal_device_add
(
unsigned
int
cpu
)
{
int
err
;
u32
tj_max
;
struct
phy_dev_entry
*
phy_dev_entry
;
char
buffer
[
30
];
int
thres_count
;
u32
eax
,
ebx
,
ecx
,
edx
;
cpuid
(
6
,
&
eax
,
&
ebx
,
&
ecx
,
&
edx
);
thres_count
=
ebx
&
0x07
;
if
(
!
thres_count
)
return
-
ENODEV
;
thres_count
=
clamp_val
(
thres_count
,
0
,
MAX_NUMBER_OF_TRIPS
);
err
=
get_tj_max
(
cpu
,
&
tj_max
);
if
(
err
)
goto
err_ret
;
mutex_lock
(
&
phy_dev_list_mutex
);
phy_dev_entry
=
kzalloc
(
sizeof
(
*
phy_dev_entry
),
GFP_KERNEL
);
if
(
!
phy_dev_entry
)
{
err
=
-
ENOMEM
;
goto
err_ret_unlock
;
}
spin_lock
(
&
pkg_work_lock
);
if
(
topology_physical_package_id
(
cpu
)
>
max_phy_id
)
max_phy_id
=
topology_physical_package_id
(
cpu
);
pkg_work_scheduled
=
krealloc
(
pkg_work_scheduled
,
(
max_phy_id
+
1
)
*
sizeof
(
u8
),
GFP_ATOMIC
);
if
(
!
pkg_work_scheduled
)
{
spin_unlock
(
&
pkg_work_lock
);
err
=
-
ENOMEM
;
goto
err_ret_free
;
}
pkg_work_scheduled
[
topology_physical_package_id
(
cpu
)]
=
0
;
spin_unlock
(
&
pkg_work_lock
);
phy_dev_entry
->
phys_proc_id
=
topology_physical_package_id
(
cpu
);
phy_dev_entry
->
first_cpu
=
cpu
;
phy_dev_entry
->
tj_max
=
tj_max
;
phy_dev_entry
->
ref_cnt
=
1
;
snprintf
(
buffer
,
sizeof
(
buffer
),
"pkg-temp-%d
\n
"
,
phy_dev_entry
->
phys_proc_id
);
phy_dev_entry
->
tzone
=
thermal_zone_device_register
(
buffer
,
thres_count
,
(
thres_count
==
MAX_NUMBER_OF_TRIPS
)
?
0x03
:
0x01
,
phy_dev_entry
,
&
tzone_ops
,
NULL
,
0
,
0
);
if
(
IS_ERR
(
phy_dev_entry
->
tzone
))
{
err
=
PTR_ERR
(
phy_dev_entry
->
tzone
);
goto
err_ret_free
;
}
/* Store MSR value for package thermal interrupt, to restore at exit */
rdmsr_on_cpu
(
cpu
,
MSR_IA32_PACKAGE_THERM_INTERRUPT
,
&
phy_dev_entry
->
start_pkg_therm_low
,
&
phy_dev_entry
->
start_pkg_therm_high
);
list_add_tail
(
&
phy_dev_entry
->
list
,
&
phy_dev_list
);
pr_debug
(
"pkg_temp_thermal_device_add :phy_id %d cpu %d
\n
"
,
phy_dev_entry
->
phys_proc_id
,
cpu
);
mutex_unlock
(
&
phy_dev_list_mutex
);
return
0
;
err_ret_free:
kfree
(
phy_dev_entry
);
err_ret_unlock:
mutex_unlock
(
&
phy_dev_list_mutex
);
err_ret:
return
err
;
}
/*
 * Drop one CPU reference from the package entry that @cpu belongs to.
 *
 * If @cpu was the package's first_cpu, another online CPU of the same
 * package is picked via find_siblings_cpu(). When the reference count
 * reaches zero, the matching entry is looked up on phy_dev_list, its
 * thermal zone is unregistered and the entry is freed.
 *
 * Returns 0 on success, -ENODEV if no entry exists for the package.
 */
static int pkg_temp_thermal_device_remove(unsigned int cpu)
{
	u16 pkg_id = topology_physical_package_id(cpu);
	struct phy_dev_entry *pkg = pkg_temp_thermal_get_phy_entry(cpu);
	struct phy_dev_entry *cur, *next;

	if (!pkg)
		return -ENODEV;

	mutex_lock(&phy_dev_list_mutex);

	/* If we are losing the first cpu of this package, pick another one */
	if (pkg->first_cpu == cpu) {
		pkg->first_cpu = find_siblings_cpu(cpu);
		pr_debug("thermal_device_remove: first cpu switched %d\n",
			 pkg->first_cpu);
	}

	/*
	 * It is possible that no siblings are left because this was the
	 * last cpu going offline. The first_cpu assignment above does not
	 * matter then: the entry and its thermal zone are removed below.
	 */
	--pkg->ref_cnt;
	pr_debug("thermal_device_remove: pkg: %d cpu %d ref_cnt %d\n",
		 pkg_id, cpu, pkg->ref_cnt);

	if (pkg->ref_cnt == 0) {
		list_for_each_entry_safe(cur, next, &phy_dev_list, list) {
			if (cur->phys_proc_id != pkg_id)
				continue;

			thermal_zone_device_unregister(cur->tzone);
			list_del(&cur->list);
			kfree(cur);
			break;
		}
	}

	mutex_unlock(&phy_dev_list_mutex);

	return 0;
}
/*
 * Account @cpu as online for its package.
 *
 * If the package has no entry yet, one is created (together with its
 * thermal zone) via pkg_temp_thermal_device_add(); otherwise the existing
 * entry's reference count is incremented. In both cases the CPU's delayed
 * work item is (re)initialised.
 *
 * Returns 0 on success, -ENODEV if the CPU lacks the required features or
 * the package entry could not be created.
 */
static int get_core_online(unsigned int cpu)
{
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	struct phy_dev_entry *phdev = pkg_temp_thermal_get_phy_entry(cpu);

	/* Check if there is already an instance for this package */
	if (!phdev) {
		/*
		 * Both features are required. The driver reads and writes
		 * the package thermal MSRs, so a CPU with DTHERM but
		 * without PTS must be rejected rather than accepted.
		 */
		if (!cpu_has(c, X86_FEATURE_DTHERM) ||
		    !cpu_has(c, X86_FEATURE_PTS))
			return -ENODEV;
		if (pkg_temp_thermal_device_add(cpu))
			return -ENODEV;
	} else {
		mutex_lock(&phy_dev_list_mutex);
		++phdev->ref_cnt;
		pr_debug("get_core_online: cpu %d ref_cnt %d\n",
			 cpu, phdev->ref_cnt);
		mutex_unlock(&phy_dev_list_mutex);
	}
	INIT_DELAYED_WORK(&per_cpu(pkg_temp_thermal_threshold_work, cpu),
			  pkg_temp_thermal_threshold_work_fn);

	pr_debug("get_core_online: cpu %d successful\n", cpu);

	return 0;
}
/*
 * Undo get_core_online() for @cpu: drop its package reference and, if
 * that succeeded, cancel the CPU's delayed work item and wait for it to
 * finish.
 */
static void put_core_offline(unsigned int cpu)
{
	int rc = pkg_temp_thermal_device_remove(cpu);

	if (rc == 0)
		cancel_delayed_work_sync(
			&per_cpu(pkg_temp_thermal_threshold_work, cpu));

	pr_debug("put_core_offline: cpu %d\n", cpu);
}
/*
 * CPU hotplug notifier callback.
 *
 * CPU_ONLINE and CPU_DOWN_FAILED account the CPU via get_core_online();
 * CPU_DOWN_PREPARE releases it via put_core_offline(). All other actions
 * are ignored. Always returns NOTIFY_OK.
 */
static int pkg_temp_thermal_cpu_callback(struct notifier_block *nfb,
					 unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;

	if (action == CPU_ONLINE || action == CPU_DOWN_FAILED)
		get_core_online(cpu);
	else if (action == CPU_DOWN_PREPARE)
		put_core_offline(cpu);

	return NOTIFY_OK;
}
/* Notifier block registered with the CPU hotplug notifier chain at init */
static struct notifier_block pkg_temp_thermal_notifier __refdata = {
	.notifier_call = pkg_temp_thermal_cpu_callback,
};
/*
 * CPU match table, checked with x86_match_cpu() at module init.
 *
 * Match on X86_FEATURE_PTS rather than X86_FEATURE_DTHERM: every MSR this
 * driver touches for temperature, thresholds and interrupts is a
 * package-level thermal MSR, so the module must not bind to CPUs that
 * only have the per-core digital thermal sensor.
 */
static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = {
	{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_PTS },
	{}
};
MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids);
/*
 * Module init.
 *
 * Installs the package thermal notify and rate-control callbacks,
 * accounts every online CPU via get_core_online(), registers the CPU
 * hotplug notifier and creates the debugfs entries (failure of the latter
 * is ignored).
 *
 * Returns 0 on success, -ENODEV if the CPU is not matched by
 * pkg_temp_thermal_ids or any CPU could not be brought online; in the
 * latter case everything set up so far is undone.
 */
static int __init pkg_temp_thermal_init(void)
{
	int i;

	if (!x86_match_cpu(pkg_temp_thermal_ids))
		return -ENODEV;

	spin_lock_init(&pkg_work_lock);
	platform_thermal_package_notify =
			pkg_temp_thermal_platform_thermal_notify;
	platform_thermal_package_rate_control =
			pkg_temp_thermal_platform_thermal_rate_control;

	get_online_cpus();
	for_each_online_cpu(i)
		if (get_core_online(i))
			goto err_ret;
	register_hotcpu_notifier(&pkg_temp_thermal_notifier);
	put_online_cpus();

	pkg_temp_debugfs_init(); /* Don't care if fails */

	return 0;

err_ret:
	/*
	 * The hotplug reference taken before the loop above is still held
	 * here. Taking it a second time would leave it unbalanced after the
	 * single put_online_cpus() below.
	 */
	for_each_online_cpu(i)
		put_core_offline(i);
	put_online_cpus();
	kfree(pkg_work_scheduled);
	platform_thermal_package_notify = NULL;
	platform_thermal_package_rate_control = NULL;

	return -ENODEV;
}
/*
 * Module exit.
 *
 * With CPU hotplug held off: unregisters the hotplug notifier; for every
 * package entry restores the saved MSR_IA32_PACKAGE_THERM_INTERRUPT
 * value, unregisters the thermal zone and frees the entry; removes the
 * platform callbacks; and cancels the delayed work of every online CPU.
 * Afterwards the work-tracking array and the debugfs entries are
 * released.
 */
static void __exit pkg_temp_thermal_exit(void)
{
	struct phy_dev_entry *pkg, *next;
	int cpu;

	get_online_cpus();
	unregister_hotcpu_notifier(&pkg_temp_thermal_notifier);

	mutex_lock(&phy_dev_list_mutex);
	list_for_each_entry_safe(pkg, next, &phy_dev_list, list) {
		/* Restore old MSR value for package thermal interrupt */
		wrmsr_on_cpu(pkg->first_cpu,
			     MSR_IA32_PACKAGE_THERM_INTERRUPT,
			     pkg->start_pkg_therm_low,
			     pkg->start_pkg_therm_high);
		thermal_zone_device_unregister(pkg->tzone);
		list_del(&pkg->list);
		kfree(pkg);
	}
	mutex_unlock(&phy_dev_list_mutex);

	platform_thermal_package_notify = NULL;
	platform_thermal_package_rate_control = NULL;

	for_each_online_cpu(cpu)
		cancel_delayed_work_sync(
			&per_cpu(pkg_temp_thermal_threshold_work, cpu));
	put_online_cpus();

	kfree(pkg_work_scheduled);

	debugfs_remove_recursive(debugfs);
}
/* Module entry/exit points and metadata */
module_init(pkg_temp_thermal_init)
module_exit(pkg_temp_thermal_exit)

MODULE_DESCRIPTION("X86 PKG TEMP Thermal Driver");
MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
MODULE_LICENSE("GPL v2");
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment