Commit 755113d7 authored by Daniel Lezcano, committed by Rafael J. Wysocki

thermal/debugfs: Add thermal cooling device debugfs information

The thermal framework does not have any debug information except a
sysfs stat which is a bit controversial. This one allocates big chunks
of memory for every cooling device with a high number of states and
could represent on some systems in production several megabytes of
memory for just a portion of it. As sysfs is limited to a page
size, the output is not exploitable with large data arrays and gets
truncated.

The patch provides the same information as sysfs except that the
transitions are dynamically allocated, thus they won't show more
events than the ones which actually occurred. There is no longer a
size limitation and it opens the field for more debugging information,
which is what debugfs is designed for, not sysfs.

The thermal debugfs directory structure tries to stay consistent with
the sysfs one but in a very simplified way:

thermal/
 -- cooling_devices
    |-- 0
    |   |-- clear
    |   |-- time_in_state_ms
    |   |-- total_trans
    |   `-- trans_table
    |-- 1
    |   |-- clear
    |   |-- time_in_state_ms
    |   |-- total_trans
    |   `-- trans_table
    |-- 2
    |   |-- clear
    |   |-- time_in_state_ms
    |   |-- total_trans
    |   `-- trans_table
    |-- 3
    |   |-- clear
    |   |-- time_in_state_ms
    |   |-- total_trans
    |   `-- trans_table
    `-- 4
        |-- clear
        |-- time_in_state_ms
        |-- total_trans
        `-- trans_table

The content of the files in the cooling devices directory is the same
as the sysfs one except for the trans_table which has the following
format:

Transition	Hits
1->0      	246
0->1      	246
2->1      	632
1->2      	632
3->2      	98
2->3      	98
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
[ rjw: White space fixups, rebase ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
parent 2f521890
......@@ -33,6 +33,13 @@ config THERMAL_STATISTICS
If in doubt, say N.
config THERMAL_DEBUGFS
bool "Thermal subsystem debug support"
depends on DEBUG_FS
help
Say Y to allow the thermal subsystem to collect diagnostic
information that can be accessed via debugfs.
config THERMAL_EMERGENCY_POWEROFF_DELAY_MS
int "Emergency poweroff delay in milli-seconds"
default 0
......
......@@ -10,6 +10,8 @@ thermal_sys-y += thermal_trip.o thermal_helpers.o
# netlink interface to manage the thermal framework
thermal_sys-$(CONFIG_THERMAL_NETLINK) += thermal_netlink.o
thermal_sys-$(CONFIG_THERMAL_DEBUGFS) += thermal_debugfs.o
# interface to/from other layers providing sensors
thermal_sys-$(CONFIG_THERMAL_HWMON) += thermal_hwmon.o
thermal_sys-$(CONFIG_THERMAL_OF) += thermal_of.o
......
......@@ -960,6 +960,8 @@ __thermal_cooling_device_register(struct device_node *np,
mutex_unlock(&thermal_list_lock);
thermal_debug_cdev_add(cdev);
return cdev;
out_cooling_dev:
......@@ -1166,6 +1168,8 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
if (!cdev)
return;
thermal_debug_cdev_remove(cdev);
mutex_lock(&thermal_list_lock);
if (!thermal_cooling_device_present(cdev)) {
......@@ -1629,6 +1633,8 @@ static int __init thermal_init(void)
{
int result;
thermal_debug_init();
result = thermal_netlink_init();
if (result)
goto error;
......
......@@ -13,6 +13,7 @@
#include <linux/thermal.h>
#include "thermal_netlink.h"
#include "thermal_debugfs.h"
/* Default Thermal Governor */
#if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE)
......
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Thermal debugfs hooks used by the thermal core.
 *
 * When CONFIG_THERMAL_DEBUGFS is set, the hooks below are plain
 * declarations of functions implemented elsewhere (the implementation is
 * not part of this header). Otherwise they are empty static inline stubs,
 * so callers can invoke them unconditionally without their own #ifdef.
 */
#ifdef CONFIG_THERMAL_DEBUGFS
/* Invoked from thermal_init() before the netlink interface is initialised. */
void thermal_debug_init(void);
/*
 * Invoked at the end of cooling device registration, once
 * thermal_list_lock has been released.
 */
void thermal_debug_cdev_add(struct thermal_cooling_device *cdev);
/*
 * Invoked from thermal_cooling_device_unregister() for a non-NULL @cdev,
 * before thermal_list_lock is taken.
 */
void thermal_debug_cdev_remove(struct thermal_cooling_device *cdev);
/*
 * Invoked with the new @state after the cooling device's set_cur_state()
 * op has returned success.
 */
void thermal_debug_cdev_state_update(const struct thermal_cooling_device *cdev, int state);
#else
/* Debugfs support is compiled out: every hook is a no-op. */
static inline void thermal_debug_init(void) {}
static inline void thermal_debug_cdev_add(struct thermal_cooling_device *cdev) {}
static inline void thermal_debug_cdev_remove(struct thermal_cooling_device *cdev) {}
static inline void thermal_debug_cdev_state_update(const struct thermal_cooling_device *cdev,
int state) {}
#endif /* CONFIG_THERMAL_DEBUGFS */
......@@ -146,14 +146,22 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp)
}
EXPORT_SYMBOL_GPL(thermal_zone_get_temp);
static void thermal_cdev_set_cur_state(struct thermal_cooling_device *cdev,
int target)
static int thermal_cdev_set_cur_state(struct thermal_cooling_device *cdev, int state)
{
if (cdev->ops->set_cur_state(cdev, target))
return;
int ret;
/*
* No check is needed for the ops->set_cur_state as the
* registering function checked the ops are correctly set
*/
ret = cdev->ops->set_cur_state(cdev, state);
if (!ret) {
thermal_notify_cdev_state_update(cdev->id, state);
thermal_cooling_device_stats_update(cdev, state);
thermal_debug_cdev_state_update(cdev, state);
}
thermal_notify_cdev_state_update(cdev->id, target);
thermal_cooling_device_stats_update(cdev, target);
return ret;
}
void __thermal_cdev_update(struct thermal_cooling_device *cdev)
......
......@@ -32,6 +32,7 @@
struct thermal_zone_device;
struct thermal_cooling_device;
struct thermal_instance;
struct thermal_debugfs;
struct thermal_attr;
enum thermal_trend {
......@@ -113,6 +114,9 @@ struct thermal_cooling_device {
struct mutex lock; /* protect thermal_instances list */
struct list_head thermal_instances;
struct list_head node;
#ifdef CONFIG_THERMAL_DEBUGFS
struct thermal_debugfs *debugfs;
#endif
};
/**
......@@ -189,6 +193,9 @@ struct thermal_zone_device {
struct list_head node;
struct delayed_work poll_queue;
enum thermal_notify_event notify_event;
#ifdef CONFIG_THERMAL_DEBUGFS
struct thermal_debugfs *debugfs;
#endif
bool suspended;
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment