Commit 25c933b1 authored by Evan Quan, committed by Alex Deucher

drm/amd/powerplay: add new sysfs interface for retrieving gpu metrics(V2)

A new interface for UMD to retrieve gpu metrics data.

V2: enrich the documentation
Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 39c5a1ce
...@@ -206,6 +206,12 @@ pp_power_profile_mode ...@@ -206,6 +206,12 @@ pp_power_profile_mode
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
:doc: mem_busy_percent :doc: mem_busy_percent
gpu_metrics
~~~~~~~~~~~~~~~~~~~~~
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
:doc: gpu_metrics
GPU Product Information GPU Product Information
======================= =======================
......
...@@ -369,6 +369,9 @@ enum amdgpu_pcie_gen { ...@@ -369,6 +369,9 @@ enum amdgpu_pcie_gen {
((adev)->powerplay.pp_funcs->set_ppfeature_status(\ ((adev)->powerplay.pp_funcs->set_ppfeature_status(\
(adev)->powerplay.pp_handle, (ppfeatures))) (adev)->powerplay.pp_handle, (ppfeatures)))
/*
 * Call the powerplay get_gpu_metrics callback of @adev, handing it the
 * powerplay handle and @table, and evaluate to the callback's return value.
 * The macro itself does not check whether the callback is set.
 */
#define amdgpu_dpm_get_gpu_metrics(adev, table) \
	((adev)->powerplay.pp_funcs->get_gpu_metrics((adev)->powerplay.pp_handle, (table)))
struct amdgpu_dpm { struct amdgpu_dpm {
struct amdgpu_ps *ps; struct amdgpu_ps *ps;
/* number of valid power states */ /* number of valid power states */
......
...@@ -2120,6 +2120,59 @@ static ssize_t amdgpu_set_thermal_throttling_logging(struct device *dev, ...@@ -2120,6 +2120,59 @@ static ssize_t amdgpu_set_thermal_throttling_logging(struct device *dev,
return count; return count;
} }
/**
* DOC: gpu_metrics
*
* The amdgpu driver provides a sysfs API for retrieving current gpu
* metrics data. The file gpu_metrics is used for this. Reading the
* file will dump all the current gpu metrics data.
*
* The data includes temperature, frequency, engine utilization,
* power consumption, throttler status, fan speed and cpu core statistics
* (available for APU only). That is, it gives a snapshot of all sensors
* at the same time.
*/
/*
 * sysfs read handler for gpu_metrics: copies the metrics table reported by
 * the power-management backend into @buf.
 *
 * Returns the number of bytes copied (capped at PAGE_SIZE - 1), -EPERM while
 * the device is in reset, the pm_runtime_get_sync() error when that call
 * fails, or the backend's non-positive result (0 if no backend was called).
 */
static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
				      struct device_attribute *attr,
				      char *buf)
{
	struct drm_device *drm = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm->dev_private;
	void *table;
	ssize_t len = 0;
	int err;

	if (amdgpu_in_reset(adev))
		return -EPERM;

	err = pm_runtime_get_sync(drm->dev);
	if (err < 0) {
		/* Drop the usage count taken by the failed get. */
		pm_runtime_put_autosuspend(drm->dev);
		return err;
	}

	/* Query the backend with the reset semaphore held for reading. */
	down_read(&adev->reset_sem);
	if (is_support_sw_smu(adev))
		len = smu_sys_get_gpu_metrics(&adev->smu, &table);
	else if (adev->powerplay.pp_funcs->get_gpu_metrics)
		len = amdgpu_dpm_get_gpu_metrics(adev, &table);
	up_read(&adev->reset_sem);

	/* Only a positive length means @table was filled in. */
	if (len > 0) {
		if (len >= PAGE_SIZE)
			len = PAGE_SIZE - 1;
		memcpy(buf, table, len);
	}

	pm_runtime_mark_last_busy(drm->dev);
	pm_runtime_put_autosuspend(drm->dev);

	return len;
}
static struct amdgpu_device_attr amdgpu_device_attrs[] = { static struct amdgpu_device_attr amdgpu_device_attrs[] = {
AMDGPU_DEVICE_ATTR_RW(power_dpm_state, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), AMDGPU_DEVICE_ATTR_RW(power_dpm_state, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level, ATTR_FLAG_BASIC), AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level, ATTR_FLAG_BASIC),
...@@ -2143,6 +2196,7 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = { ...@@ -2143,6 +2196,7 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = {
AMDGPU_DEVICE_ATTR_RW(pp_features, ATTR_FLAG_BASIC), AMDGPU_DEVICE_ATTR_RW(pp_features, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RO(unique_id, ATTR_FLAG_BASIC), AMDGPU_DEVICE_ATTR_RO(unique_id, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RW(thermal_throttling_logging, ATTR_FLAG_BASIC), AMDGPU_DEVICE_ATTR_RW(thermal_throttling_logging, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RO(gpu_metrics, ATTR_FLAG_BASIC),
}; };
static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,
...@@ -2192,6 +2246,9 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ ...@@ -2192,6 +2246,9 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
} else if (DEVICE_ATTR_IS(pp_features)) { } else if (DEVICE_ATTR_IS(pp_features)) {
if (adev->flags & AMD_IS_APU || asic_type < CHIP_VEGA10) if (adev->flags & AMD_IS_APU || asic_type < CHIP_VEGA10)
*states = ATTR_STATE_UNSUPPORTED; *states = ATTR_STATE_UNSUPPORTED;
} else if (DEVICE_ATTR_IS(gpu_metrics)) {
if (asic_type < CHIP_VEGA12)
*states = ATTR_STATE_UNSUPPORTED;
} }
if (asic_type == CHIP_ARCTURUS) { if (asic_type == CHIP_ARCTURUS) {
......
...@@ -322,6 +322,7 @@ struct amd_pm_funcs { ...@@ -322,6 +322,7 @@ struct amd_pm_funcs {
int (*asic_reset_mode_2)(void *handle); int (*asic_reset_mode_2)(void *handle);
int (*set_df_cstate)(void *handle, enum pp_df_cstate state); int (*set_df_cstate)(void *handle, enum pp_df_cstate state);
int (*set_xgmi_pstate)(void *handle, uint32_t pstate); int (*set_xgmi_pstate)(void *handle, uint32_t pstate);
ssize_t (*get_gpu_metrics)(void *handle, void **table);
}; };
struct metrics_table_header { struct metrics_table_header {
......
...@@ -2516,3 +2516,23 @@ int smu_get_dpm_clock_table(struct smu_context *smu, ...@@ -2516,3 +2516,23 @@ int smu_get_dpm_clock_table(struct smu_context *smu,
return ret; return ret;
} }
/*
 * Fetch the gpu metrics table through the ppt_funcs->get_gpu_metrics
 * callback, which is invoked with smu->mutex held.
 *
 * Returns the callback's result, or -EOPNOTSUPP when power management or DPM
 * is not enabled or when the callback is not provided.
 */
ssize_t smu_sys_get_gpu_metrics(struct smu_context *smu,
				void **table)
{
	ssize_t ret;

	if (!(smu->pm_enabled && smu->adev->pm.dpm_enabled))
		return -EOPNOTSUPP;

	if (!smu->ppt_funcs->get_gpu_metrics)
		return -EOPNOTSUPP;

	mutex_lock(&smu->mutex);
	ret = smu->ppt_funcs->get_gpu_metrics(smu, table);
	mutex_unlock(&smu->mutex);

	return ret;
}
...@@ -589,6 +589,7 @@ struct pptable_funcs { ...@@ -589,6 +589,7 @@ struct pptable_funcs {
void (*log_thermal_throttling_event)(struct smu_context *smu); void (*log_thermal_throttling_event)(struct smu_context *smu);
size_t (*get_pp_feature_mask)(struct smu_context *smu, char *buf); size_t (*get_pp_feature_mask)(struct smu_context *smu, char *buf);
int (*set_pp_feature_mask)(struct smu_context *smu, uint64_t new_mask); int (*set_pp_feature_mask)(struct smu_context *smu, uint64_t new_mask);
ssize_t (*get_gpu_metrics)(struct smu_context *smu, void **table);
}; };
typedef enum { typedef enum {
...@@ -791,5 +792,7 @@ int smu_get_dpm_clock_table(struct smu_context *smu, ...@@ -791,5 +792,7 @@ int smu_get_dpm_clock_table(struct smu_context *smu,
int smu_get_status_gfxoff(struct amdgpu_device *adev, uint32_t *value); int smu_get_status_gfxoff(struct amdgpu_device *adev, uint32_t *value);
/* Fetch the gpu metrics table into *table; returns the callback's size result or -EOPNOTSUPP. */
ssize_t smu_sys_get_gpu_metrics(struct smu_context *smu, void **table);
#endif #endif
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment