Commit b45e18ac authored by Kent Russell's avatar Kent Russell Committed by Alex Deucher

drm/amdgpu: Add sysfs file for PCIe usage v5

Add a sysfs file that reports the number of bytes transmitted and
received in the last second. This can be used to approximate the PCIe
bandwidth usage over the last second.

v2: Clarify use of mps as estimation of bandwidth
v3: Don't make the file on APUs
v4: Early exit for APUs in the read function, change output to
    display "packets-received packets-sent mps"
v5: fix missing header for si (Alex)
Signed-off-by: default avatarKent Russell <kent.russell@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: default avatarAlex Deucher <alexander.deucher@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent a0bb79e2
...@@ -542,6 +542,9 @@ struct amdgpu_asic_funcs { ...@@ -542,6 +542,9 @@ struct amdgpu_asic_funcs {
bool (*need_full_reset)(struct amdgpu_device *adev); bool (*need_full_reset)(struct amdgpu_device *adev);
/* initialize doorbell layout for specific asic*/ /* initialize doorbell layout for specific asic*/
void (*init_doorbell_index)(struct amdgpu_device *adev); void (*init_doorbell_index)(struct amdgpu_device *adev);
/* PCIe bandwidth usage */
void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0,
uint64_t *count1);
}; };
/* /*
...@@ -1042,6 +1045,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); ...@@ -1042,6 +1045,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r)) #define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev)) #define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev)) #define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
/* Common functions */ /* Common functions */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
......
...@@ -990,6 +990,31 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev, ...@@ -990,6 +990,31 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
return snprintf(buf, PAGE_SIZE, "%d\n", value); return snprintf(buf, PAGE_SIZE, "%d\n", value);
} }
/**
* DOC: pcie_bw
*
* The amdgpu driver provides a sysfs API for estimating how much data
* has been received and sent by the GPU in the last second through PCIe.
* The file pcie_bw is used for this.
* The Perf counters count the number of received and sent messages and return
* those values, as well as the maximum payload size of a PCIe packet (mps).
* Note that it is not possible to easily and quickly obtain the size of each
* packet transmitted, so we output the max payload size (mps) to allow for
* quick estimation of the PCIe bandwidth usage
*/
static ssize_t amdgpu_get_pcie_bw(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
uint64_t count0, count1;
amdgpu_asic_get_pcie_usage(adev, &count0, &count1);
return snprintf(buf, PAGE_SIZE, "%llu %llu %i\n",
count0, count1, pcie_get_mps(adev->pdev));
}
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
amdgpu_get_dpm_forced_performance_level, amdgpu_get_dpm_forced_performance_level,
...@@ -1025,6 +1050,7 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR, ...@@ -1025,6 +1050,7 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
amdgpu_set_pp_od_clk_voltage); amdgpu_set_pp_od_clk_voltage);
static DEVICE_ATTR(gpu_busy_percent, S_IRUGO, static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
amdgpu_get_busy_percent, NULL); amdgpu_get_busy_percent, NULL);
static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);
static ssize_t amdgpu_hwmon_show_temp(struct device *dev, static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
struct device_attribute *attr, struct device_attribute *attr,
...@@ -2108,6 +2134,14 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) ...@@ -2108,6 +2134,14 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
"gpu_busy_level\n"); "gpu_busy_level\n");
return ret; return ret;
} }
/* PCIe Perf counters won't work on APU nodes */
if (adev->flags & !AMD_IS_APU) {
ret = device_create_file(adev->dev, &dev_attr_pcie_bw);
if (ret) {
DRM_ERROR("failed to create device file pcie_bw\n");
return ret;
}
}
ret = amdgpu_debugfs_pm_init(adev); ret = amdgpu_debugfs_pm_init(adev);
if (ret) { if (ret) {
DRM_ERROR("Failed to register debugfs file for dpm!\n"); DRM_ERROR("Failed to register debugfs file for dpm!\n");
...@@ -2147,6 +2181,8 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) ...@@ -2147,6 +2181,8 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
device_remove_file(adev->dev, device_remove_file(adev->dev,
&dev_attr_pp_od_clk_voltage); &dev_attr_pp_od_clk_voltage);
device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
if (adev->flags & !AMD_IS_APU)
device_remove_file(adev->dev, &dev_attr_pcie_bw);
} }
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
......
...@@ -1741,6 +1741,52 @@ static bool cik_need_full_reset(struct amdgpu_device *adev) ...@@ -1741,6 +1741,52 @@ static bool cik_need_full_reset(struct amdgpu_device *adev)
return true; return true;
} }
static void cik_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
uint64_t *count1)
{
uint32_t perfctr = 0;
uint64_t cnt0_of, cnt1_of;
int tmp;
/* This reports 0 on APUs, so return to avoid writing/reading registers
* that may or may not be different from their GPU counterparts
*/
if (adev->flags & AMD_IS_APU)
return;
/* Set the 2 events that we wish to watch, defined above */
/* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
/* Write to enable desired perf counters */
WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr);
/* Zero out and enable the perf counters
* Write 0x5:
* Bit 0 = Start all counters(1)
* Bit 2 = Global counter reset enable(1)
*/
WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);
msleep(1000);
/* Load the shadow and disable the perf counters
* Write 0x2:
* Bit 0 = Stop counters(0)
* Bit 1 = Load the shadow counters(1)
*/
WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);
/* Read register values to get any >32bit overflow */
tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);
/* Get the values and add the overflow */
*count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
}
static const struct amdgpu_asic_funcs cik_asic_funcs = static const struct amdgpu_asic_funcs cik_asic_funcs =
{ {
.read_disabled_bios = &cik_read_disabled_bios, .read_disabled_bios = &cik_read_disabled_bios,
...@@ -1756,6 +1802,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = ...@@ -1756,6 +1802,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.invalidate_hdp = &cik_invalidate_hdp, .invalidate_hdp = &cik_invalidate_hdp,
.need_full_reset = &cik_need_full_reset, .need_full_reset = &cik_need_full_reset,
.init_doorbell_index = &legacy_doorbell_index_init, .init_doorbell_index = &legacy_doorbell_index_init,
.get_pcie_usage = &cik_get_pcie_usage,
}; };
static int cik_common_early_init(void *handle) static int cik_common_early_init(void *handle)
......
...@@ -47,6 +47,7 @@ ...@@ -47,6 +47,7 @@
#include "dce/dce_6_0_d.h" #include "dce/dce_6_0_d.h"
#include "uvd/uvd_4_0_d.h" #include "uvd/uvd_4_0_d.h"
#include "bif/bif_3_0_d.h" #include "bif/bif_3_0_d.h"
#include "bif/bif_3_0_sh_mask.h"
static const u32 tahiti_golden_registers[] = static const u32 tahiti_golden_registers[] =
{ {
...@@ -1323,6 +1324,52 @@ static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes) ...@@ -1323,6 +1324,52 @@ static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes)
WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
} }
static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
uint64_t *count1)
{
uint32_t perfctr = 0;
uint64_t cnt0_of, cnt1_of;
int tmp;
/* This reports 0 on APUs, so return to avoid writing/reading registers
* that may or may not be different from their GPU counterparts
*/
if (adev->flags & AMD_IS_APU)
return;
/* Set the 2 events that we wish to watch, defined above */
/* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
/* Write to enable desired perf counters */
WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr);
/* Zero out and enable the perf counters
* Write 0x5:
* Bit 0 = Start all counters(1)
* Bit 2 = Global counter reset enable(1)
*/
WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);
msleep(1000);
/* Load the shadow and disable the perf counters
* Write 0x2:
* Bit 0 = Stop counters(0)
* Bit 1 = Load the shadow counters(1)
*/
WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);
/* Read register values to get any >32bit overflow */
tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);
/* Get the values and add the overflow */
*count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
}
static const struct amdgpu_asic_funcs si_asic_funcs = static const struct amdgpu_asic_funcs si_asic_funcs =
{ {
.read_disabled_bios = &si_read_disabled_bios, .read_disabled_bios = &si_read_disabled_bios,
...@@ -1339,6 +1386,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = ...@@ -1339,6 +1386,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
.flush_hdp = &si_flush_hdp, .flush_hdp = &si_flush_hdp,
.invalidate_hdp = &si_invalidate_hdp, .invalidate_hdp = &si_invalidate_hdp,
.need_full_reset = &si_need_full_reset, .need_full_reset = &si_need_full_reset,
.get_pcie_usage = &si_get_pcie_usage,
}; };
static uint32_t si_get_rev_id(struct amdgpu_device *adev) static uint32_t si_get_rev_id(struct amdgpu_device *adev)
......
...@@ -43,6 +43,9 @@ ...@@ -43,6 +43,9 @@
#include "hdp/hdp_4_0_sh_mask.h" #include "hdp/hdp_4_0_sh_mask.h"
#include "smuio/smuio_9_0_offset.h" #include "smuio/smuio_9_0_offset.h"
#include "smuio/smuio_9_0_sh_mask.h" #include "smuio/smuio_9_0_sh_mask.h"
#include "nbio/nbio_7_0_default.h"
#include "nbio/nbio_7_0_sh_mask.h"
#include "nbio/nbio_7_0_smn.h"
#include "soc15.h" #include "soc15.h"
#include "soc15_common.h" #include "soc15_common.h"
...@@ -601,6 +604,51 @@ static bool soc15_need_full_reset(struct amdgpu_device *adev) ...@@ -601,6 +604,51 @@ static bool soc15_need_full_reset(struct amdgpu_device *adev)
/* change this when we implement soft reset */ /* change this when we implement soft reset */
return true; return true;
} }
static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
uint64_t *count1)
{
uint32_t perfctr = 0;
uint64_t cnt0_of, cnt1_of;
int tmp;
/* This reports 0 on APUs, so return to avoid writing/reading registers
* that may or may not be different from their GPU counterparts
*/
if (adev->flags & AMD_IS_APU)
return;
/* Set the 2 events that we wish to watch, defined above */
/* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
/* Write to enable desired perf counters */
WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr);
/* Zero out and enable the perf counters
* Write 0x5:
* Bit 0 = Start all counters(1)
* Bit 2 = Global counter reset enable(1)
*/
WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005);
msleep(1000);
/* Load the shadow and disable the perf counters
* Write 0x2:
* Bit 0 = Stop counters(0)
* Bit 1 = Load the shadow counters(1)
*/
WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002);
/* Read register values to get any >32bit overflow */
tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK);
cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);
/* Get the values and add the overflow */
*count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
*count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
}
static const struct amdgpu_asic_funcs soc15_asic_funcs = static const struct amdgpu_asic_funcs soc15_asic_funcs =
{ {
...@@ -617,6 +665,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = ...@@ -617,6 +665,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
.invalidate_hdp = &soc15_invalidate_hdp, .invalidate_hdp = &soc15_invalidate_hdp,
.need_full_reset = &soc15_need_full_reset, .need_full_reset = &soc15_need_full_reset,
.init_doorbell_index = &vega10_doorbell_index_init, .init_doorbell_index = &vega10_doorbell_index_init,
.get_pcie_usage = &soc15_get_pcie_usage,
}; };
static const struct amdgpu_asic_funcs vega20_asic_funcs = static const struct amdgpu_asic_funcs vega20_asic_funcs =
...@@ -634,6 +683,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = ...@@ -634,6 +683,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
.invalidate_hdp = &soc15_invalidate_hdp, .invalidate_hdp = &soc15_invalidate_hdp,
.need_full_reset = &soc15_need_full_reset, .need_full_reset = &soc15_need_full_reset,
.init_doorbell_index = &vega20_doorbell_index_init, .init_doorbell_index = &vega20_doorbell_index_init,
.get_pcie_usage = &soc15_get_pcie_usage,
}; };
static int soc15_common_early_init(void *handle) static int soc15_common_early_init(void *handle)
......
...@@ -941,6 +941,52 @@ static bool vi_need_full_reset(struct amdgpu_device *adev) ...@@ -941,6 +941,52 @@ static bool vi_need_full_reset(struct amdgpu_device *adev)
} }
} }
static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
uint64_t *count1)
{
uint32_t perfctr = 0;
uint64_t cnt0_of, cnt1_of;
int tmp;
/* This reports 0 on APUs, so return to avoid writing/reading registers
* that may or may not be different from their GPU counterparts
*/
if (adev->flags & AMD_IS_APU)
return;
/* Set the 2 events that we wish to watch, defined above */
/* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
/* Write to enable desired perf counters */
WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr);
/* Zero out and enable the perf counters
* Write 0x5:
* Bit 0 = Start all counters(1)
* Bit 2 = Global counter reset enable(1)
*/
WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);
msleep(1000);
/* Load the shadow and disable the perf counters
* Write 0x2:
* Bit 0 = Stop counters(0)
* Bit 1 = Load the shadow counters(1)
*/
WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);
/* Read register values to get any >32bit overflow */
tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);
/* Get the values and add the overflow */
*count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
}
static const struct amdgpu_asic_funcs vi_asic_funcs = static const struct amdgpu_asic_funcs vi_asic_funcs =
{ {
.read_disabled_bios = &vi_read_disabled_bios, .read_disabled_bios = &vi_read_disabled_bios,
...@@ -956,6 +1002,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = ...@@ -956,6 +1002,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
.invalidate_hdp = &vi_invalidate_hdp, .invalidate_hdp = &vi_invalidate_hdp,
.need_full_reset = &vi_need_full_reset, .need_full_reset = &vi_need_full_reset,
.init_doorbell_index = &legacy_doorbell_index_init, .init_doorbell_index = &legacy_doorbell_index_init,
.get_pcie_usage = &vi_get_pcie_usage,
}; };
#define CZ_REV_BRISTOL(rev) \ #define CZ_REV_BRISTOL(rev) \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment