drm/amdgpu: Add sysfs file for PCIe usage v5
Add a sysfs file that reports the number of bytes transmitted and received in the last second. This can be used to approximate the PCIe bandwidth usage over the last second. v2: Clarify use of mps as estimation of bandwidth v3: Don't make the file on APUs v4: Early exit for APUs in the read function, change output to display "packets-received packets-sent mps" v5: fix missing header for si (Alex) Signed-off-by: Kent Russell <kent.russell@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
a0bb79e255
commit
b45e18acd3
|
@ -542,6 +542,9 @@ struct amdgpu_asic_funcs {
|
|||
bool (*need_full_reset)(struct amdgpu_device *adev);
|
||||
/* initialize doorbell layout for specific asic*/
|
||||
void (*init_doorbell_index)(struct amdgpu_device *adev);
|
||||
/* PCIe bandwidth usage */
|
||||
void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0,
|
||||
uint64_t *count1);
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -1042,6 +1045,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
|
|||
#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
|
||||
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
|
||||
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
|
||||
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
|
||||
|
||||
/* Common functions */
|
||||
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
|
||||
|
|
|
@ -990,6 +990,31 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
|
|||
return snprintf(buf, PAGE_SIZE, "%d\n", value);
|
||||
}
|
||||
|
||||
/**
|
||||
* DOC: pcie_bw
|
||||
*
|
||||
* The amdgpu driver provides a sysfs API for estimating how much data
|
||||
* has been received and sent by the GPU in the last second through PCIe.
|
||||
* The file pcie_bw is used for this.
|
||||
* The Perf counters count the number of received and sent messages and return
|
||||
* those values, as well as the maximum payload size of a PCIe packet (mps).
|
||||
* Note that it is not possible to easily and quickly obtain the size of each
|
||||
* packet transmitted, so we output the max payload size (mps) to allow for
|
||||
* quick estimation of the PCIe bandwidth usage
|
||||
*/
|
||||
static ssize_t amdgpu_get_pcie_bw(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = ddev->dev_private;
|
||||
uint64_t count0, count1;
|
||||
|
||||
amdgpu_asic_get_pcie_usage(adev, &count0, &count1);
|
||||
return snprintf(buf, PAGE_SIZE, "%llu %llu %i\n",
|
||||
count0, count1, pcie_get_mps(adev->pdev));
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
|
||||
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
|
||||
amdgpu_get_dpm_forced_performance_level,
|
||||
|
@ -1025,6 +1050,7 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
|
|||
amdgpu_set_pp_od_clk_voltage);
|
||||
static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
|
||||
amdgpu_get_busy_percent, NULL);
|
||||
static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);
|
||||
|
||||
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
|
@ -2108,6 +2134,14 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
|
|||
"gpu_busy_level\n");
|
||||
return ret;
|
||||
}
|
||||
/* PCIe Perf counters won't work on APU nodes */
|
||||
if (adev->flags & !AMD_IS_APU) {
|
||||
ret = device_create_file(adev->dev, &dev_attr_pcie_bw);
|
||||
if (ret) {
|
||||
DRM_ERROR("failed to create device file pcie_bw\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
ret = amdgpu_debugfs_pm_init(adev);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to register debugfs file for dpm!\n");
|
||||
|
@ -2147,6 +2181,8 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
|
|||
device_remove_file(adev->dev,
|
||||
&dev_attr_pp_od_clk_voltage);
|
||||
device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
|
||||
if (adev->flags & !AMD_IS_APU)
|
||||
device_remove_file(adev->dev, &dev_attr_pcie_bw);
|
||||
}
|
||||
|
||||
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
|
||||
|
|
|
@ -1741,6 +1741,52 @@ static bool cik_need_full_reset(struct amdgpu_device *adev)
|
|||
return true;
|
||||
}
|
||||
|
||||
static void cik_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
|
||||
uint64_t *count1)
|
||||
{
|
||||
uint32_t perfctr = 0;
|
||||
uint64_t cnt0_of, cnt1_of;
|
||||
int tmp;
|
||||
|
||||
/* This reports 0 on APUs, so return to avoid writing/reading registers
|
||||
* that may or may not be different from their GPU counterparts
|
||||
*/
|
||||
if (adev->flags & AMD_IS_APU)
|
||||
return;
|
||||
|
||||
/* Set the 2 events that we wish to watch, defined above */
|
||||
/* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
|
||||
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
|
||||
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
|
||||
|
||||
/* Write to enable desired perf counters */
|
||||
WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr);
|
||||
/* Zero out and enable the perf counters
|
||||
* Write 0x5:
|
||||
* Bit 0 = Start all counters(1)
|
||||
* Bit 2 = Global counter reset enable(1)
|
||||
*/
|
||||
WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);
|
||||
|
||||
msleep(1000);
|
||||
|
||||
/* Load the shadow and disable the perf counters
|
||||
* Write 0x2:
|
||||
* Bit 0 = Stop counters(0)
|
||||
* Bit 1 = Load the shadow counters(1)
|
||||
*/
|
||||
WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);
|
||||
|
||||
/* Read register values to get any >32bit overflow */
|
||||
tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
|
||||
cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
|
||||
cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);
|
||||
|
||||
/* Get the values and add the overflow */
|
||||
*count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
|
||||
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
|
||||
}
|
||||
|
||||
static const struct amdgpu_asic_funcs cik_asic_funcs =
|
||||
{
|
||||
.read_disabled_bios = &cik_read_disabled_bios,
|
||||
|
@ -1756,6 +1802,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
|
|||
.invalidate_hdp = &cik_invalidate_hdp,
|
||||
.need_full_reset = &cik_need_full_reset,
|
||||
.init_doorbell_index = &legacy_doorbell_index_init,
|
||||
.get_pcie_usage = &cik_get_pcie_usage,
|
||||
};
|
||||
|
||||
static int cik_common_early_init(void *handle)
|
||||
|
|
|
@ -47,6 +47,7 @@
|
|||
#include "dce/dce_6_0_d.h"
|
||||
#include "uvd/uvd_4_0_d.h"
|
||||
#include "bif/bif_3_0_d.h"
|
||||
#include "bif/bif_3_0_sh_mask.h"
|
||||
|
||||
static const u32 tahiti_golden_registers[] =
|
||||
{
|
||||
|
@ -1323,6 +1324,52 @@ static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes)
|
|||
WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
|
||||
}
|
||||
|
||||
static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
|
||||
uint64_t *count1)
|
||||
{
|
||||
uint32_t perfctr = 0;
|
||||
uint64_t cnt0_of, cnt1_of;
|
||||
int tmp;
|
||||
|
||||
/* This reports 0 on APUs, so return to avoid writing/reading registers
|
||||
* that may or may not be different from their GPU counterparts
|
||||
*/
|
||||
if (adev->flags & AMD_IS_APU)
|
||||
return;
|
||||
|
||||
/* Set the 2 events that we wish to watch, defined above */
|
||||
/* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
|
||||
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
|
||||
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
|
||||
|
||||
/* Write to enable desired perf counters */
|
||||
WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr);
|
||||
/* Zero out and enable the perf counters
|
||||
* Write 0x5:
|
||||
* Bit 0 = Start all counters(1)
|
||||
* Bit 2 = Global counter reset enable(1)
|
||||
*/
|
||||
WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);
|
||||
|
||||
msleep(1000);
|
||||
|
||||
/* Load the shadow and disable the perf counters
|
||||
* Write 0x2:
|
||||
* Bit 0 = Stop counters(0)
|
||||
* Bit 1 = Load the shadow counters(1)
|
||||
*/
|
||||
WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);
|
||||
|
||||
/* Read register values to get any >32bit overflow */
|
||||
tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
|
||||
cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
|
||||
cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);
|
||||
|
||||
/* Get the values and add the overflow */
|
||||
*count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
|
||||
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
|
||||
}
|
||||
|
||||
static const struct amdgpu_asic_funcs si_asic_funcs =
|
||||
{
|
||||
.read_disabled_bios = &si_read_disabled_bios,
|
||||
|
@ -1339,6 +1386,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
|
|||
.flush_hdp = &si_flush_hdp,
|
||||
.invalidate_hdp = &si_invalidate_hdp,
|
||||
.need_full_reset = &si_need_full_reset,
|
||||
.get_pcie_usage = &si_get_pcie_usage,
|
||||
};
|
||||
|
||||
static uint32_t si_get_rev_id(struct amdgpu_device *adev)
|
||||
|
|
|
@ -43,6 +43,9 @@
|
|||
#include "hdp/hdp_4_0_sh_mask.h"
|
||||
#include "smuio/smuio_9_0_offset.h"
|
||||
#include "smuio/smuio_9_0_sh_mask.h"
|
||||
#include "nbio/nbio_7_0_default.h"
|
||||
#include "nbio/nbio_7_0_sh_mask.h"
|
||||
#include "nbio/nbio_7_0_smn.h"
|
||||
|
||||
#include "soc15.h"
|
||||
#include "soc15_common.h"
|
||||
|
@ -601,6 +604,51 @@ static bool soc15_need_full_reset(struct amdgpu_device *adev)
|
|||
/* change this when we implement soft reset */
|
||||
return true;
|
||||
}
|
||||
static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
|
||||
uint64_t *count1)
|
||||
{
|
||||
uint32_t perfctr = 0;
|
||||
uint64_t cnt0_of, cnt1_of;
|
||||
int tmp;
|
||||
|
||||
/* This reports 0 on APUs, so return to avoid writing/reading registers
|
||||
* that may or may not be different from their GPU counterparts
|
||||
*/
|
||||
if (adev->flags & AMD_IS_APU)
|
||||
return;
|
||||
|
||||
/* Set the 2 events that we wish to watch, defined above */
|
||||
/* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
|
||||
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
|
||||
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
|
||||
|
||||
/* Write to enable desired perf counters */
|
||||
WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr);
|
||||
/* Zero out and enable the perf counters
|
||||
* Write 0x5:
|
||||
* Bit 0 = Start all counters(1)
|
||||
* Bit 2 = Global counter reset enable(1)
|
||||
*/
|
||||
WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005);
|
||||
|
||||
msleep(1000);
|
||||
|
||||
/* Load the shadow and disable the perf counters
|
||||
* Write 0x2:
|
||||
* Bit 0 = Stop counters(0)
|
||||
* Bit 1 = Load the shadow counters(1)
|
||||
*/
|
||||
WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002);
|
||||
|
||||
/* Read register values to get any >32bit overflow */
|
||||
tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK);
|
||||
cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
|
||||
cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);
|
||||
|
||||
/* Get the values and add the overflow */
|
||||
*count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
|
||||
*count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
|
||||
}
|
||||
|
||||
static const struct amdgpu_asic_funcs soc15_asic_funcs =
|
||||
{
|
||||
|
@ -617,6 +665,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
|
|||
.invalidate_hdp = &soc15_invalidate_hdp,
|
||||
.need_full_reset = &soc15_need_full_reset,
|
||||
.init_doorbell_index = &vega10_doorbell_index_init,
|
||||
.get_pcie_usage = &soc15_get_pcie_usage,
|
||||
};
|
||||
|
||||
static const struct amdgpu_asic_funcs vega20_asic_funcs =
|
||||
|
@ -634,6 +683,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
|
|||
.invalidate_hdp = &soc15_invalidate_hdp,
|
||||
.need_full_reset = &soc15_need_full_reset,
|
||||
.init_doorbell_index = &vega20_doorbell_index_init,
|
||||
.get_pcie_usage = &soc15_get_pcie_usage,
|
||||
};
|
||||
|
||||
static int soc15_common_early_init(void *handle)
|
||||
|
|
|
@ -941,6 +941,52 @@ static bool vi_need_full_reset(struct amdgpu_device *adev)
|
|||
}
|
||||
}
|
||||
|
||||
static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
|
||||
uint64_t *count1)
|
||||
{
|
||||
uint32_t perfctr = 0;
|
||||
uint64_t cnt0_of, cnt1_of;
|
||||
int tmp;
|
||||
|
||||
/* This reports 0 on APUs, so return to avoid writing/reading registers
|
||||
* that may or may not be different from their GPU counterparts
|
||||
*/
|
||||
if (adev->flags & AMD_IS_APU)
|
||||
return;
|
||||
|
||||
/* Set the 2 events that we wish to watch, defined above */
|
||||
/* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
|
||||
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
|
||||
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
|
||||
|
||||
/* Write to enable desired perf counters */
|
||||
WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr);
|
||||
/* Zero out and enable the perf counters
|
||||
* Write 0x5:
|
||||
* Bit 0 = Start all counters(1)
|
||||
* Bit 2 = Global counter reset enable(1)
|
||||
*/
|
||||
WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);
|
||||
|
||||
msleep(1000);
|
||||
|
||||
/* Load the shadow and disable the perf counters
|
||||
* Write 0x2:
|
||||
* Bit 0 = Stop counters(0)
|
||||
* Bit 1 = Load the shadow counters(1)
|
||||
*/
|
||||
WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);
|
||||
|
||||
/* Read register values to get any >32bit overflow */
|
||||
tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
|
||||
cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
|
||||
cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);
|
||||
|
||||
/* Get the values and add the overflow */
|
||||
*count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
|
||||
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
|
||||
}
|
||||
|
||||
static const struct amdgpu_asic_funcs vi_asic_funcs =
|
||||
{
|
||||
.read_disabled_bios = &vi_read_disabled_bios,
|
||||
|
@ -956,6 +1002,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
|
|||
.invalidate_hdp = &vi_invalidate_hdp,
|
||||
.need_full_reset = &vi_need_full_reset,
|
||||
.init_doorbell_index = &legacy_doorbell_index_init,
|
||||
.get_pcie_usage = &vi_get_pcie_usage,
|
||||
};
|
||||
|
||||
#define CZ_REV_BRISTOL(rev) \
|
||||
|
|
Loading…
Reference in New Issue