drm/amd/pm: Fix SMUv13.0.6 throttle status report
Add throttle status in power context. Keep the throttle status indicator in the SMUv13 power context. v2: Removed dummy definition. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Signed-off-by: Asad Kamal <asad.kamal@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
023f4d6074
commit
93682f8a19
|
@ -119,6 +119,7 @@ struct smu_13_0_power_context {
|
||||||
uint32_t power_source;
|
uint32_t power_source;
|
||||||
uint8_t in_power_limit_boost_mode;
|
uint8_t in_power_limit_boost_mode;
|
||||||
enum smu_13_0_power_state power_state;
|
enum smu_13_0_power_state power_state;
|
||||||
|
atomic_t throttle_status;
|
||||||
};
|
};
|
||||||
|
|
||||||
#if defined(SWSMU_CODE_LAYER_L2) || defined(SWSMU_CODE_LAYER_L3)
|
#if defined(SWSMU_CODE_LAYER_L2) || defined(SWSMU_CODE_LAYER_L3)
|
||||||
|
|
|
@ -82,8 +82,6 @@
|
||||||
|
|
||||||
#define smnPCIE_ESM_CTRL 0x111003D0
|
#define smnPCIE_ESM_CTRL 0x111003D0
|
||||||
|
|
||||||
#define THROTTLER_TEMP_HBM_BIT 2
|
|
||||||
|
|
||||||
static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = {
|
static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = {
|
||||||
MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0),
|
MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0),
|
||||||
MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1),
|
MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1),
|
||||||
|
@ -174,17 +172,12 @@ static const struct cmn2asic_mapping smu_v13_0_6_table_map[SMU_TABLE_COUNT] = {
|
||||||
TAB_MAP(I2C_COMMANDS),
|
TAB_MAP(I2C_COMMANDS),
|
||||||
};
|
};
|
||||||
|
|
||||||
#define THROTTLER_PROCHOT_GFX_BIT 0
|
|
||||||
#define THROTTLER_PPT_BIT 1
|
|
||||||
#define THROTTLER_TEMP_SOC_BIT 2
|
|
||||||
#define THROTTLER_TEMP_VR_GFX_BIT 3
|
|
||||||
|
|
||||||
static const uint8_t smu_v13_0_6_throttler_map[] = {
|
static const uint8_t smu_v13_0_6_throttler_map[] = {
|
||||||
[THROTTLER_PPT_BIT] = (SMU_THROTTLER_PPT0_BIT),
|
[THROTTLER_PPT_BIT] = (SMU_THROTTLER_PPT0_BIT),
|
||||||
[THROTTLER_TEMP_SOC_BIT] = (SMU_THROTTLER_TEMP_GPU_BIT),
|
[THROTTLER_THERMAL_SOCKET_BIT] = (SMU_THROTTLER_TEMP_GPU_BIT),
|
||||||
[THROTTLER_TEMP_HBM_BIT] = (SMU_THROTTLER_TEMP_MEM_BIT),
|
[THROTTLER_THERMAL_HBM_BIT] = (SMU_THROTTLER_TEMP_MEM_BIT),
|
||||||
[THROTTLER_TEMP_VR_GFX_BIT] = (SMU_THROTTLER_TEMP_VR_GFX_BIT),
|
[THROTTLER_THERMAL_VR_BIT] = (SMU_THROTTLER_TEMP_VR_GFX_BIT),
|
||||||
[THROTTLER_PROCHOT_GFX_BIT] = (SMU_THROTTLER_PROCHOT_GFX_BIT),
|
[THROTTLER_PROCHOT_BIT] = (SMU_THROTTLER_PROCHOT_GFX_BIT),
|
||||||
};
|
};
|
||||||
|
|
||||||
struct PPTable_t {
|
struct PPTable_t {
|
||||||
|
@ -642,16 +635,14 @@ static int smu_v13_0_6_freqs_in_same_level(int32_t frequency1,
|
||||||
return (abs(frequency1 - frequency2) <= EPSILON);
|
return (abs(frequency1 - frequency2) <= EPSILON);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t smu_v13_0_6_get_throttler_status(struct smu_context *smu,
|
static uint32_t smu_v13_0_6_get_throttler_status(struct smu_context *smu)
|
||||||
MetricsTable_t *metrics)
|
|
||||||
{
|
{
|
||||||
|
struct smu_power_context *smu_power = &smu->smu_power;
|
||||||
|
struct smu_13_0_power_context *power_context = smu_power->power_context;
|
||||||
uint32_t throttler_status = 0;
|
uint32_t throttler_status = 0;
|
||||||
|
|
||||||
throttler_status |= metrics->ProchotResidencyAcc > 0 ? 1U << THROTTLER_PROCHOT_GFX_BIT : 0;
|
throttler_status = atomic_read(&power_context->throttle_status);
|
||||||
throttler_status |= metrics->PptResidencyAcc > 0 ? 1U << THROTTLER_PPT_BIT : 0;
|
dev_dbg(smu->adev->dev, "SMU Throttler status: %u", throttler_status);
|
||||||
throttler_status |= metrics->SocketThmResidencyAcc > 0 ? 1U << THROTTLER_TEMP_SOC_BIT : 0;
|
|
||||||
throttler_status |= metrics->VrThmResidencyAcc > 0 ? 1U << THROTTLER_TEMP_VR_GFX_BIT : 0;
|
|
||||||
throttler_status |= metrics->HbmThmResidencyAcc > 0 ? 1U << THROTTLER_TEMP_HBM_BIT : 0;
|
|
||||||
|
|
||||||
return throttler_status;
|
return throttler_status;
|
||||||
}
|
}
|
||||||
|
@ -721,9 +712,6 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu,
|
||||||
case METRICS_TEMPERATURE_VRSOC:
|
case METRICS_TEMPERATURE_VRSOC:
|
||||||
*value = SMUQ10_TO_UINT(metrics->MaxVrTemperature);
|
*value = SMUQ10_TO_UINT(metrics->MaxVrTemperature);
|
||||||
break;
|
break;
|
||||||
case METRICS_THROTTLER_STATUS:
|
|
||||||
*value = smu_v13_0_6_get_throttler_status(smu, metrics);
|
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
*value = UINT_MAX;
|
*value = UINT_MAX;
|
||||||
break;
|
break;
|
||||||
|
@ -1290,13 +1278,11 @@ static int smu_v13_0_6_irq_process(struct amdgpu_device *adev,
|
||||||
struct amdgpu_iv_entry *entry)
|
struct amdgpu_iv_entry *entry)
|
||||||
{
|
{
|
||||||
struct smu_context *smu = adev->powerplay.pp_handle;
|
struct smu_context *smu = adev->powerplay.pp_handle;
|
||||||
|
struct smu_power_context *smu_power = &smu->smu_power;
|
||||||
|
struct smu_13_0_power_context *power_context = smu_power->power_context;
|
||||||
uint32_t client_id = entry->client_id;
|
uint32_t client_id = entry->client_id;
|
||||||
uint32_t src_id = entry->src_id;
|
|
||||||
/*
|
|
||||||
* ctxid is used to distinguish different
|
|
||||||
* events for SMCToHost interrupt
|
|
||||||
*/
|
|
||||||
uint32_t ctxid = entry->src_data[0];
|
uint32_t ctxid = entry->src_data[0];
|
||||||
|
uint32_t src_id = entry->src_id;
|
||||||
uint32_t data;
|
uint32_t data;
|
||||||
|
|
||||||
if (client_id == SOC15_IH_CLIENTID_MP1) {
|
if (client_id == SOC15_IH_CLIENTID_MP1) {
|
||||||
|
@ -1305,7 +1291,10 @@ static int smu_v13_0_6_irq_process(struct amdgpu_device *adev,
|
||||||
data = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL);
|
data = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL);
|
||||||
data = REG_SET_FIELD(data, MP1_SMN_IH_SW_INT_CTRL, INT_ACK, 1);
|
data = REG_SET_FIELD(data, MP1_SMN_IH_SW_INT_CTRL, INT_ACK, 1);
|
||||||
WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL, data);
|
WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL, data);
|
||||||
|
/*
|
||||||
|
* ctxid is used to distinguish different events for SMCToHost
|
||||||
|
* interrupt.
|
||||||
|
*/
|
||||||
switch (ctxid) {
|
switch (ctxid) {
|
||||||
case IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING:
|
case IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING:
|
||||||
/*
|
/*
|
||||||
|
@ -1316,8 +1305,17 @@ static int smu_v13_0_6_irq_process(struct amdgpu_device *adev,
|
||||||
if (!atomic_read(&adev->throttling_logging_enabled))
|
if (!atomic_read(&adev->throttling_logging_enabled))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (__ratelimit(&adev->throttling_logging_rs))
|
/* This uses the new method which fixes the
|
||||||
|
* incorrect throttling status reporting
|
||||||
|
* through metrics table. For older FWs,
|
||||||
|
* it will be ignored.
|
||||||
|
*/
|
||||||
|
if (__ratelimit(&adev->throttling_logging_rs)) {
|
||||||
|
atomic_set(
|
||||||
|
&power_context->throttle_status,
|
||||||
|
entry->src_data[1]);
|
||||||
schedule_work(&smu->throttling_logging_work);
|
schedule_work(&smu->throttling_logging_work);
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1895,37 +1893,35 @@ static int smu_v13_0_6_allow_xgmi_power_down(struct smu_context *smu, bool en)
|
||||||
en ? 0 : 1, NULL);
|
en ? 0 : 1, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct throttling_logging_label {
|
static const char *const throttling_logging_label[] = {
|
||||||
uint32_t feature_mask;
|
[THROTTLER_PROCHOT_BIT] = "Prochot",
|
||||||
const char *label;
|
[THROTTLER_PPT_BIT] = "PPT",
|
||||||
} logging_label[] = {
|
[THROTTLER_THERMAL_SOCKET_BIT] = "SOC",
|
||||||
{ (1U << THROTTLER_TEMP_HBM_BIT), "HBM" },
|
[THROTTLER_THERMAL_VR_BIT] = "VR",
|
||||||
{ (1U << THROTTLER_TEMP_SOC_BIT), "SOC" },
|
[THROTTLER_THERMAL_HBM_BIT] = "HBM"
|
||||||
{ (1U << THROTTLER_TEMP_VR_GFX_BIT), "VR limit" },
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static void smu_v13_0_6_log_thermal_throttling_event(struct smu_context *smu)
|
static void smu_v13_0_6_log_thermal_throttling_event(struct smu_context *smu)
|
||||||
{
|
{
|
||||||
int ret;
|
|
||||||
int throttler_idx, throtting_events = 0, buf_idx = 0;
|
int throttler_idx, throtting_events = 0, buf_idx = 0;
|
||||||
struct amdgpu_device *adev = smu->adev;
|
struct amdgpu_device *adev = smu->adev;
|
||||||
uint32_t throttler_status;
|
uint32_t throttler_status;
|
||||||
char log_buf[256];
|
char log_buf[256];
|
||||||
|
|
||||||
ret = smu_v13_0_6_get_smu_metrics_data(smu, METRICS_THROTTLER_STATUS,
|
throttler_status = smu_v13_0_6_get_throttler_status(smu);
|
||||||
&throttler_status);
|
if (!throttler_status)
|
||||||
if (ret)
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
memset(log_buf, 0, sizeof(log_buf));
|
memset(log_buf, 0, sizeof(log_buf));
|
||||||
for (throttler_idx = 0; throttler_idx < ARRAY_SIZE(logging_label);
|
for (throttler_idx = 0;
|
||||||
|
throttler_idx < ARRAY_SIZE(throttling_logging_label);
|
||||||
throttler_idx++) {
|
throttler_idx++) {
|
||||||
if (throttler_status &
|
if (throttler_status & (1U << throttler_idx)) {
|
||||||
logging_label[throttler_idx].feature_mask) {
|
|
||||||
throtting_events++;
|
throtting_events++;
|
||||||
buf_idx += snprintf(log_buf + buf_idx,
|
buf_idx += snprintf(
|
||||||
sizeof(log_buf) - buf_idx, "%s%s",
|
log_buf + buf_idx, sizeof(log_buf) - buf_idx,
|
||||||
throtting_events > 1 ? " and " : "",
|
"%s%s", throtting_events > 1 ? " and " : "",
|
||||||
logging_label[throttler_idx].label);
|
throttling_logging_label[throttler_idx]);
|
||||||
if (buf_idx >= sizeof(log_buf)) {
|
if (buf_idx >= sizeof(log_buf)) {
|
||||||
dev_err(adev->dev, "buffer overflow!\n");
|
dev_err(adev->dev, "buffer overflow!\n");
|
||||||
log_buf[sizeof(log_buf) - 1] = '\0';
|
log_buf[sizeof(log_buf) - 1] = '\0';
|
||||||
|
@ -1934,10 +1930,9 @@ static void smu_v13_0_6_log_thermal_throttling_event(struct smu_context *smu)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dev_warn(
|
dev_warn(adev->dev,
|
||||||
adev->dev,
|
"WARN: GPU is throttled, expect performance decrease. %s.\n",
|
||||||
"WARN: GPU thermal throttling temperature reached, expect performance decrease. %s.\n",
|
log_buf);
|
||||||
log_buf);
|
|
||||||
kgd2kfd_smi_event_throttle(
|
kgd2kfd_smi_event_throttle(
|
||||||
smu->adev->kfd.dev,
|
smu->adev->kfd.dev,
|
||||||
smu_cmn_get_indep_throttler_status(throttler_status,
|
smu_cmn_get_indep_throttler_status(throttler_status,
|
||||||
|
|
Loading…
Reference in New Issue