amd-drm-fixes-6.5-2023-08-09:

amdgpu:
 - S/G display workaround for platforms with >= 64G of memory
 - S0i3 fix
 - SMU 13.0.0 fixes
 - Disable SMU 13.x OD features temporarily while the interface is reworked
   to enable additional functionality
 - Fix cursor gamma issues on DCN3+
 - SMU 13.0.6 fixes
 - Fix possible UAF in CS IOCTL
 - Polaris display regression fix
 - Only enable CP GFX shadowing on SR-IOV
 
 amdkfd:
 - Raven/Picasso KFD regression fix
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQQgO5Idg2tXNTSZAr293/aFa7yZ2AUCZNPY2QAKCRC93/aFa7yZ
 2FvQAP9jqcDiWVXhxilca6TywZFF36Ip2/yrONeEfjBDzDG7gwD/cpKZFLDL6qPP
 wyuIN85zmfCo7Goe2y4jZwMUIhF6gAM=
 =I8Ya
 -----END PGP SIGNATURE-----

Merge tag 'amd-drm-fixes-6.5-2023-08-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes

amd-drm-fixes-6.5-2023-08-09:

amdgpu:
- S/G display workaround for platforms with >= 64G of memory
- S0i3 fix
- SMU 13.0.0 fixes
- Disable SMU 13.x OD features temporarily while the interface is reworked
  to enable additional functionality
- Fix cursor gamma issues on DCN3+
- SMU 13.0.6 fixes
- Fix possible UAF in CS IOCTL
- Polaris display regression fix
- Only enable CP GFX shadowing on SR-IOV

amdkfd:
- Raven/Picasso KFD regression fix

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230809182827.8135-1-alexander.deucher@amd.com
This commit is contained in:
Dave Airlie 2023-08-11 14:49:17 +10:00
commit fbe8ff726a
16 changed files with 81 additions and 54 deletions

View File

@ -1296,6 +1296,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
int amdgpu_device_pci_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);
bool amdgpu_sg_display_supported(struct amdgpu_device *adev);
bool amdgpu_device_pcie_dynamic_switching_supported(void);
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
bool amdgpu_device_aspm_support_quirk(void);

View File

@ -295,7 +295,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
if (!p->gang_size) {
ret = -EINVAL;
goto free_partial_kdata;
goto free_all_kdata;
}
for (i = 0; i < p->gang_size; ++i) {

View File

@ -1458,6 +1458,32 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
return true;
}
/*
* On APUs with >= 64GB white flickering has been observed w/ SG enabled.
* Disable S/G on such systems until we have a proper fix.
* https://gitlab.freedesktop.org/drm/amd/-/issues/2354
* https://gitlab.freedesktop.org/drm/amd/-/issues/2735
*/
bool amdgpu_sg_display_supported(struct amdgpu_device *adev)
{
switch (amdgpu_sg_display) {
case -1:
break;
case 0:
return false;
case 1:
return true;
default:
return false;
}
if ((totalram_pages() << (PAGE_SHIFT - 10)) +
(adev->gmc.real_vram_size / 1024) >= 64000000) {
DRM_WARN("Disabling S/G due to >=64GB RAM\n");
return false;
}
return true;
}
/*
* Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
* speed switching. Until we have confirmation from Intel that a specific host

View File

@ -471,8 +471,12 @@ static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 3):
if ((adev->gfx.me_fw_version >= 1505) &&
(adev->gfx.pfp_fw_version >= 1600) &&
(adev->gfx.mec_fw_version >= 512))
(adev->gfx.mec_fw_version >= 512)) {
if (amdgpu_sriov_vf(adev))
adev->gfx.cp_gfx_shadow = true;
else
adev->gfx.cp_gfx_shadow = false;
}
break;
default:
adev->gfx.cp_gfx_shadow = false;

View File

@ -137,14 +137,15 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
int ret;
int retry_loop;
/* Wait for bootloader to signify that it is ready having bit 31 of
* C2PMSG_35 set to 1. All other bits are expected to be cleared.
* If there is an error in processing command, bits[7:0] will be set.
* This is applicable for PSP v13.0.6 and newer.
*/
for (retry_loop = 0; retry_loop < 10; retry_loop++) {
/* Wait for bootloader to signify that is
ready having bit 31 of C2PMSG_35 set to 1 */
ret = psp_wait_for(psp,
SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
0x80000000,
0x80000000,
false);
ret = psp_wait_for(
psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
0x80000000, 0xffffffff, false);
if (ret == 0)
return 0;

View File

@ -1543,11 +1543,7 @@ static bool kfd_ignore_crat(void)
if (ignore_crat)
return true;
#ifndef KFD_SUPPORT_IOMMU_V2
ret = true;
#else
ret = false;
#endif
return ret;
}

View File

@ -194,11 +194,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
kfd_device_info_set_event_interrupt_class(kfd);
/* Raven */
if (gc_version == IP_VERSION(9, 1, 0) ||
gc_version == IP_VERSION(9, 2, 2))
kfd->device_info.needs_iommu_device = true;
if (gc_version < IP_VERSION(11, 0, 0)) {
/* Navi2x+, Navi1x+ */
if (gc_version == IP_VERSION(10, 3, 6))
@ -233,10 +228,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
asic_type != CHIP_TONGA)
kfd->device_info.supports_cwsr = true;
if (asic_type == CHIP_KAVERI ||
asic_type == CHIP_CARRIZO)
kfd->device_info.needs_iommu_device = true;
if (asic_type != CHIP_HAWAII && !vf)
kfd->device_info.needs_pci_atomics = true;
}
@ -249,7 +240,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
uint32_t gfx_target_version = 0;
switch (adev->asic_type) {
#ifdef KFD_SUPPORT_IOMMU_V2
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
gfx_target_version = 70000;
@ -262,7 +252,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
if (!vf)
f2g = &gfx_v8_kfd2kgd;
break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_HAWAII:
gfx_target_version = 70001;
@ -298,7 +287,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
gfx_target_version = 90000;
f2g = &gfx_v9_kfd2kgd;
break;
#ifdef KFD_SUPPORT_IOMMU_V2
/* Raven */
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
@ -306,7 +294,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
if (!vf)
f2g = &gfx_v9_kfd2kgd;
break;
#endif
/* Vega12 */
case IP_VERSION(9, 2, 1):
gfx_target_version = 90004;

View File

@ -2538,18 +2538,12 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
}
switch (dev->adev->asic_type) {
case CHIP_CARRIZO:
device_queue_manager_init_vi(&dqm->asic_ops);
break;
case CHIP_KAVERI:
device_queue_manager_init_cik(&dqm->asic_ops);
break;
case CHIP_HAWAII:
device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
break;
case CHIP_CARRIZO:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:

View File

@ -1638,9 +1638,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
}
break;
}
if (init_data.flags.gpu_vm_support &&
(amdgpu_sg_display == 0))
init_data.flags.gpu_vm_support = false;
if (init_data.flags.gpu_vm_support)
init_data.flags.gpu_vm_support = amdgpu_sg_display_supported(adev);
if (init_data.flags.gpu_vm_support)
adev->mode_info.gpu_vm_support = true;

View File

@ -1320,7 +1320,7 @@ int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
if (computed_streams[i])
continue;
if (!res_pool->funcs->remove_stream_from_ctx ||
if (res_pool->funcs->remove_stream_from_ctx &&
res_pool->funcs->remove_stream_from_ctx(stream->ctx->dc, dc_state, stream) != DC_OK)
return -EINVAL;

View File

@ -777,7 +777,8 @@ void dce110_edp_wait_for_hpd_ready(
dal_gpio_destroy_irq(&hpd);
/* ensure that the panel is detected */
ASSERT(edp_hpd_high);
if (!edp_hpd_high)
DC_LOG_DC("%s: wait timed out!\n", __func__);
}
void dce110_edp_power_control(

View File

@ -357,8 +357,11 @@ void dpp3_set_cursor_attributes(
int cur_rom_en = 0;
if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA ||
color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA)
color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) {
if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) {
cur_rom_en = 1;
}
}
REG_UPDATE_3(CURSOR0_CONTROL,
CUR0_MODE, color_format,

View File

@ -1581,9 +1581,9 @@ static int smu_disable_dpms(struct smu_context *smu)
/*
* For SMU 13.0.4/11, PMFW will handle the features disablement properly
* for gpu reset case. Driver involvement is unnecessary.
* for gpu reset and S0i3 cases. Driver involvement is unnecessary.
*/
if (amdgpu_in_reset(adev)) {
if (amdgpu_in_reset(adev) || adev->in_s0ix) {
switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 4):
case IP_VERSION(13, 0, 11):

View File

@ -331,11 +331,13 @@ static int smu_v13_0_0_check_powerplay_table(struct smu_context *smu)
struct smu_13_0_0_powerplay_table *powerplay_table =
table_context->power_play_table;
struct smu_baco_context *smu_baco = &smu->smu_baco;
#if 0
PPTable_t *pptable = smu->smu_table.driver_pptable;
const OverDriveLimits_t * const overdrive_upperlimits =
&pptable->SkuTable.OverDriveLimitsBasicMax;
const OverDriveLimits_t * const overdrive_lowerlimits =
&pptable->SkuTable.OverDriveLimitsMin;
#endif
if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_HARDWAREDC)
smu->dc_controlled_by_gpio = true;
@ -347,18 +349,27 @@ static int smu_v13_0_0_check_powerplay_table(struct smu_context *smu)
if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_MACO)
smu_baco->maco_support = true;
/*
* We are in the transition to a new OD mechanism.
* Disable the OD feature support for SMU13 temporarily.
* TODO: get this reverted when new OD mechanism online
*/
#if 0
if (!overdrive_lowerlimits->FeatureCtrlMask ||
!overdrive_upperlimits->FeatureCtrlMask)
smu->od_enabled = false;
table_context->thermal_controller_type =
powerplay_table->thermal_controller_type;
/*
* Instead of having its own buffer space and get overdrive_table copied,
* smu->od_settings just points to the actual overdrive_table
*/
smu->od_settings = &powerplay_table->overdrive_table;
#else
smu->od_enabled = false;
#endif
table_context->thermal_controller_type =
powerplay_table->thermal_controller_type;
return 0;
}
@ -1140,7 +1151,6 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu,
(OverDriveTableExternal_t *)smu->smu_table.overdrive_table;
struct smu_13_0_dpm_table *single_dpm_table;
struct smu_13_0_pcie_table *pcie_table;
const int link_width[] = {0, 1, 2, 4, 8, 12, 16};
uint32_t gen_speed, lane_width;
int i, curr_freq, size = 0;
int32_t min_value, max_value;
@ -1256,7 +1266,7 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu,
(pcie_table->pcie_lane[i] == 6) ? "x16" : "",
pcie_table->clk_freq[i],
(gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) &&
(lane_width == DECODE_LANE_WIDTH(link_width[pcie_table->pcie_lane[i]])) ?
(lane_width == DECODE_LANE_WIDTH(pcie_table->pcie_lane[i])) ?
"*" : "");
break;

View File

@ -1993,9 +1993,8 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
gpu_metrics->average_socket_power =
SMUQ10_TO_UINT(metrics->SocketPower);
/* Energy is reported in 15.625mJ units */
gpu_metrics->energy_accumulator =
SMUQ10_TO_UINT(metrics->SocketEnergyAcc);
/* Energy counter reported in 15.259uJ (2^-16) units */
gpu_metrics->energy_accumulator = metrics->SocketEnergyAcc;
gpu_metrics->current_gfxclk =
SMUQ10_TO_UINT(metrics->GfxclkFrequency[xcc0]);

View File

@ -323,10 +323,12 @@ static int smu_v13_0_7_check_powerplay_table(struct smu_context *smu)
struct smu_baco_context *smu_baco = &smu->smu_baco;
PPTable_t *smc_pptable = table_context->driver_pptable;
BoardTable_t *BoardTable = &smc_pptable->BoardTable;
#if 0
const OverDriveLimits_t * const overdrive_upperlimits =
&smc_pptable->SkuTable.OverDriveLimitsBasicMax;
const OverDriveLimits_t * const overdrive_lowerlimits =
&smc_pptable->SkuTable.OverDriveLimitsMin;
#endif
if (powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_HARDWAREDC)
smu->dc_controlled_by_gpio = true;
@ -338,18 +340,22 @@ static int smu_v13_0_7_check_powerplay_table(struct smu_context *smu)
if (smu_baco->platform_support && (BoardTable->HsrEnabled || BoardTable->VddqOffEnabled))
smu_baco->maco_support = true;
#if 0
if (!overdrive_lowerlimits->FeatureCtrlMask ||
!overdrive_upperlimits->FeatureCtrlMask)
smu->od_enabled = false;
table_context->thermal_controller_type =
powerplay_table->thermal_controller_type;
/*
* Instead of having its own buffer space and get overdrive_table copied,
* smu->od_settings just points to the actual overdrive_table
*/
smu->od_settings = &powerplay_table->overdrive_table;
#else
smu->od_enabled = false;
#endif
table_context->thermal_controller_type =
powerplay_table->thermal_controller_type;
return 0;
}