From a6c44d2538c469f412c3fded0de2290494d762d7 Mon Sep 17 00:00:00 2001 From: John Clements Date: Fri, 17 Jan 2020 12:18:00 +0800 Subject: [PATCH] drm/amdgpu: added support to get mGPU DRAM base resolves issue with RAS error injection in mGPU configuration Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_df.h | 3 + drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 20 +++++++ drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 59 ++++++++++++++++++- .../amd/include/asic_reg/df/df_3_6_offset.h | 3 + .../amd/include/asic_reg/df/df_3_6_sh_mask.h | 8 +++ 5 files changed, 92 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h index 61a26c15c8dd..057f6ea645d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -52,6 +52,9 @@ struct amdgpu_df_funcs { uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val); void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val, uint32_t ficadl_val, uint32_t ficadh_val); + uint64_t (*get_dram_base_addr)(struct amdgpu_device *adev, + uint32_t df_inst); + uint32_t (*get_df_inst_id)(struct amdgpu_device *adev); }; struct amdgpu_df { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 766be7f18282..cef94e2169fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -742,6 +742,20 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, return 0; } +uint64_t get_xgmi_relative_phy_addr(struct amdgpu_device *adev, uint64_t addr) +{ + uint32_t df_inst_id; + + if ((!adev->df.funcs) || + (!adev->df.funcs->get_df_inst_id) || + (!adev->df.funcs->get_dram_base_addr)) + return addr; + + df_inst_id = adev->df.funcs->get_df_inst_id(adev); + + return addr + adev->df.funcs->get_dram_base_addr(adev, df_inst_id); +} + /* wrapper of psp_ras_trigger_error */ int amdgpu_ras_error_inject(struct amdgpu_device *adev, struct ras_inject_if *info) @@ -759,6 +773,12 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, if (!obj) return -EINVAL; + /* Calculate XGMI relative offset */ + if (adev->gmc.xgmi.num_physical_nodes > 1) { + block_info.address = get_xgmi_relative_phy_addr(adev, + block_info.address); + } + switch (info->head.block) { case AMDGPU_RAS_BLOCK__GFX: if (adev->gfx.funcs->ras_error_inject) diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index f51326598a8c..5a1bd8ed1a6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -27,6 +27,9 @@ #include "df/df_3_6_offset.h" #include "df/df_3_6_sh_mask.h" +#define DF_3_6_SMN_REG_INST_DIST 0x8 +#define DF_3_6_INST_CNT 8 + static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 32, 0, 0, 0, 2, 4, 8}; @@ -683,6 +686,58 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, } } +static uint64_t df_v3_6_get_dram_base_addr(struct amdgpu_device *adev, + uint32_t df_inst) +{ + uint32_t base_addr_reg_val = 0; + uint64_t base_addr = 0; + + base_addr_reg_val = RREG32_PCIE(smnDF_CS_UMC_AON0_DramBaseAddress0 + + df_inst * DF_3_6_SMN_REG_INST_DIST); + + if (REG_GET_FIELD(base_addr_reg_val, + DF_CS_UMC_AON0_DramBaseAddress0, + AddrRngVal) == 0) { + DRM_WARN("address range not valid"); + return 0; + } + + base_addr = REG_GET_FIELD(base_addr_reg_val, + DF_CS_UMC_AON0_DramBaseAddress0, + DramBaseAddr); + + return base_addr << 28; +} + +static uint32_t df_v3_6_get_df_inst_id(struct amdgpu_device *adev) +{ + uint32_t xgmi_node_id = 0; + uint32_t df_inst_id = 0; + + /* Walk through DF dst nodes to find current XGMI node */ + for (df_inst_id = 0; df_inst_id < DF_3_6_INST_CNT; df_inst_id++) { + + xgmi_node_id = RREG32_PCIE(smnDF_CS_UMC_AON0_DramLimitAddress0 + + df_inst_id * DF_3_6_SMN_REG_INST_DIST); + xgmi_node_id = REG_GET_FIELD(xgmi_node_id, + DF_CS_UMC_AON0_DramLimitAddress0, + DstFabricID); + + /* TODO: establish reason dest fabric id is offset by 7 */ + xgmi_node_id = xgmi_node_id >> 7; + + if (adev->gmc.xgmi.physical_node_id == xgmi_node_id) + break; + } + + if (df_inst_id == DF_3_6_INST_CNT) { + DRM_WARN("cant match df dst id with gpu node"); + return 0; + } + + return df_inst_id; +} + const struct amdgpu_df_funcs df_v3_6_funcs = { .sw_init = df_v3_6_sw_init, .sw_fini = df_v3_6_sw_fini, @@ -696,5 +751,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = { .pmc_stop = df_v3_6_pmc_stop, .pmc_get_count = df_v3_6_pmc_get_count, .get_fica = df_v3_6_get_fica, - .set_fica = df_v3_6_set_fica + .set_fica = df_v3_6_set_fica, + .get_dram_base_addr = df_v3_6_get_dram_base_addr, + .get_df_inst_id = df_v3_6_get_df_inst_id }; diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h index 87c84691b5be..bb2c9c7a18df 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h @@ -71,4 +71,7 @@ #define smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3 0x1d098UL #define smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3 0x1d09cUL +#define smnDF_CS_UMC_AON0_DramBaseAddress0 0x1c110UL +#define smnDF_CS_UMC_AON0_DramLimitAddress0 0x1c114UL + #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h index 65e9f756e86e..7afa87c7ff54 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h @@ -53,4 +53,12 @@ #define DF_CS_UMC_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000E00L #define DF_CS_UMC_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L +//DF_CS_UMC_AON0_DramLimitAddress0 +#define DF_CS_UMC_AON0_DramLimitAddress0__DstFabricID__SHIFT 0x0 +#define DF_CS_UMC_AON0_DramLimitAddress0__AllowReqIO__SHIFT 0xa +#define DF_CS_UMC_AON0_DramLimitAddress0__DramLimitAddr__SHIFT 0xc +#define DF_CS_UMC_AON0_DramLimitAddress0__DstFabricID_MASK 0x000003FFL +#define DF_CS_UMC_AON0_DramLimitAddress0__AllowReqIO_MASK 0x00000400L +#define DF_CS_UMC_AON0_DramLimitAddress0__DramLimitAddr_MASK 0xFFFFF000L + #endif