drm/amdgpu: split umc callbacks to ras and non-ras ones

umc ras is not managed by gpu driver when gpu is
connected to cpu through xgmi. split umc callbacks
into ras and non-ras ones so gpu driver only
initializes umc ras callbacks when it manages
umc ras.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: John Clements <John.Clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Hawking Zhang 2021-03-17 19:17:52 +08:00 committed by Alex Deucher
parent 52137ca852
commit 49070c4ea3
12 changed files with 51 additions and 32 deletions

View File

@ -391,8 +391,9 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
{
int r;
if (adev->umc.funcs && adev->umc.funcs->ras_late_init) {
r = adev->umc.funcs->ras_late_init(adev);
if (adev->umc.ras_funcs &&
adev->umc.ras_funcs->ras_late_init) {
r = adev->umc.ras_funcs->ras_late_init(adev);
if (r)
return r;
}
@ -418,8 +419,12 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
{
amdgpu_umc_ras_fini(adev);
if (adev->umc.ras_funcs &&
adev->umc.ras_funcs->ras_fini)
adev->umc.ras_funcs->ras_fini(adev);
amdgpu_mmhub_ras_fini(adev);
if (adev->gmc.xgmi.ras_funcs &&
adev->gmc.xgmi.ras_funcs->ras_fini)
adev->gmc.xgmi.ras_funcs->ras_fini(adev);

View File

@ -774,13 +774,15 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
switch (info->head.block) {
case AMDGPU_RAS_BLOCK__UMC:
if (adev->umc.funcs->query_ras_error_count)
adev->umc.funcs->query_ras_error_count(adev, &err_data);
if (adev->umc.ras_funcs &&
adev->umc.ras_funcs->query_ras_error_count)
adev->umc.ras_funcs->query_ras_error_count(adev, &err_data);
/* umc query_ras_error_address is also responsible for clearing
* error status
*/
if (adev->umc.funcs->query_ras_error_address)
adev->umc.funcs->query_ras_error_address(adev, &err_data);
if (adev->umc.ras_funcs &&
adev->umc.ras_funcs->query_ras_error_address)
adev->umc.ras_funcs->query_ras_error_address(adev, &err_data);
break;
case AMDGPU_RAS_BLOCK__SDMA:
if (adev->sdma.funcs->query_ras_error_count) {

View File

@ -60,8 +60,9 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev)
}
/* ras init of specific umc version */
if (adev->umc.funcs && adev->umc.funcs->err_cnt_init)
adev->umc.funcs->err_cnt_init(adev);
if (adev->umc.ras_funcs &&
adev->umc.ras_funcs->err_cnt_init)
adev->umc.ras_funcs->err_cnt_init(adev);
return 0;
@ -95,12 +96,12 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
if (adev->umc.funcs &&
adev->umc.funcs->query_ras_error_count)
adev->umc.funcs->query_ras_error_count(adev, ras_error_status);
if (adev->umc.ras_funcs &&
adev->umc.ras_funcs->query_ras_error_count)
adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status);
if (adev->umc.funcs &&
adev->umc.funcs->query_ras_error_address &&
if (adev->umc.ras_funcs &&
adev->umc.ras_funcs->query_ras_error_address &&
adev->umc.max_ras_err_cnt_per_query) {
err_data->err_addr =
kcalloc(adev->umc.max_ras_err_cnt_per_query,
@ -116,7 +117,7 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
/* umc query_ras_error_address is also responsible for clearing
* error status
*/
adev->umc.funcs->query_ras_error_address(adev, ras_error_status);
adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status);
}
/* only uncorrectable error needs gpu reset */

View File

@ -35,13 +35,17 @@
#define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++)
#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))
struct amdgpu_umc_funcs {
struct amdgpu_umc_ras_funcs {
void (*err_cnt_init)(struct amdgpu_device *adev);
int (*ras_late_init)(struct amdgpu_device *adev);
void (*ras_fini)(struct amdgpu_device *adev);
void (*query_ras_error_count)(struct amdgpu_device *adev,
void *ras_error_status);
void *ras_error_status);
void (*query_ras_error_address)(struct amdgpu_device *adev,
void *ras_error_status);
};
struct amdgpu_umc_funcs {
void (*init_registers)(struct amdgpu_device *adev);
};
@ -59,6 +63,7 @@ struct amdgpu_umc {
struct ras_common_if *ras_if;
const struct amdgpu_umc_funcs *funcs;
const struct amdgpu_umc_ras_funcs *ras_funcs;
};
int amdgpu_umc_ras_late_init(struct amdgpu_device *adev);

View File

@ -655,7 +655,7 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)
adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA;
adev->umc.channel_idx_tbl = &umc_v8_7_channel_idx_tbl[0][0];
adev->umc.funcs = &umc_v8_7_funcs;
adev->umc.ras_funcs = &umc_v8_7_ras_funcs;
break;
default:
break;

View File

@ -1155,7 +1155,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
adev->umc.funcs = &umc_v6_1_funcs;
adev->umc.ras_funcs = &umc_v6_1_ras_funcs;
break;
case CHIP_ARCTURUS:
adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
@ -1163,7 +1163,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
adev->umc.funcs = &umc_v6_1_funcs;
adev->umc.ras_funcs = &umc_v6_1_ras_funcs;
break;
default:
break;
@ -1194,12 +1194,6 @@ static int gmc_v9_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
gmc_v9_0_set_gmc_funcs(adev);
gmc_v9_0_set_irq_funcs(adev);
gmc_v9_0_set_umc_funcs(adev);
gmc_v9_0_set_mmhub_funcs(adev);
gmc_v9_0_set_gfxhub_funcs(adev);
if (adev->asic_type == CHIP_VEGA20 ||
adev->asic_type == CHIP_ARCTURUS)
adev->gmc.xgmi.supported = true;
@ -1210,6 +1204,12 @@ static int gmc_v9_0_early_init(void *handle)
adev->smuio.funcs->is_host_gpu_xgmi_supported(adev);
}
gmc_v9_0_set_gmc_funcs(adev);
gmc_v9_0_set_irq_funcs(adev);
gmc_v9_0_set_umc_funcs(adev);
gmc_v9_0_set_mmhub_funcs(adev);
gmc_v9_0_set_gfxhub_funcs(adev);
adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
adev->gmc.shared_aperture_end =
adev->gmc.shared_aperture_start + (4ULL << 30) - 1;

View File

@ -22,6 +22,7 @@
*/
#include "umc_v6_1.h"
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu.h"
#include "rsmu/rsmu_0_0_2_offset.h"
@ -464,9 +465,10 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
umc_v6_1_enable_umc_index_mode(adev);
}
const struct amdgpu_umc_funcs umc_v6_1_funcs = {
const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs = {
.err_cnt_init = umc_v6_1_err_cnt_init,
.ras_late_init = amdgpu_umc_ras_late_init,
.ras_fini = amdgpu_umc_ras_fini,
.query_ras_error_count = umc_v6_1_query_ras_error_count,
.query_ras_error_address = umc_v6_1_query_ras_error_address,
};

View File

@ -45,7 +45,7 @@
/* umc ce count initial value */
#define UMC_V6_1_CE_CNT_INIT (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD)
extern const struct amdgpu_umc_funcs umc_v6_1_funcs;
extern const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs;
extern const uint32_t
umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM];

View File

@ -22,6 +22,7 @@
*/
#include "umc_v6_7.h"
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu.h"
#include "umc/umc_6_7_0_offset.h"
@ -272,8 +273,9 @@ static void umc_v6_7_query_ras_error_address(struct amdgpu_device *adev,
}
}
const struct amdgpu_umc_funcs umc_v6_7_funcs = {
const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs = {
.ras_late_init = amdgpu_umc_ras_late_init,
.ras_fini = amdgpu_umc_ras_fini,
.query_ras_error_count = umc_v6_7_query_ras_error_count,
.query_ras_error_address = umc_v6_7_query_ras_error_address,
};

View File

@ -32,6 +32,6 @@
#define UMC_V6_7_INST_DIST 0x40000
extern const struct amdgpu_umc_funcs umc_v6_7_funcs;
extern const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs;
#endif

View File

@ -22,6 +22,7 @@
*/
#include "umc_v8_7.h"
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu.h"
#include "rsmu/rsmu_0_0_2_offset.h"
@ -323,9 +324,10 @@ static void umc_v8_7_err_cnt_init(struct amdgpu_device *adev)
}
}
const struct amdgpu_umc_funcs umc_v8_7_funcs = {
const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs = {
.err_cnt_init = umc_v8_7_err_cnt_init,
.ras_late_init = amdgpu_umc_ras_late_init,
.ras_fini = amdgpu_umc_ras_fini,
.query_ras_error_count = umc_v8_7_query_ras_error_count,
.query_ras_error_address = umc_v8_7_query_ras_error_address,
};

View File

@ -44,7 +44,7 @@
/* umc ce count initial value */
#define UMC_V8_7_CE_CNT_INIT (UMC_V8_7_CE_CNT_MAX - UMC_V8_7_CE_INT_THRESHOLD)
extern const struct amdgpu_umc_funcs umc_v8_7_funcs;
extern const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs;
extern const uint32_t
umc_v8_7_channel_idx_tbl[UMC_V8_7_UMC_INSTANCE_NUM][UMC_V8_7_CHANNEL_INSTANCE_NUM];