x86/mce/zhaoxin: Enable mcelog to decode PCIE, ZDI/ZPI, and DRAM errors

zhaoxin inclusion
category: feature

-------------------

In FFM (Firmware First Mode), mcelog cannot decode PCIe, ZDI/ZPI, or DRAM
errors.
The purpose of this patch is to enable mcelog to decode PCIE, ZDI/ZPI, and
DRAM errors that occur on Zhaoxin processors, so that the cause of these
errors can be quickly located.

Signed-off-by: leoliu-oc <leoliu-oc@zhaoxin.com>
This commit is contained in:
leoliu-oc 2024-08-15 15:36:38 +08:00
parent 336eee5394
commit 1e425ba912
6 changed files with 242 additions and 5 deletions

View File

@ -289,6 +289,12 @@ struct cper_sec_mem_err;
extern void apei_mce_report_mem_error(int corrected, extern void apei_mce_report_mem_error(int corrected,
struct cper_sec_mem_err *mem_err); struct cper_sec_mem_err *mem_err);
extern void zx_apei_mce_report_mem_error(struct cper_sec_mem_err *mem_err);
struct cper_sec_pcie;
extern void zx_apei_mce_report_pcie_error(int corrected, struct cper_sec_pcie *pcie_err);
struct cper_sec_proc_generic;
extern void zx_apei_mce_report_zdi_error(struct cper_sec_proc_generic *zdi_err);
/* /*
* Enumerate new IP types and HWID values in AMD processors which support * Enumerate new IP types and HWID values in AMD processors which support
* Scalable MCA. * Scalable MCA.

View File

@ -40,10 +40,36 @@ int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data)
void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
{ {
#ifdef CONFIG_X86_MCE #ifdef CONFIG_X86_MCE
if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN ||
boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)
zx_apei_mce_report_mem_error(mem_err);
else
apei_mce_report_mem_error(sev, mem_err); apei_mce_report_mem_error(sev, mem_err);
#endif #endif
} }
/*
 * x86 hook for CPER PCIe error sections delivered via APEI.
 * On Zhaoxin/Centaur parts the record is forwarded to the vendor
 * decoder so it shows up as a machine-check event; other vendors
 * fall through to the normal AER handling path unchanged.
 */
void arch_apei_report_pcie_error(int sev, struct cper_sec_pcie *pcie_err)
{
#ifdef CONFIG_X86_MCE
	u8 vendor = boot_cpu_data.x86_vendor;

	if (vendor == X86_VENDOR_CENTAUR || vendor == X86_VENDOR_ZHAOXIN)
		zx_apei_mce_report_pcie_error(sev, pcie_err);
#endif
}
/*
 * x86 hook for non-standard CPER sections delivered via APEI.
 * Returns true when the section was consumed as a Zhaoxin ZDI/ZPI
 * error (generic-processor GUID on a Zhaoxin/Centaur CPU); false
 * tells the caller to keep its generic non-standard-event handling.
 */
bool arch_apei_report_zdi_error(guid_t *sec_type, struct cper_sec_proc_generic *zdi_err)
{
	bool handled = false;
#ifdef CONFIG_X86_MCE
	if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC) &&
	    (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN ||
	     boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)) {
		zx_apei_mce_report_zdi_error(zdi_err);
		handled = true;
	}
#endif
	return handled;
}
int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id) int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
{ {
return apei_smca_report_x86_error(ctx_info, lapic_id); return apei_smca_report_x86_error(ctx_info, lapic_id);

View File

@ -63,6 +63,173 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
} }
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
void zx_apei_mce_report_mem_error(struct cper_sec_mem_err *mem_err)
{
struct mce m;
int apei_error = 0;
if (boot_cpu_data.x86 != 7 || boot_cpu_data.x86_model != 91)
return;
if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
return;
mce_setup(&m);
m.misc = 0;
m.misc = mem_err->module;
m.addr = mem_err->physical_addr;
if (mem_err->card == 0)
m.bank = 9;
else
m.bank = 10;
switch (mem_err->error_type) {
case 2:
m.status = 0x9c20004000010080;
break;
case 3:
m.status = 0xbe40000000020090;
apei_error = apei_write_mce(&m);
break;
case 8:
if (mem_err->requestor_id == 2) {
m.status = 0x98200040000400b0;
} else if (mem_err->requestor_id == 3) {
m.status = 0xba400000000600a0;
apei_error = apei_write_mce(&m);
} else if (mem_err->requestor_id == 4) {
m.status = 0x98200100000300b0;
} else if (mem_err->requestor_id == 5) {
m.status = 0xba000000000500b0;
apei_error = apei_write_mce(&m);
} else {
pr_info("Undefined Parity error\n");
}
break;
case 10:
if (mem_err->requestor_id == 6) {
m.status = 0xba400000000700a0;
apei_error = apei_write_mce(&m);
} else if (mem_err->requestor_id == 7) {
m.status = 0xba000000000800b0;
apei_error = apei_write_mce(&m);
} else {
pr_info("Undefined dvad error\n");
}
break;
case 13:
m.status = 0x9c200040000100c0;
break;
case 14:
m.status = 0xbd000000000200c0;
apei_error = apei_write_mce(&m);
break;
}
mce_log(&m);
}
EXPORT_SYMBOL_GPL(zx_apei_mce_report_mem_error);
void zx_apei_mce_report_pcie_error(int severity, struct cper_sec_pcie *pcie_err)
{
struct mce m;
int apei_error = 0;
if (boot_cpu_data.x86 != 7 || boot_cpu_data.x86_model != 91)
return;
mce_setup(&m);
m.addr = 0;
m.misc = 0;
m.misc |= (u64)pcie_err->device_id.segment << 32;
m.misc |= pcie_err->device_id.bus << 24;
m.misc |= pcie_err->device_id.device << 19;
m.misc |= pcie_err->device_id.function << 16;
m.bank = 6;
switch (severity) {
case 1:
m.status = 0x9820004000020e0b;
break;
case 2:
m.status = 0xba20000000010e0b;
break;
case 3:
m.status = 0xbd20000000000e0b;
apei_error = apei_write_mce(&m);
break;
default:
pr_info("Undefine pcie error\n");
break;
}
mce_log(&m);
}
EXPORT_SYMBOL_GPL(zx_apei_mce_report_pcie_error);
/*
 * Translate a firmware-first CPER generic-processor section carrying a
 * Zhaoxin ZDI/ZPI error into a synthetic MCE record (bank 5) so mcelog
 * can decode it on Zhaoxin family 7 model 91 processors.
 *
 * m.misc encodes the requestor the same way the PCIe path encodes a
 * device location: low byte of requestor_id in bits 23:19 (device),
 * high byte in bits 31:24 (bus).
 *
 * BUGFIX: the original code computed
 *     ((requestor_id & 0xff00) >> 8) >> 24
 * which is always 0 for a byte-sized value; by analogy with the PCIe
 * encoding (bus << 24) the final shift must be a LEFT shift.
 *
 * The m.status constants are pre-encoded MCi_STATUS values keyed by
 * responder id; uncorrected variants are persisted via apei_write_mce()
 * (best effort — the return value is intentionally not acted upon).
 */
void zx_apei_mce_report_zdi_error(struct cper_sec_proc_generic *zdi_err)
{
	struct mce m;
	int apei_error = 0;

	/* This encoding is only valid for Zhaoxin family 7, model 91. */
	if (boot_cpu_data.x86 != 7 || boot_cpu_data.x86_model != 91)
		return;

	mce_setup(&m);
	m.misc = 0;
	m.misc |= (zdi_err->requestor_id & 0xff) << 19;
	m.misc |= ((zdi_err->requestor_id & 0xff00) >> 8) << 24;
	m.bank = 5;

	switch (zdi_err->responder_id) {
	case 2: /* uncorrected — persist across panic */
		m.status = 0xba00000000040e0f;
		apei_error = apei_write_mce(&m);
		break;
	case 3:
		m.status = 0xba00000000030e0f;
		apei_error = apei_write_mce(&m);
		break;
	case 4:
		m.status = 0xba00000000020e0f;
		apei_error = apei_write_mce(&m);
		break;
	case 5:
		m.status = 0xba00000000010e0f;
		apei_error = apei_write_mce(&m);
		break;
	case 6: /* corrected */
		m.status = 0x9820004000090e0f;
		break;
	case 7:
		m.status = 0x9820004000080e0f;
		break;
	case 8:
		m.status = 0x9820004000070e0f;
		break;
	case 9:
		m.status = 0x9820004000060e0f;
		break;
	case 10:
		m.status = 0x9820004000050e0f;
		break;
	case 11:
	case 12:
	case 13:
	case 14:
	case 15:
		m.status = 0x98200040000b0e0f;
		break;
	case 16:
	case 17:
	case 18:
		m.status = 0x98200040000c0e0f;
		break;
	default:
		pr_info("Undefined ZDI Error\n");
		break;
	}

	mce_log(&m);
}
EXPORT_SYMBOL_GPL(zx_apei_mce_report_zdi_error);
int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id) int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
{ {
const u64 *i_mce = ((const u64 *) (ctx_info + 1)); const u64 *i_mce = ((const u64 *) (ctx_info + 1));

View File

@ -773,6 +773,17 @@ void __weak arch_apei_report_mem_error(int sev,
} }
EXPORT_SYMBOL_GPL(arch_apei_report_mem_error); EXPORT_SYMBOL_GPL(arch_apei_report_mem_error);
/*
 * Weak default for CPER PCIe error sections: a no-op. Architectures
 * that can decode the record into their native machine-check format
 * (e.g. x86 Zhaoxin) provide a strong override.
 */
void __weak arch_apei_report_pcie_error(int sev, struct cper_sec_pcie *pcie_err)
{
}
EXPORT_SYMBOL_GPL(arch_apei_report_pcie_error);
/*
 * Weak default for non-standard (ZDI/ZPI) CPER sections: returns false,
 * meaning "not handled here", so the GHES core falls back to its generic
 * non-standard-event logging. Architectures with a native decoder
 * override this and return true when they consume the record.
 */
bool __weak arch_apei_report_zdi_error(guid_t *sec_type, struct cper_sec_proc_generic *zdi_err)
{
	return false;
}
EXPORT_SYMBOL_GPL(arch_apei_report_zdi_error);
int apei_osc_setup(void) int apei_osc_setup(void)
{ {
static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c"; static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c";

View File

@ -703,6 +703,9 @@ static bool ghes_do_proc(struct ghes *ghes,
queued = ghes_handle_memory_failure(gdata, sev, sync); queued = ghes_handle_memory_failure(gdata, sev, sync);
} }
else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
arch_apei_report_pcie_error(sec_sev, pcie_err);
ghes_handle_aer(gdata); ghes_handle_aer(gdata);
} }
else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
@ -710,12 +713,15 @@ static bool ghes_do_proc(struct ghes *ghes,
} else { } else {
void *err = acpi_hest_get_payload(gdata); void *err = acpi_hest_get_payload(gdata);
if (!arch_apei_report_zdi_error(sec_type,
(struct cper_sec_proc_generic *)err)) {
ghes_defer_non_standard_event(gdata, sev); ghes_defer_non_standard_event(gdata, sev);
log_non_standard_event(sec_type, fru_id, fru_text, log_non_standard_event(sec_type, fru_id, fru_text,
sec_sev, err, sec_sev, err,
gdata->error_data_length); gdata->error_data_length);
} }
} }
}
return queued; return queued;
} }
@ -1091,6 +1097,8 @@ static int ghes_in_nmi_queue_one_entry(struct ghes *ghes,
u32 len, node_len; u32 len, node_len;
u64 buf_paddr; u64 buf_paddr;
int sev, rc; int sev, rc;
struct acpi_hest_generic_data *gdata;
guid_t *sec_type;
if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG)) if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG))
return -EOPNOTSUPP; return -EOPNOTSUPP;
@ -1126,6 +1134,23 @@ static int ghes_in_nmi_queue_one_entry(struct ghes *ghes,
sev = ghes_severity(estatus->error_severity); sev = ghes_severity(estatus->error_severity);
if (sev >= GHES_SEV_PANIC) { if (sev >= GHES_SEV_PANIC) {
apei_estatus_for_each_section(estatus, gdata) {
sec_type = (guid_t *)gdata->section_type;
if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
arch_apei_report_mem_error(sev, mem_err);
} else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
arch_apei_report_pcie_error(sev, pcie_err);
} else if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
struct cper_sec_proc_generic *zdi_err =
acpi_hest_get_payload(gdata);
arch_apei_report_zdi_error(sec_type, zdi_err);
}
}
ghes_print_queued_estatus(); ghes_print_queued_estatus();
__ghes_panic(ghes, estatus, buf_paddr, fixmap_idx); __ghes_panic(ghes, estatus, buf_paddr, fixmap_idx);
} }

View File

@ -52,6 +52,8 @@ int erst_clear(u64 record_id);
int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data); int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data);
void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err); void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err);
void arch_apei_report_pcie_error(int sev, struct cper_sec_pcie *pcie_err);
bool arch_apei_report_zdi_error(guid_t *sec_type, struct cper_sec_proc_generic *zdi_err);
#endif #endif
#endif #endif