From 5828c46f2c07b97d758da6dc6afd5c374768d44d Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 12 Sep 2016 09:59:37 +0200 Subject: [PATCH] x86/mce/AMD: Save MCA_IPID in MCE struct on SMCA systems The MCA_IPID register uniquely identifies a bank's type and instance on Scalable MCA systems. We should save the value of this register in struct mce along with the other relevant error information. This ensures that we can decode errors without relying on system software to correlate the bank to the type. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1472680624-34221-1-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner --- arch/x86/include/uapi/asm/mce.h | 1 + arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++++-- arch/x86/kernel/cpu/mcheck/mce_amd.c | 8 ++++++-- include/trace/events/mce.h | 5 ++++- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 8c75fbc94c3f..69a6e07e3149 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -27,6 +27,7 @@ struct mce { __u32 apicid; /* CPU initial apic ID */ __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ + __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 91a179b95fd0..17e9ff011c0e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -583,8 +583,12 @@ static void mce_read_aux(struct mce *m, int i) } } - if (mce_flags.smca && (m->status & MCI_STATUS_SYNDV)) - m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i)); + if (mce_flags.smca) { + m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i)); + + if (m->status & MCI_STATUS_SYNDV) + m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i)); + } } static bool memory_error(struct mce *m) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 16766e09c2b7..d2f92ab5322f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -564,8 +564,12 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) if (m.status & MCI_STATUS_ADDRV) rdmsrl(msr_addr, m.addr); - if (mce_flags.smca && (m.status & MCI_STATUS_SYNDV)) - rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd); + if (mce_flags.smca) { + rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid); + + if (m.status & MCI_STATUS_SYNDV) + rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd); + } mce_log(&m); diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h index 8be5268caf28..70f02149808c 100644 --- a/include/trace/events/mce.h +++ b/include/trace/events/mce.h @@ -21,6 +21,7 @@ TRACE_EVENT(mce_record, __field( u64, addr ) __field( u64, misc ) __field( u64, synd ) + __field( u64, ipid ) __field( u64, ip ) __field( u64, tsc ) __field( u64, walltime ) @@ -40,6 +41,7 @@ TRACE_EVENT(mce_record, __entry->addr = m->addr; __entry->misc = m->misc; __entry->synd = m->synd; + __entry->ipid = m->ipid; __entry->ip = m->ip; __entry->tsc = m->tsc; __entry->walltime = m->time; @@ -52,10 +54,11 @@ TRACE_EVENT(mce_record, __entry->cpuvendor = m->cpuvendor; ), - TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", + TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", __entry->cpu, __entry->mcgcap, __entry->mcgstatus, __entry->bank, __entry->status, + __entry->ipid, __entry->addr, __entry->misc, __entry->synd, __entry->cs, __entry->ip, __entry->tsc,