From 9f1b19b977ee3cbd3fe9135ff63dbf221eac1d6a Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Fri, 25 Feb 2022 13:33:40 -0600 Subject: [PATCH 1/4] x86/mce: Avoid unnecessary padding in struct mce_bank Convert struct mce_bank member "init" from bool to a bitfield to get rid of unnecessary padding. $ pahole -C mce_bank arch/x86/kernel/cpu/mce/core.o before: /* size: 16, cachelines: 1, members: 2 */ /* padding: 7 */ /* last cacheline: 16 bytes */ after: /* size: 16, cachelines: 1, members: 3 */ /* last cacheline: 16 bytes */ No functional changes. Signed-off-by: Smita Koralahalli Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220225193342.215780-2-Smita.KoralahalliChannabasappa@amd.com --- arch/x86/kernel/cpu/mce/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 981496e6bc0e..d775fcd74e98 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -69,7 +69,9 @@ DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks); struct mce_bank { u64 ctl; /* subevents to enable */ - bool init; /* initialise bank? */ + + __u64 init : 1, /* initialise bank? */ + __reserved_1 : 63; }; static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array); From e5f28623ceb103e13fc3d7bd45edf9818b227fd0 Mon Sep 17 00:00:00 2001 From: Ammar Faizi Date: Tue, 29 Mar 2022 17:47:05 +0700 Subject: [PATCH 2/4] x86/MCE/AMD: Fix memory leak when threshold_create_bank() fails In mce_threshold_create_device(), if threshold_create_bank() fails, the previously allocated threshold banks array @bp will be leaked because the call to mce_threshold_remove_device() will not free it. This happens because mce_threshold_remove_device() fetches the pointer through the threshold_banks per-CPU variable but bp is written there only after the bank creation is successful, and not before, when threshold_create_bank() fails. Add a helper which unwinds all the bank creation work previously done and pass into it the previously allocated threshold banks array for freeing. [ bp: Massage. ] Fixes: 6458de97fc15 ("x86/mce/amd: Straighten CPU hotplug path") Co-developed-by: Alviro Iskandar Setiawan Signed-off-by: Alviro Iskandar Setiawan Co-developed-by: Yazen Ghannam Signed-off-by: Yazen Ghannam Signed-off-by: Ammar Faizi Signed-off-by: Borislav Petkov Cc: Link: https://lore.kernel.org/r/20220329104705.65256-3-ammarfaizi2@gnuweeb.org --- arch/x86/kernel/cpu/mce/amd.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 1940d305db1c..1c87501e0fa3 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -1294,10 +1294,23 @@ out_free: kfree(bank); } +static void __threshold_remove_device(struct threshold_bank **bp) +{ + unsigned int bank, numbanks = this_cpu_read(mce_num_banks); + + for (bank = 0; bank < numbanks; bank++) { + if (!bp[bank]) + continue; + + threshold_remove_bank(bp[bank]); + bp[bank] = NULL; + } + kfree(bp); +} + int mce_threshold_remove_device(unsigned int cpu) { struct threshold_bank **bp = this_cpu_read(threshold_banks); - unsigned int bank, numbanks = this_cpu_read(mce_num_banks); if (!bp) return 0; @@ -1308,13 +1321,7 @@ int mce_threshold_remove_device(unsigned int cpu) */ this_cpu_write(threshold_banks, NULL); - for (bank = 0; bank < numbanks; bank++) { - if (bp[bank]) { - threshold_remove_bank(bp[bank]); - bp[bank] = NULL; - } - } - kfree(bp); + __threshold_remove_device(bp); return 0; } @@ -1351,15 +1358,14 @@ int mce_threshold_create_device(unsigned int cpu) if (!(this_cpu_read(bank_map) & (1 << bank))) continue; err = threshold_create_bank(bp, cpu, bank); - if (err) - goto out_err; + if (err) { + __threshold_remove_device(bp); + return err; + } } this_cpu_write(threshold_banks, bp); if (thresholding_irq_en) mce_threshold_vector = amd_threshold_interrupt; return 0; -out_err: - mce_threshold_remove_device(cpu); - return err; } From 70c459d915e838b7f536b8e26e0b3a6141bd2645 Mon Sep 17 00:00:00 2001 From: Carlos Bilbao Date: Tue, 5 Apr 2022 13:32:13 -0500 Subject: [PATCH 3/4] x86/mce: Simplify AMD severity grading logic The MCE handler needs to understand the severity of the machine errors to act accordingly. Simplify the AMD grading logic following a logic that closely resembles the descriptions of the public PPR documents. This will help include more fine-grained grading of errors in the future. [ bp: Touchups. ] Signed-off-by: Carlos Bilbao Signed-off-by: Borislav Petkov Reviewed-by: Yazen Ghannam Link: https://lore.kernel.org/r/20220405183212.354606-2-carlos.bilbao@amd.com --- arch/x86/kernel/cpu/mce/severity.c | 105 +++++++++++------------------ 1 file changed, 38 insertions(+), 67 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index 1add86935349..d842148f9c39 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -301,85 +301,56 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) } } -static __always_inline int mce_severity_amd_smca(struct mce *m, enum context err_ctx) -{ - u64 mcx_cfg; - - /* - * We need to look at the following bits: - * - "succor" bit (data poisoning support), and - * - TCC bit (Task Context Corrupt) - * in MCi_STATUS to determine error severity. - */ - if (!mce_flags.succor) - return MCE_PANIC_SEVERITY; - - mcx_cfg = mce_rdmsrl(MSR_AMD64_SMCA_MCx_CONFIG(m->bank)); - - /* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */ - if ((mcx_cfg & MCI_CONFIG_MCAX) && - (m->status & MCI_STATUS_TCC) && - (err_ctx == IN_KERNEL)) - return MCE_PANIC_SEVERITY; - - /* ...otherwise invoke hwpoison handler. */ - return MCE_AR_SEVERITY; -} - -/* - * See AMD Error Scope Hierarchy table in a newer BKDG. For example - * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features" - */ +/* See AMD PPR(s) section Machine Check Error Handling. */ static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp) { - enum context ctx = error_context(m, regs); + int ret; + + /* + * Default return value: Action required, the error must be handled + * immediately. + */ + ret = MCE_AR_SEVERITY; /* Processor Context Corrupt, no need to fumble too much, die! */ - if (m->status & MCI_STATUS_PCC) - return MCE_PANIC_SEVERITY; + if (m->status & MCI_STATUS_PCC) { + ret = MCE_PANIC_SEVERITY; + goto out; + } - if (m->status & MCI_STATUS_UC) { - - if (ctx == IN_KERNEL) - return MCE_PANIC_SEVERITY; - - /* - * On older systems where overflow_recov flag is not present, we - * should simply panic if an error overflow occurs. If - * overflow_recov flag is present and set, then software can try - * to at least kill process to prolong system operation. - */ - if (mce_flags.overflow_recov) { - if (mce_flags.smca) - return mce_severity_amd_smca(m, ctx); - - /* kill current process */ - return MCE_AR_SEVERITY; - } else { - /* at least one error was not logged */ - if (m->status & MCI_STATUS_OVER) - return MCE_PANIC_SEVERITY; - } - - /* - * For any other case, return MCE_UC_SEVERITY so that we log the - * error and exit #MC handler. - */ - return MCE_UC_SEVERITY; + if (m->status & MCI_STATUS_DEFERRED) { + ret = MCE_DEFERRED_SEVERITY; + goto out; } /* - * deferred error: poll handler catches these and adds to mce_ring so - * memory-failure can take recovery actions. + * If the UC bit is not set, the system either corrected or deferred + * the error. No action will be required after logging the error. */ - if (m->status & MCI_STATUS_DEFERRED) - return MCE_DEFERRED_SEVERITY; + if (!(m->status & MCI_STATUS_UC)) { + ret = MCE_KEEP_SEVERITY; + goto out; + } /* - * corrected error: poll handler catches these and passes responsibility - * of decoding the error to EDAC + * On MCA overflow, without the MCA overflow recovery feature the + * system will not be able to recover, panic. */ - return MCE_KEEP_SEVERITY; + if ((m->status & MCI_STATUS_OVER) && !mce_flags.overflow_recov) { + ret = MCE_PANIC_SEVERITY; + goto out; + } + + if (!mce_flags.succor) { + ret = MCE_PANIC_SEVERITY; + goto out; + } + + if (error_context(m, regs) == IN_KERNEL) + ret = MCE_PANIC_SEVERITY; + +out: + return ret; } static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp) From fa619f5156cf1ee3773edc6d756be262c9ef35de Mon Sep 17 00:00:00 2001 From: Carlos Bilbao Date: Tue, 5 Apr 2022 13:32:14 -0500 Subject: [PATCH 4/4] x86/mce: Add messages for panic errors in AMD's MCE grading When a machine error is graded as PANIC by the AMD grading logic, the MCE handler calls mce_panic(). The notification chain does not come into effect so the AMD EDAC driver does not decode the errors. In these cases, the messages displayed to the user are more cryptic and miss information that might be relevant, like the context in which the error took place. Add messages to the grading logic for machine errors so that it is clear what error it was. [ bp: Massage commit message. ] Signed-off-by: Carlos Bilbao Signed-off-by: Borislav Petkov Reviewed-by: Yazen Ghannam Link: https://lore.kernel.org/r/20220405183212.354606-3-carlos.bilbao@amd.com --- arch/x86/kernel/cpu/mce/severity.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index d842148f9c39..00483d1c27e4 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -304,6 +304,7 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) /* See AMD PPR(s) section Machine Check Error Handling. */ static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp) { + char *panic_msg = NULL; int ret; /* @@ -314,6 +315,7 @@ static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, char ** /* Processor Context Corrupt, no need to fumble too much, die! */ if (m->status & MCI_STATUS_PCC) { + panic_msg = "Processor Context Corrupt"; ret = MCE_PANIC_SEVERITY; goto out; } @@ -337,19 +339,26 @@ static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, char ** * system will not be able to recover, panic. */ if ((m->status & MCI_STATUS_OVER) && !mce_flags.overflow_recov) { + panic_msg = "Overflowed uncorrected error without MCA Overflow Recovery"; ret = MCE_PANIC_SEVERITY; goto out; } if (!mce_flags.succor) { + panic_msg = "Uncorrected error without MCA Recovery"; ret = MCE_PANIC_SEVERITY; goto out; } - if (error_context(m, regs) == IN_KERNEL) + if (error_context(m, regs) == IN_KERNEL) { + panic_msg = "Uncorrected unrecoverable error in kernel context"; ret = MCE_PANIC_SEVERITY; + } out: + if (msg && panic_msg) + *msg = panic_msg; + return ret; }