From 20d51a426fe9a0d0a63cc3a7488f621c8bac37e1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 12 Aug 2015 18:29:33 +0200 Subject: [PATCH 01/13] x86/mce: Reuse one of the u16 padding fields in 'struct mce' ... to save the error severity of the MCE and whether the reported address of the error is usable. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Link: http://lkml.kernel.org/r/1439396985-12812-2-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/uapi/asm/mce.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index a0eab85ce7b8..76880ede9a35 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -15,7 +15,8 @@ struct mce { __u64 time; /* wall time_t when error was detected */ __u8 cpuvendor; /* cpu vendor as encoded in system.h */ __u8 inject_flags; /* software inject flags */ - __u16 pad; + __u8 severity; + __u8 usable_addr; __u32 cpuid; /* CPUID 1 EAX */ __u8 cs; /* code segment */ __u8 bank; /* machine check bank */ From 648ed94038c030245a06e4be59744fd5cdc18c40 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 12 Aug 2015 18:29:34 +0200 Subject: [PATCH 02/13] x86/mce: Provide a lockless memory pool to save error records printk() is not safe to use in MCE context. Add a lockless memory allocator pool to save error records in MCE context. Those records will be issued later, in a printk-safe context. The idea is inspired by the APEI/GHES driver. We're very conservative and allocate only two pages for it but since we're going to use those pages throughout the system's lifetime, we allocate them statically to avoid early boot time allocation woes. Signed-off-by: Chen, Gong [ Rewrite. ] Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Link: http://lkml.kernel.org/r/1439396985-12812-3-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + arch/x86/kernel/cpu/mcheck/Makefile | 2 +- arch/x86/kernel/cpu/mcheck/mce-genpool.c | 99 +++++++++++++++++++++++ arch/x86/kernel/cpu/mcheck/mce-internal.h | 12 +++ arch/x86/kernel/cpu/mcheck/mce.c | 8 +- 5 files changed, 120 insertions(+), 2 deletions(-) create mode 100644 arch/x86/kernel/cpu/mcheck/mce-genpool.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b3a1a5d77d92..06dbb5da90c6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -955,6 +955,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS config X86_MCE bool "Machine Check / overheating reporting" + select GENERIC_ALLOCATOR default y ---help--- Machine Check support allows the processor to notify the diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index bb34b03af252..a3311c886194 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -1,4 +1,4 @@ -obj-y = mce.o mce-severity.o +obj-y = mce.o mce-severity.o mce-genpool.o obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c new file mode 100644 index 000000000000..0a850100c594 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c @@ -0,0 +1,99 @@ +/* + * MCE event pool management in MCE context + * + * Copyright (C) 2015 Intel Corp. + * Author: Chen, Gong + * + * This file is licensed under GPLv2. + */ +#include +#include +#include +#include +#include "mce-internal.h" + +/* + * printk() is not safe in MCE context. This is a lock-less memory allocator + * used to save error information organized in a lock-less list. + * + * This memory pool is only to be used to save MCE records in MCE context. + * MCE events are rare, so a fixed size memory pool should be enough. Use + * 2 pages to save MCE events for now (~80 MCE records at most). + */ +#define MCE_POOLSZ (2 * PAGE_SIZE) + +static struct gen_pool *mce_evt_pool; +static LLIST_HEAD(mce_event_llist); +static char gen_pool_buf[MCE_POOLSZ]; + +void mce_gen_pool_process(void) +{ + struct llist_node *head; + struct mce_evt_llist *node; + struct mce *mce; + + head = llist_del_all(&mce_event_llist); + if (!head) + return; + + head = llist_reverse_order(head); + llist_for_each_entry(node, head, llnode) { + mce = &node->mce; + atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); + gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node)); + } +} + +bool mce_gen_pool_empty(void) +{ + return llist_empty(&mce_event_llist); +} + +int mce_gen_pool_add(struct mce *mce) +{ + struct mce_evt_llist *node; + + if (!mce_evt_pool) + return -EINVAL; + + node = (void *)gen_pool_alloc(mce_evt_pool, sizeof(*node)); + if (!node) { + pr_warn_ratelimited("MCE records pool full!\n"); + return -ENOMEM; + } + + memcpy(&node->mce, mce, sizeof(*mce)); + llist_add(&node->llnode, &mce_event_llist); + + return 0; +} + +static int mce_gen_pool_create(void) +{ + struct gen_pool *tmpp; + int ret = -ENOMEM; + + tmpp = gen_pool_create(ilog2(sizeof(struct mce_evt_llist)), -1); + if (!tmpp) + goto out; + + ret = gen_pool_add(tmpp, (unsigned long)gen_pool_buf, MCE_POOLSZ, -1); + if (ret) { + gen_pool_destroy(tmpp); + goto out; + } + + mce_evt_pool = tmpp; + +out: + return ret; +} + +int mce_gen_pool_init(void) +{ + /* Just init mce_gen_pool once. */ + if (mce_evt_pool) + return 0; + + return mce_gen_pool_create(); +} diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index fe32074b865b..ea8b62264c14 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -13,6 +13,8 @@ enum severity_level { MCE_PANIC_SEVERITY, }; +extern struct atomic_notifier_head x86_mce_decoder_chain; + #define ATTR_LEN 16 #define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */ @@ -24,6 +26,16 @@ struct mce_bank { char attrname[ATTR_LEN]; /* attribute name */ }; +struct mce_evt_llist { + struct llist_node llnode; + struct mce mce; +}; + +void mce_gen_pool_process(void); +bool mce_gen_pool_empty(void); +int mce_gen_pool_add(struct mce *mce); +int mce_gen_pool_init(void); + extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp); struct dentry *mce_get_debugfs_dir(void); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index df919ff103c3..a41c014e5cde 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -118,7 +118,7 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); * CPU/chipset specific EDAC code can register a notifier call here to print * MCE errors in a human-readable form. */ -static ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); +ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); /* Do initial initialization of a struct mce */ void mce_setup(struct mce *m) @@ -1731,6 +1731,12 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) return; } + if (mce_gen_pool_init()) { + mca_cfg.disabled = true; + pr_emerg("Couldn't allocate MCE records pool!\n"); + return; + } + machine_check_vector = do_machine_check; __mcheck_cpu_init_generic(); From 061120aed7081b9a4393fbe07b558192f40ad911 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 12 Aug 2015 18:29:35 +0200 Subject: [PATCH 03/13] x86/mce: Don't use percpu workqueues An MCE is a rare event. Therefore, there's no need to have per-CPU instances of both normal and IRQ workqueues. Make them both global. Signed-off-by: Chen, Gong [ Fold in subsequent patch from Rui/Boris/Tony for early boot logging. ] Signed-off-by: Tony Luck [ Massage commit message. ] Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1439396985-12812-4-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a41c014e5cde..456f8d7b8fd3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -110,7 +110,8 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { */ mce_banks_t mce_banks_ce_disabled; -static DEFINE_PER_CPU(struct work_struct, mce_work); +static struct work_struct mce_work; +static struct irq_work mce_irq_work; static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); @@ -526,11 +527,9 @@ int mce_available(struct cpuinfo_x86 *c) static void mce_schedule_work(void) { if (!mce_ring_empty()) - schedule_work(this_cpu_ptr(&mce_work)); + schedule_work(&mce_work); } -static DEFINE_PER_CPU(struct irq_work, mce_irq_work); - static void mce_irq_work_cb(struct irq_work *entry) { mce_notify_irq(); @@ -551,7 +550,7 @@ static void mce_report_event(struct pt_regs *regs) return; } - irq_work_queue(this_cpu_ptr(&mce_irq_work)); + irq_work_queue(&mce_irq_work); } /* @@ -1742,8 +1741,6 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_init_generic(); __mcheck_cpu_init_vendor(c); __mcheck_cpu_init_timer(); - INIT_WORK(this_cpu_ptr(&mce_work), mce_process_work); - init_irq_work(this_cpu_ptr(&mce_irq_work), &mce_irq_work_cb); } /* @@ -2064,6 +2061,9 @@ int __init mcheck_init(void) mcheck_intel_therm_init(); mcheck_vendor_init_severity(); + INIT_WORK(&mce_work, mce_process_work); + init_irq_work(&mce_irq_work, mce_irq_work_cb); + return 0; } From fd4cf79fcc4b5130ced8fd8c40378d3cec2e5fa8 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 12 Aug 2015 18:29:36 +0200 Subject: [PATCH 04/13] x86/mce: Remove the MCE ring for Action Optional errors Use unified genpool to save Action Optional error events and put Action Optional error handling in the same notification chain as MCE error decoding. Signed-off-by: Chen, Gong [ Fold in subsequent patch from Boris for early boot logging. ] Signed-off-by: Tony Luck [ Correct a lot. ] Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1439396985-12812-5-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 2 +- arch/x86/kernel/cpu/mcheck/mce.c | 135 ++++++++++++++----------------- drivers/acpi/acpi_extlog.c | 2 +- drivers/edac/i7core_edac.c | 2 +- drivers/edac/mce_amd.c | 2 +- drivers/edac/sb_edac.c | 2 +- 6 files changed, 65 insertions(+), 80 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 982dfc3679ad..dfaa4de1dbb4 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -140,7 +140,7 @@ struct mce_vendor_flags { extern struct mce_vendor_flags mce_flags; extern struct mca_config mca_cfg; -extern void mce_register_decode_chain(struct notifier_block *nb); +extern void mce_register_decode_chain(struct notifier_block *nb, bool drain); extern void mce_unregister_decode_chain(struct notifier_block *nb); #include diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 456f8d7b8fd3..82603690b65c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -114,6 +114,7 @@ static struct work_struct mce_work; static struct irq_work mce_irq_work; static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); +static int mce_usable_address(struct mce *m); /* * CPU/chipset specific EDAC code can register a notifier call here to print @@ -234,11 +235,18 @@ static void drain_mcelog_buffer(void) } while (next != prev); } +static struct notifier_block mce_srao_nb; -void mce_register_decode_chain(struct notifier_block *nb) +void mce_register_decode_chain(struct notifier_block *nb, bool drain) { + /* Ensure SRAO notifier has the highest priority in the decode chain. */ + if (nb != &mce_srao_nb && nb->priority == INT_MAX) + nb->priority -= 1; + atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); - drain_mcelog_buffer(); + + if (drain) + drain_mcelog_buffer(); } EXPORT_SYMBOL_GPL(mce_register_decode_chain); @@ -462,61 +470,6 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs) } } -/* - * Simple lockless ring to communicate PFNs from the exception handler with the - * process context work function. This is vastly simplified because there's - * only a single reader and a single writer. - */ -#define MCE_RING_SIZE 16 /* we use one entry less */ - -struct mce_ring { - unsigned short start; - unsigned short end; - unsigned long ring[MCE_RING_SIZE]; -}; -static DEFINE_PER_CPU(struct mce_ring, mce_ring); - -/* Runs with CPU affinity in workqueue */ -static int mce_ring_empty(void) -{ - struct mce_ring *r = this_cpu_ptr(&mce_ring); - - return r->start == r->end; -} - -static int mce_ring_get(unsigned long *pfn) -{ - struct mce_ring *r; - int ret = 0; - - *pfn = 0; - get_cpu(); - r = this_cpu_ptr(&mce_ring); - if (r->start == r->end) - goto out; - *pfn = r->ring[r->start]; - r->start = (r->start + 1) % MCE_RING_SIZE; - ret = 1; -out: - put_cpu(); - return ret; -} - -/* Always runs in MCE context with preempt off */ -static int mce_ring_add(unsigned long pfn) -{ - struct mce_ring *r = this_cpu_ptr(&mce_ring); - unsigned next; - - next = (r->end + 1) % MCE_RING_SIZE; - if (next == r->start) - return -1; - r->ring[r->end] = pfn; - wmb(); - r->end = next; - return 0; -} - int mce_available(struct cpuinfo_x86 *c) { if (mca_cfg.disabled) @@ -526,7 +479,7 @@ int mce_available(struct cpuinfo_x86 *c) static void mce_schedule_work(void) { - if (!mce_ring_empty()) + if (!mce_gen_pool_empty() && keventd_up()) schedule_work(&mce_work); } @@ -553,6 +506,27 @@ static void mce_report_event(struct pt_regs *regs) irq_work_queue(&mce_irq_work); } +static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct mce *mce = (struct mce *)data; + unsigned long pfn; + + if (!mce) + return NOTIFY_DONE; + + if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) { + pfn = mce->addr >> PAGE_SHIFT; + memory_failure(pfn, MCE_VECTOR, 0); + } + + return NOTIFY_OK; +} +static struct notifier_block mce_srao_nb = { + .notifier_call = srao_decode_notifier, + .priority = INT_MAX, +}; + /* * Read ADDR and MISC registers. */ @@ -671,8 +645,11 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) */ if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) { if (m.status & MCI_STATUS_ADDRV) { - mce_ring_add(m.addr >> PAGE_SHIFT); - mce_schedule_work(); + m.severity = severity; + m.usable_addr = mce_usable_address(&m); + + if (!mce_gen_pool_add(&m)) + mce_schedule_work(); } } @@ -1142,15 +1119,10 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_read_aux(&m, i); - /* - * Action optional error. Queue address for later processing. - * When the ring overflows we just ignore the AO error. - * RED-PEN add some logging mechanism when - * usable_address or mce_add_ring fails. - * RED-PEN don't ignore overflow for mca_cfg.tolerant == 0 - */ - if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) - mce_ring_add(m.addr >> PAGE_SHIFT); + /* assuming valid severity level != 0 */ + m.severity = severity; + m.usable_addr = mce_usable_address(&m); + mce_gen_pool_add(&m); mce_log(&m); @@ -1246,14 +1218,11 @@ int memory_failure(unsigned long pfn, int vector, int flags) /* * Action optional processing happens here (picking up * from the list of faulting pages that do_machine_check() - * placed into the "ring"). + * placed into the genpool). */ static void mce_process_work(struct work_struct *dummy) { - unsigned long pfn; - - while (mce_ring_get(&pfn)) - memory_failure(pfn, MCE_VECTOR, 0); + mce_gen_pool_process(); } #ifdef CONFIG_X86_MCE_INTEL @@ -2059,6 +2028,7 @@ __setup("mce", mcheck_enable); int __init mcheck_init(void) { mcheck_intel_therm_init(); + mce_register_decode_chain(&mce_srao_nb, false); mcheck_vendor_init_severity(); INIT_WORK(&mce_work, mce_process_work); @@ -2597,5 +2567,20 @@ static int __init mcheck_debugfs_init(void) return 0; } -late_initcall(mcheck_debugfs_init); +#else +static int __init mcheck_debugfs_init(void) { return -EINVAL; } #endif + +static int __init mcheck_late_init(void) +{ + mcheck_debugfs_init(); + + /* + * Flush out everything that has been logged during early boot, now that + * everything has been initialized (workqueues, decoders, ...). + */ + mce_schedule_work(); + + return 0; +} +late_initcall(mcheck_late_init); diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index b3842ffc19ba..07e012e74c1b 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -286,7 +286,7 @@ static int __init extlog_init(void) */ old_edac_report_status = get_edac_report_status(); set_edac_report_status(EDAC_REPORTING_DISABLED); - mce_register_decode_chain(&extlog_mce_dec); + mce_register_decode_chain(&extlog_mce_dec, true); /* enable OS to be involved to take over management from BIOS */ ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN; diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 01087a38da22..13d77f4a892c 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -2424,7 +2424,7 @@ static int __init i7core_init(void) pci_rc = pci_register_driver(&i7core_driver); if (pci_rc >= 0) { - mce_register_decode_chain(&i7_mce_dec); + mce_register_decode_chain(&i7_mce_dec, true); return 0; } diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 58586d59bf8e..aca31a237073 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -895,7 +895,7 @@ static int __init mce_amd_init(void) pr_info("MCE: In-kernel MCE decoding enabled.\n"); - mce_register_decode_chain(&amd_mce_dec_nb); + mce_register_decode_chain(&amd_mce_dec_nb, true); return 0; } diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index ca7831168298..5780e26c3e58 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -2591,7 +2591,7 @@ static int __init sbridge_init(void) pci_rc = pci_register_driver(&sbridge_driver); if (pci_rc >= 0) { - mce_register_decode_chain(&sbridge_mce_dec); + mce_register_decode_chain(&sbridge_mce_dec, true); if (get_edac_report_status() == EDAC_REPORTING_DISABLED) sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n"); return 0; From f29a7aff4bd60ebc3da4982f80144a4158c4c74a Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 12 Aug 2015 18:29:37 +0200 Subject: [PATCH 05/13] x86/mce: Avoid potential deadlock due to printk() in MCE context Printing in MCE context is a no-no, currently, as printk() is not NMI-safe. If some of the notifiers on the MCE chain call do so, we may deadlock. In order to avoid that, delay printk() to process context where it is safe. Reported-by: Xie XiuQi Signed-off-by: Chen, Gong [ Fold in subsequent patch from Boris for early boot logging. ] Signed-off-by: Tony Luck [ Kick irq_work in mce_log() directly. ] Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1439396985-12812-6-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce-apei.c | 1 - arch/x86/kernel/cpu/mcheck/mce.c | 4 ++-- arch/x86/kernel/cpu/mcheck/mce_intel.c | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index a1aef9533154..34c89a3e8260 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c @@ -57,7 +57,6 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) m.addr = mem_err->physical_addr; mce_log(&m); - mce_notify_irq(); } EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 82603690b65c..9568bb55bfe2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -159,7 +159,8 @@ void mce_log(struct mce *mce) /* Emit the trace record: */ trace_mce_record(mce); - atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); + if (!mce_gen_pool_add(mce)) + irq_work_queue(&mce_irq_work); mce->finished = 0; wmb(); @@ -1122,7 +1123,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) /* assuming valid severity level != 0 */ m.severity = severity; m.usable_addr = mce_usable_address(&m); - mce_gen_pool_add(&m); mce_log(&m); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 844f56c5616d..70f567f774ed 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -246,7 +246,6 @@ static void intel_threshold_interrupt(void) return; machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); - mce_notify_irq(); } /* From eef4dfa0cb83899c782935ac5345532f47073cea Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 12 Aug 2015 18:29:38 +0200 Subject: [PATCH 06/13] x86/mce: Kill drain_mcelog_buffer() This used to flush out MCEs logged during early boot and which were in the MCA registers from a previous system run. No need for that now, since we've moved to a genpool. Suggested-by: Tony Luck Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1439396985-12812-7-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 2 +- arch/x86/kernel/cpu/mcheck/mce.c | 44 ++------------------------------ drivers/acpi/acpi_extlog.c | 2 +- drivers/edac/i7core_edac.c | 2 +- drivers/edac/mce_amd.c | 2 +- drivers/edac/sb_edac.c | 2 +- 6 files changed, 7 insertions(+), 47 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index dfaa4de1dbb4..982dfc3679ad 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -140,7 +140,7 @@ struct mce_vendor_flags { extern struct mce_vendor_flags mce_flags; extern struct mca_config mca_cfg; -extern void mce_register_decode_chain(struct notifier_block *nb, bool drain); +extern void mce_register_decode_chain(struct notifier_block *nb); extern void mce_unregister_decode_chain(struct notifier_block *nb); #include diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 9568bb55bfe2..32b586ee006a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -199,55 +199,15 @@ void mce_log(struct mce *mce) set_bit(0, &mce_need_notify); } -static void drain_mcelog_buffer(void) -{ - unsigned int next, i, prev = 0; - - next = ACCESS_ONCE(mcelog.next); - - do { - struct mce *m; - - /* drain what was logged during boot */ - for (i = prev; i < next; i++) { - unsigned long start = jiffies; - unsigned retries = 1; - - m = &mcelog.entry[i]; - - while (!m->finished) { - if (time_after_eq(jiffies, start + 2*retries)) - retries++; - - cpu_relax(); - - if (!m->finished && retries >= 4) { - pr_err("skipping error being logged currently!\n"); - break; - } - } - smp_rmb(); - atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); - } - - memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m)); - prev = next; - next = cmpxchg(&mcelog.next, prev, 0); - } while (next != prev); -} - static struct notifier_block mce_srao_nb; -void mce_register_decode_chain(struct notifier_block *nb, bool drain) +void mce_register_decode_chain(struct notifier_block *nb) { /* Ensure SRAO notifier has the highest priority in the decode chain. */ if (nb != &mce_srao_nb && nb->priority == INT_MAX) nb->priority -= 1; atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); - - if (drain) - drain_mcelog_buffer(); } EXPORT_SYMBOL_GPL(mce_register_decode_chain); @@ -2028,7 +1988,7 @@ __setup("mce", mcheck_enable); int __init mcheck_init(void) { mcheck_intel_therm_init(); - mce_register_decode_chain(&mce_srao_nb, false); + mce_register_decode_chain(&mce_srao_nb); mcheck_vendor_init_severity(); INIT_WORK(&mce_work, mce_process_work); diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index 07e012e74c1b..b3842ffc19ba 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -286,7 +286,7 @@ static int __init extlog_init(void) */ old_edac_report_status = get_edac_report_status(); set_edac_report_status(EDAC_REPORTING_DISABLED); - mce_register_decode_chain(&extlog_mce_dec, true); + mce_register_decode_chain(&extlog_mce_dec); /* enable OS to be involved to take over management from BIOS */ ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN; diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 13d77f4a892c..01087a38da22 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -2424,7 +2424,7 @@ static int __init i7core_init(void) pci_rc = pci_register_driver(&i7core_driver); if (pci_rc >= 0) { - mce_register_decode_chain(&i7_mce_dec, true); + mce_register_decode_chain(&i7_mce_dec); return 0; } diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index aca31a237073..58586d59bf8e 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -895,7 +895,7 @@ static int __init mce_amd_init(void) pr_info("MCE: In-kernel MCE decoding enabled.\n"); - mce_register_decode_chain(&amd_mce_dec_nb, true); + mce_register_decode_chain(&amd_mce_dec_nb); return 0; } diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 5780e26c3e58..ca7831168298 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -2591,7 +2591,7 @@ static int __init sbridge_init(void) pci_rc = pci_register_driver(&sbridge_driver); if (pci_rc >= 0) { - mce_register_decode_chain(&sbridge_mce_dec, true); + mce_register_decode_chain(&sbridge_mce_dec); if (get_edac_report_status() == EDAC_REPORTING_DISABLED) sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n"); return 0; From 4d1d5cdc345d15e09518a2410f7fcd069465ffac Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Wed, 12 Aug 2015 18:29:39 +0200 Subject: [PATCH 07/13] x86/mce: Remove unused function declarations Remove unused function declarations. Signed-off-by: Ashok Raj Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/1439396985-12812-8-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 982dfc3679ad..38d3a1a8830f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -185,16 +185,12 @@ void cmci_clear(void); void cmci_reenable(void); void cmci_rediscover(void); void cmci_recheck(void); -void lmce_clear(void); -void lmce_enable(void); #else static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { } static inline void cmci_clear(void) {} static inline void cmci_reenable(void) {} static inline void cmci_rediscover(void) {} static inline void cmci_recheck(void) {} -static inline void lmce_clear(void) {} -static inline void lmce_enable(void) {} #endif #ifdef CONFIG_X86_MCE_AMD From 8838eb6c0bf3b6a6494a163947ab3d1700ab45d2 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Wed, 12 Aug 2015 18:29:40 +0200 Subject: [PATCH 08/13] x86/mce: Clear Local MCE opt-in before kexec kexec could boot a kernel that could be legacy with no knowledge of LMCE. Hence we should make sure we clear LMCE optin before kexec reboot. Signed-off-by: Ashok Raj Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Aravind Gopalakrishnan Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/1439396985-12812-9-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 4 ++++ arch/x86/kernel/cpu/mcheck/mce.c | 30 ++++++++++++++++++++++++++ arch/x86/kernel/cpu/mcheck/mce_intel.c | 19 +++++++++++++++- arch/x86/kernel/process.c | 2 ++ arch/x86/kernel/smp.c | 2 ++ 5 files changed, 56 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 38d3a1a8830f..2dbc0bf2b9f3 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -151,10 +151,12 @@ extern int mce_p5_enabled; #ifdef CONFIG_X86_MCE int mcheck_init(void); void mcheck_cpu_init(struct cpuinfo_x86 *c); +void mcheck_cpu_clear(struct cpuinfo_x86 *c); void mcheck_vendor_init_severity(void); #else static inline int mcheck_init(void) { return 0; } static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} +static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {} static inline void mcheck_vendor_init_severity(void) {} #endif @@ -181,12 +183,14 @@ DECLARE_PER_CPU(struct device *, mce_device); #ifdef CONFIG_X86_MCE_INTEL void mce_intel_feature_init(struct cpuinfo_x86 *c); +void mce_intel_feature_clear(struct cpuinfo_x86 *c); void cmci_clear(void); void cmci_reenable(void); void cmci_rediscover(void); void cmci_recheck(void); #else static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { } +static inline void mce_intel_feature_clear(struct cpuinfo_x86 *c) { } static inline void cmci_clear(void) {} static inline void cmci_reenable(void) {} static inline void cmci_rediscover(void) {} diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 32b586ee006a..ee5272d77a16 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1606,6 +1606,17 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) } } +static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c) +{ + switch (c->x86_vendor) { + case X86_VENDOR_INTEL: + mce_intel_feature_clear(c); + break; + default: + break; + } +} + static void mce_start_timer(unsigned int cpu, struct timer_list *t) { unsigned long iv = check_interval * HZ; @@ -1672,6 +1683,25 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_init_timer(); } +/* + * Called for each booted CPU to clear some machine checks opt-ins + */ +void mcheck_cpu_clear(struct cpuinfo_x86 *c) +{ + if (mca_cfg.disabled) + return; + + if (!mce_available(c)) + return; + + /* + * Possibly to clear general settings generic to x86 + * __mcheck_cpu_clear_generic(c); + */ + __mcheck_cpu_clear_vendor(c); + +} + /* * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log. */ diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 70f567f774ed..c5c003291861 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -434,7 +434,7 @@ static void intel_init_cmci(void) cmci_recheck(); } -void intel_init_lmce(void) +static void intel_init_lmce(void) { u64 val; @@ -447,9 +447,26 @@ void intel_init_lmce(void) wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN); } +static void intel_clear_lmce(void) +{ + u64 val; + + if (!lmce_supported()) + return; + + rdmsrl(MSR_IA32_MCG_EXT_CTL, val); + val &= ~MCG_EXT_CTL_LMCE_EN; + wrmsrl(MSR_IA32_MCG_EXT_CTL, val); +} + void mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_thermal(c); intel_init_cmci(); intel_init_lmce(); } + +void mce_intel_feature_clear(struct cpuinfo_x86 *c) +{ + intel_clear_lmce(); +} diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 397688beed4b..b20ef187ff41 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -29,6 +29,7 @@ #include #include #include +#include /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -319,6 +320,7 @@ void stop_this_cpu(void *dummy) */ set_cpu_online(smp_processor_id(), false); disable_local_APIC(); + mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); for (;;) halt(); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 15aaa69bbb5e..12c8286206ce 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -30,6 +30,7 @@ #include #include #include +#include #include /* * Some notes on x86 processor bugs affecting SMP operation: @@ -243,6 +244,7 @@ static void native_stop_other_cpus(int wait) finish: local_irq_save(flags); disable_local_APIC(); + mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); local_irq_restore(flags); } From 1b48465500611a2dc5e75800c61ac352e22d41c3 Mon Sep 17 00:00:00 2001 From: Xie XiuQi Date: Wed, 12 Aug 2015 18:29:41 +0200 Subject: [PATCH 09/13] x86/mce: Reenable CMCI banks when swiching back to interrupt mode Zhang Liguang reported the following issue: 1) System detects a CMCI storm on the current CPU. 2) Kernel disables the CMCI interrupt on banks owned by the current CPU and switches to poll mode 3) After the CMCI storm subsides, kernel switches back to interrupt mode 4) We expect the system to reenable the CMCI interrupt on banks owned by the current CPU mce_intel_adjust_timer |-> cmci_reenable |-> cmci_discover # owned banks are ignored here static void cmci_discover(int banks) ... for (i = 0; i < banks; i++) { ... if (test_bit(i, owned)) # ownd banks is ignore here continue; So convert cmci_storm_disable_banks() to cmci_toggle_interrupt_mode() which controls whether to enable or disable CMCI interrupts with its argument. NB: We cannot clear the owned bit because the banks won't be polled, otherwise. See: 27f6c573e0f7 ("x86, CMCI: Add proper detection of end of CMCI storms") for more info. Reported-by: Zhang Liguang Signed-off-by: Xie XiuQi Signed-off-by: Borislav Petkov Cc: # v3.15+ Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: huawei.libin@huawei.com Cc: linux-edac Cc: rui.xiang@huawei.com Link: http://lkml.kernel.org/r/1439396985-12812-10-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_intel.c | 41 +++++++++++++++----------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index c5c003291861..1e8bb6c94f14 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -146,6 +146,27 @@ void mce_intel_hcpu_update(unsigned long cpu) per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE; } +static void cmci_toggle_interrupt_mode(bool on) +{ + unsigned long flags, *owned; + int bank; + u64 val; + + raw_spin_lock_irqsave(&cmci_discover_lock, flags); + owned = this_cpu_ptr(mce_banks_owned); + for_each_set_bit(bank, owned, MAX_NR_BANKS) { + rdmsrl(MSR_IA32_MCx_CTL2(bank), val); + + if (on) + val |= MCI_CTL2_CMCI_EN; + else + val &= ~MCI_CTL2_CMCI_EN; + + wrmsrl(MSR_IA32_MCx_CTL2(bank), val); + } + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); +} + unsigned long cmci_intel_adjust_timer(unsigned long interval) { if ((this_cpu_read(cmci_backoff_cnt) > 0) && @@ -175,7 +196,7 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval) */ if (!atomic_read(&cmci_storm_on_cpus)) { __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE); - cmci_reenable(); + cmci_toggle_interrupt_mode(true); cmci_recheck(); } return CMCI_POLL_INTERVAL; @@ -186,22 +207,6 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval) } } -static void cmci_storm_disable_banks(void) -{ - unsigned long flags, *owned; - int bank; - u64 val; - - raw_spin_lock_irqsave(&cmci_discover_lock, flags); - owned = this_cpu_ptr(mce_banks_owned); - for_each_set_bit(bank, owned, MAX_NR_BANKS) { - rdmsrl(MSR_IA32_MCx_CTL2(bank), val); - val &= ~MCI_CTL2_CMCI_EN; - wrmsrl(MSR_IA32_MCx_CTL2(bank), val); - } - raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); -} - static bool cmci_storm_detect(void) { unsigned int cnt = __this_cpu_read(cmci_storm_cnt); @@ -223,7 +228,7 @@ static bool cmci_storm_detect(void) if (cnt <= CMCI_STORM_THRESHOLD) return false; - cmci_storm_disable_banks(); + cmci_toggle_interrupt_mode(false); __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); r = atomic_add_return(1, &cmci_storm_on_cpus); mce_timer_kick(CMCI_STORM_INTERVAL); From 9b45ef443acde55526485db37440cd3e8f03d544 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 12 Aug 2015 18:29:42 +0200 Subject: [PATCH 10/13] RAS: Add a menuconfig option with descriptive text Text taken a previous patch from "Gong Chen" . Signed-off-by: Borislav Petkov Cc: Gong Chen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Link: http://lkml.kernel.org/r/1439396985-12812-11-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- drivers/ras/Kconfig | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig index f9da613052c2..e5f0a43998d2 100644 --- a/drivers/ras/Kconfig +++ b/drivers/ras/Kconfig @@ -1,2 +1,32 @@ -config RAS - bool +menuconfig RAS + bool "Reliability, Availability and Serviceability (RAS) features" + help + Reliability, availability and serviceability (RAS) is a computer + hardware engineering term. Computers designed with higher levels + of RAS have a multitude of features that protect data integrity + and help them stay available for long periods of time without + failure. + + Reliability can be defined as the probability that the system will + produce correct outputs up to some given time. Reliability is + enhanced by features that help to avoid, detect and repair hardware + faults. + + Availability is the probability a system is operational at a given + time, i.e. the amount of time a device is actually operating as the + percentage of total time it should be operating. + + Serviceability or maintainability is the simplicity and speed with + which a system can be repaired or maintained; if the time to repair + a failed system increases, then availability will decrease. + + Note that Reliability and Availability are distinct concepts: + Reliability is a measure of the ability of a system to function + correctly, including avoiding data corruption, whereas Availability + measures how often it is available for use, even though it may not + be functioning correctly. For example, a server may run forever and + so have ideal availability, but may be unreliable, with frequent + data corruption. + +if RAS +endif From 9a7783d02197f299f71b4fa2364a345c05f92b83 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 12 Aug 2015 18:29:43 +0200 Subject: [PATCH 11/13] x86/mce: Rename rcu_dereference_check_mce() to mce_log_get_idx_check() The "rcu_" prefix misleads for it being a proper RCU interface which is not. It basically checks whether we're preemptible or holding the chrdev_read mutex. Rename it accordingly. Signed-off-by: Borislav Petkov Acked-by: Paul E. McKenney Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Link: http://lkml.kernel.org/r/1439396985-12812-12-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index ee5272d77a16..b979711452a5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -52,11 +52,11 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex); -#define rcu_dereference_check_mce(p) \ +#define mce_log_get_idx_check(p) \ ({ \ rcu_lockdep_assert(rcu_read_lock_sched_held() || \ lockdep_is_held(&mce_chrdev_read_mutex), \ - "suspicious rcu_dereference_check_mce() usage"); \ + "suspicious mce_log_get_idx_check() usage"); \ smp_load_acquire(&(p)); \ }) @@ -165,7 +165,7 @@ void mce_log(struct mce *mce) mce->finished = 0; wmb(); for (;;) { - entry = rcu_dereference_check_mce(mcelog.next); + entry = mce_log_get_idx_check(mcelog.next); for (;;) { /* @@ -1812,7 +1812,7 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf, goto out; } - next = rcu_dereference_check_mce(mcelog.next); + next = mce_log_get_idx_check(mcelog.next); /* Only supports full reads right now */ err = -EINVAL; From a79da38494ec23f1a7d6ee734e07e9575fd18b58 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 12 Aug 2015 18:29:44 +0200 Subject: [PATCH 12/13] x86/mce: Add a wrapper around mce_log() for injection Will be used by an injector module in a following patch. Additionally, add a missing module export reported by 0-DAY kernel test. Reported-by: kbuild test robot Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Link: http://lkml.kernel.org/r/1439396985-12812-13-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce-internal.h | 2 ++ arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index ea8b62264c14..547720efd923 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -79,3 +79,5 @@ static inline int apei_clear_mce(u64 record_id) return -EINVAL; } #endif + +void mce_inject_log(struct mce *m); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b979711452a5..e4e6646cac46 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -199,6 +199,14 @@ void mce_log(struct mce *mce) set_bit(0, &mce_need_notify); } +void mce_inject_log(struct mce *m) +{ + mutex_lock(&mce_chrdev_read_mutex); + mce_log(m); + mutex_unlock(&mce_chrdev_read_mutex); +} +EXPORT_SYMBOL_GPL(mce_inject_log); + static struct notifier_block mce_srao_nb; void mce_register_decode_chain(struct notifier_block *nb) From 6c36dfe949187dc2729abfad4b083758ac5c2e0e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 12 Aug 2015 18:29:45 +0200 Subject: [PATCH 13/13] x86/ras: Move AMD MCE injector to arch/x86/ras/ This is an x86-specific module and would benefit from being closer to the arch code. Move it there. Update copyright while at it. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Link: http://lkml.kernel.org/r/1439396985-12812-14-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 2 ++ arch/x86/ras/Kconfig | 11 +++++++++++ arch/x86/ras/Makefile | 2 ++ {drivers/edac => arch/x86/ras}/mce_amd_inj.c | 6 +++--- drivers/edac/Kconfig | 10 ---------- drivers/edac/Makefile | 1 - drivers/ras/Kconfig | 3 +++ 7 files changed, 21 insertions(+), 14 deletions(-) create mode 100644 arch/x86/ras/Kconfig create mode 100644 arch/x86/ras/Makefile rename {drivers/edac => arch/x86/ras}/mce_amd_inj.c (98%) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 118e6debc483..0f38418719ab 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -212,6 +212,8 @@ drivers-$(CONFIG_PM) += arch/x86/power/ drivers-$(CONFIG_FB) += arch/x86/video/ +drivers-$(CONFIG_RAS) += arch/x86/ras/ + #### # boot loader support. Several targets are kept for legacy purposes diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig new file mode 100644 index 000000000000..10fea5fc821e --- /dev/null +++ b/arch/x86/ras/Kconfig @@ -0,0 +1,11 @@ +config AMD_MCE_INJ + tristate "Simple MCE injection interface for AMD processors" + depends on RAS && EDAC_DECODE_MCE && DEBUG_FS + default n + help + This is a simple debugfs interface to inject MCEs and test different + aspects of the MCE handling code. + + WARNING: Do not even assume this interface is staying stable! + + diff --git a/arch/x86/ras/Makefile b/arch/x86/ras/Makefile new file mode 100644 index 000000000000..dd2c98b84037 --- /dev/null +++ b/arch/x86/ras/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_AMD_MCE_INJ) += mce_amd_inj.o + diff --git a/drivers/edac/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c similarity index 98% rename from drivers/edac/mce_amd_inj.c rename to arch/x86/ras/mce_amd_inj.c index 4c73e4d03d46..17e35b5bf779 100644 --- a/drivers/edac/mce_amd_inj.c +++ b/arch/x86/ras/mce_amd_inj.c @@ -6,7 +6,7 @@ * This file may be distributed under the terms of the GNU General Public * License version 2. * - * Copyright (c) 2010-14: Borislav Petkov + * Copyright (c) 2010-15: Borislav Petkov * Advanced Micro Devices Inc. */ @@ -19,7 +19,7 @@ #include #include -#include "mce_amd.h" +#include "../kernel/cpu/mcheck/mce-internal.h" /* * Collect all the MCi_XXX settings @@ -195,7 +195,7 @@ static void do_inject(void) i_mce.status |= MCI_STATUS_MISCV; if (inj_type == SW_INJ) { - amd_decode_mce(NULL, 0, &i_mce); + mce_inject_log(&i_mce); return; } diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 8677ead2a8e1..ef25000a5bc6 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -61,16 +61,6 @@ config EDAC_DECODE_MCE which occur really early upon boot, before the module infrastructure has been initialized. -config EDAC_MCE_INJ - tristate "Simple MCE injection interface" - depends on EDAC_DECODE_MCE && DEBUG_FS - default n - help - This is a simple debugfs interface to inject MCEs and test different - aspects of the MCE handling code. - - WARNING: Do not even assume this interface is staying stable! - config EDAC_MM_EDAC tristate "Main Memory EDAC (Error Detection And Correction) reporting" select RAS diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 28ef2a519f65..ae3c5f3ce405 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -17,7 +17,6 @@ edac_core-y += edac_pci.o edac_pci_sysfs.o endif obj-$(CONFIG_EDAC_GHES) += ghes_edac.o -obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o edac_mce_amd-y := mce_amd.o obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig index e5f0a43998d2..4c3c67d13254 100644 --- a/drivers/ras/Kconfig +++ b/drivers/ras/Kconfig @@ -29,4 +29,7 @@ menuconfig RAS data corruption. if RAS + +source arch/x86/ras/Kconfig + endif