Merge branch 'x86/mce' into x86/ras

Pursue a single RAS/MCE topic branch on x86.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2013-08-12 17:54:05 +02:00
commit 0237d7f355
11 changed files with 150 additions and 20 deletions

View File

@ -176,6 +176,11 @@ ACPI
acpi=noirq Don't route interrupts acpi=noirq Don't route interrupts
acpi=nocmcff Disable firmware first mode for corrected errors. This
disables parsing the HEST CMC error source to check if
firmware has set the FF flag. This may result in
duplicate corrected error reports.
PCI PCI
pci=off Don't use PCI pci=off Don't use PCI

View File

@ -86,6 +86,7 @@ extern int acpi_pci_disabled;
extern int acpi_skip_timer_override; extern int acpi_skip_timer_override;
extern int acpi_use_timer_override; extern int acpi_use_timer_override;
extern int acpi_fix_pin2_polarity; extern int acpi_fix_pin2_polarity;
extern int acpi_disable_cmcff;
extern u8 acpi_sci_flags; extern u8 acpi_sci_flags;
extern int acpi_sci_override_gsi; extern int acpi_sci_override_gsi;
@ -168,6 +169,7 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf)
#define acpi_lapic 0 #define acpi_lapic 0
#define acpi_ioapic 0 #define acpi_ioapic 0
#define acpi_disable_cmcff 0
static inline void acpi_noirq_set(void) { } static inline void acpi_noirq_set(void) { }
static inline void acpi_disable_pci(void) { } static inline void acpi_disable_pci(void) { }
static inline void disable_acpi(void) { } static inline void disable_acpi(void) { }

View File

@ -188,6 +188,9 @@ extern void register_mce_write_callback(ssize_t (*)(struct file *filp,
const char __user *ubuf, const char __user *ubuf,
size_t usize, loff_t *off)); size_t usize, loff_t *off));
/* Disable CMCI/polling for MCA bank claimed by firmware */
extern void mce_disable_bank(int bank);
/* /*
* Exception handler * Exception handler
*/ */

View File

@ -67,6 +67,7 @@ EXPORT_SYMBOL(acpi_pci_disabled);
int acpi_lapic; int acpi_lapic;
int acpi_ioapic; int acpi_ioapic;
int acpi_strict; int acpi_strict;
int acpi_disable_cmcff;
u8 acpi_sci_flags __initdata; u8 acpi_sci_flags __initdata;
int acpi_sci_override_gsi __initdata; int acpi_sci_override_gsi __initdata;
@ -1626,6 +1627,10 @@ static int __init parse_acpi(char *arg)
/* "acpi=copy_dsdt" copys DSDT */ /* "acpi=copy_dsdt" copys DSDT */
else if (strcmp(arg, "copy_dsdt") == 0) { else if (strcmp(arg, "copy_dsdt") == 0) {
acpi_gbl_copy_dsdt_locally = 1; acpi_gbl_copy_dsdt_locally = 1;
}
/* "acpi=nocmcff" disables FF mode for corrected errors */
else if (strcmp(arg, "nocmcff") == 0) {
acpi_disable_cmcff = 1;
} else { } else {
/* Core will printk when we return error. */ /* Core will printk when we return error. */
return -EINVAL; return -EINVAL;

View File

@ -25,15 +25,18 @@ int mce_severity(struct mce *a, int tolerant, char **msg);
struct dentry *mce_get_debugfs_dir(void); struct dentry *mce_get_debugfs_dir(void);
extern struct mce_bank *mce_banks; extern struct mce_bank *mce_banks;
extern mce_banks_t mce_banks_ce_disabled;
#ifdef CONFIG_X86_MCE_INTEL #ifdef CONFIG_X86_MCE_INTEL
unsigned long mce_intel_adjust_timer(unsigned long interval); unsigned long mce_intel_adjust_timer(unsigned long interval);
void mce_intel_cmci_poll(void); void mce_intel_cmci_poll(void);
void mce_intel_hcpu_update(unsigned long cpu); void mce_intel_hcpu_update(unsigned long cpu);
void cmci_disable_bank(int bank);
#else #else
# define mce_intel_adjust_timer mce_adjust_timer_default # define mce_intel_adjust_timer mce_adjust_timer_default
static inline void mce_intel_cmci_poll(void) { } static inline void mce_intel_cmci_poll(void) { }
static inline void mce_intel_hcpu_update(unsigned long cpu) { } static inline void mce_intel_hcpu_update(unsigned long cpu) { }
static inline void cmci_disable_bank(int bank) { }
#endif #endif
void mce_timer_kick(unsigned long interval); void mce_timer_kick(unsigned long interval);

View File

@ -97,6 +97,15 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
}; };
/*
 * MCA banks controlled through firmware first for corrected errors.
 * This is a global bitmap, indexed by bank number, of banks for which we
 * won't enable CMCI and we won't poll. Bits are set by mce_disable_bank()
 * and tested by cmci_discover() before it tries to claim a bank.
 * Firmware controls these banks and is responsible for reporting
 * corrected errors through GHES. Uncorrected/recoverable errors are
 * still notified through a machine check.
 */
mce_banks_t mce_banks_ce_disabled;
static DEFINE_PER_CPU(struct work_struct, mce_work); static DEFINE_PER_CPU(struct work_struct, mce_work);
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
@ -1935,6 +1944,25 @@ static struct miscdevice mce_chrdev_device = {
&mce_chrdev_ops, &mce_chrdev_ops,
}; };
/*
 * Per-CPU half of mce_disable_bank(); invoked on each CPU through
 * on_each_cpu(). @arg points to an int holding the bank number.
 */
static void __mce_disable_bank(void *arg)
{
	const int *bank_ptr = arg;
	int bank_nr = *bank_ptr;

	/* Remove the bank from this CPU's set of polled banks ... */
	__clear_bit(bank_nr, __get_cpu_var(mce_poll_banks));
	/* ... and let the CMCI code disable it on this CPU as well. */
	cmci_disable_bank(bank_nr);
}
/*
 * Disable corrected-error handling (polling and CMCI) for an MCA bank
 * that firmware has claimed.
 *
 * @bank: MCA bank number. Values outside [0, mca_cfg.banks) are reported
 *        with a FW_BUG warning and otherwise ignored.
 *
 * The bank is first recorded in mce_banks_ce_disabled, then
 * __mce_disable_bank() is run for it on every CPU.
 */
void mce_disable_bank(int bank)
{
	/*
	 * Reject negative numbers too: bank is used as a bit index below,
	 * and a negative index would address memory in front of the bitmap.
	 */
	if (bank < 0 || bank >= mca_cfg.banks) {
		pr_warn(FW_BUG
			"Ignoring request to disable invalid MCA bank %d.\n",
			bank);
		return;
	}
	set_bit(bank, mce_banks_ce_disabled);
	/*
	 * NOTE(review): the last argument (wait) is 1, which should make
	 * on_each_cpu() return only after every CPU has run the callback,
	 * so passing the address of the local 'bank' is fine - confirm
	 * against the smp API.
	 */
	on_each_cpu(__mce_disable_bank, &bank, 1);
}
/* /*
* mce=off Disables machine check * mce=off Disables machine check
* mce=no_cmci Disables CMCI * mce=no_cmci Disables CMCI

View File

@ -203,6 +203,10 @@ static void cmci_discover(int banks)
if (test_bit(i, owned)) if (test_bit(i, owned))
continue; continue;
/* Skip banks in firmware first mode */
if (test_bit(i, mce_banks_ce_disabled))
continue;
rdmsrl(MSR_IA32_MCx_CTL2(i), val); rdmsrl(MSR_IA32_MCx_CTL2(i), val);
/* Already owned by someone else? */ /* Already owned by someone else? */
@ -271,6 +275,19 @@ void cmci_recheck(void)
local_irq_restore(flags); local_irq_restore(flags);
} }
/*
 * Turn off CMCI for @bank on the current CPU and give up this CPU's
 * ownership of it. Does nothing if this CPU does not own the bank.
 *
 * Caller must hold the lock on cmci_discover_lock.
 */
static void __cmci_disable_bank(int bank)
{
	u64 ctl2;

	if (test_bit(bank, __get_cpu_var(mce_banks_owned))) {
		/* Read-modify-write so only the CMCI enable bit changes. */
		rdmsrl(MSR_IA32_MCx_CTL2(bank), ctl2);
		ctl2 &= ~MCI_CTL2_CMCI_EN;
		wrmsrl(MSR_IA32_MCx_CTL2(bank), ctl2);
		__clear_bit(bank, __get_cpu_var(mce_banks_owned));
	}
}
/* /*
* Disable CMCI on this CPU for all banks it owns when it goes down. * Disable CMCI on this CPU for all banks it owns when it goes down.
* This allows other CPUs to claim the banks on rediscovery. * This allows other CPUs to claim the banks on rediscovery.
@ -280,20 +297,12 @@ void cmci_clear(void)
unsigned long flags; unsigned long flags;
int i; int i;
int banks; int banks;
u64 val;
if (!cmci_supported(&banks)) if (!cmci_supported(&banks))
return; return;
raw_spin_lock_irqsave(&cmci_discover_lock, flags); raw_spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++) { for (i = 0; i < banks; i++)
if (!test_bit(i, __get_cpu_var(mce_banks_owned))) __cmci_disable_bank(i);
continue;
/* Disable CMCI */
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
val &= ~MCI_CTL2_CMCI_EN;
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
__clear_bit(i, __get_cpu_var(mce_banks_owned));
}
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
} }
@ -327,6 +336,19 @@ void cmci_reenable(void)
cmci_discover(banks); cmci_discover(banks);
} }
/*
 * Disable CMCI for @bank on the calling CPU. A no-op when
 * cmci_supported() reports that CMCI is not available.
 */
void cmci_disable_bank(int bank)
{
	unsigned long irq_flags;
	int nr_banks;	/* filled in by cmci_supported(); not used further here */

	if (cmci_supported(&nr_banks)) {
		/* __cmci_disable_bank() requires cmci_discover_lock to be held. */
		raw_spin_lock_irqsave(&cmci_discover_lock, irq_flags);
		__cmci_disable_bank(bank);
		raw_spin_unlock_irqrestore(&cmci_discover_lock, irq_flags);
	}
}
static void intel_init_cmci(void) static void intel_init_cmci(void)
{ {
int banks; int banks;

View File

@ -409,6 +409,34 @@ static void ghes_clear_estatus(struct ghes *ghes)
ghes->flags &= ~GHES_TO_CLEAR; ghes->flags &= ~GHES_TO_CLEAR;
} }
/*
 * Queue memory-failure handling for a memory error section reported
 * through GHES.
 *
 * @gdata: generic error data entry; the memory error section
 *         (struct cper_sec_mem_err) is read from directly behind it.
 * @sev:   GHES severity of the whole error status block.
 *
 * Two cases are acted upon, both only when the section carries a valid
 * physical address:
 *  - a corrected error whose section has the "error threshold exceeded"
 *    flag set: the page is queued with MF_SOFT_OFFLINE;
 *  - a recoverable error (both @sev and the section severity): the page
 *    is queued with no flags.
 *
 * The physical address comes from firmware, so the resulting PFN is
 * checked with pfn_valid() on both paths before anything is queued; an
 * invalid address only produces a rate-limited firmware warning.
 *
 * Compiles to an empty function without CONFIG_ACPI_APEI_MEMORY_FAILURE.
 */
static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
{
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
	unsigned long pfn;
	int flags;
	int sec_sev = ghes_severity(gdata->error_severity);
	struct cper_sec_mem_err *mem_err;

	mem_err = (struct cper_sec_mem_err *)(gdata + 1);

	/* Nothing can be done without a physical address. */
	if (!(mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS))
		return;

	if (sec_sev == GHES_SEV_CORRECTED &&
	    (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
		flags = MF_SOFT_OFFLINE;
	else if (sev == GHES_SEV_RECOVERABLE &&
		 sec_sev == GHES_SEV_RECOVERABLE)
		flags = 0;
	else
		return;

	pfn = mem_err->physical_addr >> PAGE_SHIFT;
	if (!pfn_valid(pfn)) {
		if (printk_ratelimit())
			pr_warn(FW_WARN GHES_PFX
				"Invalid address in generic error data: %#llx\n",
				mem_err->physical_addr);
		return;
	}

	memory_failure_queue(pfn, 0, flags);
#endif
}
static void ghes_do_proc(struct ghes *ghes, static void ghes_do_proc(struct ghes *ghes,
const struct acpi_hest_generic_status *estatus) const struct acpi_hest_generic_status *estatus)
{ {
@ -428,15 +456,7 @@ static void ghes_do_proc(struct ghes *ghes,
apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
mem_err); mem_err);
#endif #endif
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE ghes_handle_memory_failure(gdata, sev);
if (sev == GHES_SEV_RECOVERABLE &&
sec_sev == GHES_SEV_RECOVERABLE &&
mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
unsigned long pfn;
pfn = mem_err->physical_addr >> PAGE_SHIFT;
memory_failure_queue(pfn, 0, 0);
}
#endif
} }
#ifdef CONFIG_ACPI_APEI_PCIEAER #ifdef CONFIG_ACPI_APEI_PCIEAER
else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,

View File

@ -36,6 +36,7 @@
#include <linux/io.h> #include <linux/io.h>
#include <linux/platform_device.h> #include <linux/platform_device.h>
#include <acpi/apei.h> #include <acpi/apei.h>
#include <asm/mce.h>
#include "apei-internal.h" #include "apei-internal.h"
@ -121,6 +122,40 @@ int apei_hest_parse(apei_hest_func_t func, void *data)
} }
EXPORT_SYMBOL_GPL(apei_hest_parse); EXPORT_SYMBOL_GPL(apei_hest_parse);
/*
 * apei_hest_parse() callback: check whether firmware advertises firmware
 * first mode for corrected machine checks. The FF flag must be set and
 * the entry must list the MC banks that operate in FF mode; each listed
 * bank is then passed to mce_disable_bank().
 *
 * Returns 0 to keep parsing (not an enabled IA-32 corrected machine check
 * source), or a non-zero value to indicate that we are done parsing HEST.
 */
static int __init hest_parse_cmc(struct acpi_hest_header *hest_hdr, void *data)
{
	struct acpi_hest_ia_corrected *cmc_src;
	struct acpi_hest_ia_error_bank *bank_entry;
	int idx;

	if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK)
		return 0;

	cmc_src = (struct acpi_hest_ia_corrected *)hest_hdr;
	if (!cmc_src->enabled)
		return 0;

	/*
	 * We expect HEST to provide a list of MC banks that report errors
	 * in firmware first mode. Without the FF flag or without any banks
	 * there is nothing to disable, but parsing is finished either way.
	 */
	if ((cmc_src->flags & ACPI_HEST_FIRMWARE_FIRST) &&
	    cmc_src->num_hardware_banks) {
		pr_info(HEST_PFX "Enabling Firmware First mode for corrected errors.\n");

		/* The bank descriptors are read from directly behind the CMC structure. */
		bank_entry = (struct acpi_hest_ia_error_bank *)(cmc_src + 1);
		for (idx = 0; idx < cmc_src->num_hardware_banks; idx++)
			mce_disable_bank(bank_entry[idx].bank_number);
	}

	return 1;
}
struct ghes_arr { struct ghes_arr {
struct platform_device **ghes_devs; struct platform_device **ghes_devs;
unsigned int count; unsigned int count;
@ -227,6 +262,9 @@ void __init acpi_hest_init(void)
goto err; goto err;
} }
if (!acpi_disable_cmcff)
apei_hest_parse(hest_parse_cmc, NULL);
if (!ghes_disable) { if (!ghes_disable) {
rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
if (rc) if (rc)

View File

@ -1798,6 +1798,7 @@ enum mf_flags {
MF_COUNT_INCREASED = 1 << 0, MF_COUNT_INCREASED = 1 << 0,
MF_ACTION_REQUIRED = 1 << 1, MF_ACTION_REQUIRED = 1 << 1,
MF_MUST_KILL = 1 << 2, MF_MUST_KILL = 1 << 2,
MF_SOFT_OFFLINE = 1 << 3,
}; };
extern int memory_failure(unsigned long pfn, int trapno, int flags); extern int memory_failure(unsigned long pfn, int trapno, int flags);
extern void memory_failure_queue(unsigned long pfn, int trapno, int flags); extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);

View File

@ -1286,7 +1286,10 @@ static void memory_failure_work_func(struct work_struct *work)
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
if (!gotten) if (!gotten)
break; break;
memory_failure(entry.pfn, entry.trapno, entry.flags); if (entry.flags & MF_SOFT_OFFLINE)
soft_offline_page(pfn_to_page(entry.pfn), entry.flags);
else
memory_failure(entry.pfn, entry.trapno, entry.flags);
} }
} }