mce, edac: Use an atomic notifier for MCEs decoding
Add an atomic notifier which ensures proper locking when conveying MCE info to EDAC for decoding. The actual notifier call overrides a default, negative-priority notifier.

Note: make sure we register the default decoder only once, since mcheck_init() runs on each CPU.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
LKML-Reference: <20091003065752.GA8935@liondog.tnic>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
d93a8f829f
commit
fb2531953f
|
@ -108,6 +108,8 @@ struct mce_log {
|
||||||
#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9)
|
#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9)
|
||||||
#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0)
|
#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0)
|
||||||
|
|
||||||
|
extern struct atomic_notifier_head x86_mce_decoder_chain;
|
||||||
|
|
||||||
#ifdef __KERNEL__
|
#ifdef __KERNEL__
|
||||||
|
|
||||||
#include <linux/percpu.h>
|
#include <linux/percpu.h>
|
||||||
|
@ -213,6 +215,5 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
|
||||||
void intel_init_thermal(struct cpuinfo_x86 *c);
|
void intel_init_thermal(struct cpuinfo_x86 *c);
|
||||||
|
|
||||||
void mce_log_therm_throt_event(__u64 status);
|
void mce_log_therm_throt_event(__u64 status);
|
||||||
|
|
||||||
#endif /* __KERNEL__ */
|
#endif /* __KERNEL__ */
|
||||||
#endif /* _ASM_X86_MCE_H */
|
#endif /* _ASM_X86_MCE_H */
|
||||||
|
|
|
@ -85,18 +85,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
|
||||||
static DEFINE_PER_CPU(struct mce, mces_seen);
|
static DEFINE_PER_CPU(struct mce, mces_seen);
|
||||||
static int cpu_missing;
|
static int cpu_missing;
|
||||||
|
|
||||||
static void default_decode_mce(struct mce *m)
|
/*
|
||||||
|
* CPU/chipset specific EDAC code can register a notifier call here to print
|
||||||
|
* MCE errors in a human-readable form.
|
||||||
|
*/
|
||||||
|
ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
|
||||||
|
EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
|
||||||
|
|
||||||
|
static int default_decode_mce(struct notifier_block *nb, unsigned long val,
|
||||||
|
void *data)
|
||||||
{
|
{
|
||||||
pr_emerg("No human readable MCE decoding support on this CPU type.\n");
|
pr_emerg("No human readable MCE decoding support on this CPU type.\n");
|
||||||
pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
|
pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
|
||||||
|
|
||||||
|
return NOTIFY_STOP;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
static struct notifier_block mce_dec_nb = {
|
||||||
* CPU/chipset specific EDAC code can register a callback here to print
|
.notifier_call = default_decode_mce,
|
||||||
* MCE errors in a human-readable form:
|
.priority = -1,
|
||||||
*/
|
};
|
||||||
void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
|
|
||||||
EXPORT_SYMBOL(x86_mce_decode_callback);
|
|
||||||
|
|
||||||
/* MCA banks polled by the period polling timer for corrected events */
|
/* MCA banks polled by the period polling timer for corrected events */
|
||||||
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
|
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
|
||||||
|
@ -204,9 +212,9 @@ static void print_mce(struct mce *m)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Print out human-readable details about the MCE error,
|
* Print out human-readable details about the MCE error,
|
||||||
* (if the CPU has an implementation for that):
|
* (if the CPU has an implementation for that)
|
||||||
*/
|
*/
|
||||||
x86_mce_decode_callback(m);
|
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void print_mce_head(void)
|
static void print_mce_head(void)
|
||||||
|
@ -1420,6 +1428,9 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
|
||||||
mce_cpu_features(c);
|
mce_cpu_features(c);
|
||||||
mce_init_timer();
|
mce_init_timer();
|
||||||
INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
|
INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
|
||||||
|
|
||||||
|
if (raw_smp_processor_id() == 0)
|
||||||
|
atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
|
|
||||||
static bool report_gart_errors;
|
static bool report_gart_errors;
|
||||||
static void (*nb_bus_decoder)(int node_id, struct err_regs *regs);
|
static void (*nb_bus_decoder)(int node_id, struct err_regs *regs);
|
||||||
static void (*orig_mce_callback)(struct mce *m);
|
|
||||||
|
|
||||||
void amd_report_gart_errors(bool v)
|
void amd_report_gart_errors(bool v)
|
||||||
{
|
{
|
||||||
|
@ -363,8 +362,10 @@ static inline void amd_decode_err_code(unsigned int ec)
|
||||||
pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
|
pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void amd_decode_mce(struct mce *m)
|
static int amd_decode_mce(struct notifier_block *nb, unsigned long val,
|
||||||
|
void *data)
|
||||||
{
|
{
|
||||||
|
struct mce *m = (struct mce *)data;
|
||||||
struct err_regs regs;
|
struct err_regs regs;
|
||||||
int node, ecc;
|
int node, ecc;
|
||||||
|
|
||||||
|
@ -420,20 +421,22 @@ static void amd_decode_mce(struct mce *m)
|
||||||
}
|
}
|
||||||
|
|
||||||
amd_decode_err_code(m->status & 0xffff);
|
amd_decode_err_code(m->status & 0xffff);
|
||||||
|
|
||||||
|
return NOTIFY_STOP;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct notifier_block amd_mce_dec_nb = {
|
||||||
|
.notifier_call = amd_decode_mce,
|
||||||
|
};
|
||||||
|
|
||||||
static int __init mce_amd_init(void)
|
static int __init mce_amd_init(void)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* We can decode MCEs for Opteron and later CPUs:
|
* We can decode MCEs for Opteron and later CPUs:
|
||||||
*/
|
*/
|
||||||
if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
|
if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
|
||||||
(boot_cpu_data.x86 >= 0xf)) {
|
(boot_cpu_data.x86 >= 0xf))
|
||||||
/* safe the default decode mce callback */
|
atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);
|
||||||
orig_mce_callback = x86_mce_decode_callback;
|
|
||||||
|
|
||||||
x86_mce_decode_callback = amd_decode_mce;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -442,7 +445,7 @@ early_initcall(mce_amd_init);
|
||||||
#ifdef MODULE
|
#ifdef MODULE
|
||||||
static void __exit mce_amd_exit(void)
|
static void __exit mce_amd_exit(void)
|
||||||
{
|
{
|
||||||
x86_mce_decode_callback = orig_mce_callback;
|
atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
|
||||||
}
|
}
|
||||||
|
|
||||||
MODULE_DESCRIPTION("AMD MCE decoder");
|
MODULE_DESCRIPTION("AMD MCE decoder");
|
||||||
|
|
Loading…
Reference in New Issue