Merge branch 'x86/mce3' into x86/urgent
This commit is contained in:
commit
1d99100120
|
@ -102,15 +102,39 @@ struct mce_log {
|
|||
|
||||
#ifdef __KERNEL__
|
||||
|
||||
extern int mce_disabled;
|
||||
|
||||
#include <asm/atomic.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/init.h>
|
||||
#include <asm/atomic.h>
|
||||
|
||||
extern int mce_disabled;
|
||||
extern int mce_p5_enabled;
|
||||
|
||||
#ifdef CONFIG_X86_MCE
|
||||
void mcheck_init(struct cpuinfo_x86 *c);
|
||||
#else
|
||||
static inline void mcheck_init(struct cpuinfo_x86 *c) {}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_OLD_MCE
|
||||
extern int nr_mce_banks;
|
||||
void amd_mcheck_init(struct cpuinfo_x86 *c);
|
||||
void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
|
||||
void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_ANCIENT_MCE
|
||||
void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
|
||||
void winchip_mcheck_init(struct cpuinfo_x86 *c);
|
||||
static inline void enable_p5_mce(void) { mce_p5_enabled = 1; }
|
||||
#else
|
||||
static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
|
||||
static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
|
||||
static inline void enable_p5_mce(void) {}
|
||||
#endif
|
||||
|
||||
void mce_setup(struct mce *m);
|
||||
void mce_log(struct mce *m);
|
||||
DECLARE_PER_CPU(struct sys_device, mce_dev);
|
||||
extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
|
||||
|
||||
/*
|
||||
* To support more than 128 would need to escape the predefined
|
||||
|
@ -145,12 +169,8 @@ int mce_available(struct cpuinfo_x86 *c);
|
|||
DECLARE_PER_CPU(unsigned, mce_exception_count);
|
||||
DECLARE_PER_CPU(unsigned, mce_poll_count);
|
||||
|
||||
void mce_log_therm_throt_event(__u64 status);
|
||||
|
||||
extern atomic_t mce_entry;
|
||||
|
||||
void do_machine_check(struct pt_regs *, long);
|
||||
|
||||
typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
|
||||
DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
|
||||
|
||||
|
@ -167,13 +187,32 @@ void mce_notify_process(void);
|
|||
DECLARE_PER_CPU(struct mce, injectm);
|
||||
extern struct file_operations mce_chrdev_ops;
|
||||
|
||||
#ifdef CONFIG_X86_MCE
|
||||
void mcheck_init(struct cpuinfo_x86 *c);
|
||||
#else
|
||||
#define mcheck_init(c) do { } while (0)
|
||||
#endif
|
||||
/*
|
||||
* Exception handler
|
||||
*/
|
||||
|
||||
/* Call the installed machine check handler for this CPU setup. */
|
||||
extern void (*machine_check_vector)(struct pt_regs *, long error_code);
|
||||
void do_machine_check(struct pt_regs *, long);
|
||||
|
||||
/*
|
||||
* Threshold handler
|
||||
*/
|
||||
|
||||
extern void (*mce_threshold_vector)(void);
|
||||
extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
|
||||
|
||||
/*
|
||||
* Thermal handler
|
||||
*/
|
||||
|
||||
void intel_init_thermal(struct cpuinfo_x86 *c);
|
||||
|
||||
#ifdef CONFIG_X86_NEW_MCE
|
||||
void mce_log_therm_throt_event(__u64 status);
|
||||
#else
|
||||
static inline void mce_log_therm_throt_event(__u64 status) {}
|
||||
#endif
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
#endif /* _ASM_X86_MCE_H */
|
||||
|
|
|
@ -1,9 +0,0 @@
|
|||
#ifndef _ASM_X86_THERM_THROT_H
|
||||
#define _ASM_X86_THERM_THROT_H
|
||||
|
||||
#include <asm/atomic.h>
|
||||
|
||||
extern atomic_t therm_throt_en;
|
||||
int therm_throt_process(int curr);
|
||||
|
||||
#endif /* _ASM_X86_THERM_THROT_H */
|
|
@ -1,11 +1,12 @@
|
|||
obj-y = mce.o therm_throt.o
|
||||
obj-y = mce.o
|
||||
|
||||
obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o
|
||||
obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o
|
||||
obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
|
||||
obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o
|
||||
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o mce_intel.o
|
||||
obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o
|
||||
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
|
||||
obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
|
||||
obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
|
||||
obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
|
||||
obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
|
||||
|
||||
obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
|
||||
|
|
|
@ -10,10 +10,9 @@
|
|||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
#include "mce.h"
|
||||
|
||||
/* Machine Check Handler For AMD Athlon/Duron: */
|
||||
static void k7_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
|
|
|
@ -44,7 +44,6 @@
|
|||
#include <asm/msr.h>
|
||||
|
||||
#include "mce-internal.h"
|
||||
#include "mce.h"
|
||||
|
||||
/* Handle unconfigured int18 (should never happen) */
|
||||
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
|
||||
|
@ -57,7 +56,7 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
|
|||
void (*machine_check_vector)(struct pt_regs *, long error_code) =
|
||||
unexpected_machine_check;
|
||||
|
||||
int mce_disabled;
|
||||
int mce_disabled __read_mostly;
|
||||
|
||||
#ifdef CONFIG_X86_NEW_MCE
|
||||
|
||||
|
@ -76,21 +75,22 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
|
|||
* 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
|
||||
* 3: never panic or SIGBUS, log all errors (for testing only)
|
||||
*/
|
||||
static int tolerant = 1;
|
||||
static int banks;
|
||||
static u64 *bank;
|
||||
static unsigned long notify_user;
|
||||
static int rip_msr;
|
||||
static int mce_bootlog = -1;
|
||||
static int monarch_timeout = -1;
|
||||
static int mce_panic_timeout;
|
||||
static int mce_dont_log_ce;
|
||||
int mce_cmci_disabled;
|
||||
int mce_ignore_ce;
|
||||
int mce_ser;
|
||||
static int tolerant __read_mostly = 1;
|
||||
static int banks __read_mostly;
|
||||
static u64 *bank __read_mostly;
|
||||
static int rip_msr __read_mostly;
|
||||
static int mce_bootlog __read_mostly = -1;
|
||||
static int monarch_timeout __read_mostly = -1;
|
||||
static int mce_panic_timeout __read_mostly;
|
||||
static int mce_dont_log_ce __read_mostly;
|
||||
int mce_cmci_disabled __read_mostly;
|
||||
int mce_ignore_ce __read_mostly;
|
||||
int mce_ser __read_mostly;
|
||||
|
||||
static char trigger[128];
|
||||
static char *trigger_argv[2] = { trigger, NULL };
|
||||
/* User mode helper program triggered by machine check event */
|
||||
static unsigned long mce_need_notify;
|
||||
static char mce_helper[128];
|
||||
static char *mce_helper_argv[2] = { mce_helper, NULL };
|
||||
|
||||
static unsigned long dont_init_banks;
|
||||
|
||||
|
@ -180,7 +180,7 @@ void mce_log(struct mce *mce)
|
|||
wmb();
|
||||
|
||||
mce->finished = 1;
|
||||
set_bit(0, &notify_user);
|
||||
set_bit(0, &mce_need_notify);
|
||||
}
|
||||
|
||||
static void print_mce(struct mce *m)
|
||||
|
@ -691,18 +691,21 @@ static atomic_t global_nwo;
|
|||
* in the entry order.
|
||||
* TBD double check parallel CPU hotunplug
|
||||
*/
|
||||
static int mce_start(int no_way_out, int *order)
|
||||
static int mce_start(int *no_way_out)
|
||||
{
|
||||
int nwo;
|
||||
int order;
|
||||
int cpus = num_online_cpus();
|
||||
u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
|
||||
|
||||
if (!timeout) {
|
||||
*order = -1;
|
||||
return no_way_out;
|
||||
}
|
||||
if (!timeout)
|
||||
return -1;
|
||||
|
||||
atomic_add(no_way_out, &global_nwo);
|
||||
atomic_add(*no_way_out, &global_nwo);
|
||||
/*
|
||||
* global_nwo should be updated before mce_callin
|
||||
*/
|
||||
smp_wmb();
|
||||
order = atomic_add_return(1, &mce_callin);
|
||||
|
||||
/*
|
||||
* Wait for everyone.
|
||||
|
@ -710,40 +713,43 @@ static int mce_start(int no_way_out, int *order)
|
|||
while (atomic_read(&mce_callin) != cpus) {
|
||||
if (mce_timed_out(&timeout)) {
|
||||
atomic_set(&global_nwo, 0);
|
||||
*order = -1;
|
||||
return no_way_out;
|
||||
return -1;
|
||||
}
|
||||
ndelay(SPINUNIT);
|
||||
}
|
||||
|
||||
/*
|
||||
* mce_callin should be read before global_nwo
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
if (order == 1) {
|
||||
/*
|
||||
* Monarch: Starts executing now, the others wait.
|
||||
*/
|
||||
atomic_set(&mce_executing, 1);
|
||||
} else {
|
||||
/*
|
||||
* Subject: Now start the scanning loop one by one in
|
||||
* the original callin order.
|
||||
* This way when there are any shared banks it will be
|
||||
* only seen by one CPU before cleared, avoiding duplicates.
|
||||
*/
|
||||
while (atomic_read(&mce_executing) < order) {
|
||||
if (mce_timed_out(&timeout)) {
|
||||
atomic_set(&global_nwo, 0);
|
||||
return -1;
|
||||
}
|
||||
ndelay(SPINUNIT);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Cache the global no_way_out state.
|
||||
*/
|
||||
nwo = atomic_read(&global_nwo);
|
||||
*no_way_out = atomic_read(&global_nwo);
|
||||
|
||||
/*
|
||||
* Monarch starts executing now, the others wait.
|
||||
*/
|
||||
if (*order == 1) {
|
||||
atomic_set(&mce_executing, 1);
|
||||
return nwo;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now start the scanning loop one by one
|
||||
* in the original callin order.
|
||||
* This way when there are any shared banks it will
|
||||
* be only seen by one CPU before cleared, avoiding duplicates.
|
||||
*/
|
||||
while (atomic_read(&mce_executing) < *order) {
|
||||
if (mce_timed_out(&timeout)) {
|
||||
atomic_set(&global_nwo, 0);
|
||||
*order = -1;
|
||||
return no_way_out;
|
||||
}
|
||||
ndelay(SPINUNIT);
|
||||
}
|
||||
return nwo;
|
||||
return order;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -863,7 +869,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
|||
* check handler.
|
||||
*/
|
||||
int order;
|
||||
|
||||
/*
|
||||
* If no_way_out gets set, there is no safe way to recover from this
|
||||
* MCE. If tolerant is cranked up, we'll try anyway.
|
||||
|
@ -887,7 +892,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
|||
if (!banks)
|
||||
goto out;
|
||||
|
||||
order = atomic_add_return(1, &mce_callin);
|
||||
mce_setup(&m);
|
||||
|
||||
m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
|
||||
|
@ -909,7 +913,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
|||
* This way we don't report duplicated events on shared banks
|
||||
* because the first one to see it will clear it.
|
||||
*/
|
||||
no_way_out = mce_start(no_way_out, &order);
|
||||
order = mce_start(&no_way_out);
|
||||
for (i = 0; i < banks; i++) {
|
||||
__clear_bit(i, toclear);
|
||||
if (!bank[i])
|
||||
|
@ -1118,7 +1122,7 @@ static void mcheck_timer(unsigned long data)
|
|||
|
||||
static void mce_do_trigger(struct work_struct *work)
|
||||
{
|
||||
call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
|
||||
call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
|
||||
}
|
||||
|
||||
static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
|
||||
|
@ -1135,7 +1139,7 @@ int mce_notify_irq(void)
|
|||
|
||||
clear_thread_flag(TIF_MCE_NOTIFY);
|
||||
|
||||
if (test_and_clear_bit(0, &notify_user)) {
|
||||
if (test_and_clear_bit(0, &mce_need_notify)) {
|
||||
wake_up_interruptible(&mce_wait);
|
||||
|
||||
/*
|
||||
|
@ -1143,7 +1147,7 @@ int mce_notify_irq(void)
|
|||
* work_pending is always cleared before the function is
|
||||
* executed.
|
||||
*/
|
||||
if (trigger[0] && !work_pending(&mce_trigger_work))
|
||||
if (mce_helper[0] && !work_pending(&mce_trigger_work))
|
||||
schedule_work(&mce_trigger_work);
|
||||
|
||||
if (__ratelimit(&ratelimit))
|
||||
|
@ -1282,7 +1286,6 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
|
|||
return;
|
||||
switch (c->x86_vendor) {
|
||||
case X86_VENDOR_INTEL:
|
||||
if (mce_p5_enabled())
|
||||
intel_p5_mcheck_init(c);
|
||||
break;
|
||||
case X86_VENDOR_CENTAUR:
|
||||
|
@ -1609,7 +1612,8 @@ static int mce_resume(struct sys_device *dev)
|
|||
static void mce_cpu_restart(void *data)
|
||||
{
|
||||
del_timer_sync(&__get_cpu_var(mce_timer));
|
||||
if (mce_available(&current_cpu_data))
|
||||
if (!mce_available(&current_cpu_data))
|
||||
return;
|
||||
mce_init();
|
||||
mce_init_timer();
|
||||
}
|
||||
|
@ -1620,6 +1624,26 @@ static void mce_restart(void)
|
|||
on_each_cpu(mce_cpu_restart, NULL, 1);
|
||||
}
|
||||
|
||||
/* Toggle features for corrected errors */
|
||||
static void mce_disable_ce(void *all)
|
||||
{
|
||||
if (!mce_available(&current_cpu_data))
|
||||
return;
|
||||
if (all)
|
||||
del_timer_sync(&__get_cpu_var(mce_timer));
|
||||
cmci_clear();
|
||||
}
|
||||
|
||||
static void mce_enable_ce(void *all)
|
||||
{
|
||||
if (!mce_available(&current_cpu_data))
|
||||
return;
|
||||
cmci_reenable();
|
||||
cmci_recheck();
|
||||
if (all)
|
||||
mce_init_timer();
|
||||
}
|
||||
|
||||
static struct sysdev_class mce_sysclass = {
|
||||
.suspend = mce_suspend,
|
||||
.shutdown = mce_shutdown,
|
||||
|
@ -1659,9 +1683,9 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
|
|||
static ssize_t
|
||||
show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf)
|
||||
{
|
||||
strcpy(buf, trigger);
|
||||
strcpy(buf, mce_helper);
|
||||
strcat(buf, "\n");
|
||||
return strlen(trigger) + 1;
|
||||
return strlen(mce_helper) + 1;
|
||||
}
|
||||
|
||||
static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
|
||||
|
@ -1670,10 +1694,10 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
|
|||
char *p;
|
||||
int len;
|
||||
|
||||
strncpy(trigger, buf, sizeof(trigger));
|
||||
trigger[sizeof(trigger)-1] = 0;
|
||||
len = strlen(trigger);
|
||||
p = strchr(trigger, '\n');
|
||||
strncpy(mce_helper, buf, sizeof(mce_helper));
|
||||
mce_helper[sizeof(mce_helper)-1] = 0;
|
||||
len = strlen(mce_helper);
|
||||
p = strchr(mce_helper, '\n');
|
||||
|
||||
if (*p)
|
||||
*p = 0;
|
||||
|
@ -1681,6 +1705,52 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
|
|||
return len;
|
||||
}
|
||||
|
||||
static ssize_t set_ignore_ce(struct sys_device *s,
|
||||
struct sysdev_attribute *attr,
|
||||
const char *buf, size_t size)
|
||||
{
|
||||
u64 new;
|
||||
|
||||
if (strict_strtoull(buf, 0, &new) < 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (mce_ignore_ce ^ !!new) {
|
||||
if (new) {
|
||||
/* disable ce features */
|
||||
on_each_cpu(mce_disable_ce, (void *)1, 1);
|
||||
mce_ignore_ce = 1;
|
||||
} else {
|
||||
/* enable ce features */
|
||||
mce_ignore_ce = 0;
|
||||
on_each_cpu(mce_enable_ce, (void *)1, 1);
|
||||
}
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
static ssize_t set_cmci_disabled(struct sys_device *s,
|
||||
struct sysdev_attribute *attr,
|
||||
const char *buf, size_t size)
|
||||
{
|
||||
u64 new;
|
||||
|
||||
if (strict_strtoull(buf, 0, &new) < 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (mce_cmci_disabled ^ !!new) {
|
||||
if (new) {
|
||||
/* disable cmci */
|
||||
on_each_cpu(mce_disable_ce, NULL, 1);
|
||||
mce_cmci_disabled = 1;
|
||||
} else {
|
||||
/* enable cmci */
|
||||
mce_cmci_disabled = 0;
|
||||
on_each_cpu(mce_enable_ce, NULL, 1);
|
||||
}
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
static ssize_t store_int_with_restart(struct sys_device *s,
|
||||
struct sysdev_attribute *attr,
|
||||
const char *buf, size_t size)
|
||||
|
@ -1693,6 +1763,7 @@ static ssize_t store_int_with_restart(struct sys_device *s,
|
|||
static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
|
||||
static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
|
||||
static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
|
||||
static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce);
|
||||
|
||||
static struct sysdev_ext_attribute attr_check_interval = {
|
||||
_SYSDEV_ATTR(check_interval, 0644, sysdev_show_int,
|
||||
|
@ -1700,9 +1771,24 @@ static struct sysdev_ext_attribute attr_check_interval = {
|
|||
&check_interval
|
||||
};
|
||||
|
||||
static struct sysdev_ext_attribute attr_ignore_ce = {
|
||||
_SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce),
|
||||
&mce_ignore_ce
|
||||
};
|
||||
|
||||
static struct sysdev_ext_attribute attr_cmci_disabled = {
|
||||
_SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled),
|
||||
&mce_cmci_disabled
|
||||
};
|
||||
|
||||
static struct sysdev_attribute *mce_attrs[] = {
|
||||
&attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger,
|
||||
&attr_tolerant.attr,
|
||||
&attr_check_interval.attr,
|
||||
&attr_trigger,
|
||||
&attr_monarch_timeout.attr,
|
||||
&attr_dont_log_ce.attr,
|
||||
&attr_ignore_ce.attr,
|
||||
&attr_cmci_disabled.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
|
@ -1712,7 +1798,7 @@ static cpumask_var_t mce_dev_initialized;
|
|||
static __cpuinit int mce_create_device(unsigned int cpu)
|
||||
{
|
||||
int err;
|
||||
int i;
|
||||
int i, j;
|
||||
|
||||
if (!mce_available(&boot_cpu_data))
|
||||
return -EIO;
|
||||
|
@ -1730,9 +1816,9 @@ static __cpuinit int mce_create_device(unsigned int cpu)
|
|||
if (err)
|
||||
goto error;
|
||||
}
|
||||
for (i = 0; i < banks; i++) {
|
||||
for (j = 0; j < banks; j++) {
|
||||
err = sysdev_create_file(&per_cpu(mce_dev, cpu),
|
||||
&bank_attrs[i]);
|
||||
&bank_attrs[j]);
|
||||
if (err)
|
||||
goto error2;
|
||||
}
|
||||
|
@ -1740,8 +1826,8 @@ static __cpuinit int mce_create_device(unsigned int cpu)
|
|||
|
||||
return 0;
|
||||
error2:
|
||||
while (--i >= 0)
|
||||
sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
|
||||
while (--j >= 0)
|
||||
sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]);
|
||||
error:
|
||||
while (--i >= 0)
|
||||
sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
|
||||
|
@ -1883,7 +1969,7 @@ static __init int mce_init_device(void)
|
|||
if (!mce_available(&boot_cpu_data))
|
||||
return -EIO;
|
||||
|
||||
alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
|
||||
zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
|
||||
|
||||
err = mce_init_banks();
|
||||
if (err)
|
||||
|
@ -1915,7 +2001,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
|
|||
/* This has to be run for each processor */
|
||||
void mcheck_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (mce_disabled == 1)
|
||||
if (mce_disabled)
|
||||
return;
|
||||
|
||||
switch (c->x86_vendor) {
|
||||
|
@ -1945,10 +2031,9 @@ void mcheck_init(struct cpuinfo_x86 *c)
|
|||
|
||||
static int __init mcheck_enable(char *str)
|
||||
{
|
||||
mce_disabled = -1;
|
||||
mce_p5_enabled = 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("mce", mcheck_enable);
|
||||
|
||||
#endif /* CONFIG_X86_OLD_MCE */
|
||||
|
|
|
@ -1,38 +0,0 @@
|
|||
#include <linux/init.h>
|
||||
#include <asm/mce.h>
|
||||
|
||||
#ifdef CONFIG_X86_OLD_MCE
|
||||
void amd_mcheck_init(struct cpuinfo_x86 *c);
|
||||
void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
|
||||
void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_ANCIENT_MCE
|
||||
void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
|
||||
void winchip_mcheck_init(struct cpuinfo_x86 *c);
|
||||
extern int mce_p5_enable;
|
||||
static inline int mce_p5_enabled(void) { return mce_p5_enable; }
|
||||
static inline void enable_p5_mce(void) { mce_p5_enable = 1; }
|
||||
#else
|
||||
static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
|
||||
static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
|
||||
static inline int mce_p5_enabled(void) { return 0; }
|
||||
static inline void enable_p5_mce(void) { }
|
||||
#endif
|
||||
|
||||
/* Call the installed machine check handler for this CPU setup. */
|
||||
extern void (*machine_check_vector)(struct pt_regs *, long error_code);
|
||||
|
||||
#ifdef CONFIG_X86_OLD_MCE
|
||||
|
||||
extern int nr_mce_banks;
|
||||
|
||||
void intel_set_thermal_handler(void);
|
||||
|
||||
#else
|
||||
|
||||
static inline void intel_set_thermal_handler(void) { }
|
||||
|
||||
#endif
|
||||
|
||||
void intel_init_thermal(struct cpuinfo_x86 *c);
|
|
@ -1,80 +1,226 @@
|
|||
/*
|
||||
* Common code for Intel machine checks
|
||||
* Intel specific MCE features.
|
||||
* Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
|
||||
* Copyright (C) 2008, 2009 Intel Corporation
|
||||
* Author: Andi Kleen
|
||||
*/
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/therm_throt.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/system.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/mce.h>
|
||||
|
||||
#include "mce.h"
|
||||
/*
|
||||
* Support for Intel Correct Machine Check Interrupts. This allows
|
||||
* the CPU to raise an interrupt when a corrected machine check happened.
|
||||
* Normally we pick those up using a regular polling timer.
|
||||
* Also supports reliable discovery of shared banks.
|
||||
*/
|
||||
|
||||
void intel_init_thermal(struct cpuinfo_x86 *c)
|
||||
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
|
||||
|
||||
/*
|
||||
* cmci_discover_lock protects against parallel discovery attempts
|
||||
* which could race against each other.
|
||||
*/
|
||||
static DEFINE_SPINLOCK(cmci_discover_lock);
|
||||
|
||||
#define CMCI_THRESHOLD 1
|
||||
|
||||
static int cmci_supported(int *banks)
|
||||
{
|
||||
unsigned int cpu = smp_processor_id();
|
||||
int tm2 = 0;
|
||||
u32 l, h;
|
||||
u64 cap;
|
||||
|
||||
if (mce_cmci_disabled || mce_ignore_ce)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Thermal monitoring depends on ACPI, clock modulation
|
||||
* and APIC as well
|
||||
* Vendor check is not strictly needed, but the initial
|
||||
* initialization is vendor keyed and this
|
||||
* makes sure none of the backdoors are entered otherwise.
|
||||
*/
|
||||
if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC) ||
|
||||
!cpu_has(c, X86_FEATURE_APIC)) {
|
||||
pr_debug("Thermal monitoring disabled\n");
|
||||
return;
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
|
||||
return 0;
|
||||
if (!cpu_has_apic || lapic_get_maxlvt() < 6)
|
||||
return 0;
|
||||
rdmsrl(MSR_IA32_MCG_CAP, cap);
|
||||
*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
|
||||
return !!(cap & MCG_CMCI_P);
|
||||
}
|
||||
|
||||
/*
|
||||
* First check if its enabled already, in which case there might
|
||||
* be some SMM goo which handles it, so we can't even put a handler
|
||||
* since it might be delivered via SMI already:
|
||||
* The interrupt handler. This is called on every event.
|
||||
* Just call the poller directly to log any events.
|
||||
* This could in theory increase the threshold under high load,
|
||||
* but doesn't for now.
|
||||
*/
|
||||
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
|
||||
h = apic_read(APIC_LVTTHMR);
|
||||
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
|
||||
printk(KERN_DEBUG
|
||||
"CPU%d: Thermal monitoring handled by SMI\n", cpu);
|
||||
static void intel_threshold_interrupt(void)
|
||||
{
|
||||
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
|
||||
mce_notify_irq();
|
||||
}
|
||||
|
||||
static void print_update(char *type, int *hdr, int num)
|
||||
{
|
||||
if (*hdr == 0)
|
||||
printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
|
||||
*hdr = 1;
|
||||
printk(KERN_CONT " %s:%d", type, num);
|
||||
}
|
||||
|
||||
/*
|
||||
* Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
|
||||
* on this CPU. Use the algorithm recommended in the SDM to discover shared
|
||||
* banks.
|
||||
*/
|
||||
static void cmci_discover(int banks, int boot)
|
||||
{
|
||||
unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
|
||||
unsigned long flags;
|
||||
int hdr = 0;
|
||||
int i;
|
||||
|
||||
spin_lock_irqsave(&cmci_discover_lock, flags);
|
||||
for (i = 0; i < banks; i++) {
|
||||
u64 val;
|
||||
|
||||
if (test_bit(i, owned))
|
||||
continue;
|
||||
|
||||
rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
|
||||
|
||||
/* Already owned by someone else? */
|
||||
if (val & CMCI_EN) {
|
||||
if (test_and_clear_bit(i, owned) || boot)
|
||||
print_update("SHD", &hdr, i);
|
||||
__clear_bit(i, __get_cpu_var(mce_poll_banks));
|
||||
continue;
|
||||
}
|
||||
|
||||
val |= CMCI_EN | CMCI_THRESHOLD;
|
||||
wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
|
||||
rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
|
||||
|
||||
/* Did the enable bit stick? -- the bank supports CMCI */
|
||||
if (val & CMCI_EN) {
|
||||
if (!test_and_set_bit(i, owned) || boot)
|
||||
print_update("CMCI", &hdr, i);
|
||||
__clear_bit(i, __get_cpu_var(mce_poll_banks));
|
||||
} else {
|
||||
WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&cmci_discover_lock, flags);
|
||||
if (hdr)
|
||||
printk(KERN_CONT "\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* Just in case we missed an event during initialization check
|
||||
* all the CMCI owned banks.
|
||||
*/
|
||||
void cmci_recheck(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
int banks;
|
||||
|
||||
if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
|
||||
return;
|
||||
local_irq_save(flags);
|
||||
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
|
||||
tm2 = 1;
|
||||
/*
|
||||
* Disable CMCI on this CPU for all banks it owns when it goes down.
|
||||
* This allows other CPUs to claim the banks on rediscovery.
|
||||
*/
|
||||
void cmci_clear(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
int i;
|
||||
int banks;
|
||||
u64 val;
|
||||
|
||||
/* Check whether a vector already exists */
|
||||
if (h & APIC_VECTOR_MASK) {
|
||||
printk(KERN_DEBUG
|
||||
"CPU%d: Thermal LVT vector (%#x) already installed\n",
|
||||
cpu, (h & APIC_VECTOR_MASK));
|
||||
if (!cmci_supported(&banks))
|
||||
return;
|
||||
spin_lock_irqsave(&cmci_discover_lock, flags);
|
||||
for (i = 0; i < banks; i++) {
|
||||
if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
|
||||
continue;
|
||||
/* Disable CMCI */
|
||||
rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
|
||||
val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
|
||||
wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
|
||||
__clear_bit(i, __get_cpu_var(mce_banks_owned));
|
||||
}
|
||||
spin_unlock_irqrestore(&cmci_discover_lock, flags);
|
||||
}
|
||||
|
||||
/* We'll mask the thermal vector in the lapic till we're ready: */
|
||||
h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
|
||||
apic_write(APIC_LVTTHMR, h);
|
||||
/*
|
||||
* After a CPU went down cycle through all the others and rediscover
|
||||
* Must run in process context.
|
||||
*/
|
||||
void cmci_rediscover(int dying)
|
||||
{
|
||||
int banks;
|
||||
int cpu;
|
||||
cpumask_var_t old;
|
||||
|
||||
rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
|
||||
wrmsr(MSR_IA32_THERM_INTERRUPT,
|
||||
l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
|
||||
if (!cmci_supported(&banks))
|
||||
return;
|
||||
if (!alloc_cpumask_var(&old, GFP_KERNEL))
|
||||
return;
|
||||
cpumask_copy(old, &current->cpus_allowed);
|
||||
|
||||
intel_set_thermal_handler();
|
||||
|
||||
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
|
||||
wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
|
||||
|
||||
/* Unmask the thermal vector: */
|
||||
l = apic_read(APIC_LVTTHMR);
|
||||
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
|
||||
|
||||
printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
|
||||
cpu, tm2 ? "TM2" : "TM1");
|
||||
|
||||
/* enable thermal throttle processing */
|
||||
atomic_set(&therm_throt_en, 1);
|
||||
for_each_online_cpu(cpu) {
|
||||
if (cpu == dying)
|
||||
continue;
|
||||
if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
|
||||
continue;
|
||||
/* Recheck banks in case CPUs don't all have the same */
|
||||
if (cmci_supported(&banks))
|
||||
cmci_discover(banks, 0);
|
||||
}
|
||||
|
||||
set_cpus_allowed_ptr(current, old);
|
||||
free_cpumask_var(old);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reenable CMCI on this CPU in case a CPU down failed.
|
||||
*/
|
||||
void cmci_reenable(void)
|
||||
{
|
||||
int banks;
|
||||
if (cmci_supported(&banks))
|
||||
cmci_discover(banks, 0);
|
||||
}
|
||||
|
||||
static void intel_init_cmci(void)
|
||||
{
|
||||
int banks;
|
||||
|
||||
if (!cmci_supported(&banks))
|
||||
return;
|
||||
|
||||
mce_threshold_vector = intel_threshold_interrupt;
|
||||
cmci_discover(banks, 1);
|
||||
/*
|
||||
* For CPU #0 this runs with still disabled APIC, but that's
|
||||
* ok because only the vector is set up. We still do another
|
||||
* check for the banks later for CPU #0 just to make sure
|
||||
* to not miss any events.
|
||||
*/
|
||||
apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
|
||||
cmci_recheck();
|
||||
}
|
||||
|
||||
void mce_intel_feature_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
intel_init_thermal(c);
|
||||
intel_init_cmci();
|
||||
}
|
||||
|
|
|
@ -1,248 +0,0 @@
|
|||
/*
|
||||
* Intel specific MCE features.
|
||||
* Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
|
||||
* Copyright (C) 2008, 2009 Intel Corporation
|
||||
* Author: Andi Kleen
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/hw_irq.h>
|
||||
#include <asm/idle.h>
|
||||
#include <asm/therm_throt.h>
|
||||
|
||||
#include "mce.h"
|
||||
|
||||
asmlinkage void smp_thermal_interrupt(void)
|
||||
{
|
||||
__u64 msr_val;
|
||||
|
||||
ack_APIC_irq();
|
||||
|
||||
exit_idle();
|
||||
irq_enter();
|
||||
|
||||
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
|
||||
if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
|
||||
mce_log_therm_throt_event(msr_val);
|
||||
|
||||
inc_irq_stat(irq_thermal_count);
|
||||
irq_exit();
|
||||
}
|
||||
|
||||
/*
|
||||
* Support for Intel Correct Machine Check Interrupts. This allows
|
||||
* the CPU to raise an interrupt when a corrected machine check happened.
|
||||
* Normally we pick those up using a regular polling timer.
|
||||
* Also supports reliable discovery of shared banks.
|
||||
*/
|
||||
|
||||
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
|
||||
|
||||
/*
|
||||
* cmci_discover_lock protects against parallel discovery attempts
|
||||
* which could race against each other.
|
||||
*/
|
||||
static DEFINE_SPINLOCK(cmci_discover_lock);
|
||||
|
||||
#define CMCI_THRESHOLD 1
|
||||
|
||||
static int cmci_supported(int *banks)
|
||||
{
|
||||
u64 cap;
|
||||
|
||||
if (mce_cmci_disabled || mce_ignore_ce)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Vendor check is not strictly needed, but the initial
|
||||
* initialization is vendor keyed and this
|
||||
* makes sure none of the backdoors are entered otherwise.
|
||||
*/
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
|
||||
return 0;
|
||||
if (!cpu_has_apic || lapic_get_maxlvt() < 6)
|
||||
return 0;
|
||||
rdmsrl(MSR_IA32_MCG_CAP, cap);
|
||||
*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
|
||||
return !!(cap & MCG_CMCI_P);
|
||||
}
|
||||
|
||||
/*
|
||||
* The interrupt handler. This is called on every event.
|
||||
* Just call the poller directly to log any events.
|
||||
* This could in theory increase the threshold under high load,
|
||||
* but doesn't for now.
|
||||
*/
|
||||
static void intel_threshold_interrupt(void)
|
||||
{
|
||||
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
|
||||
mce_notify_irq();
|
||||
}
|
||||
|
||||
static void print_update(char *type, int *hdr, int num)
|
||||
{
|
||||
if (*hdr == 0)
|
||||
printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
|
||||
*hdr = 1;
|
||||
printk(KERN_CONT " %s:%d", type, num);
|
||||
}
|
||||
|
||||
/*
|
||||
* Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
|
||||
* on this CPU. Use the algorithm recommended in the SDM to discover shared
|
||||
* banks.
|
||||
*/
|
||||
static void cmci_discover(int banks, int boot)
|
||||
{
|
||||
unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
|
||||
unsigned long flags;
|
||||
int hdr = 0;
|
||||
int i;
|
||||
|
||||
spin_lock_irqsave(&cmci_discover_lock, flags);
|
||||
for (i = 0; i < banks; i++) {
|
||||
u64 val;
|
||||
|
||||
if (test_bit(i, owned))
|
||||
continue;
|
||||
|
||||
rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
|
||||
|
||||
/* Already owned by someone else? */
|
||||
if (val & CMCI_EN) {
|
||||
if (test_and_clear_bit(i, owned) || boot)
|
||||
print_update("SHD", &hdr, i);
|
||||
__clear_bit(i, __get_cpu_var(mce_poll_banks));
|
||||
continue;
|
||||
}
|
||||
|
||||
val |= CMCI_EN | CMCI_THRESHOLD;
|
||||
wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
|
||||
rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
|
||||
|
||||
/* Did the enable bit stick? -- the bank supports CMCI */
|
||||
if (val & CMCI_EN) {
|
||||
if (!test_and_set_bit(i, owned) || boot)
|
||||
print_update("CMCI", &hdr, i);
|
||||
__clear_bit(i, __get_cpu_var(mce_poll_banks));
|
||||
} else {
|
||||
WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&cmci_discover_lock, flags);
|
||||
if (hdr)
|
||||
printk(KERN_CONT "\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* Just in case we missed an event during initialization check
|
||||
* all the CMCI owned banks.
|
||||
*/
|
||||
void cmci_recheck(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
int banks;
|
||||
|
||||
if (!mce_available(¤t_cpu_data) || !cmci_supported(&banks))
|
||||
return;
|
||||
local_irq_save(flags);
|
||||
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Disable CMCI on this CPU for all banks it owns when it goes down.
|
||||
* This allows other CPUs to claim the banks on rediscovery.
|
||||
*/
|
||||
void cmci_clear(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
int i;
|
||||
int banks;
|
||||
u64 val;
|
||||
|
||||
if (!cmci_supported(&banks))
|
||||
return;
|
||||
spin_lock_irqsave(&cmci_discover_lock, flags);
|
||||
for (i = 0; i < banks; i++) {
|
||||
if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
|
||||
continue;
|
||||
/* Disable CMCI */
|
||||
rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
|
||||
val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
|
||||
wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
|
||||
__clear_bit(i, __get_cpu_var(mce_banks_owned));
|
||||
}
|
||||
spin_unlock_irqrestore(&cmci_discover_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* After a CPU went down cycle through all the others and rediscover
|
||||
* Must run in process context.
|
||||
*/
|
||||
void cmci_rediscover(int dying)
|
||||
{
|
||||
int banks;
|
||||
int cpu;
|
||||
cpumask_var_t old;
|
||||
|
||||
if (!cmci_supported(&banks))
|
||||
return;
|
||||
if (!alloc_cpumask_var(&old, GFP_KERNEL))
|
||||
return;
|
||||
cpumask_copy(old, ¤t->cpus_allowed);
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
if (cpu == dying)
|
||||
continue;
|
||||
if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
|
||||
continue;
|
||||
/* Recheck banks in case CPUs don't all have the same */
|
||||
if (cmci_supported(&banks))
|
||||
cmci_discover(banks, 0);
|
||||
}
|
||||
|
||||
set_cpus_allowed_ptr(current, old);
|
||||
free_cpumask_var(old);
|
||||
}
|
||||
|
||||
/*
 * Switch CMCI back on for this CPU after a failed CPU-down attempt by
 * rerunning bank discovery (without the boot-time verbose printing).
 */
void cmci_reenable(void)
{
	int nr_banks;

	if (!cmci_supported(&nr_banks))
		return;

	cmci_discover(nr_banks, 0);
}
|
||||
|
||||
static void intel_init_cmci(void)
|
||||
{
|
||||
int banks;
|
||||
|
||||
if (!cmci_supported(&banks))
|
||||
return;
|
||||
|
||||
mce_threshold_vector = intel_threshold_interrupt;
|
||||
cmci_discover(banks, 1);
|
||||
/*
|
||||
* For CPU #0 this runs with still disabled APIC, but that's
|
||||
* ok because only the vector is set up. We still do another
|
||||
* check for the banks later for CPU #0 just to make sure
|
||||
* to not miss any events.
|
||||
*/
|
||||
apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
|
||||
cmci_recheck();
|
||||
}
|
||||
|
||||
/*
 * Intel-specific machine check feature setup for CPU @c: thermal
 * monitoring first, then CMCI.
 */
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	/* Thermal interrupt setup needs the CPU feature flags in @c. */
	intel_init_thermal(c);

	/* CMCI setup operates on the CPU it is running on. */
	intel_init_cmci();
}
|
|
@ -17,10 +17,9 @@
|
|||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
#include "mce.h"
|
||||
|
||||
static int firstbank;
|
||||
|
||||
#define MCE_RATE (15*HZ) /* timer rate is 15s */
|
||||
|
|
|
@ -1,21 +1,15 @@
|
|||
/*
|
||||
* P4 specific Machine Check Exception Reporting
|
||||
*/
|
||||
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/therm_throt.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
#include "mce.h"
|
||||
|
||||
/* as supported by the P4/Xeon family */
|
||||
struct intel_mce_extended_msrs {
|
||||
u32 eax;
|
||||
|
@ -33,46 +27,6 @@ struct intel_mce_extended_msrs {
|
|||
|
||||
static int mce_num_extended_msrs;
|
||||
|
||||
|
||||
#ifdef CONFIG_X86_MCE_P4THERMAL
|
||||
|
||||
static void unexpected_thermal_interrupt(struct pt_regs *regs)
|
||||
{
|
||||
printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
|
||||
smp_processor_id());
|
||||
add_taint(TAINT_MACHINE_CHECK);
|
||||
}
|
||||
|
||||
/* P4/Xeon Thermal transition interrupt handler: */
|
||||
static void intel_thermal_interrupt(struct pt_regs *regs)
|
||||
{
|
||||
__u64 msr_val;
|
||||
|
||||
ack_APIC_irq();
|
||||
|
||||
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
|
||||
therm_throt_process(msr_val & THERM_STATUS_PROCHOT);
|
||||
}
|
||||
|
||||
/* Thermal interrupt handler for this CPU setup: */
|
||||
static void (*vendor_thermal_interrupt)(struct pt_regs *regs) =
|
||||
unexpected_thermal_interrupt;
|
||||
|
||||
void smp_thermal_interrupt(struct pt_regs *regs)
|
||||
{
|
||||
irq_enter();
|
||||
vendor_thermal_interrupt(regs);
|
||||
__get_cpu_var(irq_stat).irq_thermal_count++;
|
||||
irq_exit();
|
||||
}
|
||||
|
||||
void intel_set_thermal_handler(void)
|
||||
{
|
||||
vendor_thermal_interrupt = intel_thermal_interrupt;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_X86_MCE_P4THERMAL */
|
||||
|
||||
/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
|
||||
static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
|
||||
{
|
||||
|
|
|
@ -10,12 +10,11 @@
|
|||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
#include "mce.h"
|
||||
|
||||
/* By default disabled */
|
||||
int mce_p5_enable;
|
||||
int mce_p5_enabled __read_mostly;
|
||||
|
||||
/* Machine check handler for Pentium class Intel CPUs: */
|
||||
static void pentium_machine_check(struct pt_regs *regs, long error_code)
|
||||
|
@ -43,16 +42,14 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
|
|||
{
|
||||
u32 l, h;
|
||||
|
||||
/* Default P5 to off as it's often misconnected: */
|
||||
if (!mce_p5_enabled)
|
||||
return;
|
||||
|
||||
/* Check for MCE support: */
|
||||
if (!cpu_has(c, X86_FEATURE_MCE))
|
||||
return;
|
||||
|
||||
#ifdef CONFIG_X86_OLD_MCE
|
||||
/* Default P5 to off as its often misconnected: */
|
||||
if (mce_disabled != -1)
|
||||
return;
|
||||
#endif
|
||||
|
||||
machine_check_vector = pentium_machine_check;
|
||||
/* Make sure the vector pointer is visible before we enable MCEs: */
|
||||
wmb();
|
||||
|
|
|
@ -10,10 +10,9 @@
|
|||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
#include "mce.h"
|
||||
|
||||
/* Machine Check Handler For PII/PIII */
|
||||
static void intel_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
|
|
|
@ -13,13 +13,23 @@
|
|||
* Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
|
||||
* Inspired by Ross Biro's and Al Borchers' counter code.
|
||||
*/
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/sysdev.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/cpu.h>
|
||||
|
||||
#include <asm/therm_throt.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/idle.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
/* How long to wait between reporting thermal events */
|
||||
#define CHECK_INTERVAL (300 * HZ)
|
||||
|
@ -27,7 +37,7 @@
|
|||
static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
|
||||
static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
|
||||
|
||||
atomic_t therm_throt_en = ATOMIC_INIT(0);
|
||||
static atomic_t therm_throt_en = ATOMIC_INIT(0);
|
||||
|
||||
#ifdef CONFIG_SYSFS
|
||||
#define define_therm_throt_sysdev_one_ro(_name) \
|
||||
|
@ -82,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = {
|
|||
* 1 : Event should be logged further, and a message has been
|
||||
* printed to the syslog.
|
||||
*/
|
||||
int therm_throt_process(int curr)
|
||||
static int therm_throt_process(int curr)
|
||||
{
|
||||
unsigned int cpu = smp_processor_id();
|
||||
__u64 tmp_jiffs = get_jiffies_64();
|
||||
|
@ -186,6 +196,94 @@ static __init int thermal_throttle_init_device(void)
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
device_initcall(thermal_throttle_init_device);
|
||||
|
||||
#endif /* CONFIG_SYSFS */
|
||||
|
||||
/* Thermal transition interrupt handler */
|
||||
static void intel_thermal_interrupt(void)
|
||||
{
|
||||
__u64 msr_val;
|
||||
|
||||
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
|
||||
if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
|
||||
mce_log_therm_throt_event(msr_val);
|
||||
}
|
||||
|
||||
static void unexpected_thermal_interrupt(void)
|
||||
{
|
||||
printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
|
||||
smp_processor_id());
|
||||
add_taint(TAINT_MACHINE_CHECK);
|
||||
}
|
||||
|
||||
/*
 * Handler invoked by smp_thermal_interrupt(). Starts out pointing at the
 * "unexpected interrupt" stub and is switched to intel_thermal_interrupt()
 * by intel_init_thermal() once thermal monitoring has been set up.
 */
static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
|
||||
|
||||
asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
|
||||
{
|
||||
exit_idle();
|
||||
irq_enter();
|
||||
inc_irq_stat(irq_thermal_count);
|
||||
smp_thermal_vector();
|
||||
irq_exit();
|
||||
/* Ack only at the end to avoid potential reentry */
|
||||
ack_APIC_irq();
|
||||
}
|
||||
|
||||
void intel_init_thermal(struct cpuinfo_x86 *c)
|
||||
{
|
||||
unsigned int cpu = smp_processor_id();
|
||||
int tm2 = 0;
|
||||
u32 l, h;
|
||||
|
||||
/* Thermal monitoring depends on ACPI and clock modulation*/
|
||||
if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
|
||||
return;
|
||||
|
||||
/*
|
||||
* First check if its enabled already, in which case there might
|
||||
* be some SMM goo which handles it, so we can't even put a handler
|
||||
* since it might be delivered via SMI already:
|
||||
*/
|
||||
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
|
||||
h = apic_read(APIC_LVTTHMR);
|
||||
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
|
||||
printk(KERN_DEBUG
|
||||
"CPU%d: Thermal monitoring handled by SMI\n", cpu);
|
||||
return;
|
||||
}
|
||||
|
||||
if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
|
||||
tm2 = 1;
|
||||
|
||||
/* Check whether a vector already exists */
|
||||
if (h & APIC_VECTOR_MASK) {
|
||||
printk(KERN_DEBUG
|
||||
"CPU%d: Thermal LVT vector (%#x) already installed\n",
|
||||
cpu, (h & APIC_VECTOR_MASK));
|
||||
return;
|
||||
}
|
||||
|
||||
/* We'll mask the thermal vector in the lapic till we're ready: */
|
||||
h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
|
||||
apic_write(APIC_LVTTHMR, h);
|
||||
|
||||
rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
|
||||
wrmsr(MSR_IA32_THERM_INTERRUPT,
|
||||
l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
|
||||
|
||||
smp_thermal_vector = intel_thermal_interrupt;
|
||||
|
||||
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
|
||||
wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
|
||||
|
||||
/* Unmask the thermal vector: */
|
||||
l = apic_read(APIC_LVTTHMR);
|
||||
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
|
||||
|
||||
printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
|
||||
cpu, tm2 ? "TM2" : "TM1");
|
||||
|
||||
/* enable thermal throttle processing */
|
||||
atomic_set(&therm_throt_en, 1);
|
||||
}
|
||||
|
|
|
@ -9,10 +9,9 @@
|
|||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
#include "mce.h"
|
||||
|
||||
/* Machine check handler for WinChip C6: */
|
||||
static void winchip_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
|
|
|
@ -53,6 +53,7 @@
|
|||
#include <asm/traps.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/i387.h>
|
||||
#include <asm/mce.h>
|
||||
|
||||
#include <asm/mach_traps.h>
|
||||
|
||||
|
@ -64,8 +65,6 @@
|
|||
#include <asm/setup.h>
|
||||
#include <asm/traps.h>
|
||||
|
||||
#include "cpu/mcheck/mce.h"
|
||||
|
||||
asmlinkage int system_call(void);
|
||||
|
||||
/* Do we ignore FPU interrupts ? */
|
||||
|
|
Loading…
Reference in New Issue