x86/mcheck: Reorganize the hotplug callbacks
Initially I wanted to remove mcheck_cpu_init() from identify_cpu() and let it become an independent early hotplug callback. The main problem here was that the init on the boot CPU may happen too late (device_initcall_sync(mcheck_init_device)) and nobody wanted to risk receiving and MCE event at boot time leading to a shutdown (if the MCE feature is not yet enabled). Here is attempt two: the timming stays as-is but the ordering of the functions is changed: - mcheck_cpu_init() (which is run from identify_cpu()) will setup the timer struct but won't fire the timer. This is moved to CPU_ONLINE since its cleanup part is in CPU_DOWN_PREPARE. So if it is okay to stop the timer early in the shutdown phase, it should be okay to start it late in the bring up phase. - CPU_DOWN_PREPARE disables the MCE feature flags for !INTEL CPUs in mce_disable_cpu(). If a failure occures it would be re-enabled on all vendor CPUs (including Intel where it was not disabled during shutdown). To keep this working I am moving it to CPU_ONLINE. smp_call_function_single() is dropped beause the notifier runs nowdays on the target CPU. - CPU_ONLINE is invoking mce_device_create() + mce_threshold_create_device() but its cleanup part is in CPU_DEAD (mce_threshold_remove_device() and mce_device_remove()). In order to keep this symmetrical I am moving the clean up from CPU_DEAD to CPU_DOWN_PREPARE. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Acked-by: Borislav Petkov <bp@alien8.de> Cc: Tony Luck <tony.luck@intel.com> Cc: rt@linutronix.de Cc: linux-edac@vger.kernel.org Link: http://lkml.kernel.org/r/20161110174447.11848-6-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
This commit is contained in:
parent
4d7b02d58c
commit
39f152ffbf
|
@ -1745,6 +1745,14 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t)
|
|||
add_timer_on(t, cpu);
|
||||
}
|
||||
|
||||
static void __mcheck_cpu_setup_timer(void)
|
||||
{
|
||||
struct timer_list *t = this_cpu_ptr(&mce_timer);
|
||||
unsigned int cpu = smp_processor_id();
|
||||
|
||||
setup_pinned_timer(t, mce_timer_fn, cpu);
|
||||
}
|
||||
|
||||
static void __mcheck_cpu_init_timer(void)
|
||||
{
|
||||
struct timer_list *t = this_cpu_ptr(&mce_timer);
|
||||
|
@ -1796,7 +1804,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
|
|||
__mcheck_cpu_init_generic();
|
||||
__mcheck_cpu_init_vendor(c);
|
||||
__mcheck_cpu_init_clear_banks();
|
||||
__mcheck_cpu_init_timer();
|
||||
__mcheck_cpu_setup_timer();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2470,28 +2478,25 @@ static void mce_device_remove(unsigned int cpu)
|
|||
}
|
||||
|
||||
/* Make sure there are no machine checks on offlined CPUs. */
|
||||
static void mce_disable_cpu(void *h)
|
||||
static void mce_disable_cpu(void)
|
||||
{
|
||||
unsigned long action = *(unsigned long *)h;
|
||||
|
||||
if (!mce_available(raw_cpu_ptr(&cpu_info)))
|
||||
return;
|
||||
|
||||
if (!(action & CPU_TASKS_FROZEN))
|
||||
if (!cpuhp_tasks_frozen)
|
||||
cmci_clear();
|
||||
|
||||
vendor_disable_error_reporting();
|
||||
}
|
||||
|
||||
static void mce_reenable_cpu(void *h)
|
||||
static void mce_reenable_cpu(void)
|
||||
{
|
||||
unsigned long action = *(unsigned long *)h;
|
||||
int i;
|
||||
|
||||
if (!mce_available(raw_cpu_ptr(&cpu_info)))
|
||||
return;
|
||||
|
||||
if (!(action & CPU_TASKS_FROZEN))
|
||||
if (!cpuhp_tasks_frozen)
|
||||
cmci_reenable();
|
||||
for (i = 0; i < mca_cfg.banks; i++) {
|
||||
struct mce_bank *b = &mce_banks[i];
|
||||
|
@ -2510,6 +2515,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
|||
|
||||
switch (action & ~CPU_TASKS_FROZEN) {
|
||||
case CPU_ONLINE:
|
||||
case CPU_DOWN_FAILED:
|
||||
|
||||
mce_device_create(cpu);
|
||||
|
||||
|
@ -2517,11 +2523,10 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
|||
mce_device_remove(cpu);
|
||||
return NOTIFY_BAD;
|
||||
}
|
||||
|
||||
mce_reenable_cpu();
|
||||
mce_start_timer(cpu, t);
|
||||
break;
|
||||
case CPU_DEAD:
|
||||
mce_threshold_remove_device(cpu);
|
||||
mce_device_remove(cpu);
|
||||
mce_intel_hcpu_update(cpu);
|
||||
|
||||
/* intentionally ignoring frozen here */
|
||||
|
@ -2529,12 +2534,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
|||
cmci_rediscover();
|
||||
break;
|
||||
case CPU_DOWN_PREPARE:
|
||||
smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
|
||||
mce_disable_cpu();
|
||||
del_timer_sync(t);
|
||||
break;
|
||||
case CPU_DOWN_FAILED:
|
||||
smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
|
||||
mce_start_timer(cpu, t);
|
||||
|
||||
mce_threshold_remove_device(cpu);
|
||||
mce_device_remove(cpu);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue