s390/nmi: use smp_emergency_stop instead of smp_send_stop
The smp_send_stop() function can be called from s390_handle_damage() while DAT is off. This happens if a machine check indicates that kernel gprs or control registers cannot be restored. The function smp_send_stop() re-enables DAT via __load_psw_mask(). That should work for the case of lost kernel gprs, and the system will do the expected stop of all CPUs. But if control registers are lost, in particular CR13 with the home space ASCE, interesting secondary crashes may occur. Make smp_emergency_stop() callable from nmi.c and remove its cpumask argument. Replace the smp_send_stop() call with smp_emergency_stop() in the s390_handle_damage() function. In addition, add notrace and NOKPROBE_SYMBOL annotations for all functions required for the emergency shutdown. Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:
parent
608796ffe1
commit
00a8f886db
|
@ -27,6 +27,7 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
|
|||
|
||||
extern void smp_call_online_cpu(void (*func)(void *), void *);
|
||||
extern void smp_call_ipl_cpu(void (*func)(void *), void *);
|
||||
extern void smp_emergency_stop(void);
|
||||
|
||||
extern int smp_find_processor_id(u16 address);
|
||||
extern int smp_store_status(int cpu);
|
||||
|
@ -52,6 +53,10 @@ static inline void smp_call_online_cpu(void (*func)(void *), void *data)
|
|||
func(data);
|
||||
}
|
||||
|
||||
static inline void smp_emergency_stop(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int smp_find_processor_id(u16 address) { return 0; }
|
||||
static inline int smp_store_status(int cpu) { return 0; }
|
||||
static inline int smp_vcpu_scheduled(int cpu) { return 1; }
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include <linux/init.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched/signal.h>
|
||||
|
@ -38,12 +39,13 @@ struct mcck_struct {
|
|||
|
||||
static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
|
||||
|
||||
static void s390_handle_damage(void)
|
||||
static notrace void s390_handle_damage(void)
|
||||
{
|
||||
smp_send_stop();
|
||||
smp_emergency_stop();
|
||||
disabled_wait((unsigned long) __builtin_return_address(0));
|
||||
while (1);
|
||||
}
|
||||
NOKPROBE_SYMBOL(s390_handle_damage);
|
||||
|
||||
/*
|
||||
* Main machine check handler function. Will be called with interrupts enabled
|
||||
|
@ -275,6 +277,7 @@ static int notrace s390_validate_registers(union mci mci, int umode)
|
|||
|
||||
return kill_task;
|
||||
}
|
||||
NOKPROBE_SYMBOL(s390_validate_registers);
|
||||
|
||||
/*
|
||||
* Backup the guest's machine check info to its description block
|
||||
|
@ -300,6 +303,7 @@ static void notrace s390_backup_mcck_info(struct pt_regs *regs)
|
|||
mcck_backup->failing_storage_address
|
||||
= S390_lowcore.failing_storage_address;
|
||||
}
|
||||
NOKPROBE_SYMBOL(s390_backup_mcck_info);
|
||||
|
||||
#define MAX_IPD_COUNT 29
|
||||
#define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */
|
||||
|
@ -443,6 +447,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
|
|||
clear_cpu_flag(CIF_MCCK_GUEST);
|
||||
nmi_exit();
|
||||
}
|
||||
NOKPROBE_SYMBOL(s390_do_machine_check);
|
||||
|
||||
static int __init machine_check_init(void)
|
||||
{
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#include <linux/sched/task_stack.h>
|
||||
#include <linux/crash_dump.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/diag.h>
|
||||
#include <asm/switch_to.h>
|
||||
|
@ -422,13 +423,17 @@ void smp_yield_cpu(int cpu)
|
|||
* Send cpus emergency shutdown signal. This gives the cpus the
|
||||
* opportunity to complete outstanding interrupts.
|
||||
*/
|
||||
static void smp_emergency_stop(cpumask_t *cpumask)
|
||||
void notrace smp_emergency_stop(void)
|
||||
{
|
||||
cpumask_t cpumask;
|
||||
u64 end;
|
||||
int cpu;
|
||||
|
||||
cpumask_copy(&cpumask, cpu_online_mask);
|
||||
cpumask_clear_cpu(smp_processor_id(), &cpumask);
|
||||
|
||||
end = get_tod_clock() + (1000000UL << 12);
|
||||
for_each_cpu(cpu, cpumask) {
|
||||
for_each_cpu(cpu, &cpumask) {
|
||||
struct pcpu *pcpu = pcpu_devices + cpu;
|
||||
set_bit(ec_stop_cpu, &pcpu->ec_mask);
|
||||
while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
|
||||
|
@ -437,21 +442,21 @@ static void smp_emergency_stop(cpumask_t *cpumask)
|
|||
cpu_relax();
|
||||
}
|
||||
while (get_tod_clock() < end) {
|
||||
for_each_cpu(cpu, cpumask)
|
||||
for_each_cpu(cpu, &cpumask)
|
||||
if (pcpu_stopped(pcpu_devices + cpu))
|
||||
cpumask_clear_cpu(cpu, cpumask);
|
||||
if (cpumask_empty(cpumask))
|
||||
cpumask_clear_cpu(cpu, &cpumask);
|
||||
if (cpumask_empty(&cpumask))
|
||||
break;
|
||||
cpu_relax();
|
||||
}
|
||||
}
|
||||
NOKPROBE_SYMBOL(smp_emergency_stop);
|
||||
|
||||
/*
|
||||
* Stop all cpus but the current one.
|
||||
*/
|
||||
void smp_send_stop(void)
|
||||
{
|
||||
cpumask_t cpumask;
|
||||
int cpu;
|
||||
|
||||
/* Disable all interrupts/machine checks */
|
||||
|
@ -459,17 +464,16 @@ void smp_send_stop(void)
|
|||
trace_hardirqs_off();
|
||||
|
||||
debug_set_critical();
|
||||
cpumask_copy(&cpumask, cpu_online_mask);
|
||||
cpumask_clear_cpu(smp_processor_id(), &cpumask);
|
||||
|
||||
if (oops_in_progress)
|
||||
smp_emergency_stop(&cpumask);
|
||||
smp_emergency_stop();
|
||||
|
||||
/* stop all processors */
|
||||
for_each_cpu(cpu, &cpumask) {
|
||||
struct pcpu *pcpu = pcpu_devices + cpu;
|
||||
pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
|
||||
while (!pcpu_stopped(pcpu))
|
||||
for_each_online_cpu(cpu) {
|
||||
if (cpu == smp_processor_id())
|
||||
continue;
|
||||
pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0);
|
||||
while (!pcpu_stopped(pcpu_devices + cpu))
|
||||
cpu_relax();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue