tile: support delivering NMIs for multicore backtrace

A new hypervisor service was added some time ago (MDE 4.2.1 or
later in the 4.2.x series, or MDE 4.3 or later) that allows cores
to request NMIs to be delivered to other cores.  Use this facility
to deliver a request that causes a backtrace to be generated on
each core, and hook it into the magic SysRq functionality.

Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com>
Author: Chris Metcalf <cmetcalf@ezchip.com>
Date:   2015-05-04 17:26:35 -04:00
Commit: e5701b74cc (parent b4287df829)
8 changed files with 197 additions and 2 deletions
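Once applied, the feature can be exercised from userspace through the
magic SysRq interface the patch hooks into. A minimal sketch (not part
of the patch; assumes CONFIG_MAGIC_SYSRQ=y and root privileges):

/* Ask the kernel to backtrace all active CPUs via sysrq 'l'. */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/proc/sysrq-trigger", O_WRONLY);

	if (fd < 0)
		return 1;
	if (write(fd, "l", 1) != 1) {
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}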

arch/tile/include/asm/irq.h

@@ -78,4 +78,9 @@ void tile_irq_activate(unsigned int irq, int tile_irq_type);
void setup_irq_regs(void);
#ifdef __tilegx__
void arch_trigger_all_cpu_backtrace(bool self);
#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
#endif
#endif /* _ASM_TILE_IRQ_H */
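Defining arch_trigger_all_cpu_backtrace as a macro of its own name is
the convention <linux/nmi.h> uses to detect that an architecture
implements the hook: the sysrq 'l' handler calls
trigger_all_cpu_backtrace(), which reaches the tile code above only
when the macro is defined. Roughly, paraphrased from the generic
header of this era (not part of the patch):

#ifdef arch_trigger_all_cpu_backtrace
static inline bool trigger_all_cpu_backtrace(void)
{
	arch_trigger_all_cpu_backtrace(true);	/* include the calling cpu */
	return true;
}
#else
static inline bool trigger_all_cpu_backtrace(void)
{
	return false;	/* no arch support; sysrq falls back to IPIs */
}
#endif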

arch/tile/include/asm/traps.h

@@ -52,6 +52,14 @@ void do_timer_interrupt(struct pt_regs *, int fault_num);
/* kernel/messaging.c */
void hv_message_intr(struct pt_regs *, int intnum);
#define TILE_NMI_DUMP_STACK 1 /* Dump stack for sysrq+'l' */
/* kernel/process.c */
void do_nmi_dump_stack(struct pt_regs *regs);
/* kernel/traps.c */
void do_nmi(struct pt_regs *, int fault_num, unsigned long reason);
/* kernel/irq.c */
void tile_dev_intr(struct pt_regs *, int intnum);

arch/tile/include/hv/hypervisor.h

@@ -321,8 +321,11 @@
/** hv_console_set_ipi */
#define HV_DISPATCH_CONSOLE_SET_IPI 63
/** hv_send_nmi */
#define HV_DISPATCH_SEND_NMI 65
/** One more than the largest dispatch value */
-#define _HV_DISPATCH_END 64
+#define _HV_DISPATCH_END 66
#ifndef __ASSEMBLER__
@@ -1253,6 +1256,11 @@ void hv_downcall_dispatch(void);
#define INT_DMATLB_ACCESS_DWNCL INT_DMA_CPL
/** Device interrupt downcall interrupt vector */
#define INT_DEV_INTR_DWNCL INT_WORLD_ACCESS
/** NMI downcall interrupt vector */
#define INT_NMI_DWNCL 64
#define HV_NMI_FLAG_FORCE 0x1 /**< Force an NMI downcall regardless of
the ICS bit of the client. */
#ifndef __ASSEMBLER__
@@ -1780,6 +1788,56 @@ int hv_dev_poll(int devhdl, __hv32 events, HV_IntArg intarg);
int hv_dev_poll_cancel(int devhdl);
/** NMI information */
typedef struct
{
/** Result: negative error, or HV_NMI_RESULT_xxx. */
int result;
/** PC from interrupted remote core (if result != HV_NMI_RESULT_FAIL_HV). */
HV_VirtAddr pc;
} HV_NMI_Info;
/** NMI issued successfully. */
#define HV_NMI_RESULT_OK 0
/** NMI not issued: remote tile running at client PL with ICS set. */
#define HV_NMI_RESULT_FAIL_ICS 1
/** NMI not issued: remote tile waiting in hypervisor. */
#define HV_NMI_RESULT_FAIL_HV 2
/** Force an NMI downcall regardless of the ICS bit of the client. */
#define HV_NMI_FLAG_FORCE 0x1
/** Send an NMI interrupt request to a particular tile.
*
* This will cause the NMI to be issued on the remote tile regardless
* of the state of the client interrupt mask. However, if the remote
* tile is in the hypervisor, it will not execute the NMI, and
* HV_NMI_RESULT_FAIL_HV will be returned. Similarly, if the remote
* tile is in a client interrupt critical section at the time of the
* NMI, it will not execute the NMI, and HV_NMI_RESULT_FAIL_ICS will
* be returned. In this second case, however, if HV_NMI_FLAG_FORCE
* is set in flags, then the remote tile will enter its NMI interrupt
* vector regardless. Forcing the NMI vector during an interrupt
* critical section will mean that the client can not safely continue
* execution after handling the interrupt.
*
* @param tile Tile to which the NMI request is sent.
* @param info NMI information which is defined by and interpreted by the
* supervisor, is passed to the specified tile, and is
* stored in the SPR register SYSTEM_SAVE_{CLIENT_PL}_2 on the
* specified tile when entering the NMI handler routine.
* Typically, this parameter stores the NMI type, or an aligned
* VA plus some special bits, etc.
* @param flags Flags (HV_NMI_FLAG_xxx).
* @return Information about the requested NMI.
*/
HV_NMI_Info hv_send_nmi(HV_Coord tile, unsigned long info, __hv64 flags);
/** Scatter-gather list for preada/pwritea calls. */
typedef struct
#if CHIP_VA_WIDTH() <= 32
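To make the doc comment's semantics concrete, a hedged sketch of a
single send with an explicit FORCE retry. This is illustrative only
(the patch's own loop in kernel/process.c below retries without
FORCE); send_dump_nmi is a hypothetical helper, assuming the same
kernel context and headers as that file:

/* Hypothetical helper: send one stack-dump NMI to a remote cpu,
 * optionally forcing past an interrupt critical section. */
static int send_dump_nmi(int cpu, bool force)
{
	HV_Coord tile = { .x = cpu_x(cpu), .y = cpu_y(cpu) };
	HV_NMI_Info info = hv_send_nmi(tile, TILE_NMI_DUMP_STACK, 0);

	if (info.result == HV_NMI_RESULT_FAIL_ICS && force) {
		/* The remote client cannot safely resume after this. */
		info = hv_send_nmi(tile, TILE_NMI_DUMP_STACK,
				   HV_NMI_FLAG_FORCE);
	}
	return info.result;
}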

arch/tile/kernel/hvglue.S

@@ -71,4 +71,5 @@ gensym hv_flush_all, 0x6e0, 32
gensym hv_get_ipi_pte, 0x700, 32
gensym hv_set_pte_super_shift, 0x720, 32
gensym hv_console_set_ipi, 0x7e0, 32
-gensym hv_glue_internals, 0x800, 30720
+gensym hv_send_nmi, 0x820, 32
+gensym hv_glue_internals, 0x840, 30656
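The gensym offsets follow from the dispatch numbers in hypervisor.h:
each hypercall glue slot is 32 bytes, so a symbol sits at
dispatch * 0x20, and hv_glue_internals pads the table out to the 32KB
boundary. Slot 64 (0x800) is left unclaimed, presumably because 64 is
taken by the INT_NMI_DWNCL downcall vector. A few sanity checks under
that assumption (HV_GLUE_SLOT is a hypothetical macro, not part of the
patch):

#define HV_GLUE_SLOT(dispatch)	((dispatch) * 0x20)

_Static_assert(HV_GLUE_SLOT(63) == 0x7e0, "hv_console_set_ipi slot");
_Static_assert(HV_GLUE_SLOT(65) == 0x820, "hv_send_nmi slot");
_Static_assert(0x840 + 30656 == 0x8000, "internals pad to the 32KB end");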

arch/tile/kernel/hvglue_trace.c

@@ -75,6 +75,7 @@
#define hv_get_ipi_pte _hv_get_ipi_pte
#define hv_set_pte_super_shift _hv_set_pte_super_shift
#define hv_console_set_ipi _hv_console_set_ipi
#define hv_send_nmi _hv_send_nmi
#include <hv/hypervisor.h>
#undef hv_init
#undef hv_install_context
@@ -134,6 +135,7 @@
#undef hv_get_ipi_pte
#undef hv_set_pte_super_shift
#undef hv_console_set_ipi
#undef hv_send_nmi
/*
* Provide macros based on <linux/syscalls.h> to provide a wrapper
@@ -264,3 +266,5 @@ HV_WRAP9(int, hv_flush_remote, HV_PhysAddr, cache_pa,
HV_VirtAddr, tlb_va, unsigned long, tlb_length,
unsigned long, tlb_pgsize, unsigned long*, tlb_cpumask,
HV_Remote_ASID*, asids, int, asidcount)
HV_WRAP3(HV_NMI_Info, hv_send_nmi, HV_Coord, tile, unsigned long, info,
__hv64, flags)

arch/tile/kernel/intvec_64.S

@@ -515,6 +515,10 @@ intvec_\vecname:
.ifc \c_routine, handle_perf_interrupt
mfspr r2, AUX_PERF_COUNT_STS
.endif
.ifc \c_routine, do_nmi
mfspr r2, SPR_SYSTEM_SAVE_K_2 /* nmi type */
.else
.endif
.endif
.endif
.endif
@@ -1571,3 +1575,5 @@ intrpt_start:
/* Synthetic interrupt delivered only by the simulator */
int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint
/* Synthetic interrupt delivered by hv */
int_hand INT_NMI_DWNCL, NMI_DWNCL, do_nmi, handle_nmi

arch/tile/kernel/process.c

@@ -27,6 +27,7 @@
#include <linux/kernel.h>
#include <linux/tracehook.h>
#include <linux/signal.h>
#include <linux/delay.h>
#include <linux/context_tracking.h>
#include <asm/stack.h>
#include <asm/switch_to.h>
@@ -574,3 +575,103 @@ void show_regs(struct pt_regs *regs)
dump_stack_regs(regs);
}
/* To ensure stack dumps on tiles occur one by one. */
static DEFINE_SPINLOCK(backtrace_lock);
/* To ensure no backtrace occurs before all of the stack dumps are done. */
static atomic_t backtrace_cpus;
/* The cpu mask to avoid reentrance. */
static struct cpumask backtrace_mask;
void do_nmi_dump_stack(struct pt_regs *regs)
{
int is_idle = is_idle_task(current) && !in_interrupt();
int cpu;
nmi_enter();
cpu = smp_processor_id();
if (WARN_ON_ONCE(!cpumask_test_and_clear_cpu(cpu, &backtrace_mask)))
goto done;
spin_lock(&backtrace_lock);
if (is_idle)
pr_info("CPU: %d idle\n", cpu);
else
show_regs(regs);
spin_unlock(&backtrace_lock);
atomic_dec(&backtrace_cpus);
done:
nmi_exit();
}
#ifdef __tilegx__
void arch_trigger_all_cpu_backtrace(bool self)
{
struct cpumask mask;
HV_Coord tile;
unsigned int timeout;
int cpu;
int ongoing;
HV_NMI_Info info[NR_CPUS];
ongoing = atomic_cmpxchg(&backtrace_cpus, 0, num_online_cpus() - 1);
if (ongoing != 0) {
pr_err("Trying to do all-cpu backtrace.\n");
pr_err("But another all-cpu backtrace is ongoing (%d cpus left)\n",
ongoing);
if (self) {
pr_err("Reporting the stack on this cpu only.\n");
dump_stack();
}
return;
}
cpumask_copy(&mask, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &mask);
cpumask_copy(&backtrace_mask, &mask);
/* Backtrace for myself first. */
if (self)
dump_stack();
/* Tentatively dump stack on remote tiles via NMI. */
timeout = 100;
while (!cpumask_empty(&mask) && timeout) {
for_each_cpu(cpu, &mask) {
tile.x = cpu_x(cpu);
tile.y = cpu_y(cpu);
info[cpu] = hv_send_nmi(tile, TILE_NMI_DUMP_STACK, 0);
if (info[cpu].result == HV_NMI_RESULT_OK)
cpumask_clear_cpu(cpu, &mask);
}
mdelay(10);
timeout--;
}
/* Warn about cpus stuck in ICS and decrement their counts here. */
if (!cpumask_empty(&mask)) {
for_each_cpu(cpu, &mask) {
switch (info[cpu].result) {
case HV_NMI_RESULT_FAIL_ICS:
pr_warn("Skipping stack dump of cpu %d in ICS at pc %#llx\n",
cpu, info[cpu].pc);
break;
case HV_NMI_RESULT_FAIL_HV:
pr_warn("Skipping stack dump of cpu %d in hypervisor\n",
cpu);
break;
case HV_ENOSYS:
pr_warn("Hypervisor too old to allow remote stack dumps.\n");
goto skip_for_each;
default: /* should not happen */
pr_warn("Skipping stack dump of cpu %d [%d,%#llx]\n",
cpu, info[cpu].result, info[cpu].pc);
break;
}
}
skip_for_each:
atomic_sub(cpumask_weight(&mask), &backtrace_cpus);
}
}
#endif /* __tilegx__ */

arch/tile/kernel/traps.c

@@ -395,6 +395,18 @@ done:
exception_exit(prev_state);
}
void do_nmi(struct pt_regs *regs, int fault_num, unsigned long reason)
{
switch (reason) {
case TILE_NMI_DUMP_STACK:
do_nmi_dump_stack(regs);
break;
default:
panic("Unexpected do_nmi type %ld", reason);
return;
}
}
void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
{
_dump_stack(dummy, pc, lr, sp, r52);