From 0c7a6b2135c1bcb5139ca9ca87f292caafcb9410 Mon Sep 17 00:00:00 2001 From: Robin Getz Date: Wed, 8 Oct 2008 16:27:12 +0800 Subject: [PATCH] Blackfin arch: add support for double fault debug handling Signed-off-by: Robin Getz Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- arch/blackfin/Kconfig.debug | 38 +++++++ arch/blackfin/kernel/setup.c | 30 +++-- arch/blackfin/kernel/traps.c | 34 +++++- arch/blackfin/mach-common/entry.S | 179 +++++++++++++++++++++--------- arch/blackfin/mach-common/head.S | 40 ++++++- 5 files changed, 250 insertions(+), 71 deletions(-) diff --git a/arch/blackfin/Kconfig.debug b/arch/blackfin/Kconfig.debug index c468624d55f0..0afa89818722 100644 --- a/arch/blackfin/Kconfig.debug +++ b/arch/blackfin/Kconfig.debug @@ -22,6 +22,44 @@ config DEBUG_HWERR hardware error interrupts and need to know where they are coming from. +config DEBUG_DOUBLEFAULT + bool "Debug Double Faults" + default n + help + If an exception is caused while executing code within the exception + handler, the NMI handler, the reset vector, or in emulator mode, + a double fault occurs. On the Blackfin, this is an unrecoverable + event. You have two options: + - RESET exactly when double fault occurs. The excepting + instruction address is stored in RETX, where the next kernel + boot will print it out. + - Print debug message. This is much more error prone, although + easier to handle. It is error prone since: + - The excepting instruction is not committed. + - All writebacks from the instruction are prevented. + - The generated exception is not taken. + - The EXCAUSE field is updated with an unrecoverable event + The only way to check this is to see if EXCAUSE contains the + unrecoverable event value at every exception return. By selecting + this option, you are skipping over the faulting instruction, and + hoping things stay together enough to print out a debug message. 
+ + This does add a little kernel code, but is the only method to debug + double faults - if unsure say "Y" + +choice + prompt "Double Fault Failure Method" + default DEBUG_DOUBLEFAULT_PRINT + depends on DEBUG_DOUBLEFAULT + +config DEBUG_DOUBLEFAULT_PRINT + bool "Print" + +config DEBUG_DOUBLEFAULT_RESET + bool "Reset" + +endchoice + config DEBUG_ICACHE_CHECK bool "Check Instruction cache coherency" depends on DEBUG_KERNEL diff --git a/arch/blackfin/kernel/setup.c b/arch/blackfin/kernel/setup.c index 7a82d10b4ebf..8e639dc886a3 100644 --- a/arch/blackfin/kernel/setup.c +++ b/arch/blackfin/kernel/setup.c @@ -52,7 +52,8 @@ EXPORT_SYMBOL(mtd_size); #endif char __initdata command_line[COMMAND_LINE_SIZE]; -unsigned int __initdata *__retx; +void __initdata *init_retx, *init_saved_retx, *init_saved_seqstat, + *init_saved_icplb_fault_addr, *init_saved_dcplb_fault_addr; /* boot memmap, for parsing "memmap=" */ #define BFIN_MEMMAP_MAX 128 /* number of entries in bfin_memmap */ @@ -782,16 +783,25 @@ void __init setup_arch(char **cmdline_p) _bfin_swrst = bfin_read_SWRST(); - /* If we double fault, reset the system - otherwise we hang forever */ - bfin_write_SWRST(DOUBLE_FAULT); +#ifdef CONFIG_DEBUG_DOUBLEFAULT_PRINT + bfin_write_SWRST(_bfin_swrst & ~DOUBLE_FAULT); +#endif +#ifdef CONFIG_DEBUG_DOUBLEFAULT_RESET + bfin_write_SWRST(_bfin_swrst | DOUBLE_FAULT); +#endif - if (_bfin_swrst & RESET_DOUBLE) - /* - * don't decode the address, since you don't know if this - * kernel's symbol map is the same as the crashing kernel - */ - printk(KERN_INFO "Recovering from Double Fault event at %pF\n", __retx); - else if (_bfin_swrst & RESET_WDOG) + if (_bfin_swrst & RESET_DOUBLE) { + printk(KERN_EMERG "Recovering from DOUBLE FAULT event\n"); +#ifdef CONFIG_DEBUG_DOUBLEFAULT + /* We assume the crashing kernel, and the current symbol table match */ + printk(KERN_EMERG " While handling exception (EXCAUSE = 0x%x) at %pF\n", + (int)init_saved_seqstat & SEQSTAT_EXCAUSE, init_saved_retx); + 
printk(KERN_NOTICE " DCPLB_FAULT_ADDR: %pF\n", init_saved_dcplb_fault_addr); + printk(KERN_NOTICE " ICPLB_FAULT_ADDR: %pF\n", init_saved_icplb_fault_addr); +#endif + printk(KERN_NOTICE " The instruction at %pF caused a double exception\n", + init_retx); + } else if (_bfin_swrst & RESET_WDOG) printk(KERN_INFO "Recovering from Watchdog event\n"); else if (_bfin_swrst & RESET_SOFTWARE) printk(KERN_NOTICE "Reset caused by Software reset\n"); diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c index fd24e04fc19e..bd41fca315dd 100644 --- a/arch/blackfin/kernel/traps.c +++ b/arch/blackfin/kernel/traps.c @@ -68,7 +68,15 @@ void __init trap_init(void) CSYNC(); } -unsigned long saved_icplb_fault_addr, saved_dcplb_fault_addr; +/* + * Used to save the RETX, SEQSTAT, I/D CPLB FAULT ADDR + * values across the transition from exception to IRQ5. + * We put these in L1, so they are going to be in a valid + * location during exception context + */ +__attribute__((l1_data)) +unsigned long saved_retx, saved_seqstat, + saved_icplb_fault_addr, saved_dcplb_fault_addr; static void decode_address(char *buf, unsigned long address) { @@ -186,9 +194,27 @@ asmlinkage void double_fault_c(struct pt_regs *fp) console_verbose(); oops_in_progress = 1; printk(KERN_EMERG "\n" KERN_EMERG "Double Fault\n"); - dump_bfin_process(fp); - dump_bfin_mem(fp); - show_regs(fp); +#ifdef CONFIG_DEBUG_DOUBLEFAULT_PRINT + if (((long)fp->seqstat & SEQSTAT_EXCAUSE) == VEC_UNCOV) { + char buf[150]; + decode_address(buf, saved_retx); + printk(KERN_EMERG "While handling exception (EXCAUSE = 0x%x) at %s:\n", + (int)saved_seqstat & SEQSTAT_EXCAUSE, buf); + decode_address(buf, saved_dcplb_fault_addr); + printk(KERN_NOTICE " DCPLB_FAULT_ADDR: %s\n", buf); + decode_address(buf, saved_icplb_fault_addr); + printk(KERN_NOTICE " ICPLB_FAULT_ADDR: %s\n", buf); + + decode_address(buf, fp->retx); + printk(KERN_NOTICE "The instruction at %s caused a double exception\n", + buf); + } else +#endif + { + 
dump_bfin_process(fp); + dump_bfin_mem(fp); + show_regs(fp); + } panic("Double Fault - unrecoverable event\n"); } diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index 847c172a99eb..90c7397036ed 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -129,6 +129,18 @@ ENTRY(_ex_icplb_miss) #else call __cplb_hdr; #endif + +#ifdef CONFIG_DEBUG_DOUBLEFAULT + /* While we were processing this, did we double fault? */ + r7 = SEQSTAT; /* reason code is in bit 5:0 */ + r6.l = lo(SEQSTAT_EXCAUSE); + r6.h = hi(SEQSTAT_EXCAUSE); + r7 = r7 & r6; + r6 = 0x25; + CC = R7 == R6; + if CC JUMP _double_fault; +#endif + DEBUG_HWTRACE_RESTORE(p5, r7) RESTORE_ALL_SYS SP = EX_SCRATCH_REG; @@ -136,11 +148,8 @@ ENTRY(_ex_icplb_miss) ENDPROC(_ex_icplb_miss) ENTRY(_ex_syscall) - (R7:6,P5:4) = [sp++]; - ASTAT = [sp++]; raise 15; /* invoked by TRAP #0, for sys call */ - sp = EX_SCRATCH_REG; - rtx + jump.s _bfin_return_from_exception; ENDPROC(_ex_syscall) ENTRY(_ex_soft_bp) @@ -250,6 +259,29 @@ ENTRY(_bfin_return_from_exception) R7=LC1; LC1=R7; #endif + +#ifdef CONFIG_DEBUG_DOUBLEFAULT + /* While we were processing the current exception, + * did we cause another, and double fault? + */ + r7 = SEQSTAT; /* reason code is in bit 5:0 */ + r6.l = lo(SEQSTAT_EXCAUSE); + r6.h = hi(SEQSTAT_EXCAUSE); + r7 = r7 & r6; + r6 = 0x25; + CC = R7 == R6; + if CC JUMP _double_fault; + + /* Did we cause a HW error? */ + p5.l = lo(ILAT); + p5.h = hi(ILAT); + r6 = [p5]; + r7 = 0x20; /* Did I just cause another HW error? 
*/ + r7 = r7 & r1; + CC = R7 == R6; + if CC JUMP _double_fault; +#endif + (R7:6,P5:4) = [sp++]; ASTAT = [sp++]; sp = EX_SCRATCH_REG; @@ -292,6 +324,14 @@ ENTRY(_ex_trap_c) [p4] = p5; csync; +#ifndef CONFIG_DEBUG_DOUBLEFAULT + /* + * Save these registers, as they are only valid in exception context + * (where we are now - as soon as we defer to IRQ5, they can change) + * DCPLB_STATUS and ICPLB_STATUS are also only valid in EVT3, + * but they are not very interesting, so don't save them + */ + p4.l = lo(DCPLB_FAULT_ADDR); p4.h = hi(DCPLB_FAULT_ADDR); r7 = [p4]; @@ -304,12 +344,11 @@ ENTRY(_ex_trap_c) p5.l = _saved_icplb_fault_addr; [p5] = r7; - p4.l = _excpt_saved_stuff; - p4.h = _excpt_saved_stuff; - r6 = retx; + p4.l = _saved_retx; + p4.h = _saved_retx; [p4] = r6; - +#endif r6 = SYSCFG; [p4 + 4] = r6; BITCLR(r6, 0); @@ -327,59 +366,56 @@ ENTRY(_ex_trap_c) r6 = 0x3f; sti r6; - (R7:6,P5:4) = [sp++]; - ASTAT = [sp++]; - SP = EX_SCRATCH_REG; raise 5; - rtx; + jump.s _bfin_return_from_exception; ENDPROC(_ex_trap_c) /* We just realized we got an exception, while we were processing a different * exception. This is a unrecoverable event, so crash */ ENTRY(_double_fault) - /* Turn caches & protection off, to ensure we don't get any more - * double exceptions - */ + /* Turn caches & protection off, to ensure we don't get any more + * double exceptions + */ - P4.L = LO(IMEM_CONTROL); - P4.H = HI(IMEM_CONTROL); + P4.L = LO(IMEM_CONTROL); + P4.H = HI(IMEM_CONTROL); - R5 = [P4]; /* Control Register*/ - BITCLR(R5,ENICPLB_P); - SSYNC; /* SSYNC required before writing to IMEM_CONTROL. */ - .align 8; - [P4] = R5; - SSYNC; + R5 = [P4]; /* Control Register*/ + BITCLR(R5,ENICPLB_P); + SSYNC; /* SSYNC required before writing to IMEM_CONTROL. */ + .align 8; + [P4] = R5; + SSYNC; - P4.L = LO(DMEM_CONTROL); - P4.H = HI(DMEM_CONTROL); - R5 = [P4]; - BITCLR(R5,ENDCPLB_P); - SSYNC; /* SSYNC required before writing to DMEM_CONTROL. 
*/ - .align 8; - [P4] = R5; - SSYNC; + P4.L = LO(DMEM_CONTROL); + P4.H = HI(DMEM_CONTROL); + R5 = [P4]; + BITCLR(R5,ENDCPLB_P); + SSYNC; /* SSYNC required before writing to DMEM_CONTROL. */ + .align 8; + [P4] = R5; + SSYNC; - /* Fix up the stack */ - (R7:6,P5:4) = [sp++]; - ASTAT = [sp++]; - SP = EX_SCRATCH_REG; + /* Fix up the stack */ + (R7:6,P5:4) = [sp++]; + ASTAT = [sp++]; + SP = EX_SCRATCH_REG; - /* We should be out of the exception stack, and back down into - * kernel or user space stack - */ - SAVE_ALL_SYS + /* We should be out of the exception stack, and back down into + * kernel or user space stack + */ + SAVE_ALL_SYS /* The dumping functions expect the return address in the RETI * slot. */ r6 = retx; [sp + PT_PC] = r6; - r0 = sp; /* stack frame pt_regs pointer argument ==> r0 */ - SP += -12; - call _double_fault_c; - SP += 12; + r0 = sp; /* stack frame pt_regs pointer argument ==> r0 */ + SP += -12; + call _double_fault_c; + SP += 12; .L_double_fault_panic: JUMP .L_double_fault_panic @@ -388,8 +424,8 @@ ENDPROC(_double_fault) ENTRY(_exception_to_level5) SAVE_ALL_SYS - p4.l = _excpt_saved_stuff; - p4.h = _excpt_saved_stuff; + p4.l = _saved_retx; + p4.h = _saved_retx; r6 = [p4]; [sp + PT_PC] = r6; @@ -420,6 +456,17 @@ ENTRY(_exception_to_level5) call _trap_c; SP += 12; +#ifdef CONFIG_DEBUG_DOUBLEFAULT + /* Grab ILAT */ + p2.l = lo(ILAT); + p2.h = hi(ILAT); + r0 = [p2]; + r1 = 0x20; /* Did I just cause another HW error? */ + r0 = r0 & r1; + CC = R0 == R1; + if CC JUMP _double_fault; +#endif + call _ret_from_exception; RESTORE_ALL_SYS rti; @@ -436,7 +483,39 @@ ENTRY(_trap) /* Exception: 4th entry into system event table(supervisor mode)*/ /* Try to deal with syscalls quickly. 
*/ [--sp] = ASTAT; [--sp] = (R7:6,P5:4); + +#ifdef CONFIG_DEBUG_DOUBLEFAULT + /* + * Save these registers, as they are only valid in exception context + * (where we are now - as soon as we defer to IRQ5, they can change) + * DCPLB_STATUS and ICPLB_STATUS are also only valid in EVT3, + * but they are not very interesting, so don't save them + */ + + p4.l = lo(DCPLB_FAULT_ADDR); + p4.h = hi(DCPLB_FAULT_ADDR); + r7 = [p4]; + p5.h = _saved_dcplb_fault_addr; + p5.l = _saved_dcplb_fault_addr; + [p5] = r7; + + r7 = [p4 + (ICPLB_FAULT_ADDR - DCPLB_FAULT_ADDR)]; + p5.h = _saved_icplb_fault_addr; + p5.l = _saved_icplb_fault_addr; + [p5] = r7; + + p4.l = _saved_retx; + p4.h = _saved_retx; + r6 = retx; + [p4] = r6; + r7 = SEQSTAT; /* reason code is in bit 5:0 */ + p4.l = _saved_seqstat; + p4.h = _saved_seqstat; + [p4] = r7; +#else + r7 = SEQSTAT; /* reason code is in bit 5:0 */ +#endif r6.l = lo(SEQSTAT_EXCAUSE); r6.h = hi(SEQSTAT_EXCAUSE); r7 = r7 & r6; @@ -1432,15 +1511,7 @@ ENTRY(_sys_call_table) .rept NR_syscalls-(.-_sys_call_table)/4 .long _sys_ni_syscall .endr - - /* - * Used to save the real RETX, IMASK and SYSCFG when temporarily - * storing safe values across the transition from exception to IRQ5. 
- */ -_excpt_saved_stuff: - .long 0; - .long 0; - .long 0; +END(_sys_call_table) _exception_stack: .rept 1024 diff --git a/arch/blackfin/mach-common/head.S b/arch/blackfin/mach-common/head.S index 191b4e974c4b..7cb21cfcbf28 100644 --- a/arch/blackfin/mach-common/head.S +++ b/arch/blackfin/mach-common/head.S @@ -90,12 +90,46 @@ ENTRY(__start) [p0] = R0; SSYNC; - /* Save RETX, in case of doublefault */ - p0.l = ___retx; - p0.h = ___retx; + /* in case of double faults, save a few things */ + p0.l = _init_retx; + p0.h = _init_retx; R0 = RETX; [P0] = R0; +#ifdef CONFIG_DEBUG_DOUBLEFAULT + /* Only save these if we are storing them, + * This happens here, since L1 gets clobbered + * below + */ + p0.l = _saved_retx; + p0.h = _saved_retx; + p1.l = _init_saved_retx; + p1.h = _init_saved_retx; + r0 = [p0]; + [p1] = r0; + + p0.l = _saved_dcplb_fault_addr; + p0.h = _saved_dcplb_fault_addr; + p1.l = _init_saved_dcplb_fault_addr; + p1.h = _init_saved_dcplb_fault_addr; + r0 = [p0]; + [p1] = r0; + + p0.l = _saved_icplb_fault_addr; + p0.h = _saved_icplb_fault_addr; + p1.l = _init_saved_icplb_fault_addr; + p1.h = _init_saved_icplb_fault_addr; + r0 = [p0]; + [p1] = r0; + + p0.l = _saved_seqstat; + p0.h = _saved_seqstat; + p1.l = _init_saved_seqstat; + p1.h = _init_saved_seqstat; + r0 = [p0]; + [p1] = r0; +#endif + /* Initialize stack pointer */ sp.l = lo(INITIAL_STACK); sp.h = hi(INITIAL_STACK);