diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 5719248d344d..5bdc5faac713 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -66,7 +66,7 @@ pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o pci_iommu.o \ obj-$(CONFIG_PCI) += $(pci64-y) kexec-$(CONFIG_PPC64) := machine_kexec_64.o kexec-$(CONFIG_PPC32) := machine_kexec_32.o -obj-$(CONFIG_KEXEC) += machine_kexec.o $(kexec-y) +obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o $(kexec-y) ifeq ($(CONFIG_PPC_ISERIES),y) $(obj)/head_64.o: $(obj)/lparmap.s diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c new file mode 100644 index 000000000000..4681155121ef --- /dev/null +++ b/arch/powerpc/kernel/crash.c @@ -0,0 +1,264 @@ +/* + * Architecture specific (PPC64) functions for kexec based crash dumps. + * + * Copyright (C) 2005, IBM Corp. + * + * Created by: Haren Myneni + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + * + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef DEBUG +#include +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +/* This keeps a track of which one is crashing cpu. */ +int crashing_cpu = -1; + +static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, + size_t data_len) +{ + struct elf_note note; + + note.n_namesz = strlen(name) + 1; + note.n_descsz = data_len; + note.n_type = type; + memcpy(buf, ¬e, sizeof(note)); + buf += (sizeof(note) +3)/4; + memcpy(buf, name, note.n_namesz); + buf += (note.n_namesz + 3)/4; + memcpy(buf, data, note.n_descsz); + buf += (note.n_descsz + 3)/4; + + return buf; +} + +static void final_note(u32 *buf) +{ + struct elf_note note; + + note.n_namesz = 0; + note.n_descsz = 0; + note.n_type = 0; + memcpy(buf, ¬e, sizeof(note)); +} + +static void crash_save_this_cpu(struct pt_regs *regs, int cpu) +{ + struct elf_prstatus prstatus; + u32 *buf; + + if ((cpu < 0) || (cpu >= NR_CPUS)) + return; + + /* Using ELF notes here is opportunistic. + * I need a well defined structure format + * for the data I pass, and I need tags + * on the data to indicate what information I have + * squirrelled away. ELF notes happen to provide + * all of that that no need to invent something new. + */ + buf = &crash_notes[cpu][0]; + memset(&prstatus, 0, sizeof(prstatus)); + prstatus.pr_pid = current->pid; + elf_core_copy_regs(&prstatus.pr_reg, regs); + buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus, + sizeof(prstatus)); + final_note(buf); +} + +/* FIXME Merge this with xmon_save_regs ?? */ +static inline void crash_get_current_regs(struct pt_regs *regs) +{ + unsigned long tmp1, tmp2; + + __asm__ __volatile__ ( + "std 0,0(%2)\n" + "std 1,8(%2)\n" + "std 2,16(%2)\n" + "std 3,24(%2)\n" + "std 4,32(%2)\n" + "std 5,40(%2)\n" + "std 6,48(%2)\n" + "std 7,56(%2)\n" + "std 8,64(%2)\n" + "std 9,72(%2)\n" + "std 10,80(%2)\n" + "std 11,88(%2)\n" + "std 12,96(%2)\n" + "std 13,104(%2)\n" + "std 14,112(%2)\n" + "std 15,120(%2)\n" + "std 16,128(%2)\n" + "std 17,136(%2)\n" + "std 18,144(%2)\n" + "std 19,152(%2)\n" + "std 20,160(%2)\n" + "std 21,168(%2)\n" + "std 22,176(%2)\n" + "std 23,184(%2)\n" + "std 24,192(%2)\n" + "std 25,200(%2)\n" + "std 26,208(%2)\n" + "std 27,216(%2)\n" + "std 28,224(%2)\n" + "std 29,232(%2)\n" + "std 30,240(%2)\n" + "std 31,248(%2)\n" + "mfmsr %0\n" + "std %0, 264(%2)\n" + "mfctr %0\n" + "std %0, 280(%2)\n" + "mflr %0\n" + "std %0, 288(%2)\n" + "bl 1f\n" + "1: mflr %1\n" + "std %1, 256(%2)\n" + "mtlr %0\n" + "mfxer %0\n" + "std %0, 296(%2)\n" + : "=&r" (tmp1), "=&r" (tmp2) + : "b" (regs)); +} + +/* We may have saved_regs from where the error came from + * or it is NULL if via a direct panic(). + */ +static void crash_save_self(struct pt_regs *saved_regs) +{ + struct pt_regs regs; + int cpu; + + cpu = smp_processor_id(); + if (saved_regs) + memcpy(®s, saved_regs, sizeof(regs)); + else + crash_get_current_regs(®s); + crash_save_this_cpu(®s, cpu); +} + +#ifdef CONFIG_SMP +static atomic_t waiting_for_crash_ipi; + +void crash_ipi_callback(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + if (cpu == crashing_cpu) + return; + + if (!cpu_online(cpu)) + return; + + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(1, 1); + + local_irq_disable(); + + crash_save_this_cpu(regs, cpu); + atomic_dec(&waiting_for_crash_ipi); + kexec_smp_wait(); + /* NOTREACHED */ +} + +static void crash_kexec_prepare_cpus(void) +{ + unsigned int msecs; + + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + + crash_send_ipi(crash_ipi_callback); + smp_wmb(); + + /* + * FIXME: Until we will have the way to stop other CPUSs reliabally, + * the crash CPU will send an IPI and wait for other CPUs to + * respond. If not, proceed the kexec boot even though we failed to + * capture other CPU states. + */ + msecs = 1000000; + while ((atomic_read(&waiting_for_crash_ipi) > 0) && (--msecs > 0)) { + barrier(); + mdelay(1); + } + + /* Would it be better to replace the trap vector here? */ + + /* + * FIXME: In case if we do not get all CPUs, one possibility: ask the + * user to do soft reset such that we get all. + * IPI handler is already set by the panic cpu initially. Therefore, + * all cpus could invoke this handler from die() and the panic CPU + * will call machine_kexec() directly from this handler to do + * kexec boot. + */ + if (atomic_read(&waiting_for_crash_ipi)) + printk(KERN_ALERT "done waiting: %d cpus not responding\n", + atomic_read(&waiting_for_crash_ipi)); + /* Leave the IPI callback set */ +} +#else +static void crash_kexec_prepare_cpus(void) +{ + /* + * move the secondarys to us so that we can copy + * the new kernel 0-0x100 safely + * + * do this if kexec in setup.c ? + */ + smp_release_cpus(); +} + +#endif + +void default_machine_crash_shutdown(struct pt_regs *regs) +{ + /* + * This function is only called after the system + * has paniced or is otherwise in a critical state. + * The minimum amount of code to allow a kexec'd kernel + * to run successfully needs to happen here. + * + * In practice this means stopping other cpus in + * an SMP system. + * The kernel is broken so disable interrupts. + */ + local_irq_disable(); + + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(1, 0); + + /* + * Make a note of crashing cpu. Will be used in machine_kexec + * such that another IPI will not be sent. + */ + crashing_cpu = smp_processor_id(); + crash_kexec_prepare_cpus(); + crash_save_self(regs); +} diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 63919bcfc9fe..5337ab759780 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -11,6 +11,8 @@ #undef DEBUG +#include +#include #include #include #include @@ -51,3 +53,21 @@ void __init kdump_setup(void) DBG(" <- kdump_setup()\n"); } + +static int __init parse_elfcorehdr(char *p) +{ + if (p) + elfcorehdr_addr = memparse(p, &p); + + return 0; +} +__setup("elfcorehdr=", parse_elfcorehdr); + +static int __init parse_savemaxmem(char *p) +{ + if (p) + saved_max_pfn = (memparse(p, &p) >> PAGE_SHIFT) - 1; + + return 0; +} +__setup("savemaxmem=", parse_savemaxmem); diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 0b0fa4768995..d6431440c54f 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -265,11 +265,18 @@ extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start, /* too late to fail here */ void default_machine_kexec(struct kimage *image) { - /* prepare control code if any */ - /* shutdown other cpus into our wait loop and quiesce interrupts */ - kexec_prepare_cpus(); + /* + * If the kexec boot is the normal one, need to shutdown other cpus + * into our wait loop and quiesce interrupts. + * Otherwise, in the case of crashed mode (crashing_cpu >= 0), + * stopping other CPUs and collecting their pt_regs is done before + * using debugger IPI. + */ + + if (crashing_cpu == -1) + kexec_prepare_cpus(); /* switch to a staticly allocated stack. Based on irq stack code. * XXX: the task struct will likely be invalid once we do the copy! diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a90df6bf0940..8e3ca674d359 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -75,6 +75,8 @@ void smp_call_function_interrupt(void); int smt_enabled_at_boot = 1; +static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL; + #ifdef CONFIG_MPIC int __init smp_mpic_probe(void) { @@ -123,11 +125,16 @@ void smp_message_recv(int msg, struct pt_regs *regs) /* XXX Do we have to do this? */ set_need_resched(); break; -#ifdef CONFIG_DEBUGGER case PPC_MSG_DEBUGGER_BREAK: + if (crash_ipi_function_ptr) { + crash_ipi_function_ptr(regs); + break; + } +#ifdef CONFIG_DEBUGGER debugger_ipi(regs); break; -#endif +#endif /* CONFIG_DEBUGGER */ + /* FALLTHROUGH */ default: printk("SMP %d: smp_message_recv(): unknown msg %d\n", smp_processor_id(), msg); @@ -147,6 +154,17 @@ void smp_send_debugger_break(int cpu) } #endif +#ifdef CONFIG_KEXEC +void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) +{ + crash_ipi_function_ptr = crash_ipi_callback; + if (crash_ipi_callback) { + mb(); + smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_DEBUGGER_BREAK); + } +} +#endif + static void stop_this_cpu(void *dummy) { local_irq_disable(); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1511454c4690..76b579ca5230 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -95,7 +96,7 @@ static DEFINE_SPINLOCK(die_lock); int die(const char *str, struct pt_regs *regs, long err) { - static int die_counter; + static int die_counter, crash_dump_start = 0; int nl = 0; if (debugger(regs)) @@ -156,7 +157,21 @@ int die(const char *str, struct pt_regs *regs, long err) print_modules(); show_regs(regs); bust_spinlocks(0); + + if (!crash_dump_start && kexec_should_crash(current)) { + crash_dump_start = 1; + spin_unlock_irq(&die_lock); + crash_kexec(regs); + /* NOTREACHED */ + } spin_unlock_irq(&die_lock); + if (crash_dump_start) + /* + * Only for soft-reset: Other CPUs will be responded to an IPI + * sent by first kexec CPU. + */ + for(;;) + ; if (in_interrupt()) panic("Fatal exception in interrupt"); diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index 25e0f68d0531..56273e56cbfb 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -217,5 +217,6 @@ struct machdep_calls __initdata cell_md = { #ifdef CONFIG_KEXEC .machine_kexec = default_machine_kexec, .machine_kexec_prepare = default_machine_kexec_prepare, + .machine_crash_shutdown = default_machine_crash_shutdown, #endif }; diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 8724e031e965..65fe4c166a68 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -282,5 +282,6 @@ struct machdep_calls __initdata maple_md = { #ifdef CONFIG_KEXEC .machine_kexec = default_machine_kexec, .machine_kexec_prepare = default_machine_kexec_prepare, + .machine_crash_shutdown = default_machine_crash_shutdown, #endif }; diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 6ee620fe5195..c0638e47c298 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -771,6 +771,7 @@ struct machdep_calls __initdata pmac_md = { #ifdef CONFIG_KEXEC .machine_kexec = default_machine_kexec, .machine_kexec_prepare = default_machine_kexec_prepare, + .machine_crash_shutdown = default_machine_crash_shutdown, #endif #endif /* CONFIG_PPC64 */ #ifdef CONFIG_PPC32 diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index d678f228fc0f..2cb082871210 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -629,5 +629,6 @@ struct machdep_calls __initdata pSeries_md = { .kexec_cpu_down = pseries_kexec_cpu_down, .machine_kexec = default_machine_kexec, .machine_kexec_prepare = default_machine_kexec_prepare, + .machine_crash_shutdown = default_machine_crash_shutdown, #endif }; diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 0377decc0719..0c0cfa32eb58 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -407,7 +407,7 @@ irqreturn_t xics_ipi_action(int irq, void *dev_id, struct pt_regs *regs) smp_message_recv(PPC_MSG_MIGRATE_TASK, regs); } #endif -#ifdef CONFIG_DEBUGGER +#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, &xics_ipi_message[cpu].value)) { mb(); diff --git a/include/asm-powerpc/kexec.h b/include/asm-powerpc/kexec.h index 934b4981651d..ae76ed5d973f 100644 --- a/include/asm-powerpc/kexec.h +++ b/include/asm-powerpc/kexec.h @@ -32,6 +32,8 @@ #ifndef __ASSEMBLY__ +#ifdef CONFIG_KEXEC + #define MAX_NOTE_BYTES 1024 typedef u32 note_buf_t[MAX_NOTE_BYTES / sizeof(u32)]; @@ -41,11 +43,17 @@ extern note_buf_t crash_notes[]; extern void kexec_smp_wait(void); /* get and clear naca physid, wait for master to copy new code to 0 */ extern void __init kexec_setup(void); -#endif +extern int crashing_cpu; +extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)); +#endif /* __powerpc64 __ */ struct kimage; +struct pt_regs; extern void default_machine_kexec(struct kimage *image); extern int default_machine_kexec_prepare(struct kimage *image); +extern void default_machine_crash_shutdown(struct pt_regs *regs); + +#endif /* !CONFIG_KEXEC */ #endif /* ! __ASSEMBLY__ */ #endif /* _ASM_POWERPC_KEXEC_H */