From c49f63530bb689744932be88fecb3fd850300876 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 3 Jul 2014 17:20:50 +1000 Subject: [PATCH] powernv: Add OPAL tracepoints Knowing how long we spend in firmware calls is an important part of minimising OS jitter. This patch adds tracepoints to each OPAL call. If tracepoints are enabled we branch out to a common routine that calls an entry and exit tracepoint. This allows us to write tools that monitor the frequency and duration of OPAL calls, eg: name count total(ms) min(ms) max(ms) avg(ms) period(ms) OPAL_HANDLE_INTERRUPT 5 0.199 0.037 0.042 0.040 12547.545 OPAL_POLL_EVENTS 204 2.590 0.012 0.036 0.013 2264.899 OPAL_PCI_MSI_EOI 2830 3.066 0.001 0.005 0.001 81.166 We use jump labels if configured, which means we only add a single nop instruction to every OPAL call when the tracepoints are disabled. Signed-off-by: Anton Blanchard Acked-by: Paul E. McKenney Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/trace.h | 45 +++++++ arch/powerpc/platforms/powernv/Makefile | 1 + .../platforms/powernv/opal-tracepoints.c | 84 +++++++++++++ .../powerpc/platforms/powernv/opal-wrappers.S | 115 ++++++++++++++++-- 4 files changed, 236 insertions(+), 9 deletions(-) create mode 100644 arch/powerpc/platforms/powernv/opal-tracepoints.c diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index 5712f06905a9..c15da6073cb8 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -99,6 +99,51 @@ TRACE_EVENT_FN(hcall_exit, ); #endif +#ifdef CONFIG_PPC_POWERNV +extern void opal_tracepoint_regfunc(void); +extern void opal_tracepoint_unregfunc(void); + +TRACE_EVENT_FN(opal_entry, + + TP_PROTO(unsigned long opcode, unsigned long *args), + + TP_ARGS(opcode, args), + + TP_STRUCT__entry( + __field(unsigned long, opcode) + ), + + TP_fast_assign( + __entry->opcode = opcode; + ), + + TP_printk("opcode=%lu", __entry->opcode), + + opal_tracepoint_regfunc, opal_tracepoint_unregfunc +); + +TRACE_EVENT_FN(opal_exit, + + TP_PROTO(unsigned long opcode, unsigned long retval), + + TP_ARGS(opcode, retval), + + TP_STRUCT__entry( + __field(unsigned long, opcode) + __field(unsigned long, retval) + ), + + TP_fast_assign( + __entry->opcode = opcode; + __entry->retval = retval; + ), + + TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval), + + opal_tracepoint_regfunc, opal_tracepoint_unregfunc +); +#endif + #endif /* _TRACE_POWERPC_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 4ad227d04c1a..70b758aaaffa 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -8,3 +8,4 @@ obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o +obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c new file mode 100644 index 000000000000..d8a000a9988b --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -0,0 +1,84 @@ +#include +#include +#include + +#ifdef CONFIG_JUMP_LABEL +struct static_key opal_tracepoint_key = STATIC_KEY_INIT; + +void opal_tracepoint_regfunc(void) +{ + static_key_slow_inc(&opal_tracepoint_key); +} + +void opal_tracepoint_unregfunc(void) +{ + static_key_slow_dec(&opal_tracepoint_key); +} +#else +/* + * We optimise OPAL calls by placing opal_tracepoint_refcount + * directly in the TOC so we can check if the opal tracepoints are + * enabled via a single load. + */ + +/* NB: reg/unreg are called while guarded with the tracepoints_mutex */ +extern long opal_tracepoint_refcount; + +void opal_tracepoint_regfunc(void) +{ + opal_tracepoint_refcount++; +} + +void opal_tracepoint_unregfunc(void) +{ + opal_tracepoint_refcount--; +} +#endif + +/* + * Since the tracing code might execute OPAL calls we need to guard against + * recursion. + */ +static DEFINE_PER_CPU(unsigned int, opal_trace_depth); + +void __trace_opal_entry(unsigned long opcode, unsigned long *args) +{ + unsigned long flags; + unsigned int *depth; + + local_irq_save(flags); + + depth = &__get_cpu_var(opal_trace_depth); + + if (*depth) + goto out; + + (*depth)++; + preempt_disable(); + trace_opal_entry(opcode, args); + (*depth)--; + +out: + local_irq_restore(flags); +} + +void __trace_opal_exit(long opcode, unsigned long retval) +{ + unsigned long flags; + unsigned int *depth; + + local_irq_save(flags); + + depth = &__get_cpu_var(opal_trace_depth); + + if (*depth) + goto out; + + (*depth)++; + trace_opal_exit(opcode, retval); + preempt_enable(); + (*depth)--; + +out: + local_irq_restore(flags); +} diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 4abbff22a61f..27c40bcf9d12 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -13,30 +13,69 @@ #include #include #include +#include + + .section ".text" + +#ifdef CONFIG_TRACEPOINTS +#ifdef CONFIG_JUMP_LABEL +#define OPAL_BRANCH(LABEL) \ + ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key) +#else + + .section ".toc","aw" + + .globl opal_tracepoint_refcount +opal_tracepoint_refcount: + .llong 0 + + .section ".text" + +/* + * We branch around this in early init by using an unconditional cpu + * feature. + */ +#define OPAL_BRANCH(LABEL) \ +BEGIN_FTR_SECTION; \ + b 1f; \ +END_FTR_SECTION(0, 1); \ + ld r12,opal_tracepoint_refcount@toc(r2); \ + std r12,32(r1); \ + cmpdi r12,0; \ + bne- LABEL; \ +1: + +#endif + +#else +#define OPAL_BRANCH(LABEL) +#endif /* TODO: * * - Trace irqs in/off (needs saving/restoring all args, argh...) * - Get r11 feed up by Dave so I can have better register usage */ + #define OPAL_CALL(name, token) \ _GLOBAL(name); \ mflr r0; \ - mfcr r12; \ std r0,16(r1); \ + li r0,token; \ + OPAL_BRANCH(opal_tracepoint_entry) \ + mfcr r12; \ stw r12,8(r1); \ std r1,PACAR1(r13); \ - li r0,0; \ + li r11,0; \ mfmsr r12; \ - ori r0,r0,MSR_EE; \ + ori r11,r11,MSR_EE; \ std r12,PACASAVEDMSR(r13); \ - andc r12,r12,r0; \ + andc r12,r12,r11; \ mtmsrd r12,1; \ - LOAD_REG_ADDR(r0,opal_return); \ - mtlr r0; \ - li r0,MSR_DR|MSR_IR|MSR_LE;\ - andc r12,r12,r0; \ - li r0,token; \ + LOAD_REG_ADDR(r11,opal_return); \ + mtlr r11; \ + li r11,MSR_DR|MSR_IR|MSR_LE;\ + andc r12,r12,r11; \ mtspr SPRN_HSRR1,r12; \ LOAD_REG_ADDR(r11,opal); \ ld r12,8(r11); \ @@ -61,6 +100,64 @@ opal_return: mtcr r4; rfid +#ifdef CONFIG_TRACEPOINTS +opal_tracepoint_entry: + stdu r1,-STACKFRAMESIZE(r1) + std r0,STK_REG(R23)(r1) + std r3,STK_REG(R24)(r1) + std r4,STK_REG(R25)(r1) + std r5,STK_REG(R26)(r1) + std r6,STK_REG(R27)(r1) + std r7,STK_REG(R28)(r1) + std r8,STK_REG(R29)(r1) + std r9,STK_REG(R30)(r1) + std r10,STK_REG(R31)(r1) + mr r3,r0 + addi r4,r1,STK_REG(R24) + bl __trace_opal_entry + ld r0,STK_REG(R23)(r1) + ld r3,STK_REG(R24)(r1) + ld r4,STK_REG(R25)(r1) + ld r5,STK_REG(R26)(r1) + ld r6,STK_REG(R27)(r1) + ld r7,STK_REG(R28)(r1) + ld r8,STK_REG(R29)(r1) + ld r9,STK_REG(R30)(r1) + ld r10,STK_REG(R31)(r1) + LOAD_REG_ADDR(r11,opal_tracepoint_return) + mfcr r12 + std r11,16(r1) + stw r12,8(r1) + std r1,PACAR1(r13) + li r11,0 + mfmsr r12 + ori r11,r11,MSR_EE + std r12,PACASAVEDMSR(r13) + andc r12,r12,r11 + mtmsrd r12,1 + LOAD_REG_ADDR(r11,opal_return) + mtlr r11 + li r11,MSR_DR|MSR_IR|MSR_LE + andc r12,r12,r11 + mtspr SPRN_HSRR1,r12 + LOAD_REG_ADDR(r11,opal) + ld r12,8(r11) + ld r2,0(r11) + mtspr SPRN_HSRR0,r12 + hrfid + +opal_tracepoint_return: + std r3,STK_REG(R31)(r1) + mr r4,r3 + ld r0,STK_REG(R23)(r1) + bl __trace_opal_exit + ld r3,STK_REG(R31)(r1) + addi r1,r1,STACKFRAMESIZE + ld r0,16(r1) + mtlr r0 + blr +#endif + OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL); OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE); OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ);