From 21da83fb6c3ff67e969b8770bbb33138c9ede88e Mon Sep 17 00:00:00 2001 From: Rickard Strandqvist Date: Tue, 24 Jun 2014 13:09:10 +0200 Subject: [PATCH 1/5] tools lib traceevent: Fix a risk for doing free on uninitialized pointer Fix a risk of doing free on an uninitialized pointer. This was found using a static code analysis program called cppcheck. Acked-by: Namhyung Kim Signed-off-by: Rickard Strandqvist Link: http://lkml.kernel.org/r/1403608150-13037-1-git-send-email-rickard_strandqvist@spectrumdigital.se Signed-off-by: Jiri Olsa --- tools/lib/traceevent/event-parse.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 93825a17dcce..cf3a44bf1ec3 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -2395,7 +2395,7 @@ process_flags(struct event_format *event, struct print_arg *arg, char **tok) { struct print_arg *field; enum event_type type; - char *token; + char *token = NULL; memset(arg, 0, sizeof(*arg)); arg->type = PRINT_FLAGS; @@ -2448,7 +2448,7 @@ process_symbols(struct event_format *event, struct print_arg *arg, char **tok) { struct print_arg *field; enum event_type type; - char *token; + char *token = NULL; memset(arg, 0, sizeof(*arg)); arg->type = PRINT_SYMBOL; @@ -2487,7 +2487,7 @@ process_hex(struct event_format *event, struct print_arg *arg, char **tok) { struct print_arg *field; enum event_type type; - char *token; + char *token = NULL; memset(arg, 0, sizeof(*arg)); arg->type = PRINT_HEX; From a60335ba32981db5bc057b35782644e9e2436407 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 25 Jun 2014 08:49:03 -0700 Subject: [PATCH 2/5] perf tools powerpc: Adjust callchain based on DWARF debug info When saving the callchain on Power, the kernel conservatively saves excess entries in the callchain. A few of these entries are needed in some cases but not others. We should use the DWARF debug information to determine when the entries are needed. Eg: the value in the link register (LR) is needed only when it holds the return address of a function. At other times it must be ignored. If the unnecessary entries are not ignored, we end up with duplicate arcs in the call-graphs. Use the DWARF debug information to determine if any callchain entries should be ignored when building call-graphs. Callgraph before the patch: 14.67% 2234 sprintft libc-2.18.so [.] __random | --- __random | |--61.12%-- __random | | | |--97.15%-- rand | | do_my_sprintf | | main | | generic_start_main.isra.0 | | __libc_start_main | | 0x0 | | | --2.85%-- do_my_sprintf | main | generic_start_main.isra.0 | __libc_start_main | 0x0 | --38.88%-- rand | |--94.01%-- rand | do_my_sprintf | main | generic_start_main.isra.0 | __libc_start_main | 0x0 | --5.99%-- do_my_sprintf main generic_start_main.isra.0 __libc_start_main 0x0 Callgraph after the patch: 14.67% 2234 sprintft libc-2.18.so [.] __random | --- __random | |--95.93%-- rand | do_my_sprintf | main | generic_start_main.isra.0 | __libc_start_main | 0x0 | --4.07%-- do_my_sprintf main generic_start_main.isra.0 __libc_start_main 0x0 TODO: For split-debug info objects like glibc, we can only determine the call-frame-address only when both .eh_frame and .debug_info sections are available. We should be able to determin the CFA even without the .eh_frame section. Fix suggested by Anton Blanchard. Thanks to valuable input on DWARF debug information from Ulrich Weigand. Reported-by: Maynard Johnson Tested-by: Maynard Johnson Signed-off-by: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/20140625154903.GA29607@us.ibm.com Signed-off-by: Jiri Olsa --- tools/perf/arch/powerpc/Makefile | 1 + .../arch/powerpc/util/skip-callchain-idx.c | 266 ++++++++++++++++++ tools/perf/config/Makefile | 4 + tools/perf/util/callchain.h | 13 + tools/perf/util/machine.c | 18 +- 5 files changed, 300 insertions(+), 2 deletions(-) create mode 100644 tools/perf/arch/powerpc/util/skip-callchain-idx.c diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index 744e629797be..b92219b1900d 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -3,3 +3,4 @@ PERF_HAVE_DWARF_REGS := 1 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/skip-callchain-idx.o diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c new file mode 100644 index 000000000000..a7c23a4b3778 --- /dev/null +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -0,0 +1,266 @@ +/* + * Use DWARF Debug information to skip unnecessary callchain entries. + * + * Copyright (C) 2014 Sukadev Bhattiprolu, IBM Corporation. + * Copyright (C) 2014 Ulrich Weigand, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include + +#include "util/thread.h" +#include "util/callchain.h" + +/* + * When saving the callchain on Power, the kernel conservatively saves + * excess entries in the callchain. A few of these entries are needed + * in some cases but not others. If the unnecessary entries are not + * ignored, we end up with duplicate arcs in the call-graphs. Use + * DWARF debug information to skip over any unnecessary callchain + * entries. + * + * See function header for arch_adjust_callchain() below for more details. + * + * The libdwfl code in this file is based on code from elfutils + * (libdwfl/argp-std.c, libdwfl/tests/addrcfi.c, etc). + */ +static char *debuginfo_path; + +static const Dwfl_Callbacks offline_callbacks = { + .debuginfo_path = &debuginfo_path, + .find_debuginfo = dwfl_standard_find_debuginfo, + .section_address = dwfl_offline_section_address, +}; + + +/* + * Use the DWARF expression for the Call-frame-address and determine + * if return address is in LR and if a new frame was allocated. + */ +static int check_return_reg(int ra_regno, Dwarf_Frame *frame) +{ + Dwarf_Op ops_mem[2]; + Dwarf_Op dummy; + Dwarf_Op *ops = &dummy; + size_t nops; + int result; + + result = dwarf_frame_register(frame, ra_regno, ops_mem, &ops, &nops); + if (result < 0) { + pr_debug("dwarf_frame_register() %s\n", dwarf_errmsg(-1)); + return -1; + } + + /* + * Check if return address is on the stack. + */ + if (nops != 0 || ops != NULL) + return 0; + + /* + * Return address is in LR. Check if a frame was allocated + * but not-yet used. + */ + result = dwarf_frame_cfa(frame, &ops, &nops); + if (result < 0) { + pr_debug("dwarf_frame_cfa() returns %d, %s\n", result, + dwarf_errmsg(-1)); + return -1; + } + + /* + * If call frame address is in r1, no new frame was allocated. + */ + if (nops == 1 && ops[0].atom == DW_OP_bregx && ops[0].number == 1 && + ops[0].number2 == 0) + return 1; + + /* + * A new frame was allocated but has not yet been used. + */ + return 2; +} + +/* + * Get the DWARF frame from the .eh_frame section. + */ +static Dwarf_Frame *get_eh_frame(Dwfl_Module *mod, Dwarf_Addr pc) +{ + int result; + Dwarf_Addr bias; + Dwarf_CFI *cfi; + Dwarf_Frame *frame; + + cfi = dwfl_module_eh_cfi(mod, &bias); + if (!cfi) { + pr_debug("%s(): no CFI - %s\n", __func__, dwfl_errmsg(-1)); + return NULL; + } + + result = dwarf_cfi_addrframe(cfi, pc, &frame); + if (result) { + pr_debug("%s(): %s\n", __func__, dwfl_errmsg(-1)); + return NULL; + } + + return frame; +} + +/* + * Get the DWARF frame from the .debug_frame section. + */ +static Dwarf_Frame *get_dwarf_frame(Dwfl_Module *mod, Dwarf_Addr pc) +{ + Dwarf_CFI *cfi; + Dwarf_Addr bias; + Dwarf_Frame *frame; + int result; + + cfi = dwfl_module_dwarf_cfi(mod, &bias); + if (!cfi) { + pr_debug("%s(): no CFI - %s\n", __func__, dwfl_errmsg(-1)); + return NULL; + } + + result = dwarf_cfi_addrframe(cfi, pc, &frame); + if (result) { + pr_debug("%s(): %s\n", __func__, dwfl_errmsg(-1)); + return NULL; + } + + return frame; +} + +/* + * Return: + * 0 if return address for the program counter @pc is on stack + * 1 if return address is in LR and no new stack frame was allocated + * 2 if return address is in LR and a new frame was allocated (but not + * yet used) + * -1 in case of errors + */ +static int check_return_addr(const char *exec_file, Dwarf_Addr pc) +{ + int rc = -1; + Dwfl *dwfl; + Dwfl_Module *mod; + Dwarf_Frame *frame; + int ra_regno; + Dwarf_Addr start = pc; + Dwarf_Addr end = pc; + bool signalp; + + dwfl = dwfl_begin(&offline_callbacks); + if (!dwfl) { + pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1)); + return -1; + } + + if (dwfl_report_offline(dwfl, "", exec_file, -1) == NULL) { + pr_debug("dwfl_report_offline() failed %s\n", dwarf_errmsg(-1)); + goto out; + } + + mod = dwfl_addrmodule(dwfl, pc); + if (!mod) { + pr_debug("dwfl_addrmodule() failed, %s\n", dwarf_errmsg(-1)); + goto out; + } + + /* + * To work with split debug info files (eg: glibc), check both + * .eh_frame and .debug_frame sections of the ELF header. + */ + frame = get_eh_frame(mod, pc); + if (!frame) { + frame = get_dwarf_frame(mod, pc); + if (!frame) + goto out; + } + + ra_regno = dwarf_frame_info(frame, &start, &end, &signalp); + if (ra_regno < 0) { + pr_debug("Return address register unavailable: %s\n", + dwarf_errmsg(-1)); + goto out; + } + + rc = check_return_reg(ra_regno, frame); + +out: + dwfl_end(dwfl); + return rc; +} + +/* + * The callchain saved by the kernel always includes the link register (LR). + * + * 0: PERF_CONTEXT_USER + * 1: Program counter (Next instruction pointer) + * 2: LR value + * 3: Caller's caller + * 4: ... + * + * The value in LR is only needed when it holds a return address. If the + * return address is on the stack, we should ignore the LR value. + * + * Further, when the return address is in the LR, if a new frame was just + * allocated but the LR was not saved into it, then the LR contains the + * caller, slot 4: contains the caller's caller and the contents of slot 3: + * (chain->ips[3]) is undefined and must be ignored. + * + * Use DWARF debug information to determine if any entries need to be skipped. + * + * Return: + * index: of callchain entry that needs to be ignored (if any) + * -1 if no entry needs to be ignored or in case of errors + */ +int arch_skip_callchain_idx(struct machine *machine, struct thread *thread, + struct ip_callchain *chain) +{ + struct addr_location al; + struct dso *dso = NULL; + int rc; + u64 ip; + u64 skip_slot = -1; + + if (chain->nr < 3) + return skip_slot; + + ip = chain->ips[2]; + + thread__find_addr_location(thread, machine, PERF_RECORD_MISC_USER, + MAP__FUNCTION, ip, &al); + + if (al.map) + dso = al.map->dso; + + if (!dso) { + pr_debug("%" PRIx64 " dso is NULL\n", ip); + return skip_slot; + } + + rc = check_return_addr(dso->long_name, ip); + + pr_debug("DSO %s, nr %" PRIx64 ", ip 0x%" PRIx64 "rc %d\n", + dso->long_name, chain->nr, ip, rc); + + if (rc == 0) { + /* + * Return address on stack. Ignore LR value in callchain + */ + skip_slot = 2; + } else if (rc == 2) { + /* + * New frame allocated but return address still in LR. + * Ignore the caller's caller entry in callchain. + */ + skip_slot = 3; + } + return skip_slot; +} diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index f30ac5e5d271..346bdb617544 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -48,6 +48,10 @@ ifneq ($(ARCH),$(filter $(ARCH),x86 arm)) NO_LIBDW_DWARF_UNWIND := 1 endif +ifeq ($(ARCH),powerpc) + CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX +endif + ifeq ($(LIBUNWIND_LIBS),) NO_LIBUNWIND := 1 else diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 8f84423a75da..da43619d6173 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -176,4 +176,17 @@ static inline void callchain_cursor_snapshot(struct callchain_cursor *dest, dest->first = src->curr; dest->nr -= src->pos; } + +#ifdef HAVE_SKIP_CALLCHAIN_IDX +extern int arch_skip_callchain_idx(struct machine *machine, + struct thread *thread, struct ip_callchain *chain); +#else +static inline int arch_skip_callchain_idx(struct machine *machine __maybe_unused, + struct thread *thread __maybe_unused, + struct ip_callchain *chain __maybe_unused) +{ + return -1; +} +#endif + #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index c73e1fc12e53..e9b943acaa5e 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1281,7 +1281,9 @@ static int machine__resolve_callchain_sample(struct machine *machine, u8 cpumode = PERF_RECORD_MISC_USER; int chain_nr = min(max_stack, (int)chain->nr); int i; + int j; int err; + int skip_idx __maybe_unused; callchain_cursor_reset(&callchain_cursor); @@ -1290,14 +1292,26 @@ static int machine__resolve_callchain_sample(struct machine *machine, return 0; } + /* + * Based on DWARF debug information, some architectures skip + * a callchain entry saved by the kernel. + */ + skip_idx = arch_skip_callchain_idx(machine, thread, chain); + for (i = 0; i < chain_nr; i++) { u64 ip; struct addr_location al; if (callchain_param.order == ORDER_CALLEE) - ip = chain->ips[i]; + j = i; else - ip = chain->ips[chain->nr - i - 1]; + j = chain->nr - i - 1; + +#ifdef HAVE_SKIP_CALLCHAIN_IDX + if (j == skip_idx) + continue; +#endif + ip = chain->ips[j]; if (ip >= PERF_CONTEXT_MAX) { switch (ip) { From d180ac14a95d738c2d2622db82c2212a8f998200 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 20 Jun 2014 10:56:34 +0200 Subject: [PATCH 3/5] perf tools: Fix wrong condition for allocation failure Check real allocated pointer for NULL. Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-5rfzbalwjphmdzzil74eazyl@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 65a151e36067..3e80aa10cfd8 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -184,7 +184,7 @@ static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) { evsel->priv = zalloc(sizeof(struct perf_stat)); - if (evsel == NULL) + if (evsel->priv == NULL) return -ENOMEM; perf_evsel__reset_stat_priv(evsel); return 0; From 33058b948e545a911e388e69b8be7274da158fb6 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 27 May 2014 18:14:33 +0200 Subject: [PATCH 4/5] perf script: Move the number processing into its own function I was going to change something here and the result was so much on the right side of the screen that I decided to move that piece into its own function. This patch should make no function change except the moving the code into its own function. Signed-off-by: Sebastian Andrzej Siewior Acked-by: Namhyung Kim Link: http://lkml.kernel.org/n/1401207274-8170-1-git-send-email-bigeasy@linutronix.de Signed-off-by: Jiri Olsa --- .../scripting-engines/trace-event-python.c | 38 +++++++++++-------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 1c419321f707..99c253604231 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -231,6 +231,28 @@ static inline struct event_format *find_cache_event(struct perf_evsel *evsel) return event; } +static PyObject *get_field_numeric_entry(struct event_format *event, + struct format_field *field, void *data) +{ + PyObject *obj; + unsigned long long val; + + val = read_size(event, data + field->offset, field->size); + if (field->flags & FIELD_IS_SIGNED) { + if ((long long)val >= LONG_MIN && + (long long)val <= LONG_MAX) + obj = PyInt_FromLong(val); + else + obj = PyLong_FromLongLong(val); + } else { + if (val <= LONG_MAX) + obj = PyInt_FromLong(val); + else + obj = PyLong_FromUnsignedLongLong(val); + } + return obj; +} + static void python_process_tracepoint(struct perf_sample *sample, struct perf_evsel *evsel, struct thread *thread, @@ -239,7 +261,6 @@ static void python_process_tracepoint(struct perf_sample *sample, PyObject *handler, *retval, *context, *t, *obj, *dict = NULL; static char handler_name[256]; struct format_field *field; - unsigned long long val; unsigned long s, ns; struct event_format *event; unsigned n = 0; @@ -303,20 +324,7 @@ static void python_process_tracepoint(struct perf_sample *sample, offset = field->offset; obj = PyString_FromString((char *)data + offset); } else { /* FIELD_IS_NUMERIC */ - val = read_size(event, data + field->offset, - field->size); - if (field->flags & FIELD_IS_SIGNED) { - if ((long long)val >= LONG_MIN && - (long long)val <= LONG_MAX) - obj = PyInt_FromLong(val); - else - obj = PyLong_FromLongLong(val); - } else { - if (val <= LONG_MAX) - obj = PyInt_FromLong(val); - else - obj = PyLong_FromUnsignedLongLong(val); - } + obj = get_field_numeric_entry(event, field, data); } if (handler) PyTuple_SetItem(t, n++, obj); From 8ac631cd502d6b31fd29f6d019305305b479fa3e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 27 May 2014 18:14:34 +0200 Subject: [PATCH 5/5] perf script: Handle the num array type in python properly The raw_syscalls:sys_enter tracer for instance passes has one argument named 'arg' which is an array of 6 integers. Right the python scripts gets only 0 passed as an argument. The reason is that pevent_read_number() can not handle data types of 48 and returns always 0. This patch changes this by passing num array as list of nums which fit the description. As a result python will now see a list named arg which contains 6 (integer) items. Signed-off-by: Sebastian Andrzej Siewior Acked-by: Namhyung Kim Link: http://lkml.kernel.org/n/1401207274-8170-2-git-send-email-bigeasy@linutronix.de Signed-off-by: Jiri Olsa --- .../scripting-engines/trace-event-python.c | 43 +++++++++++++------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 99c253604231..e55b65a65558 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -234,22 +234,41 @@ static inline struct event_format *find_cache_event(struct perf_evsel *evsel) static PyObject *get_field_numeric_entry(struct event_format *event, struct format_field *field, void *data) { - PyObject *obj; + bool is_array = field->flags & FIELD_IS_ARRAY; + PyObject *obj, *list = NULL; unsigned long long val; + unsigned int item_size, n_items, i; - val = read_size(event, data + field->offset, field->size); - if (field->flags & FIELD_IS_SIGNED) { - if ((long long)val >= LONG_MIN && - (long long)val <= LONG_MAX) - obj = PyInt_FromLong(val); - else - obj = PyLong_FromLongLong(val); + if (is_array) { + list = PyList_New(field->arraylen); + item_size = field->size / field->arraylen; + n_items = field->arraylen; } else { - if (val <= LONG_MAX) - obj = PyInt_FromLong(val); - else - obj = PyLong_FromUnsignedLongLong(val); + item_size = field->size; + n_items = 1; } + + for (i = 0; i < n_items; i++) { + + val = read_size(event, data + field->offset + i * item_size, + item_size); + if (field->flags & FIELD_IS_SIGNED) { + if ((long long)val >= LONG_MIN && + (long long)val <= LONG_MAX) + obj = PyInt_FromLong(val); + else + obj = PyLong_FromLongLong(val); + } else { + if (val <= LONG_MAX) + obj = PyInt_FromLong(val); + else + obj = PyLong_FromUnsignedLongLong(val); + } + if (is_array) + PyList_SET_ITEM(list, i, obj); + } + if (is_array) + obj = list; return obj; }