From 51f5fd2e4615dcdc25cd7f9d19b7b27eb9ecdac7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 31 Oct 2018 17:44:08 +0000 Subject: [PATCH 01/18] tools headers barrier: Fix arm64 tools build failure wrt smp_load_{acquire,release} Cheers for reporting this. I managed to reproduce the build failure with gcc version 6.3.0 20170516 (Debian 6.3.0-18+deb9u1). The code in question is the arm64 versions of smp_load_acquire() and smp_store_release(). Unlike other architectures, these are not built around READ_ONCE() and WRITE_ONCE() since we have instructions we can use instead of fences. Bringing our macros up-to-date with those (i.e. tweaking the union initialisation and using the special "uXX_alias_t" types) appears to fix the issue for me. Committer notes: Testing it in the systems previously failing: # time dm android-ndk:r12b-arm \ android-ndk:r15c-arm \ debian:experimental-x-arm64 \ ubuntu:14.04.4-x-linaro-arm64 \ ubuntu:16.04-x-arm \ ubuntu:16.04-x-arm64 \ ubuntu:18.04-x-arm \ ubuntu:18.04-x-arm64 1 android-ndk:r12b-arm : Ok arm-linux-androideabi-gcc (GCC) 4.9.x 20150123 (prerelease) 2 android-ndk:r15c-arm : Ok arm-linux-androideabi-gcc (GCC) 4.9.x 20150123 (prerelease) 3 debian:experimental-x-arm64 : Ok aarch64-linux-gnu-gcc (Debian 8.2.0-7) 8.2.0 4 ubuntu:14.04.4-x-linaro-arm64 : Ok aarch64-linux-gnu-gcc (Linaro GCC 5.5-2017.10) 5.5.0 5 ubuntu:16.04-x-arm : Ok arm-linux-gnueabihf-gcc (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609 6 ubuntu:16.04-x-arm64 : Ok aarch64-linux-gnu-gcc (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609 7 ubuntu:18.04-x-arm : Ok arm-linux-gnueabihf-gcc (Ubuntu/Linaro 7.3.0-27ubuntu1~18.04) 7.3.0 8 ubuntu:18.04-x-arm64 : Ok aarch64-linux-gnu-gcc (Ubuntu/Linaro 7.3.0-27ubuntu1~18.04) 7.3.0 Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Will Deacon Tested-by: Arnaldo Carvalho de Melo Tested-by: Daniel Borkmann Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20181031174408.GA27871@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm64/include/asm/barrier.h | 133 +++++++++++++------------ 1 file changed, 67 insertions(+), 66 deletions(-) diff --git a/tools/arch/arm64/include/asm/barrier.h b/tools/arch/arm64/include/asm/barrier.h index 12835ea0e417..378c051fa177 100644 --- a/tools/arch/arm64/include/asm/barrier.h +++ b/tools/arch/arm64/include/asm/barrier.h @@ -14,74 +14,75 @@ #define wmb() asm volatile("dmb ishst" ::: "memory") #define rmb() asm volatile("dmb ishld" ::: "memory") -#define smp_store_release(p, v) \ -do { \ - union { typeof(*p) __val; char __c[1]; } __u = \ - { .__val = (__force typeof(*p)) (v) }; \ - \ - switch (sizeof(*p)) { \ - case 1: \ - asm volatile ("stlrb %w1, %0" \ - : "=Q" (*p) \ - : "r" (*(__u8 *)__u.__c) \ - : "memory"); \ - break; \ - case 2: \ - asm volatile ("stlrh %w1, %0" \ - : "=Q" (*p) \ - : "r" (*(__u16 *)__u.__c) \ - : "memory"); \ - break; \ - case 4: \ - asm volatile ("stlr %w1, %0" \ - : "=Q" (*p) \ - : "r" (*(__u32 *)__u.__c) \ - : "memory"); \ - break; \ - case 8: \ - asm volatile ("stlr %1, %0" \ - : "=Q" (*p) \ - : "r" (*(__u64 *)__u.__c) \ - : "memory"); \ - break; \ - default: \ - /* Only to shut up gcc ... */ \ - mb(); \ - break; \ - } \ +#define smp_store_release(p, v) \ +do { \ + union { typeof(*p) __val; char __c[1]; } __u = \ + { .__val = (v) }; \ + \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile ("stlrb %w1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u8_alias_t *)__u.__c) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile ("stlrh %w1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u16_alias_t *)__u.__c) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile ("stlr %w1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u32_alias_t *)__u.__c) \ + : "memory"); \ + break; \ + case 8: \ + asm volatile ("stlr %1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u64_alias_t *)__u.__c) \ + : "memory"); \ + break; \ + default: \ + /* Only to shut up gcc ... */ \ + mb(); \ + break; \ + } \ } while (0) -#define smp_load_acquire(p) \ -({ \ - union { typeof(*p) __val; char __c[1]; } __u; \ - \ - switch (sizeof(*p)) { \ - case 1: \ - asm volatile ("ldarb %w0, %1" \ - : "=r" (*(__u8 *)__u.__c) \ - : "Q" (*p) : "memory"); \ - break; \ - case 2: \ - asm volatile ("ldarh %w0, %1" \ - : "=r" (*(__u16 *)__u.__c) \ - : "Q" (*p) : "memory"); \ - break; \ - case 4: \ - asm volatile ("ldar %w0, %1" \ - : "=r" (*(__u32 *)__u.__c) \ - : "Q" (*p) : "memory"); \ - break; \ - case 8: \ - asm volatile ("ldar %0, %1" \ - : "=r" (*(__u64 *)__u.__c) \ - : "Q" (*p) : "memory"); \ - break; \ - default: \ - /* Only to shut up gcc ... */ \ - mb(); \ - break; \ - } \ - __u.__val; \ +#define smp_load_acquire(p) \ +({ \ + union { typeof(*p) __val; char __c[1]; } __u = \ + { .__c = { 0 } }; \ + \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile ("ldarb %w0, %1" \ + : "=r" (*(__u8_alias_t *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + case 2: \ + asm volatile ("ldarh %w0, %1" \ + : "=r" (*(__u16_alias_t *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + case 4: \ + asm volatile ("ldar %w0, %1" \ + : "=r" (*(__u32_alias_t *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + case 8: \ + asm volatile ("ldar %0, %1" \ + : "=r" (*(__u64_alias_t *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + default: \ + /* Only to shut up gcc ... */ \ + mb(); \ + break; \ + } \ + __u.__val; \ }) #endif /* _TOOLS_LINUX_ASM_AARCH64_BARRIER_H */ From febf8a3712e4209b7e650b37b3b240a2b387794d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Nov 2018 10:34:34 -0300 Subject: [PATCH 02/18] perf examples bpf: Start augmenting raw_syscalls:sys_{start,exit} The previous approach of attaching to each syscall showed how it is possible to augment tracepoints and use that augmentation, pointer payloads, in the existing beautifiers in 'perf trace', but for a more general solution we now will try to augment the main raw_syscalls:sys_{enter,exit} syscalls, and then pass instructions in maps so that it knows which syscalls and which pointer contents, and how many bytes for each of the arguments should be copied. Start with just the bare minimum to collect what is provided by those two tracepoints via the __augmented_syscalls__ map + bpf-output perf event, which results in perf trace showing them without connecting enter+exit: # perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c sleep 1 0.000 sleep/11563 raw_syscalls:sys_exit:NR 59 = 0 0.019 ( ): sleep/11563 brk() ... 0.021 sleep/11563 raw_syscalls:sys_exit:NR 12 = 94682642325504 0.033 ( ): sleep/11563 access(filename:, mode: R) ... 0.037 sleep/11563 raw_syscalls:sys_exit:NR 21 = -2 0.041 ( ): sleep/11563 openat(dfd: CWD, filename: , flags: CLOEXEC) ... 0.044 sleep/11563 raw_syscalls:sys_exit:NR 257 = 3 0.045 ( ): sleep/11563 fstat(fd: 3, statbuf: 0x7ffdbf7119b0) ... 0.046 sleep/11563 raw_syscalls:sys_exit:NR 5 = 0 0.047 ( ): sleep/11563 mmap(len: 103334, prot: READ, flags: PRIVATE, fd: 3) ... 0.049 sleep/11563 raw_syscalls:sys_exit:NR 9 = 140196285493248 0.050 ( ): sleep/11563 close(fd: 3) ... 0.051 sleep/11563 raw_syscalls:sys_exit:NR 3 = 0 0.059 ( ): sleep/11563 openat(dfd: CWD, filename: , flags: CLOEXEC) ... 0.062 sleep/11563 raw_syscalls:sys_exit:NR 257 = 3 0.063 ( ): sleep/11563 read(fd: 3, buf: 0x7ffdbf711b78, count: 832) ... 0.065 sleep/11563 raw_syscalls:sys_exit:NR 0 = 832 0.066 ( ): sleep/11563 fstat(fd: 3, statbuf: 0x7ffdbf711a10) ... 0.067 sleep/11563 raw_syscalls:sys_exit:NR 5 = 0 0.068 ( ): sleep/11563 mmap(len: 8192, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS) ... 0.070 sleep/11563 raw_syscalls:sys_exit:NR 9 = 140196285485056 0.073 ( ): sleep/11563 mmap(len: 3889792, prot: EXEC|READ, flags: PRIVATE|DENYWRITE, fd: 3) ... 0.076 sleep/11563 raw_syscalls:sys_exit:NR 9 = 140196279463936 0.077 ( ): sleep/11563 mprotect(start: 0x7f81fd8a8000, len: 2093056) ... 0.083 sleep/11563 raw_syscalls:sys_exit:NR 10 = 0 0.084 ( ): sleep/11563 mmap(addr: 0x7f81fdaa7000, len: 24576, prot: READ|WRITE, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 1753088) ... 0.088 sleep/11563 raw_syscalls:sys_exit:NR 9 = 140196283314176 0.091 ( ): sleep/11563 mmap(addr: 0x7f81fdaad000, len: 14976, prot: READ|WRITE, flags: PRIVATE|FIXED|ANONYMOUS) ... 0.093 sleep/11563 raw_syscalls:sys_exit:NR 9 = 140196283338752 0.097 ( ): sleep/11563 close(fd: 3) ... 0.098 sleep/11563 raw_syscalls:sys_exit:NR 3 = 0 0.107 ( ): sleep/11563 arch_prctl(option: 4098, arg2: 140196285490432) ... 0.108 sleep/11563 raw_syscalls:sys_exit:NR 158 = 0 0.143 ( ): sleep/11563 mprotect(start: 0x7f81fdaa7000, len: 16384, prot: READ) ... 0.146 sleep/11563 raw_syscalls:sys_exit:NR 10 = 0 0.157 ( ): sleep/11563 mprotect(start: 0x561d037e7000, len: 4096, prot: READ) ... 0.160 sleep/11563 raw_syscalls:sys_exit:NR 10 = 0 0.163 ( ): sleep/11563 mprotect(start: 0x7f81fdcd5000, len: 4096, prot: READ) ... 0.165 sleep/11563 raw_syscalls:sys_exit:NR 10 = 0 0.166 ( ): sleep/11563 munmap(addr: 0x7f81fdcbb000, len: 103334) ... 0.174 sleep/11563 raw_syscalls:sys_exit:NR 11 = 0 0.216 ( ): sleep/11563 brk() ... 0.217 sleep/11563 raw_syscalls:sys_exit:NR 12 = 94682642325504 0.217 ( ): sleep/11563 brk(brk: 0x561d05453000) ... 0.219 sleep/11563 raw_syscalls:sys_exit:NR 12 = 94682642460672 0.220 ( ): sleep/11563 brk() ... 0.221 sleep/11563 raw_syscalls:sys_exit:NR 12 = 94682642460672 0.224 ( ): sleep/11563 open(filename: , flags: CLOEXEC) ... 0.228 sleep/11563 raw_syscalls:sys_exit:NR 2 = 3 0.229 ( ): sleep/11563 fstat(fd: 3, statbuf: 0x7f81fdaacaa0) ... 0.230 sleep/11563 raw_syscalls:sys_exit:NR 5 = 0 0.231 ( ): sleep/11563 mmap(len: 113045344, prot: READ, flags: PRIVATE, fd: 3) ... 0.234 sleep/11563 raw_syscalls:sys_exit:NR 9 = 140196166418432 0.237 ( ): sleep/11563 close(fd: 3) ... 0.238 sleep/11563 raw_syscalls:sys_exit:NR 3 = 0 0.262 ( ): sleep/11563 nanosleep(rqtp: 0x7ffdbf7126f0) ... 1000.399 sleep/11563 raw_syscalls:sys_exit:NR 35 = 0 1000.440 ( ): sleep/11563 close(fd: 1) ... 1000.447 sleep/11563 raw_syscalls:sys_exit:NR 3 = 0 1000.454 ( ): sleep/11563 close(fd: 2) ... 1000.468 ( ): sleep/11563 exit_group( ) # In the next csets we'll connect those events to the existing enter/exit raw_syscalls handlers in 'perf trace', just like we did with the syscalls:sys_{enter,exit}_* tracepoints. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-5nl8l4hx1tl9pqdx65nkp6pw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- .../examples/bpf/augmented_raw_syscalls.c | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 tools/perf/examples/bpf/augmented_raw_syscalls.c diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c new file mode 100644 index 000000000000..cde91c34b101 --- /dev/null +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Augment the raw_syscalls tracepoints with the contents of the pointer arguments. + * + * Test it with: + * + * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null + * + * This exactly matches what is marshalled into the raw_syscall:sys_enter + * payload expected by the 'perf trace' beautifiers. + * + * For now it just uses the existing tracepoint augmentation code in 'perf + * trace', in the next csets we'll hook up these with the sys_enter/sys_exit + * code that will combine entry/exit in a strace like way. + */ + +#include +#include + +/* bpf-output associated map */ +struct bpf_map SEC("maps") __augmented_syscalls__ = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(u32), + .max_entries = __NR_CPUS__, +}; + +struct syscall_enter_args { + unsigned long long common_tp_fields; + long syscall_nr; + unsigned long args[6]; +}; + +struct syscall_exit_args { + unsigned long long common_tp_fields; + long syscall_nr; + long ret; +}; + +SEC("raw_syscalls:sys_enter") +int sys_enter(struct syscall_enter_args *args) +{ + struct { + struct syscall_enter_args args; + } augmented_args; + unsigned int len = sizeof(augmented_args); + + probe_read(&augmented_args.args, sizeof(augmented_args.args), args); + perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); + return 0; +} + +SEC("raw_syscalls:sys_exit") +int sys_exit(struct syscall_exit_args *args) +{ + return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ +} + +license(GPL); From 3c5e3dabf3722a883227623a4adf61976c2224ff Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Nov 2018 13:50:35 -0300 Subject: [PATCH 03/18] perf trace: When augmenting raw_syscalls plug raw_syscalls:sys_exit too With just this commit we get to support all syscalls via hooking raw_syscalls:sys_{enter,exit} to the trace__sys_{enter,exit} routines to combine, strace-like, those tracepoints. # trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c sleep 1 ? ( ): sleep/31680 ... [continued]: execve()) = 0 0.043 ( 0.004 ms): sleep/31680 brk() = 0x55652a851000 0.070 ( 0.009 ms): sleep/31680 access(filename:, mode: R) = -1 ENOENT No such file or directory 0.087 ( 0.006 ms): sleep/31680 openat(dfd: CWD, filename: , flags: CLOEXEC) = 3 0.096 ( 0.003 ms): sleep/31680 fstat(fd: 3, statbuf: 0x7ffc5269e190) = 0 0.101 ( 0.005 ms): sleep/31680 mmap(len: 103334, prot: READ, flags: PRIVATE, fd: 3) = 0x7f709c239000 0.109 ( 0.002 ms): sleep/31680 close(fd: 3) = 0 0.126 ( 0.006 ms): sleep/31680 openat(dfd: CWD, filename: , flags: CLOEXEC) = 3 0.135 ( 0.003 ms): sleep/31680 read(fd: 3, buf: 0x7ffc5269e358, count: 832) = 832 0.141 ( 0.002 ms): sleep/31680 fstat(fd: 3, statbuf: 0x7ffc5269e1f0) = 0 0.146 ( 0.005 ms): sleep/31680 mmap(len: 8192, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS) = 0x7f709c237000 0.159 ( 0.007 ms): sleep/31680 mmap(len: 3889792, prot: EXEC|READ, flags: PRIVATE|DENYWRITE, fd: 3) = 0x7f709bc79000 0.168 ( 0.009 ms): sleep/31680 mprotect(start: 0x7f709be26000, len: 2093056) = 0 0.179 ( 0.010 ms): sleep/31680 mmap(addr: 0x7f709c025000, len: 24576, prot: READ|WRITE, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 1753088) = 0x7f709c025000 0.196 ( 0.005 ms): sleep/31680 mmap(addr: 0x7f709c02b000, len: 14976, prot: READ|WRITE, flags: PRIVATE|FIXED|ANONYMOUS) = 0x7f709c02b000 0.210 ( 0.002 ms): sleep/31680 close(fd: 3) = 0 0.230 ( 0.002 ms): sleep/31680 arch_prctl(option: 4098, arg2: 140121632638208) = 0 0.306 ( 0.009 ms): sleep/31680 mprotect(start: 0x7f709c025000, len: 16384, prot: READ) = 0 0.338 ( 0.005 ms): sleep/31680 mprotect(start: 0x556529607000, len: 4096, prot: READ) = 0 0.348 ( 0.005 ms): sleep/31680 mprotect(start: 0x7f709c253000, len: 4096, prot: READ) = 0 0.356 ( 0.019 ms): sleep/31680 munmap(addr: 0x7f709c239000, len: 103334) = 0 0.463 ( 0.002 ms): sleep/31680 brk() = 0x55652a851000 0.468 ( 0.004 ms): sleep/31680 brk(brk: 0x55652a872000) = 0x55652a872000 0.474 ( 0.002 ms): sleep/31680 brk() = 0x55652a872000 0.484 ( 0.008 ms): sleep/31680 open(filename: , flags: CLOEXEC) = 3 0.497 ( 0.002 ms): sleep/31680 fstat(fd: 3, statbuf: 0x7f709c02aaa0) = 0 0.501 ( 0.006 ms): sleep/31680 mmap(len: 113045344, prot: READ, flags: PRIVATE, fd: 3) = 0x7f70950aa000 0.514 ( 0.002 ms): sleep/31680 close(fd: 3) = 0 0.554 (1000.140 ms): sleep/31680 nanosleep(rqtp: 0x7ffc5269eed0) = 0 1000.734 ( 0.007 ms): sleep/31680 close(fd: 1) = 0 1000.748 ( 0.004 ms): sleep/31680 close(fd: 2) = 0 1000.769 ( ): sleep/31680 exit_group() # Now to allow selecting which syscalls should be traced, using a map. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-votqqmqhag8e1i9mgyzfez3o@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index dc8a6c4986ce..f582ca575883 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3501,7 +3501,8 @@ int cmd_trace(int argc, const char **argv) evsel->handler = trace__sys_enter; evlist__for_each_entry(trace.evlist, evsel) { - if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) { + if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_") || + strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0) { perf_evsel__init_augmented_syscall_tp(evsel); perf_evsel__init_augmented_syscall_tp_ret(evsel); evsel->handler = trace__sys_exit; From cd26ea6d50a207ee37e0364ecc2d196d6c9671e8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 3 Nov 2018 08:19:56 -0300 Subject: [PATCH 04/18] perf trace: Fix setting of augmented payload when using eBPF + raw_syscalls For now with BPF raw_augmented we hook into raw_syscalls:sys_enter and there we get all 6 syscall args plus the tracepoint common fields (sizeof(long)) and the syscall_nr (another long). So we check if that is the case and if so don't look after the sc->args_size, but always after the full raw_syscalls:sys_enter payload, which is fixed. We'll revisit this later to pass s->args_size to the BPF augmenter (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it copies only what we need for each syscall, like what happens when we use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace traffic to just what is needed for each syscall. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-nlslrg8apxdsobt4pwl3n7ur@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f582ca575883..835619476370 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -108,6 +108,7 @@ struct trace { } stats; unsigned int max_stack; unsigned int min_stack; + bool raw_augmented_syscalls; bool not_ev_qualifier; bool live; bool full_time; @@ -1724,13 +1725,28 @@ static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel, return printed; } -static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size) +static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, bool raw_augmented) { void *augmented_args = NULL; + /* + * For now with BPF raw_augmented we hook into raw_syscalls:sys_enter + * and there we get all 6 syscall args plus the tracepoint common + * fields (sizeof(long)) and the syscall_nr (another long). So we check + * if that is the case and if so don't look after the sc->args_size, + * but always after the full raw_syscalls:sys_enter payload, which is + * fixed. + * + * We'll revisit this later to pass s->args_size to the BPF augmenter + * (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it + * copies only what we need for each syscall, like what happens when we + * use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace + * traffic to just what is needed for each syscall. + */ + int args_size = raw_augmented ? (8 * (int)sizeof(long)) : sc->args_size; - *augmented_args_size = sample->raw_size - sc->args_size; + *augmented_args_size = sample->raw_size - args_size; if (*augmented_args_size > 0) - augmented_args = sample->raw_data + sc->args_size; + augmented_args = sample->raw_data + args_size; return augmented_args; } @@ -1780,7 +1796,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, * here and avoid using augmented syscalls when the evsel is the raw_syscalls one. */ if (evsel != trace->syscalls.events.sys_enter) - augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size); + augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls); ttrace->entry_time = sample->time; msg = ttrace->entry_str; printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name); @@ -1833,7 +1849,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evse goto out_put; args = perf_evsel__sc_tp_ptr(evsel, args, sample); - augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size); + augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls); syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread); fprintf(trace->output, "%s", msg); err = 0; @@ -3501,8 +3517,15 @@ int cmd_trace(int argc, const char **argv) evsel->handler = trace__sys_enter; evlist__for_each_entry(trace.evlist, evsel) { - if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_") || - strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0) { + bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0; + + if (raw_syscalls_sys_exit) { + trace.raw_augmented_syscalls = true; + goto init_augmented_syscall_tp; + } + + if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) { +init_augmented_syscall_tp: perf_evsel__init_augmented_syscall_tp(evsel); perf_evsel__init_augmented_syscall_tp_ret(evsel); evsel->handler = trace__sys_exit; From 79ef68c7e1f665578e005b454480b6eca60edabe Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 5 Nov 2018 12:23:40 -0300 Subject: [PATCH 05/18] perf augmented_syscalls: Start collecting pathnames in the BPF program This is the start of having the raw_syscalls:sys_enter BPF handler collecting pointer arguments, namely pathnames, and with two syscalls that have that pointer in different arguments, "open" as it as its first argument, "openat" as the second. With this in place the existing beautifiers in 'perf trace' works, those args are shown instead of just the pointer that comes with the syscalls tracepoints. This also serves to show and document pitfalls in the process of using just that place in the kernel (raw_syscalls:sys_enter) plus tables provided by userspace to collect syscall pointer arguments. One is the need to use a barrier, as suggested by Edward, to avoid clang optimizations that make the kernel BPF verifier to refuse loading our pointer contents collector. The end result should be a generic eBPF program that works in all architectures, with the differences amongst archs resolved by the userspace component, 'perf trace', that should get all its tables created automatically from the kernel components where they are defined, via string table constructors for things not expressed in BTF/DWARF (enums, structs, etc), and otherwise using those observability files (BTF). Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: David Ahern Cc: Edward Cree Cc: Jiri Olsa Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Wang Nan Cc: Yonghong Song Link: https://lkml.kernel.org/n/tip-37dz54pmotgpnwg9tb6zuk9j@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- .../examples/bpf/augmented_raw_syscalls.c | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index cde91c34b101..90a19336310b 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -37,15 +37,87 @@ struct syscall_exit_args { long ret; }; +struct augmented_filename { + unsigned int size; + int reserved; + char value[256]; +}; + +#define SYS_OPEN 2 +#define SYS_OPENAT 257 + SEC("raw_syscalls:sys_enter") int sys_enter(struct syscall_enter_args *args) { struct { struct syscall_enter_args args; + struct augmented_filename filename; } augmented_args; unsigned int len = sizeof(augmented_args); + const void *filename_arg = NULL; probe_read(&augmented_args.args, sizeof(augmented_args.args), args); + /* + * Yonghong and Edward Cree sayz: + * + * https://www.spinics.net/lists/netdev/msg531645.html + * + * >> R0=inv(id=0) R1=inv2 R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1 + * >> 10: (bf) r1 = r6 + * >> 11: (07) r1 += 16 + * >> 12: (05) goto pc+2 + * >> 15: (79) r3 = *(u64 *)(r1 +0) + * >> dereference of modified ctx ptr R1 off=16 disallowed + * > Aha, we at least got a different error message this time. + * > And indeed llvm has done that optimisation, rather than the more obvious + * > 11: r3 = *(u64 *)(r1 +16) + * > because it wants to have lots of reads share a single insn. You may be able + * > to defeat that optimisation by adding compiler barriers, idk. Maybe someone + * > with llvm knowledge can figure out how to stop it (ideally, llvm would know + * > when it's generating for bpf backend and not do that). -O0? ¯\_(ツ)_/¯ + * + * The optimization mostly likes below: + * + * br1: + * ... + * r1 += 16 + * goto merge + * br2: + * ... + * r1 += 20 + * goto merge + * merge: + * *(u64 *)(r1 + 0) + * + * The compiler tries to merge common loads. There is no easy way to + * stop this compiler optimization without turning off a lot of other + * optimizations. The easiest way is to add barriers: + * + * __asm__ __volatile__("": : :"memory") + * + * after the ctx memory access to prevent their down stream merging. + */ + switch (augmented_args.args.syscall_nr) { + case SYS_OPEN: filename_arg = (const void *)args->args[0]; + __asm__ __volatile__("": : :"memory"); + break; + case SYS_OPENAT: filename_arg = (const void *)args->args[1]; + break; + } + + if (filename_arg != NULL) { + augmented_args.filename.reserved = 0; + augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, + sizeof(augmented_args.filename.value), + filename_arg); + if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { + len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; + len &= sizeof(augmented_args.filename.value) - 1; + } + } else { + len = sizeof(augmented_args.args); + } + perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); return 0; } From c3537fc251503af18085b8f84126d13743663970 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 1 Oct 2018 12:59:26 -0700 Subject: [PATCH 06/18] perf evlist: Move perf_evsel__reset_weak_group into evlist - Move the function from builtin-stat to evlist for reuse - Rename to evlist to match purpose better - Pass the evlist as first argument. - No functional changes Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20181001195927.14211-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 28 +--------------------------- tools/perf/util/evlist.c | 27 +++++++++++++++++++++++++++ tools/perf/util/evlist.h | 3 +++ 3 files changed, 31 insertions(+), 27 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d1028d7755bb..a635abfa77b6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -383,32 +383,6 @@ static bool perf_evsel__should_store_id(struct perf_evsel *counter) return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID; } -static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel) -{ - struct perf_evsel *c2, *leader; - bool is_open = true; - - leader = evsel->leader; - pr_debug("Weak group for %s/%d failed\n", - leader->name, leader->nr_members); - - /* - * for_each_group_member doesn't work here because it doesn't - * include the first entry. - */ - evlist__for_each_entry(evsel_list, c2) { - if (c2 == evsel) - is_open = false; - if (c2->leader == leader) { - if (is_open) - perf_evsel__close(c2); - c2->leader = c2; - c2->nr_members = 0; - } - } - return leader; -} - static bool is_target_alive(struct target *_target, struct thread_map *threads) { @@ -477,7 +451,7 @@ try_again: if ((errno == EINVAL || errno == EBADF) && counter->leader != counter && counter->weak_group) { - counter = perf_evsel__reset_weak_group(counter); + counter = perf_evlist__reset_weak_group(evsel_list, counter); goto try_again; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e88e6f9b1463..668d2a9ef0f4 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1810,3 +1810,30 @@ void perf_evlist__force_leader(struct perf_evlist *evlist) leader->forced_leader = true; } } + +struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evsel_list, + struct perf_evsel *evsel) +{ + struct perf_evsel *c2, *leader; + bool is_open = true; + + leader = evsel->leader; + pr_debug("Weak group for %s/%d failed\n", + leader->name, leader->nr_members); + + /* + * for_each_group_member doesn't work here because it doesn't + * include the first entry. + */ + evlist__for_each_entry(evsel_list, c2) { + if (c2 == evsel) + is_open = false; + if (c2->leader == leader) { + if (is_open) + perf_evsel__close(c2); + c2->leader = c2; + c2->nr_members = 0; + } + } + return leader; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index dc66436add98..9919eed6d15b 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -312,4 +312,7 @@ bool perf_evlist__exclude_kernel(struct perf_evlist *evlist); void perf_evlist__force_leader(struct perf_evlist *evlist); +struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evlist, + struct perf_evsel *evsel); + #endif /* __PERF_EVLIST_H */ From cf99ad1424c54fc84b84d3a3deb57a48c340c30a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 1 Oct 2018 12:59:27 -0700 Subject: [PATCH 07/18] perf record: Support weak groups Implement a weak group fallback for 'perf record', similar to the existing 'perf stat' support. This allows to use groups that might be longer than the available counters without failing. Before: $ perf record -e '{cycles,cache-misses,cache-references,cpu_clk_unhalted.thread,cycles,cycles,cycles}' -a sleep 1 Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (cycles). /bin/dmesg | grep -i perf may provide additional information. After: $ ./perf record -e '{cycles,cache-misses,cache-references,cpu_clk_unhalted.thread,cycles,cycles,cycles}:W' -a sleep 1 WARNING: No sample_id_all support, falling back to unordered processing [ perf record: Woken up 3 times to write data ] [ perf record: Captured and wrote 8.136 MB perf.data (134069 samples) ] Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20181001195927.14211-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 1 - tools/perf/builtin-record.c | 7 ++++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 236b9b97dfdb..667c14e56031 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -55,7 +55,6 @@ counted. The following modifiers exist: S - read sample value (PERF_SAMPLE_READ) D - pin the event to the PMU W - group is weak and will fallback to non-group if not schedulable, - only supported in 'perf stat' for now. The 'p' modifier can be used for specifying how precise the instruction address should be. The 'p' modifier can be specified multiple times: diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 10cf889c6d75..488779bc4c8d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -391,7 +391,12 @@ try_again: ui__warning("%s\n", msg); goto try_again; } - + if ((errno == EINVAL || errno == EBADF) && + pos->leader != pos && + pos->weak_group) { + pos = perf_evlist__reset_weak_group(evlist, pos); + goto try_again; + } rc = -errno; perf_evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); From ea1fa48c055f833eb25f0c33188feecb7002ada5 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 23 Oct 2018 17:16:16 +0200 Subject: [PATCH 08/18] perf stat: Handle different PMU names with common prefix On s390 the CPU Measurement Facility for counters now supports 2 PMUs named cpum_cf (CPU Measurement Facility for counters) and cpum_cf_diag (CPU Measurement Facility for diagnostic counters) for one and the same CPU. Running command [root@s35lp76 perf]# ./perf stat -e tx_c_tend \ -- ~/mytests/cf-tx-events 1 Measuring transactions TX_C_TABORT_NO_SPECIAL: 0 expected:0 TX_C_TABORT_SPECIAL: 0 expected:0 TX_C_TEND: 1 expected:1 TX_NC_TABORT: 11 expected:11 TX_NC_TEND: 1 expected:1 Performance counter stats for '/root/mytests/cf-tx-events 1': 2 tx_c_tend 0.002120091 seconds time elapsed 0.000121000 seconds user 0.002127000 seconds sys [root@s35lp76 perf]# displays output which is unexpected (and wrong): 2 tx_c_tend The test program definitely triggers only one transaction, as shown in line 'TX_C_TEND: 1 expected:1'. This is caused by the following call sequence: pmu_lookup() scans and installs a PMU. +--> pmu_aliases() parses all aliases in directory ...//events/* which are file names. +--> pmu_aliases_parse() Read each file in directory and create an new alias entry. This is done with +--> perf_pmu__new_alias() and +--> __perf_pmu__new_alias() which also check for identical alias names. After pmu_aliases() returns, a complete list of event names for this pmu has been created. Now function pmu_add_cpu_aliases() is called to add the events listed in the json | files to the alias list of the cpu. +--> perf_pmu__find_map() Returns a pointer to the json events. Now function pmu_add_cpu_aliases() scans through all events listed in the JSON files for this CPU. Each json event pmu name is compared with the current PMU being built up and if they mismatch, the json event is added to the current PMUs alias list. To avoid duplicate entries the following comparison is done: if (!is_arm_pmu_core(name)) { pname = pe->pmu ? pe->pmu : "cpu"; if (strncmp(pname, name, strlen(pname))) continue; } The culprit is the strncmp() function. Using current s390 PMU naming, the first PMU is 'cpum_cf' and a long list of events is added, among them 'tx_c_tend' When the second PMU named 'cpum_cf_diag' is added, only one event named 'CF_DIAG' is added by the pmu_aliases() function. Now function pmu_add_cpu_aliases() is invoked for PMU 'cpum_cf_diag'. Since the CPUID string is the same for both PMUs, json file events for PMU named 'cpum_cf' are added to the PMU 'cpm_cf_diag' This happens because the strncmp() actually compares: strncmp("cpum_cf", "cpum_cf_diag", 6); The first parameter is the pmu name taken from the event in the json file. The second parameter is the pmu name of the PMU currently being built. They are different, but the length of the compare only tests the common prefix and this returns 0(true) when it should return false. Now all events for PMU cpum_cf are added to the alias list for pmu cpum_cf_diag. Later on in function parse_events_add_pmu() the event 'tx_c_end' is searched in all available PMUs and found twice, adding it two times to the evsel_list global variable which is the root of all events. This results in a counter value of 2 instead of 1. Output with this patch: [root@s35lp76 perf]# ./perf stat -e tx_c_tend \ -- ~/mytests/cf-tx-events 1 Measuring transactions TX_C_TABORT_NO_SPECIAL: 0 expected:0 TX_C_TABORT_SPECIAL: 0 expected:0 TX_C_TEND: 1 expected:1 TX_NC_TABORT: 11 expected:11 TX_NC_TEND: 1 expected:1 Performance counter stats for '/root/mytests/cf-tx-events 1': 1 tx_c_tend 0.001815365 seconds time elapsed 0.000123000 seconds user 0.001756000 seconds sys [root@s35lp76 perf]# Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Reviewed-by: Sebastien Boisvert Cc: Heiko Carstens Cc: Kan Liang Cc: Martin Schwidefsky Cc: stable@vger.kernel.org Fixes: 292c34c10249 ("perf pmu: Fix core PMU alias list for X86 platform") Link: http://lkml.kernel.org/r/20181023151616.78193-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 7799788f662f..7e49baad304d 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -773,7 +773,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) if (!is_arm_pmu_core(name)) { pname = pe->pmu ? pe->pmu : "cpu"; - if (strncmp(pname, name, strlen(pname))) + if (strcmp(pname, name)) continue; } From 590ac60d8aa929bd21e35cd95a7d8720d00eb4f3 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 31 Oct 2018 19:06:35 +0800 Subject: [PATCH 09/18] perf top: Display the LBR stats in callchain entry 'perf report' has supported the displaying of LBR stats (such as cycles, predicted%) in callchain entry. For example: $ perf report --branch-history --stdio --1.01%--intel_idle mwait.h:29 intel_idle cpufeature.h:164 (cycles:5) intel_idle cpufeature.h:164 (predicted:76.4%) intel_idle mwait.h:102 (cycles:41) intel_idle current.h:15 While 'perf top' doesn't support that. For example: $ perf top -a -b --call-graph branch - 13.86% 0.23% [kernel] [k] __x86_indirect_thunk_rax - 13.65% __x86_indirect_thunk_rax + 1.69% do_syscall_64 + 1.68% do_select + 1.41% ktime_get + 0.70% __schedule + 0.62% do_sys_poll 0.58% __x86_indirect_thunk_rax Actually it's very easy to enable this feature in 'perf top'. With this patch, the result is: $ perf top -a -b --call-graph branch $ - 13.58% 0.00% [kernel] [k] __x86_indirect_thunk_rax $ - 13.57% __x86_indirect_thunk_rax (predicted:93.9%) $ + 1.78% do_select (cycles:2) $ + 1.68% perf_pmu_disable.part.99 (cycles:1) $ + 1.45% ___sys_recvmsg (cycles:25) $ + 0.81% unix_stream_sendmsg (cycles:18) $ + 0.80% ktime_get (cycles:400) $ 0.58% pick_next_task_fair (cycles:47) $ + 0.56% i915_request_retire (cycles:2) $ + 0.52% do_sys_poll (cycles:4) Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1540983995-20462-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index b2838de13de0..aa0c73e57924 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1429,6 +1429,9 @@ int cmd_top(int argc, const char **argv) } } + if (opts->branch_stack && callchain_param.enabled) + symbol_conf.show_branchflag_count = true; + sort__mode = SORT_MODE__TOP; /* display thread wants entries to be collapsed in a different tree */ perf_hpp_list.need_collapse = 1; From 5ed4419d47f8ba6bbccd8e3203276b3c39a792b7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Sun, 4 Nov 2018 17:12:35 +0200 Subject: [PATCH 10/18] perf scripts python: exported-sql-viewer.py: Fall back to /usr/local/lib/libxed.so Fall back to /usr/local/lib/libxed.so to cater for distributions that do not have /usr/local/lib in the library path by default. Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181104151238.15947-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 24cb0bd56afa..20cc8e7879b9 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -1929,7 +1929,12 @@ class XEDInstruction(): class LibXED(): def __init__(self): - self.libxed = CDLL("libxed.so") + try: + self.libxed = CDLL("libxed.so") + except: + self.libxed = None + if not self.libxed: + self.libxed = CDLL("/usr/local/lib/libxed.so") self.xed_tables_init = self.libxed.xed_tables_init self.xed_tables_init.restype = None From 210cf1f96185f0c6383df8b6030e3d2945e1b41a Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Sun, 4 Nov 2018 17:12:36 +0200 Subject: [PATCH 11/18] perf scripts python: exported-sql-viewer.py: Add Selected branches report Fetching data from the database can be slow. Add a report that provides the ability to select a subset of branches. Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181104151238.15947-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../scripts/python/exported-sql-viewer.py | 327 ++++++++++++++++++ 1 file changed, 327 insertions(+) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 20cc8e7879b9..a9d2b3170141 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -119,6 +119,14 @@ def dsoname(name): return "[kernel]" return name +def findnth(s, sub, n, offs=0): + pos = s.find(sub) + if pos < 0: + return pos + if n <= 1: + return offs + pos + return findnth(s[pos + 1:], sub, n - 1, offs + pos + 1) + # Percent to one decimal place def PercentToOneDP(n, d): @@ -1464,6 +1472,317 @@ class BranchWindow(QMdiSubWindow): else: self.find_bar.NotFound() +# Dialog data item converted and validated using a SQL table + +class SQLTableDialogDataItem(): + + def __init__(self, glb, label, placeholder_text, table_name, match_column, column_name1, column_name2, parent): + self.glb = glb + self.label = label + self.placeholder_text = placeholder_text + self.table_name = table_name + self.match_column = match_column + self.column_name1 = column_name1 + self.column_name2 = column_name2 + self.parent = parent + + self.value = "" + + self.widget = QLineEdit() + self.widget.editingFinished.connect(self.Validate) + self.widget.textChanged.connect(self.Invalidate) + self.red = False + self.error = "" + self.validated = True + + self.last_id = 0 + self.first_time = 0 + self.last_time = 2 ** 64 + if self.table_name == "": + query = QSqlQuery(self.glb.db) + QueryExec(query, "SELECT id, time FROM samples ORDER BY id DESC LIMIT 1") + if query.next(): + self.last_id = int(query.value(0)) + self.last_time = int(query.value(1)) + QueryExec(query, "SELECT time FROM samples WHERE time != 0 ORDER BY id LIMIT 1") + if query.next(): + self.first_time = int(query.value(0)) + if placeholder_text: + placeholder_text += ", between " + str(self.first_time) + " and " + str(self.last_time) + + if placeholder_text: + self.widget.setPlaceholderText(placeholder_text) + + def ValueToIds(self, value): + ids = [] + query = QSqlQuery(self.glb.db) + stmt = "SELECT id FROM " + self.table_name + " WHERE " + self.match_column + " = '" + value + "'" + ret = query.exec_(stmt) + if ret: + while query.next(): + ids.append(str(query.value(0))) + return ids + + def IdBetween(self, query, lower_id, higher_id, order): + QueryExec(query, "SELECT id FROM samples WHERE id > " + str(lower_id) + " AND id < " + str(higher_id) + " ORDER BY id " + order + " LIMIT 1") + if query.next(): + return True, int(query.value(0)) + else: + return False, 0 + + def BinarySearchTime(self, lower_id, higher_id, target_time, get_floor): + query = QSqlQuery(self.glb.db) + while True: + next_id = int((lower_id + higher_id) / 2) + QueryExec(query, "SELECT time FROM samples WHERE id = " + str(next_id)) + if not query.next(): + ok, dbid = self.IdBetween(query, lower_id, next_id, "DESC") + if not ok: + ok, dbid = self.IdBetween(query, next_id, higher_id, "") + if not ok: + return str(higher_id) + next_id = dbid + QueryExec(query, "SELECT time FROM samples WHERE id = " + str(next_id)) + next_time = int(query.value(0)) + if get_floor: + if target_time > next_time: + lower_id = next_id + else: + higher_id = next_id + if higher_id <= lower_id + 1: + return str(higher_id) + else: + if target_time >= next_time: + lower_id = next_id + else: + higher_id = next_id + if higher_id <= lower_id + 1: + return str(lower_id) + + def ConvertRelativeTime(self, val): + print "val ", val + mult = 1 + suffix = val[-2:] + if suffix == "ms": + mult = 1000000 + elif suffix == "us": + mult = 1000 + elif suffix == "ns": + mult = 1 + else: + return val + val = val[:-2].strip() + if not self.IsNumber(val): + return val + val = int(val) * mult + if val >= 0: + val += self.first_time + else: + val += self.last_time + return str(val) + + def ConvertTimeRange(self, vrange): + print "vrange ", vrange + if vrange[0] == "": + vrange[0] = str(self.first_time) + if vrange[1] == "": + vrange[1] = str(self.last_time) + vrange[0] = self.ConvertRelativeTime(vrange[0]) + vrange[1] = self.ConvertRelativeTime(vrange[1]) + print "vrange2 ", vrange + if not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]): + return False + print "ok1" + beg_range = max(int(vrange[0]), self.first_time) + end_range = min(int(vrange[1]), self.last_time) + if beg_range > self.last_time or end_range < self.first_time: + return False + print "ok2" + vrange[0] = self.BinarySearchTime(0, self.last_id, beg_range, True) + vrange[1] = self.BinarySearchTime(1, self.last_id + 1, end_range, False) + print "vrange3 ", vrange + return True + + def AddTimeRange(self, value, ranges): + print "value ", value + n = value.count("-") + if n == 1: + pass + elif n == 2: + if value.split("-")[1].strip() == "": + n = 1 + elif n == 3: + n = 2 + else: + return False + pos = findnth(value, "-", n) + vrange = [value[:pos].strip() ,value[pos+1:].strip()] + if self.ConvertTimeRange(vrange): + ranges.append(vrange) + return True + return False + + def InvalidValue(self, value): + self.value = "" + palette = QPalette() + palette.setColor(QPalette.Text,Qt.red) + self.widget.setPalette(palette) + self.red = True + self.error = self.label + " invalid value '" + value + "'" + self.parent.ShowMessage(self.error) + + def IsNumber(self, value): + try: + x = int(value) + except: + x = 0 + return str(x) == value + + def Invalidate(self): + self.validated = False + + def Validate(self): + input_string = self.widget.text() + self.validated = True + if self.red: + palette = QPalette() + self.widget.setPalette(palette) + self.red = False + if not len(input_string.strip()): + self.error = "" + self.value = "" + return + if self.table_name == "": + ranges = [] + for value in [x.strip() for x in input_string.split(",")]: + if not self.AddTimeRange(value, ranges): + return self.InvalidValue(value) + ranges = [("(" + self.column_name1 + " >= " + r[0] + " AND " + self.column_name1 + " <= " + r[1] + ")") for r in ranges] + self.value = " OR ".join(ranges) + elif self.table_name == "": + singles = [] + ranges = [] + for value in [x.strip() for x in input_string.split(",")]: + if "-" in value: + vrange = value.split("-") + if len(vrange) != 2 or not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]): + return self.InvalidValue(value) + ranges.append(vrange) + else: + if not self.IsNumber(value): + return self.InvalidValue(value) + singles.append(value) + ranges = [("(" + self.column_name1 + " >= " + r[0] + " AND " + self.column_name1 + " <= " + r[1] + ")") for r in ranges] + if len(singles): + ranges.append(self.column_name1 + " IN (" + ",".join(singles) + ")") + self.value = " OR ".join(ranges) + elif self.table_name: + all_ids = [] + for value in [x.strip() for x in input_string.split(",")]: + ids = self.ValueToIds(value) + if len(ids): + all_ids.extend(ids) + else: + return self.InvalidValue(value) + self.value = self.column_name1 + " IN (" + ",".join(all_ids) + ")" + if self.column_name2: + self.value = "( " + self.value + " OR " + self.column_name2 + " IN (" + ",".join(all_ids) + ") )" + else: + self.value = input_string.strip() + self.error = "" + self.parent.ClearMessage() + + def IsValid(self): + if not self.validated: + self.Validate() + if len(self.error): + self.parent.ShowMessage(self.error) + return False + return True + +# Selected branch report creation dialog + +class SelectedBranchDialog(QDialog): + + def __init__(self, glb, parent=None): + super(SelectedBranchDialog, self).__init__(parent) + + self.glb = glb + + self.name = "" + self.where_clause = "" + + self.setWindowTitle("Selected Branches") + self.setMinimumWidth(600) + + items = ( + ("Report name:", "Enter a name to appear in the window title bar", "", "", "", ""), + ("Time ranges:", "Enter time ranges", "", "", "samples.id", ""), + ("CPUs:", "Enter CPUs or ranges e.g. 0,5-6", "", "", "cpu", ""), + ("Commands:", "Only branches with these commands will be included", "comms", "comm", "comm_id", ""), + ("PIDs:", "Only branches with these process IDs will be included", "threads", "pid", "thread_id", ""), + ("TIDs:", "Only branches with these thread IDs will be included", "threads", "tid", "thread_id", ""), + ("DSOs:", "Only branches with these DSOs will be included", "dsos", "short_name", "samples.dso_id", "to_dso_id"), + ("Symbols:", "Only branches with these symbols will be included", "symbols", "name", "symbol_id", "to_symbol_id"), + ("Raw SQL clause: ", "Enter a raw SQL WHERE clause", "", "", "", ""), + ) + self.data_items = [SQLTableDialogDataItem(glb, *x, parent=self) for x in items] + + self.grid = QGridLayout() + + for row in xrange(len(self.data_items)): + self.grid.addWidget(QLabel(self.data_items[row].label), row, 0) + self.grid.addWidget(self.data_items[row].widget, row, 1) + + self.status = QLabel() + + self.ok_button = QPushButton("Ok", self) + self.ok_button.setDefault(True) + self.ok_button.released.connect(self.Ok) + self.ok_button.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.cancel_button = QPushButton("Cancel", self) + self.cancel_button.released.connect(self.reject) + self.cancel_button.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.hbox = QHBoxLayout() + #self.hbox.addStretch() + self.hbox.addWidget(self.status) + self.hbox.addWidget(self.ok_button) + self.hbox.addWidget(self.cancel_button) + + self.vbox = QVBoxLayout() + self.vbox.addLayout(self.grid) + self.vbox.addLayout(self.hbox) + + self.setLayout(self.vbox); + + def Ok(self): + self.name = self.data_items[0].value + if not self.name: + self.ShowMessage("Report name is required") + return + for d in self.data_items: + if not d.IsValid(): + return + for d in self.data_items[1:]: + if len(d.value): + if len(self.where_clause): + self.where_clause += " AND " + self.where_clause += d.value + if len(self.where_clause): + self.where_clause = " AND ( " + self.where_clause + " ) " + else: + self.ShowMessage("No selection") + return + self.accept() + + def ShowMessage(self, msg): + self.status.setText("" + msg) + + def ClearMessage(self): + self.status.setText("") + # Event list def GetEventList(db): @@ -1888,6 +2207,8 @@ class MainWindow(QMainWindow): if event == "branches": label = "All branches" if branches_events == 1 else "All branches " + "(id=" + dbid + ")" reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewBranchView(x), self)) + label = "Selected branches" if branches_events == 1 else "Selected branches " + "(id=" + dbid + ")" + reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewSelectedBranchView(x), self)) def TableMenu(self, tables, menu): table_menu = menu.addMenu("&Tables") @@ -1900,6 +2221,12 @@ class MainWindow(QMainWindow): def NewBranchView(self, event_id): BranchWindow(self.glb, event_id, "", "", self) + def NewSelectedBranchView(self, event_id): + dialog = SelectedBranchDialog(self.glb, self) + ret = dialog.exec_() + if ret: + BranchWindow(self.glb, event_id, dialog.name, dialog.where_clause, self) + def NewTableView(self, table_name): TableWindow(self.glb, table_name, self) From 65b24292e8f34df22a16bf7c47823325ac247572 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Sun, 4 Nov 2018 17:12:37 +0200 Subject: [PATCH 12/18] perf scripts python: exported-sql-viewer.py: Add help window Add a window to display help. It is also possible to display the help only, by using the option "--help-only" instead of a database name. Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181104151238.15947-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../scripts/python/exported-sql-viewer.py | 155 +++++++++++++++++- 1 file changed, 154 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index a9d2b3170141..c2fcf6c5237a 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -2084,6 +2084,147 @@ class WindowMenu(): def setActiveSubWindow(self, nr): self.mdi_area.setActiveSubWindow(self.mdi_area.subWindowList()[nr - 1]) +# Help text + +glb_help_text = """ +

Contents

+ +

1. Reports

+

1.1 Context-Sensitive Call Graph

+

1.2 All branches

+

1.3 Selected branches

+

2. Tables

+

1. Reports

+

1.1 Context-Sensitive Call Graph

+The result is a GUI window with a tree representing a context-sensitive +call-graph. Expanding a couple of levels of the tree and adjusting column +widths to suit will display something like: +
+                                         Call Graph: pt_example
+Call Path                          Object      Count   Time(ns)  Time(%)  Branch Count   Branch Count(%)
+v- ls
+    v- 2638:2638
+        v- _start                  ld-2.19.so    1     10074071   100.0         211135            100.0
+          |- unknown               unknown       1        13198     0.1              1              0.0
+          >- _dl_start             ld-2.19.so    1      1400980    13.9          19637              9.3
+          >- _d_linit_internal     ld-2.19.so    1       448152     4.4          11094              5.3
+          v-__libc_start_main@plt  ls            1      8211741    81.5         180397             85.4
+             >- _dl_fixup          ld-2.19.so    1         7607     0.1            108              0.1
+             >- __cxa_atexit       libc-2.19.so  1        11737     0.1             10              0.0
+             >- __libc_csu_init    ls            1        10354     0.1             10              0.0
+             |- _setjmp            libc-2.19.so  1            0     0.0              4              0.0
+             v- main               ls            1      8182043    99.6         180254             99.9
+
+

Points to note:

+
    +
  • The top level is a command name (comm)
  • +
  • The next level is a thread (pid:tid)
  • +
  • Subsequent levels are functions
  • +
  • 'Count' is the number of calls
  • +
  • 'Time' is the elapsed time until the function returns
  • +
  • Percentages are relative to the level above
  • +
  • 'Branch Count' is the total number of branches for that function and all functions that it calls +
+

Find

+Ctrl-F displays a Find bar which finds function names by either an exact match or a pattern match. +The pattern matching symbols are ? for any character and * for zero or more characters. +

1.2 All branches

+The All branches report displays all branches in chronological order. +Not all data is fetched immediately. More records can be fetched using the Fetch bar provided. +

Disassembly

+Open a branch to display disassembly. This only works if: +
    +
  1. The disassembler is available. Currently, only Intel XED is supported - see Intel XED Setup
  2. +
  3. The object code is available. Currently, only the perf build ID cache is searched for object code. +The default directory ~/.debug can be overridden by setting environment variable PERF_BUILDID_DIR. +One exception is kcore where the DSO long name is used (refer dsos_view on the Tables menu), +or alternatively, set environment variable PERF_KCORE to the kcore file name.
  4. +
+

Intel XED Setup

+To use Intel XED, libxed.so must be present. To build and install libxed.so: +
+git clone https://github.com/intelxed/mbuild.git mbuild
+git clone https://github.com/intelxed/xed
+cd xed
+./mfile.py --share
+sudo ./mfile.py --prefix=/usr/local install
+sudo ldconfig
+
+

Find

+Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match. +Refer to Python documentation for the regular expression syntax. +All columns are searched, but only currently fetched rows are searched. +

1.3 Selected branches

+This is the same as the All branches report but with the data reduced +by various selection criteria. A dialog box displays available criteria which are AND'ed together. +

1.3.1 Time ranges

+The time ranges hint text shows the total time range. Relative time ranges can also be entered in +ms, us or ns. Also, negative values are relative to the end of trace. Examples: +
+	81073085947329-81073085958238	From 81073085947329 to 81073085958238
+	100us-200us		From 100us to 200us
+	10ms-			From 10ms to the end
+	-100ns			The first 100ns
+	-10ms-			The last 10ms
+
+N.B. Due to the granularity of timestamps, there could be no branches in any given time range. +

2. Tables

+The Tables menu shows all tables and views in the database. Most tables have an associated view +which displays the information in a more friendly way. Not all data for large tables is fetched +immediately. More records can be fetched using the Fetch bar provided. Columns can be sorted, +but that can be slow for large tables. +

There are also tables of database meta-information. +For SQLite3 databases, the sqlite_master table is included. +For PostgreSQL databases, information_schema.tables/views/columns are included. +

Find

+Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match. +Refer to Python documentation for the regular expression syntax. +All columns are searched, but only currently fetched rows are searched. +""" + +# Help window + +class HelpWindow(QMdiSubWindow): + + def __init__(self, glb, parent=None): + super(HelpWindow, self).__init__(parent) + + self.text = QTextBrowser() + self.text.setHtml(glb_help_text) + self.text.setReadOnly(True) + self.text.setOpenExternalLinks(True) + + self.setWidget(self.text) + + AddSubWindow(glb.mainwindow.mdi_area, self, "Exported SQL Viewer Help") + +# Main window that only displays the help text + +class HelpOnlyWindow(QMainWindow): + + def __init__(self, parent=None): + super(HelpOnlyWindow, self).__init__(parent) + + self.setMinimumSize(200, 100) + self.resize(800, 600) + self.setWindowTitle("Exported SQL Viewer Help") + self.setWindowIcon(self.style().standardIcon(QStyle.SP_MessageBoxInformation)) + + self.text = QTextBrowser() + self.text.setHtml(glb_help_text) + self.text.setReadOnly(True) + self.text.setOpenExternalLinks(True) + + self.setCentralWidget(self.text) + # Font resize def ResizeFont(widget, diff): @@ -2170,6 +2311,9 @@ class MainWindow(QMainWindow): self.window_menu = WindowMenu(self.mdi_area, menu) + help_menu = menu.addMenu("&Help") + help_menu.addAction(CreateAction("&Exported SQL Viewer Help", "Helpful information", self.Help, self, QKeySequence.HelpContents)) + def Find(self): win = self.mdi_area.activeSubWindow() if win: @@ -2230,6 +2374,9 @@ class MainWindow(QMainWindow): def NewTableView(self, table_name): TableWindow(self.glb, table_name, self) + def Help(self): + HelpWindow(self.glb, self) + # XED Disassembler class xed_state_t(Structure): @@ -2429,10 +2576,16 @@ class DBRef(): def Main(): if (len(sys.argv) < 2): - print >> sys.stderr, "Usage is: exported-sql-viewer.py " + print >> sys.stderr, "Usage is: exported-sql-viewer.py { | --help-only}" raise Exception("Too few arguments") dbname = sys.argv[1] + if dbname == "--help-only": + app = QApplication(sys.argv) + mainwindow = HelpOnlyWindow() + mainwindow.show() + err = app.exec_() + sys.exit(err) is_sqlite3 = False try: From 35fa1cee21e34f43db928d022610707d5a234faf Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Sun, 4 Nov 2018 17:12:38 +0200 Subject: [PATCH 13/18] perf scripts python: exported-sql-viewer.py: Fix table find when table re-ordered Table rows can be re-ordered by selecting a column to sort by. After re-ordering, the "find" operation was highlighting the wrong row, fix it. Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181104151238.15947-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index c2fcf6c5237a..f278ce5ebab7 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -1975,7 +1975,7 @@ class TableWindow(QMdiSubWindow, ResizeColumnsToContentsBase): def FindDone(self, row): self.find_bar.Idle() if row >= 0: - self.view.setCurrentIndex(self.model.index(row, 0, QModelIndex())) + self.view.setCurrentIndex(self.model.mapFromSource(self.data_model.index(row, 0, QModelIndex()))) else: self.find_bar.NotFound() @@ -2188,6 +2188,8 @@ For PostgreSQL databases, information_schema.tables/views/columns are included. Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match. Refer to Python documentation for the regular expression syntax. All columns are searched, but only currently fetched rows are searched. +

N.B. Results are found in id order, so if the table is re-ordered, find-next and find-previous +will go to the next/previous result in id order, instead of display order. """ # Help window From 93f8be2799515e01647c5a9b0d17a90a00ebcf82 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 5 Nov 2018 09:35:04 +0200 Subject: [PATCH 14/18] perf intel-pt: Add more event information to debug log More event information is useful for debugging, especially MMAP events. Signed-off-by: Adrian Hunter Link: http://lkml.kernel.org/r/20181105073505.8129-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-log.c | 5 +++++ tools/perf/util/intel-pt-decoder/intel-pt-log.h | 1 + tools/perf/util/intel-pt.c | 16 +++++++++++++--- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c index e02bc7b166a0..5e64da270f97 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c @@ -31,6 +31,11 @@ static FILE *f; static char log_name[MAX_LOG_NAME]; bool intel_pt_enable_logging; +void *intel_pt_log_fp(void) +{ + return f; +} + void intel_pt_log_enable(void) { intel_pt_enable_logging = true; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h index 45b64f93f358..cc084937f701 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h @@ -22,6 +22,7 @@ struct intel_pt_pkt; +void *intel_pt_log_fp(void); void intel_pt_log_enable(void); void intel_pt_log_disable(void); void intel_pt_log_set_name(const char *name); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 86cc9a64e982..149ff361ca78 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -206,6 +206,16 @@ static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf, intel_pt_dump(pt, buf, len); } +static void intel_pt_log_event(union perf_event *event) +{ + FILE *f = intel_pt_log_fp(); + + if (!intel_pt_enable_logging || !f) + return; + + perf_event__fprintf(event, f); +} + static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, struct auxtrace_buffer *b) { @@ -2010,9 +2020,9 @@ static int intel_pt_process_event(struct perf_session *session, event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) err = intel_pt_context_switch(pt, event, sample); - intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n", - perf_event__name(event->header.type), event->header.type, - sample->cpu, sample->time, timestamp); + intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ", + event->header.type, sample->cpu, sample->time, timestamp); + intel_pt_log_event(event); return err; } From f6c23e3b55cb93f32a724f41af8d38888bc2ab6b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 5 Nov 2018 09:35:05 +0200 Subject: [PATCH 15/18] perf intel-pt: Add MTC and CYC timestamps to debug log One cause of decoding errors is un-synchronized side-band data. Timestamps are needed to debug such cases. TSC packet timestamps are logged. Log also MTC and CYC timestamps. Signed-off-by: Adrian Hunter Link: http://lkml.kernel.org/r/20181105073505.8129-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 58f6a9ceb590..4503f3ca45ab 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1474,6 +1474,8 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) decoder->have_calc_cyc_to_tsc = false; intel_pt_calc_cyc_to_tsc(decoder, true); } + + intel_pt_log_to("Setting timestamp", decoder->timestamp); } static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder) @@ -1514,6 +1516,8 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) decoder->timestamp = timestamp; decoder->timestamp_insn_cnt = 0; + + intel_pt_log_to("Setting timestamp", decoder->timestamp); } /* Walk PSB+ packets when already in sync. */ From e2c39f36c354a06c6e9d32d4fdf8660b41803d82 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 5 Nov 2018 15:46:51 -0300 Subject: [PATCH 16/18] perf beauty: Use SRCARCH, ARCH=x86_64 must map to "x86" to find the headers Guenter reported that using ARCH=x86_64 to build perf has regressed: $ make -C tools/perf O=/tmp/build/perf ARCH=x86_64 make: Entering directory '/home/acme/git/perf/tools/perf' BUILD: Doing 'make -j4' parallel build HOSTCC /tmp/build/perf/fixdep.o HOSTLD /tmp/build/perf/fixdep-in.o LINK /tmp/build/perf/fixdep Auto-detecting system features: ... dwarf: [ on ] ... bpf: [ on ] GEN /tmp/build/perf/common-cmds.h make[2]: *** No rule to make target '/home/acme/git/perf/tools/arch/x86_64/include/uapi/asm//mman.h', needed by '/tmp/build/perf/trace/beauty/generated/mmap_flags_array.c'. Stop. make[2]: *** Waiting for unfinished jobs.... PERF_VERSION = 4.19.gf6c23e3 make[1]: *** [Makefile.perf:207: sub-make] Error 2 make: *** [Makefile:70: all] Error 2 make: Leaving directory '/home/acme/git/perf/tools/perf' $ This is because we must use $(SRCARCH) where we were using $(ARCH), so that, just like the top level Makefile, we get this done: # Additional ARCH settings for x86 ifeq ($(ARCH),i386) SRCARCH := x86 endif ifeq ($(ARCH),x86_64) SRCARCH := x86 endif Which is done in tools/scripts/Makefile.arch, so switch to use $(SRCARCH). Reported-by: Guenter Roeck Cc: Adrian Hunter Cc: Clark Williams Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: fbd7458db757 ("perf beauty: Wire up the mmap flags table generator to the Makefile") Link: https://lkml.kernel.org/r/20181105184612.GD7077@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 3ccb4f0bf088..d95655489f7e 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -387,7 +387,7 @@ SHELL = $(SHELL_PATH) linux_uapi_dir := $(srctree)/tools/include/uapi/linux asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic -arch_asm_uapi_dir := $(srctree)/tools/arch/$(ARCH)/include/uapi/asm/ +arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/ beauty_outdir := $(OUTPUT)trace/beauty/generated beauty_ioctl_outdir := $(beauty_outdir)/ioctl From 6ac2226229d931153331a93d90655a3de05b9290 Mon Sep 17 00:00:00 2001 From: Gustavo Romero Date: Thu, 1 Nov 2018 20:13:21 -0400 Subject: [PATCH 17/18] perf tools: Fix undefined symbol scnprintf in libperf-jvmti.so Currently jvmti agent can not be used because function scnprintf is not present in the agent libperf-jvmti.so. As a result the JVM when using such agent to record JITed code profiling information will fail on looking up scnprintf: java: symbol lookup error: lib/libperf-jvmti.so: undefined symbol: scnprintf This commit fixes that by reverting to the use of snprintf, that can be looked up, instead of scnprintf, adding a proper check for the returned value in order to print a better error message when the jitdump file pathname is too long. Checking the returned value also helps to comply with some recent gcc versions, like gcc8, which will fail due to truncated writing checks related to the -Werror=format-truncation= flag. Signed-off-by: Gustavo Romero Acked-by: Jiri Olsa LPU-Reference: 1541117601-18937-2-git-send-email-gromero@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-mvpxxxy7wnzaj74cq75muw3f@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/jvmti/jvmti_agent.c | 49 ++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index ac1bcdc17dae..f7eb63cbbc65 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -125,7 +125,7 @@ perf_get_timestamp(void) } static int -debug_cache_init(void) +create_jit_cache_dir(void) { char str[32]; char *base, *p; @@ -144,8 +144,13 @@ debug_cache_init(void) strftime(str, sizeof(str), JIT_LANG"-jit-%Y%m%d", &tm); - snprintf(jit_path, PATH_MAX - 1, "%s/.debug/", base); - + ret = snprintf(jit_path, PATH_MAX, "%s/.debug/", base); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jit cache dir because %s/.debug/" + " is too long, please check the cwd, JITDUMPDIR, and" + " HOME variables", base); + return -1; + } ret = mkdir(jit_path, 0755); if (ret == -1) { if (errno != EEXIST) { @@ -154,20 +159,32 @@ debug_cache_init(void) } } - snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit", base); + ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit", base); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jit cache dir because" + " %s/.debug/jit is too long, please check the cwd," + " JITDUMPDIR, and HOME variables", base); + return -1; + } ret = mkdir(jit_path, 0755); if (ret == -1) { if (errno != EEXIST) { - warn("cannot create jit cache dir %s", jit_path); + warn("jvmti: cannot create jit cache dir %s", jit_path); return -1; } } - snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit/%s.XXXXXXXX", base, str); - + ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit/%s.XXXXXXXX", base, str); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jit cache dir because" + " %s/.debug/jit/%s.XXXXXXXX is too long, please check" + " the cwd, JITDUMPDIR, and HOME variables", + base, str); + return -1; + } p = mkdtemp(jit_path); if (p != jit_path) { - warn("cannot create jit cache dir %s", jit_path); + warn("jvmti: cannot create jit cache dir %s", jit_path); return -1; } @@ -228,7 +245,7 @@ void *jvmti_open(void) { char dump_path[PATH_MAX]; struct jitheader header; - int fd; + int fd, ret; FILE *fp; init_arch_timestamp(); @@ -245,12 +262,22 @@ void *jvmti_open(void) memset(&header, 0, sizeof(header)); - debug_cache_init(); + /* + * jitdump file dir + */ + if (create_jit_cache_dir() < 0) + return NULL; /* * jitdump file name */ - scnprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); + ret = snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jitdump file full path because" + " %s/jit-%i.dump is too long, please check the cwd," + " JITDUMPDIR, and HOME variables", jit_path, getpid()); + return NULL; + } fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666); if (fd == -1) From 8e88c29b351ed4e09dd63f825f1c8260b0cb0ab3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 23 Sep 2018 17:04:20 +0200 Subject: [PATCH 18/18] perf tools: Do not zero sample_id_all for group members Andi reported following malfunction: # perf record -e '{ref-cycles,cycles}:S' -a sleep 1 # perf script non matching sample_id_all That's because we disable sample_id_all bit for non-sampling group members. We can't do that, because it needs to be the same over the whole event list. This patch keeps it untouched again. Reported-by: Andi Kleen Tested-by: Andi Kleen Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180923150420.27327-1-jolsa@kernel.org Fixes: e9add8bac6c6 ("perf evsel: Disable write_backward for leader sampling group events") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr/test-record-group-sampling | 1 - tools/perf/util/evsel.c | 1 - 2 files changed, 2 deletions(-) diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling index 8a33ca4f9e1f..f0729c454f16 100644 --- a/tools/perf/tests/attr/test-record-group-sampling +++ b/tools/perf/tests/attr/test-record-group-sampling @@ -37,4 +37,3 @@ sample_freq=0 sample_period=0 freq=0 write_backward=0 -sample_id_all=0 diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 6d187059a373..d37bb1566cd9 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -956,7 +956,6 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->sample_freq = 0; attr->sample_period = 0; attr->write_backward = 0; - attr->sample_id_all = 0; } if (opts->no_samples)