From 8e21be4f815ca8edfee1decd87f298f92123f719 Mon Sep 17 00:00:00 2001
From: Song Liu
Date: Mon, 20 May 2019 23:44:06 -0700
Subject: [PATCH 01/85] perf data: Add description of header
 HEADER_BPF_PROG_INFO and HEADER_BPF_BTF

This patch adds descriptions of HEADER_BPF_PROG_INFO and HEADER_BPF_BTF
to perf.data-file-format.txt.

Requested-by: Arnaldo Carvalho de Melo
Signed-off-by: Song Liu
Cc: Jiri Olsa
Cc: Peter Zijlstra
Fixes: 606f972b1361 ("perf bpf: Save bpf_prog_info information as headers to perf.data")
Link: http://lkml.kernel.org/r/20190521064406.2498925-1-songliubraving@fb.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 .../perf/Documentation/perf.data-file-format.txt | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index 6967e9b02be5..022bb8b1c84a 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -272,6 +272,22 @@ struct {
 Two uint64_t for the time of first sample and the time of last sample.

+	HEADER_BPF_PROG_INFO = 25,
+
+struct bpf_prog_info_linear, which contains detailed information about
+a BPF program, including type, id, tag, jited/xlated instructions, etc.
+
+	HEADER_BPF_BTF = 26,
+
+Contains BPF Type Format (BTF). For more information about BTF, please
+refer to Documentation/bpf/btf.rst.
+
+struct {
+	u32	id;
+	u32	data_size;
+	char	data[];
+};
+
 	HEADER_COMPRESSED = 27,

 struct {

From 835fbf126ce0a359626bc59b8da4e7601f01fc87 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo
Date: Wed, 29 May 2019 15:35:03 -0300
Subject: [PATCH 02/85] perf data: Document memory topology header:
 HEADER_MEM_TOPOLOGY

We forgot to update the perf.data file format document for the
HEADER_MEM_TOPOLOGY header, do it now from comments in the patch
introducing it.

Cc: Adrian Hunter
Cc: Andi Kleen
Cc: Chong Jiang
Cc: Jiri Olsa
Cc: Namhyung Kim
Cc: Simon Que
Fixes: e2091cedd51b ("perf tools: Add MEM_TOPOLOGY feature to perf data file")
Link: https://lkml.kernel.org/n/tip-h5lcm1nbe9ztxwm61gmadd56@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 .../Documentation/perf.data-file-format.txt  | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index 022bb8b1c84a..99733751695b 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -272,6 +272,30 @@ struct {
 Two uint64_t for the time of first sample and the time of last sample.

+	HEADER_MEM_TOPOLOGY = 22,
+
+Physical memory map and its node assignments.
+
+The format of data in MEM_TOPOLOGY is as follows:
+
+   0 - version          | for future changes
+   8 - block_size_bytes | /sys/devices/system/memory/block_size_bytes
+  16 - count            | number of nodes
+
+For each node we store a map of physical indexes:
+
+  32 - node id          | node index
+  40 - size             | size of bitmap
+  48 - bitmap           | bitmap of memory indexes that belong to the node
+                        | /sys/devices/system/node/node/memory
+
+The MEM_TOPOLOGY can be displayed with the following command:
+
+$ perf report --header-only -I
+...
+# memory nodes (nr 1, block size 0x8000000):
+#    0 [7G]: 0-23,32-69
+
 	HEADER_BPF_PROG_INFO = 25,

 struct bpf_prog_info_linear, which contains detailed information about

From a9de7cfc7663882e98ec3b2ecf35c546a013b956 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo
Date: Wed, 29 May 2019 15:43:51 -0300
Subject: [PATCH 03/85] perf data: Document clockid header: HEADER_CLOCKID

We forgot to update the perf.data file format document for the
HEADER_CLOCKID header, do it now from comments in the patch introducing
it.

Cc: Adrian Hunter
Cc: Alexander Shishkin
Cc: Alexey Budankov
Cc: Andi Kleen
Cc: Chong Jiang
Cc: Jiri Olsa
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Simon Que
Fixes: cf7905165fee ("perf record: Encode -k clockid frequency into Perf trace")
Link: https://lkml.kernel.org/n/tip-slhnjp06027j3ae17qqetzxj@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/Documentation/perf.data-file-format.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index 99733751695b..600999f89c6d 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -296,6 +296,12 @@ $ perf report --header-only -I
 # memory nodes (nr 1, block size 0x8000000):
 #    0 [7G]: 0-23,32-69

+	HEADER_CLOCKID = 23,
+
+One uint64_t for the clockid frequency, specified, for instance, via 'perf
+record -k' (see clock_gettime()), to enable conversion of timestamp-derived
+metrics into wall clock time at the reporting stage.
+
 	HEADER_BPF_PROG_INFO = 25,

 struct bpf_prog_info_linear, which contains detailed information about

From 0da6ae94e4102fa145149dd0878b266c932507aa Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo
Date: Wed, 29 May 2019 15:50:50 -0300
Subject: [PATCH 04/85] perf data: Document directory format header:
 HEADER_DIR_FORMAT

We forgot to update the perf.data file format document for the
HEADER_DIR_FORMAT header, do it now from comments in the patch
introducing it.

Cc: Adrian Hunter
Cc: Alexander Shishkin
Cc: Alexey Budankov
Cc: Andi Kleen
Cc: Chong Jiang
Cc: Jiri Olsa
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Simon Que
Fixes: 258031c017c3 ("perf header: Add DIR_FORMAT feature to describe directory data")
Link: https://lkml.kernel.org/n/tip-jbrzb7ijb5al33gi8br6f9rr@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 .../Documentation/perf.data-file-format.txt  | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index 600999f89c6d..6375e6fb8bac 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -302,6 +302,23 @@ One uint64_t for the clockid frequency, specified, for instance, via 'perf
 record -k' (see clock_gettime()), to enable conversion of timestamp-derived
 metrics into wall clock time at the reporting stage.

+	HEADER_DIR_FORMAT = 24,
+
+The layout of the data files is described by the HEADER_DIR_FORMAT feature.
+Currently it holds only a version number (1):
+
+  uint64_t version;
+
+Version value (1) means that the data files:
+
+- Follow the 'data.*' name format.
+
+- Contain raw event data in standard perf format as read from the kernel
+  (and need to be sorted)
+
+Future versions are expected to describe different data file layouts
+according to special needs.
+
 	HEADER_BPF_PROG_INFO = 25,

 struct bpf_prog_info_linear, which contains detailed information about

From e5f177a578edf4501d0758bfa922cd0b0f9d0e9d Mon Sep 17 00:00:00 2001
From: Leo Yan
Date: Thu, 30 May 2019 17:38:01 +0800
Subject: [PATCH 05/85] perf symbols: Remove unused variable 'err'

Variable 'err' is defined but never used in function symsrc__init(),
remove it and directly return -1 at the end of the function.

Signed-off-by: Leo Yan
Cc: Alexander Shishkin
Cc: Jiri Olsa
Cc: Namhyung Kim
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/20190530093801.20510-1-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/symbol-elf.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 4ad106a5f2c0..fdc5bd7dbb90 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -699,7 +699,6 @@ bool __weak elf__needs_adjust_symbols(GElf_Ehdr ehdr)
 int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
 		 enum dso_binary_type type)
 {
-	int err = -1;
 	GElf_Ehdr ehdr;
 	Elf *elf;
 	int fd;
@@ -793,7 +792,7 @@ out_elf_end:
 	elf_end(elf);
 out_close:
 	close(fd);
-	return err;
+	return -1;
 }

 /**

From d194d8fccf610a3f5cdfa94f2bbbe6b89e11a295 Mon Sep 17 00:00:00 2001
From: Alexey Budankov
Date: Thu, 30 May 2019 22:03:36 +0300
Subject: [PATCH 06/85] perf record: Allow mixing --user-regs with
 --call-graph=dwarf

When DWARF stacks were requested and, at the same time, the user
specified a register set using the --user-regs option, the full register
context was being captured on samples:

  $ perf record -g --call-graph dwarf,1024 --user-regs=IP,SP,BP -- stack_test2.g.O3

  188143843893585 0x6b48 [0x4f8]: PERF_RECORD_SAMPLE(IP, 0x4002): 23828/23828: 0x401236 period: 1363819 addr: 0x7ffedbdd51ac
  ... FP chain: nr:0
  ... user regs: mask 0xff0fff ABI 64-bit
  .... AX    0x53b
  .... BX    0x7ffedbdd3cc0
  .... CX    0xffffffff
  .... DX    0x33d3a
  .... SI    0x7f09b74c38d0
  .... DI    0x0
  .... BP    0x401260
  .... SP    0x7ffedbdd3cc0
  .... IP    0x401236
  .... FLAGS 0x20a
  .... CS    0x33
  .... SS    0x2b
  .... R8    0x7f09b74c3800
  .... R9    0x7f09b74c2da0
  .... R10   0xfffffffffffff3ce
  .... R11   0x246
  .... R12   0x401070
  .... R13   0x7ffedbdd5db0
  .... R14   0x0
  .... R15   0x0
  ... ustack: size 1024, offset 0xe0
  . data_src: 0x5080021
  ... thread: stack_test2.g.O:23828
  ...... dso: /root/abudanko/stacks/stack_test2.g.O3

I.e. the --user-regs=IP,SP,BP was being ignored, being overridden by the
needs of --call-graph=dwarf.

After applying the change in this patch the sample data contains the
user-specified registers, while making sure that at least the minimal
set of registers needed for DWARF unwinding (DWARF_MINIMAL_REGS) is
requested.

The user is warned that DWARF unwinding may not work if extra registers
end up being needed.

  -g call-graph dwarf,K                          full_regs
  --user-regs=user_regs                          user_regs
  -g call-graph dwarf,K --user-regs=user_regs    user_regs + DWARF_MINIMAL_REGS

  $ perf record -g --call-graph dwarf,1024 --user-regs=BP -- ls
  WARNING: The use of --call-graph=dwarf may require all the user registers,
  specifying a subset with --user-regs may render DWARF unwinding unreliable,
  so the minimal registers set (IP, SP) is explicitly forced.
arch COPYING Documentation include Kbuild lbuild MAINTAINERS modules.builtin Module.symvers perf.data.old scripts System.map virt block CREDITS drivers init Kconfig lib Makefile modules.builtin.modinfo net README security tools vmlinux certs crypto fs ipc kernel LICENSES mm modules.order perf.data samples sound usr vmlinux.o [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.030 MB perf.data (10 samples) ] 188368474305373 0x5e40 [0x470]: PERF_RECORD_SAMPLE(IP, 0x4002): 23839/23839: 0x401236 period: 1260507 addr: 0x7ffd3d85e96c ... FP chain: nr:0 ... user regs: mask 0x1c0 ABI 64-bit .... BP 0x401260 .... SP 0x7ffd3d85cc20 .... IP 0x401236 ... ustack: size 1024, offset 0x58 . data_src: 0x5080021 Committer notes: Detected build failures on arches where PERF_REGS_ is not available, such as debian:experimental-x-{mips,mips64,mipsel}, fedora 24 and 30 for ARC uClibc and glibc, reported to Alexey that provided a patch moving the DWARF_MINIMAL_REGS from evsel.c to util/perf_regs.h, where it is guarded by an HAVE_PERF_REGS_SUPPORT ifdef. Committer testing: # perf record --user-regs=bp,ax -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.955 MB perf.data (1773 samples) ] # perf script -F+uregs | grep AX: | head -5 perf 1719 [000] 181.272398: 1 cycles: ffffffffba06a7c4 native_write_msr+0x4 (/lib/modules/5.2.0-rc1+/build/vmlinux) ABI:2 AX:0xffffffffffffffda BP:0x7ffef828fb00 perf 1719 [000] 181.272402: 1 cycles: ffffffffba06a7c4 native_write_msr+0x4 (/lib/modules/5.2.0-rc1+/build/vmlinux) ABI:2 AX:0xffffffffffffffda BP:0x7ffef828fb00 perf 1719 [000] 181.272403: 8 cycles: ffffffffba06a7c4 native_write_msr+0x4 (/lib/modules/5.2.0-rc1+/build/vmlinux) ABI:2 AX:0xffffffffffffffda BP:0x7ffef828fb00 perf 1719 [000] 181.272405: 181 cycles: ffffffffba06a7c6 native_write_msr+0x6 (/lib/modules/5.2.0-rc1+/build/vmlinux) ABI:2 AX:0xffffffffffffffda BP:0x7ffef828fb00 perf 1719 [000] 181.272406: 4405 cycles: ffffffffba06a7c4 native_write_msr+0x4 (/lib/modules/5.2.0-rc1+/build/vmlinux) ABI:2 AX:0xffffffffffffffda BP:0x7ffef828fb00 # perf record --call-graph=dwarf --user-regs=bp,ax -a sleep 1 WARNING: The use of --call-graph=dwarf may require all the user registers, specifying a subset with --user-regs may render DWARF unwinding unreliable, so the minimal registers set (IP, SP) is explicitly forced. 
[ perf record: Woken up 55 times to write data ] [ perf record: Captured and wrote 24.184 MB perf.data (2841 samples) ] [root@quaco ~]# perf script --hide-call-graph -F+uregs | grep AX: | head -5 perf 1729 [000] 211.268006: 1 cycles: ffffffffba06a7c4 native_write_msr+0x4 (/lib/modules/5.2.0-rc1+/build/vmlinux) ABI:2 AX:0xffffffffffffffda BP:0x7ffc8679abb0 SP:0x7ffc8679ab78 IP:0x7fa75223a0db perf 1729 [000] 211.268014: 1 cycles: ffffffffba06a7c4 native_write_msr+0x4 (/lib/modules/5.2.0-rc1+/build/vmlinux) ABI:2 AX:0xffffffffffffffda BP:0x7ffc8679abb0 SP:0x7ffc8679ab78 IP:0x7fa75223a0db perf 1729 [000] 211.268017: 5 cycles: ffffffffba06a7c4 native_write_msr+0x4 (/lib/modules/5.2.0-rc1+/build/vmlinux) ABI:2 AX:0xffffffffffffffda BP:0x7ffc8679abb0 SP:0x7ffc8679ab78 IP:0x7fa75223a0db perf 1729 [000] 211.268020: 48 cycles: ffffffffba06a7c6 native_write_msr+0x6 (/lib/modules/5.2.0-rc1+/build/vmlinux) ABI:2 AX:0xffffffffffffffda BP:0x7ffc8679abb0 SP:0x7ffc8679ab78 IP:0x7fa75223a0db perf 1729 [000] 211.268024: 490 cycles: ffffffffba00e471 intel_bts_enable_local+0x21 (/lib/modules/5.2.0-rc1+/build/vmlinux) ABI:2 AX:0xffffffffffffffda BP:0x7ffc8679abb0 SP:0x7ffc8679ab78 IP:0x7fa75223a0db # Signed-off-by: Alexey Budankov Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/e7fd37b1-af22-0d94-a0dc-5895e803bbfe@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 9 ++++++++- tools/perf/util/perf_regs.h | 4 ++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a6f572a40deb..cc6e7a0dda92 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -702,7 +702,14 @@ static void __perf_evsel__config_callchain(struct perf_evsel *evsel, if (!function) { perf_evsel__set_sample_bit(evsel, REGS_USER); perf_evsel__set_sample_bit(evsel, STACK_USER); - attr->sample_regs_user |= PERF_REGS_MASK; + if (opts->sample_user_regs && DWARF_MINIMAL_REGS != PERF_REGS_MASK) { + attr->sample_regs_user |= DWARF_MINIMAL_REGS; + pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, " + "specifying a subset with --user-regs may render DWARF unwinding unreliable, " + "so the minimal registers set (IP, SP) is explicitly forced.\n"); + } else { + attr->sample_regs_user |= PERF_REGS_MASK; + } attr->sample_stack_user = param->dump_size; attr->exclude_callchain_user = 1; } else { diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index cb9c246c8962..47fe34e5f7d5 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -29,12 +29,16 @@ uint64_t arch__user_reg_mask(void); #ifdef HAVE_PERF_REGS_SUPPORT #include +#define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP)) + int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); #else #define PERF_REGS_MASK 0 #define PERF_REGS_MAX 0 +#define DWARF_MINIMAL_REGS PERF_REGS_MASK + static inline const char *perf_reg_name(int id __maybe_unused) { return NULL; From 948e9dc8bb266649a618ac974010292bf36fb213 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 20 May 2019 14:37:10 +0300 Subject: [PATCH 07/85] perf intel-pt: Factor out intel_pt_update_sample_time To eliminate some duplication and make the code more understandable, factor out intel_pt_update_sample_time. 
Signed-off-by: Adrian Hunter
Cc: Jiri Olsa
Link: http://lkml.kernel.org/r/20190520113728.14389-5-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 .../util/intel-pt-decoder/intel-pt-decoder.c  | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index f4c3c84b090f..1ab4070b5633 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -479,6 +479,12 @@ static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
 	return -EBADMSG;
 }

+static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder)
+{
+	decoder->sample_timestamp = decoder->timestamp;
+	decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+}
+
 static int intel_pt_get_data(struct intel_pt_decoder *decoder)
 {
 	struct intel_pt_buffer buffer = { .buf = 0, };
@@ -1319,8 +1325,7 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
 			}
 			decoder->ip += intel_pt_insn.length;
 			if (!decoder->tnt.count) {
-				decoder->sample_timestamp = decoder->timestamp;
-				decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+				intel_pt_update_sample_time(decoder);
 				return -EAGAIN;
 			}
 			decoder->tnt.payload <<= 1;
@@ -2413,8 +2418,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
 	if (err) {
 		decoder->state.err = intel_pt_ext_err(err);
 		decoder->state.from_ip = decoder->ip;
-		decoder->sample_timestamp = decoder->timestamp;
-		decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+		intel_pt_update_sample_time(decoder);
 	} else {
 		decoder->state.err = 0;
 		if (decoder->cbr != decoder->cbr_seen && decoder->state.type) {
@@ -2422,10 +2426,8 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
 			decoder->state.type |= INTEL_PT_CBR_CHG;
 			decoder->state.cbr_payload = decoder->cbr_payload;
 		}
-		if (intel_pt_sample_time(decoder->pkt_state)) {
-			decoder->sample_timestamp = decoder->timestamp;
-			decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
-		}
+		if (intel_pt_sample_time(decoder->pkt_state))
+			intel_pt_update_sample_time(decoder);
 	}

 	decoder->state.timestamp = decoder->sample_timestamp;

From 7b4b4f83881e11b1fe5d8743953f81addb0871de Mon Sep 17 00:00:00 2001
From: Adrian Hunter
Date: Mon, 20 May 2019 14:37:11 +0300
Subject: [PATCH 08/85] perf intel-pt: Accumulate cycle count from CYC packets

In preparation for providing instructions-per-cycle (IPC) information,
accumulate the cycle count from CYC packets. Although CYC packets are
optional (they require the config term 'cyc' to enable cycle-accurate
mode when recording), the simplest way to count cycles is with CYC
packets.

The first complication is that cycles must be counted only when also
counting instructions, that is, when control flow packet generation is
enabled, i.e. between TIP.PGE and TIP.PGD packets. Also, sampling the
cycle count follows the same rules as sampling the timestamp, that is,
not before the instruction to which the decoder is walking is reached.
In addition, the cycle count is not accurate for any but the first
branch of a TNT packet.
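To make the counting rule concrete, here is a minimal sketch (illustrative
only, not the decoder code itself; struct cyc_state stands in for the
relevant struct intel_pt_decoder fields, two of which this patch adds):

  #include <stdbool.h>
  #include <stdint.h>

  /* Stand-in for the decoder fields this patch touches. */
  struct cyc_state {
          bool pge;               /* between TIP.PGE and TIP.PGD */
          bool sample_cyc;        /* a cycle count may be sampled now */
          uint64_t cycle_cnt;     /* pre-existing, for timestamp estimation */
          uint64_t tot_cyc_cnt;   /* cycles counted alongside instructions */
  };

  /* On each CYC packet: cycles count toward the IPC total only while
   * control flow packet generation is enabled. */
  static void on_cyc_packet(struct cyc_state *s, uint64_t payload)
  {
          s->cycle_cnt += payload;
          if (s->pge)
                  s->tot_cyc_cnt += payload;
          s->sample_cyc = true;
  }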
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-6-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/util/intel-pt-decoder/intel-pt-decoder.c | 14 +++++++++++++- .../perf/util/intel-pt-decoder/intel-pt-decoder.h | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 1ab4070b5633..ef3a1c1cd250 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -160,6 +160,8 @@ struct intel_pt_decoder { uint64_t period_mask; uint64_t period_ticks; uint64_t last_masked_timestamp; + uint64_t tot_cyc_cnt; + uint64_t sample_tot_cyc_cnt; bool continuous_period; bool overflow; bool set_fup_tx_flags; @@ -167,6 +169,7 @@ struct intel_pt_decoder { bool set_fup_mwait; bool set_fup_pwre; bool set_fup_exstop; + bool sample_cyc; unsigned int fup_tx_flags; unsigned int tx_flags; uint64_t fup_ptw_payload; @@ -1323,6 +1326,7 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) decoder->ip += intel_pt_insn.length; return 0; } + decoder->sample_cyc = false; decoder->ip += intel_pt_insn.length; if (!decoder->tnt.count) { intel_pt_update_sample_time(decoder); @@ -1515,6 +1519,9 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) decoder->have_cyc = true; decoder->cycle_cnt += decoder->packet.payload; + if (decoder->pge) + decoder->tot_cyc_cnt += decoder->packet.payload; + decoder->sample_cyc = true; if (!decoder->cyc_ref_timestamp) return; @@ -2419,6 +2426,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) decoder->state.err = intel_pt_ext_err(err); decoder->state.from_ip = decoder->ip; intel_pt_update_sample_time(decoder); + decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt; } else { decoder->state.err = 0; if (decoder->cbr != decoder->cbr_seen && decoder->state.type) { @@ -2426,14 +2434,18 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) decoder->state.type |= INTEL_PT_CBR_CHG; decoder->state.cbr_payload = decoder->cbr_payload; } - if (intel_pt_sample_time(decoder->pkt_state)) + if (intel_pt_sample_time(decoder->pkt_state)) { intel_pt_update_sample_time(decoder); + if (decoder->sample_cyc) + decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt; + } } decoder->state.timestamp = decoder->sample_timestamp; decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); decoder->state.cr3 = decoder->cr3; decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; + decoder->state.tot_cyc_cnt = decoder->sample_tot_cyc_cnt; return &decoder->state; } diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index ed088d4726ba..6a61773dc44b 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -77,6 +77,7 @@ struct intel_pt_state { uint64_t to_ip; uint64_t cr3; uint64_t tot_insn_cnt; + uint64_t tot_cyc_cnt; uint64_t timestamp; uint64_t est_timestamp; uint64_t trace_nr; From 61d276f428a11f0e4ce5203462fa488e6570684f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 20 May 2019 14:37:12 +0300 Subject: [PATCH 09/85] perf tools: Add IPC information to perf_sample Add counts of instructions and cycles, in order to represent instructions-per-cycle (IPC). 
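As a hedged illustration of how a consumer can turn the two new counters
into an IPC value (the helper below is hypothetical; the real users appear
in later patches in this series):

  #include <stdint.h>

  /* Simplified stand-in for the two fields added to struct perf_sample. */
  struct sample_counts {
          uint64_t insn_cnt;      /* instructions since the last cycle update */
          uint64_t cyc_cnt;       /* cycles since the last cycle update */
  };

  /* Zero cyc_cnt means "no new IPC information for this sample". */
  static double sample_ipc(const struct sample_counts *s)
  {
          return s->cyc_cnt ? (double)s->insn_cnt / (double)s->cyc_cnt : 0.0;
  }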
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-7-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 9e999550f247..1f1da6082806 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -204,6 +204,8 @@ struct perf_sample { u64 period; u64 weight; u64 transaction; + u64 insn_cnt; + u64 cyc_cnt; u32 cpu; u32 raw_size; u64 data_src; From 5b1dc0fd1da06d6e89f1ca8736cfe0ee84e34cc7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 20 May 2019 14:37:13 +0300 Subject: [PATCH 10/85] perf intel-pt: Add support for samples to contain IPC ratio Copy the incremental instruction count and cycle count onto 'instructions' and 'branches' samples. Because Intel PT does not update the cycle count on every branch or instruction, the incremental values will often be zero. When there are values, they will be the number of instructions and number of cycles since the last update, and thus represent the average IPC since the last IPC value. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-8-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 7a70693c1b91..3cff8fe2eaa0 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -157,6 +157,12 @@ struct intel_pt_queue { u32 flags; u16 insn_len; u64 last_insn_cnt; + u64 ipc_insn_cnt; + u64 ipc_cyc_cnt; + u64 last_in_insn_cnt; + u64 last_in_cyc_cnt; + u64 last_br_insn_cnt; + u64 last_br_cyc_cnt; char insn[INTEL_PT_INSN_BUF_SZ]; }; @@ -1162,6 +1168,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.branch_stack = (struct branch_stack *)&dummy_bs; } + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; + if (sample.cyc_cnt) { + sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt; + ptq->last_br_insn_cnt = ptq->ipc_insn_cnt; + ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt; + } + return intel_pt_deliver_synth_b_event(pt, event, &sample, pt->branches_sample_type); } @@ -1217,6 +1230,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) sample.stream_id = ptq->pt->instructions_id; sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; + if (sample.cyc_cnt) { + sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt; + ptq->last_in_insn_cnt = ptq->ipc_insn_cnt; + ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt; + } + ptq->last_insn_cnt = ptq->state->tot_insn_cnt; return intel_pt_deliver_synth_event(pt, ptq, event, &sample, @@ -1488,6 +1508,15 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->have_sample = false; + if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) { + /* + * Cycle count and instruction count only go together to create + * a valid IPC ratio when the cycle count changes. 
+ */ + ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; + ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; + } + if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) { if (state->type & INTEL_PT_CBR_CHG) { err = intel_pt_synth_cbr_sample(ptq); From 68fb45bf175e702aec6668c776050e5dbd2a6f1f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 20 May 2019 14:37:14 +0300 Subject: [PATCH 11/85] perf script: Add output of IPC ratio Add field 'ipc' to display instructions-per-cycle. Example: perf record -e intel_pt/cyc/u ls perf script --insn-trace --xed -F+ipc,-dso,-cpu,-tid ls 2670177.697113434: 7f0dfdbcd090 _start+0x0 mov %rsp, %rdi IPC: 0.00 (1/877) ls 2670177.697113434: 7f0dfdbcd093 _start+0x3 callq 0x7f0dfdbce030 ls 2670177.697113434: 7f0dfdbce030 _dl_start+0x0 pushq %rbp ls 2670177.697113434: 7f0dfdbce031 _dl_start+0x1 mov %rsp, %rbp ls 2670177.697113434: 7f0dfdbce034 _dl_start+0x4 pushq %r15 ls 2670177.697113434: 7f0dfdbce036 _dl_start+0x6 pushq %r14 ls 2670177.697113434: 7f0dfdbce038 _dl_start+0x8 pushq %r13 ls 2670177.697113434: 7f0dfdbce03a _dl_start+0xa pushq %r12 ls 2670177.697113434: 7f0dfdbce03c _dl_start+0xc mov %rdi, %r12 ls 2670177.697113434: 7f0dfdbce03f _dl_start+0xf pushq %rbx ls 2670177.697113434: 7f0dfdbce040 _dl_start+0x10 sub $0x38, %rsp ls 2670177.697113434: 7f0dfdbce044 _dl_start+0x14 rdtsc ls 2670177.697113434: 7f0dfdbce046 _dl_start+0x16 mov %eax, %eax ls 2670177.697113434: 7f0dfdbce048 _dl_start+0x18 shl $0x20, %rdx ls 2670177.697113434: 7f0dfdbce04c _dl_start+0x1c or %rax, %rdx ls 2670177.697114471: 7f0dfdbce04f _dl_start+0x1f movq 0x27e22(%rip), %rax IPC: 0.00 (15/1685) ls 2670177.697116177: 7f0dfdbce056 _dl_start+0x26 movq %rdx, 0x27683(%rip) IPC: 0.00 (1/881) Note, the IPC values are low due to page faults at the beginning of execution. The additional cycles are due to the time to enter the kernel, not the actual kernel page fault handler. Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-9-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 5 ++++- tools/perf/builtin-script.c | 23 ++++++++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index af8282782911..c59fd52e9e91 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -117,7 +117,7 @@ OPTIONS Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, - brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode. + brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode, ipc. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace @@ -203,6 +203,9 @@ OPTIONS The synth field is used by synthesized events which may be created when Instruction Trace decoding. + The ipc (instructions per cycle) field is synthesized and may have a value when + Instruction Trace decoding. + Finally, a user may not set fields to none for all event types. i.e., -F "" is not allowed. 
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 3a48a2627670..80c722ade852 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -102,6 +102,7 @@ enum perf_output_field {
 	PERF_OUTPUT_METRIC          = 1U << 28,
 	PERF_OUTPUT_MISC            = 1U << 29,
 	PERF_OUTPUT_SRCCODE         = 1U << 30,
+	PERF_OUTPUT_IPC             = 1U << 31,
 };

 struct output_option {
@@ -139,6 +140,7 @@ struct output_option {
 	{.str = "metric", .field = PERF_OUTPUT_METRIC},
 	{.str = "misc", .field = PERF_OUTPUT_MISC},
 	{.str = "srccode", .field = PERF_OUTPUT_SRCCODE},
+	{.str = "ipc", .field = PERF_OUTPUT_IPC},
 };

 enum {
@@ -1268,6 +1270,20 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample,
 	return printed;
 }

+static int perf_sample__fprintf_ipc(struct perf_sample *sample,
+				    struct perf_event_attr *attr, FILE *fp)
+{
+	unsigned int ipc;
+
+	if (!PRINT_FIELD(IPC) || !sample->cyc_cnt || !sample->insn_cnt)
+		return 0;
+
+	ipc = (sample->insn_cnt * 100) / sample->cyc_cnt;
+
+	return fprintf(fp, " \t IPC: %u.%02u (%" PRIu64 "/%" PRIu64 ") ",
+		       ipc / 100, ipc % 100, sample->insn_cnt, sample->cyc_cnt);
+}
+
 static int perf_sample__fprintf_bts(struct perf_sample *sample,
 				    struct perf_evsel *evsel,
 				    struct thread *thread,
@@ -1312,6 +1328,8 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
 		printed += perf_sample__fprintf_addr(sample, thread, attr, fp);
 	}

+	printed += perf_sample__fprintf_ipc(sample, attr, fp);
+
 	if (print_srcline_last)
 		printed += map__fprintf_srcline(al->map, al->addr, "\n  ", fp);

@@ -1859,6 +1877,9 @@ static void process_event(struct perf_script *script,

 	if (PRINT_FIELD(PHYS_ADDR))
 		fprintf(fp, "%16" PRIx64, sample->phys_addr);
+
+	perf_sample__fprintf_ipc(sample, attr, fp);
+
 	fprintf(fp, "\n");

 	if (PRINT_FIELD(SRCCODE)) {
@@ -3433,7 +3454,7 @@ int cmd_script(int argc, const char **argv)
 		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
 		     "addr,symoff,srcline,period,iregs,uregs,brstack,"
 		     "brstacksym,flags,bpf-output,brstackinsn,brstackoff,"
-		     "callindent,insn,insnlen,synth,phys_addr,metric,misc",
+		     "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc",
 		     parse_output_fields),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		    "system-wide collection from all CPUs"),

From 9bc668e3bca8fadc50d5a121a1992a72ada0d3f4 Mon Sep 17 00:00:00 2001
From: Adrian Hunter
Date: Mon, 20 May 2019 14:37:15 +0300
Subject: [PATCH 12/85] perf intel-pt: Record when decoding PSB+ packets

In preparation for using MTC packets to count cycles, record whether
decoding is between PSB and PSBEND packets.
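The shape of the change can be sketched as follows (simplified; the real
walkers in the diff below funnel several switch cases through a common
exit label, and walk_psb_packets() here is a hypothetical stand-in for
their bodies):

  #include <stdbool.h>

  struct psb_state { bool in_psb; };      /* stand-in for the decoder */

  static int walk_psb_packets(struct psb_state *s) { (void)s; return 0; }

  /* Set the flag on entry to the PSB+ walker and clear it on every exit
   * path, so the rest of the decoder can test whether it is inside PSB+. */
  static int walk_psb(struct psb_state *s)
  {
          int err;

          s->in_psb = true;
          err = walk_psb_packets(s);
          s->in_psb = false;
          return err;
  }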
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-10-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../util/intel-pt-decoder/intel-pt-decoder.c | 41 ++++++++++++++----- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index ef3a1c1cd250..a2384a314990 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -116,6 +116,7 @@ struct intel_pt_decoder { bool have_cyc; bool fixup_last_mtc; bool have_last_ip; + bool in_psb; enum intel_pt_param_flags flags; uint64_t pos; uint64_t last_ip; @@ -1549,14 +1550,17 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) { int err; + decoder->in_psb = true; + while (1) { err = intel_pt_get_next_packet(decoder); if (err) - return err; + goto out; switch (decoder->packet.type) { case INTEL_PT_PSBEND: - return 0; + err = 0; + goto out; case INTEL_PT_TIP_PGD: case INTEL_PT_TIP_PGE: @@ -1574,10 +1578,12 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) case INTEL_PT_PWRX: decoder->have_tma = false; intel_pt_log("ERROR: Unexpected packet\n"); - return -EAGAIN; + err = -EAGAIN; + goto out; case INTEL_PT_OVF: - return intel_pt_overflow(decoder); + err = intel_pt_overflow(decoder); + goto out; case INTEL_PT_TSC: intel_pt_calc_tsc_timestamp(decoder); @@ -1623,6 +1629,10 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) break; } } +out: + decoder->in_psb = false; + + return err; } static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) @@ -1996,10 +2006,12 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) { int err; + decoder->in_psb = true; + while (1) { err = intel_pt_get_next_packet(decoder); if (err) - return err; + goto out; switch (decoder->packet.type) { case INTEL_PT_TIP_PGD: @@ -2015,7 +2027,8 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) case INTEL_PT_PWRE: case INTEL_PT_PWRX: intel_pt_log("ERROR: Unexpected packet\n"); - return -ENOENT; + err = -ENOENT; + goto out; case INTEL_PT_FUP: decoder->pge = true; @@ -2074,16 +2087,20 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) decoder->pkt_state = INTEL_PT_STATE_ERR4; else decoder->pkt_state = INTEL_PT_STATE_ERR3; - return -ENOENT; + err = -ENOENT; + goto out; case INTEL_PT_BAD: /* Does not happen */ - return intel_pt_bug(decoder); + err = intel_pt_bug(decoder); + goto out; case INTEL_PT_OVF: - return intel_pt_overflow(decoder); + err = intel_pt_overflow(decoder); + goto out; case INTEL_PT_PSBEND: - return 0; + err = 0; + goto out; case INTEL_PT_PSB: case INTEL_PT_VMCS: @@ -2093,6 +2110,10 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) break; } } +out: + decoder->in_psb = false; + + return err; } static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) From f3c98c4b5ac831f29b1cc19fa84d3c8401f846d6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 20 May 2019 14:37:16 +0300 Subject: [PATCH 13/85] perf intel-pt: Re-factor TIP cases in intel_pt_walk_to_ip To make it easier to add new code for different TIP cases, separate each case. 
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-11-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../util/intel-pt-decoder/intel-pt-decoder.c | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index a2384a314990..99773445872d 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -2128,18 +2128,29 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) switch (decoder->packet.type) { case INTEL_PT_TIP_PGD: decoder->continuous_period = false; - __fallthrough; - case INTEL_PT_TIP_PGE: - case INTEL_PT_TIP: - decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; + decoder->pge = false; + if (intel_pt_have_ip(decoder)) + intel_pt_set_ip(decoder); + if (!decoder->ip) + break; + decoder->state.type |= INTEL_PT_TRACE_END; + return 0; + + case INTEL_PT_TIP_PGE: + decoder->pge = true; + if (intel_pt_have_ip(decoder)) + intel_pt_set_ip(decoder); + if (!decoder->ip) + break; + decoder->state.type |= INTEL_PT_TRACE_BEGIN; + return 0; + + case INTEL_PT_TIP: + decoder->pge = true; if (intel_pt_have_ip(decoder)) intel_pt_set_ip(decoder); if (!decoder->ip) break; - if (decoder->packet.type == INTEL_PT_TIP_PGE) - decoder->state.type |= INTEL_PT_TRACE_BEGIN; - if (decoder->packet.type == INTEL_PT_TIP_PGD) - decoder->state.type |= INTEL_PT_TRACE_END; return 0; case INTEL_PT_FUP: From 3f05516758bef438cef7adc47599f8b8faad7c3a Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 20 May 2019 14:37:17 +0300 Subject: [PATCH 14/85] perf intel-pt: Accumulate cycle count from TSC/TMA/MTC packets When CYC packets are not available, it is still possible to count cycles using TSC/TMA/MTC timestamps. As the timestamp increments in TSC ticks, convert to CPU cycles using the current core-to-bus ratio. Do not accumulate cycles when control flow packet generation is not enabled, nor when time has been "lost", typically due to mwait, which is indicated by a TSC/TMA packet that is not part of PSB+. 
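The estimate described above boils down to the following sketch (a
simplified rendering of the update logic in the diff below; tsc_to_cyc
comes from the most recent CBR packet as cbr / max_non_turbo_ratio):

  #include <stdint.h>

  /* Stand-ins for the decoder fields this patch adds. */
  struct mtc_cyc_state {
          uint64_t timestamp;             /* current TSC-derived timestamp */
          uint64_t cyc_cnt_timestamp;     /* timestamp at the last re-base */
          uint64_t base_cyc_cnt;          /* cycle total at the last re-base */
          uint64_t tot_cyc_cnt;           /* running cycle estimate */
          double tsc_to_cyc;              /* cbr / max_non_turbo_ratio */
  };

  /* Scale TSC ticks by the core-to-bus ratio to approximate core cycles,
   * and never let the running total go backwards. */
  static void mtc_cyc_cnt_upd(struct mtc_cyc_state *d)
  {
          uint64_t tsc_delta = d->timestamp - d->cyc_cnt_timestamp;
          uint64_t est = tsc_delta * d->tsc_to_cyc + d->base_cyc_cnt;

          if (est > d->tot_cyc_cnt)
                  d->tot_cyc_cnt = est;
  }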
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-12-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../util/intel-pt-decoder/intel-pt-decoder.c | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 99773445872d..9eb778f9c911 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -163,6 +163,9 @@ struct intel_pt_decoder { uint64_t last_masked_timestamp; uint64_t tot_cyc_cnt; uint64_t sample_tot_cyc_cnt; + uint64_t base_cyc_cnt; + uint64_t cyc_cnt_timestamp; + double tsc_to_cyc; bool continuous_period; bool overflow; bool set_fup_tx_flags; @@ -1423,6 +1426,42 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) return -EOVERFLOW; } +static inline void intel_pt_mtc_cyc_cnt_pge(struct intel_pt_decoder *decoder) +{ + if (decoder->have_cyc) + return; + + decoder->cyc_cnt_timestamp = decoder->timestamp; + decoder->base_cyc_cnt = decoder->tot_cyc_cnt; +} + +static inline void intel_pt_mtc_cyc_cnt_cbr(struct intel_pt_decoder *decoder) +{ + decoder->tsc_to_cyc = decoder->cbr / decoder->max_non_turbo_ratio_fp; + + if (decoder->pge) + intel_pt_mtc_cyc_cnt_pge(decoder); +} + +static inline void intel_pt_mtc_cyc_cnt_upd(struct intel_pt_decoder *decoder) +{ + uint64_t tot_cyc_cnt, tsc_delta; + + if (decoder->have_cyc) + return; + + decoder->sample_cyc = true; + + if (!decoder->pge || decoder->timestamp <= decoder->cyc_cnt_timestamp) + return; + + tsc_delta = decoder->timestamp - decoder->cyc_cnt_timestamp; + tot_cyc_cnt = tsc_delta * decoder->tsc_to_cyc + decoder->base_cyc_cnt; + + if (tot_cyc_cnt > decoder->tot_cyc_cnt) + decoder->tot_cyc_cnt = tot_cyc_cnt; +} + static void intel_pt_calc_tma(struct intel_pt_decoder *decoder) { uint32_t ctc = decoder->packet.payload; @@ -1432,6 +1471,11 @@ static void intel_pt_calc_tma(struct intel_pt_decoder *decoder) if (!decoder->tsc_ctc_ratio_d) return; + if (decoder->pge && !decoder->in_psb) + intel_pt_mtc_cyc_cnt_pge(decoder); + else + intel_pt_mtc_cyc_cnt_upd(decoder); + decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff; decoder->ctc_timestamp = decoder->tsc_timestamp - fc; if (decoder->tsc_ctc_mult) { @@ -1487,6 +1531,8 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) else decoder->timestamp = timestamp; + intel_pt_mtc_cyc_cnt_upd(decoder); + decoder->timestamp_insn_cnt = 0; decoder->last_mtc = mtc; @@ -1511,6 +1557,8 @@ static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder) decoder->cbr = cbr; decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; + + intel_pt_mtc_cyc_cnt_cbr(decoder); } static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) @@ -1706,6 +1754,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) decoder->state.to_ip = decoder->ip; } decoder->state.type |= INTEL_PT_TRACE_BEGIN; + intel_pt_mtc_cyc_cnt_pge(decoder); return 0; case INTEL_PT_TIP: @@ -1776,6 +1825,7 @@ next: case INTEL_PT_TIP_PGE: { decoder->pge = true; + intel_pt_mtc_cyc_cnt_pge(decoder); if (decoder->packet.count == 0) { intel_pt_log_at("Skipping zero TIP.PGE", decoder->pos); @@ -2138,6 +2188,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) case INTEL_PT_TIP_PGE: decoder->pge = true; + intel_pt_mtc_cyc_cnt_pge(decoder); if (intel_pt_have_ip(decoder)) intel_pt_set_ip(decoder); if (!decoder->ip) From 
5db47f43ccbbdee8c48f76ace4c287187a28b87f Mon Sep 17 00:00:00 2001
From: Adrian Hunter
Date: Mon, 20 May 2019 14:37:18 +0300
Subject: [PATCH 15/85] perf intel-pt: Document IPC usage

Add brief documentation about instructions-per-cycle (IPC) information
derived from Intel PT.

Signed-off-by: Adrian Hunter
Cc: Jiri Olsa
Link: http://lkml.kernel.org/r/20190520113728.14389-13-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/Documentation/intel-pt.txt | 30 +++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index 60d99e5e7921..50c5b60101bd 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -103,6 +103,36 @@ The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
 system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
 in transaction, respectively.

+Another interesting field that is not printed by default is 'ipc' which can be
+displayed as follows:
+
+	perf script --itrace=be -F+ipc
+
+There are two ways that instructions-per-cycle (IPC) can be calculated depending
+on the recording.
+
+If the 'cyc' config term (see config terms section below) was used, then IPC is
+calculated using the cycle count from CYC packets, otherwise MTC packets are
+used - refer to the 'mtc' config term. When MTC is used, however, the values
+are less accurate because the timing is less accurate.
+
+Because Intel PT does not update the cycle count on every branch or instruction,
+the values will often be zero. When there are values, they will be the number
+of instructions and number of cycles since the last update, and thus represent
+the average IPC since the last IPC for that event type. Note IPC for "branches"
+events is calculated separately from IPC for "instructions" events.
+
+Also note that the IPC instruction count may or may not include the current
+instruction. If the cycle count is associated with an asynchronous branch
+(e.g. page fault or interrupt), then the instruction count does not include the
+current instruction, otherwise it does. That is consistent with whether or not
+that instruction has retired when the cycle count is updated.
+
+Another note: in the case of "branches" events, non-taken branches are not
+presently sampled, so IPC values for them do not appear, e.g. a CYC packet with
+a TNT packet that starts with a non-taken branch. To see every possible IPC
+value, "instructions" events can be used, e.g. --itrace=i0ns
+
 While it is possible to create scripts to analyze the data, an alternative
 approach is available to export the data to a sqlite or postgresql database.
 Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,

From 003ccdc7165accee073ce261fc670f64cc98d0f7 Mon Sep 17 00:00:00 2001
From: Adrian Hunter
Date: Mon, 20 May 2019 14:37:19 +0300
Subject: [PATCH 16/85] perf thread-stack: Accumulate IPC information

Cycle and instruction counts are added to the stack. The IPC of a
function, and of all the functions it calls, is also recorded.
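A hedged sketch of the bookkeeping, following the names used in the diff
below: each stack entry snapshots the running totals on call, and the
call/return record receives the deltas on return, so a function's counts
naturally include everything it called:

  #include <stdint.h>

  /* Running totals per thread stack, and per-entry snapshots. */
  struct ipc_counts { uint64_t insn_count, cyc_count; };

  /* On call: snapshot the running totals into the new stack entry. */
  static void on_call(struct ipc_counts *entry, const struct ipc_counts *running)
  {
          *entry = *running;
  }

  /* On return: the function's approximate cost, callees included. */
  static struct ipc_counts on_return(const struct ipc_counts *entry,
                                     const struct ipc_counts *running)
  {
          struct ipc_counts delta = {
                  .insn_count = running->insn_count - entry->insn_count,
                  .cyc_count  = running->cyc_count  - entry->cyc_count,
          };
          return delta;
  }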
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-14-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread-stack.c | 14 ++++++++++++++ tools/perf/util/thread-stack.h | 4 ++++ 2 files changed, 18 insertions(+) diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index 41942c2aaa18..8e390f78486f 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -49,6 +49,8 @@ enum retpoline_state_t { * @timestamp: timestamp (if known) * @ref: external reference (e.g. db_id of sample) * @branch_count: the branch count when the entry was created + * @insn_count: the instruction count when the entry was created + * @cyc_count the cycle count when the entry was created * @db_id: id used for db-export * @cp: call path * @no_call: a 'call' was not seen @@ -60,6 +62,8 @@ struct thread_stack_entry { u64 timestamp; u64 ref; u64 branch_count; + u64 insn_count; + u64 cyc_count; u64 db_id; struct call_path *cp; bool no_call; @@ -75,6 +79,8 @@ struct thread_stack_entry { * @sz: current maximum stack size * @trace_nr: current trace number * @branch_count: running branch count + * @insn_count: running instruction count + * @cyc_count running cycle count * @kernel_start: kernel start address * @last_time: last timestamp * @crp: call/return processor @@ -88,6 +94,8 @@ struct thread_stack { size_t sz; u64 trace_nr; u64 branch_count; + u64 insn_count; + u64 cyc_count; u64 kernel_start; u64 last_time; struct call_return_processor *crp; @@ -289,6 +297,8 @@ static int thread_stack__call_return(struct thread *thread, cr.call_time = tse->timestamp; cr.return_time = timestamp; cr.branch_count = ts->branch_count - tse->branch_count; + cr.insn_count = ts->insn_count - tse->insn_count; + cr.cyc_count = ts->cyc_count - tse->cyc_count; cr.db_id = tse->db_id; cr.call_ref = tse->ref; cr.return_ref = ref; @@ -544,6 +554,8 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, tse->timestamp = timestamp; tse->ref = ref; tse->branch_count = ts->branch_count; + tse->insn_count = ts->insn_count; + tse->cyc_count = ts->cyc_count; tse->cp = cp; tse->no_call = no_call; tse->trace_end = trace_end; @@ -874,6 +886,8 @@ int thread_stack__process(struct thread *thread, struct comm *comm, } ts->branch_count += 1; + ts->insn_count += sample->insn_cnt; + ts->cyc_count += sample->cyc_cnt; ts->last_time = sample->time; if (sample->flags & PERF_IP_FLAG_CALL) { diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index 9c45f947f5a9..bddb1daf6453 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -52,6 +52,8 @@ enum { * @call_time: timestamp of call (if known) * @return_time: timestamp of return (if known) * @branch_count: number of branches seen between call and return + * @insn_count: approx. number of instructions between call and return + * @cyc_count: approx. number of cycles between call and return * @call_ref: external reference to 'call' sample (e.g. db_id) * @return_ref: external reference to 'return' sample (e.g. 
db_id) * @db_id: id used for db-export @@ -65,6 +67,8 @@ struct call_return { u64 call_time; u64 return_time; u64 branch_count; + u64 insn_count; + u64 cyc_count; u64 call_ref; u64 return_ref; u64 db_id; From 1159facee97fe184a434db3086604c7572fd7dfa Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 20 May 2019 14:37:20 +0300 Subject: [PATCH 17/85] perf db-export: Add brief documentation Add brief documentation to explain how the database export maintains backward and forward compatibility. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-15-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/db-export.txt | 41 ++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 tools/perf/Documentation/db-export.txt diff --git a/tools/perf/Documentation/db-export.txt b/tools/perf/Documentation/db-export.txt new file mode 100644 index 000000000000..52ffccb02d55 --- /dev/null +++ b/tools/perf/Documentation/db-export.txt @@ -0,0 +1,41 @@ +Database Export +=============== + +perf tool's python scripting engine: + + tools/perf/util/scripting-engines/trace-event-python.c + +supports scripts: + + tools/perf/scripts/python/export-to-sqlite.py + tools/perf/scripts/python/export-to-postgresql.py + +which export data to a SQLite3 or PostgreSQL database. + +The export process provides records with unique sequential ids which allows the +data to be imported directly to a database and provides the relationships +between tables. + +Over time it is possible to continue to expand the export while maintaining +backward and forward compatibility, by following some simple rules: + +1. Because of the nature of SQL, existing tables and columns can continue to be +used so long as the names and meanings (and to some extent data types) remain +the same. + +2. New tables and columns can be added, without affecting existing SQL queries, +so long as the new names are unique. + +3. Scripts that use a database (e.g. exported-sql-viewer.py) can maintain +backward compatibility by testing for the presence of new tables and columns +before using them. e.g. function IsSelectable() in exported-sql-viewer.py + +4. The export scripts themselves maintain forward compatibility (i.e. an existing +script will continue to work with new versions of perf) by accepting a variable +number of arguments (e.g. def call_return_table(*x)) i.e. perf can pass more +arguments which old scripts will ignore. + +5. The scripting engine tests for the existence of script handler functions +before calling them. The scripting engine can also test for the support of new +or optional features by checking for the existence and value of script global +variables. From 52a2ab6fa99df9288f2c8c7f461b815550b9b366 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 20 May 2019 14:37:21 +0300 Subject: [PATCH 18/85] perf db-export: Export IPC information Export cycle and instruction counts on samples and call-returns. 
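The compatibility rules documented in the previous patch are visible in
the shape of this change: the new values are appended at the end of a
larger tuple, so the positions of existing values stay fixed and old
scripts that accept a variable number of arguments simply ignore the
extras. A minimal sketch (tuple_new() and tuple_set_u64() are the helpers
already used in trace-event-python.c, declared here as stand-ins):

  #include <stdint.h>

  struct tuple;                           /* opaque, as in the engine */
  struct tuple *tuple_new(unsigned int size);
  void tuple_set_u64(struct tuple *t, unsigned int pos, uint64_t val);

  /* The sample tuple grows from 22 to 24 entries; the new counters are
   * appended after the last pre-existing position (21, call_path_id). */
  static void append_ipc_counts(struct tuple *t, uint64_t insn_cnt,
                                uint64_t cyc_cnt)
  {
          tuple_set_u64(t, 22, insn_cnt);
          tuple_set_u64(t, 23, cyc_cnt);
  }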
Signed-off-by: Adrian Hunter
Cc: Jiri Olsa
Link: http://lkml.kernel.org/r/20190520113728.14389-16-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/scripting-engines/trace-event-python.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 22f52b669871..6acb379b53ec 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1111,7 +1111,7 @@ static int python_export_sample(struct db_export *dbe,
 	struct tables *tables = container_of(dbe, struct tables, dbe);
 	PyObject *t;

-	t = tuple_new(22);
+	t = tuple_new(24);

 	tuple_set_u64(t, 0, es->db_id);
 	tuple_set_u64(t, 1, es->evsel->db_id);
@@ -1135,6 +1135,8 @@ static int python_export_sample(struct db_export *dbe,
 	tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK);
 	tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX));
 	tuple_set_u64(t, 21, es->call_path_id);
+	tuple_set_u64(t, 22, es->sample->insn_cnt);
+	tuple_set_u64(t, 23, es->sample->cyc_cnt);

 	call_object(tables->sample_handler, t, "sample_table");

@@ -1173,7 +1175,7 @@ static int python_export_call_return(struct db_export *dbe,
 	u64 comm_db_id = cr->comm ? cr->comm->db_id : 0;
 	PyObject *t;

-	t = tuple_new(12);
+	t = tuple_new(14);

 	tuple_set_u64(t, 0, cr->db_id);
 	tuple_set_u64(t, 1, cr->thread->db_id);
@@ -1187,6 +1189,8 @@ static int python_export_call_return(struct db_export *dbe,
 	tuple_set_u64(t, 9, cr->cp->parent->db_id);
 	tuple_set_s32(t, 10, cr->flags);
 	tuple_set_u64(t, 11, cr->parent_db_id);
+	tuple_set_u64(t, 12, cr->insn_count);
+	tuple_set_u64(t, 13, cr->cyc_count);

 	call_object(tables->call_return_handler, t, "call_return_table");

From 64adadb3f9dbaaae3d14ea75fa71a3b877cbe82e Mon Sep 17 00:00:00 2001
From: Adrian Hunter
Date: Mon, 20 May 2019 14:37:22 +0300
Subject: [PATCH 19/85] perf scripts python: export-to-sqlite.py: Export IPC
 information

Export cycle and instruction counts on the samples and calls tables.

Committer testing:

First, run some workload collecting intel_pt with the 'cyc' term, just
for userspace:

  [root@quaco adrian.hunter]# perf record -o simple-retpoline.perf.data -e intel_pt/cyc/u ./simple-retpoline
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.035 MB simple-retpoline.perf.data ]
  [root@quaco adrian.hunter]#

Then use the export-to-sqlite.py script to check that the changes in
this cset don't break it and that the changes in the db schema are the
ones expected:

  [root@quaco adrian.hunter]# perf script -i simple-retpoline.perf.data --itrace=be -s ~acme/libexec/perf-core/scripts/python/export-to-sqlite.py simple-retpoline.db branches calls
  2019-05-31 11:50:46.942710 Creating database ...
  2019-05-31 11:50:46.949663 Writing records...
  2019-05-31 11:50:47.224033 Adding indexes
  2019-05-31 11:50:47.231599 Done
  [root@quaco adrian.hunter]#

Now let's use the db:

  [root@quaco adrian.hunter]# sqlite3 simple-retpoline.db
  SQLite version 3.26.0 2018-12-01 12:34:55
  Enter ".help" for usage hints.
sqlite> .schema samples CREATE TABLE samples (id integer NOT NULL PRIMARY KEY,evsel_id bigint,machine_id bigint,thread_id bigint,comm_id bigint,dso_id bigint,symbol_id bigint,sym_offset bigint,ip bigint,time bigint,cpuinteger,to_dso_id bigint,to_symbol_id bigint,to_sym_offset bigint,to_ip bigint,branch_type integer,in_tx boolean,call_path_id bigint,insn_count bigint,cyc_count bigint); sqlite> Cool, the 'insn_count' and 'cyc_count' are there, now lets see if we can use them in a query: sqlite> select insn_count,cyc_count from samples where cyc_count > 1500 and insn_count < 10; 6|1507 sqlite> select insn_count,cyc_count from samples where cyc_count > 1500; 118|2210 140|1516 3783|1861 132|1521 6|1507 sqlite> Seems to work :-) Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-17-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-sqlite.py | 36 ++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index f617e518332f..4542ce89034b 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -218,7 +218,9 @@ if branches: 'to_ip bigint,' 'branch_type integer,' 'in_tx boolean,' - 'call_path_id bigint)') + 'call_path_id bigint,' + 'insn_count bigint,' + 'cyc_count bigint)') else: do_query(query, 'CREATE TABLE samples (' 'id integer NOT NULL PRIMARY KEY,' @@ -242,7 +244,9 @@ else: 'data_src bigint,' 'branch_type integer,' 'in_tx boolean,' - 'call_path_id bigint)') + 'call_path_id bigint,' + 'insn_count bigint,' + 'cyc_count bigint)') if perf_db_export_calls or perf_db_export_callchains: do_query(query, 'CREATE TABLE call_paths (' @@ -263,7 +267,9 @@ if perf_db_export_calls: 'return_id bigint,' 'parent_call_path_id bigint,' 'flags integer,' - 'parent_id bigint)') + 'parent_id bigint,' + 'insn_count bigint,' + 'cyc_count bigint)') # printf was added to sqlite in version 3.8.3 sqlite_has_printf = False @@ -359,6 +365,9 @@ if perf_db_export_calls: 'return_time,' 'return_time - call_time AS elapsed_time,' 'branch_count,' + 'insn_count,' + 'cyc_count,' + 'CASE WHEN cyc_count=0 THEN CAST(0 AS FLOAT) ELSE ROUND(CAST(insn_count AS FLOAT) / cyc_count, 2) END AS IPC,' 'call_id,' 'return_id,' 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' @@ -384,7 +393,10 @@ do_query(query, 'CREATE VIEW samples_view AS ' 'to_sym_offset,' '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,' '(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,' - 'in_tx' + 'in_tx,' + 'insn_count,' + 'cyc_count,' + 'CASE WHEN cyc_count=0 THEN CAST(0 AS FLOAT) ELSE ROUND(CAST(insn_count AS FLOAT) / cyc_count, 2) END AS IPC' ' FROM samples') do_query(query, 'END TRANSACTION') @@ -407,15 +419,15 @@ branch_type_query = QSqlQuery(db) branch_type_query.prepare("INSERT INTO branch_types VALUES (?, ?)") sample_query = QSqlQuery(db) if branches: - sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") + sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") else: - sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 
?, ?, ?)") + sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") if perf_db_export_calls or perf_db_export_callchains: call_path_query = QSqlQuery(db) call_path_query.prepare("INSERT INTO call_paths VALUES (?, ?, ?, ?)") if perf_db_export_calls: call_query = QSqlQuery(db) - call_query.prepare("INSERT INTO calls VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") + call_query.prepare("INSERT INTO calls VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") def trace_begin(): printdate("Writing records...") @@ -427,10 +439,10 @@ def trace_begin(): comm_table(0, "unknown") dso_table(0, 0, "unknown", "unknown", "") symbol_table(0, 0, 0, 0, 0, "unknown") - sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) if perf_db_export_calls or perf_db_export_callchains: call_path_table(0, 0, 0, 0) - call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) unhandled_count = 0 @@ -486,14 +498,14 @@ def sample_table(*x): if branches: for xx in x[0:15]: sample_query.addBindValue(str(xx)) - for xx in x[19:22]: + for xx in x[19:24]: sample_query.addBindValue(str(xx)) do_query_(sample_query) else: - bind_exec(sample_query, 22, x) + bind_exec(sample_query, 24, x) def call_path_table(*x): bind_exec(call_path_query, 4, x) def call_return_table(*x): - bind_exec(call_query, 12, x) + bind_exec(call_query, 14, x) From ec7f448e2b2e13d1629300c5881cb3b5e0a99c2f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 20 May 2019 14:37:23 +0300 Subject: [PATCH 20/85] perf scripts python: export-to-postgresql.py: Export IPC information Export cycle and instruction counts on samples and calls tables. 
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-18-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../scripts/python/export-to-postgresql.py | 36 ++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index b2f481b0d28d..93225c02117e 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -394,7 +394,9 @@ if branches: 'to_ip bigint,' 'branch_type integer,' 'in_tx boolean,' - 'call_path_id bigint)') + 'call_path_id bigint,' + 'insn_count bigint,' + 'cyc_count bigint)') else: do_query(query, 'CREATE TABLE samples (' 'id bigint NOT NULL,' @@ -418,7 +420,9 @@ else: 'data_src bigint,' 'branch_type integer,' 'in_tx boolean,' - 'call_path_id bigint)') + 'call_path_id bigint,' + 'insn_count bigint,' + 'cyc_count bigint)') if perf_db_export_calls or perf_db_export_callchains: do_query(query, 'CREATE TABLE call_paths (' @@ -439,7 +443,9 @@ if perf_db_export_calls: 'return_id bigint,' 'parent_call_path_id bigint,' 'flags integer,' - 'parent_id bigint)') + 'parent_id bigint,' + 'insn_count bigint,' + 'cyc_count bigint)') do_query(query, 'CREATE VIEW machines_view AS ' 'SELECT ' @@ -521,6 +527,9 @@ if perf_db_export_calls: 'return_time,' 'return_time - call_time AS elapsed_time,' 'branch_count,' + 'insn_count,' + 'cyc_count,' + 'CASE WHEN cyc_count=0 THEN CAST(0 AS NUMERIC(20, 2)) ELSE CAST((CAST(insn_count AS FLOAT) / cyc_count) AS NUMERIC(20, 2)) END AS IPC,' 'call_id,' 'return_id,' 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE CAST ( flags AS VARCHAR(6) ) END AS flags,' @@ -546,7 +555,10 @@ do_query(query, 'CREATE VIEW samples_view AS ' 'to_sym_offset,' '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,' '(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,' - 'in_tx' + 'in_tx,' + 'insn_count,' + 'cyc_count,' + 'CASE WHEN cyc_count=0 THEN CAST(0 AS NUMERIC(20, 2)) ELSE CAST((CAST(insn_count AS FLOAT) / cyc_count) AS NUMERIC(20, 2)) END AS IPC' ' FROM samples') @@ -618,10 +630,10 @@ def trace_begin(): comm_table(0, "unknown") dso_table(0, 0, "unknown", "unknown", "") symbol_table(0, 0, 0, 0, 0, "unknown") - sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) if perf_db_export_calls or perf_db_export_callchains: call_path_table(0, 0, 0, 0) - call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) unhandled_count = 0 @@ -772,11 +784,11 @@ def branch_type_table(branch_type, name, *x): value = struct.pack(fmt, 2, 4, branch_type, n, name) branch_type_file.write(value) -def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, *x): +def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, insn_cnt, cyc_cnt, *x): if branches: - value = 
struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiq", 18, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id) + value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiqiqiq", 20, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id, 8, insn_cnt, 8, cyc_cnt) else: - value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiq", 22, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id) + value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiqiqiq", 24, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id, 8, insn_cnt, 8, cyc_cnt) sample_file.write(value) def call_path_table(cp_id, parent_id, symbol_id, ip, *x): @@ -784,7 +796,7 @@ def call_path_table(cp_id, parent_id, symbol_id, ip, *x): value = struct.pack(fmt, 4, 8, cp_id, 8, parent_id, 8, symbol_id, 8, ip) call_path_file.write(value) -def call_return_table(cr_id, thread_id, comm_id, call_path_id, call_time, return_time, branch_count, call_id, return_id, parent_call_path_id, flags, parent_id, *x): - fmt = "!hiqiqiqiqiqiqiqiqiqiqiiiq" - value = struct.pack(fmt, 12, 8, cr_id, 8, thread_id, 8, comm_id, 8, call_path_id, 8, call_time, 8, return_time, 8, branch_count, 8, call_id, 8, return_id, 8, parent_call_path_id, 4, flags, 8, parent_id) +def call_return_table(cr_id, thread_id, comm_id, call_path_id, call_time, return_time, branch_count, call_id, return_id, parent_call_path_id, flags, parent_id, insn_cnt, cyc_cnt, *x): + fmt = "!hiqiqiqiqiqiqiqiqiqiqiiiqiqiq" + value = struct.pack(fmt, 14, 8, cr_id, 8, thread_id, 8, comm_id, 8, call_path_id, 8, call_time, 8, return_time, 8, branch_count, 8, call_id, 8, return_id, 8, parent_call_path_id, 4, flags, 8, parent_id, 8, insn_cnt, 8, cyc_cnt) call_file.write(value) From 530e22fd5c6d2c572b1bbdda23eafa01a005fce0 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 20 May 2019 14:37:24 +0300 Subject: [PATCH 21/85] perf scripts python: exported-sql-viewer.py: Add IPC information to the Branch reports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enhance the "All branches" and "Selected branches" reports to display IPC information if it is available. Committer testing: So, testing this I noticed that it all starts with the left arrow in every line, that should mean there is some tree there, i.e. 
look at all those ▶ symbols:

Reports -> All Branches:

Time CPU Command PID TID Branch Type In Tx Insn Cnt Cyc Cnt IPC Branch
▶ 187836112195670 7 simple-retpolin 23003 23003 trace begin No 0 0 0 0 unknown (unknown) -> 7f6f33d4f110 _start (ld-2.28.so)
▶ 187836112195987 7 simple-retpolin 23003 23003 trace end No 0 883 0 7f6f33d4f110 _start (ld-2.28.so) -> 0 unknown (unknown)
▶ 187836112199189 7 simple-retpolin 23003 23003 trace begin No 0 0 0 0 unknown (unknown) -> 7f6f33d4f110 _start (ld-2.28.so)
▶ 187836112199189 7 simple-retpolin 23003 23003 call No 0 0 0 7f6f33d4f113 _start+0x3 (ld-2.28.so) -> 7f6f33d4ff50 _dl_start (ld-2.28.so)
▶ 187836112199544 7 simple-retpolin 23003 23003 trace end No 17 996 0.02 7f6f33d4ff73 _dl_start+0x23 (ld-2.28.so) -> 0 unknown (unknown)
▶ 187836112200939 7 simple-retpolin 23003 23003 trace begin No 0 0 0 0 unknown (unknown) -> 7f6f33d4ff73 _dl_start+0x23 (ld-2.28.so)
▶ 187836112201229 7 simple-retpolin 23003 23003 trace end No 1 816 0.00 7f6f33d4ff7a _dl_start+0x2a (ld-2.28.so) -> 0 unknown (unknown)
▶ 187836112203500 7 simple-retpolin 23003 23003 trace begin No 0 0 0 0 unknown (unknown) -> 7f6f33d4ff7a _dl_start+0x2a (ld-2.28.so)

But if you click on it, that ▶ disappears and a new click doesn't make it reappear, which looks buggy; a minor oddity, reported to Adrian.

Reports -> Selected Branches, then ask for branches in the ld-2.28.so DSO:

Time CPU Command PID TID Branch Type In Tx Insn Cnt Cyc Cnt IPC Branch
▶ 187836112195987 7 simple-retpolin 23003 23003 trace end No 0 883 0 7f6f33d4f110 _start (ld-2.28.so) -> 0 unknown (unknown)
▶ 187836112199189 7 simple-retpolin 23003 23003 trace begin No 0 0 0 0 unknown (unknown) -> 7f6f33d4f110 _start (ld-2.28.so)
▶ 187836112199189 7 simple-retpolin 23003 23003 call No 0 0 0 7f6f33d4f113 _start+0x3 (ld-2.28.so) -> 7f6f33d4ff50 _dl_start (ld-2.28.so)
▶ 187836112199544 7 simple-retpolin 23003 23003 trace end No 17 996 0.02 7f6f33d4ff73 _dl_start+0x23 (ld-2.28.so) -> 0 unknown (unknown)
▶ 187836112200939 7 simple-retpolin 23003 23003 trace begin No 0 0 0 0 unknown (unknown) -> 7f6f33d4ff73 _dl_start+0x23 (ld-2.28.so)
▶ 187836112201229 7 simple-retpolin 23003 23003 trace end No 1 816 0.00 7f6f33d4ff7a _dl_start+0x2a (ld-2.28.so) -> 0 unknown (unknown)
▶ 187836112203500 7 simple-retpolin 23003 23003 trace begin No 0 0 0 0 unknown (unknown) -> 7f6f33d4ff7a _dl_start+0x2a (ld-2.28.so)
▶ 187836112203528 7 simple-retpolin 23003 23003 unconditional jump No 0 0 0 7f6f33d4ffe7 _dl_start+0x97 (ld-2.28.so) -> 7f6f33d5000b _dl_start+0xbb (ld-2.28.so)
▶ 187836112203528 7 simple-retpolin 23003 23003 conditional jump No 0 0 0 7f6f33d5000f _dl_start+0xbf (ld-2.28.so) -> 7f6f33d4fffb _dl_start+0xab (ld-2.28.so)
▶ 187836112203528 7 simple-retpolin 23003 23003 conditional jump No 0 0 0 7f6f33d5000f _dl_start+0xbf (ld-2.28.so) -> 7f6f33d4fffb _dl_start+0xab (ld-2.28.so)
▶ 187836112203539 7 simple-retpolin 23003 23003 conditional jump No 0 0 0 7f6f33d50025 _dl_start+0xd5 (ld-2.28.so) -> 7f6f33d50210 _dl_start+0x2c0 (ld-2.28.so)
▶ 187836112203539 7 simple-retpolin 23003 23003 conditional jump No 0 0 0 7f6f33d5021a _dl_start+0x2ca (ld-2.28.so) -> 7f6f33d50360 _dl_start+0x410 (ld-2.28.so)
▶ 187836112203539 7 simple-retpolin 23003 23003 unconditional jump No 0 0 0 7f6f33d50377 _dl_start+0x427 (ld-2.28.so) -> 7f6f33d4ffff _dl_start+0xaf (ld-2.28.so)
▶ 187836112203539 7 simple-retpolin 23003 23003 conditional jump No 0 0 0 7f6f33d5000f _dl_start+0xbf (ld-2.28.so) -> 7f6f33d4fffb _dl_start+0xab (ld-2.28.so)
▶ 187836112203562 7 simple-retpolin 23003 23003 conditional jump No 0 0 0 7f6f33d5000f _dl_start+0xbf (ld-2.28.so) -> 7f6f33d4fffb _dl_start+0xab (ld-2.28.so)

Signed-off-by: Adrian Hunter
Tested-by: Arnaldo Carvalho de Melo
Cc: Jiri Olsa
Link: http://lkml.kernel.org/r/20190520113728.14389-19-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 .../scripts/python/exported-sql-viewer.py | 102 ++++++++++++++----
 1 file changed, 83 insertions(+), 19 deletions(-)

diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 6fe553258ce5..a607235c8cd9 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -1369,11 +1369,11 @@ class FetchMoreRecordsBar(): class BranchLevelTwoItem(): - def __init__(self, row, text, parent_item): + def __init__(self, row, col, text, parent_item): self.row = row self.parent_item = parent_item - self.data = [""] * 8 - self.data[7] = text + self.data = [""] * (col + 1) + self.data[col] = text self.level = 2 def getParentItem(self):
@@ -1405,6 +1405,7 @@ class BranchLevelOneItem(): self.dbid = data[0] self.level = 1 self.query_done = False + self.br_col = len(self.data) - 1 def getChildItem(self, row): return self.child_items[row]
@@ -1485,7 +1486,7 @@ class BranchLevelOneItem(): while k < 15: byte_str += " " k += 1 - self.child_items.append(BranchLevelTwoItem(0, byte_str + " " + text, self)) + self.child_items.append(BranchLevelTwoItem(0, self.br_col, byte_str + " " + text, self)) self.child_count += 1 else: return
@@ -1536,16 +1537,37 @@ class BranchRootItem(): def getData(self, column): return "" +# Calculate instructions per cycle + +def CalcIPC(cyc_cnt, insn_cnt): + if cyc_cnt and insn_cnt: + ipc = Decimal(float(insn_cnt) / cyc_cnt) + ipc = str(ipc.quantize(Decimal(".01"), rounding=ROUND_HALF_UP)) + else: + ipc = "0" + return ipc + # Branch data preparation +def BranchDataPrepBr(query, data): + data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) + + " (" + dsoname(query.value(11)) + ")" + " -> " + + tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) + + " (" + dsoname(query.value(15)) + ")") + +def BranchDataPrepIPC(query, data): + insn_cnt = query.value(16) + cyc_cnt = query.value(17) + ipc = CalcIPC(cyc_cnt, insn_cnt) + data.append(insn_cnt) + data.append(cyc_cnt) + data.append(ipc) + def BranchDataPrep(query): data = [] for i in xrange(0, 8): data.append(query.value(i)) - data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) + - " (" + dsoname(query.value(11)) + ")" + " -> " + - tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) + - " (" + dsoname(query.value(15)) + ")") + BranchDataPrepBr(query, data) return data def BranchDataPrepWA(query):
@@ -1555,10 +1577,26 @@ data = [] data.append("{:>19}".format(query.value(1))) for i in xrange(2, 8): data.append(query.value(i)) - data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) + - " (" + dsoname(query.value(11)) + ")" + " -> " + - tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) + - " (" + dsoname(query.value(15)) + ")") + BranchDataPrepBr(query, data) + return data + +def BranchDataWithIPCPrep(query): + data = [] + for i in xrange(0, 8): + data.append(query.value(i)) + BranchDataPrepIPC(query, data) + BranchDataPrepBr(query, data) + return data + +def 
BranchDataWithIPCPrepWA(query): + data = [] + data.append(query.value(0)) + # Workaround pyside failing to handle large integers (i.e. time) in python3 by converting to a string + data.append("{:>19}".format(query.value(1))) + for i in xrange(2, 8): + data.append(query.value(i)) + BranchDataPrepIPC(query, data) + BranchDataPrepBr(query, data) return data # Branch data model @@ -1572,10 +1610,20 @@ class BranchModel(TreeModel): self.event_id = event_id self.more = True self.populated = 0 + self.have_ipc = IsSelectable(glb.db, "samples", columns = "insn_count, cyc_count") + if self.have_ipc: + select_ipc = ", insn_count, cyc_count" + prep_fn = BranchDataWithIPCPrep + prep_wa_fn = BranchDataWithIPCPrepWA + else: + select_ipc = "" + prep_fn = BranchDataPrep + prep_wa_fn = BranchDataPrepWA sql = ("SELECT samples.id, time, cpu, comm, pid, tid, branch_types.name," " CASE WHEN in_tx = '0' THEN 'No' ELSE 'Yes' END," " ip, symbols.name, sym_offset, dsos.short_name," " to_ip, to_symbols.name, to_sym_offset, to_dsos.short_name" + + select_ipc + " FROM samples" " INNER JOIN comms ON comm_id = comms.id" " INNER JOIN threads ON thread_id = threads.id" @@ -1589,9 +1637,9 @@ class BranchModel(TreeModel): " ORDER BY samples.id" " LIMIT " + str(glb_chunk_sz)) if pyside_version_1 and sys.version_info[0] == 3: - prep = BranchDataPrepWA + prep = prep_fn else: - prep = BranchDataPrep + prep = prep_wa_fn self.fetcher = SQLFetcher(glb, sql, prep, self.AddSample) self.fetcher.done.connect(self.Update) self.fetcher.Fetch(glb_chunk_sz) @@ -1600,13 +1648,23 @@ class BranchModel(TreeModel): return BranchRootItem() def columnCount(self, parent=None): - return 8 + if self.have_ipc: + return 11 + else: + return 8 def columnHeader(self, column): - return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Branch")[column] + if self.have_ipc: + return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Insn Cnt", "Cyc Cnt", "IPC", "Branch")[column] + else: + return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Branch")[column] def columnFont(self, column): - if column != 7: + if self.have_ipc: + br_col = 10 + else: + br_col = 7 + if column != br_col: return None return QFont("Monospace") @@ -2114,10 +2172,10 @@ def GetEventList(db): # Is a table selectable -def IsSelectable(db, table, sql = ""): +def IsSelectable(db, table, sql = "", columns = "*"): query = QSqlQuery(db) try: - QueryExec(query, "SELECT * FROM " + table + " " + sql + " LIMIT 1") + QueryExec(query, "SELECT " + columns + " FROM " + table + " " + sql + " LIMIT 1") except: return False return True @@ -2854,6 +2912,12 @@ cd xed sudo ./mfile.py --prefix=/usr/local install sudo ldconfig +

+Instructions per Cycle (IPC)
+
+If available, IPC information is displayed in columns 'insn_cnt', 'cyc_cnt' and 'IPC'.
+
+Intel PT note: The information applies to the blocks of code ending with, and including, that branch.
+Due to the granularity of timing information, the number of cycles for some code blocks will not be known.
+In that case, 'insn_cnt', 'cyc_cnt' and 'IPC' are zero, but when 'IPC' is displayed it covers the period
+since the previous displayed 'IPC'.

Find

Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match. Refer to Python documentation for the regular expression syntax. From 4a0979d4b4feee67a7f9a5605b5bfae3b0a2b6a9 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 20 May 2019 14:37:25 +0300 Subject: [PATCH 22/85] perf scripts python: exported-sql-viewer.py: Add CallGraphModelParams Add a parameter to call graph and call tree, to determine whether IPC information is available. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-20-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../scripts/python/exported-sql-viewer.py | 73 +++++++++++-------- 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index a607235c8cd9..b3508bd4eb00 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -200,9 +200,10 @@ class Thread(QThread): class TreeModel(QAbstractItemModel): - def __init__(self, glb, parent=None): + def __init__(self, glb, params, parent=None): super(TreeModel, self).__init__(parent) self.glb = glb + self.params = params self.root = self.GetRoot() self.last_row_read = 0 @@ -463,8 +464,9 @@ class FindBar(): class CallGraphLevelItemBase(object): - def __init__(self, glb, row, parent_item): + def __init__(self, glb, params, row, parent_item): self.glb = glb + self.params = params self.row = row self.parent_item = parent_item self.query_done = False; @@ -503,8 +505,8 @@ class CallGraphLevelItemBase(object): class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): - def __init__(self, glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item): - super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, row, parent_item) + def __init__(self, glb, params, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item): + super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, params, row, parent_item) self.comm_id = comm_id self.thread_id = thread_id self.call_path_id = call_path_id @@ -525,7 +527,7 @@ class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): " GROUP BY call_path_id, name, short_name" " ORDER BY call_path_id") while query.next(): - child_item = CallGraphLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self) + child_item = CallGraphLevelThreeItem(self.glb, self.params, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self) self.child_items.append(child_item) self.child_count += 1 @@ -533,8 +535,8 @@ class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase): - def __init__(self, glb, row, comm_id, thread_id, call_path_id, name, dso, count, time, branch_count, parent_item): - super(CallGraphLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item) + def __init__(self, glb, params, row, comm_id, thread_id, call_path_id, name, dso, count, time, branch_count, parent_item): + super(CallGraphLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item) dso = dsoname(dso) self.data = [ name, dso, str(count), str(time), 
PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] self.dbid = call_path_id @@ -543,8 +545,8 @@ class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase): class CallGraphLevelTwoItem(CallGraphLevelTwoPlusItemBase): - def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item): - super(CallGraphLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 1, 0, 0, parent_item) + def __init__(self, glb, params, row, comm_id, thread_id, pid, tid, parent_item): + super(CallGraphLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 1, 0, 0, parent_item) self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] self.dbid = thread_id @@ -561,8 +563,8 @@ class CallGraphLevelTwoItem(CallGraphLevelTwoPlusItemBase): class CallGraphLevelOneItem(CallGraphLevelItemBase): - def __init__(self, glb, row, comm_id, comm, parent_item): - super(CallGraphLevelOneItem, self).__init__(glb, row, parent_item) + def __init__(self, glb, params, row, comm_id, comm, parent_item): + super(CallGraphLevelOneItem, self).__init__(glb, params, row, parent_item) self.data = [comm, "", "", "", "", "", ""] self.dbid = comm_id @@ -574,7 +576,7 @@ class CallGraphLevelOneItem(CallGraphLevelItemBase): " INNER JOIN threads ON thread_id = threads.id" " WHERE comm_id = " + str(self.dbid)) while query.next(): - child_item = CallGraphLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self) + child_item = CallGraphLevelTwoItem(self.glb, self.params, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self) self.child_items.append(child_item) self.child_count += 1 @@ -582,8 +584,8 @@ class CallGraphLevelOneItem(CallGraphLevelItemBase): class CallGraphRootItem(CallGraphLevelItemBase): - def __init__(self, glb): - super(CallGraphRootItem, self).__init__(glb, 0, None) + def __init__(self, glb, params): + super(CallGraphRootItem, self).__init__(glb, params, 0, None) self.dbid = 0 self.query_done = True; query = QSqlQuery(glb.db) @@ -591,16 +593,23 @@ class CallGraphRootItem(CallGraphLevelItemBase): while query.next(): if not query.value(0): continue - child_item = CallGraphLevelOneItem(glb, self.child_count, query.value(0), query.value(1), self) + child_item = CallGraphLevelOneItem(glb, params, self.child_count, query.value(0), query.value(1), self) self.child_items.append(child_item) self.child_count += 1 +# Call graph model parameters + +class CallGraphModelParams(): + + def __init__(self, glb, parent=None): + self.have_ipc = IsSelectable(glb.db, "calls", columns = "insn_count, cyc_count") + # Context-sensitive call graph data model base class CallGraphModelBase(TreeModel): def __init__(self, glb, parent=None): - super(CallGraphModelBase, self).__init__(glb, parent) + super(CallGraphModelBase, self).__init__(glb, CallGraphModelParams(glb), parent) def FindSelect(self, value, pattern, query): if pattern: @@ -682,7 +691,7 @@ class CallGraphModel(CallGraphModelBase): super(CallGraphModel, self).__init__(glb, parent) def GetRoot(self): - return CallGraphRootItem(self.glb) + return CallGraphRootItem(self.glb, self.params) def columnCount(self, parent=None): return 7 @@ -729,8 +738,8 @@ class CallGraphModel(CallGraphModelBase): class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): - def __init__(self, glb, row, comm_id, thread_id, calls_id, time, branch_count, parent_item): - super(CallTreeLevelTwoPlusItemBase, self).__init__(glb, row, parent_item) + def __init__(self, glb, 
params, row, comm_id, thread_id, calls_id, time, branch_count, parent_item): + super(CallTreeLevelTwoPlusItemBase, self).__init__(glb, params, row, parent_item) self.comm_id = comm_id self.thread_id = thread_id self.calls_id = calls_id @@ -752,7 +761,7 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): " WHERE calls.parent_id = " + str(self.calls_id) + comm_thread + " ORDER BY call_time, calls.id") while query.next(): - child_item = CallTreeLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self) + child_item = CallTreeLevelThreeItem(self.glb, self.params, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self) self.child_items.append(child_item) self.child_count += 1 @@ -760,8 +769,8 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): class CallTreeLevelThreeItem(CallTreeLevelTwoPlusItemBase): - def __init__(self, glb, row, comm_id, thread_id, calls_id, name, dso, count, time, branch_count, parent_item): - super(CallTreeLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, calls_id, time, branch_count, parent_item) + def __init__(self, glb, params, row, comm_id, thread_id, calls_id, name, dso, count, time, branch_count, parent_item): + super(CallTreeLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, calls_id, time, branch_count, parent_item) dso = dsoname(dso) self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] self.dbid = calls_id @@ -770,8 +779,8 @@ class CallTreeLevelThreeItem(CallTreeLevelTwoPlusItemBase): class CallTreeLevelTwoItem(CallTreeLevelTwoPlusItemBase): - def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item): - super(CallTreeLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 0, 0, 0, parent_item) + def __init__(self, glb, params, row, comm_id, thread_id, pid, tid, parent_item): + super(CallTreeLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 0, 0, 0, parent_item) self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] self.dbid = thread_id @@ -788,8 +797,8 @@ class CallTreeLevelTwoItem(CallTreeLevelTwoPlusItemBase): class CallTreeLevelOneItem(CallGraphLevelItemBase): - def __init__(self, glb, row, comm_id, comm, parent_item): - super(CallTreeLevelOneItem, self).__init__(glb, row, parent_item) + def __init__(self, glb, params, row, comm_id, comm, parent_item): + super(CallTreeLevelOneItem, self).__init__(glb, params, row, parent_item) self.data = [comm, "", "", "", "", "", ""] self.dbid = comm_id @@ -801,7 +810,7 @@ class CallTreeLevelOneItem(CallGraphLevelItemBase): " INNER JOIN threads ON thread_id = threads.id" " WHERE comm_id = " + str(self.dbid)) while query.next(): - child_item = CallTreeLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self) + child_item = CallTreeLevelTwoItem(self.glb, self.params, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self) self.child_items.append(child_item) self.child_count += 1 @@ -809,8 +818,8 @@ class CallTreeLevelOneItem(CallGraphLevelItemBase): class CallTreeRootItem(CallGraphLevelItemBase): - def __init__(self, glb): - super(CallTreeRootItem, self).__init__(glb, 0, None) + def __init__(self, glb, params): + 
super(CallTreeRootItem, self).__init__(glb, params, 0, None) self.dbid = 0 self.query_done = True; query = QSqlQuery(glb.db) @@ -818,7 +827,7 @@ class CallTreeRootItem(CallGraphLevelItemBase): while query.next(): if not query.value(0): continue - child_item = CallTreeLevelOneItem(glb, self.child_count, query.value(0), query.value(1), self) + child_item = CallTreeLevelOneItem(glb, params, self.child_count, query.value(0), query.value(1), self) self.child_items.append(child_item) self.child_count += 1 @@ -830,7 +839,7 @@ class CallTreeModel(CallGraphModelBase): super(CallTreeModel, self).__init__(glb, parent) def GetRoot(self): - return CallTreeRootItem(self.glb) + return CallTreeRootItem(self.glb, self.params) def columnCount(self, parent=None): return 7 @@ -1606,7 +1615,7 @@ class BranchModel(TreeModel): progress = Signal(object) def __init__(self, glb, event_id, where_clause, parent=None): - super(BranchModel, self).__init__(glb, parent) + super(BranchModel, self).__init__(glb, None, parent) self.event_id = event_id self.more = True self.populated = 0 From 38a846d47f3d2fe6783e2df7bc5c2415239e6a63 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 20 May 2019 14:37:26 +0300 Subject: [PATCH 23/85] perf scripts python: exported-sql-viewer.py: Add IPC information to Call Graph Graph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enhance the call graph to display IPC information if it is available. Committer testing: [acme@quaco adrian.hunter]$ python ~acme/libexec/perf-core/scripts/python/exported-sql-viewer.py ~/c/adrian.hunter/simple-retpoline.db Reports -> Context Sensitive Callgraph, then expand a few trees, then select with the mouse and press control+C: Call Path Object Count Time(ns) Time(%) Insn Insn Cyc Cyc IPC Branch Branch ▼ simple-retpolin Cnt Cnt(%) Cnt Cnt(%) Cnt Cnt(%) ▼ 23003:23003 ▼ _start ld-2.28.so 1 218295 100.0 127746 100.0 207320 100.0 0.62 13046 100.0 ▶ unknown unknown 1 3202 1.5 0 0.0 0 0.0 0 1 0.0 ▶ _dl_start ld-2.28.so 1 188471 86.3 123394 96.6 180007 86.8 0.69 12529 96.0 ▶ _dl_init ld-2.28.so 1 13406 6.1 3207 2.5 14868 7.2 0.22 327 2.5 ▼ _start simple-retpoline 1 12899 5.9 1142 0.9 11561 5.6 0.10 184 1.4 ▶ unknown unknown 1 846 6.6 0 0.0 0 0.0 0 1 0.5 ▼ __libc_start_main libc-2.28.so 1 11621 90.1 1129 98.9 10350 89.5 0.11 181 98.4 ▶ __cxa_atexit libc-2.28.so 1 2302 19.8 101 8.9 1817 17.6 0.06 13 7.2 ▶ __libc_csu_init simple-retpoline 1 121 1.0 43 3.8 340 3.3 0.13 8 4.4 ▼ _setjmp libc-2.28.so 1 74 0.6 46 4.1 206 2.0 0.22 4 2.2 ▼ __sigsetjmp libc-2.28.so 1 74 100.0 46 100.0 206 100.0 0.22 3 75.0 ▶ __sigjmp_save libc-2.28.so 1 0 0.0 0 0.0 0 0.0 0 1 33.3 ▼ main simple-retpoline 1 44 0.4 23 2.0 126 1.2 0.18 12 6.6 ▼ foo simple-retpoline 2 44 100.0 23 100.0 126 100.0 0.18 10 83.3 bar simple-retpoline 2 22 50.0 6 26.1 61 48.4 0.10 2 20.0 ▶ exit libc-2.28.so 1 9029 77.7 878 77.8 7765 75.0 0.11 139 76.8 Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-21-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../scripts/python/exported-sql-viewer.py | 69 +++++++++++++++---- 1 file changed, 56 insertions(+), 13 deletions(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index b3508bd4eb00..f5b1b63995b0 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -505,18 +505,24 @@ class 
CallGraphLevelItemBase(object): class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): - def __init__(self, glb, params, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item): + def __init__(self, glb, params, row, comm_id, thread_id, call_path_id, time, insn_cnt, cyc_cnt, branch_count, parent_item): super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, params, row, parent_item) self.comm_id = comm_id self.thread_id = thread_id self.call_path_id = call_path_id + self.insn_cnt = insn_cnt + self.cyc_cnt = cyc_cnt self.branch_count = branch_count self.time = time def Select(self): self.query_done = True; query = QSqlQuery(self.glb.db) - QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time), SUM(branch_count)" + if self.params.have_ipc: + ipc_str = ", SUM(insn_count), SUM(cyc_count)" + else: + ipc_str = "" + QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time)" + ipc_str + ", SUM(branch_count)" " FROM calls" " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" @@ -527,7 +533,15 @@ class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): " GROUP BY call_path_id, name, short_name" " ORDER BY call_path_id") while query.next(): - child_item = CallGraphLevelThreeItem(self.glb, self.params, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self) + if self.params.have_ipc: + insn_cnt = int(query.value(5)) + cyc_cnt = int(query.value(6)) + branch_count = int(query.value(7)) + else: + insn_cnt = 0 + cyc_cnt = 0 + branch_count = int(query.value(5)) + child_item = CallGraphLevelThreeItem(self.glb, self.params, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), insn_cnt, cyc_cnt, branch_count, self) self.child_items.append(child_item) self.child_count += 1 @@ -535,10 +549,17 @@ class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase): - def __init__(self, glb, params, row, comm_id, thread_id, call_path_id, name, dso, count, time, branch_count, parent_item): - super(CallGraphLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item) + def __init__(self, glb, params, row, comm_id, thread_id, call_path_id, name, dso, count, time, insn_cnt, cyc_cnt, branch_count, parent_item): + super(CallGraphLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, call_path_id, time, insn_cnt, cyc_cnt, branch_count, parent_item) dso = dsoname(dso) - self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] + if self.params.have_ipc: + insn_pcnt = PercentToOneDP(insn_cnt, parent_item.insn_cnt) + cyc_pcnt = PercentToOneDP(cyc_cnt, parent_item.cyc_cnt) + br_pcnt = PercentToOneDP(branch_count, parent_item.branch_count) + ipc = CalcIPC(cyc_cnt, insn_cnt) + self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(insn_cnt), insn_pcnt, str(cyc_cnt), cyc_pcnt, ipc, str(branch_count), br_pcnt ] + else: + self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] self.dbid = 
call_path_id # Context-sensitive call graph data model level two item @@ -546,18 +567,28 @@ class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase): class CallGraphLevelTwoItem(CallGraphLevelTwoPlusItemBase): def __init__(self, glb, params, row, comm_id, thread_id, pid, tid, parent_item): - super(CallGraphLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 1, 0, 0, parent_item) - self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] + super(CallGraphLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 1, 0, 0, 0, 0, parent_item) + if self.params.have_ipc: + self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", "", "", "", "", "", ""] + else: + self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] self.dbid = thread_id def Select(self): super(CallGraphLevelTwoItem, self).Select() for child_item in self.child_items: self.time += child_item.time + self.insn_cnt += child_item.insn_cnt + self.cyc_cnt += child_item.cyc_cnt self.branch_count += child_item.branch_count for child_item in self.child_items: child_item.data[4] = PercentToOneDP(child_item.time, self.time) - child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) + if self.params.have_ipc: + child_item.data[6] = PercentToOneDP(child_item.insn_cnt, self.insn_cnt) + child_item.data[8] = PercentToOneDP(child_item.cyc_cnt, self.cyc_cnt) + child_item.data[11] = PercentToOneDP(child_item.branch_count, self.branch_count) + else: + child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) # Context-sensitive call graph data model level one item @@ -565,7 +596,10 @@ class CallGraphLevelOneItem(CallGraphLevelItemBase): def __init__(self, glb, params, row, comm_id, comm, parent_item): super(CallGraphLevelOneItem, self).__init__(glb, params, row, parent_item) - self.data = [comm, "", "", "", "", "", ""] + if self.params.have_ipc: + self.data = [comm, "", "", "", "", "", "", "", "", "", "", ""] + else: + self.data = [comm, "", "", "", "", "", ""] self.dbid = comm_id def Select(self): @@ -694,14 +728,23 @@ class CallGraphModel(CallGraphModelBase): return CallGraphRootItem(self.glb, self.params) def columnCount(self, parent=None): - return 7 + if self.params.have_ipc: + return 12 + else: + return 7 def columnHeader(self, column): - headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] + if self.params.have_ipc: + headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Insn Cnt", "Insn Cnt (%)", "Cyc Cnt", "Cyc Cnt (%)", "IPC", "Branch Count ", "Branch Count (%) "] + else: + headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] return headers[column] def columnAlignment(self, column): - alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] + if self.params.have_ipc: + alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] + else: + alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] return alignment[column] def DoFindSelect(self, query, match): From b3b660792e049c7ef4a40c4caa7008efd4777b3c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 20 May 2019 14:37:27 +0300 Subject: [PATCH 24/85] perf scripts python: exported-sql-viewer.py: Add IPC information to 
Call Tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enhance the call tree to display IPC information if it is available. Committer testing: [acme@quaco adrian.hunter]$ python ~acme/libexec/perf-core/scripts/python/exported-sql-viewer.py ~/c/adrian.hunter/simple-retpoline.db Reports -> Call Tree, then expand a few trees, then select with the mouse and press control+C (copy): Call Path Object Call Time Time Time(%) Insn Insn Cyc Cyc IPC Branch Branch ▼ simple-retpolin (ns) Cnt Cnt(%) Cnt Cnt(%) Count Count(%) ▼ 23003:23003 ▼ _start ld-2.28.so 112195670 218295 100.0 127746 100.0 207320 100.0 0.62 13046 100.0 ▶ unknown unknown 112195987 3202 1.5 0 0.0 0 0.0 0 1 0.0 ▶ _dl_start ld-2.28.so 112199189 188471 86.3 123394 96.6 180007 86.8 0.69 12529 96.0 ▼ _dl_init ld-2.28.so 112387660 13406 6.1 3207 2.5 14868 7.2 0.22 327 2.5 ▶ call_init.part.0 ld-2.28.so 112387773 117 0.9 70 2.2 639 4.3 0.11 3 0.9 ▶ call_init.part.0 ld-2.28.so 112387890 13129 97.9 3103 96.8 14100 94.8 0.22 315 96.3 ▶ call_init.part.0 ld-2.28.so 112401020 0 0.0 0 0.0 0 0.0 0 2 0.6 ▼ _start simple-retpol 112401066 12899 5.9 1142 0.9 11561 5.6 0.10 184 1.4 ▶ unknown unknown 112401388 846 6.6 0 0.0 0 0.0 0 1 0.5 ▼ __libc_start_main libc-2.28.so 112402344 11621 90.1 1129 98.9 10350 89.5 0.11 181 98.4 ▶ __cxa_atexit libc-2.28.so 112402360 2302 19.8 101 8.9 1817 17.6 0.06 13 7.2 ▶ __libc_csu_init simple-retpol 112404673 121 1.0 43 3.8 340 3.3 0.13 8 4.4 ▶ _setjmp libc-2.28.so 112404794 74 0.6 46 4.1 206 2.0 0.22 4 2.2 ▼ main simple-retpol 112404892 44 0.4 23 2.0 126 1.2 0.18 12 6.6 ▼ foo simple-retpol 112404892 19 43.2 12 52.2 55 43.7 0.22 5 41.7 bar simple-retpol 112404896 12 63.2 3 25.0 34 61.8 0.09 1 20.0 ▼ foo simple-retpol 112404911 25 56.8 11 47.8 71 56.3 0.15 5 41.7 ▶ bar simple-retpol 112404924 10 40.0 3 27.3 27 38.0 0.11 1 20.0 ▶ exit libc-2.28.so 112404936 9029 77.7 878 77.8 7765 75.0 0.11 139 76.8 Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-22-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../scripts/python/exported-sql-viewer.py | 69 +++++++++++++++---- 1 file changed, 56 insertions(+), 13 deletions(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index f5b1b63995b0..94489cf2ce0e 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -781,11 +781,13 @@ class CallGraphModel(CallGraphModelBase): class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): - def __init__(self, glb, params, row, comm_id, thread_id, calls_id, time, branch_count, parent_item): + def __init__(self, glb, params, row, comm_id, thread_id, calls_id, time, insn_cnt, cyc_cnt, branch_count, parent_item): super(CallTreeLevelTwoPlusItemBase, self).__init__(glb, params, row, parent_item) self.comm_id = comm_id self.thread_id = thread_id self.calls_id = calls_id + self.insn_cnt = insn_cnt + self.cyc_cnt = cyc_cnt self.branch_count = branch_count self.time = time @@ -795,8 +797,12 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): comm_thread = " AND comm_id = " + str(self.comm_id) + " AND thread_id = " + str(self.thread_id) else: comm_thread = "" + if self.params.have_ipc: + ipc_str = ", insn_count, cyc_count" + else: + ipc_str = "" query = QSqlQuery(self.glb.db) - QueryExec(query, "SELECT calls.id, name, short_name, call_time, return_time - call_time, branch_count" 
+ QueryExec(query, "SELECT calls.id, name, short_name, call_time, return_time - call_time" + ipc_str + ", branch_count" " FROM calls" " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" @@ -804,7 +810,15 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): " WHERE calls.parent_id = " + str(self.calls_id) + comm_thread + " ORDER BY call_time, calls.id") while query.next(): - child_item = CallTreeLevelThreeItem(self.glb, self.params, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self) + if self.params.have_ipc: + insn_cnt = int(query.value(5)) + cyc_cnt = int(query.value(6)) + branch_count = int(query.value(7)) + else: + insn_cnt = 0 + cyc_cnt = 0 + branch_count = int(query.value(5)) + child_item = CallTreeLevelThreeItem(self.glb, self.params, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), insn_cnt, cyc_cnt, branch_count, self) self.child_items.append(child_item) self.child_count += 1 @@ -812,10 +826,17 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): class CallTreeLevelThreeItem(CallTreeLevelTwoPlusItemBase): - def __init__(self, glb, params, row, comm_id, thread_id, calls_id, name, dso, count, time, branch_count, parent_item): - super(CallTreeLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, calls_id, time, branch_count, parent_item) + def __init__(self, glb, params, row, comm_id, thread_id, calls_id, name, dso, count, time, insn_cnt, cyc_cnt, branch_count, parent_item): + super(CallTreeLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, calls_id, time, insn_cnt, cyc_cnt, branch_count, parent_item) dso = dsoname(dso) - self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] + if self.params.have_ipc: + insn_pcnt = PercentToOneDP(insn_cnt, parent_item.insn_cnt) + cyc_pcnt = PercentToOneDP(cyc_cnt, parent_item.cyc_cnt) + br_pcnt = PercentToOneDP(branch_count, parent_item.branch_count) + ipc = CalcIPC(cyc_cnt, insn_cnt) + self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(insn_cnt), insn_pcnt, str(cyc_cnt), cyc_pcnt, ipc, str(branch_count), br_pcnt ] + else: + self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] self.dbid = calls_id # Call tree data model level two item @@ -823,18 +844,28 @@ class CallTreeLevelThreeItem(CallTreeLevelTwoPlusItemBase): class CallTreeLevelTwoItem(CallTreeLevelTwoPlusItemBase): def __init__(self, glb, params, row, comm_id, thread_id, pid, tid, parent_item): - super(CallTreeLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 0, 0, 0, parent_item) - self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] + super(CallTreeLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 0, 0, 0, 0, 0, parent_item) + if self.params.have_ipc: + self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", "", "", "", "", "", ""] + else: + self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] self.dbid = thread_id def Select(self): super(CallTreeLevelTwoItem, self).Select() for child_item in self.child_items: self.time += child_item.time + self.insn_cnt += 
child_item.insn_cnt + self.cyc_cnt += child_item.cyc_cnt self.branch_count += child_item.branch_count for child_item in self.child_items: child_item.data[4] = PercentToOneDP(child_item.time, self.time) - child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) + if self.params.have_ipc: + child_item.data[6] = PercentToOneDP(child_item.insn_cnt, self.insn_cnt) + child_item.data[8] = PercentToOneDP(child_item.cyc_cnt, self.cyc_cnt) + child_item.data[11] = PercentToOneDP(child_item.branch_count, self.branch_count) + else: + child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) # Call tree data model level one item @@ -842,7 +873,10 @@ class CallTreeLevelOneItem(CallGraphLevelItemBase): def __init__(self, glb, params, row, comm_id, comm, parent_item): super(CallTreeLevelOneItem, self).__init__(glb, params, row, parent_item) - self.data = [comm, "", "", "", "", "", ""] + if self.params.have_ipc: + self.data = [comm, "", "", "", "", "", "", "", "", "", "", ""] + else: + self.data = [comm, "", "", "", "", "", ""] self.dbid = comm_id def Select(self): @@ -885,14 +919,23 @@ class CallTreeModel(CallGraphModelBase): return CallTreeRootItem(self.glb, self.params) def columnCount(self, parent=None): - return 7 + if self.params.have_ipc: + return 12 + else: + return 7 def columnHeader(self, column): - headers = ["Call Path", "Object", "Call Time", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] + if self.params.have_ipc: + headers = ["Call Path", "Object", "Call Time", "Time (ns) ", "Time (%) ", "Insn Cnt", "Insn Cnt (%)", "Cyc Cnt", "Cyc Cnt (%)", "IPC", "Branch Count ", "Branch Count (%) "] + else: + headers = ["Call Path", "Object", "Call Time", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] return headers[column] def columnAlignment(self, column): - alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] + if self.params.have_ipc: + alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] + else: + alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] return alignment[column] def DoFindSelect(self, query, match): From 80b3fb64a55a7e4ba1ef8f9a7e87fbe1a26dc709 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 20 May 2019 14:37:28 +0300 Subject: [PATCH 25/85] perf scripts python: exported-sql-viewer.py: Select find text when find bar is activated The user probably wants to replace the find text, so select the find text when the find bar is activated. That is fairly standard behaviour for search text entry. Entering text will replace the current text, but using edit keys (arrows, home, end etc) cancels the selection and enables editing. 
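
The selection itself is a one-liner on the find bar's editable combo box; a minimal
standalone sketch of the same pattern (PySide 1 assumed, widget setup hypothetical):

	from PySide.QtGui import QApplication, QComboBox

	app = QApplication([])
	# An editable combo box, like the one the find bar uses
	textbox = QComboBox()
	textbox.setEditable(True)
	textbox.lineEdit().setText("previous search")
	# Select the existing text: the next keystroke replaces it,
	# while cursor keys just cancel the selection and allow editing
	textbox.lineEdit().selectAll()
	textbox.setFocus()
	textbox.show()
	app.exec_()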
Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190520113728.14389-23-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 94489cf2ce0e..6e7934f2ac9a 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -400,6 +400,7 @@ class FindBar(): def Activate(self): self.bar.show() + self.textbox.lineEdit().selectAll() self.textbox.setFocus() def Deactivate(self): From 4cae8675ea798b141ccdeea0a5b3f46a1e4605eb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Jun 2019 16:52:46 -0300 Subject: [PATCH 26/85] perf augmented_raw_syscalls: Tell which args are filenames and how many bytes to copy Since we know what args are strings from reading the syscall descriptions in tracefs and also already mark such args to be beautified using the syscall_arg__scnprintf_filename() helper, all we need is to fill in this info in the 'syscalls' BPF map we were using to state which syscalls the user is interested in, i.e. the syscall filter. Right now just set that with PATH_MAX and unroll the syscall arg in the BPF program, as the verifier isn't liking something clang generates when unrolling the loop. This also makes the augmented_raw_syscalls.c program support all arches, since we removed that set of defines with the hard coded syscall numbers, all should be automatically set for all arches, with the syscall id mapping done correcly. Doing baby steps here, i.e. just the first string arg for a syscall is printed, syscalls with more than one, say, the various rename* syscalls, need further work, but lets get first something that the BPF verifier accepts before increasing the complexity To test it, something like: # perf trace -e string -e /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c With: # cat ~/.perfconfig [llvm] dump-obj = true clang-opt = -g [trace] #add_events = /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c show_zeros = yes show_duration = no no_inherit = yes show_timestamp = no show_arg_names = no args_alignment = 40 show_prefix = yes # That commented add_events line is needed for developing this augmented_raw_syscalls.c BPF program, as if we add it via the 'add_events' mechanism so as to shorten the 'perf trace' command lines, then we end up not setting up the -v option which precludes us having access to the bpf verifier log :-\ Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Jesper Dangaard Brouer Cc: Jiri Olsa Cc: Leo Yan Cc: Namhyung Kim Cc: Song Liu Cc: Yonghong Song Link: https://lkml.kernel.org/n/tip-dn863ya0cbsqycxuy0olvbt1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 31 +++ .../examples/bpf/augmented_raw_syscalls.c | 232 +++++------------- 2 files changed, 95 insertions(+), 168 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 54b2d0fd0d02..19f22127f02e 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -972,8 +972,14 @@ struct syscall { struct syscall_arg_fmt *arg_fmt; }; +/* + * Must match what is in the BPF program: + * + * tools/perf/examples/bpf/augmented_raw_syscalls.c + */ struct bpf_map_syscall_entry { bool enabled; + u16 string_args_len[6]; }; /* 
@@ -2711,6 +2717,25 @@ out_enomem: } #ifdef HAVE_LIBBPF_SUPPORT +static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry) +{ + struct syscall *sc = trace__syscall_info(trace, NULL, id); + int arg = 0; + + if (sc == NULL) + goto out; + + for (; arg < sc->nr_args; ++arg) { + entry->string_args_len[arg] = 0; + if (sc->arg_fmt[arg].scnprintf == SCA_FILENAME) { + /* Should be set like strace -s strsize */ + entry->string_args_len[arg] = PATH_MAX; + } + } +out: + for (; arg < 6; ++arg) + entry->string_args_len[arg] = 0; +} static int trace__set_ev_qualifier_bpf_filter(struct trace *trace) { int fd = bpf_map__fd(trace->syscalls.map); @@ -2723,6 +2748,9 @@ static int trace__set_ev_qualifier_bpf_filter(struct trace *trace) for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) { int key = trace->ev_qualifier_ids.entries[i]; + if (value.enabled) + trace__init_bpf_map_syscall_args(trace, key, &value); + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); if (err) break; @@ -2740,6 +2768,9 @@ static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled) int err = 0, key; for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) { + if (enabled) + trace__init_bpf_map_syscall_args(trace, key, &value); + err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); if (err) break; diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 68a3d61752ce..c9fd3b4d8e55 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -21,8 +21,14 @@ /* bpf-output associated map */ bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); +/* + * string_args_len: one per syscall arg, 0 means not a string or don't copy it, + * PATH_MAX for copying everything, any other value to limit + * it a la 'strace -s strsize'. 
+ */ struct syscall { bool enabled; + u16 string_args_len[6]; }; bpf_map(syscalls, ARRAY, int, struct syscall, 512); @@ -45,81 +51,6 @@ struct augmented_filename { char value[PATH_MAX]; }; -/* syscalls where the first arg is a string */ -#define SYS_OPEN 2 -#define SYS_STAT 4 -#define SYS_LSTAT 6 -#define SYS_ACCESS 21 -#define SYS_EXECVE 59 -#define SYS_TRUNCATE 76 -#define SYS_CHDIR 80 -#define SYS_RENAME 82 -#define SYS_MKDIR 83 -#define SYS_RMDIR 84 -#define SYS_CREAT 85 -#define SYS_LINK 86 -#define SYS_UNLINK 87 -#define SYS_SYMLINK 88 -#define SYS_READLINK 89 -#define SYS_CHMOD 90 -#define SYS_CHOWN 92 -#define SYS_LCHOWN 94 -#define SYS_MKNOD 133 -#define SYS_STATFS 137 -#define SYS_PIVOT_ROOT 155 -#define SYS_CHROOT 161 -#define SYS_ACCT 163 -#define SYS_SWAPON 167 -#define SYS_SWAPOFF 168 -#define SYS_DELETE_MODULE 176 -#define SYS_SETXATTR 188 -#define SYS_LSETXATTR 189 -#define SYS_GETXATTR 191 -#define SYS_LGETXATTR 192 -#define SYS_LISTXATTR 194 -#define SYS_LLISTXATTR 195 -#define SYS_REMOVEXATTR 197 -#define SYS_LREMOVEXATTR 198 -#define SYS_MQ_OPEN 240 -#define SYS_MQ_UNLINK 241 -#define SYS_ADD_KEY 248 -#define SYS_REQUEST_KEY 249 -#define SYS_SYMLINKAT 266 -#define SYS_MEMFD_CREATE 319 - -/* syscalls where the second arg is a string */ - -#define SYS_PWRITE64 18 -#define SYS_EXECVE 59 -#define SYS_RENAME 82 -#define SYS_QUOTACTL 179 -#define SYS_FSETXATTR 190 -#define SYS_FGETXATTR 193 -#define SYS_FREMOVEXATTR 199 -#define SYS_MQ_TIMEDSEND 242 -#define SYS_REQUEST_KEY 249 -#define SYS_INOTIFY_ADD_WATCH 254 -#define SYS_OPENAT 257 -#define SYS_MKDIRAT 258 -#define SYS_MKNODAT 259 -#define SYS_FCHOWNAT 260 -#define SYS_FUTIMESAT 261 -#define SYS_NEWFSTATAT 262 -#define SYS_UNLINKAT 263 -#define SYS_RENAMEAT 264 -#define SYS_LINKAT 265 -#define SYS_READLINKAT 267 -#define SYS_FCHMODAT 268 -#define SYS_FACCESSAT 269 -#define SYS_UTIMENSAT 280 -#define SYS_NAME_TO_HANDLE_AT 303 -#define SYS_FINIT_MODULE 313 -#define SYS_RENAMEAT2 316 -#define SYS_EXECVEAT 322 -#define SYS_STATX 332 -#define SYS_MOVE_MOUNT 429 -#define SYS_FSPICK 433 - pid_filter(pids_filtered); struct augmented_args_filename { @@ -133,7 +64,7 @@ SEC("raw_syscalls:sys_enter") int sys_enter(struct syscall_enter_args *args) { struct augmented_args_filename *augmented_args; - unsigned int len = sizeof(*augmented_args); + unsigned int len = sizeof(*augmented_args), filename_len; const void *filename_arg = NULL; struct syscall *syscall; int key = 0; @@ -191,104 +122,69 @@ int sys_enter(struct syscall_enter_args *args) * after the ctx memory access to prevent their down stream merging. */ /* - * This table of what args are strings will be provided by userspace, - * in the syscalls map, i.e. we will already have to do the lookup to - * see if this specific syscall is filtered, so we can as well get more - * info about what syscall args are strings or pointers, and how many - * bytes to copy, per arg, etc. + * For now copy just the first string arg, we need to improve the protocol + * and have more than one. * - * For now hard code it, till we have all the basic mechanisms in place - * to automate everything and make the kernel part be completely driven - * by information obtained in userspace for each kernel version and - * processor architecture, making the kernel part the same no matter what - * kernel version or processor architecture it runs on. 
- */ - switch (augmented_args->args.syscall_nr) { - case SYS_ACCT: - case SYS_ADD_KEY: - case SYS_CHDIR: - case SYS_CHMOD: - case SYS_CHOWN: - case SYS_CHROOT: - case SYS_CREAT: - case SYS_DELETE_MODULE: - case SYS_EXECVE: - case SYS_GETXATTR: - case SYS_LCHOWN: - case SYS_LGETXATTR: - case SYS_LINK: - case SYS_LISTXATTR: - case SYS_LLISTXATTR: - case SYS_LREMOVEXATTR: - case SYS_LSETXATTR: - case SYS_LSTAT: - case SYS_MEMFD_CREATE: - case SYS_MKDIR: - case SYS_MKNOD: - case SYS_MQ_OPEN: - case SYS_MQ_UNLINK: - case SYS_PIVOT_ROOT: - case SYS_READLINK: - case SYS_REMOVEXATTR: - case SYS_RENAME: - case SYS_REQUEST_KEY: - case SYS_RMDIR: - case SYS_SETXATTR: - case SYS_STAT: - case SYS_STATFS: - case SYS_SWAPOFF: - case SYS_SWAPON: - case SYS_SYMLINK: - case SYS_SYMLINKAT: - case SYS_TRUNCATE: - case SYS_UNLINK: - case SYS_ACCESS: - case SYS_OPEN: filename_arg = (const void *)args->args[0]; + * Using the unrolled loop is not working, only when we do it manually, + * check this out later... + + u8 arg; +#pragma clang loop unroll(full) + for (arg = 0; arg < 6; ++arg) { + if (syscall->string_args_len[arg] != 0) { + filename_len = syscall->string_args_len[arg]; + filename_arg = (const void *)args->args[arg]; __asm__ __volatile__("": : :"memory"); - break; - case SYS_EXECVEAT: - case SYS_FACCESSAT: - case SYS_FCHMODAT: - case SYS_FCHOWNAT: - case SYS_FGETXATTR: - case SYS_FINIT_MODULE: - case SYS_FREMOVEXATTR: - case SYS_FSETXATTR: - case SYS_FSPICK: - case SYS_FUTIMESAT: - case SYS_INOTIFY_ADD_WATCH: - case SYS_LINKAT: - case SYS_MKDIRAT: - case SYS_MKNODAT: - // case SYS_MOVE_MOUNT: - // For now don't copy move_mount first string arg, as it has two and - // 'perf trace's syscall_arg__scnprintf_filename() will use the one - // copied here, the first, for both args, duplicating the first and - // ignoring the second. - // - // We need to copy both here and make syscall_arg__scnprintf_filename - // skip the first when reading the second, using the size of the first, etc. - // Shouldn't be difficult, but now its perf/urgent time, lets wait for - // the next devel window. 
- case SYS_MQ_TIMEDSEND: - case SYS_NAME_TO_HANDLE_AT: - case SYS_NEWFSTATAT: - case SYS_PWRITE64: - case SYS_QUOTACTL: - case SYS_READLINKAT: - case SYS_RENAMEAT: - case SYS_RENAMEAT2: - case SYS_STATX: - case SYS_UNLINKAT: - case SYS_UTIMENSAT: - case SYS_OPENAT: filename_arg = (const void *)args->args[1]; - break; + break; + } } - if (filename_arg != NULL) { + verifier log: + +; if (syscall->string_args_len[arg] != 0) { +37: (69) r3 = *(u16 *)(r0 +2) + R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1_w=inv0 R2_w=map_value(id=0,off=2,ks=4,vs=14,imm=0) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm +; if (syscall->string_args_len[arg] != 0) { +38: (55) if r3 != 0x0 goto pc+5 + R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1=inv0 R2=map_value(id=0,off=2,ks=4,vs=14,imm=0) R3=inv0 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm +39: (b7) r1 = 1 +; if (syscall->string_args_len[arg] != 0) { +40: (bf) r2 = r0 +41: (07) r2 += 4 +42: (69) r3 = *(u16 *)(r0 +4) + R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1_w=inv1 R2_w=map_value(id=0,off=4,ks=4,vs=14,imm=0) R3_w=inv0 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm +; if (syscall->string_args_len[arg] != 0) { +43: (15) if r3 == 0x0 goto pc+32 + R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1=inv1 R2=map_value(id=0,off=4,ks=4,vs=14,imm=0) R3=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm +; filename_arg = (const void *)args->args[arg]; +44: (67) r1 <<= 3 +45: (bf) r3 = r6 +46: (0f) r3 += r1 +47: (b7) r5 = 64 +48: (79) r3 = *(u64 *)(r3 +16) +dereference of modified ctx ptr R3 off=8 disallowed +processed 46 insns (limit 1000000) max_states_per_insn 0 total_states 12 peak_states 12 mark_read 7 + */ + +#define __loop_iter(arg) \ + if (syscall->string_args_len[arg] != 0) { \ + filename_len = syscall->string_args_len[arg]; \ + filename_arg = (const void *)args->args[arg]; +#define loop_iter_first() __loop_iter(0); } +#define loop_iter(arg) else __loop_iter(arg); } +#define loop_iter_last(arg) else __loop_iter(arg); __asm__ __volatile__("": : :"memory"); } + + loop_iter_first() + loop_iter(1) + loop_iter(2) + loop_iter(3) + loop_iter(4) + loop_iter_last(5) + + if (filename_arg != NULL && filename_len <= sizeof(augmented_args->filename.value)) { augmented_args->filename.reserved = 0; augmented_args->filename.size = probe_read_str(&augmented_args->filename.value, - sizeof(augmented_args->filename.value), + filename_len, filename_arg); if (augmented_args->filename.size < sizeof(augmented_args->filename.value)) { len -= sizeof(augmented_args->filename.value) - augmented_args->filename.size; From 0c95a7ff76fb1c5bb614e6ee438fce06d1b957c8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 4 Jun 2019 15:31:28 -0300 Subject: [PATCH 27/85] perf augmented_raw_syscalls: Move the probe_read_str to a separate function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One more step into copying multiple filenames to support syscalls like rename*. 
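For reference, the loop_iter*() macros above expand (roughly, modulo some
stray semicolons) to the following if/else ladder inside sys_enter(), where
every args->args[] access uses a compile-time constant index, so the verifier
never sees a ctx pointer modified by a variable offset:

	if (syscall->string_args_len[0] != 0) {
		filename_len = syscall->string_args_len[0];
		filename_arg = (const void *)args->args[0];
	} else if (syscall->string_args_len[1] != 0) {
		filename_len = syscall->string_args_len[1];
		filename_arg = (const void *)args->args[1];
	} else if (syscall->string_args_len[2] != 0) {
		filename_len = syscall->string_args_len[2];
		filename_arg = (const void *)args->args[2];
	} else if (syscall->string_args_len[3] != 0) {
		filename_len = syscall->string_args_len[3];
		filename_arg = (const void *)args->args[3];
	} else if (syscall->string_args_len[4] != 0) {
		filename_len = syscall->string_args_len[4];
		filename_arg = (const void *)args->args[4];
	} else if (syscall->string_args_len[5] != 0) {
		filename_len = syscall->string_args_len[5];
		filename_arg = (const void *)args->args[5];
		__asm__ __volatile__("": : :"memory");
	}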
Cc: Adrian Hunter
Cc: Brendan Gregg
Cc: Jiri Olsa
Cc: Luis Cláudio Gonçalves
Cc: Namhyung Kim
Link: https://lkml.kernel.org/n/tip-xdqtjexdyp81oomm1rkzeifl@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 .../examples/bpf/augmented_raw_syscalls.c     | 27 ++++++++++++-------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
index c9fd3b4d8e55..356513e81ec1 100644
--- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -60,11 +60,27 @@ struct augmented_args_filename {
 
 bpf_map(augmented_filename_map, PERCPU_ARRAY, int, struct augmented_args_filename, 1);
 
+static inline
+unsigned int augmented_args__read_filename(struct augmented_args_filename *augmented_args,
+					   const void *filename_arg, unsigned int filename_len)
+{
+	unsigned int len = sizeof(*augmented_args);
+
+	augmented_args->filename.reserved = 0;
+	augmented_args->filename.size = probe_read_str(&augmented_args->filename.value, filename_len, filename_arg);
+	if (augmented_args->filename.size < sizeof(augmented_args->filename.value)) {
+		len -= sizeof(augmented_args->filename.value) - augmented_args->filename.size;
+		len &= sizeof(augmented_args->filename.value) - 1;
+	}
+
+	return len;
+}
+
 SEC("raw_syscalls:sys_enter")
 int sys_enter(struct syscall_enter_args *args)
 {
 	struct augmented_args_filename *augmented_args;
-	unsigned int len = sizeof(*augmented_args), filename_len;
+	unsigned int len, filename_len;
 	const void *filename_arg = NULL;
 	struct syscall *syscall;
 	int key = 0;
@@ -182,14 +198,7 @@ processed 46 insns (limit 1000000) max_states_per_insn 0 total_states 12 peak_st
 	loop_iter_last(5)
 
 	if (filename_arg != NULL && filename_len <= sizeof(augmented_args->filename.value)) {
-		augmented_args->filename.reserved = 0;
-		augmented_args->filename.size = probe_read_str(&augmented_args->filename.value,
-							       filename_len,
-							       filename_arg);
-		if (augmented_args->filename.size < sizeof(augmented_args->filename.value)) {
-			len -= sizeof(augmented_args->filename.value) - augmented_args->filename.size;
-			len &= sizeof(augmented_args->filename.value) - 1;
-		}
+		len = augmented_args__read_filename(augmented_args, filename_arg, filename_len);
 	} else {
 		len = sizeof(augmented_args->args);
 	}

From deaf4da48a67f73eb7d5d1802c14eaf58d33f2da Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo
Date: Tue, 4 Jun 2019 15:57:28 -0300
Subject: [PATCH 28/85] perf augmented_raw_syscalls: Change helper to consider
 just the augmented_filename part
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

So that we can use it for multiple args; baby steps so as not to step
on the verifier's toes.

In the process make sure we handle -EFAULT from probe_read_str(), as
this is really needed now that we'll handle more than one augmented
argument: if there is a failure, the argument that fails comes out as:

  (size = 0, err = -EFAULT, value = [] )

followed by the next one, which, let's say, worked for a second
pathname:

  (size = 4, err = 0, value = "/tmp" )

So we can skip the first while telling the user about the problem and
then process the second.
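To make the variable-size record layout concrete, here is a minimal sketch of
how a consumer could hop from one augmented filename record to the next; this
is not perf's actual reader and the type/helper names are made up, but the
layout matches the struct used by the BPF program:

	struct augmented_filename_hdr {		/* (size, err) header, then the bytes */
		unsigned int	size;
		int		err;
		char		value[];
	};

	static const void *augmented_filename__next(const struct augmented_filename_hdr *rec)
	{
		/* on error (e.g. err == -EFAULT) only the header was emitted */
		unsigned int payload = rec->err ? 0 : rec->size;

		return (const char *)(rec + 1) + payload;
	}

This mirrors the BPF side's length accounting: offsetof(struct
augmented_filename, value) for a failed copy, plus the copied size when
probe_read_str() succeeded.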
Cc: Adrian Hunter
Cc: Brendan Gregg
Cc: Jiri Olsa
Cc: Luis Cláudio Gonçalves
Cc: Namhyung Kim
Link: https://lkml.kernel.org/n/tip-deyvqi39um6gp6hux6jovos8@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 .../examples/bpf/augmented_raw_syscalls.c     | 46 +++++++++++++------
 1 file changed, 33 insertions(+), 13 deletions(-)

diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
index 356513e81ec1..5054b4bc9e55 100644
--- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -47,7 +47,7 @@ struct syscall_exit_args {
 
 struct augmented_filename {
 	unsigned int	size;
-	int		reserved;
+	int		err;
 	char		value[PATH_MAX];
 };
 
@@ -61,16 +61,28 @@ struct augmented_args_filename {
 
 bpf_map(augmented_filename_map, PERCPU_ARRAY, int, struct augmented_args_filename, 1);
 
 static inline
-unsigned int augmented_args__read_filename(struct augmented_args_filename *augmented_args,
-					   const void *filename_arg, unsigned int filename_len)
+unsigned int augmented_filename__read(struct augmented_filename *augmented_filename,
+				      const void *filename_arg, unsigned int filename_len)
 {
-	unsigned int len = sizeof(*augmented_args);
+	unsigned int len = sizeof(*augmented_filename);
+	int size = probe_read_str(&augmented_filename->value, filename_len, filename_arg);
 
-	augmented_args->filename.reserved = 0;
-	augmented_args->filename.size = probe_read_str(&augmented_args->filename.value, filename_len, filename_arg);
-	if (augmented_args->filename.size < sizeof(augmented_args->filename.value)) {
-		len -= sizeof(augmented_args->filename.value) - augmented_args->filename.size;
-		len &= sizeof(augmented_args->filename.value) - 1;
+	augmented_filename->size = augmented_filename->err = 0;
+	/*
+	 * probe_read_str() may return < 0, e.g. -EFAULT, in which case we
+	 * store it in augmented_filename->err for userspace to look at.
+	 */
+	if (size > 0) {
+		len -= sizeof(augmented_filename->value) - size;
+		len &= sizeof(augmented_filename->value) - 1;
+		augmented_filename->size = size;
+	} else {
+		/*
+		 * So that userspace notices the error while still being able
+		 * to skip this augmented arg record
+		 */
+		augmented_filename->err = size;
+		len = offsetof(struct augmented_filename, value);
 	}
 
 	return len;
@@ -80,7 +92,17 @@ SEC("raw_syscalls:sys_enter")
 int sys_enter(struct syscall_enter_args *args)
 {
 	struct augmented_args_filename *augmented_args;
-	unsigned int len, filename_len;
+	/*
+	 * We start len, the amount of data that will be in the perf ring
+	 * buffer, if this is not filtered out by one of pid_filter__has(),
+	 * syscall->enabled, etc, with the non-augmented raw syscall payload,
+	 * i.e. sizeof(augmented_args->args).
+	 *
+	 * We'll add to this as we add augmented syscalls right after that
+	 * initial, non-augmented raw_syscalls:sys_enter payload.
+ */ + unsigned int len = sizeof(augmented_args->args); + unsigned int filename_len; const void *filename_arg = NULL; struct syscall *syscall; int key = 0; @@ -198,9 +220,7 @@ processed 46 insns (limit 1000000) max_states_per_insn 0 total_states 12 peak_st loop_iter_last(5) if (filename_arg != NULL && filename_len <= sizeof(augmented_args->filename.value)) { - len = augmented_args__read_filename(augmented_args, filename_arg, filename_len); - } else { - len = sizeof(augmented_args->args); + len += augmented_filename__read(&augmented_args->filename, filename_arg, filename_len); } /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ From 602bce09fb43ca6fc41f1bdcba155b839b5e7f38 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 4 Jun 2019 16:04:34 -0300 Subject: [PATCH 29/85] perf augmented_raw_syscalls: Move reading filename to the loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Almost there, next step is to copy more than one filename payload. Probably to read syscall arg structs, etc we'll need just a variation of this that will decide what to use, if probe_read_str() or plain probe_read for structs, i.e. fixed size. Cc: Adrian Hunter Cc: Brendan Gregg Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-uf6u0pld6xe4xuo16f04owlz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/augmented_raw_syscalls.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 5054b4bc9e55..2f822bb51717 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -102,8 +102,6 @@ int sys_enter(struct syscall_enter_args *args) * initial, non-augmented raw_syscalls:sys_enter payload. 
*/ unsigned int len = sizeof(augmented_args->args); - unsigned int filename_len; - const void *filename_arg = NULL; struct syscall *syscall; int key = 0; @@ -206,8 +204,10 @@ processed 46 insns (limit 1000000) max_states_per_insn 0 total_states 12 peak_st #define __loop_iter(arg) \ if (syscall->string_args_len[arg] != 0) { \ - filename_len = syscall->string_args_len[arg]; \ - filename_arg = (const void *)args->args[arg]; + unsigned int filename_len = syscall->string_args_len[arg]; \ + const void *filename_arg = (const void *)args->args[arg]; \ + if (filename_len <= sizeof(augmented_args->filename.value)) \ + len += augmented_filename__read(&augmented_args->filename, filename_arg, filename_len); #define loop_iter_first() __loop_iter(0); } #define loop_iter(arg) else __loop_iter(arg); } #define loop_iter_last(arg) else __loop_iter(arg); __asm__ __volatile__("": : :"memory"); } @@ -219,10 +219,6 @@ processed 46 insns (limit 1000000) max_states_per_insn 0 total_states 12 peak_st loop_iter(4) loop_iter_last(5) - if (filename_arg != NULL && filename_len <= sizeof(augmented_args->filename.value)) { - len += augmented_filename__read(&augmented_args->filename, filename_arg, filename_len); - } - /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, augmented_args, len); } From 279ab04dbea1370d2eac0f854270369ccaef8a44 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 31 May 2019 15:13:21 +0200 Subject: [PATCH 30/85] perf jvmti: Address gcc string overflow warning for strncpy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are getting false positive gcc warning when we compile with gcc9 (9.1.1): CC jvmti/libjvmti.o In file included from /usr/include/string.h:494, from jvmti/libjvmti.c:5: In function ‘strncpy’, inlined from ‘copy_class_filename.constprop’ at jvmti/libjvmti.c:166:3: /usr/include/bits/string_fortified.h:106:10: error: ‘__builtin_strncpy’ specified bound depends on the length of the source argument [-Werror=stringop-overflow=] 106 | return __builtin___strncpy_chk (__dest, __src, __len, __bos (__dest)); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jvmti/libjvmti.c: In function ‘copy_class_filename.constprop’: jvmti/libjvmti.c:165:26: note: length computed here 165 | size_t file_name_len = strlen(file_name); | ^~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors As per Arnaldo's suggestion use strlcpy(), which does the same thing and keeps gcc silent. Suggested-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ben Gainey Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190531131321.GB1281@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/jvmti/libjvmti.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c index aea7b1fe85aa..c441a34cb1c0 100644 --- a/tools/perf/jvmti/libjvmti.c +++ b/tools/perf/jvmti/libjvmti.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include #include @@ -162,8 +163,7 @@ copy_class_filename(const char * class_sign, const char * file_name, char * resu result[i] = '\0'; } else { /* fallback case */ - size_t file_name_len = strlen(file_name); - strncpy(result, file_name, file_name_len < max_length ? 
file_name_len : max_length); + strlcpy(result, file_name, max_length); } } From 8195168e877948bb9a943ce11ad3e6ee71014bd5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 5 Jun 2019 10:34:47 -0300 Subject: [PATCH 31/85] perf trace: Consume the augmented_raw_syscalls payload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support the SCA_FILENAME beautifier in more than one syscall arg, as needed for syscalls such as the rename* family, we need to, after processing one such arg, bump the augmented pointers so that the next augmented arg don't reuse data for the previous augmented arguments. Cc: Adrian Hunter Cc: Brendan Gregg Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-4e4cmzyjxb3wkonfo1x9a27y@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 19f22127f02e..905e57c336b0 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1233,8 +1233,17 @@ static void thread__set_filename_pos(struct thread *thread, const char *bf, static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size) { struct augmented_arg *augmented_arg = arg->augmented.args; + size_t printed = scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value); + /* + * So that the next arg with a payload can consume its augmented arg, i.e. for rename* syscalls + * we would have two strings, each prefixed by its size. + */ + int consumed = sizeof(*augmented_arg) + augmented_arg->size; - return scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value); + arg->augmented.args += consumed; + arg->augmented.size -= consumed; + + return printed; } static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, From dea87bfb7b280e8b14519eb6b5e8bafbbf4c66f2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 5 Jun 2019 10:53:06 -0300 Subject: [PATCH 32/85] perf trace: Associate more argument names with the filename beautifier For instance, the rename* family uses "oldname", "newname", so check if "name" is at the end and treat it as a filename. 
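Distilled from the hunk that follows, the new heuristic amounts to this little
predicate (illustrative only, not a function that exists in builtin-trace.c):

	#include <stdbool.h>
	#include <string.h>

	static bool looks_like_filename_arg(const char *type, const char *name)
	{
		size_t len = strlen(name);

		return strcmp(type, "const char *") == 0 &&
		       ((len >= 4 && strcmp(name + len - 4, "name") == 0) ||
			strstr(name, "path") != NULL);
	}

With it, "filename", "pathname", "oldname", "newname", "path" and friends all
get the SCA_FILENAME beautifier.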
Cc: Adrian Hunter
Cc: Jiri Olsa
Cc: Namhyung Kim
Link: https://lkml.kernel.org/n/tip-wjy7j4bk06g7atzwoz1mid24@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/builtin-trace.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 905e57c336b0..f7e4e50bddbd 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1431,10 +1431,11 @@ static int syscall__set_arg_fmts(struct syscall *sc)
 		if (sc->fmt && sc->fmt->arg[idx].scnprintf)
 			continue;
 
+		len = strlen(field->name);
+
 		if (strcmp(field->type, "const char *") == 0 &&
-		    (strcmp(field->name, "filename") == 0 ||
-		     strcmp(field->name, "path") == 0 ||
-		     strcmp(field->name, "pathname") == 0))
+		    ((len >= 4 && strcmp(field->name + len - 4, "name") == 0) ||
+		      strstr(field->name, "path") != NULL))
 			sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
 		else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr"))
 			sc->arg_fmt[idx].scnprintf = SCA_PTR;
@@ -1445,8 +1446,7 @@ static int syscall__set_arg_fmts(struct syscall *sc)
 		else if ((strcmp(field->type, "int") == 0 ||
 			  strcmp(field->type, "unsigned int") == 0 ||
 			  strcmp(field->type, "long") == 0) &&
-			 (len = strlen(field->name)) >= 2 &&
-			 strcmp(field->name + len - 2, "fd") == 0) {
+			 len >= 2 && strcmp(field->name + len - 2, "fd") == 0) {
 			/*
 			 * /sys/kernel/tracing/events/syscalls/sys_enter*
 			 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c

From 012749caf9419f22636891259b664c6dd383e897 Mon Sep 17 00:00:00 2001
From: Leo Yan
Date: Thu, 6 Jun 2019 10:38:59 -0300
Subject: [PATCH 33/85] perf trace: Exit when failing to build eBPF program

On my Juno board with ARM64 CPUs, the perf trace command reports an eBPF
program building failure but does not exit; it continues to run.

If we define an eBPF event in the config file, the event is parsed via
the flow below:

  perf_config()
    `> trace__config()
         `> parse_events_option()
              `> parse_events__scanner()
                   `-> parse_events_parse()
                        `> parse_events_load_bpf()
                             `> llvm__compile_bpf()

Though the low-level functions return error values when they detect an
eBPF building failure, parse_events_option() returns 1 in this case and
trace__config() passes that 1 on to perf_config(); perf_config() doesn't
treat the return value 1 as a failure and continues to parse other
configurations. Thus the perf command keeps running even though the
eBPF event was not enabled successfully.

This patch changes the error handling in trace__config(): when it
detects a failure it returns -1 rather than directly passing on the
error value (1); as a result, perf_config() bails out and perf exits in
this case.

Committer notes:

Simplified the patch to just directly check the return of
parse_events_option() and, if it is non-zero, change err from its
initial zero value to -1.
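The underlying contract is that perf_config() treats only negative returns
from a handler as fatal, so a handler has to normalize other failure
encodings itself. A sketch of that contract, with made-up handler, key and
parser names:

	static int mytool__config(const char *var, const char *value, void *arg)
	{
		int err = 0;

		(void)arg;	/* unused in this sketch */

		if (!strcmp(var, "mytool.some_key")) {
			if (mytool__parse(value))	/* assume: returns 1 on failure, like parse_events_option() */
				err = -1;		/* only < 0 makes perf_config() bail out */
		}

		return err;	/* 0 means: fine, keep parsing the config file */
	}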
Signed-off-by: Leo Yan Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Martin KaFai Lau Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Song Liu Cc: Suzuki Poulouse Cc: Yonghong Song Fixes: ac96287cae08 ("perf trace: Allow specifying a set of events to add in perfconfig") Link: https://lkml.kernel.org/n/tip-x4i63f5kscykfok0hqim3zma@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f7e4e50bddbd..1a2a605cf068 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3703,7 +3703,12 @@ static int trace__config(const char *var, const char *value, void *arg) struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", "event selector. use 'perf list' to list available events", parse_events_option); - err = parse_events_option(&o, value, 0); + /* + * We can't propagate parse_event_option() return, as it is 1 + * for failure while perf_config() expects -1. + */ + if (parse_events_option(&o, value, 0)) + err = -1; } else if (!strcmp(var, "trace.show_timestamp")) { trace->show_tstamp = perf_config_bool(var, value); } else if (!strcmp(var, "trace.show_duration")) { From 22d4621987faf651fee92c532a3eaba9a0e31ba0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 6 Jun 2019 10:56:55 -0300 Subject: [PATCH 34/85] perf config: Bail out when a handler returns failure for a key-value pair So perf_config() uses: int ret = 0; perf_config_set__for_each_entry(config_set, section, item) { ... ret = fn(); if (ret < 0) break; } return ret; Expecting that that break will imediatelly go to function exit to return that error value (ret). The problem is that perf_config_set__for_each_entry() expands into two nested for() loops, one traversing the sections in a config and the second the items in each of those sections, so we have to change that 'break' to a goto label right before that final 'return ret'. With that, for instance 'perf trace' now correctly bails out when a event that is requested to be added via its 'trace.add_events' ~/.perfconfig entry gets rejected by the kernel BPF verifier: # perf trace ls event syntax error: '/home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.o' \___ Kernel verifier blocks program loading (add -v to see detail) Run 'perf list' for a list of valid events Error: wrong config key-value pair trace.add_events=/home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.o # While before it would continue and explode later, when trying to find maps that would have been in place had that augmented_raw_syscalls.o precompiled BPF proggie been accepted by the, humm, bast... 
rigorous kernel BPF verifier 8-)

Cc: Adrian Hunter
Cc: Alexander Shishkin
Cc: Alexei Starovoitov
Cc: Daniel Borkmann
Cc: Jiri Olsa
Cc: Martin KaFai Lau
Cc: Mathieu Poirier
Cc: Mike Leach
Cc: Namhyung Kim
Cc: Song Liu
Cc: Suzuki Poulouse
Cc: Taeung Song
Cc: Yonghong Song
Fixes: 8a0a9c7e9146 ("perf config: Introduce new init() and exit()")
Link: https://lkml.kernel.org/n/tip-qvqxfk9d0rn1l7lcntwiezrr@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/config.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 7e3c1b60120c..e7d2c08d263a 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -739,11 +739,15 @@ int perf_config(config_fn_t fn, void *data)
 			if (ret < 0) {
 				pr_err("Error: wrong config key-value pair %s=%s\n",
 				       key, value);
-				break;
+				/*
+				 * Can't be just a 'break', as perf_config_set__for_each_entry()
+				 * expands to two nested for() loops.
+				 */
+				goto out;
 			}
 		}
 	}
-
+out:
 	return ret;
 }

From 53651b28cfb637ef604abc189d877948d1af39bb Mon Sep 17 00:00:00 2001
From: yuzhoujian
Date: Thu, 30 May 2019 14:29:22 +0100
Subject: [PATCH 35/85] perf record: Add support to collect callchains from
 kernel or user space only

One can just record callchains in the kernel or user space with these
new options. They can be used together with the "--all-kernel" option.
These two options are used just like print_stack(sys) or
print_ustack(usr) in systemtap.

Shown below is the usage of these new options combined with the
"--all-kernel" option:

1. Configure all used events to run in kernel space and just collect
   kernel callchains.

  $ perf record -a -g --all-kernel --kernel-callchains

2. Configure all used events to run in kernel space and just collect
   user callchains.

  $ perf record -a -g --all-kernel --user-callchains

Committer notes:

Improved documentation to state that asking for kernel callchains
really is asking for excluding user callchains, and vice versa.

Further mentioned that using both won't get both, but nothing, as both
will be excluded.

Signed-off-by: yuzhoujian
Acked-by: Jiri Olsa
Cc: Adrian Hunter
Cc: Alexander Shishkin
Cc: David Ahern
Cc: Milian Wolff
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Wang Nan
Link: http://lkml.kernel.org/r/1559222962-22891-1-git-send-email-ufo19890607@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/Documentation/perf-record.txt | 11 +++++++++++
 tools/perf/builtin-record.c              |  4 ++++
 tools/perf/perf.h                        |  2 ++
 tools/perf/util/evsel.c                  |  4 ++++
 4 files changed, 21 insertions(+)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index de269430720a..15e0fa87241b 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -490,6 +490,17 @@ Configure all used events to run in kernel space.
 --all-user::
 Configure all used events to run in user space.
 
+--kernel-callchains::
+Collect callchains only from kernel space. I.e. this option sets
+perf_event_attr.exclude_callchain_user to 1.
+
+--user-callchains::
+Collect callchains only from user space. I.e. this option sets
+perf_event_attr.exclude_callchain_kernel to 1.
+
+Don't use both --kernel-callchains and --user-callchains at the same time or no
+callchains will be collected.
+
 --timestamp-filename
 Append timestamp to output file name.
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e2c3a585a61e..dca55997934e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2191,6 +2191,10 @@ static struct option __record_options[] = { OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, "Configure all used events to run in user space.", PARSE_OPT_EXCLUSIVE), + OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, + "collect kernel callchains"), + OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, + "collect user callchains"), OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", "clang binary to use for compiling BPF scriptlets"), OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", diff --git a/tools/perf/perf.h b/tools/perf/perf.h index d59dee61b64d..711e009381ec 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -61,6 +61,8 @@ struct record_opts { bool record_switch_events; bool all_kernel; bool all_user; + bool kernel_callchains; + bool user_callchains; bool tail_synthesize; bool overwrite; bool ignore_missing_thread; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index cc6e7a0dda92..9f3b58071863 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -680,6 +680,10 @@ static void __perf_evsel__config_callchain(struct perf_evsel *evsel, attr->sample_max_stack = param->max_stack; + if (opts->kernel_callchains) + attr->exclude_callchain_user = 1; + if (opts->user_callchains) + attr->exclude_callchain_kernel = 1; if (param->record_mode == CALLCHAIN_LBR) { if (!opts->branch_stack) { if (attr->exclude_user) { From 10981c8012bc9ad4119420716a5dccfe8043b596 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 17 May 2019 13:33:47 +0200 Subject: [PATCH 36/85] perf evsel: Remove superfluous nthreads system_wide setup in alloc_fd() It's already setup in the only caller of this method in perf_evsel__open(), right before calling perf_evsel__alloc_fd(), no need to do it again. Also it's better to have it out of the function before we move it to libperf. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-1k8lhyjxfk7o8v4g3r7eyjc9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 9f3b58071863..68beef8f47ff 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1148,9 +1148,6 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { - if (evsel->system_wide) - nthreads = 1; - evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); if (evsel->fd) { From 3399ad9ac234364396f75531203c38be5d2872c7 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 24 May 2019 11:34:52 -0600 Subject: [PATCH 37/85] perf cs-etm: Configure contextID tracing in CPU-wide mode When operating in CPU-wide mode being notified of contextID changes is required so that the decoding mechanic is aware of the process context switch. 
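As a quick illustration of the TRCIDR2 check in the patch below: CIDSIZE
lives in bits [9:5] of the register and only the value 0b00100 (32-bit
context IDs) counts as supported. A self-contained sketch, with a made-up
register value standing in for what would be read from sysfs:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t trcidr2 = 0x488;			/* hypothetical raw TRCIDR2 value */
		uint32_t cidsize = (trcidr2 >> 5) & 0x1f;	/* extract bits [9:5] */

		printf("CIDSIZE = 0x%x -> %s\n", cidsize,
		       cidsize == 0x4 ? "32-bit contextID tracing supported"
				      : "contextID tracing not supported");
		return 0;
	}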
Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki Poulouse Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190524173508.29044-2-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 126 +++++++++++++++++++++++++----- tools/perf/util/cs-etm.h | 12 +++ 2 files changed, 119 insertions(+), 19 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 911426721170..3912f0bf04ed 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -35,8 +35,100 @@ struct cs_etm_recording { size_t snapshot_size; }; +static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = { + [CS_ETM_ETMCCER] = "mgmt/etmccer", + [CS_ETM_ETMIDR] = "mgmt/etmidr", +}; + +static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = { + [CS_ETMV4_TRCIDR0] = "trcidr/trcidr0", + [CS_ETMV4_TRCIDR1] = "trcidr/trcidr1", + [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2", + [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8", + [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus", +}; + static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu); +static int cs_etm_set_context_id(struct auxtrace_record *itr, + struct perf_evsel *evsel, int cpu) +{ + struct cs_etm_recording *ptr; + struct perf_pmu *cs_etm_pmu; + char path[PATH_MAX]; + int err = -EINVAL; + u32 val; + + ptr = container_of(itr, struct cs_etm_recording, itr); + cs_etm_pmu = ptr->cs_etm_pmu; + + if (!cs_etm_is_etmv4(itr, cpu)) + goto out; + + /* Get a handle on TRCIRD2 */ + snprintf(path, PATH_MAX, "cpu%d/%s", + cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR2]); + err = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val); + + /* There was a problem reading the file, bailing out */ + if (err != 1) { + pr_err("%s: can't read file %s\n", + CORESIGHT_ETM_PMU_NAME, path); + goto out; + } + + /* + * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID tracing + * is supported: + * 0b00000 Context ID tracing is not supported. + * 0b00100 Maximum of 32-bit Context ID size. + * All other values are reserved. 
+ */ + val = BMVAL(val, 5, 9); + if (!val || val != 0x4) { + err = -EINVAL; + goto out; + } + + /* All good, let the kernel know */ + evsel->attr.config |= (1 << ETM_OPT_CTXTID); + err = 0; + +out: + + return err; +} + +static int cs_etm_set_option(struct auxtrace_record *itr, + struct perf_evsel *evsel, u32 option) +{ + int i, err = -EINVAL; + struct cpu_map *event_cpus = evsel->evlist->cpus; + struct cpu_map *online_cpus = cpu_map__new(NULL); + + /* Set option of each CPU we have */ + for (i = 0; i < cpu__max_cpu(); i++) { + if (!cpu_map__has(event_cpus, i) || + !cpu_map__has(online_cpus, i)) + continue; + + switch (option) { + case ETM_OPT_CTXTID: + err = cs_etm_set_context_id(itr, evsel, i); + if (err) + goto out; + break; + default: + goto out; + } + } + + err = 0; +out: + cpu_map__put(online_cpus); + return err; +} + static int cs_etm_parse_snapshot_options(struct auxtrace_record *itr, struct record_opts *opts, const char *str) @@ -105,8 +197,9 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, container_of(itr, struct cs_etm_recording, itr); struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; struct perf_evsel *evsel, *cs_etm_evsel = NULL; - const struct cpu_map *cpus = evlist->cpus; + struct cpu_map *cpus = evlist->cpus; bool privileged = (geteuid() == 0 || perf_event_paranoid() < 0); + int err = 0; ptr->evlist = evlist; ptr->snapshot_mode = opts->auxtrace_snapshot_mode; @@ -241,19 +334,24 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, /* * In the case of per-cpu mmaps, we need the CPU on the - * AUX event. + * AUX event. We also need the contextID in order to be notified + * when a context switch happened. */ - if (!cpu_map__empty(cpus)) + if (!cpu_map__empty(cpus)) { perf_evsel__set_sample_bit(cs_etm_evsel, CPU); + err = cs_etm_set_option(itr, cs_etm_evsel, ETM_OPT_CTXTID); + if (err) + goto out; + } + /* Add dummy event to keep tracking */ if (opts->full_auxtrace) { struct perf_evsel *tracking_evsel; - int err; err = parse_events(evlist, "dummy:u", NULL); if (err) - return err; + goto out; tracking_evsel = perf_evlist__last(evlist); perf_evlist__set_tracking_event(evlist, tracking_evsel); @@ -266,7 +364,8 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, perf_evsel__set_sample_bit(tracking_evsel, TIME); } - return 0; +out: + return err; } static u64 cs_etm_get_config(struct auxtrace_record *itr) @@ -314,6 +413,8 @@ static u64 cs_etmv4_get_config(struct auxtrace_record *itr) config_opts = cs_etm_get_config(itr); if (config_opts & BIT(ETM_OPT_CYCACC)) config |= BIT(ETM4_CFG_BIT_CYCACC); + if (config_opts & BIT(ETM_OPT_CTXTID)) + config |= BIT(ETM4_CFG_BIT_CTXTID); if (config_opts & BIT(ETM_OPT_TS)) config |= BIT(ETM4_CFG_BIT_TS); if (config_opts & BIT(ETM_OPT_RETSTK)) @@ -363,19 +464,6 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, (etmv3 * CS_ETMV3_PRIV_SIZE)); } -static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = { - [CS_ETM_ETMCCER] = "mgmt/etmccer", - [CS_ETM_ETMIDR] = "mgmt/etmidr", -}; - -static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = { - [CS_ETMV4_TRCIDR0] = "trcidr/trcidr0", - [CS_ETMV4_TRCIDR1] = "trcidr/trcidr1", - [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2", - [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8", - [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus", -}; - static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu) { bool ret = false; diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 0e97c196147a..826c9eedaf5c 100644 --- a/tools/perf/util/cs-etm.h +++ 
b/tools/perf/util/cs-etm.h @@ -103,6 +103,18 @@ struct intlist *traceid_list; #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) +/* + * Create a contiguous bitmask starting at bit position @l and ending at + * position @h. For example + * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. + * + * Carbon copy of implementation found in $KERNEL/include/linux/bitops.h + */ +#define GENMASK(h, l) \ + (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) + +#define BMVAL(val, lsb, msb) ((val & GENMASK(msb, lsb)) >> lsb) + #define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64)) #define __perf_cs_etmv3_magic 0x3030303030303030ULL From 1c839a5a4061bf4554430037b04a115efc9dd8a1 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 24 May 2019 11:34:53 -0600 Subject: [PATCH 38/85] perf cs-etm: Configure timestamp generation in CPU-wide mode When operating in CPU-wide mode tracers need to generate timestamps in order to correlate the code being traced on one CPU with what is executed on other CPUs. Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190524173508.29044-3-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 57 +++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 3912f0bf04ed..be1e4f20affa 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -99,6 +99,54 @@ out: return err; } +static int cs_etm_set_timestamp(struct auxtrace_record *itr, + struct perf_evsel *evsel, int cpu) +{ + struct cs_etm_recording *ptr; + struct perf_pmu *cs_etm_pmu; + char path[PATH_MAX]; + int err = -EINVAL; + u32 val; + + ptr = container_of(itr, struct cs_etm_recording, itr); + cs_etm_pmu = ptr->cs_etm_pmu; + + if (!cs_etm_is_etmv4(itr, cpu)) + goto out; + + /* Get a handle on TRCIRD0 */ + snprintf(path, PATH_MAX, "cpu%d/%s", + cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR0]); + err = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val); + + /* There was a problem reading the file, bailing out */ + if (err != 1) { + pr_err("%s: can't read file %s\n", + CORESIGHT_ETM_PMU_NAME, path); + goto out; + } + + /* + * TRCIDR0.TSSIZE, bit [28-24], indicates whether global timestamping + * is supported: + * 0b00000 Global timestamping is not implemented + * 0b00110 Implementation supports a maximum timestamp of 48bits. + * 0b01000 Implementation supports a maximum timestamp of 64bits. 
+ */ + val &= GENMASK(28, 24); + if (!val) { + err = -EINVAL; + goto out; + } + + /* All good, let the kernel know */ + evsel->attr.config |= (1 << ETM_OPT_TS); + err = 0; + +out: + return err; +} + static int cs_etm_set_option(struct auxtrace_record *itr, struct perf_evsel *evsel, u32 option) { @@ -118,6 +166,11 @@ static int cs_etm_set_option(struct auxtrace_record *itr, if (err) goto out; break; + case ETM_OPT_TS: + err = cs_etm_set_timestamp(itr, evsel, i); + if (err) + goto out; + break; default: goto out; } @@ -343,6 +396,10 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, err = cs_etm_set_option(itr, cs_etm_evsel, ETM_OPT_CTXTID); if (err) goto out; + + err = cs_etm_set_option(itr, cs_etm_evsel, ETM_OPT_TS); + if (err) + goto out; } /* Add dummy event to keep tracking */ From e5993c42e8bb8dd90eb81a05fb58d5aa243325dd Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 24 May 2019 11:34:54 -0600 Subject: [PATCH 39/85] perf cs-etm: Configure SWITCH_EVENTS in CPU-wide mode Ask the perf core to generate an event when processes are swapped in/out of context. That way proper action can be taken by the decoding code when faced with such event. Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190524173508.29044-4-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index be1e4f20affa..cc7f1cd23b14 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -257,6 +257,9 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, ptr->evlist = evlist; ptr->snapshot_mode = opts->auxtrace_snapshot_mode; + if (perf_can_record_switch_events()) + opts->record_switch_events = true; + evlist__for_each_entry(evlist, evsel) { if (evsel->attr.type == cs_etm_pmu->type) { if (cs_etm_evsel) { From a465f3c3e3e6da729c645ea17615d4ae673588bf Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 24 May 2019 11:34:55 -0600 Subject: [PATCH 40/85] perf cs-etm: Add handling of itrace start events Add handling of ITRACE events in order to add the tid/pid of the executing process to the perf tools machine infrastructure. This information is later retrieved when a contextID packet is found in the trace stream. Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190524173508.29044-5-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index de488b43f440..0742c50fce46 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1657,6 +1657,29 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, return 0; } +static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm, + union perf_event *event) +{ + struct thread *th; + + if (etm->timeless_decoding) + return 0; + + /* + * Add the tid/pid to the log so that we can get a match when + * we get a contextID from the decoder. 
+ */ + th = machine__findnew_thread(etm->machine, + event->itrace_start.pid, + event->itrace_start.tid); + if (!th) + return -ENOMEM; + + thread__put(th); + + return 0; +} + static int cs_etm__process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, @@ -1694,6 +1717,9 @@ static int cs_etm__process_event(struct perf_session *session, return cs_etm__process_timeless_queues(etm, event->fork.tid); + if (event->header.type == PERF_RECORD_ITRACE_START) + return cs_etm__process_itrace_start(etm, event); + return 0; } From e0d170fa9a5c6dfc29fb8f900497d8c5ec29c6e7 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 24 May 2019 11:34:56 -0600 Subject: [PATCH 41/85] perf cs-etm: Add handling of switch-CPU-wide events Add handling of SWITCH-CPU-WIDE events in order to add the tid/pid of the incoming process to the perf tools machine infrastructure. This information is later retrieved when a contextID packet is found in the trace stream. Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190524173508.29044-6-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 0742c50fce46..5322dcaaf654 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1680,6 +1680,42 @@ static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm, return 0; } +static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm, + union perf_event *event) +{ + struct thread *th; + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; + + /* + * Context switch in per-thread mode are irrelevant since perf + * will start/stop tracing as the process is scheduled. + */ + if (etm->timeless_decoding) + return 0; + + /* + * SWITCH_IN events carry the next process to be switched out while + * SWITCH_OUT events carry the process to be switched in. As such + * we don't care about IN events. + */ + if (!out) + return 0; + + /* + * Add the tid/pid to the log so that we can get a match when + * we get a contextID from the decoder. + */ + th = machine__findnew_thread(etm->machine, + event->context_switch.next_prev_pid, + event->context_switch.next_prev_tid); + if (!th) + return -ENOMEM; + + thread__put(th); + + return 0; +} + static int cs_etm__process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, @@ -1719,6 +1755,8 @@ static int cs_etm__process_event(struct perf_session *session, if (event->header.type == PERF_RECORD_ITRACE_START) return cs_etm__process_itrace_start(etm, event); + else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) + return cs_etm__process_switch_cpu_wide(etm, event); return 0; } From 3470d48a4ef30c554934d4a188a97a53656bff57 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 24 May 2019 11:34:57 -0600 Subject: [PATCH 42/85] perf cs-etm: Refactor error path in cs_etm_decoder__new() There is no point in having two different error goto statement since the openCSD API to free a decoder handles NULL pointers. As such function cs_etm_decoder__free() can be called to deal with all aspect of freeing decoder memory. 
Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190524173508.29044-7-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 39fe21e1cf93..5dafec421b0d 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -577,7 +577,7 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params, /* init library print logging support */ ret = cs_etm_decoder__init_def_logger_printing(d_params, decoder); if (ret != 0) - goto err_free_decoder_tree; + goto err_free_decoder; /* init raw frame logging if required */ cs_etm_decoder__init_raw_frame_logging(d_params, decoder); @@ -587,15 +587,13 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params, &t_params[i], decoder); if (ret != 0) - goto err_free_decoder_tree; + goto err_free_decoder; } return decoder; -err_free_decoder_tree: - ocsd_destroy_dcd_tree(decoder->dcd_tree); err_free_decoder: - free(decoder); + cs_etm_decoder__free(decoder); return NULL; } From 5f7cb03555c651cfb702d27c816559696ac734f3 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 24 May 2019 11:34:58 -0600 Subject: [PATCH 43/85] perf cs-etm: Move packet queue out of decoder structure The decoder needs to work with more than one traceID queue if we want to support CPU-wide scenarios with N:1 source/sink topologies. As such move the packet buffer and related fields out of the decoder structure and into the cs_etm_queue structure. 
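One detail worth keeping in mind about the queue being moved here: head and
tail wrap around with a bitwise AND, which is only equivalent to a modulo as
long as the buffer size stays a power of two. A tiny standalone demonstration:

	#include <stdio.h>

	#define PACKET_MAX_BUFFER 1024	/* like CS_ETM_PACKET_MAX_BUFFER: must be a power of two */

	int main(void)
	{
		unsigned int tail = PACKET_MAX_BUFFER - 2;
		int i;

		for (i = 0; i < 4; i++) {
			tail = (tail + 1) & (PACKET_MAX_BUFFER - 1);
			printf("%u ", tail);	/* prints: 1023 0 1 2 */
		}
		printf("\n");
		return 0;
	}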
Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190524173508.29044-8-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 125 +++++++----------- .../perf/util/cs-etm-decoder/cs-etm-decoder.h | 36 +---- tools/perf/util/cs-etm.c | 37 +++++- tools/perf/util/cs-etm.h | 53 ++++++++ 4 files changed, 142 insertions(+), 109 deletions(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 5dafec421b0d..3ac238e58901 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -18,8 +18,6 @@ #include "intlist.h" #include "util.h" -#define MAX_BUFFER 1024 - /* use raw logging */ #ifdef CS_DEBUG_RAW #define CS_LOG_RAW_FRAMES @@ -31,18 +29,12 @@ #endif #endif -#define CS_ETM_INVAL_ADDR 0xdeadbeefdeadbeefUL - struct cs_etm_decoder { void *data; void (*packet_printer)(const char *msg); dcd_tree_handle_t dcd_tree; cs_etm_mem_cb_type mem_access; ocsd_datapath_resp_t prev_return; - u32 packet_count; - u32 head; - u32 tail; - struct cs_etm_packet packet_buffer[MAX_BUFFER]; }; static u32 @@ -88,14 +80,14 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder) return 0; } -int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, +int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue, struct cs_etm_packet *packet) { - if (!decoder || !packet) + if (!packet_queue || !packet) return -EINVAL; /* Nothing to do, might as well just return */ - if (decoder->packet_count == 0) + if (packet_queue->packet_count == 0) return 0; /* * The queueing process in function cs_etm_decoder__buffer_packet() @@ -106,11 +98,12 @@ int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, * value. Otherwise the first element of the packet queue is not * used. 
*/ - decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1); + packet_queue->head = (packet_queue->head + 1) & + (CS_ETM_PACKET_MAX_BUFFER - 1); - *packet = decoder->packet_buffer[decoder->head]; + *packet = packet_queue->packet_buffer[packet_queue->head]; - decoder->packet_count--; + packet_queue->packet_count--; return 1; } @@ -276,84 +269,60 @@ cs_etm_decoder__create_etm_packet_printer(struct cs_etm_trace_params *t_params, trace_config); } -static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) -{ - int i; - - decoder->head = 0; - decoder->tail = 0; - decoder->packet_count = 0; - for (i = 0; i < MAX_BUFFER; i++) { - decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; - decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; - decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; - decoder->packet_buffer[i].instr_count = 0; - decoder->packet_buffer[i].last_instr_taken_branch = false; - decoder->packet_buffer[i].last_instr_size = 0; - decoder->packet_buffer[i].last_instr_type = 0; - decoder->packet_buffer[i].last_instr_subtype = 0; - decoder->packet_buffer[i].last_instr_cond = 0; - decoder->packet_buffer[i].flags = 0; - decoder->packet_buffer[i].exception_number = UINT32_MAX; - decoder->packet_buffer[i].trace_chan_id = UINT8_MAX; - decoder->packet_buffer[i].cpu = INT_MIN; - } -} - static ocsd_datapath_resp_t -cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, +cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue, const u8 trace_chan_id, enum cs_etm_sample_type sample_type) { u32 et = 0; int cpu; - if (decoder->packet_count >= MAX_BUFFER - 1) + if (packet_queue->packet_count >= CS_ETM_PACKET_MAX_BUFFER - 1) return OCSD_RESP_FATAL_SYS_ERR; if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0) return OCSD_RESP_FATAL_SYS_ERR; - et = decoder->tail; - et = (et + 1) & (MAX_BUFFER - 1); - decoder->tail = et; - decoder->packet_count++; + et = packet_queue->tail; + et = (et + 1) & (CS_ETM_PACKET_MAX_BUFFER - 1); + packet_queue->tail = et; + packet_queue->packet_count++; - decoder->packet_buffer[et].sample_type = sample_type; - decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN; - decoder->packet_buffer[et].cpu = cpu; - decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; - decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR; - decoder->packet_buffer[et].instr_count = 0; - decoder->packet_buffer[et].last_instr_taken_branch = false; - decoder->packet_buffer[et].last_instr_size = 0; - decoder->packet_buffer[et].last_instr_type = 0; - decoder->packet_buffer[et].last_instr_subtype = 0; - decoder->packet_buffer[et].last_instr_cond = 0; - decoder->packet_buffer[et].flags = 0; - decoder->packet_buffer[et].exception_number = UINT32_MAX; - decoder->packet_buffer[et].trace_chan_id = trace_chan_id; + packet_queue->packet_buffer[et].sample_type = sample_type; + packet_queue->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN; + packet_queue->packet_buffer[et].cpu = cpu; + packet_queue->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; + packet_queue->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR; + packet_queue->packet_buffer[et].instr_count = 0; + packet_queue->packet_buffer[et].last_instr_taken_branch = false; + packet_queue->packet_buffer[et].last_instr_size = 0; + packet_queue->packet_buffer[et].last_instr_type = 0; + packet_queue->packet_buffer[et].last_instr_subtype = 0; + packet_queue->packet_buffer[et].last_instr_cond = 0; + packet_queue->packet_buffer[et].flags = 0; + packet_queue->packet_buffer[et].exception_number = UINT32_MAX; + 
packet_queue->packet_buffer[et].trace_chan_id = trace_chan_id; - if (decoder->packet_count == MAX_BUFFER - 1) + if (packet_queue->packet_count == CS_ETM_PACKET_MAX_BUFFER - 1) return OCSD_RESP_WAIT; return OCSD_RESP_CONT; } static ocsd_datapath_resp_t -cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, +cs_etm_decoder__buffer_range(struct cs_etm_packet_queue *packet_queue, const ocsd_generic_trace_elem *elem, const uint8_t trace_chan_id) { int ret = 0; struct cs_etm_packet *packet; - ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + ret = cs_etm_decoder__buffer_packet(packet_queue, trace_chan_id, CS_ETM_RANGE); if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT) return ret; - packet = &decoder->packet_buffer[decoder->tail]; + packet = &packet_queue->packet_buffer[packet_queue->tail]; switch (elem->isa) { case ocsd_isa_aarch64: @@ -400,36 +369,36 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, } static ocsd_datapath_resp_t -cs_etm_decoder__buffer_discontinuity(struct cs_etm_decoder *decoder, - const uint8_t trace_chan_id) +cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue, + const uint8_t trace_chan_id) { - return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + return cs_etm_decoder__buffer_packet(queue, trace_chan_id, CS_ETM_DISCONTINUITY); } static ocsd_datapath_resp_t -cs_etm_decoder__buffer_exception(struct cs_etm_decoder *decoder, +cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue, const ocsd_generic_trace_elem *elem, const uint8_t trace_chan_id) { int ret = 0; struct cs_etm_packet *packet; - ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + ret = cs_etm_decoder__buffer_packet(queue, trace_chan_id, CS_ETM_EXCEPTION); if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT) return ret; - packet = &decoder->packet_buffer[decoder->tail]; + packet = &queue->packet_buffer[queue->tail]; packet->exception_number = elem->exception_number; return ret; } static ocsd_datapath_resp_t -cs_etm_decoder__buffer_exception_ret(struct cs_etm_decoder *decoder, +cs_etm_decoder__buffer_exception_ret(struct cs_etm_packet_queue *queue, const uint8_t trace_chan_id) { - return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + return cs_etm_decoder__buffer_packet(queue, trace_chan_id, CS_ETM_EXCEPTION_RET); } @@ -441,6 +410,13 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( { ocsd_datapath_resp_t resp = OCSD_RESP_CONT; struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; + struct cs_etm_queue *etmq = decoder->data; + struct cs_etm_packet_queue *packet_queue; + + /* First get the packet queue */ + packet_queue = cs_etm__etmq_get_packet_queue(etmq); + if (!packet_queue) + return OCSD_RESP_FATAL_SYS_ERR; switch (elem->elem_type) { case OCSD_GEN_TRC_ELEM_UNKNOWN: @@ -448,19 +424,19 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( case OCSD_GEN_TRC_ELEM_EO_TRACE: case OCSD_GEN_TRC_ELEM_NO_SYNC: case OCSD_GEN_TRC_ELEM_TRACE_ON: - resp = cs_etm_decoder__buffer_discontinuity(decoder, + resp = cs_etm_decoder__buffer_discontinuity(packet_queue, trace_chan_id); break; case OCSD_GEN_TRC_ELEM_INSTR_RANGE: - resp = cs_etm_decoder__buffer_range(decoder, elem, + resp = cs_etm_decoder__buffer_range(packet_queue, elem, trace_chan_id); break; case OCSD_GEN_TRC_ELEM_EXCEPTION: - resp = cs_etm_decoder__buffer_exception(decoder, elem, + resp = cs_etm_decoder__buffer_exception(packet_queue, elem, trace_chan_id); break; case OCSD_GEN_TRC_ELEM_EXCEPTION_RET: - resp = 
cs_etm_decoder__buffer_exception_ret(decoder, + resp = cs_etm_decoder__buffer_exception_ret(packet_queue, trace_chan_id); break; case OCSD_GEN_TRC_ELEM_PE_CONTEXT: @@ -554,7 +530,6 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params, decoder->data = d_params->data; decoder->prev_return = OCSD_RESP_CONT; - cs_etm_decoder__clear_buffer(decoder); format = (d_params->formatted ? OCSD_TRC_SRC_FRAME_FORMATTED : OCSD_TRC_SRC_SINGLE); flags = 0; diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 3ab11dfa92ae..6ae7ab4cf5fe 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -14,38 +14,8 @@ #include struct cs_etm_decoder; - -enum cs_etm_sample_type { - CS_ETM_EMPTY, - CS_ETM_RANGE, - CS_ETM_DISCONTINUITY, - CS_ETM_EXCEPTION, - CS_ETM_EXCEPTION_RET, -}; - -enum cs_etm_isa { - CS_ETM_ISA_UNKNOWN, - CS_ETM_ISA_A64, - CS_ETM_ISA_A32, - CS_ETM_ISA_T32, -}; - -struct cs_etm_packet { - enum cs_etm_sample_type sample_type; - enum cs_etm_isa isa; - u64 start_addr; - u64 end_addr; - u32 instr_count; - u32 last_instr_type; - u32 last_instr_subtype; - u32 flags; - u32 exception_number; - u8 last_instr_cond; - u8 last_instr_taken_branch; - u8 last_instr_size; - u8 trace_chan_id; - int cpu; -}; +struct cs_etm_packet; +struct cs_etm_packet_queue; struct cs_etm_queue; @@ -119,7 +89,7 @@ int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, u64 start, u64 end, cs_etm_mem_cb_type cb_func); -int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, +int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue, struct cs_etm_packet *packet); int cs_etm_decoder__reset(struct cs_etm_decoder *decoder); diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 5322dcaaf654..a74c53a45839 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -78,6 +78,7 @@ struct cs_etm_queue { struct cs_etm_packet *packet; const unsigned char *buf; size_t buf_len, buf_used; + struct cs_etm_packet_queue packet_queue; }; static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); @@ -125,6 +126,36 @@ int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) return 0; } +static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue) +{ + int i; + + queue->head = 0; + queue->tail = 0; + queue->packet_count = 0; + for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) { + queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; + queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; + queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; + queue->packet_buffer[i].instr_count = 0; + queue->packet_buffer[i].last_instr_taken_branch = false; + queue->packet_buffer[i].last_instr_size = 0; + queue->packet_buffer[i].last_instr_type = 0; + queue->packet_buffer[i].last_instr_subtype = 0; + queue->packet_buffer[i].last_instr_cond = 0; + queue->packet_buffer[i].flags = 0; + queue->packet_buffer[i].exception_number = UINT32_MAX; + queue->packet_buffer[i].trace_chan_id = UINT8_MAX; + queue->packet_buffer[i].cpu = INT_MIN; + } +} + +struct cs_etm_packet_queue +*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq) +{ + return &etmq->packet_queue; +} + static void cs_etm__packet_dump(const char *pkt_string) { const char *color = PERF_COLOR_BLUE; @@ -513,6 +544,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, etmq->pid = -1; etmq->offset = 0; etmq->period_instructions = 0; + cs_etm__clear_packet_queue(&etmq->packet_queue); out: return 
ret; @@ -1542,10 +1574,13 @@ out: static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq) { int ret; + struct cs_etm_packet_queue *packet_queue; + + packet_queue = cs_etm__etmq_get_packet_queue(etmq); /* Process each packet in this chunk */ while (1) { - ret = cs_etm_decoder__get_packet(etmq->decoder, + ret = cs_etm_decoder__get_packet(packet_queue, etmq->packet); if (ret <= 0) /* diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 826c9eedaf5c..75385e2fd283 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -97,12 +97,57 @@ enum { CS_ETMV4_EXC_END = 31, }; +enum cs_etm_sample_type { + CS_ETM_EMPTY, + CS_ETM_RANGE, + CS_ETM_DISCONTINUITY, + CS_ETM_EXCEPTION, + CS_ETM_EXCEPTION_RET, +}; + +enum cs_etm_isa { + CS_ETM_ISA_UNKNOWN, + CS_ETM_ISA_A64, + CS_ETM_ISA_A32, + CS_ETM_ISA_T32, +}; + /* RB tree for quick conversion between traceID and metadata pointers */ struct intlist *traceid_list; +struct cs_etm_queue; + +struct cs_etm_packet { + enum cs_etm_sample_type sample_type; + enum cs_etm_isa isa; + u64 start_addr; + u64 end_addr; + u32 instr_count; + u32 last_instr_type; + u32 last_instr_subtype; + u32 flags; + u32 exception_number; + u8 last_instr_cond; + u8 last_instr_taken_branch; + u8 last_instr_size; + u8 trace_chan_id; + int cpu; +}; + +#define CS_ETM_PACKET_MAX_BUFFER 1024 + +struct cs_etm_packet_queue { + u32 packet_count; + u32 head; + u32 tail; + struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER]; +}; + #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) +#define CS_ETM_INVAL_ADDR 0xdeadbeefdeadbeefUL + /* * Create a contiguous bitmask starting at bit position @l and ending at * position @h. For example @@ -126,6 +171,8 @@ struct intlist *traceid_list; int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session); int cs_etm__get_cpu(u8 trace_chan_id, int *cpu); +struct cs_etm_packet_queue +*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq); #else static inline int cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused, @@ -139,6 +186,12 @@ static inline int cs_etm__get_cpu(u8 trace_chan_id __maybe_unused, { return -1; } + +static inline struct cs_etm_packet_queue *cs_etm__etmq_get_packet_queue( + struct cs_etm_queue *etmq __maybe_unused) +{ + return NULL; +} #endif #endif From 882f4874ad74c528c3437c9c8783310b073323a1 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 24 May 2019 11:34:59 -0600 Subject: [PATCH 44/85] perf cs-etm: Fix indentation in function cs_etm__process_decoder_queue() Fixing wrong indentation of the while() loop - no change of functionality. 
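For reference, the re-indented loop in cs_etm__process_decoder_queue() reads
as below; this is a condensed sketch of the hunk that follows, with the
comments and most switch cases trimmed:

	while (1) {
		ret = cs_etm_decoder__get_packet(packet_queue, etmq->packet);
		if (ret <= 0)
			break;	/* end of data or error */

		/* Must run before the packet swap done in the handlers */
		ret = cs_etm__set_sample_flags(etmq);
		if (ret < 0)
			break;

		switch (etmq->packet->sample_type) {
		case CS_ETM_RANGE:
			cs_etm__sample(etmq);
			break;
		case CS_ETM_DISCONTINUITY:
			cs_etm__flush(etmq);
			break;
		default:
			break;
		}
	}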
Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Fixes: 3fa0e83e2948 ("perf cs-etm: Modularize main packet processing loop") Link: http://lkml.kernel.org/r/20190524173508.29044-9-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 108 +++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index a74c53a45839..68fec6f019fe 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1578,64 +1578,64 @@ static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq) packet_queue = cs_etm__etmq_get_packet_queue(etmq); - /* Process each packet in this chunk */ - while (1) { - ret = cs_etm_decoder__get_packet(packet_queue, - etmq->packet); - if (ret <= 0) - /* - * Stop processing this chunk on - * end of data or error - */ - break; - + /* Process each packet in this chunk */ + while (1) { + ret = cs_etm_decoder__get_packet(packet_queue, + etmq->packet); + if (ret <= 0) /* - * Since packet addresses are swapped in packet - * handling within below switch() statements, - * thus setting sample flags must be called - * prior to switch() statement to use address - * information before packets swapping. + * Stop processing this chunk on + * end of data or error */ - ret = cs_etm__set_sample_flags(etmq); - if (ret < 0) - break; + break; - switch (etmq->packet->sample_type) { - case CS_ETM_RANGE: - /* - * If the packet contains an instruction - * range, generate instruction sequence - * events. - */ - cs_etm__sample(etmq); - break; - case CS_ETM_EXCEPTION: - case CS_ETM_EXCEPTION_RET: - /* - * If the exception packet is coming, - * make sure the previous instruction - * range packet to be handled properly. - */ - cs_etm__exception(etmq); - break; - case CS_ETM_DISCONTINUITY: - /* - * Discontinuity in trace, flush - * previous branch stack - */ - cs_etm__flush(etmq); - break; - case CS_ETM_EMPTY: - /* - * Should not receive empty packet, - * report error. - */ - pr_err("CS ETM Trace: empty packet\n"); - return -EINVAL; - default: - break; - } + /* + * Since packet addresses are swapped in packet + * handling within below switch() statements, + * thus setting sample flags must be called + * prior to switch() statement to use address + * information before packets swapping. + */ + ret = cs_etm__set_sample_flags(etmq); + if (ret < 0) + break; + + switch (etmq->packet->sample_type) { + case CS_ETM_RANGE: + /* + * If the packet contains an instruction + * range, generate instruction sequence + * events. + */ + cs_etm__sample(etmq); + break; + case CS_ETM_EXCEPTION: + case CS_ETM_EXCEPTION_RET: + /* + * If the exception packet is coming, + * make sure the previous instruction + * range packet to be handled properly. + */ + cs_etm__exception(etmq); + break; + case CS_ETM_DISCONTINUITY: + /* + * Discontinuity in trace, flush + * previous branch stack + */ + cs_etm__flush(etmq); + break; + case CS_ETM_EMPTY: + /* + * Should not receive empty packet, + * report error. 
+ */ + pr_err("CS ETM Trace: empty packet\n"); + return -EINVAL; + default: + break; } + } return ret; } From c7bfa2fd0da966bf83061102fbe1e7b974e81764 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 24 May 2019 11:35:00 -0600 Subject: [PATCH 45/85] perf cs-etm: Introduce the concept of trace ID queues In an ideal world there is one CPU per cs_etm_queue and as such, one trace ID per cs_etm_queue. In the real world CoreSight topologies allow multiple CPUs to use the same sink, which translates to multiple trace IDs per cs_etm_queue. To deal with this a new cs_etm_traceid_queue structure is introduced to enclose all the information related to a single trace ID, allowing a cs_etm_queue to handle traces generated by any number of CPUs. Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190524173508.29044-10-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 4 +- tools/perf/util/cs-etm.c | 362 +++++++++++------- tools/perf/util/cs-etm.h | 15 +- 3 files changed, 235 insertions(+), 146 deletions(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 3ac238e58901..4303d2d00d31 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -413,8 +413,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( struct cs_etm_queue *etmq = decoder->data; struct cs_etm_packet_queue *packet_queue; - /* First get the packet queue */ - packet_queue = cs_etm__etmq_get_packet_queue(etmq); + /* First get the packet queue for this traceID */ + packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id); if (!packet_queue) return OCSD_RESP_FATAL_SYS_ERR; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 68fec6f019fe..9e8212c74055 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -60,25 +60,30 @@ struct cs_etm_auxtrace { unsigned int pmu_type; }; +struct cs_etm_traceid_queue { + u8 trace_chan_id; + u64 period_instructions; + size_t last_branch_pos; + union perf_event *event_buf; + struct branch_stack *last_branch; + struct branch_stack *last_branch_rb; + struct cs_etm_packet *prev_packet; + struct cs_etm_packet *packet; + struct cs_etm_packet_queue packet_queue; +}; + struct cs_etm_queue { struct cs_etm_auxtrace *etm; struct thread *thread; struct cs_etm_decoder *decoder; struct auxtrace_buffer *buffer; - union perf_event *event_buf; unsigned int queue_nr; pid_t pid, tid; int cpu; u64 offset; - u64 period_instructions; - struct branch_stack *last_branch; - struct branch_stack *last_branch_rb; - size_t last_branch_pos; - struct cs_etm_packet *prev_packet; - struct cs_etm_packet *packet; const unsigned char *buf; size_t buf_len, buf_used; - struct cs_etm_packet_queue packet_queue; + struct cs_etm_traceid_queue *traceid_queues; }; static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); @@ -150,10 +155,96 @@ static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue) } } -struct cs_etm_packet_queue -*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq) +static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq, + u8 trace_chan_id) { - return &etmq->packet_queue; + int rc = -ENOMEM; + struct 
cs_etm_auxtrace *etm = etmq->etm; + + cs_etm__clear_packet_queue(&tidq->packet_queue); + + tidq->trace_chan_id = trace_chan_id; + + tidq->packet = zalloc(sizeof(struct cs_etm_packet)); + if (!tidq->packet) + goto out; + + tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet)); + if (!tidq->prev_packet) + goto out_free; + + if (etm->synth_opts.last_branch) { + size_t sz = sizeof(struct branch_stack); + + sz += etm->synth_opts.last_branch_sz * + sizeof(struct branch_entry); + tidq->last_branch = zalloc(sz); + if (!tidq->last_branch) + goto out_free; + tidq->last_branch_rb = zalloc(sz); + if (!tidq->last_branch_rb) + goto out_free; + } + + tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); + if (!tidq->event_buf) + goto out_free; + + return 0; + +out_free: + zfree(&tidq->last_branch_rb); + zfree(&tidq->last_branch); + zfree(&tidq->prev_packet); + zfree(&tidq->packet); +out: + return rc; +} + +static struct cs_etm_traceid_queue +*cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) +{ + struct cs_etm_traceid_queue *tidq; + struct cs_etm_auxtrace *etm = etmq->etm; + + if (!etm->timeless_decoding) + return NULL; + + tidq = etmq->traceid_queues; + + if (tidq) + return tidq; + + tidq = malloc(sizeof(*tidq)); + if (!tidq) + return NULL; + + memset(tidq, 0, sizeof(*tidq)); + + if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id)) + goto out_free; + + etmq->traceid_queues = tidq; + + return etmq->traceid_queues; + +out_free: + free(tidq); + + return NULL; +} + +struct cs_etm_packet_queue +*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) +{ + struct cs_etm_traceid_queue *tidq; + + tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); + if (tidq) + return &tidq->packet_queue; + + return NULL; } static void cs_etm__packet_dump(const char *pkt_string) @@ -327,11 +418,12 @@ static void cs_etm__free_queue(void *priv) thread__zput(etmq->thread); cs_etm_decoder__free(etmq->decoder); - zfree(&etmq->event_buf); - zfree(&etmq->last_branch); - zfree(&etmq->last_branch_rb); - zfree(&etmq->prev_packet); - zfree(&etmq->packet); + zfree(&etmq->traceid_queues->event_buf); + zfree(&etmq->traceid_queues->last_branch); + zfree(&etmq->traceid_queues->last_branch_rb); + zfree(&etmq->traceid_queues->prev_packet); + zfree(&etmq->traceid_queues->packet); + zfree(&etmq->traceid_queues); free(etmq); } @@ -443,37 +535,11 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params = NULL; struct cs_etm_queue *etmq; - size_t szp = sizeof(struct cs_etm_packet); etmq = zalloc(sizeof(*etmq)); if (!etmq) return NULL; - etmq->packet = zalloc(szp); - if (!etmq->packet) - goto out_free; - - etmq->prev_packet = zalloc(szp); - if (!etmq->prev_packet) - goto out_free; - - if (etm->synth_opts.last_branch) { - size_t sz = sizeof(struct branch_stack); - - sz += etm->synth_opts.last_branch_sz * - sizeof(struct branch_entry); - etmq->last_branch = zalloc(sz); - if (!etmq->last_branch) - goto out_free; - etmq->last_branch_rb = zalloc(sz); - if (!etmq->last_branch_rb) - goto out_free; - } - - etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); - if (!etmq->event_buf) - goto out_free; - /* Use metadata to fill in trace parameters for trace decoder */ t_params = zalloc(sizeof(*t_params) * etm->num_cpu); @@ -508,12 +574,6 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) out_free_decoder: cs_etm_decoder__free(etmq->decoder); out_free: - zfree(&t_params); - 
zfree(&etmq->event_buf); - zfree(&etmq->last_branch); - zfree(&etmq->last_branch_rb); - zfree(&etmq->prev_packet); - zfree(&etmq->packet); free(etmq); return NULL; @@ -543,8 +603,6 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, etmq->tid = queue->tid; etmq->pid = -1; etmq->offset = 0; - etmq->period_instructions = 0; - cs_etm__clear_packet_queue(&etmq->packet_queue); out: return ret; @@ -577,10 +635,12 @@ static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) return 0; } -static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq) +static inline +void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) { - struct branch_stack *bs_src = etmq->last_branch_rb; - struct branch_stack *bs_dst = etmq->last_branch; + struct branch_stack *bs_src = tidq->last_branch_rb; + struct branch_stack *bs_dst = tidq->last_branch; size_t nr = 0; /* @@ -600,9 +660,9 @@ static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq) * two steps. First, copy the branches from the most recently inserted * branch ->last_branch_pos until the end of bs_src->entries buffer. */ - nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos; + nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos; memcpy(&bs_dst->entries[0], - &bs_src->entries[etmq->last_branch_pos], + &bs_src->entries[tidq->last_branch_pos], sizeof(struct branch_entry) * nr); /* @@ -615,14 +675,15 @@ static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq) if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) { memcpy(&bs_dst->entries[nr], &bs_src->entries[0], - sizeof(struct branch_entry) * etmq->last_branch_pos); + sizeof(struct branch_entry) * tidq->last_branch_pos); } } -static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq) +static inline +void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq) { - etmq->last_branch_pos = 0; - etmq->last_branch_rb->nr = 0; + tidq->last_branch_pos = 0; + tidq->last_branch_rb->nr = 0; } static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq, @@ -675,9 +736,10 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, return packet->start_addr + offset * 4; } -static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq) +static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) { - struct branch_stack *bs = etmq->last_branch_rb; + struct branch_stack *bs = tidq->last_branch_rb; struct branch_entry *be; /* @@ -686,14 +748,14 @@ static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq) * buffer down. After writing the first element of the stack, move the * insert position back to the end of the buffer. 
*/ - if (!etmq->last_branch_pos) - etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; + if (!tidq->last_branch_pos) + tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; - etmq->last_branch_pos -= 1; + tidq->last_branch_pos -= 1; - be = &bs->entries[etmq->last_branch_pos]; - be->from = cs_etm__last_executed_instr(etmq->prev_packet); - be->to = cs_etm__first_executed_instr(etmq->packet); + be = &bs->entries[tidq->last_branch_pos]; + be->from = cs_etm__last_executed_instr(tidq->prev_packet); + be->to = cs_etm__first_executed_instr(tidq->packet); /* No support for mispredict */ be->flags.mispred = 0; be->flags.predicted = 1; @@ -777,11 +839,12 @@ static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, } static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq, u64 addr, u64 period) { int ret = 0; struct cs_etm_auxtrace *etm = etmq->etm; - union perf_event *event = etmq->event_buf; + union perf_event *event = tidq->event_buf; struct perf_sample sample = {.ip = 0,}; event->sample.header.type = PERF_RECORD_SAMPLE; @@ -794,14 +857,14 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, sample.id = etmq->etm->instructions_id; sample.stream_id = etmq->etm->instructions_id; sample.period = period; - sample.cpu = etmq->packet->cpu; - sample.flags = etmq->prev_packet->flags; + sample.cpu = tidq->packet->cpu; + sample.flags = tidq->prev_packet->flags; sample.insn_len = 1; sample.cpumode = event->sample.header.misc; if (etm->synth_opts.last_branch) { - cs_etm__copy_last_branch_rb(etmq); - sample.branch_stack = etmq->last_branch; + cs_etm__copy_last_branch_rb(etmq, tidq); + sample.branch_stack = tidq->last_branch; } if (etm->synth_opts.inject) { @@ -819,7 +882,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, ret); if (etm->synth_opts.last_branch) - cs_etm__reset_last_branch_rb(etmq); + cs_etm__reset_last_branch_rb(tidq); return ret; } @@ -828,19 +891,20 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, * The cs etm packet encodes an instruction range between a branch target * and the next taken branch. Generate sample accordingly. 
*/ -static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) +static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) { int ret = 0; struct cs_etm_auxtrace *etm = etmq->etm; struct perf_sample sample = {.ip = 0,}; - union perf_event *event = etmq->event_buf; + union perf_event *event = tidq->event_buf; struct dummy_branch_stack { u64 nr; struct branch_entry entries; } dummy_bs; u64 ip; - ip = cs_etm__last_executed_instr(etmq->prev_packet); + ip = cs_etm__last_executed_instr(tidq->prev_packet); event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); @@ -849,12 +913,12 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) sample.ip = ip; sample.pid = etmq->pid; sample.tid = etmq->tid; - sample.addr = cs_etm__first_executed_instr(etmq->packet); + sample.addr = cs_etm__first_executed_instr(tidq->packet); sample.id = etmq->etm->branches_id; sample.stream_id = etmq->etm->branches_id; sample.period = 1; - sample.cpu = etmq->packet->cpu; - sample.flags = etmq->prev_packet->flags; + sample.cpu = tidq->packet->cpu; + sample.flags = tidq->prev_packet->flags; sample.cpumode = event->sample.header.misc; /* @@ -997,33 +1061,34 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, return 0; } -static int cs_etm__sample(struct cs_etm_queue *etmq) +static int cs_etm__sample(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) { struct cs_etm_auxtrace *etm = etmq->etm; struct cs_etm_packet *tmp; int ret; - u64 instrs_executed = etmq->packet->instr_count; + u64 instrs_executed = tidq->packet->instr_count; - etmq->period_instructions += instrs_executed; + tidq->period_instructions += instrs_executed; /* * Record a branch when the last instruction in * PREV_PACKET is a branch. 
*/ if (etm->synth_opts.last_branch && - etmq->prev_packet->sample_type == CS_ETM_RANGE && - etmq->prev_packet->last_instr_taken_branch) - cs_etm__update_last_branch_rb(etmq); + tidq->prev_packet->sample_type == CS_ETM_RANGE && + tidq->prev_packet->last_instr_taken_branch) + cs_etm__update_last_branch_rb(etmq, tidq); if (etm->sample_instructions && - etmq->period_instructions >= etm->instructions_sample_period) { + tidq->period_instructions >= etm->instructions_sample_period) { /* * Emit instruction sample periodically * TODO: allow period to be defined in cycles and clock time */ /* Get number of instructions executed after the sample point */ - u64 instrs_over = etmq->period_instructions - + u64 instrs_over = tidq->period_instructions - etm->instructions_sample_period; /* @@ -1032,31 +1097,31 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) * executed, but PC has not advanced to next instruction) */ u64 offset = (instrs_executed - instrs_over - 1); - u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset); + u64 addr = cs_etm__instr_addr(etmq, tidq->packet, offset); ret = cs_etm__synth_instruction_sample( - etmq, addr, etm->instructions_sample_period); + etmq, tidq, addr, etm->instructions_sample_period); if (ret) return ret; /* Carry remaining instructions into next sample period */ - etmq->period_instructions = instrs_over; + tidq->period_instructions = instrs_over; } if (etm->sample_branches) { bool generate_sample = false; /* Generate sample for tracing on packet */ - if (etmq->prev_packet->sample_type == CS_ETM_DISCONTINUITY) + if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY) generate_sample = true; /* Generate sample for branch taken packet */ - if (etmq->prev_packet->sample_type == CS_ETM_RANGE && - etmq->prev_packet->last_instr_taken_branch) + if (tidq->prev_packet->sample_type == CS_ETM_RANGE && + tidq->prev_packet->last_instr_taken_branch) generate_sample = true; if (generate_sample) { - ret = cs_etm__synth_branch_sample(etmq); + ret = cs_etm__synth_branch_sample(etmq, tidq); if (ret) return ret; } @@ -1067,15 +1132,15 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for * the next incoming packet. */ - tmp = etmq->packet; - etmq->packet = etmq->prev_packet; - etmq->prev_packet = tmp; + tmp = tidq->packet; + tidq->packet = tidq->prev_packet; + tidq->prev_packet = tmp; } return 0; } -static int cs_etm__exception(struct cs_etm_queue *etmq) +static int cs_etm__exception(struct cs_etm_traceid_queue *tidq) { /* * When the exception packet is inserted, whether the last instruction @@ -1088,24 +1153,25 @@ static int cs_etm__exception(struct cs_etm_queue *etmq) * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful * for generating instruction and branch samples. 
*/ - if (etmq->prev_packet->sample_type == CS_ETM_RANGE) - etmq->prev_packet->last_instr_taken_branch = true; + if (tidq->prev_packet->sample_type == CS_ETM_RANGE) + tidq->prev_packet->last_instr_taken_branch = true; return 0; } -static int cs_etm__flush(struct cs_etm_queue *etmq) +static int cs_etm__flush(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) { int err = 0; struct cs_etm_auxtrace *etm = etmq->etm; struct cs_etm_packet *tmp; /* Handle start tracing packet */ - if (etmq->prev_packet->sample_type == CS_ETM_EMPTY) + if (tidq->prev_packet->sample_type == CS_ETM_EMPTY) goto swap_packet; if (etmq->etm->synth_opts.last_branch && - etmq->prev_packet->sample_type == CS_ETM_RANGE) { + tidq->prev_packet->sample_type == CS_ETM_RANGE) { /* * Generate a last branch event for the branches left in the * circular buffer at the end of the trace. @@ -1113,21 +1179,21 @@ static int cs_etm__flush(struct cs_etm_queue *etmq) * Use the address of the end of the last reported execution * range */ - u64 addr = cs_etm__last_executed_instr(etmq->prev_packet); + u64 addr = cs_etm__last_executed_instr(tidq->prev_packet); err = cs_etm__synth_instruction_sample( - etmq, addr, - etmq->period_instructions); + etmq, tidq, addr, + tidq->period_instructions); if (err) return err; - etmq->period_instructions = 0; + tidq->period_instructions = 0; } if (etm->sample_branches && - etmq->prev_packet->sample_type == CS_ETM_RANGE) { - err = cs_etm__synth_branch_sample(etmq); + tidq->prev_packet->sample_type == CS_ETM_RANGE) { + err = cs_etm__synth_branch_sample(etmq, tidq); if (err) return err; } @@ -1138,15 +1204,16 @@ swap_packet: * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for * the next incoming packet. */ - tmp = etmq->packet; - etmq->packet = etmq->prev_packet; - etmq->prev_packet = tmp; + tmp = tidq->packet; + tidq->packet = tidq->prev_packet; + tidq->prev_packet = tmp; } return err; } -static int cs_etm__end_block(struct cs_etm_queue *etmq) +static int cs_etm__end_block(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) { int err; @@ -1160,20 +1227,20 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq) * the trace. */ if (etmq->etm->synth_opts.last_branch && - etmq->prev_packet->sample_type == CS_ETM_RANGE) { + tidq->prev_packet->sample_type == CS_ETM_RANGE) { /* * Use the address of the end of the last reported execution * range. 
*/ - u64 addr = cs_etm__last_executed_instr(etmq->prev_packet); + u64 addr = cs_etm__last_executed_instr(tidq->prev_packet); err = cs_etm__synth_instruction_sample( - etmq, addr, - etmq->period_instructions); + etmq, tidq, addr, + tidq->period_instructions); if (err) return err; - etmq->period_instructions = 0; + tidq->period_instructions = 0; } return 0; @@ -1272,10 +1339,11 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, return false; } -static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic) +static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq, u64 magic) { - struct cs_etm_packet *packet = etmq->packet; - struct cs_etm_packet *prev_packet = etmq->prev_packet; + struct cs_etm_packet *packet = tidq->packet; + struct cs_etm_packet *prev_packet = tidq->prev_packet; if (magic == __perf_cs_etmv3_magic) if (packet->exception_number == CS_ETMV3_EXC_SVC) @@ -1296,9 +1364,10 @@ static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic) return false; } -static bool cs_etm__is_async_exception(struct cs_etm_queue *etmq, u64 magic) +static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq, + u64 magic) { - struct cs_etm_packet *packet = etmq->packet; + struct cs_etm_packet *packet = tidq->packet; if (magic == __perf_cs_etmv3_magic) if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT || @@ -1321,10 +1390,12 @@ static bool cs_etm__is_async_exception(struct cs_etm_queue *etmq, u64 magic) return false; } -static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic) +static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq, + u64 magic) { - struct cs_etm_packet *packet = etmq->packet; - struct cs_etm_packet *prev_packet = etmq->prev_packet; + struct cs_etm_packet *packet = tidq->packet; + struct cs_etm_packet *prev_packet = tidq->prev_packet; if (magic == __perf_cs_etmv3_magic) if (packet->exception_number == CS_ETMV3_EXC_SMC || @@ -1367,10 +1438,11 @@ static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic) return false; } -static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) +static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) { - struct cs_etm_packet *packet = etmq->packet; - struct cs_etm_packet *prev_packet = etmq->prev_packet; + struct cs_etm_packet *packet = tidq->packet; + struct cs_etm_packet *prev_packet = tidq->prev_packet; u64 magic; int ret; @@ -1472,7 +1544,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) return ret; /* The exception is for system call. */ - if (cs_etm__is_syscall(etmq, magic)) + if (cs_etm__is_syscall(etmq, tidq, magic)) packet->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_SYSCALLRET; @@ -1480,7 +1552,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) * The exceptions are triggered by external signals from bus, * interrupt controller, debug module, PE reset or halt. */ - else if (cs_etm__is_async_exception(etmq, magic)) + else if (cs_etm__is_async_exception(tidq, magic)) packet->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | @@ -1489,7 +1561,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) * Otherwise, exception is caused by trap, instruction & * data fault, or alignment errors. 
*/ - else if (cs_etm__is_sync_exception(etmq, magic)) + else if (cs_etm__is_sync_exception(etmq, tidq, magic)) packet->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_INTERRUPT; @@ -1571,17 +1643,18 @@ out: return ret; } -static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq) +static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) { int ret; struct cs_etm_packet_queue *packet_queue; - packet_queue = cs_etm__etmq_get_packet_queue(etmq); + packet_queue = &tidq->packet_queue; /* Process each packet in this chunk */ while (1) { ret = cs_etm_decoder__get_packet(packet_queue, - etmq->packet); + tidq->packet); if (ret <= 0) /* * Stop processing this chunk on @@ -1596,18 +1669,18 @@ static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq) * prior to switch() statement to use address * information before packets swapping. */ - ret = cs_etm__set_sample_flags(etmq); + ret = cs_etm__set_sample_flags(etmq, tidq); if (ret < 0) break; - switch (etmq->packet->sample_type) { + switch (tidq->packet->sample_type) { case CS_ETM_RANGE: /* * If the packet contains an instruction * range, generate instruction sequence * events. */ - cs_etm__sample(etmq); + cs_etm__sample(etmq, tidq); break; case CS_ETM_EXCEPTION: case CS_ETM_EXCEPTION_RET: @@ -1616,14 +1689,14 @@ static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq) * make sure the previous instruction * range packet to be handled properly. */ - cs_etm__exception(etmq); + cs_etm__exception(tidq); break; case CS_ETM_DISCONTINUITY: /* * Discontinuity in trace, flush * previous branch stack */ - cs_etm__flush(etmq); + cs_etm__flush(etmq, tidq); break; case CS_ETM_EMPTY: /* @@ -1643,6 +1716,11 @@ static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq) static int cs_etm__run_decoder(struct cs_etm_queue *etmq) { int err = 0; + struct cs_etm_traceid_queue *tidq; + + tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID); + if (!tidq) + return -EINVAL; /* Go through each buffer in the queue and decode them one by one */ while (1) { @@ -1661,13 +1739,13 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) * an error occurs other than hoping the next one will * be better. */ - err = cs_etm__process_decoder_queue(etmq); + err = cs_etm__process_traceid_queue(etmq, tidq); } while (etmq->buf_len); if (err == 0) /* Flush any remaining branch stack entries */ - err = cs_etm__end_block(etmq); + err = cs_etm__end_block(etmq, tidq); } return err; diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 75385e2fd283..f16082d37ab5 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -136,6 +136,16 @@ struct cs_etm_packet { #define CS_ETM_PACKET_MAX_BUFFER 1024 +/* + * When working with per-thread scenarios the process under trace can + * be scheduled on any CPU and as such, more than one traceID may be + * associated with the same process. Since a traceID of '0' is illegal + * as per the CoreSight architecture, use that specific value to + * identify the queue where all packets (with any traceID) are + * aggregated. 
+ */ +#define CS_ETM_PER_THREAD_TRACEID 0 + struct cs_etm_packet_queue { u32 packet_count; u32 head; @@ -172,7 +182,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session); int cs_etm__get_cpu(u8 trace_chan_id, int *cpu); struct cs_etm_packet_queue -*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq); +*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id); #else static inline int cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused, @@ -188,7 +198,8 @@ static inline int cs_etm__get_cpu(u8 trace_chan_id __maybe_unused, } static inline struct cs_etm_packet_queue *cs_etm__etmq_get_packet_queue( - struct cs_etm_queue *etmq __maybe_unused) + struct cs_etm_queue *etmq __maybe_unused, + u8 trace_chan_id __maybe_unused) { return NULL; } From 6672559307d07f6b58bfd1b4b4cb44a247f51365 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 24 May 2019 11:35:01 -0600 Subject: [PATCH 46/85] perf cs-etm: Get rid of unused cpu in struct cs_etm_queue Nowadays the synthesize code is using the packet's cpu information, making cs_etm_queue::cpu useless. As such simply remove it. Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190524173508.29044-11-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 9e8212c74055..531bbb355ba4 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -79,7 +79,6 @@ struct cs_etm_queue { struct auxtrace_buffer *buffer; unsigned int queue_nr; pid_t pid, tid; - int cpu; u64 offset; const unsigned char *buf; size_t buf_len, buf_used; @@ -599,7 +598,6 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, queue->priv = etmq; etmq->etm = etm; etmq->queue_nr = queue_nr; - etmq->cpu = queue->cpu; etmq->tid = queue->tid; etmq->pid = -1; etmq->offset = 0; @@ -831,11 +829,8 @@ static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, etmq->thread = machine__find_thread(etm->machine, -1, etmq->tid); - if (etmq->thread) { + if (etmq->thread) etmq->pid = etmq->thread->pid_; - if (queue->cpu == -1) - etmq->cpu = etmq->thread->cpu; - } } static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, From 3c21d7d813c55fa1688c2223fe15a1c5cb14a559 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 24 May 2019 11:35:02 -0600 Subject: [PATCH 47/85] perf cs-etm: Move thread to traceid_queue The thread field of structure cs_etm_queue is CPU dependent and as such need to be part of the cs_etm_traceid_queue in order to support CPU-wide trace scenarios. 
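Conceptually, the per-CPU state moves with the traceid_queue; a sketch of
the resulting layout, with unrelated fields elided:

	struct cs_etm_traceid_queue {
		u8 trace_chan_id;
		...
		struct thread *thread;	/* previously in struct cs_etm_queue */
		...
	};

Helpers such as cs_etm__mem_access() and cs_etm__set_pid_tid_cpu() now
resolve the thread through the traceid_queue matching the traceID being
decoded instead of through a single per-queue pointer.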
Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190524173508.29044-12-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 531bbb355ba4..0d51d6d9a594 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -65,6 +65,7 @@ struct cs_etm_traceid_queue { u64 period_instructions; size_t last_branch_pos; union perf_event *event_buf; + struct thread *thread; struct branch_stack *last_branch; struct branch_stack *last_branch_rb; struct cs_etm_packet *prev_packet; @@ -74,7 +75,6 @@ struct cs_etm_traceid_queue { struct cs_etm_queue { struct cs_etm_auxtrace *etm; - struct thread *thread; struct cs_etm_decoder *decoder; struct auxtrace_buffer *buffer; unsigned int queue_nr; @@ -415,7 +415,7 @@ static void cs_etm__free_queue(void *priv) if (!etmq) return; - thread__zput(etmq->thread); + thread__zput(etmq->traceid_queues->thread); cs_etm_decoder__free(etmq->decoder); zfree(&etmq->traceid_queues->event_buf); zfree(&etmq->traceid_queues->last_branch); @@ -503,7 +503,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, machine = etmq->etm->machine; cpumode = cs_etm__cpu_mode(etmq, address); - thread = etmq->thread; + thread = etmq->traceid_queues->thread; if (!thread) { if (cpumode != PERF_RECORD_MISC_KERNEL) return 0; @@ -819,18 +819,21 @@ cs_etm__get_trace(struct cs_etm_queue *etmq) static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, struct auxtrace_queue *queue) { + struct cs_etm_traceid_queue *tidq; struct cs_etm_queue *etmq = queue->priv; + tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID); + /* CPU-wide tracing isn't supported yet */ if (queue->tid == -1) return; - if ((!etmq->thread) && (etmq->tid != -1)) - etmq->thread = machine__find_thread(etm->machine, -1, + if ((!tidq->thread) && (etmq->tid != -1)) + tidq->thread = machine__find_thread(etm->machine, -1, etmq->tid); - if (etmq->thread) - etmq->pid = etmq->thread->pid_; + if (tidq->thread) + etmq->pid = tidq->thread->pid_; } static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, From 0abb868bbcbcf9f86e67bcbfaea7dcaba5a72ac0 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 24 May 2019 11:35:03 -0600 Subject: [PATCH 48/85] perf cs-etm: Move tid/pid to traceid_queue The tid/pid fields of structure cs_etm_queue are CPU dependent and as such need to be part of the cs_etm_traceid_queue in order to support CPU-wide trace scenarios. 
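With this change the timeless decode path matches queues on the tid stored
in the traceid_queue; condensed from the cs_etm__process_timeless_queues()
hunk below, with the error handling trimmed:

	tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);

	if ((tid == -1) || (tidq->tid == tid)) {
		cs_etm__set_pid_tid_cpu(etm, queue, tidq);
		cs_etm__run_decoder(etmq);
	}

The tid itself is seeded from the auxtrace queue when the traceid_queue is
initialized (tidq->tid = queue->tid, with tidq->pid resolved later via the
thread machinery).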
Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190524173508.29044-13-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 44 ++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 0d51d6d9a594..7e3b4d10f5c4 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -62,6 +62,7 @@ struct cs_etm_auxtrace { struct cs_etm_traceid_queue { u8 trace_chan_id; + pid_t pid, tid; u64 period_instructions; size_t last_branch_pos; union perf_event *event_buf; @@ -78,7 +79,6 @@ struct cs_etm_queue { struct cs_etm_decoder *decoder; struct auxtrace_buffer *buffer; unsigned int queue_nr; - pid_t pid, tid; u64 offset; const unsigned char *buf; size_t buf_len, buf_used; @@ -159,10 +159,14 @@ static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) { int rc = -ENOMEM; + struct auxtrace_queue *queue; struct cs_etm_auxtrace *etm = etmq->etm; cs_etm__clear_packet_queue(&tidq->packet_queue); + queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; + tidq->tid = queue->tid; + tidq->pid = -1; tidq->trace_chan_id = trace_chan_id; tidq->packet = zalloc(sizeof(struct cs_etm_packet)); @@ -598,8 +602,6 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, queue->priv = etmq; etmq->etm = etm; etmq->queue_nr = queue_nr; - etmq->tid = queue->tid; - etmq->pid = -1; etmq->offset = 0; out: @@ -817,23 +819,19 @@ cs_etm__get_trace(struct cs_etm_queue *etmq) } static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, - struct auxtrace_queue *queue) + struct auxtrace_queue *queue, + struct cs_etm_traceid_queue *tidq) { - struct cs_etm_traceid_queue *tidq; - struct cs_etm_queue *etmq = queue->priv; - - tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID); - /* CPU-wide tracing isn't supported yet */ if (queue->tid == -1) return; - if ((!tidq->thread) && (etmq->tid != -1)) + if ((!tidq->thread) && (tidq->tid != -1)) tidq->thread = machine__find_thread(etm->machine, -1, - etmq->tid); + tidq->tid); if (tidq->thread) - etmq->pid = tidq->thread->pid_; + tidq->pid = tidq->thread->pid_; } static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, @@ -850,8 +848,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, event->sample.header.size = sizeof(struct perf_event_header); sample.ip = addr; - sample.pid = etmq->pid; - sample.tid = etmq->tid; + sample.pid = tidq->pid; + sample.tid = tidq->tid; sample.id = etmq->etm->instructions_id; sample.stream_id = etmq->etm->instructions_id; sample.period = period; @@ -909,8 +907,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, event->sample.header.size = sizeof(struct perf_event_header); sample.ip = ip; - sample.pid = etmq->pid; - sample.tid = etmq->tid; + sample.pid = tidq->pid; + sample.tid = tidq->tid; sample.addr = cs_etm__first_executed_instr(tidq->packet); sample.id = etmq->etm->branches_id; sample.stream_id = etmq->etm->branches_id; @@ -1758,9 +1756,19 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, for (i = 0; i < queues->nr_queues; i++) { struct auxtrace_queue *queue = &etm->queues.queue_array[i]; struct cs_etm_queue *etmq = queue->priv; + struct cs_etm_traceid_queue *tidq; - if (etmq && 
((tid == -1) || (etmq->tid == tid))) { - cs_etm__set_pid_tid_cpu(etm, queue); + if (!etmq) + continue; + + tidq = cs_etm__etmq_get_traceid_queue(etmq, + CS_ETM_PER_THREAD_TRACEID); + + if (!tidq) + continue; + + if ((tid == -1) || (tidq->tid == tid)) { + cs_etm__set_pid_tid_cpu(etm, queue, tidq); cs_etm__run_decoder(etmq); } } From af21577c05a746579c4c084d51d7f0661def128c Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 24 May 2019 11:35:04 -0600 Subject: [PATCH 49/85] perf cs-etm: Use traceID aware memory callback API When working with CPU-wide traces different traceID may be found in the same stream. As such we need to use the decoder callback that provides the traceID in order to know the thread context being decoded. Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190524173508.29044-14-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 14 +++---- .../perf/util/cs-etm-decoder/cs-etm-decoder.h | 3 +- tools/perf/util/cs-etm.c | 41 +++++++++++++------ 3 files changed, 36 insertions(+), 22 deletions(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 4303d2d00d31..87264b79de0e 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -41,15 +41,14 @@ static u32 cs_etm_decoder__mem_access(const void *context, const ocsd_vaddr_t address, const ocsd_mem_space_acc_t mem_space __maybe_unused, + const u8 trace_chan_id, const u32 req_size, u8 *buffer) { struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; - return decoder->mem_access(decoder->data, - address, - req_size, - buffer); + return decoder->mem_access(decoder->data, trace_chan_id, + address, req_size, buffer); } int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, @@ -58,9 +57,10 @@ int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, { decoder->mem_access = cb_func; - if (ocsd_dt_add_callback_mem_acc(decoder->dcd_tree, start, end, - OCSD_MEM_SPACE_ANY, - cs_etm_decoder__mem_access, decoder)) + if (ocsd_dt_add_callback_trcid_mem_acc(decoder->dcd_tree, start, end, + OCSD_MEM_SPACE_ANY, + cs_etm_decoder__mem_access, + decoder)) return -1; return 0; diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 6ae7ab4cf5fe..11f3391d06f2 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -19,8 +19,7 @@ struct cs_etm_packet_queue; struct cs_etm_queue; -typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u64, - size_t, u8 *); +typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u8, u64, size_t, u8 *); struct cs_etmv3_trace_params { u32 reg_ctrl; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 7e3b4d10f5c4..2483293266d8 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -491,8 +491,8 @@ static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) } } -static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, - size_t size, u8 *buffer) +static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, + u64 address, size_t size, u8 *buffer) { u8 cpumode; u64 offset; @@ -501,6 +501,8 @@ static u32 
cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, struct machine *machine; struct addr_location al; + (void)trace_chan_id; + if (!etmq) return 0; @@ -687,10 +689,12 @@ void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq) } static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq, - u64 addr) { + u8 trace_chan_id, u64 addr) +{ u8 instrBytes[2]; - cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes); + cs_etm__mem_access(etmq, trace_chan_id, addr, + ARRAY_SIZE(instrBytes), instrBytes); /* * T32 instruction size is indicated by bits[15:11] of the first * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111 @@ -719,6 +723,7 @@ u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet) } static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, + u64 trace_chan_id, const struct cs_etm_packet *packet, u64 offset) { @@ -726,7 +731,8 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, u64 addr = packet->start_addr; while (offset > 0) { - addr += cs_etm__t32_instr_size(etmq, addr); + addr += cs_etm__t32_instr_size(etmq, + trace_chan_id, addr); offset--; } return addr; @@ -1063,6 +1069,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, struct cs_etm_auxtrace *etm = etmq->etm; struct cs_etm_packet *tmp; int ret; + u8 trace_chan_id = tidq->trace_chan_id; u64 instrs_executed = tidq->packet->instr_count; tidq->period_instructions += instrs_executed; @@ -1093,7 +1100,8 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, * executed, but PC has not advanced to next instruction) */ u64 offset = (instrs_executed - instrs_over - 1); - u64 addr = cs_etm__instr_addr(etmq, tidq->packet, offset); + u64 addr = cs_etm__instr_addr(etmq, trace_chan_id, + tidq->packet, offset); ret = cs_etm__synth_instruction_sample( etmq, tidq, addr, etm->instructions_sample_period); @@ -1268,7 +1276,7 @@ static int cs_etm__get_data_block(struct cs_etm_queue *etmq) return etmq->buf_len; } -static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, +static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id, struct cs_etm_packet *packet, u64 end_addr) { @@ -1291,7 +1299,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, * so below only read 2 bytes as instruction size for T32. 
*/ addr = end_addr - 2; - cs_etm__mem_access(etmq, addr, sizeof(instr16), (u8 *)&instr16); + cs_etm__mem_access(etmq, trace_chan_id, addr, + sizeof(instr16), (u8 *)&instr16); if ((instr16 & 0xFF00) == 0xDF00) return true; @@ -1306,7 +1315,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, * +---------+---------+-------------------------+ */ addr = end_addr - 4; - cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32); + cs_etm__mem_access(etmq, trace_chan_id, addr, + sizeof(instr32), (u8 *)&instr32); if ((instr32 & 0x0F000000) == 0x0F000000 && (instr32 & 0xF0000000) != 0xF0000000) return true; @@ -1322,7 +1332,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, * +-----------------------+---------+-----------+ */ addr = end_addr - 4; - cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32); + cs_etm__mem_access(etmq, trace_chan_id, addr, + sizeof(instr32), (u8 *)&instr32); if ((instr32 & 0xFFE0001F) == 0xd4000001) return true; @@ -1338,6 +1349,7 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, struct cs_etm_traceid_queue *tidq, u64 magic) { + u8 trace_chan_id = tidq->trace_chan_id; struct cs_etm_packet *packet = tidq->packet; struct cs_etm_packet *prev_packet = tidq->prev_packet; @@ -1352,7 +1364,7 @@ static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, */ if (magic == __perf_cs_etmv4_magic) { if (packet->exception_number == CS_ETMV4_EXC_CALL && - cs_etm__is_svc_instr(etmq, prev_packet, + cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet, prev_packet->end_addr)) return true; } @@ -1390,6 +1402,7 @@ static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, struct cs_etm_traceid_queue *tidq, u64 magic) { + u8 trace_chan_id = tidq->trace_chan_id; struct cs_etm_packet *packet = tidq->packet; struct cs_etm_packet *prev_packet = tidq->prev_packet; @@ -1415,7 +1428,7 @@ static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, * (SMC, HVC) are taken as sync exceptions. */ if (packet->exception_number == CS_ETMV4_EXC_CALL && - !cs_etm__is_svc_instr(etmq, prev_packet, + !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet, prev_packet->end_addr)) return true; @@ -1439,6 +1452,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, { struct cs_etm_packet *packet = tidq->packet; struct cs_etm_packet *prev_packet = tidq->prev_packet; + u8 trace_chan_id = tidq->trace_chan_id; u64 magic; int ret; @@ -1519,7 +1533,8 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, if (prev_packet->flags == (PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_INTERRUPT) && - cs_etm__is_svc_instr(etmq, packet, packet->start_addr)) + cs_etm__is_svc_instr(etmq, trace_chan_id, + packet, packet->start_addr)) prev_packet->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_SYSCALLRET; From c152d4d49a358224d73cb7ce7cc6090676b04931 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 24 May 2019 11:35:05 -0600 Subject: [PATCH 50/85] perf cs-etm: Add support for multiple traceID queues When operating in CPU-wide trace mode with a source/sink topology of N:1 packets with multiple traceID will end up in the same cs_etm_queue. In order to properly decode packets they need to be split in different queues, i.e one queue per traceID. As such add support for multiple traceID per cs_etm_queue by adding a new cs_etm_traceid_queue every time a new traceID is discovered in the trace stream. 
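The lookup path condenses to the following (a sketch of the code below,
with allocation failure handling elided):

	/* traceID -> array index mapping is kept in an intlist */
	inode = intlist__find(etmq->traceid_queues_list, trace_chan_id);
	if (inode) {
		idx = (int)(intptr_t)inode->priv;
		return etmq->traceid_queues[idx];
	}

	/* New traceID: allocate a queue and grow the array by one slot */
	idx = intlist__nr_entries(etmq->traceid_queues_list);
	traceid_queues = reallocarray(etmq->traceid_queues, idx + 1,
				      sizeof(*traceid_queues));

Since reallocarray() leaves the original block untouched on failure, the
result is assigned to a temporary and only stored back into
etmq->traceid_queues on success. In the timeless (per-thread) case all
packets are funneled to a single queue by forcing trace_chan_id to
CS_ETM_PER_THREAD_TRACEID.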
Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190524173508.29044-15-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 3 + tools/perf/util/cs-etm.c | 131 ++++++++++++++++++++++++++++++------- 2 files changed, 110 insertions(+), 24 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 85fbcd265351..51dd00f65709 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -413,6 +413,9 @@ ifdef CORESIGHT $(call feature_check,libopencsd) ifeq ($(feature-libopencsd), 1) CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS) + ifeq ($(feature-reallocarray), 0) + CFLAGS += -DCOMPAT_NEED_REALLOCARRAY + endif LDFLAGS += $(LIBOPENCSD_LDFLAGS) EXTLIBS += $(OPENCSDLIBS) $(call detected,CONFIG_LIBOPENCSD) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 2483293266d8..afc2491f9f2a 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -29,6 +29,7 @@ #include "thread.h" #include "thread_map.h" #include "thread-stack.h" +#include #include "util.h" #define MAX_TIMESTAMP (~0ULL) @@ -82,7 +83,9 @@ struct cs_etm_queue { u64 offset; const unsigned char *buf; size_t buf_len, buf_used; - struct cs_etm_traceid_queue *traceid_queues; + /* Conversion between traceID and index in traceid_queues array */ + struct intlist *traceid_queues_list; + struct cs_etm_traceid_queue **traceid_queues; }; static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); @@ -208,31 +211,71 @@ out: static struct cs_etm_traceid_queue *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) { - struct cs_etm_traceid_queue *tidq; + int idx; + struct int_node *inode; + struct intlist *traceid_queues_list; + struct cs_etm_traceid_queue *tidq, **traceid_queues; struct cs_etm_auxtrace *etm = etmq->etm; - if (!etm->timeless_decoding) - return NULL; + if (etm->timeless_decoding) + trace_chan_id = CS_ETM_PER_THREAD_TRACEID; - tidq = etmq->traceid_queues; + traceid_queues_list = etmq->traceid_queues_list; - if (tidq) - return tidq; + /* + * Check if the traceid_queue exist for this traceID by looking + * in the queue list. + */ + inode = intlist__find(traceid_queues_list, trace_chan_id); + if (inode) { + idx = (int)(intptr_t)inode->priv; + return etmq->traceid_queues[idx]; + } + /* We couldn't find a traceid_queue for this traceID, allocate one */ tidq = malloc(sizeof(*tidq)); if (!tidq) return NULL; memset(tidq, 0, sizeof(*tidq)); + /* Get a valid index for the new traceid_queue */ + idx = intlist__nr_entries(traceid_queues_list); + /* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */ + inode = intlist__findnew(traceid_queues_list, trace_chan_id); + if (!inode) + goto out_free; + + /* Associate this traceID with this index */ + inode->priv = (void *)(intptr_t)idx; + if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id)) goto out_free; - etmq->traceid_queues = tidq; + /* Grow the traceid_queues array by one unit */ + traceid_queues = etmq->traceid_queues; + traceid_queues = reallocarray(traceid_queues, + idx + 1, + sizeof(*traceid_queues)); - return etmq->traceid_queues; + /* + * On failure reallocarray() returns NULL and the original block of + * memory is left untouched. 
+ */ + if (!traceid_queues) + goto out_free; + + traceid_queues[idx] = tidq; + etmq->traceid_queues = traceid_queues; + + return etmq->traceid_queues[idx]; out_free: + /* + * Function intlist__remove() removes the inode from the list + * and delete the memory associated to it. + */ + intlist__remove(traceid_queues_list, inode); free(tidq); return NULL; @@ -412,6 +455,44 @@ static int cs_etm__flush_events(struct perf_session *session, return cs_etm__process_timeless_queues(etm, -1); } +static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq) +{ + int idx; + uintptr_t priv; + struct int_node *inode, *tmp; + struct cs_etm_traceid_queue *tidq; + struct intlist *traceid_queues_list = etmq->traceid_queues_list; + + intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) { + priv = (uintptr_t)inode->priv; + idx = priv; + + /* Free this traceid_queue from the array */ + tidq = etmq->traceid_queues[idx]; + thread__zput(tidq->thread); + zfree(&tidq->event_buf); + zfree(&tidq->last_branch); + zfree(&tidq->last_branch_rb); + zfree(&tidq->prev_packet); + zfree(&tidq->packet); + zfree(&tidq); + + /* + * Function intlist__remove() removes the inode from the list + * and delete the memory associated to it. + */ + intlist__remove(traceid_queues_list, inode); + } + + /* Then the RB tree itself */ + intlist__delete(traceid_queues_list); + etmq->traceid_queues_list = NULL; + + /* finally free the traceid_queues array */ + free(etmq->traceid_queues); + etmq->traceid_queues = NULL; +} + static void cs_etm__free_queue(void *priv) { struct cs_etm_queue *etmq = priv; @@ -419,14 +500,8 @@ static void cs_etm__free_queue(void *priv) if (!etmq) return; - thread__zput(etmq->traceid_queues->thread); cs_etm_decoder__free(etmq->decoder); - zfree(&etmq->traceid_queues->event_buf); - zfree(&etmq->traceid_queues->last_branch); - zfree(&etmq->traceid_queues->last_branch_rb); - zfree(&etmq->traceid_queues->prev_packet); - zfree(&etmq->traceid_queues->packet); - zfree(&etmq->traceid_queues); + cs_etm__free_traceid_queues(etmq); free(etmq); } @@ -497,19 +572,21 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, u8 cpumode; u64 offset; int len; - struct thread *thread; - struct machine *machine; - struct addr_location al; - - (void)trace_chan_id; + struct thread *thread; + struct machine *machine; + struct addr_location al; + struct cs_etm_traceid_queue *tidq; if (!etmq) return 0; machine = etmq->etm->machine; cpumode = cs_etm__cpu_mode(etmq, address); + tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); + if (!tidq) + return 0; - thread = etmq->traceid_queues->thread; + thread = tidq->thread; if (!thread) { if (cpumode != PERF_RECORD_MISC_KERNEL) return 0; @@ -545,6 +622,10 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) if (!etmq) return NULL; + etmq->traceid_queues_list = intlist__new(NULL); + if (!etmq->traceid_queues_list) + goto out_free; + /* Use metadata to fill in trace parameters for trace decoder */ t_params = zalloc(sizeof(*t_params) * etm->num_cpu); @@ -579,6 +660,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) out_free_decoder: cs_etm_decoder__free(etmq->decoder); out_free: + intlist__delete(etmq->traceid_queues_list); free(etmq); return NULL; @@ -1280,8 +1362,9 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id, struct cs_etm_packet *packet, u64 end_addr) { - u16 instr16; - u32 instr32; + /* Initialise to keep compiler happy */ + u16 instr16 = 0; + u32 instr32 = 0; u64 
addr; switch (packet->isa) { From 0a6be300eb7b9f6630d565e213a904e5c75a86c0 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 24 May 2019 11:35:06 -0600 Subject: [PATCH 51/85] perf cs-etm: Linking PE contextID with perf thread mechanic Link contextID packets received from the decoder with the perf tool thread mechanic so that we know the specifics of the process currently executing. Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190524173508.29044-16-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 20 ++++++++++++ tools/perf/util/cs-etm.c | 32 +++++++++++++++---- tools/perf/util/cs-etm.h | 10 ++++++ 3 files changed, 56 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 87264b79de0e..ce85e52f989c 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -402,6 +402,24 @@ cs_etm_decoder__buffer_exception_ret(struct cs_etm_packet_queue *queue, CS_ETM_EXCEPTION_RET); } +static ocsd_datapath_resp_t +cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, + const ocsd_generic_trace_elem *elem, + const uint8_t trace_chan_id) +{ + pid_t tid; + + /* Ignore PE_CONTEXT packets that don't have a valid contextID */ + if (!elem->context.ctxt_id_valid) + return OCSD_RESP_CONT; + + tid = elem->context.context_id; + if (cs_etm__etmq_set_tid(etmq, tid, trace_chan_id)) + return OCSD_RESP_FATAL_SYS_ERR; + + return OCSD_RESP_CONT; +} + static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( const void *context, const ocsd_trc_index_t indx __maybe_unused, @@ -440,6 +458,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( trace_chan_id); break; case OCSD_GEN_TRC_ELEM_PE_CONTEXT: + resp = cs_etm_decoder__set_tid(etmq, elem, trace_chan_id); + break; case OCSD_GEN_TRC_ELEM_ADDR_NACC: case OCSD_GEN_TRC_ELEM_TIMESTAMP: case OCSD_GEN_TRC_ELEM_CYCLE_COUNT: diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index afc2491f9f2a..17adf554b679 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -907,13 +907,8 @@ cs_etm__get_trace(struct cs_etm_queue *etmq) } static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, - struct auxtrace_queue *queue, struct cs_etm_traceid_queue *tidq) { - /* CPU-wide tracing isn't supported yet */ - if (queue->tid == -1) - return; - if ((!tidq->thread) && (tidq->tid != -1)) tidq->thread = machine__find_thread(etm->machine, -1, tidq->tid); @@ -922,6 +917,31 @@ static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, tidq->pid = tidq->thread->pid_; } +int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq, + pid_t tid, u8 trace_chan_id) +{ + int cpu, err = -EINVAL; + struct cs_etm_auxtrace *etm = etmq->etm; + struct cs_etm_traceid_queue *tidq; + + tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); + if (!tidq) + return err; + + if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0) + return err; + + err = machine__set_current_tid(etm->machine, cpu, tid, tid); + if (err) + return err; + + tidq->tid = tid; + thread__zput(tidq->thread); + + cs_etm__set_pid_tid_cpu(etm, tidq); + return 0; +} + static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, struct cs_etm_traceid_queue *tidq, u64 
					      u64 addr, u64 period)
@@ -1866,7 +1886,7 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
			continue;

		if ((tid == -1) || (tidq->tid == tid)) {
-			cs_etm__set_pid_tid_cpu(etm, queue, tidq);
+			cs_etm__set_pid_tid_cpu(etm, tidq);
			cs_etm__run_decoder(etmq);
		}
	}
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index f16082d37ab5..b2a7628620bf 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -181,6 +181,8 @@ struct cs_etm_packet_queue {
 int cs_etm__process_auxtrace_info(union perf_event *event,
				  struct perf_session *session);
 int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
+int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq,
+			 pid_t tid, u8 trace_chan_id);
 struct cs_etm_packet_queue
 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id);
 #else
@@ -197,6 +199,14 @@ static inline int cs_etm__get_cpu(u8 trace_chan_id __maybe_unused,
	return -1;
 }

+static inline int cs_etm__etmq_set_tid(
+				struct cs_etm_queue *etmq __maybe_unused,
+				pid_t tid __maybe_unused,
+				u8 trace_chan_id __maybe_unused)
+{
+	return -1;
+}
+
 static inline struct cs_etm_packet_queue *cs_etm__etmq_get_packet_queue(
				struct cs_etm_queue *etmq __maybe_unused,
				u8 trace_chan_id __maybe_unused)

From 675f302fc261fc2af738397cdfb139bae3c58a18 Mon Sep 17 00:00:00 2001
From: Mathieu Poirier
Date: Fri, 24 May 2019 11:35:07 -0600
Subject: [PATCH 52/85] perf cs-etm: Add notion of time to decoding code

This patch deals with timestamp packets received from the decoding
library in order to give the front end packet processing loop a handle
on the time at which the instructions conveyed by range packets were
executed.

Signed-off-by: Mathieu Poirier
Tested-by: Leo Yan
Cc: Alexander Shishkin
Cc: Jiri Olsa
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Suzuki Poulouse
Cc: coresight@lists.linaro.org
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/20190524173508.29044-17-mathieu.poirier@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 111 +++++++++++++++++-
 tools/perf/util/cs-etm.c                      |  19 +++
 tools/perf/util/cs-etm.h                      |  17 +++
 3 files changed, 143 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index ce85e52f989c..bb45e23018ee 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -269,6 +269,75 @@ cs_etm_decoder__create_etm_packet_printer(struct cs_etm_trace_params *t_params,
					     trace_config);
 }

+static ocsd_datapath_resp_t
+cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq,
+				  struct cs_etm_packet_queue *packet_queue,
+				  const uint8_t trace_chan_id)
+{
+	/* No timestamp packet has been received, nothing to do */
+	if (!packet_queue->timestamp)
+		return OCSD_RESP_CONT;
+
+	packet_queue->timestamp = packet_queue->next_timestamp;
+
+	/* Estimate the timestamp for the next range packet */
+	packet_queue->next_timestamp += packet_queue->instr_count;
+	packet_queue->instr_count = 0;
+
+	/* Tell the front end which traceid_queue needs attention */
+	cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id);
+
+	return OCSD_RESP_WAIT;
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
+				  const ocsd_generic_trace_elem *elem,
+				  const uint8_t trace_chan_id)
+{
+	struct cs_etm_packet_queue *packet_queue;
+
+	/* First get the packet queue for this traceID */
+	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
trace_chan_id); + if (!packet_queue) + return OCSD_RESP_FATAL_SYS_ERR; + + /* + * We've seen a timestamp packet before - simply record the new value. + * Function do_soft_timestamp() will report the value to the front end, + * hence asking the decoder to keep decoding rather than stopping. + */ + if (packet_queue->timestamp) { + packet_queue->next_timestamp = elem->timestamp; + return OCSD_RESP_CONT; + } + + /* + * This is the first timestamp we've seen since the beginning of traces + * or a discontinuity. Since timestamps packets are generated *after* + * range packets have been generated, we need to estimate the time at + * which instructions started by substracting the number of instructions + * executed to the timestamp. + */ + packet_queue->timestamp = elem->timestamp - packet_queue->instr_count; + packet_queue->next_timestamp = elem->timestamp; + packet_queue->instr_count = 0; + + /* Tell the front end which traceid_queue needs attention */ + cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id); + + /* Halt processing until we are being told to proceed */ + return OCSD_RESP_WAIT; +} + +static void +cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue) +{ + packet_queue->timestamp = 0; + packet_queue->next_timestamp = 0; + packet_queue->instr_count = 0; +} + static ocsd_datapath_resp_t cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue, const u8 trace_chan_id, @@ -310,7 +379,8 @@ cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue, } static ocsd_datapath_resp_t -cs_etm_decoder__buffer_range(struct cs_etm_packet_queue *packet_queue, +cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq, + struct cs_etm_packet_queue *packet_queue, const ocsd_generic_trace_elem *elem, const uint8_t trace_chan_id) { @@ -365,6 +435,23 @@ cs_etm_decoder__buffer_range(struct cs_etm_packet_queue *packet_queue, packet->last_instr_size = elem->last_instr_sz; + /* per-thread scenario, no need to generate a timestamp */ + if (cs_etm__etmq_is_timeless(etmq)) + goto out; + + /* + * The packet queue is full and we haven't seen a timestamp (had we + * seen one the packet queue wouldn't be full). Let the front end + * deal with it. + */ + if (ret == OCSD_RESP_WAIT) + goto out; + + packet_queue->instr_count += elem->num_instr_range; + /* Tell the front end we have a new timestamp to process */ + ret = cs_etm_decoder__do_soft_timestamp(etmq, packet_queue, + trace_chan_id); +out: return ret; } @@ -372,6 +459,11 @@ static ocsd_datapath_resp_t cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue, const uint8_t trace_chan_id) { + /* + * Something happened and who knows when we'll get new traces so + * reset time statistics. + */ + cs_etm_decoder__reset_timestamp(queue); return cs_etm_decoder__buffer_packet(queue, trace_chan_id, CS_ETM_DISCONTINUITY); } @@ -404,6 +496,7 @@ cs_etm_decoder__buffer_exception_ret(struct cs_etm_packet_queue *queue, static ocsd_datapath_resp_t cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, + struct cs_etm_packet_queue *packet_queue, const ocsd_generic_trace_elem *elem, const uint8_t trace_chan_id) { @@ -417,6 +510,12 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, if (cs_etm__etmq_set_tid(etmq, tid, trace_chan_id)) return OCSD_RESP_FATAL_SYS_ERR; + /* + * A timestamp is generated after a PE_CONTEXT element so make sure + * to rely on that coming one. 
+ */ + cs_etm_decoder__reset_timestamp(packet_queue); + return OCSD_RESP_CONT; } @@ -446,7 +545,7 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( trace_chan_id); break; case OCSD_GEN_TRC_ELEM_INSTR_RANGE: - resp = cs_etm_decoder__buffer_range(packet_queue, elem, + resp = cs_etm_decoder__buffer_range(etmq, packet_queue, elem, trace_chan_id); break; case OCSD_GEN_TRC_ELEM_EXCEPTION: @@ -457,11 +556,15 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( resp = cs_etm_decoder__buffer_exception_ret(packet_queue, trace_chan_id); break; + case OCSD_GEN_TRC_ELEM_TIMESTAMP: + resp = cs_etm_decoder__do_hard_timestamp(etmq, elem, + trace_chan_id); + break; case OCSD_GEN_TRC_ELEM_PE_CONTEXT: - resp = cs_etm_decoder__set_tid(etmq, elem, trace_chan_id); + resp = cs_etm_decoder__set_tid(etmq, packet_queue, + elem, trace_chan_id); break; case OCSD_GEN_TRC_ELEM_ADDR_NACC: - case OCSD_GEN_TRC_ELEM_TIMESTAMP: case OCSD_GEN_TRC_ELEM_CYCLE_COUNT: case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN: case OCSD_GEN_TRC_ELEM_EVENT: diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 17adf554b679..91496a3a2209 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -80,6 +80,7 @@ struct cs_etm_queue { struct cs_etm_decoder *decoder; struct auxtrace_buffer *buffer; unsigned int queue_nr; + u8 pending_timestamp; u64 offset; const unsigned char *buf; size_t buf_len, buf_used; @@ -133,6 +134,19 @@ int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) return 0; } +void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, + u8 trace_chan_id) +{ + /* + * Wnen a timestamp packet is encountered the backend code + * is stopped so that the front end has time to process packets + * that were accumulated in the traceID queue. Since there can + * be more than one channel per cs_etm_queue, we need to specify + * what traceID queue needs servicing. + */ + etmq->pending_timestamp = trace_chan_id; +} + static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue) { int i; @@ -942,6 +956,11 @@ int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq, return 0; } +bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq) +{ + return !!etmq->etm->timeless_decoding; +} + static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, struct cs_etm_traceid_queue *tidq, u64 addr, u64 period) diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index b2a7628620bf..33b57e748c3d 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -150,6 +150,9 @@ struct cs_etm_packet_queue { u32 packet_count; u32 head; u32 tail; + u32 instr_count; + u64 timestamp; + u64 next_timestamp; struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER]; }; @@ -183,6 +186,9 @@ int cs_etm__process_auxtrace_info(union perf_event *event, int cs_etm__get_cpu(u8 trace_chan_id, int *cpu); int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq, pid_t tid, u8 trace_chan_id); +bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq); +void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, + u8 trace_chan_id); struct cs_etm_packet_queue *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id); #else @@ -207,6 +213,17 @@ static inline int cs_etm__etmq_set_tid( return -1; } +static inline bool cs_etm__etmq_is_timeless( + struct cs_etm_queue *etmq __maybe_unused) +{ + /* What else to return? 
*/ + return true; +} + +static inline void cs_etm__etmq_set_traceid_queue_timestamp( + struct cs_etm_queue *etmq __maybe_unused, + u8 trace_chan_id __maybe_unused) {} + static inline struct cs_etm_packet_queue *cs_etm__etmq_get_packet_queue( struct cs_etm_queue *etmq __maybe_unused, u8 trace_chan_id __maybe_unused) From 21fe8dc1191a1528f6732e471f45d370a5c48045 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 24 May 2019 11:35:08 -0600 Subject: [PATCH 53/85] perf cs-etm: Add support for CPU-wide trace scenarios Add support for CPU-wide trace scenarios by correlating range packets with timestamp packets. That way range packets received on different ETMQ/traceID channels can be processed and synthesized in chronological order. Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190524173508.29044-18-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 254 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 246 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 91496a3a2209..0c7776b51045 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -90,12 +90,26 @@ struct cs_etm_queue { }; static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); +static int cs_etm__process_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, pid_t tid); +static int cs_etm__get_data_block(struct cs_etm_queue *etmq); +static int cs_etm__decode_data_block(struct cs_etm_queue *etmq); /* PTMs ETMIDR [11:8] set to b0011 */ #define ETMIDR_PTM_VERSION 0x00000300 +/* + * A struct auxtrace_heap_item only has a queue_nr and a timestamp to + * work with. One option is to modify to auxtrace_heap_XYZ() API or simply + * encode the etm queue number as the upper 16 bit and the channel as + * the lower 16 bit. 
+ */
+#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
+	(queue_nr << 16 | trace_chan_id)
+#define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
+#define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
+
 static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
 {
	etmidr &= ETMIDR_PTM_VERSION;
@@ -147,6 +161,29 @@ void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
	etmq->pending_timestamp = trace_chan_id;
 }

+static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
+				      u8 *trace_chan_id)
+{
+	struct cs_etm_packet_queue *packet_queue;
+
+	if (!etmq->pending_timestamp)
+		return 0;
+
+	if (trace_chan_id)
+		*trace_chan_id = etmq->pending_timestamp;
+
+	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
+						     etmq->pending_timestamp);
+	if (!packet_queue)
+		return 0;
+
+	/* Acknowledge pending status */
+	etmq->pending_timestamp = 0;
+
+	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
+	return packet_queue->timestamp;
+}
+
 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
 {
	int i;
@@ -171,6 +208,20 @@ static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
	}
 }

+static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
+{
+	int idx;
+	struct int_node *inode;
+	struct cs_etm_traceid_queue *tidq;
+	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
+
+	intlist__for_each_entry(inode, traceid_queues_list) {
+		idx = (int)(intptr_t)inode->priv;
+		tidq = etmq->traceid_queues[idx];
+		cs_etm__clear_packet_queue(&tidq->packet_queue);
+	}
+}
+
 static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
				      struct cs_etm_traceid_queue *tidq,
				      u8 trace_chan_id)
@@ -458,15 +509,15 @@ static int cs_etm__flush_events(struct perf_session *session,
	if (!tool->ordered_events)
		return -EINVAL;

-	if (!etm->timeless_decoding)
-		return -EINVAL;
-
	ret = cs_etm__update_queues(etm);

	if (ret < 0)
		return ret;

-	return cs_etm__process_timeless_queues(etm, -1);
+	if (etm->timeless_decoding)
+		return cs_etm__process_timeless_queues(etm, -1);
+
+	return cs_etm__process_queues(etm);
 }

 static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
@@ -685,6 +736,9 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
			       unsigned int queue_nr)
 {
	int ret = 0;
+	unsigned int cs_queue_nr;
+	u8 trace_chan_id;
+	u64 timestamp;
	struct cs_etm_queue *etmq = queue->priv;

	if (list_empty(&queue->head) || etmq)
@@ -702,6 +756,67 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
	etmq->queue_nr = queue_nr;
	etmq->offset = 0;

+	if (etm->timeless_decoding)
+		goto out;
+
+	/*
+	 * We are under a CPU-wide trace scenario.  As such we need to know
+	 * when the code that generated the traces started to execute so that
+	 * it can be correlated with execution on other CPUs.  So we get a
+	 * handle on the beginning of traces and decode until we find a
+	 * timestamp.  The timestamp is then added to the auxtrace min heap
+	 * in order to know which queue (of all the etmqs) to decode first.
+	 */
+	while (1) {
+		/*
+		 * Fetch an aux_buffer from this etmq.  Bail if no more
+		 * blocks or an error has been encountered.
+		 */
+		ret = cs_etm__get_data_block(etmq);
+		if (ret <= 0)
+			goto out;
+
+		/*
+		 * Run decoder on the trace block.  The decoder will stop when
+		 * encountering a timestamp, a full packet queue or the end of
+		 * trace for that block.
+		 */
+		ret = cs_etm__decode_data_block(etmq);
+		if (ret)
+			goto out;
+
+		/*
+		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
+		 * the timestamp calculation for us.
+		 */
+		timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
+
+		/* We found a timestamp, no need to continue. */
+		if (timestamp)
+			break;
+
+		/*
+		 * We didn't find a timestamp so empty all the traceid packet
+		 * queues before looking for another timestamp packet, either
+		 * in the current data block or a new one.  Packets that were
+		 * just decoded are useless since no timestamp has been
+		 * associated with them.  As such simply discard them.
+		 */
+		cs_etm__clear_all_packet_queues(etmq);
+	}
+
+	/*
+	 * We have a timestamp.  Add it to the min heap to reflect when
+	 * instructions conveyed by the range packets of this traceID queue
+	 * started to execute.  Once the same has been done for all the traceID
+	 * queues of each etmq, rendering and decoding can start in
+	 * chronological order.
+	 *
+	 * Note that packets decoded above are still in the traceID's packet
+	 * queue and will be processed in cs_etm__process_queues().
+	 */
+	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
+	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp);
 out:
	return ret;
 }
@@ -1846,6 +1961,28 @@ static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
	return ret;
 }

+static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
+{
+	int idx;
+	struct int_node *inode;
+	struct cs_etm_traceid_queue *tidq;
+	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
+
+	intlist__for_each_entry(inode, traceid_queues_list) {
+		idx = (int)(intptr_t)inode->priv;
+		tidq = etmq->traceid_queues[idx];
+
+		/* Ignore return value */
+		cs_etm__process_traceid_queue(etmq, tidq);
+
+		/*
+		 * Generate an instruction sample with the remaining
+		 * branchstack entries.
+		 */
+		cs_etm__flush(etmq, tidq);
+	}
+}
+
 static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
 {
	int err = 0;
@@ -1913,6 +2050,105 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
	return 0;
 }

+static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
+{
+	int ret = 0;
+	unsigned int cs_queue_nr, queue_nr;
+	u8 trace_chan_id;
+	u64 timestamp;
+	struct auxtrace_queue *queue;
+	struct cs_etm_queue *etmq;
+	struct cs_etm_traceid_queue *tidq;
+
+	while (1) {
+		if (!etm->heap.heap_cnt)
+			goto out;
+
+		/* Take the entry at the top of the min heap */
+		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
+		queue_nr = TO_QUEUE_NR(cs_queue_nr);
+		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
+		queue = &etm->queues.queue_array[queue_nr];
+		etmq = queue->priv;
+
+		/*
+		 * Remove the top entry from the heap since we are about
+		 * to process it.
+		 */
+		auxtrace_heap__pop(&etm->heap);
+
+		tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
+		if (!tidq) {
+			/*
+			 * No traceID queue has been allocated for this traceID,
+			 * which means something somewhere went very wrong.  No
+			 * other choice than simply exit.
+			 */
+			ret = -EINVAL;
+			goto out;
+		}
+
+		/*
+		 * Packets associated with this timestamp are already in
+		 * the etmq's traceID queue, so process them.
+		 */
+		ret = cs_etm__process_traceid_queue(etmq, tidq);
+		if (ret < 0)
+			goto out;
+
+		/*
+		 * Packets for this timestamp have been processed, time to
+		 * move on to the next timestamp, fetching a new auxtrace_buffer
+		 * if need be.
+		 */
+refetch:
+		ret = cs_etm__get_data_block(etmq);
+		if (ret < 0)
+			goto out;
+
+		/*
+		 * No more auxtrace_buffers to process in this etmq, simply
+		 * move on to another entry in the auxtrace_heap.
+ */ + if (!ret) + continue; + + ret = cs_etm__decode_data_block(etmq); + if (ret) + goto out; + + timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); + + if (!timestamp) { + /* + * Function cs_etm__decode_data_block() returns when + * there is no more traces to decode in the current + * auxtrace_buffer OR when a timestamp has been + * encountered on any of the traceID queues. Since we + * did not get a timestamp, there is no more traces to + * process in this auxtrace_buffer. As such empty and + * flush all traceID queues. + */ + cs_etm__clear_all_traceid_queues(etmq); + + /* Fetch another auxtrace_buffer for this etmq */ + goto refetch; + } + + /* + * Add to the min heap the timestamp for packets that have + * just been decoded. They will be processed and synthesized + * during the next call to cs_etm__process_traceid_queue() for + * this queue/traceID. + */ + cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); + ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp); + } + +out: + return ret; +} + static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm, union perf_event *event) { @@ -1991,9 +2227,6 @@ static int cs_etm__process_event(struct perf_session *session, return -EINVAL; } - if (!etm->timeless_decoding) - return -EINVAL; - if (sample->time && (sample->time != (u64) -1)) timestamp = sample->time; else @@ -2005,7 +2238,8 @@ static int cs_etm__process_event(struct perf_session *session, return err; } - if (event->header.type == PERF_RECORD_EXIT) + if (etm->timeless_decoding && + event->header.type == PERF_RECORD_EXIT) return cs_etm__process_timeless_queues(etm, event->fork.tid); @@ -2014,6 +2248,10 @@ static int cs_etm__process_event(struct perf_session *session, else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) return cs_etm__process_switch_cpu_wide(etm, event); + if (!etm->timeless_decoding && + event->header.type == PERF_RECORD_AUX) + return cs_etm__process_queues(etm); + return 0; } From b74d8686a18b36adecc710597198d5ef2dd5ef14 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 4 Jun 2019 15:50:40 -0700 Subject: [PATCH 54/85] perf cpumap: Retrieve die id information There is no function to retrieve die id information of a given CPU. Add cpu_map__get_die_id() to retrieve die id information. 
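The helper mirrors the existing cpu_map__get_core_id() and
cpu_map__get_socket_id() pattern: read one integer attribute from the
CPU's sysfs topology directory. A minimal standalone equivalent of what
cpu__get_topology_int() does under the hood (the function name and the
simplified error handling here are illustrative):

	#include <stdio.h>

	static int get_topology_int(int cpu, const char *name, int *value)
	{
		char path[256];
		FILE *fp;
		int ret = -1;

		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/cpu%d/topology/%s", cpu, name);
		fp = fopen(path, "r");
		if (!fp)
			return -1;	/* attribute absent, e.g. older kernels */
		if (fscanf(fp, "%d", value) == 1)
			ret = 0;
		fclose(fp);
		return ret;
	}

	int main(void)
	{
		int die_id;

		/* On kernels without CPUID.1F support, die_id is not exposed */
		if (get_topology_int(0, "die_id", &die_id))
			die_id = -1;
		printf("cpu0 die_id: %d\n", die_id);
		return 0;
	}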
Signed-off-by: Kan Liang
Reviewed-by: Jiri Olsa
Cc: Andi Kleen
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/1559688644-106558-1-git-send-email-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/cpumap.c | 7 +++++++
 tools/perf/util/cpumap.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 0b599229bc7e..7db1365c667e 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -373,6 +373,13 @@ int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
	return 0;
 }

+int cpu_map__get_die_id(int cpu)
+{
+	int value, ret = cpu__get_topology_int(cpu, "die_id", &value);
+
+	return ret ?: value;
+}
+
 int cpu_map__get_core_id(int cpu)
 {
	int value, ret = cpu__get_topology_int(cpu, "core_id", &value);
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index f00ce624b9f7..6762ff9e7ad5 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -25,6 +25,7 @@ size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size);
 size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
 int cpu_map__get_socket_id(int cpu);
 int cpu_map__get_socket(struct cpu_map *map, int idx, void *data);
+int cpu_map__get_die_id(int cpu);
 int cpu_map__get_core_id(int cpu);
 int cpu_map__get_core(struct cpu_map *map, int idx, void *data);
 int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);

From acae8b36cded0ee62038dedd0a44d54d5d673a96 Mon Sep 17 00:00:00 2001
From: Kan Liang
Date: Tue, 4 Jun 2019 15:50:41 -0700
Subject: [PATCH 55/85] perf header: Add die information in CPU topology

With the new CPUID.1F, a new level type of CPU topology, 'die', is
introduced. The 'die' information in CPU topology should be added to
the perf header.

To be compatible with old perf.data, the patch checks the section size
before reading the die information. The new info is added at the end of
the cpu_topology section; the old perf tool ignores the extra data and
never reads data crossing the section boundary. The new perf tool with
the patch can be used on legacy kernels.

Add a new function has_die_topology() to check if die topology
information is supported by the kernel. The function only checks x86
and CPU 0, assuming other CPUs have the same topology.

Use a similar method as for core and socket to support the die id and
the sibling dies string.

Signed-off-by: Kan Liang
Reviewed-by: Jiri Olsa
Cc: Andi Kleen
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/1559688644-106558-2-git-send-email-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 .../Documentation/perf.data-file-format.txt |  9 +-
 tools/perf/util/cputopo.c                   | 76 ++++++++++++++-
 tools/perf/util/cputopo.h                   |  2 +
 tools/perf/util/env.c                       |  1 +
 tools/perf/util/env.h                       |  3 +
 tools/perf/util/header.c                    | 94 +++++++++++++++++--
 6 files changed, 172 insertions(+), 13 deletions(-)

diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index 6375e6fb8bac..0165e92e717e 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -153,7 +153,7 @@ struct {
 String lists defining the core and CPU threads topology.
 The string lists are followed by a variable length array
-which contains core_id and socket_id of each cpu.
+which contains core_id, die_id (for x86) and socket_id of each cpu.
 The number of entries can be determined by the size of the
 section minus the sizes of both string lists.
@@ -162,14 +162,19 @@ struct { struct perf_header_string_list threads; /* Variable length */ struct { uint32_t core_id; + uint32_t die_id; uint32_t socket_id; } cpus[nr]; /* Variable length records */ }; Example: - sibling cores : 0-3 + sibling cores : 0-8 + sibling dies : 0-3 + sibling dies : 4-7 sibling threads : 0-1 sibling threads : 2-3 + sibling threads : 4-5 + sibling threads : 6-7 HEADER_NUMA_TOPOLOGY = 14, diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index ece0710249d4..85fa87fc30cf 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include @@ -8,9 +9,10 @@ #include "util.h" #include "env.h" - #define CORE_SIB_FMT \ "%s/devices/system/cpu/cpu%d/topology/core_siblings_list" +#define DIE_SIB_FMT \ + "%s/devices/system/cpu/cpu%d/topology/die_cpus_list" #define THRD_SIB_FMT \ "%s/devices/system/cpu/cpu%d/topology/thread_siblings_list" #define NODE_ONLINE_FMT \ @@ -34,12 +36,12 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu) sysfs__mountpoint(), cpu); fp = fopen(filename, "r"); if (!fp) - goto try_threads; + goto try_dies; sret = getline(&buf, &len, fp); fclose(fp); if (sret <= 0) - goto try_threads; + goto try_dies; p = strchr(buf, '\n'); if (p) @@ -57,6 +59,37 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu) } ret = 0; +try_dies: + if (!tp->die_siblings) + goto try_threads; + + scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT, + sysfs__mountpoint(), cpu); + fp = fopen(filename, "r"); + if (!fp) + goto try_threads; + + sret = getline(&buf, &len, fp); + fclose(fp); + if (sret <= 0) + goto try_threads; + + p = strchr(buf, '\n'); + if (p) + *p = '\0'; + + for (i = 0; i < tp->die_sib; i++) { + if (!strcmp(buf, tp->die_siblings[i])) + break; + } + if (i == tp->die_sib) { + tp->die_siblings[i] = buf; + tp->die_sib++; + buf = NULL; + len = 0; + } + ret = 0; + try_threads: scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT, sysfs__mountpoint(), cpu); @@ -98,21 +131,46 @@ void cpu_topology__delete(struct cpu_topology *tp) for (i = 0 ; i < tp->core_sib; i++) zfree(&tp->core_siblings[i]); + if (tp->die_sib) { + for (i = 0 ; i < tp->die_sib; i++) + zfree(&tp->die_siblings[i]); + } + for (i = 0 ; i < tp->thread_sib; i++) zfree(&tp->thread_siblings[i]); free(tp); } +static bool has_die_topology(void) +{ + char filename[MAXPATHLEN]; + struct utsname uts; + + if (uname(&uts) < 0) + return false; + + if (strncmp(uts.machine, "x86_64", 6)) + return false; + + scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT, + sysfs__mountpoint(), 0); + if (access(filename, F_OK) == -1) + return false; + + return true; +} + struct cpu_topology *cpu_topology__new(void) { struct cpu_topology *tp = NULL; void *addr; - u32 nr, i; + u32 nr, i, nr_addr; size_t sz; long ncpus; int ret = -1; struct cpu_map *map; + bool has_die = has_die_topology(); ncpus = cpu__max_present_cpu(); @@ -126,7 +184,11 @@ struct cpu_topology *cpu_topology__new(void) nr = (u32)(ncpus & UINT_MAX); sz = nr * sizeof(char *); - addr = calloc(1, sizeof(*tp) + 2 * sz); + if (has_die) + nr_addr = 3; + else + nr_addr = 2; + addr = calloc(1, sizeof(*tp) + nr_addr * sz); if (!addr) goto out_free; @@ -134,6 +196,10 @@ struct cpu_topology *cpu_topology__new(void) addr += sizeof(*tp); tp->core_siblings = addr; addr += sz; + if (has_die) { + tp->die_siblings = addr; + addr += sz; + } tp->thread_siblings = addr; for (i = 0; i < nr; i++) { diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h index 
47a97e71acdf..bae2f1d41856 100644 --- a/tools/perf/util/cputopo.h +++ b/tools/perf/util/cputopo.h @@ -7,8 +7,10 @@ struct cpu_topology { u32 core_sib; + u32 die_sib; u32 thread_sib; char **core_siblings; + char **die_siblings; char **thread_siblings; }; diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 6a3eaf7d9353..1cc7a1837822 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -246,6 +246,7 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) for (cpu = 0; cpu < nr_cpus; ++cpu) { env->cpu[cpu].core_id = cpu_map__get_core_id(cpu); env->cpu[cpu].socket_id = cpu_map__get_socket_id(cpu); + env->cpu[cpu].die_id = cpu_map__get_die_id(cpu); } env->nr_cpus_avail = nr_cpus; diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 271a90b326c4..d5d9865aa812 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -9,6 +9,7 @@ struct cpu_topology_map { int socket_id; + int die_id; int core_id; }; @@ -49,6 +50,7 @@ struct perf_env { int nr_cmdline; int nr_sibling_cores; + int nr_sibling_dies; int nr_sibling_threads; int nr_numa_nodes; int nr_memory_nodes; @@ -57,6 +59,7 @@ struct perf_env { char *cmdline; const char **cmdline_argv; char *sibling_cores; + char *sibling_dies; char *sibling_threads; char *pmu_mappings; struct cpu_topology_map *cpu; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 847ae51a524b..64976254431c 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -599,6 +599,27 @@ static int write_cpu_topology(struct feat_fd *ff, if (ret < 0) return ret; } + + if (!tp->die_sib) + goto done; + + ret = do_write(ff, &tp->die_sib, sizeof(tp->die_sib)); + if (ret < 0) + goto done; + + for (i = 0; i < tp->die_sib; i++) { + ret = do_write_string(ff, tp->die_siblings[i]); + if (ret < 0) + goto done; + } + + for (j = 0; j < perf_env.nr_cpus_avail; j++) { + ret = do_write(ff, &perf_env.cpu[j].die_id, + sizeof(perf_env.cpu[j].die_id)); + if (ret < 0) + return ret; + } + done: cpu_topology__delete(tp); return ret; @@ -1443,6 +1464,16 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp) str += strlen(str) + 1; } + if (ph->env.nr_sibling_dies) { + nr = ph->env.nr_sibling_dies; + str = ph->env.sibling_dies; + + for (i = 0; i < nr; i++) { + fprintf(fp, "# sibling dies : %s\n", str); + str += strlen(str) + 1; + } + } + nr = ph->env.nr_sibling_threads; str = ph->env.sibling_threads; @@ -1451,12 +1482,28 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp) str += strlen(str) + 1; } - if (ph->env.cpu != NULL) { - for (i = 0; i < cpu_nr; i++) - fprintf(fp, "# CPU %d: Core ID %d, Socket ID %d\n", i, - ph->env.cpu[i].core_id, ph->env.cpu[i].socket_id); - } else - fprintf(fp, "# Core ID and Socket ID information is not available\n"); + if (ph->env.nr_sibling_dies) { + if (ph->env.cpu != NULL) { + for (i = 0; i < cpu_nr; i++) + fprintf(fp, "# CPU %d: Core ID %d, " + "Die ID %d, Socket ID %d\n", + i, ph->env.cpu[i].core_id, + ph->env.cpu[i].die_id, + ph->env.cpu[i].socket_id); + } else + fprintf(fp, "# Core ID, Die ID and Socket ID " + "information is not available\n"); + } else { + if (ph->env.cpu != NULL) { + for (i = 0; i < cpu_nr; i++) + fprintf(fp, "# CPU %d: Core ID %d, " + "Socket ID %d\n", + i, ph->env.cpu[i].core_id, + ph->env.cpu[i].socket_id); + } else + fprintf(fp, "# Core ID and Socket ID " + "information is not available\n"); + } } static void print_clockid(struct feat_fd *ff, FILE *fp) @@ -2214,6 +2261,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) 
			goto free_cpu;

		ph->env.cpu[i].core_id = nr;
+		size += sizeof(u32);

		if (do_read_u32(ff, &nr))
			goto free_cpu;
@@ -2225,6 +2273,40 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
		}

		ph->env.cpu[i].socket_id = nr;
+		size += sizeof(u32);
+	}
+
+	/*
+	 * The header may be from old perf,
+	 * which doesn't include die information.
+	 */
+	if (ff->size <= size)
+		return 0;
+
+	if (do_read_u32(ff, &nr))
+		return -1;
+
+	ph->env.nr_sibling_dies = nr;
+	size += sizeof(u32);
+
+	for (i = 0; i < nr; i++) {
+		str = do_read_string(ff);
+		if (!str)
+			goto error;
+
+		/* include a NULL character at the end */
+		if (strbuf_add(&sb, str, strlen(str) + 1) < 0)
+			goto error;
+		size += string_size(str);
+		free(str);
+	}
+	ph->env.sibling_dies = strbuf_detach(&sb, NULL);
+
+	for (i = 0; i < (u32)cpu_nr; i++) {
+		if (do_read_u32(ff, &nr))
+			goto free_cpu;
+
+		ph->env.cpu[i].die_id = nr;
	}

	return 0;

From db5742b6849ed7f01d764e6755b8ab2b422f29b2 Mon Sep 17 00:00:00 2001
From: Kan Liang
Date: Tue, 4 Jun 2019 15:50:42 -0700
Subject: [PATCH 56/85] perf stat: Support per-die aggregation

It is useful to aggregate counts per die. E.g. Uncore becomes die-scope
on Xeon Cascade Lake-AP.

Introduce a new option "--per-die" to support per-die aggregation.

The global id for each core has been changed to socket + die id + core
id. The global id for each die is socket + die id.

Add die information for per-core aggregation. The output of per-core
aggregation will be changed from "S0-C0" to "S0-D0-C0". Any scripts
which rely on the output format of per-core aggregation will probably
be broken.

For 'perf stat record/report', there is no die information when
processing the old perf.data. The per-die result will be the same as
per-socket.

Committer notes:

Renamed 'die' variable to 'die_id' to fix the build in some systems:

    CC       /tmp/build/perf/builtin-script.o
  cc1: warnings being treated as errors
  builtin-stat.c: In function 'perf_env__get_die':
  builtin-stat.c:963: error: declaration of 'die' shadows a global declaration
  util/util.h:19: error: shadowed declaration is here
  mv: cannot stat `/tmp/build/perf/.builtin-stat.o.tmp': No such file or directory

Signed-off-by: Kan Liang
Reviewed-by: Jiri Olsa
Cc: Andi Kleen
Cc: Peter Zijlstra
Link: https://lkml.kernel.org/n/tip-bsnhx7vgsuu6ei307mw60mbj@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/Documentation/perf-stat.txt | 10 +++
 tools/perf/builtin-stat.c              | 87 ++++++++++++++++++++++++--
 tools/perf/util/cpumap.c               | 57 ++++++++++++++---
 tools/perf/util/cpumap.h               |  9 ++-
 tools/perf/util/stat-display.c         | 29 +++++++--
 tools/perf/util/stat-shadow.c          |  1 +
 tools/perf/util/stat.c                 |  1 +
 tools/perf/util/stat.h                 |  1 +
 8 files changed, 177 insertions(+), 18 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 1e312c2672e4..930c51c01201 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -200,6 +200,13 @@ use --per-socket in addition to -a. (system-wide). The output includes the
 socket number and the number of online processors on that socket. This is
 useful to gauge the amount of aggregation.

+--per-die::
+Aggregate counts per processor die for system-wide mode measurements. This
+is a useful mode to detect imbalance between dies. To enable this mode,
+use --per-die in addition to -a. (system-wide). The output includes the
+die number and the number of online processors on that die. This is
+useful to gauge the amount of aggregation.
+ --per-core:: Aggregate counts per physical processor for system-wide mode measurements. This is a useful mode to detect imbalance between physical cores. To enable this mode, @@ -239,6 +246,9 @@ Input file name. --per-socket:: Aggregate counts per processor socket for system-wide mode measurements. +--per-die:: +Aggregate counts per processor die for system-wide mode measurements. + --per-core:: Aggregate counts per physical processor for system-wide mode measurements. diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 24b8e690fb69..272df8426f0a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -777,6 +777,8 @@ static struct option stat_options[] = { "stop workload and print counts after a timeout period in ms (>= 10ms)"), OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, "aggregate counts per processor socket", AGGR_SOCKET), + OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode, + "aggregate counts per processor die", AGGR_DIE), OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, "aggregate counts per physical processor core", AGGR_CORE), OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, @@ -801,6 +803,12 @@ static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, return cpu_map__get_socket(map, cpu, NULL); } +static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused, + struct cpu_map *map, int cpu) +{ + return cpu_map__get_die(map, cpu, NULL); +} + static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, struct cpu_map *map, int cpu) { @@ -841,6 +849,12 @@ static int perf_stat__get_socket_cached(struct perf_stat_config *config, return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); } +static int perf_stat__get_die_cached(struct perf_stat_config *config, + struct cpu_map *map, int idx) +{ + return perf_stat__get_aggr(config, perf_stat__get_die, map, idx); +} + static int perf_stat__get_core_cached(struct perf_stat_config *config, struct cpu_map *map, int idx) { @@ -871,6 +885,13 @@ static int perf_stat_init_aggr_mode(void) } stat_config.aggr_get_id = perf_stat__get_socket_cached; break; + case AGGR_DIE: + if (cpu_map__build_die_map(evsel_list->cpus, &stat_config.aggr_map)) { + perror("cannot build die map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_die_cached; + break; case AGGR_CORE: if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) { perror("cannot build core map"); @@ -936,21 +957,55 @@ static int perf_env__get_socket(struct cpu_map *map, int idx, void *data) return cpu == -1 ? -1 : env->cpu[cpu].socket_id; } +static int perf_env__get_die(struct cpu_map *map, int idx, void *data) +{ + struct perf_env *env = data; + int die_id = -1, cpu = perf_env__get_cpu(env, map, idx); + + if (cpu != -1) { + /* + * Encode socket in bit range 15:8 + * die_id is relative to socket, + * we need a global id. 
So we combine + * socket + die id + */ + if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) + return -1; + + if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) + return -1; + + die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff); + } + + return die_id; +} + static int perf_env__get_core(struct cpu_map *map, int idx, void *data) { struct perf_env *env = data; int core = -1, cpu = perf_env__get_cpu(env, map, idx); if (cpu != -1) { - int socket_id = env->cpu[cpu].socket_id; - /* - * Encode socket in upper 16 bits - * core_id is relative to socket, and + * Encode socket in bit range 31:24 + * encode die id in bit range 23:16 + * core_id is relative to socket and die, * we need a global id. So we combine - * socket + core id. + * socket + die id + core id */ - core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); + if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) + return -1; + + if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) + return -1; + + if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n")) + return -1; + + core = (env->cpu[cpu].socket_id << 24) | + (env->cpu[cpu].die_id << 16) | + (env->cpu[cpu].core_id & 0xffff); } return core; @@ -962,6 +1017,12 @@ static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); } +static int perf_env__build_die_map(struct perf_env *env, struct cpu_map *cpus, + struct cpu_map **diep) +{ + return cpu_map__build_map(cpus, diep, perf_env__get_die, env); +} + static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, struct cpu_map **corep) { @@ -973,6 +1034,11 @@ static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_un { return perf_env__get_socket(map, idx, &perf_stat.session->header.env); } +static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, + struct cpu_map *map, int idx) +{ + return perf_env__get_die(map, idx, &perf_stat.session->header.env); +} static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, struct cpu_map *map, int idx) @@ -992,6 +1058,13 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) } stat_config.aggr_get_id = perf_stat__get_socket_file; break; + case AGGR_DIE: + if (perf_env__build_die_map(env, evsel_list->cpus, &stat_config.aggr_map)) { + perror("cannot build die map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_die_file; + break; case AGGR_CORE: if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) { perror("cannot build core map"); @@ -1542,6 +1615,8 @@ static int __cmd_report(int argc, const char **argv) OPT_STRING('i', "input", &input_name, "file", "input file name"), OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, "aggregate counts per processor socket", AGGR_SOCKET), + OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode, + "aggregate counts per processor die", AGGR_DIE), OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, "aggregate counts per physical processor core", AGGR_CORE), OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 7db1365c667e..c11a459ca582 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -380,6 +380,39 @@ int cpu_map__get_die_id(int cpu) return ret ?: value; } +int cpu_map__get_die(struct cpu_map *map, int idx, 
void *data) +{ + int cpu, die_id, s; + + if (idx > map->nr) + return -1; + + cpu = map->map[idx]; + + die_id = cpu_map__get_die_id(cpu); + /* There is no die_id on legacy system. */ + if (die_id == -1) + die_id = 0; + + s = cpu_map__get_socket(map, idx, data); + if (s == -1) + return -1; + + /* + * Encode socket in bit range 15:8 + * die_id is relative to socket, and + * we need a global id. So we combine + * socket + die id + */ + if (WARN_ONCE(die_id >> 8, "The die id number is too big.\n")) + return -1; + + if (WARN_ONCE(s >> 8, "The socket id number is too big.\n")) + return -1; + + return (s << 8) | (die_id & 0xff); +} + int cpu_map__get_core_id(int cpu) { int value, ret = cpu__get_topology_int(cpu, "core_id", &value); @@ -388,7 +421,7 @@ int cpu_map__get_core_id(int cpu) int cpu_map__get_core(struct cpu_map *map, int idx, void *data) { - int cpu, s; + int cpu, s_die; if (idx > map->nr) return -1; @@ -397,17 +430,22 @@ int cpu_map__get_core(struct cpu_map *map, int idx, void *data) cpu = cpu_map__get_core_id(cpu); - s = cpu_map__get_socket(map, idx, data); - if (s == -1) + /* s_die is the combination of socket + die id */ + s_die = cpu_map__get_die(map, idx, data); + if (s_die == -1) return -1; /* - * encode socket in upper 16 bits - * core_id is relative to socket, and + * encode socket in bit range 31:24 + * encode die id in bit range 23:16 + * core_id is relative to socket and die, * we need a global id. So we combine - * socket+ core id + * socket + die id + core id */ - return (s << 16) | (cpu & 0xffff); + if (WARN_ONCE(cpu >> 16, "The core id number is too big.\n")) + return -1; + + return (s_die << 16) | (cpu & 0xffff); } int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp) @@ -415,6 +453,11 @@ int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp) return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL); } +int cpu_map__build_die_map(struct cpu_map *cpus, struct cpu_map **diep) +{ + return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL); +} + int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep) { return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL); diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 6762ff9e7ad5..1265f0e33920 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -26,9 +26,11 @@ size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); int cpu_map__get_socket_id(int cpu); int cpu_map__get_socket(struct cpu_map *map, int idx, void *data); int cpu_map__get_die_id(int cpu); +int cpu_map__get_die(struct cpu_map *map, int idx, void *data); int cpu_map__get_core_id(int cpu); int cpu_map__get_core(struct cpu_map *map, int idx, void *data); int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); +int cpu_map__build_die_map(struct cpu_map *cpus, struct cpu_map **diep); int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep); const struct cpu_map *cpu_map__online(void); /* thread unsafe */ @@ -44,7 +46,12 @@ static inline int cpu_map__socket(struct cpu_map *sock, int s) static inline int cpu_map__id_to_socket(int id) { - return id >> 16; + return id >> 24; +} + +static inline int cpu_map__id_to_die(int id) +{ + return (id >> 16) & 0xff; } static inline int cpu_map__id_to_cpu(int id) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 4c53bae5644b..a6b9de3e83fc 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -69,8 +69,9 @@ static void 
aggr_printout(struct perf_stat_config *config, { switch (config->aggr_mode) { case AGGR_CORE: - fprintf(config->output, "S%d-C%*d%s%*d%s", + fprintf(config->output, "S%d-D%d-C%*d%s%*d%s", cpu_map__id_to_socket(id), + cpu_map__id_to_die(id), config->csv_output ? 0 : -8, cpu_map__id_to_cpu(id), config->csv_sep, @@ -78,6 +79,16 @@ static void aggr_printout(struct perf_stat_config *config, nr, config->csv_sep); break; + case AGGR_DIE: + fprintf(config->output, "S%d-D%*d%s%*d%s", + cpu_map__id_to_socket(id << 16), + config->csv_output ? 0 : -8, + cpu_map__id_to_die(id << 16), + config->csv_sep, + config->csv_output ? 0 : 4, + nr, + config->csv_sep); + break; case AGGR_SOCKET: fprintf(config->output, "S%*d%s%*d%s", config->csv_output ? 0 : -5, @@ -89,8 +100,9 @@ static void aggr_printout(struct perf_stat_config *config, break; case AGGR_NONE: if (evsel->percore) { - fprintf(config->output, "S%d-C%*d%s", + fprintf(config->output, "S%d-D%d-C%*d%s", cpu_map__id_to_socket(id), + cpu_map__id_to_die(id), config->csv_output ? 0 : -5, cpu_map__id_to_cpu(id), config->csv_sep); } else { @@ -407,6 +419,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, [AGGR_THREAD] = 1, [AGGR_NONE] = 1, [AGGR_SOCKET] = 2, + [AGGR_DIE] = 2, [AGGR_CORE] = 2, }; @@ -879,7 +892,8 @@ static void print_no_aggr_metric(struct perf_stat_config *config, } static int aggr_header_lens[] = { - [AGGR_CORE] = 18, + [AGGR_CORE] = 24, + [AGGR_DIE] = 18, [AGGR_SOCKET] = 12, [AGGR_NONE] = 6, [AGGR_THREAD] = 24, @@ -888,6 +902,7 @@ static int aggr_header_lens[] = { static const char *aggr_header_csv[] = { [AGGR_CORE] = "core,cpus,", + [AGGR_DIE] = "die,cpus", [AGGR_SOCKET] = "socket,cpus", [AGGR_NONE] = "cpu,", [AGGR_THREAD] = "comm-pid,", @@ -954,8 +969,13 @@ static void print_interval(struct perf_stat_config *config, if (!metric_only) fprintf(output, " counts %*s events\n", unit_width, "unit"); break; + case AGGR_DIE: + fprintf(output, "# time die cpus"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); + break; case AGGR_CORE: - fprintf(output, "# time core cpus"); + fprintf(output, "# time core cpus"); if (!metric_only) fprintf(output, " counts %*s events\n", unit_width, "unit"); break; @@ -1165,6 +1185,7 @@ perf_evlist__print_counters(struct perf_evlist *evlist, switch (config->aggr_mode) { case AGGR_CORE: + case AGGR_DIE: case AGGR_SOCKET: print_aggr(config, evlist, prefix); break; diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 83d8094be4fe..027b09aaa4cf 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -12,6 +12,7 @@ /* * AGGR_GLOBAL: Use CPU 0 * AGGR_SOCKET: Use first CPU of socket + * AGGR_DIE: Use first CPU of die * AGGR_CORE: Use first CPU of core * AGGR_NONE: Use matching CPU * AGGR_THREAD: Not supported? 
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index c3115d939b0b..d91fe754b6d2 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -272,6 +272,7 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel
	switch (config->aggr_mode) {
	case AGGR_THREAD:
	case AGGR_CORE:
+	case AGGR_DIE:
	case AGGR_SOCKET:
	case AGGR_NONE:
		if (!evsel->snapshot)
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 2f9c9159a364..7032dd1eeac2 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -44,6 +44,7 @@ enum aggr_mode {
	AGGR_NONE,
	AGGR_GLOBAL,
	AGGR_SOCKET,
+	AGGR_DIE,
	AGGR_CORE,
	AGGR_THREAD,
	AGGR_UNSET,

From e05a899718f094e2c87d99115c5b1191405a9fd0 Mon Sep 17 00:00:00 2001
From: Kan Liang
Date: Tue, 4 Jun 2019 15:50:43 -0700
Subject: [PATCH 57/85] perf header: Rename "sibling cores" to "sibling sockets"

The "sibling cores" line actually shows the sibling CPUs of a socket.
The name "sibling cores" is very misleading.

Rename "sibling cores" to "sibling sockets".

Signed-off-by: Kan Liang
Reviewed-by: Jiri Olsa
Cc: Andi Kleen
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/1559688644-106558-4-git-send-email-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/Documentation/perf.data-file-format.txt | 2 +-
 tools/perf/util/header.c                           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index 0165e92e717e..de78183f6881 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -168,7 +168,7 @@ struct {
 };

 Example:
-	sibling cores   : 0-8
+	sibling sockets : 0-8
	sibling dies    : 0-3
	sibling dies    : 4-7
	sibling threads : 0-1
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 64976254431c..06ddb6618ef3 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1460,7 +1460,7 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
	str = ph->env.sibling_cores;

	for (i = 0; i < nr; i++) {
-		fprintf(fp, "# sibling cores   : %s\n", str);
+		fprintf(fp, "# sibling sockets : %s\n", str);
		str += strlen(str) + 1;
	}

From 0ccdb8407a4660f6dbc5977bc060917d2c3e7986 Mon Sep 17 00:00:00 2001
From: Kan Liang
Date: Tue, 4 Jun 2019 15:50:44 -0700
Subject: [PATCH 58/85] perf tools: Apply new CPU topology sysfs attributes

The existing "thread_siblings" and "thread_siblings_list" attributes
will be deprecated.

Use the new CPU topology sysfs attributes, "core_cpus" and
"core_cpus_list", which are synonymous with the deprecated attributes.

Check the new name first. If it is not available, use the deprecated
name to be compatible with old kernels.
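The probe order is simple: try the new attribute name, fall back to the
deprecated one. A standalone sketch of that selection follows; the
paths come from the kernel's CPU topology sysfs ABI, while the helper
name is illustrative:

	#include <stdio.h>
	#include <unistd.h>

	/* Prefer the new "core_cpus_list" name, fall back to the old one */
	static const char *smt_siblings_path(int cpu, char *buf, size_t len)
	{
		snprintf(buf, len,
			 "/sys/devices/system/cpu/cpu%d/topology/core_cpus_list",
			 cpu);
		if (access(buf, F_OK) == 0)
			return buf;

		/* Deprecated spelling, kept for compatibility with old kernels */
		snprintf(buf, len,
			 "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
			 cpu);
		return buf;
	}

	int main(void)
	{
		char path[256];

		printf("SMT siblings read from: %s\n",
		       smt_siblings_path(0, path, sizeof(path)));
		return 0;
	}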
Signed-off-by: Kan Liang Reviewed-by: Jiri Olsa Cc: Andi Kleen Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1559688644-106558-5-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cputopo.c | 8 +++++++- tools/perf/util/smt.c | 8 ++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index 85fa87fc30cf..26e73a4bd4fe 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -15,6 +15,8 @@ "%s/devices/system/cpu/cpu%d/topology/die_cpus_list" #define THRD_SIB_FMT \ "%s/devices/system/cpu/cpu%d/topology/thread_siblings_list" +#define THRD_SIB_FMT_NEW \ + "%s/devices/system/cpu/cpu%d/topology/core_cpus_list" #define NODE_ONLINE_FMT \ "%s/devices/system/node/online" #define NODE_MEMINFO_FMT \ @@ -91,8 +93,12 @@ try_dies: ret = 0; try_threads: - scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT, + scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT_NEW, sysfs__mountpoint(), cpu); + if (access(filename, F_OK) == -1) { + scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT, + sysfs__mountpoint(), cpu); + } fp = fopen(filename, "r"); if (!fp) goto done; diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c index 453f6f6f29f3..3b791ef2cd50 100644 --- a/tools/perf/util/smt.c +++ b/tools/perf/util/smt.c @@ -23,8 +23,12 @@ int smt_on(void) char fn[256]; snprintf(fn, sizeof fn, - "devices/system/cpu/cpu%d/topology/thread_siblings", - cpu); + "devices/system/cpu/cpu%d/topology/core_cpus", cpu); + if (access(fn, F_OK) == -1) { + snprintf(fn, sizeof fn, + "devices/system/cpu/cpu%d/topology/thread_siblings", + cpu); + } if (sysfs__read_str(fn, &str, &strlen) < 0) continue; /* Entry is hex, but does not have 0x, so need custom parser */ From 36edfb940195b62c79bc8b148720c8275659a8c7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 6 Jun 2019 17:03:18 -0300 Subject: [PATCH 59/85] perf data: Fix perf.data documentation for HEADER_CPU_TOPOLOGY The 'die' info isn't in the same array as core and socket ids, and we missed the 'dies' string list, that comes right after the 'core' + 'socket' id variable length array, followed by the VLA for the dies. Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: c9cb12c5ba08 ("perf header: Add die information in CPU topology") Link: https://lkml.kernel.org/n/tip-nubi6mxp2n8ofvlx7ph6k3h6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- .../Documentation/perf.data-file-format.txt | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index de78183f6881..5f54feb19977 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -151,20 +151,35 @@ struct { HEADER_CPU_TOPOLOGY = 13, -String lists defining the core and CPU threads topology. -The string lists are followed by a variable length array -which contains core_id, die_id (for x86) and socket_id of each cpu. -The number of entries can be determined by the size of the -section minus the sizes of both string lists. - struct { + /* + * First revision of HEADER_CPU_TOPOLOGY + * + * See 'struct perf_header_string_list' definition earlier + * in this file. 
+ */ + struct perf_header_string_list cores; /* Variable length */ struct perf_header_string_list threads; /* Variable length */ + + /* + * Second revision of HEADER_CPU_TOPOLOGY, older tools + * will not consider what comes next + */ + struct { uint32_t core_id; - uint32_t die_id; uint32_t socket_id; } cpus[nr]; /* Variable length records */ + /* 'nr' comes from previously processed HEADER_NRCPUS's nr_cpu_avail */ + + /* + * Third revision of HEADER_CPU_TOPOLOGY, older tools + * will not consider what comes next + */ + + struct perf_header_string_list dies; /* Variable length */ + uint32_t die_id[nr_cpus_avail]; /* from previously processed HEADER_NR_CPUS, VLA */ }; Example: From e45c48a9a4d20ebc7b639a62c3ef8f4b08007027 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 5 Jun 2019 10:16:33 -0600 Subject: [PATCH 60/85] perf cs-etm: Properly set the value of 'old' and 'head' in snapshot mode This patch adds the necessary intelligence to properly compute the value of 'old' and 'head' when operating in snapshot mode. That way we can get the latest information in the AUX buffer and be compatible with the generic AUX ring buffer mechanic. Tester notes: > Leo, have you had the chance to test/review this one? Suzuki? Sure. I applied this patch on the perf/core branch (with latest commit 3e4fbf36c1e3 'perf augmented_raw_syscalls: Move reading filename to the loop') and passed testing with below steps: # perf record -e cs_etm/@tmc_etr0/ -S -m,64 --per-thread ./sort & [1] 19097 Bubble sorting array of 30000 elements # kill -USR2 19097 # kill -USR2 19097 # kill -USR2 19097 [ perf record: Woken up 4 times to write data ] [ perf record: Captured and wrote 0.753 MB perf.data ] Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190605161633.12245-1-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 127 +++++++++++++++++++++++++++++- 1 file changed, 123 insertions(+), 4 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index cc7f1cd23b14..279c69caef91 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -31,6 +31,8 @@ struct cs_etm_recording { struct auxtrace_record itr; struct perf_pmu *cs_etm_pmu; struct perf_evlist *evlist; + int wrapped_cnt; + bool *wrapped; bool snapshot_mode; size_t snapshot_size; }; @@ -684,16 +686,131 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, return 0; } -static int cs_etm_find_snapshot(struct auxtrace_record *itr __maybe_unused, +static int cs_etm_alloc_wrapped_array(struct cs_etm_recording *ptr, int idx) +{ + bool *wrapped; + int cnt = ptr->wrapped_cnt; + + /* Make @ptr->wrapped as big as @idx */ + while (cnt <= idx) + cnt++; + + /* + * Free'ed in cs_etm_recording_free(). Using realloc() to avoid + * cross compilation problems where the host's system supports + * reallocarray() but not the target. + */ + wrapped = realloc(ptr->wrapped, cnt * sizeof(bool)); + if (!wrapped) + return -ENOMEM; + + wrapped[cnt - 1] = false; + ptr->wrapped_cnt = cnt; + ptr->wrapped = wrapped; + + return 0; +} + +static bool cs_etm_buffer_has_wrapped(unsigned char *buffer, + size_t buffer_size, u64 head) +{ + u64 i, watermark; + u64 *buf = (u64 *)buffer; + size_t buf_size = buffer_size; + + /* + * We want to look the very last 512 byte (chosen arbitrarily) in + * the ring buffer. 
+ */ + watermark = buf_size - 512; + + /* + * @head is continuously increasing - if its value is equal or greater + * than the size of the ring buffer, it has wrapped around. + */ + if (head >= buffer_size) + return true; + + /* + * The value of @head is somewhere within the size of the ring buffer. + * This can be that there hasn't been enough data to fill the ring + * buffer yet or the trace time was so long that @head has numerically + * wrapped around. To find we need to check if we have data at the very + * end of the ring buffer. We can reliably do this because mmap'ed + * pages are zeroed out and there is a fresh mapping with every new + * session. + */ + + /* @head is less than 512 byte from the end of the ring buffer */ + if (head > watermark) + watermark = head; + + /* + * Speed things up by using 64 bit transactions (see "u64 *buf" above) + */ + watermark >>= 3; + buf_size >>= 3; + + /* + * If we find trace data at the end of the ring buffer, @head has + * been there and has numerically wrapped around at least once. + */ + for (i = watermark; i < buf_size; i++) + if (buf[i]) + return true; + + return false; +} + +static int cs_etm_find_snapshot(struct auxtrace_record *itr, int idx, struct auxtrace_mmap *mm, - unsigned char *data __maybe_unused, + unsigned char *data, u64 *head, u64 *old) { + int err; + bool wrapped; + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + + /* + * Allocate memory to keep track of wrapping if this is the first + * time we deal with this *mm. + */ + if (idx >= ptr->wrapped_cnt) { + err = cs_etm_alloc_wrapped_array(ptr, idx); + if (err) + return err; + } + + /* + * Check to see if *head has wrapped around. If it hasn't only the + * amount of data between *head and *old is snapshot'ed to avoid + * bloating the perf.data file with zeros. But as soon as *head has + * wrapped around the entire size of the AUX ring buffer it taken. + */ + wrapped = ptr->wrapped[idx]; + if (!wrapped && cs_etm_buffer_has_wrapped(data, mm->len, *head)) { + wrapped = true; + ptr->wrapped[idx] = true; + } + pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n", __func__, idx, (size_t)*old, (size_t)*head, mm->len); - *old = *head; - *head += mm->len; + /* No wrap has occurred, we can just use *head and *old. */ + if (!wrapped) + return 0; + + /* + * *head has wrapped around - adjust *head and *old to pickup the + * entire content of the AUX buffer. 
+	 */
+	if (*head >= mm->len) {
+		*old = *head - mm->len;
+	} else {
+		*head += mm->len;
+		*old = *head - mm->len;
+	}

 	return 0;
 }

@@ -734,6 +851,8 @@ static void cs_etm_recording_free(struct auxtrace_record *itr)
 {
 	struct cs_etm_recording *ptr =
 			container_of(itr, struct cs_etm_recording, itr);
+
+	zfree(&ptr->wrapped);
 	free(ptr);
 }

From 965e176f3c4ae260f21606e19dc3a58d66d8f605 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo
Date: Fri, 7 Jun 2019 15:14:27 -0300
Subject: [PATCH 61/85] perf cs-etm: Remove duplicate GENMASK() define, use linux/bits.h instead

Suzuki noticed that this should be more useful in a generic header, and
after looking I noticed we have it already in our copy of
include/linux/bits.h in tools/include, so just use it. Test built on
x86-64 and ubuntu 19.04 with:

perfbuilder@46646c9e848e:/$ aarch64-linux-gnu-gcc --version |& head -1
aarch64-linux-gnu-gcc (Ubuntu/Linaro 8.3.0-6ubuntu1) 8.3.0
perfbuilder@46646c9e848e:/$

Suggested-by: Suzuki K Poulose
Link: https://lkml.kernel.org/r/68c1c548-33cd-31e8-100d-7ffad008c7b2@arm.com
Cc: Alexander Shishkin
Cc: Jiri Olsa
Cc: Leo Yan
Cc: Mathieu Poirier
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: coresight@lists.linaro.org
Cc: linux-arm-kernel@lists.infradead.org,
Link: https://lkml.kernel.org/n/tip-69pd3mqvxdlh2shddsc7yhyv@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/cs-etm.h | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 33b57e748c3d..bc848fd095f4 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -9,6 +9,7 @@

 #include "util/event.h"
 #include "util/session.h"
+#include <linux/bits.h>

 /* Versionning header in case things need tro change in the future.  That way
  * decoding of old snapshot is still possible.
 */
@@ -161,16 +162,6 @@ struct cs_etm_packet_queue {

 #define CS_ETM_INVAL_ADDR  0xdeadbeefdeadbeefUL

-/*
- * Create a contiguous bitmask starting at bit position @l and ending at
- * position @h. For example
- * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
- *
- * Carbon copy of implementation found in $KERNEL/include/linux/bitops.h
- */
-#define GENMASK(h, l) \
-	(((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
-
 #define BMVAL(val, lsb, msb)	((val & GENMASK(msb, lsb)) >> lsb)

 #define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64))

From 87407fa58b6645cecd24102f58476b8dd7ce778d Mon Sep 17 00:00:00 2001
From: Leo Yan
Date: Fri, 7 Jun 2019 22:35:08 +0800
Subject: [PATCH 62/85] perf config: Update default value for llvm.clang-bpf-cmd-template

The clang BPF cmdline template has its default value defined in
tools/perf/util/llvm-utils.c, and that default has been changed several
times. This patch updates the documentation to reflect the latest
default value for the configuration llvm.clang-bpf-cmd-template.
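
For context, this template is read from the llvm section of the
perfconfig file (~/.perfconfig or the system-wide one). A minimal,
purely illustrative override using the llvm.clang-path and
llvm.clang-opt options documented nearby (the paths and flags below are
examples, not defaults):

  [llvm]
  	# Illustrative values only.
  	clang-path = /usr/local/bin/clang
  	clang-opt = -g

The template itself is normally best left at its built-in default,
which is what the hunk below documents.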
Signed-off-by: Leo Yan Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Mark Drayton Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Song Liu Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Fixes: d35b168c3dcd ("perf bpf: Give precedence to bpf header dir") Fixes: cb76371441d0 ("perf llvm: Allow passing options to llc in addition to clang") Fixes: 1b16fffa389d ("perf llvm-utils: Add bpf include path to clang command line") Link: http://lkml.kernel.org/r/20190607143508.18141-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 462b3cde0675..e4aa268d2e38 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -564,9 +564,12 @@ llvm.*:: llvm.clang-bpf-cmd-template:: Cmdline template. Below lines show its default value. Environment variable is used to pass options. - "$CLANG_EXEC -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS \ - -Wno-unused-value -Wno-pointer-sign -working-directory \ - $WORKING_DIR -c $CLANG_SOURCE -target bpf -O2 -o -" + "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ + "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE " \ + "$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \ + "-Wno-unused-value -Wno-pointer-sign " \ + "-working-directory $WORKING_DIR " \ + "-c \"$CLANG_SOURCE\" -target bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE" llvm.clang-opt:: Options passed to clang. From 33526f362b019f0a17c6b522eb3b07017dba98a7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 4 Jun 2019 15:59:59 +0300 Subject: [PATCH 63/85] perf auxtrace: Add perf time interval to itrace_synth_ops Instruction trace decoders can optimize output based on what time intervals will be filtered, so pass that information in itrace_synth_ops. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index c69bcd9a3091..c80c58eb7f4d 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -83,6 +83,8 @@ enum itrace_period_type { * @period_type: 'instructions' events period type * @initial_skip: skip N events at the beginning. * @cpu_bitmap: CPUs for which to synthesize events, or NULL for all + * @ptime_range: time intervals to trace or NULL + * @range_num: number of time intervals to trace */ struct itrace_synth_opts { bool set; @@ -107,6 +109,8 @@ struct itrace_synth_opts { enum itrace_period_type period_type; unsigned long initial_skip; unsigned long *cpu_bitmap; + struct perf_time_interval *ptime_range; + int range_num; }; /** @@ -599,6 +603,21 @@ static inline void auxtrace__free(struct perf_session *session) " PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ " concatenate multiple options. 
Default is ibxwpe or cewp\n" +static inline +void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts, + struct perf_time_interval *ptime_range, + int range_num) +{ + opts->ptime_range = ptime_range; + opts->range_num = range_num; +} + +static inline +void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts) +{ + opts->ptime_range = NULL; + opts->range_num = 0; +} #else @@ -742,6 +761,21 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, #define ITRACE_HELP "" +static inline +void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts + __maybe_unused, + struct perf_time_interval *ptime_range + __maybe_unused, + int range_num __maybe_unused) +{ +} + +static inline +void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts + __maybe_unused) +{ +} + #endif #endif From 400ae9818fe64899cea921a89c7078e0df9e41ea Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 4 Jun 2019 16:00:00 +0300 Subject: [PATCH 64/85] perf script: Set perf time interval in itrace_synth_ops Instruction trace decoders can optimize output based on what time intervals will be filtered, so pass that information in itrace_synth_ops. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 80c722ade852..61f00055476a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3829,6 +3829,10 @@ int cmd_script(int argc, const char **argv) &script.range_num); if (err < 0) goto out_delete; + + itrace_synth_opts__set_time_range(&itrace_synth_opts, + script.ptime_range, + script.range_num); } err = __cmd_script(&script); @@ -3836,8 +3840,10 @@ int cmd_script(int argc, const char **argv) flush_scripting(); out_delete: - if (script.ptime_range) + if (script.ptime_range) { + itrace_synth_opts__clear_time_range(&itrace_synth_opts); zfree(&script.ptime_range); + } perf_evlist__free_stats(session->evlist); perf_session__delete(session); From 4885c90c5e84926cfb083c58d8b6d70d1b1ac7cf Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 4 Jun 2019 16:00:01 +0300 Subject: [PATCH 65/85] perf report: Set perf time interval in itrace_synth_ops Instruction trace decoders can optimize output based on what time intervals will be filtered, so pass that information in itrace_synth_ops. 
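
The intended lifecycle is the same in 'perf script' (above) and 'perf
report' (below); schematically, as a sketch with 'opts', 'ranges' and
'n' standing in for the tool's own variables:

  /* after parsing --time into 'ranges' */
  itrace_synth_opts__set_time_range(&opts, ranges, n);

  /* ... process the session / decode the trace ... */

  /* before freeing the ranges */
  itrace_synth_opts__clear_time_range(&opts);
  zfree(&ranges);

Clearing before the zfree() matters because itrace_synth_opts only
borrows the pointer; it never takes ownership of the ranges array.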
Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1ca533f06a4c..91c40808380d 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1428,6 +1428,10 @@ repeat: &report.range_num); if (ret < 0) goto error; + + itrace_synth_opts__set_time_range(&itrace_synth_opts, + report.ptime_range, + report.range_num); } if (session->tevent.pevent && @@ -1449,8 +1453,10 @@ repeat: ret = 0; error: - if (report.ptime_range) + if (report.ptime_range) { + itrace_synth_opts__clear_time_range(&itrace_synth_opts); zfree(&report.ptime_range); + } zstd_fini(&(session->zstd_data)); perf_session__delete(session); return ret; From 4d678e9039b075f9418600dc87ec5e61cfb57115 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 4 Jun 2019 16:00:02 +0300 Subject: [PATCH 66/85] perf intel-pt: Add lookahead callback Add a callback function to enable the decoder to lookahead at subsequent trace buffers. This will be used to implement a "fast forward" facility which will be needed to support efficient time interval filtering. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 2 ++ tools/perf/util/intel-pt-decoder/intel-pt-decoder.h | 3 +++ 2 files changed, 5 insertions(+) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 9eb778f9c911..13123b195083 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -104,6 +104,7 @@ struct intel_pt_decoder { uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, uint64_t max_insn_cnt, void *data); bool (*pgd_ip)(uint64_t ip, void *data); + int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data); void *data; struct intel_pt_state state; const unsigned char *buf; @@ -233,6 +234,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->get_trace = params->get_trace; decoder->walk_insn = params->walk_insn; decoder->pgd_ip = params->pgd_ip; + decoder->lookahead = params->lookahead; decoder->data = params->data; decoder->return_compression = params->return_compression; decoder->branch_enable = params->branch_enable; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 6a61773dc44b..de36254c6ac9 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -102,12 +102,15 @@ struct intel_pt_buffer { uint64_t trace_nr; }; +typedef int (*intel_pt_lookahead_cb_t)(struct intel_pt_buffer *, void *); + struct intel_pt_params { int (*get_trace)(struct intel_pt_buffer *buffer, void *data); int (*walk_insn)(struct intel_pt_insn *intel_pt_insn, uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, uint64_t max_insn_cnt, void *data); bool (*pgd_ip)(uint64_t ip, void *data); + int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data); void *data; bool return_compression; bool branch_enable; From e72b52a2cfdea5cb0279b2d63a36d78b8c2134de Mon Sep 17 00:00:00 2001 From: Adrian Hunter 
Date: Tue, 4 Jun 2019 16:00:03 +0300 Subject: [PATCH 67/85] perf intel-pt: Factor out intel_pt_8b_tsc() Factor out intel_pt_8b_tsc() so it can be reused. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-6-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../util/intel-pt-decoder/intel-pt-decoder.c | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 13123b195083..c06dceb774e9 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1369,6 +1369,21 @@ static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip) return 0; } +static uint64_t intel_pt_8b_tsc(uint64_t timestamp, uint64_t ref_timestamp) +{ + timestamp |= (ref_timestamp & (0xffULL << 56)); + + if (timestamp < ref_timestamp) { + if (ref_timestamp - timestamp > (1ULL << 55)) + timestamp += (1ULL << 56); + } else { + if (timestamp - ref_timestamp > (1ULL << 55)) + timestamp -= (1ULL << 56); + } + + return timestamp; +} + static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) { uint64_t timestamp; @@ -1376,15 +1391,8 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) decoder->have_tma = false; if (decoder->ref_timestamp) { - timestamp = decoder->packet.payload | - (decoder->ref_timestamp & (0xffULL << 56)); - if (timestamp < decoder->ref_timestamp) { - if (decoder->ref_timestamp - timestamp > (1ULL << 55)) - timestamp += (1ULL << 56); - } else { - if (timestamp - decoder->ref_timestamp > (1ULL << 55)) - timestamp -= (1ULL << 56); - } + timestamp = intel_pt_8b_tsc(decoder->packet.payload, + decoder->ref_timestamp); decoder->tsc_timestamp = timestamp; decoder->timestamp = timestamp; decoder->ref_timestamp = 0; From 6492e5f013d9975d68528150edadead91e97a78a Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 4 Jun 2019 16:00:04 +0300 Subject: [PATCH 68/85] perf intel-pt: Factor out intel_pt_reposition() Factor out intel_pt_reposition() so it can be reused. 
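
As an aside, the reconstruction done by intel_pt_8b_tsc() above is easy
to sanity-check in isolation: it borrows the top byte of the reference
timestamp and then corrects by 2^56 whenever the two interpretations
are more than 2^55 apart. A self-contained sketch, with values chosen
purely for illustration:

  #include <assert.h>
  #include <stdint.h>

  int main(void)
  {
  	uint64_t ref     = 0x0100000000000010ULL; /* reference, top byte 0x01 */
  	uint64_t payload = 0x00fffffffffffff0ULL; /* 56-bit TSC just below rollover */
  	uint64_t tsc     = payload | (ref & (0xffULL << 56));

  	/* 0x01fffffffffffff0 is more than 2^55 ahead of ref... */
  	assert(tsc - ref > (1ULL << 55));
  	tsc -= 1ULL << 56;	/* ...so unwrap downwards, as the helper does */

  	assert(ref - tsc == 0x20);	/* i.e. slightly *before* the reference */
  	return 0;
  }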
Signed-off-by: Adrian Hunter
Cc: Jin Yao
Cc: Jiri Olsa
Link: http://lkml.kernel.org/r/20190604130017.31207-7-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index c06dceb774e9..70bff7bb79f3 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -494,6 +494,14 @@ static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder)
 	decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
 }

+static void intel_pt_reposition(struct intel_pt_decoder *decoder)
+{
+	decoder->ip = 0;
+	decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
+	decoder->timestamp = 0;
+	decoder->have_tma = false;
+}
+
 static int intel_pt_get_data(struct intel_pt_decoder *decoder)
 {
 	struct intel_pt_buffer buffer = { .buf = 0, };
@@ -512,11 +520,8 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder)
 		return -ENODATA;
 	}
 	if (!buffer.consecutive) {
-		decoder->ip = 0;
-		decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
+		intel_pt_reposition(decoder);
 		decoder->ref_timestamp = buffer.ref_timestamp;
-		decoder->timestamp = 0;
-		decoder->have_tma = false;
 		decoder->state.trace_nr = buffer.trace_nr;
 		intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
 			     decoder->ref_timestamp);

From 6c1f0b18ac3361837dbe53e794e28096285fb4f0 Mon Sep 17 00:00:00 2001
From: Adrian Hunter
Date: Tue, 4 Jun 2019 16:00:05 +0300
Subject: [PATCH 69/85] perf intel-pt: Add reposition parameter to intel_pt_get_data()

When the decoder gets the next trace buffer, some state is reset if the
buffer is not consecutive to the previous buffer. Add a 'reposition'
parameter so that the same reset can also be requested on demand, to
support a "fast forward" facility.
Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-8-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../util/intel-pt-decoder/intel-pt-decoder.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 70bff7bb79f3..dde6a7a97a7a 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -502,7 +502,7 @@ static void intel_pt_reposition(struct intel_pt_decoder *decoder) decoder->have_tma = false; } -static int intel_pt_get_data(struct intel_pt_decoder *decoder) +static int intel_pt_get_data(struct intel_pt_decoder *decoder, bool reposition) { struct intel_pt_buffer buffer = { .buf = 0, }; int ret; @@ -519,7 +519,7 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder) intel_pt_log("No more data\n"); return -ENODATA; } - if (!buffer.consecutive) { + if (!buffer.consecutive || reposition) { intel_pt_reposition(decoder); decoder->ref_timestamp = buffer.ref_timestamp; decoder->state.trace_nr = buffer.trace_nr; @@ -531,10 +531,11 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder) return 0; } -static int intel_pt_get_next_data(struct intel_pt_decoder *decoder) +static int intel_pt_get_next_data(struct intel_pt_decoder *decoder, + bool reposition) { if (!decoder->next_buf) - return intel_pt_get_data(decoder); + return intel_pt_get_data(decoder, reposition); decoder->buf = decoder->next_buf; decoder->len = decoder->next_len; @@ -553,7 +554,7 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder) len = decoder->len; memcpy(buf, decoder->buf, len); - ret = intel_pt_get_data(decoder); + ret = intel_pt_get_data(decoder, false); if (ret) { decoder->pos += old_len; return ret < 0 ? ret : -EINVAL; @@ -879,7 +880,7 @@ static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder) decoder->len -= decoder->pkt_step; if (!decoder->len) { - ret = intel_pt_get_next_data(decoder); + ret = intel_pt_get_next_data(decoder, false); if (ret) return ret; } @@ -2369,7 +2370,7 @@ static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder, decoder->pos += decoder->len; decoder->len = 0; - ret = intel_pt_get_next_data(decoder); + ret = intel_pt_get_next_data(decoder, false); if (ret) return ret; @@ -2395,7 +2396,7 @@ static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder) intel_pt_log("Scanning for PSB\n"); while (1) { if (!decoder->len) { - ret = intel_pt_get_next_data(decoder); + ret = intel_pt_get_next_data(decoder, false); if (ret) return ret; } From a7fa19f5a255cc8970202d5c54092089a01fc042 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 4 Jun 2019 16:00:06 +0300 Subject: [PATCH 70/85] perf intel-pt: Add intel_pt_fast_forward() Intel PT decoding is done in time order. In order to support efficient time interval filtering, add a facility to "fast forward" towards a particular timestamp. That involves finding the right buffer, stepping to that buffer, and then stepping forward PSBs. Because decoding must begin at a PSB, "fast forward" stops at the last PSB that has a timestamp before the target timestamp. 
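
A hypothetical caller would look roughly like this ('decoder' and
'target' are placeholders, and error handling is elided; the queue
setup later in this series follows the same shape):

  const struct intel_pt_state *state;
  int err;

  /* Reposition to the last PSB before 'target'... */
  err = intel_pt_fast_forward(decoder, target);
  if (err)
  	return err;

  /* ...then decode, discarding states that still precede it,
   * since decoding can only restart at a PSB. */
  do {
  	state = intel_pt_decode(decoder);
  	if (state->err)
  		return state->err;
  } while (state->timestamp < target);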
Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-9-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../util/intel-pt-decoder/intel-pt-decoder.c | 130 ++++++++++++++++++ .../util/intel-pt-decoder/intel-pt-decoder.h | 2 + 2 files changed, 132 insertions(+) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index dde6a7a97a7a..c374a856e73f 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -126,6 +126,7 @@ struct intel_pt_decoder { uint64_t timestamp; uint64_t tsc_timestamp; uint64_t ref_timestamp; + uint64_t buf_timestamp; uint64_t sample_timestamp; uint64_t ret_addr; uint64_t ctc_timestamp; @@ -519,6 +520,7 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder, bool reposition) intel_pt_log("No more data\n"); return -ENODATA; } + decoder->buf_timestamp = buffer.ref_timestamp; if (!buffer.consecutive || reposition) { intel_pt_reposition(decoder); decoder->ref_timestamp = buffer.ref_timestamp; @@ -2854,3 +2856,131 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, return buf_b; /* No overlap */ } } + +/** + * struct fast_forward_data - data used by intel_pt_ff_cb(). + * @timestamp: timestamp to fast forward towards + * @buf_timestamp: buffer timestamp of last buffer with trace data earlier than + * the fast forward timestamp. + */ +struct fast_forward_data { + uint64_t timestamp; + uint64_t buf_timestamp; +}; + +/** + * intel_pt_ff_cb - fast forward lookahead callback. + * @buffer: Intel PT trace buffer + * @data: opaque pointer to fast forward data (struct fast_forward_data) + * + * Determine if @buffer trace is past the fast forward timestamp. + * + * Return: 1 (stop lookahead) if @buffer trace is past the fast forward + * timestamp, and 0 otherwise. + */ +static int intel_pt_ff_cb(struct intel_pt_buffer *buffer, void *data) +{ + struct fast_forward_data *d = data; + unsigned char *buf; + uint64_t tsc; + size_t rem; + size_t len; + + buf = (unsigned char *)buffer->buf; + len = buffer->len; + + if (!intel_pt_next_psb(&buf, &len) || + !intel_pt_next_tsc(buf, len, &tsc, &rem)) + return 0; + + tsc = intel_pt_8b_tsc(tsc, buffer->ref_timestamp); + + intel_pt_log("Buffer 1st timestamp " x64_fmt " ref timestamp " x64_fmt "\n", + tsc, buffer->ref_timestamp); + + /* + * If the buffer contains a timestamp earlier that the fast forward + * timestamp, then record it, else stop. + */ + if (tsc < d->timestamp) + d->buf_timestamp = buffer->ref_timestamp; + else + return 1; + + return 0; +} + +/** + * intel_pt_fast_forward - reposition decoder forwards. + * @decoder: Intel PT decoder + * @timestamp: timestamp to fast forward towards + * + * Reposition decoder at the last PSB with a timestamp earlier than @timestamp. + * + * Return: 0 on success or negative error code on failure. 
+ */ +int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp) +{ + struct fast_forward_data d = { .timestamp = timestamp }; + unsigned char *buf; + size_t len; + int err; + + intel_pt_log("Fast forward towards timestamp " x64_fmt "\n", timestamp); + + /* Find buffer timestamp of buffer to fast forward to */ + err = decoder->lookahead(decoder->data, intel_pt_ff_cb, &d); + if (err < 0) + return err; + + /* Walk to buffer with same buffer timestamp */ + if (d.buf_timestamp) { + do { + decoder->pos += decoder->len; + decoder->len = 0; + err = intel_pt_get_next_data(decoder, true); + /* -ENOLINK means non-consecutive trace */ + if (err && err != -ENOLINK) + return err; + } while (decoder->buf_timestamp != d.buf_timestamp); + } + + if (!decoder->buf) + return 0; + + buf = (unsigned char *)decoder->buf; + len = decoder->len; + + if (!intel_pt_next_psb(&buf, &len)) + return 0; + + /* + * Walk PSBs while the PSB timestamp is less than the fast forward + * timestamp. + */ + do { + uint64_t tsc; + size_t rem; + + if (!intel_pt_next_tsc(buf, len, &tsc, &rem)) + break; + tsc = intel_pt_8b_tsc(tsc, decoder->buf_timestamp); + /* + * A TSC packet can slip past MTC packets but, after fast + * forward, decoding starts at the TSC timestamp. That means + * the timestamps may not be exactly the same as the timestamps + * that would have been decoded without fast forward. + */ + if (tsc < timestamp) { + intel_pt_log("Fast forward to next PSB timestamp " x64_fmt "\n", tsc); + decoder->pos += decoder->len - len; + decoder->buf = buf; + decoder->len = len; + intel_pt_reposition(decoder); + } else { + break; + } + } while (intel_pt_step_psb(&buf, &len)); + + return 0; +} diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index de36254c6ac9..e633fad2fd5d 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -130,6 +130,8 @@ void intel_pt_decoder_free(struct intel_pt_decoder *decoder); const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder); +int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp); + unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, unsigned char *buf_b, size_t len_b, bool have_tsc, bool *consecutive); From e96f7df8807615b96af59f8f8bc6263a7adc27b7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 4 Jun 2019 16:00:07 +0300 Subject: [PATCH 71/85] perf intel-pt: Factor out intel_pt_get_buffer() Factor out intel_pt_get_buffer() so it can be reused. 
Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-10-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 60 +++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 3cff8fe2eaa0..4a61c73c9711 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -239,32 +239,13 @@ static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer * return 0; } -/* This function assumes data is processed sequentially only */ -static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) +static int intel_pt_get_buffer(struct intel_pt_queue *ptq, + struct auxtrace_buffer *buffer, + struct auxtrace_buffer *old_buffer, + struct intel_pt_buffer *b) { - struct intel_pt_queue *ptq = data; - struct auxtrace_buffer *buffer = ptq->buffer; - struct auxtrace_buffer *old_buffer = ptq->old_buffer; - struct auxtrace_queue *queue; bool might_overlap; - if (ptq->stop) { - b->len = 0; - return 0; - } - - queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; - - buffer = auxtrace_buffer__next(queue, buffer); - if (!buffer) { - if (old_buffer) - auxtrace_buffer__drop_data(old_buffer); - b->len = 0; - return 0; - } - - ptq->buffer = buffer; - if (!buffer->data) { int fd = perf_data__fd(ptq->pt->session->data); @@ -294,6 +275,39 @@ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) b->consecutive = true; } + return 0; +} + +/* This function assumes data is processed sequentially only */ +static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) +{ + struct intel_pt_queue *ptq = data; + struct auxtrace_buffer *buffer = ptq->buffer; + struct auxtrace_buffer *old_buffer = ptq->old_buffer; + struct auxtrace_queue *queue; + int err; + + if (ptq->stop) { + b->len = 0; + return 0; + } + + queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; + + buffer = auxtrace_buffer__next(queue, buffer); + if (!buffer) { + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + b->len = 0; + return 0; + } + + ptq->buffer = buffer; + + err = intel_pt_get_buffer(ptq, buffer, old_buffer, b); + if (err) + return err; + if (ptq->step_through_buffers) ptq->stop = true; From da9000ae35027fb7305b8cad0b37df71937ad578 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 4 Jun 2019 16:00:08 +0300 Subject: [PATCH 72/85] perf intel-pt: Add support for lookahead Implement the lookahead callback to let the decoder access subsequent buffers. intel_pt_lookahead() manages the buffer lifetime and calls the decoder for each buffer until the decoder returns a non-zero value. 
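
Any function with the intel_pt_lookahead_cb_t signature can be driven
this way. For instance, a purely hypothetical probe that stops the walk
once it has seen trace data in a handful of upcoming buffers:

  struct probe_data {
  	int seen;
  };

  static int probe_cb(struct intel_pt_buffer *buffer, void *data)
  {
  	struct probe_data *d = data;

  	if (buffer->len && ++d->seen >= 8)
  		return 1;	/* non-zero return value stops the walk */

  	return 0;
  }

intel_pt_ff_cb() in the fast forward patch earlier in this series is
the first real user of the mechanism.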
Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-11-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 59 +++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 4a61c73c9711..3e3a01318b76 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -278,7 +278,63 @@ static int intel_pt_get_buffer(struct intel_pt_queue *ptq, return 0; } -/* This function assumes data is processed sequentially only */ +/* Do not drop buffers with references - refer intel_pt_get_trace() */ +static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq, + struct auxtrace_buffer *buffer) +{ + if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer) + return; + + auxtrace_buffer__drop_data(buffer); +} + +/* Must be serialized with respect to intel_pt_get_trace() */ +static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb, + void *cb_data) +{ + struct intel_pt_queue *ptq = data; + struct auxtrace_buffer *buffer = ptq->buffer; + struct auxtrace_buffer *old_buffer = ptq->old_buffer; + struct auxtrace_queue *queue; + int err = 0; + + queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; + + while (1) { + struct intel_pt_buffer b = { .len = 0 }; + + buffer = auxtrace_buffer__next(queue, buffer); + if (!buffer) + break; + + err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b); + if (err) + break; + + if (b.len) { + intel_pt_lookahead_drop_buffer(ptq, old_buffer); + old_buffer = buffer; + } else { + intel_pt_lookahead_drop_buffer(ptq, buffer); + continue; + } + + err = cb(&b, cb_data); + if (err) + break; + } + + if (buffer != old_buffer) + intel_pt_lookahead_drop_buffer(ptq, buffer); + intel_pt_lookahead_drop_buffer(ptq, old_buffer); + + return err; +} + +/* + * This function assumes data is processed sequentially only. + * Must be serialized with respect to intel_pt_lookahead() + */ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) { struct intel_pt_queue *ptq = data; @@ -827,6 +883,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, params.get_trace = intel_pt_get_trace; params.walk_insn = intel_pt_walk_next_insn; + params.lookahead = intel_pt_lookahead; params.data = ptq; params.return_compression = intel_pt_return_compression(pt); params.branch_enable = intel_pt_branch_enable(pt); From 2c47db90ed71af9c12d5600dbcef864761d76b3d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 4 Jun 2019 16:00:09 +0300 Subject: [PATCH 73/85] perf intel-pt: Add support for efficient time interval filtering Set up time ranges for efficient time interval filtering using the new "fast forward" facility. Because decoding is done in time order, intel_pt_time_filter() needs to look only at the next start or end timestamp - refer intel_pt_next_time(). 
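
With the pieces above in place, a run like the following (illustrative;
percentage ranges need a perf.data file with first/last sample times
recorded) decodes only the first 10% of the trace time and fast
forwards over the rest instead of decoding everything:

  $ perf script --itrace=i100us --time=10%/1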
Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-12-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 208 +++++++++++++++++++++++++++++++++++++ 1 file changed, 208 insertions(+) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 3e3a01318b76..43ddc78a066e 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -42,6 +42,7 @@ #include "tsc.h" #include "intel-pt.h" #include "config.h" +#include "time-utils.h" #include "intel-pt-decoder/intel-pt-log.h" #include "intel-pt-decoder/intel-pt-decoder.h" @@ -50,6 +51,11 @@ #define MAX_TIMESTAMP (~0ULL) +struct range { + u64 start; + u64 end; +}; + struct intel_pt { struct auxtrace auxtrace; struct auxtrace_queues queues; @@ -118,6 +124,9 @@ struct intel_pt { char *filter; struct addr_filters filts; + + struct range *time_ranges; + unsigned int range_cnt; }; enum switch_state { @@ -154,6 +163,9 @@ struct intel_pt_queue { bool have_sample; u64 time; u64 timestamp; + u64 sel_timestamp; + bool sel_start; + unsigned int sel_idx; u32 flags; u16 insn_len; u64 last_insn_cnt; @@ -1007,6 +1019,23 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq) ptq->flags |= PERF_IP_FLAG_TRACE_END; } +static void intel_pt_setup_time_range(struct intel_pt *pt, + struct intel_pt_queue *ptq) +{ + if (!pt->range_cnt) + return; + + ptq->sel_timestamp = pt->time_ranges[0].start; + ptq->sel_idx = 0; + + if (ptq->sel_timestamp) { + ptq->sel_start = true; + } else { + ptq->sel_timestamp = pt->time_ranges[0].end; + ptq->sel_start = false; + } +} + static int intel_pt_setup_queue(struct intel_pt *pt, struct auxtrace_queue *queue, unsigned int queue_nr) @@ -1031,6 +1060,8 @@ static int intel_pt_setup_queue(struct intel_pt *pt, ptq->step_through_buffers = true; ptq->sync_switch = pt->sync_switch; + + intel_pt_setup_time_range(pt, ptq); } if (!ptq->on_heap && @@ -1045,6 +1076,14 @@ static int intel_pt_setup_queue(struct intel_pt *pt, intel_pt_log("queue %u getting timestamp\n", queue_nr); intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", queue_nr, ptq->cpu, ptq->pid, ptq->tid); + + if (ptq->sel_start && ptq->sel_timestamp) { + ret = intel_pt_fast_forward(ptq->decoder, + ptq->sel_timestamp); + if (ret) + return ret; + } + while (1) { state = intel_pt_decode(ptq->decoder); if (state->err) { @@ -1064,6 +1103,9 @@ static int intel_pt_setup_queue(struct intel_pt *pt, queue_nr, ptq->timestamp); ptq->state = state; ptq->have_sample = true; + if (ptq->sel_start && ptq->sel_timestamp && + ptq->timestamp < ptq->sel_timestamp) + ptq->have_sample = false; intel_pt_sample_flags(ptq); ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp); if (ret) @@ -1750,10 +1792,83 @@ static void intel_pt_enable_sync_switch(struct intel_pt *pt) } } +/* + * To filter against time ranges, it is only necessary to look at the next start + * or end time. 
+ */ +static bool intel_pt_next_time(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + + if (ptq->sel_start) { + /* Next time is an end time */ + ptq->sel_start = false; + ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end; + return true; + } else if (ptq->sel_idx + 1 < pt->range_cnt) { + /* Next time is a start time */ + ptq->sel_start = true; + ptq->sel_idx += 1; + ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start; + return true; + } + + /* No next time */ + return false; +} + +static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp) +{ + int err; + + while (1) { + if (ptq->sel_start) { + if (ptq->timestamp >= ptq->sel_timestamp) { + /* After start time, so consider next time */ + intel_pt_next_time(ptq); + if (!ptq->sel_timestamp) { + /* No end time */ + return 0; + } + /* Check against end time */ + continue; + } + /* Before start time, so fast forward */ + ptq->have_sample = false; + if (ptq->sel_timestamp > *ff_timestamp) { + if (ptq->sync_switch) { + intel_pt_next_tid(ptq->pt, ptq); + ptq->switch_state = INTEL_PT_SS_UNKNOWN; + } + *ff_timestamp = ptq->sel_timestamp; + err = intel_pt_fast_forward(ptq->decoder, + ptq->sel_timestamp); + if (err) + return err; + } + return 0; + } else if (ptq->timestamp > ptq->sel_timestamp) { + /* After end time, so consider next time */ + if (!intel_pt_next_time(ptq)) { + /* No next time range, so stop decoding */ + ptq->have_sample = false; + ptq->switch_state = INTEL_PT_SS_NOT_TRACING; + return 1; + } + /* Check against next start time */ + continue; + } else { + /* Before end time */ + return 0; + } + } +} + static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) { const struct intel_pt_state *state = ptq->state; struct intel_pt *pt = ptq->pt; + u64 ff_timestamp = 0; int err; if (!pt->kernel_start) { @@ -1818,6 +1933,12 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) ptq->timestamp = state->timestamp; } + if (ptq->sel_timestamp) { + err = intel_pt_time_filter(ptq, &ff_timestamp); + if (err) + return err; + } + if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) { *timestamp = ptq->timestamp; return 0; @@ -2223,6 +2344,7 @@ static void intel_pt_free(struct perf_session *session) thread__put(pt->unknown_thread); addr_filters__exit(&pt->filts); zfree(&pt->filter); + zfree(&pt->time_ranges); free(pt); } @@ -2520,6 +2642,85 @@ static int intel_pt_perf_config(const char *var, const char *value, void *data) return 0; } +/* Find least TSC which converts to ns or later */ +static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt) +{ + u64 tsc, tm; + + tsc = perf_time_to_tsc(ns, &pt->tc); + + while (1) { + tm = tsc_to_perf_time(tsc, &pt->tc); + if (tm < ns) + break; + tsc -= 1; + } + + while (tm < ns) + tm = tsc_to_perf_time(++tsc, &pt->tc); + + return tsc; +} + +/* Find greatest TSC which converts to ns or earlier */ +static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt) +{ + u64 tsc, tm; + + tsc = perf_time_to_tsc(ns, &pt->tc); + + while (1) { + tm = tsc_to_perf_time(tsc, &pt->tc); + if (tm > ns) + break; + tsc += 1; + } + + while (tm > ns) + tm = tsc_to_perf_time(--tsc, &pt->tc); + + return tsc; +} + +static int intel_pt_setup_time_ranges(struct intel_pt *pt, + struct itrace_synth_opts *opts) +{ + struct perf_time_interval *p = opts->ptime_range; + int n = opts->range_num; + int i; + + if (!n || !p || pt->timeless_decoding) + return 0; + + pt->time_ranges = calloc(n, sizeof(struct range)); + if (!pt->time_ranges) + return -ENOMEM; + + 
pt->range_cnt = n; + + intel_pt_log("%s: %u range(s)\n", __func__, n); + + for (i = 0; i < n; i++) { + struct range *r = &pt->time_ranges[i]; + u64 ts = p[i].start; + u64 te = p[i].end; + + /* + * Take care to ensure the TSC range matches the perf-time range + * when converted back to perf-time. + */ + r->start = ts ? intel_pt_tsc_start(ts, pt) : 0; + r->end = te ? intel_pt_tsc_end(te, pt) : 0; + + intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n", + i, ts, te); + intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n", + i, r->start, r->end); + } + + return 0; +} + static const char * const intel_pt_info_fmts[] = { [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", @@ -2752,6 +2953,12 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000; } + if (session->itrace_synth_opts) { + err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts); + if (err) + goto err_delete_thread; + } + if (pt->synth_opts.calls) pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_TRACE_END; @@ -2792,6 +2999,7 @@ err_free_queues: err_free: addr_filters__exit(&pt->filts); zfree(&pt->filter); + zfree(&pt->time_ranges); free(pt); return err; } From f79a7689d99366aee9f89d785bca6c52ed6b76eb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 4 Jun 2019 16:00:10 +0300 Subject: [PATCH 74/85] perf time-utils: Treat time ranges consistently Currently, options allow only 1 explicit (non-percentage) time range. In preparation for adding support for multiple explicit time ranges, treat time ranges consistently. Instead of treating some time ranges as inclusive and some as excluding the end time, treat all time ranges as inclusive. This is only a 1 nanosecond change but is necessary to treat multiple explicit time ranges in a consistent manner. Note, there is a later patch that adds a test for time-utils. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-13-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 20663a460df3..1d67cf1216c7 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -389,13 +389,12 @@ bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, ptime = &ptime_buf[i]; if (timestamp >= ptime->start && - ((timestamp < ptime->end && i < num - 1) || - (timestamp <= ptime->end && i == num - 1))) { - break; + (timestamp <= ptime->end || !ptime->end)) { + return false; } } - return (i == num) ? true : false; + return true; } int perf_time__parse_for_ranges(const char *time_str, From c763242a5e742f8fefda0bb6cfdf6a5a34ae5e10 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 4 Jun 2019 16:00:11 +0300 Subject: [PATCH 75/85] perf time-utils: Factor out set_percent_time() Factor out set_percent_time() so it can be reused. 
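
After the perf_time__ranges_skip_sample() change above, the per-range
test reduces to a simple inclusive check. As a sketch (in the real code
a zero 'end' means the range is open-ended):

  static bool sample_in_range(u64 t, struct perf_time_interval *p)
  {
  	return t >= p->start && (t <= p->end || !p->end);
  }

A sample is skipped only if it falls in none of the ranges.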
Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-14-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 39 +++++++++++++++++------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 1d67cf1216c7..69441faab3d0 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -135,12 +135,27 @@ static int parse_percent(double *pcnt, char *str) return 0; } +static int set_percent_time(struct perf_time_interval *ptime, double start_pcnt, + double end_pcnt, u64 start, u64 end) +{ + u64 total = end - start; + + if (start_pcnt < 0.0 || start_pcnt > 1.0 || + end_pcnt < 0.0 || end_pcnt > 1.0) { + return -1; + } + + ptime->start = start + round(start_pcnt * total); + ptime->end = start + round(end_pcnt * total); + + return 0; +} + static int percent_slash_split(char *str, struct perf_time_interval *ptime, u64 start, u64 end) { char *p, *end_str; double pcnt, start_pcnt, end_pcnt; - u64 total = end - start; int i; /* @@ -168,15 +183,7 @@ static int percent_slash_split(char *str, struct perf_time_interval *ptime, start_pcnt = pcnt * (i - 1); end_pcnt = pcnt * i; - if (start_pcnt < 0.0 || start_pcnt > 1.0 || - end_pcnt < 0.0 || end_pcnt > 1.0) { - return -1; - } - - ptime->start = start + round(start_pcnt * total); - ptime->end = start + round(end_pcnt * total); - - return 0; + return set_percent_time(ptime, start_pcnt, end_pcnt, start, end); } static int percent_dash_split(char *str, struct perf_time_interval *ptime, @@ -184,7 +191,6 @@ static int percent_dash_split(char *str, struct perf_time_interval *ptime, { char *start_str = NULL, *end_str; double start_pcnt, end_pcnt; - u64 total = end - start; int ret; /* @@ -203,16 +209,7 @@ static int percent_dash_split(char *str, struct perf_time_interval *ptime, free(start_str); - if (start_pcnt < 0.0 || start_pcnt > 1.0 || - end_pcnt < 0.0 || end_pcnt > 1.0 || - start_pcnt > end_pcnt) { - return -1; - } - - ptime->start = start + round(start_pcnt * total); - ptime->end = start + round(end_pcnt * total); - - return 0; + return set_percent_time(ptime, start_pcnt, end_pcnt, start, end); } typedef int (*time_pecent_split)(char *, struct perf_time_interval *, From b16bfeb3db1b50273e95f539953c337be759500d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 4 Jun 2019 16:00:12 +0300 Subject: [PATCH 76/85] perf time-utils: Prevent percentage time range overlap Prevent percentage time range overlap. This is only a 1 nanosecond change but makes the results more logical e.g. a sample cannot be in both the first 10% and the second 20%. Note, there is a later patch that adds a test for time-utils. 
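
Worked example of the overlap fix: over a trace whose samples span
100 ns starting at t=0, '10%/1' now selects [0, 9] ns and '10%/2'
selects [10, 19] ns, whereas previously both included t=10. The final
range is left alone (the 'end -= 1' is skipped when ptime->end equals
the overall end), so '10%/10' still ends exactly at t=100. The
time-utils test added later in this series checks exactly these values.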
Signed-off-by: Adrian Hunter
Cc: Jin Yao
Cc: Jiri Olsa
Link: http://lkml.kernel.org/r/20190604130017.31207-15-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/time-utils.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c
index 69441faab3d0..3e87c21c293c 100644
--- a/tools/perf/util/time-utils.c
+++ b/tools/perf/util/time-utils.c
@@ -148,6 +148,9 @@ static int set_percent_time(struct perf_time_interval *ptime, double start_pcnt,
 	ptime->start = start + round(start_pcnt * total);
 	ptime->end = start + round(end_pcnt * total);

+	if (ptime->end > ptime->start && ptime->end != end)
+		ptime->end -= 1;
+
 	return 0;
 }

From 0ccc69ba0af46e3371c8cefb506aaf9f0e4f554c Mon Sep 17 00:00:00 2001
From: Adrian Hunter
Date: Tue, 4 Jun 2019 16:00:13 +0300
Subject: [PATCH 77/85] perf time-utils: Fix --time documentation

Correct some punctuation and spelling and correct the format to show
that the time resolution is nanoseconds not microseconds.

Signed-off-by: Adrian Hunter
Cc: Jin Yao
Cc: Jiri Olsa
Link: http://lkml.kernel.org/r/20190604130017.31207-16-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/Documentation/perf-diff.txt   | 6 +++---
 tools/perf/Documentation/perf-report.txt | 6 +++---
 tools/perf/Documentation/perf-script.txt | 6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index da7809b15cc9..5732f69580ab 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -142,9 +142,9 @@ OPTIONS
 	perf diff --time 0%-10%,30%-40%

 	It also supports analyzing samples within a given time window
-	<start>,<stop>. Times have the format seconds.microseconds. If 'start'
-	is not given (i.e., time string is ',x.y') then analysis starts at
-	the beginning of the file. If stop time is not given (i.e, time
+	<start>,<stop>. Times have the format seconds.nanoseconds. If 'start'
+	is not given (i.e. time string is ',x.y') then analysis starts at
+	the beginning of the file. If stop time is not given (i.e. time
 	string is 'x.y,') then analysis goes to the end of the file.
 	Time string is 'a1.b1,c1.d1:a2.b2,c2.d2'. Use ':' to separate
 	timestamps for different perf.data files.

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index f441baa794ce..3de029f6881d 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -412,12 +412,12 @@ OPTIONS

 --time::
 	Only analyze samples within given time window: <start>,<stop>. Times
-	have the format seconds.microseconds. If start is not given (i.e., time
+	have the format seconds.nanoseconds. If start is not given (i.e. time
 	string is ',x.y') then analysis starts at the beginning of the file. If
-	stop time is not given (i.e, time string is 'x.y,') then analysis goes
+	stop time is not given (i.e. time string is 'x.y,') then analysis goes
 	to end of file.

-	Also support time percent with multiple time range. Time string is
+	Also support time percent with multiple time ranges. Time string is
 	'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'.

 	For example:

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index c59fd52e9e91..878349cce968 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -361,12 +361,12 @@ include::itrace.txt[]

 --time::
 	Only analyze samples within given time window: <start>,<stop>. Times
-	have the format seconds.microseconds. If start is not given (i.e., time
+	have the format seconds.nanoseconds. If start is not given (i.e. time
 	string is ',x.y') then analysis starts at the beginning of the file. If
-	stop time is not given (i.e, time string is 'x.y,') then analysis goes
+	stop time is not given (i.e. time string is 'x.y,') then analysis goes
 	to end of file.

-	Also support time percent with multipe time range. Time string is
+	Also support time percent with multiple time ranges. Time string is
 	'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'.

 	For example:

From 2a8afddc084a5f5f933382758dd2767ed8a69f77 Mon Sep 17 00:00:00 2001
From: Adrian Hunter
Date: Tue, 4 Jun 2019 16:00:14 +0300
Subject: [PATCH 78/85] perf time-utils: Simplify perf_time__parse_for_ranges() error paths slightly

Simplify perf_time__parse_for_ranges() error paths slightly.

Signed-off-by: Adrian Hunter
Cc: Jin Yao
Cc: Jiri Olsa
Link: http://lkml.kernel.org/r/20190604130017.31207-17-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/time-utils.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c
index 3e87c21c293c..9a463752dba8 100644
--- a/tools/perf/util/time-utils.c
+++ b/tools/perf/util/time-utils.c
@@ -403,7 +403,7 @@ int perf_time__parse_for_ranges(const char *time_str,
 				int *range_size, int *range_num)
 {
 	struct perf_time_interval *ptime_range;
-	int size, num, ret;
+	int size, num, ret = -EINVAL;

 	ptime_range = perf_time__range_alloc(time_str, &size);
 	if (!ptime_range)
@@ -415,7 +415,6 @@ int perf_time__parse_for_ranges(const char *time_str,
 			pr_err("HINT: no first/last sample time found in perf data.\n"
 			       "Please use latest perf binary to execute 'perf record'\n"
 			       "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n");
-			ret = -EINVAL;
 			goto error;
 		}

@@ -425,11 +424,8 @@ int perf_time__parse_for_ranges(const char *time_str,
 				session->evlist->first_sample_time,
 				session->evlist->last_sample_time);

-		if (num < 0) {
-			pr_err("Invalid time string\n");
-			ret = -EINVAL;
-			goto error;
-		}
+		if (num < 0)
+			goto error_invalid;
 	} else {
 		num = 1;
 	}

@@ -439,6 +435,8 @@ int perf_time__parse_for_ranges(const char *time_str,
 	*ranges = ptime_range;

 	return 0;

+error_invalid:
+	pr_err("Invalid time string\n");
 error:
 	free(ptime_range);
 	return ret;

From 929afa0092d0ea6be2fbd0ac087319092595eba6 Mon Sep 17 00:00:00 2001
From: Adrian Hunter
Date: Tue, 4 Jun 2019 16:00:15 +0300
Subject: [PATCH 79/85] perf time-utils: Make perf_time__parse_for_ranges() more logical

Explicit time ranges never contain a percent sign whereas percentage
ranges always do, so it is possible to call the correct parser.
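
For example, '1234567.123456789,1234567.123456790' contains no '%' and
is handled by perf_time__parse_str(), while '10%/2' or '0%-10%' contain
one and go straight to the percentage parsers.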
Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-18-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 9a463752dba8..d942840356e3 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -402,6 +402,7 @@ int perf_time__parse_for_ranges(const char *time_str, struct perf_time_interval **ranges, int *range_size, int *range_num) { + bool has_percent = strchr(time_str, '%'); struct perf_time_interval *ptime_range; int size, num, ret = -EINVAL; @@ -409,7 +410,7 @@ int perf_time__parse_for_ranges(const char *time_str, if (!ptime_range) return -ENOMEM; - if (perf_time__parse_str(ptime_range, time_str) != 0) { + if (has_percent) { if (session->evlist->first_sample_time == 0 && session->evlist->last_sample_time == 0) { pr_err("HINT: no first/last sample time found in perf data.\n" @@ -427,6 +428,8 @@ int perf_time__parse_for_ranges(const char *time_str, if (num < 0) goto error_invalid; } else { + if (perf_time__parse_str(ptime_range, time_str)) + goto error_invalid; num = 1; } From e39a12cbd2496edb4cab0f99efb0d217c55ba273 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 4 Jun 2019 16:00:16 +0300 Subject: [PATCH 80/85] perf tests: Add a test for time-utils Test time ranges work as expected. Committer testing: $ perf test "time utils" 59: time utils : Ok $ perf test -v "time utils" 59: time utils : --- start --- test child forked, pid 31711 parse_nsec_time("0") 0 parse_nsec_time("1") 1000000000 parse_nsec_time("0.000000001") 1 parse_nsec_time("1.000000001") 1000000001 parse_nsec_time("123456.123456") 123456123456000 parse_nsec_time("1234567.123456789") 1234567123456789 parse_nsec_time("18446744073.709551615") 18446744073709551615 perf_time__parse_str("1234567.123456789,1234567.123456789") start time 1234567123456789, end time 1234567123456789 perf_time__parse_str("1234567.123456789,1234567.123456790") start time 1234567123456789, end time 1234567123456790 perf_time__parse_str("1234567.123456789,") start time 1234567123456789, end time 0 perf_time__parse_str(",1234567.123456789") start time 0, end time 1234567123456789 perf_time__parse_str("0,1234567.123456789") start time 0, end time 1234567123456789 perf_time__parse_for_ranges("1234567.123456789,1234567.123456790") start time 1234567123456789, end time 1234567123456790 perf_time__parse_for_ranges("10%/1") first_sample_time 7654321000000000 last_sample_time 7654321000000100 start time 0: 7654321000000000, end time 0: 7654321000000009 perf_time__parse_for_ranges("10%/2") first_sample_time 7654321000000000 last_sample_time 7654321000000100 start time 0: 7654321000000010, end time 0: 7654321000000019 perf_time__parse_for_ranges("10%/1,10%/2") first_sample_time 11223344000000000 last_sample_time 11223344000000100 start time 0: 11223344000000000, end time 0: 11223344000000009 start time 1: 11223344000000010, end time 1: 11223344000000019 perf_time__parse_for_ranges("10%/1,10%/3,10%/10") first_sample_time 11223344000000000 last_sample_time 11223344000000100 start time 0: 11223344000000000, end time 0: 11223344000000009 start time 1: 11223344000000020, end time 1: 11223344000000029 start time 2: 11223344000000090, end time 2: 11223344000000100 test child finished with 0 ---- end ---- time utils: Ok $ Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jin Yao Cc: Jiri Olsa 
Link: http://lkml.kernel.org/r/20190604130017.31207-19-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 4 + tools/perf/tests/tests.h | 1 + tools/perf/tests/time-utils-test.c | 234 +++++++++++++++++++++++++++++ 4 files changed, 240 insertions(+) create mode 100644 tools/perf/tests/time-utils-test.c diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 4afb6319ed51..e3ba63cef01e 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -51,6 +51,7 @@ perf-y += clang.o perf-y += unit_number__scnprintf.o perf-y += mem2node.o perf-y += map_groups.o +perf-y += time-utils-test.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 941c5456d625..cd72ff0f7658 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -289,6 +289,10 @@ static struct test generic_tests[] = { .desc = "mem2node", .func = test__mem2node, }, + { + .desc = "time utils", + .func = test__time_utils, + }, { .desc = "map_groups__merge_in", .func = test__map_groups__merge_in, diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index e5e3a57cd373..72912eb473cb 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -108,6 +108,7 @@ int test__clang_subtest_get_nr(void); int test__unit_number__scnprint(struct test *test, int subtest); int test__mem2node(struct test *t, int subtest); int test__map_groups__merge_in(struct test *t, int subtest); +int test__time_utils(struct test *t, int subtest); bool test__bp_signal_is_supported(void); bool test__wp_is_supported(void); diff --git a/tools/perf/tests/time-utils-test.c b/tools/perf/tests/time-utils-test.c new file mode 100644 index 000000000000..7504046b111c --- /dev/null +++ b/tools/perf/tests/time-utils-test.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/compiler.h> +#include <linux/time64.h> +#include <inttypes.h> +#include <string.h> +#include "time-utils.h" +#include "evlist.h" +#include "session.h" +#include "debug.h" +#include "tests.h" + +static bool test__parse_nsec_time(const char *str, u64 expected) +{ + u64 ptime; + int err; + + pr_debug("\nparse_nsec_time(\"%s\")\n", str); + + err = parse_nsec_time(str, &ptime); + if (err) { + pr_debug("error %d\n", err); + return false; + } + + if (ptime != expected) { + pr_debug("Failed. ptime %" PRIu64 " expected %" PRIu64 "\n", + ptime, expected); + return false; + } + + pr_debug("%" PRIu64 "\n", ptime); + + return true; +} + +static bool test__perf_time__parse_str(const char *ostr, u64 start, u64 end) +{ + struct perf_time_interval ptime; + int err; + + pr_debug("\nperf_time__parse_str(\"%s\")\n", ostr); + + err = perf_time__parse_str(&ptime, ostr); + if (err) { + pr_debug("Error %d\n", err); + return false; + } + + if (ptime.start != start || ptime.end != end) { + pr_debug("Failed.
Expected %" PRIu64 " to %" PRIu64 "\n", + start, end); + return false; + } + + return true; +} + +#define TEST_MAX 64 + +struct test_data { + const char *str; + u64 first; + u64 last; + struct perf_time_interval ptime[TEST_MAX]; + int num; + u64 skip[TEST_MAX]; + u64 noskip[TEST_MAX]; +}; + +static bool test__perf_time__parse_for_ranges(struct test_data *d) +{ + struct perf_evlist evlist = { + .first_sample_time = d->first, + .last_sample_time = d->last, + }; + struct perf_session session = { .evlist = &evlist }; + struct perf_time_interval *ptime = NULL; + int range_size, range_num; + bool pass = false; + int i, err; + + pr_debug("\nperf_time__parse_for_ranges(\"%s\")\n", d->str); + + if (strchr(d->str, '%')) + pr_debug("first_sample_time %" PRIu64 " last_sample_time %" PRIu64 "\n", + d->first, d->last); + + err = perf_time__parse_for_ranges(d->str, &session, &ptime, &range_size, + &range_num); + if (err) { + pr_debug("error %d\n", err); + goto out; + } + + if (range_size < d->num || range_num != d->num) { + pr_debug("bad size: range_size %d range_num %d expected num %d\n", + range_size, range_num, d->num); + goto out; + } + + for (i = 0; i < d->num; i++) { + if (ptime[i].start != d->ptime[i].start || + ptime[i].end != d->ptime[i].end) { + pr_debug("bad range %d expected %" PRIu64 " to %" PRIu64 "\n", + i, d->ptime[i].start, d->ptime[i].end); + goto out; + } + } + + if (perf_time__ranges_skip_sample(ptime, d->num, 0)) { + pr_debug("failed to keep 0\n"); + goto out; + } + + for (i = 0; i < TEST_MAX; i++) { + if (d->skip[i] && + !perf_time__ranges_skip_sample(ptime, d->num, d->skip[i])) { + pr_debug("failed to skip %" PRIu64 "\n", d->skip[i]); + goto out; + } + if (d->noskip[i] && + perf_time__ranges_skip_sample(ptime, d->num, d->noskip[i])) { + pr_debug("failed to keep %" PRIu64 "\n", d->noskip[i]); + goto out; + } + } + + pass = true; +out: + free(ptime); + return pass; +} + +int test__time_utils(struct test *t __maybe_unused, int subtest __maybe_unused) +{ + bool pass = true; + + pass &= test__parse_nsec_time("0", 0); + pass &= test__parse_nsec_time("1", 1000000000ULL); + pass &= test__parse_nsec_time("0.000000001", 1); + pass &= test__parse_nsec_time("1.000000001", 1000000001ULL); + pass &= test__parse_nsec_time("123456.123456", 123456123456000ULL); + pass &= test__parse_nsec_time("1234567.123456789", 1234567123456789ULL); + pass &= test__parse_nsec_time("18446744073.709551615", + 0xFFFFFFFFFFFFFFFFULL); + + pass &= test__perf_time__parse_str("1234567.123456789,1234567.123456789", + 1234567123456789ULL, 1234567123456789ULL); + pass &= test__perf_time__parse_str("1234567.123456789,1234567.123456790", + 1234567123456789ULL, 1234567123456790ULL); + pass &= test__perf_time__parse_str("1234567.123456789,", + 1234567123456789ULL, 0); + pass &= test__perf_time__parse_str(",1234567.123456789", + 0, 1234567123456789ULL); + pass &= test__perf_time__parse_str("0,1234567.123456789", + 0, 1234567123456789ULL); + + { + u64 b = 1234567123456789ULL; + struct test_data d = { + .str = "1234567.123456789,1234567.123456790", + .ptime = { {b, b + 1}, }, + .num = 1, + .skip = { b - 1, b + 2, }, + .noskip = { b, b + 1, }, + }; + + pass &= test__perf_time__parse_for_ranges(&d); + } + + { + u64 b = 7654321ULL * NSEC_PER_SEC; + struct test_data d = { + .str = "10%/1", + .first = b, + .last = b + 100, + .ptime = { {b, b + 9}, }, + .num = 1, + .skip = { b - 1, b + 10, }, + .noskip = { b, b + 9, }, + }; + + pass &= test__perf_time__parse_for_ranges(&d); + } + + { + u64 b = 7654321ULL * NSEC_PER_SEC; + struct 
test_data d = { + .str = "10%/2", + .first = b, + .last = b + 100, + .ptime = { {b + 10, b + 19}, }, + .num = 1, + .skip = { b + 9, b + 20, }, + .noskip = { b + 10, b + 19, }, + }; + + pass &= test__perf_time__parse_for_ranges(&d); + } + + { + u64 b = 11223344ULL * NSEC_PER_SEC; + struct test_data d = { + .str = "10%/1,10%/2", + .first = b, + .last = b + 100, + .ptime = { {b, b + 9}, {b + 10, b + 19}, }, + .num = 2, + .skip = { b - 1, b + 20, }, + .noskip = { b, b + 8, b + 9, b + 10, b + 11, b + 12, b + 19, }, + }; + + pass &= test__perf_time__parse_for_ranges(&d); + } + + { + u64 b = 11223344ULL * NSEC_PER_SEC; + struct test_data d = { + .str = "10%/1,10%/3,10%/10", + .first = b, + .last = b + 100, + .ptime = { {b, b + 9}, {b + 20, b + 29}, { b + 90, b + 100}, }, + .num = 3, + .skip = { b - 1, b + 10, b + 19, b + 30, b + 89, b + 101 }, + .noskip = { b, b + 9, b + 20, b + 29, b + 90, b + 100}, + }; + + pass &= test__perf_time__parse_for_ranges(&d); + } + + pr_debug("\n"); + + return pass ? 0 : TEST_FAIL; +} From a77a05e2337df1347f4de96bfa313db7008fe8bd Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 4 Jun 2019 16:00:17 +0300 Subject: [PATCH 81/85] perf time-utils: Add support for multiple explicit time intervals Currently only a single explicit time range is accepted. Add support for multiple ranges separated by spaces, which requires the string to be quoted. Update the time utils test accordingly. Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190604130017.31207-20-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-diff.txt | 8 ++- tools/perf/Documentation/perf-report.txt | 3 +- tools/perf/Documentation/perf-script.txt | 3 +- tools/perf/tests/time-utils-test.c | 17 ++++++ tools/perf/util/time-utils.c | 74 ++++++++++++++++++++++-- 5 files changed, 94 insertions(+), 11 deletions(-) diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index 5732f69580ab..facd91e4e945 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -145,9 +145,11 @@ <start>,<stop>. Times have the format seconds.nanoseconds. If 'start' is not given (i.e. time string is ',x.y') then analysis starts at the beginning of the file. If stop time is not given (i.e. time - string is 'x.y,') then analysis goes to the end of the file. Time string is - 'a1.b1,c1.d1:a2.b2,c2.d2'. Use ':' to separate timestamps for different - perf.data files. + string is 'x.y,') then analysis goes to the end of the file. + Multiple ranges can be separated by spaces, which requires the argument + to be quoted e.g. --time "1234.567,1234.789 1235," + Time string is 'a1.b1,c1.d1:a2.b2,c2.d2'. Use ':' to separate timestamps + for different perf.data files. For example, we get the timestamp information from 'perf script'. diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 3de029f6881d..8c4372819e11 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -415,7 +415,8 @@ OPTIONS have the format seconds.nanoseconds. If start is not given (i.e. time string is ',x.y') then analysis starts at the beginning of the file. If stop time is not given (i.e. time string is 'x.y,') then analysis goes - to end of file. + to end of file. Multiple ranges can be separated by spaces, which + requires the argument to be quoted e.g.
--time "1234.567,1234.789 1235," Also support time percent with multiple time ranges. Time string is 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 878349cce968..d4e2e18a5881 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -364,7 +364,8 @@ include::itrace.txt[] have the format seconds.nanoseconds. If start is not given (i.e. time string is ',x.y') then analysis starts at the beginning of the file. If stop time is not given (i.e. time string is 'x.y,') then analysis goes - to end of file. + to end of file. Multiple ranges can be separated by spaces, which + requires the argument to be quoted e.g. --time "1234.567,1234.789 1235," Also support time percent with multiple time ranges. Time string is 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. diff --git a/tools/perf/tests/time-utils-test.c b/tools/perf/tests/time-utils-test.c index 7504046b111c..4f53006233a1 100644 --- a/tools/perf/tests/time-utils-test.c +++ b/tools/perf/tests/time-utils-test.c @@ -168,6 +168,23 @@ int test__time_utils(struct test *t __maybe_unused, int subtest __maybe_unused) pass &= test__perf_time__parse_for_ranges(&d); } + { + u64 b = 1234567123456789ULL; + u64 c = 7654321987654321ULL; + u64 e = 8000000000000000ULL; + struct test_data d = { + .str = "1234567.123456789,1234567.123456790 " + "7654321.987654321,7654321.987654444 " + "8000000,8000000.000000005", + .ptime = { {b, b + 1}, {c, c + 123}, {e, e + 5}, }, + .num = 3, + .skip = { b - 1, b + 2, c - 1, c + 124, e - 1, e + 6 }, + .noskip = { b, b + 1, c, c + 123, e, e + 5 }, + }; + + pass &= test__perf_time__parse_for_ranges(&d); + } + { u64 b = 7654321ULL * NSEC_PER_SEC; struct test_data d = { diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index d942840356e3..2b48816a2d2e 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "perf.h" #include "debug.h" @@ -116,6 +117,69 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) return rc; } +static int perf_time__parse_strs(struct perf_time_interval *ptime, + const char *ostr, int size) +{ + const char *cp; + char *str, *arg, *p; + int i, num = 0, rc = 0; + + /* Count the commas */ + for (cp = ostr; *cp; cp++) + num += !!(*cp == ','); + + if (!num) + return -EINVAL; + + BUG_ON(num > size); + + str = strdup(ostr); + if (!str) + return -ENOMEM; + + /* Split the string and parse each piece, except the last */ + for (i = 0, p = str; i < num - 1; i++) { + arg = p; + /* Find next comma, there must be one */ + p = strchr(p, ',') + 1; + /* Skip white space */ + while (isspace(*p)) + p++; + /* Skip the value, must not contain space or comma */ + while (*p && !isspace(*p)) { + if (*p++ == ',') { + rc = -EINVAL; + goto out; + } + } + /* Split and parse */ + if (*p) + *p++ = 0; + rc = perf_time__parse_str(ptime + i, arg); + if (rc < 0) + goto out; + } + + /* Parse the last piece */ + rc = perf_time__parse_str(ptime + i, p); + if (rc < 0) + goto out; + + /* Check there is no overlap */ + for (i = 0; i < num - 1; i++) { + if (ptime[i].end >= ptime[i + 1].start) { + rc = -EINVAL; + goto out; + } + } + + rc = num; +out: + free(str); + + return rc; +} + static int parse_percent(double *pcnt, char *str) { char *c, *endptr; @@ -424,15 +488,13 @@ int perf_time__parse_for_ranges(const char *time_str, time_str, session->evlist->first_sample_time, 
session->evlist->last_sample_time); - - if (num < 0) - goto error_invalid; } else { - if (perf_time__parse_str(ptime_range, time_str)) - goto error_invalid; - num = 1; + num = perf_time__parse_strs(ptime_range, time_str, size); } + if (num < 0) + goto error_invalid; + *range_size = size; *range_num = num; *ranges = ptime_range; From 53fe307dfd309e425b171f6272d64296a54f4dff Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 4 Jun 2019 07:35:04 +0200 Subject: [PATCH 82/85] perf test 6: Fix missing kvm module load for s390 Command # perf test -Fv 6 fails with error running test 100 'kvm-s390:kvm_s390_create_vm' failed to parse event 'kvm-s390:kvm_s390_create_vm', err -1, str 'unknown tracepoint' event syntax error: 'kvm-s390:kvm_s390_create_vm' \___ unknown tracepoint when the kvm module is not loaded or not built in. Fix this by adding a .valid callback function which tests whether the module is loaded. Loaded modules (or builtin KVM support) have a directory named /sys/kernel/debug/tracing/events/kvm-s390 for this tracepoint. Check for existence of this directory. Signed-off-by: Thomas Richter Reviewed-by: Christian Borntraeger Cc: Heiko Carstens Cc: Hendrik Brueckner Link: http://lkml.kernel.org/r/20190604053504.43073-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-events.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 4a69c07f4101..8f3c80e13584 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -18,6 +18,32 @@ #define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \ PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD) +#if defined(__s390x__) +/* Return true if kvm module is available and loaded. Test this + * and return success when the tracepoint kvm_s390_create_vm + * exists. Otherwise this test always fails. + */ +static bool kvm_s390_create_vm_valid(void) +{ + char *eventfile; + bool rc = false; + + eventfile = get_events_file("kvm-s390"); + + if (eventfile) { + DIR *mydir = opendir(eventfile); + + if (mydir) { + rc = true; + closedir(mydir); + } + put_events_file(eventfile); + } + + return rc; +} +#endif + static int test__checkevent_tracepoint(struct perf_evlist *evlist) { struct perf_evsel *evsel = perf_evlist__first(evlist); @@ -1642,6 +1668,7 @@ static struct evlist_test test__events[] = { { .name = "kvm-s390:kvm_s390_create_vm", .check = test__checkevent_tracepoint, + .valid = kvm_s390_create_vm_valid, .id = 100, }, #endif From 8a07aa4e9b7b0222129c07afff81634a884b2866 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 23 May 2019 10:25:21 +0200 Subject: [PATCH 83/85] perf report: Fix OOM error in TUI mode on s390 Debugging an OOM error using the TUI interface revealed this issue on s390: [tmricht@m83lp54 perf]$ cat /proc/kallsyms |sort .... 00000001119b7158 B radix_tree_node_cachep 00000001119b8000 B __bss_stop 00000001119b8000 B _end 000003ff80002850 t autofs_mount [autofs4] 000003ff80002868 t autofs_show_options [autofs4] 000003ff80002a98 t autofs_evict_inode [autofs4] .... There is a huge gap between the last kernel symbol __bss_stop/_end and the first kernel module symbol autofs_mount (from autofs4 module).
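A back-of-the-envelope check of that gap (standalone sketch; the addresses are the ones from the kallsyms dump above):

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	uint64_t bss_stop     = 0x1119b8000ULL;   /* __bss_stop / _end */
	uint64_t autofs_mount = 0x3ff80002850ULL; /* first module symbol */

	/* symbols__fixup_end() extends __bss_stop up to the next symbol,
	 * so the symbol's size becomes this difference: 0x3fe6e64a850
	 * bytes, roughly 4 TiB.
	 */
	printf("gap = %#" PRIx64 " bytes\n", autofs_mount - bss_stop);
	return 0;
}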
After reading the kernel symbol table via functions: dso__load() +--> dso__load_kernel_sym() +--> dso__load_kallsyms() +--> __dso_load_kallsyms() +--> symbols__fixup_end() the symbol __bss_stop has a start address of 1119b8000 and an end address of 3ff80002850, as can be seen by this debug statement: symbols__fixup_end __bss_stop start:0x1119b8000 end:0x3ff80002850 The size of symbol __bss_stop is 0x3fe6e64a850 bytes! It is the last kernel symbol and fills up the space until the first kernel module symbol. This size kills the TUI interface when executing the following code: process_sample_event() hist_entry_iter__add() hist_iter__report_callback() hist_entry__inc_addr_samples() symbol__inc_addr_samples(symbol = __bss_stop) symbol__cycles_hist() annotated_source__alloc_histograms(..., symbol__size(sym), ...) This function allocates memory to save sample histograms. The symbol__size() macro is defined as sym->end - sym->start, which results in the above value of 0x3fe6e64a850 bytes, and the call to calloc() in annotated_source__alloc_histograms() fails. The histogram memory allocation might fail; make this failure non-fatal and continue processing. Output before: [tmricht@m83lp54 perf]$ ./perf --debug stderr=1 report -vvvvv \ -i ~/slow.data 2>/tmp/2 [tmricht@m83lp54 perf]$ tail -5 /tmp/2 __symbol__inc_addr_samples(875): ENOMEM! sym->name=__bss_stop, start=0x1119b8000, addr=0x2aa0005eb08, end=0x3ff80002850, func: 0 problem adding hist entry, skipping event 0x938b8 [0x8]: failed to process type: 68 [Cannot allocate memory] [tmricht@m83lp54 perf]$ Output after: [tmricht@m83lp54 perf]$ ./perf --debug stderr=1 report -vvvvv \ -i ~/slow.data 2>/tmp/2 [tmricht@m83lp54 perf]$ tail -5 /tmp/2 symbol__inc_addr_samples map:0x1597830 start:0x110730000 end:0x3ff80002850 symbol__hists notes->src:0x2aa2a70 nr_hists:1 symbol__inc_addr_samples sym:unlink_anon_vmas src:0x2aa2a70 __symbol__inc_addr_samples: addr=0x11094c69e 0x11094c670 unlink_anon_vmas: period++ [addr: 0x11094c69e, 0x2e, evidx=0] => nr_samples: 1, period: 526008 [tmricht@m83lp54 perf]$ There is no error about failed memory allocation and the TUI interface shows all entries. Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Hendrik Brueckner Link: http://lkml.kernel.org/r/90cb5607-3e12-5167-682d-978eba7dafa8@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 0b8573fd9b05..15be9d271f55 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -932,9 +932,8 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map, if (sym == NULL) return 0; src = symbol__hists(sym, evsel->evlist->nr_entries); - if (src == NULL) - return -ENOMEM; - return __symbol__inc_addr_samples(sym, map, src, evsel->idx, addr, sample); + return (src) ? __symbol__inc_addr_samples(sym, map, src, evsel->idx, + addr, sample) : 0; } static int symbol__account_cycles(u64 addr, u64 start, From 180ca71cf1be6030ba7eb4515bf2224c07b62631 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 22 May 2019 08:43:25 +0200 Subject: [PATCH 84/85] perf report: Support s390 diag event display on x86 Perf report fails to display the s390-specific event numbered bd000 on an x86 platform.
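The root cause, detailed below, is byte order: the event's auxiliary data is stored big endian, so a little-endian host has to byte-swap each word before picking the bit fields apart. A simplified standalone sketch of that decode (field positions as in the s390_cpumsf_trailer_show() change below; the real code also handles the timestamp and progusage words):

#include <endian.h>
#include <stdint.h>

struct trailer_bits {
	unsigned int f, a, t;
	unsigned int bsdes, dsdes;
};

/* Byte-swap the raw trailer flags word once with be64toh(), then
 * shift/mask the bit fields out of the host-order value.
 */
static struct trailer_bits decode_trailer_flags(uint64_t raw)
{
	uint64_t flags = be64toh(raw);
	struct trailer_bits d = {
		.f     = flags >> 63 & 0x1,
		.a     = flags >> 62 & 0x1,
		.t     = flags >> 61 & 0x1,
		.bsdes = flags >> 16 & 0xffff,
		.dsdes = flags & 0xffff,
	};
	return d;
}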
For example, on s390 this works without error: [root@m35lp76 perf]# uname -m s390x [root@m35lp76 perf]# ./perf record -e rbd000 -- find / >/dev/null [ perf record: Woken up 3 times to write data ] [ perf record: Captured and wrote 0.549 MB perf.data ] [root@m35lp76 perf]# ./perf report -D --stdio > /dev/null [root@m35lp76 perf]# Transferring this perf.data file to an x86 platform and executing the same report command produces: [root@f29 perf]# uname -m x86_64 [root@f29 perf]# ./perf report -i ~/perf.data.m35lp76 --stdio interpreting bpf_prog_info from systems with endianity is not yet supported interpreting btf from systems with endianity is not yet supported 0x8c890 [0x8]: failed to process type: 68 Error: failed to process sample Event bd000 generates auxiliary data which is stored in big endian format in the perf data file. This error is caused by missing endianness handling on the x86 platform when the data is displayed. Fix this by handling s390 auxiliary event data depending on the local platform endianness. Output after on x86: [root@f29 perf]# ./perf report -D -i ~/perf.data.m35lp76 --stdio > /dev/null interpreting bpf_prog_info from systems with endianity is not yet supported interpreting btf from systems with endianity is not yet supported [root@f29 perf]# Committer notes: Fix build breakage on older systems, such as CentOS:6, where nesting calls to the endian.h macros ends up redefining local variables: util/s390-cpumsf.c: In function 's390_cpumsf_trailer_show': util/s390-cpumsf.c:333: error: declaration of '__v' shadows a previous local util/s390-cpumsf.c:333: error: shadowed declaration is here util/s390-cpumsf.c:333: error: declaration of '__x' shadows a previous local util/s390-cpumsf.c:333: error: shadowed declaration is here util/s390-cpumsf.c:334: error: declaration of '__v' shadows a previous local util/s390-cpumsf.c:334: error: shadowed declaration is here util/s390-cpumsf.c:334: error: declaration of '__x' shadows a previous local util/s390-cpumsf.c:334: error: shadowed declaration is here [perfbuilder@455a63ef60dc perf]$ gcc -v |& tail -1 gcc version 4.4.7 20120313 (Red Hat 4.4.7-23) (GCC) [perfbuilder@455a63ef60dc perf]$ Since there are several uses of be64toh(te->flags), introduce a variable to hold that and then use it, avoiding this case that causes the above problems: - local.bsdes = be16toh((be64toh(te->flags) >> 16 & 0xffff)); + local.bsdes = be16toh((flags >> 16 & 0xffff)); It's the same construct used in s390_cpumsf_diag_show() where we have a 'word' variable that is used just once; s390_cpumsf_basic_show() has lots of uses and also uses a variable to hold the result of be16toh(). Some of those temp variables needed to be converted from 'unsigned long' to 'unsigned long long' so as to build on 32-bit arches such as debian:experimental-x-mipsel, the android NDK ones and fedora:24-x-ARC-uClibc. Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20190522064325.25596-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/s390-cpumsf.c | 96 ++++++++++++++++++++++------- 1 file changed, 78 insertions(+), 18 deletions(-) diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index c215704931dc..10d36d9b7909 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -17,8 +17,8 @@ * see Documentation/perf.data-file-format.txt.
* PERF_RECORD_AUXTRACE_INFO: * Defines a table of contains for PERF_RECORD_AUXTRACE records. This - * record is generated during 'perf record' command. Each record contains up - * to 256 entries describing offset and size of the AUXTRACE data in the + * record is generated during 'perf record' command. Each record contains + * up to 256 entries describing offset and size of the AUXTRACE data in the * perf.data file. * PERF_RECORD_AUXTRACE_ERROR: * Indicates an error during AUXTRACE collection such as buffer overflow. @@ -237,10 +237,33 @@ static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf, return rc; } -/* Display s390 CPU measurement facility basic-sampling data entry */ +/* Display s390 CPU measurement facility basic-sampling data entry + * Data written on s390 in big endian byte order and contains bit + * fields across byte boundaries. + */ static bool s390_cpumsf_basic_show(const char *color, size_t pos, - struct hws_basic_entry *basic) + struct hws_basic_entry *basicp) { + struct hws_basic_entry *basic = basicp; +#if __BYTE_ORDER == __LITTLE_ENDIAN + struct hws_basic_entry local; + unsigned long long word = be64toh(*(unsigned long long *)basicp); + + memset(&local, 0, sizeof(local)); + local.def = be16toh(basicp->def); + local.prim_asn = word & 0xffff; + local.CL = word >> 30 & 0x3; + local.I = word >> 32 & 0x1; + local.AS = word >> 33 & 0x3; + local.P = word >> 35 & 0x1; + local.W = word >> 36 & 0x1; + local.T = word >> 37 & 0x1; + local.U = word >> 40 & 0xf; + local.ia = be64toh(basicp->ia); + local.gpp = be64toh(basicp->gpp); + local.hpp = be64toh(basicp->hpp); + basic = &local; +#endif if (basic->def != 1) { pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos); return false; @@ -258,10 +281,22 @@ static bool s390_cpumsf_basic_show(const char *color, size_t pos, return true; } -/* Display s390 CPU measurement facility diagnostic-sampling data entry */ +/* Display s390 CPU measurement facility diagnostic-sampling data entry. + * Data written on s390 in big endian byte order and contains bit + * fields across byte boundaries. 
+ */ static bool s390_cpumsf_diag_show(const char *color, size_t pos, - struct hws_diag_entry *diag) + struct hws_diag_entry *diagp) { + struct hws_diag_entry *diag = diagp; +#if __BYTE_ORDER == __LITTLE_ENDIAN + struct hws_diag_entry local; + unsigned long long word = be64toh(*(unsigned long long *)diagp); + + local.def = be16toh(diagp->def); + local.I = word >> 32 & 0x1; + diag = &local; +#endif if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) { pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos); return false; @@ -272,35 +307,52 @@ static bool s390_cpumsf_diag_show(const char *color, size_t pos, } /* Return TOD timestamp contained in an trailer entry */ -static unsigned long long trailer_timestamp(struct hws_trailer_entry *te) +static unsigned long long trailer_timestamp(struct hws_trailer_entry *te, + int idx) { /* te->t set: TOD in STCKE format, bytes 8-15 * to->t not set: TOD in STCK format, bytes 0-7 */ unsigned long long ts; - memcpy(&ts, &te->timestamp[te->t], sizeof(ts)); - return ts; + memcpy(&ts, &te->timestamp[idx], sizeof(ts)); + return be64toh(ts); } /* Display s390 CPU measurement facility trailer entry */ static bool s390_cpumsf_trailer_show(const char *color, size_t pos, struct hws_trailer_entry *te) { +#if __BYTE_ORDER == __LITTLE_ENDIAN + struct hws_trailer_entry local; + const unsigned long long flags = be64toh(te->flags); + + memset(&local, 0, sizeof(local)); + local.f = flags >> 63 & 0x1; + local.a = flags >> 62 & 0x1; + local.t = flags >> 61 & 0x1; + local.bsdes = be16toh((flags >> 16 & 0xffff)); + local.dsdes = be16toh((flags & 0xffff)); + memcpy(&local.timestamp, te->timestamp, sizeof(te->timestamp)); + local.overflow = be64toh(te->overflow); + local.clock_base = be64toh(te->progusage[0]) >> 63 & 1; + local.progusage2 = be64toh(te->progusage2); + te = &local; +#endif if (te->bsdes != sizeof(struct hws_basic_entry)) { pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos); return false; } color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d" " dsdes:%d Overflow:%lld Time:%#llx\n" - "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n", + "\t\tC:%d TOD:%#lx\n", pos, te->f ? 'F' : ' ', te->a ? 'A' : ' ', te->t ? 
'T' : ' ', te->bsdes, te->dsdes, te->overflow, - trailer_timestamp(te), te->clock_base, te->progusage2, - te->progusage[0], te->progusage[1]); + trailer_timestamp(te, te->clock_base), + te->clock_base, te->progusage2); return true; } @@ -327,13 +379,13 @@ static bool s390_cpumsf_validate(int machine_type, *dsdes = *bsdes = 0; if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */ return false; - if (basic->def != 1) /* No basic set entry, must be first */ + if (be16toh(basic->def) != 1) /* No basic set entry, must be first */ return false; /* Check for trailer entry at end of SDB */ te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ - sizeof(*te)); - *bsdes = te->bsdes; - *dsdes = te->dsdes; + *bsdes = be16toh(te->bsdes); + *dsdes = be16toh(te->dsdes); if (!te->bsdes && !te->dsdes) { /* Very old hardware, use CPUID */ switch (machine_type) { @@ -495,19 +547,27 @@ static bool s390_cpumsf_make_event(size_t pos, static unsigned long long get_trailer_time(const unsigned char *buf) { struct hws_trailer_entry *te; - unsigned long long aux_time; + unsigned long long aux_time, progusage2; + bool clock_base; te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ - sizeof(*te)); - if (!te->clock_base) /* TOD_CLOCK_BASE value missing */ +#if __BYTE_ORDER == __LITTLE_ENDIAN + clock_base = be64toh(te->progusage[0]) >> 63 & 0x1; + progusage2 = be64toh(te->progusage[1]); +#else + clock_base = te->clock_base; + progusage2 = te->progusage2; +#endif + if (!clock_base) /* TOD_CLOCK_BASE value missing */ return 0; /* Correct calculation to convert time stamp in trailer entry to * nano seconds (taken from arch/s390 function tod_to_ns()). * TOD_CLOCK_BASE is stored in trailer entry member progusage2. */ - aux_time = trailer_timestamp(te) - te->progusage2; + aux_time = trailer_timestamp(te, clock_base) - progusage2; aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9); return aux_time; } From 04c41bcb862bbec1fb225243ecf07a3219593f81 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 10 Jun 2019 15:37:45 -0300 Subject: [PATCH 85/85] perf trace: Skip unknown syscalls when expanding strace like syscall groups We have $INSTALL_DIR/share/perf-core/strace/groups/string files with syscalls that should be selected when 'string' is used, meaning, in this case, syscalls that receive a string, such as a pathname, as one of their arguments. But those were first selected and tested on x86_64, and end up failing on architectures where some of those syscalls are not available, like the 'access' syscall on arm64, which makes using 'perf trace -e string' on such archs fail. Since the routine doing the validation is used only when reading such files, do not fail when some syscall is not found in the syscalltbl; instead just use pr_debug() to record it, in case people suspect a problem. Now using 'perf trace -e string' should work on arm64, selecting only the syscalls that have a string and are available on that architecture.
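The shape of the change, as a standalone sketch (the helper and its arguments are hypothetical; the real code walks trace->ev_qualifier in builtin-trace.c, shown in the diff below):

#include <stdio.h>

/* Validate a list of syscall names, but skip-and-log unknown ones
 * instead of erroring out, so arch-specific gaps (e.g. no 'access'
 * on arm64) no longer break strace-like groups.
 */
static int validate_syscalls(const char **names, int nr,
			     int (*lookup)(const char *name))
{
	int i, kept = 0;

	for (i = 0; i < nr; i++) {
		int id = lookup(names[i]);

		if (id < 0) {
			fprintf(stderr, "Skipping unknown syscall: %s\n",
				names[i]);
			continue;	/* was a hard error before */
		}
		kept++;			/* keep the resolved id */
	}
	return kept;
}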
Reported-by: Leo Yan Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Martin KaFai Lau Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Song Liu Cc: Suzuki K Poulose Cc: Yonghong Song Link: https://lkml.kernel.org/r/20190610184754.GU21245@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 1a2a605cf068..eb70a4b71755 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1529,6 +1529,7 @@ static int trace__read_syscall_info(struct trace *trace, int id) static int trace__validate_ev_qualifier(struct trace *trace) { int err = 0, i; + bool printed_invalid_prefix = false; size_t nr_allocated; struct str_node *pos; @@ -1555,14 +1556,15 @@ static int trace__validate_ev_qualifier(struct trace *trace) if (id >= 0) goto matches; - if (err == 0) { - fputs("Error:\tInvalid syscall ", trace->output); - err = -EINVAL; + if (!printed_invalid_prefix) { + pr_debug("Skipping unknown syscalls: "); + printed_invalid_prefix = true; } else { - fputs(", ", trace->output); + pr_debug(", "); } - fputs(sc, trace->output); + pr_debug("%s", sc); + continue; } matches: trace->ev_qualifier_ids.entries[i++] = id; @@ -1591,15 +1593,14 @@ matches: } } - if (err < 0) { - fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" - "\nHint:\tand: 'man syscalls'\n", trace->output); -out_free: - zfree(&trace->ev_qualifier_ids.entries); - trace->ev_qualifier_ids.nr = 0; - } out: + if (printed_invalid_prefix) + pr_debug("\n"); return err; +out_free: + zfree(&trace->ev_qualifier_ids.entries); + trace->ev_qualifier_ids.nr = 0; + goto out; } /*