perf tools changes for v5.19: 2nd batch

- Add BPF based off-CPU profiling.
 
 - Improvements for system-wide recording, especially for Intel PT.
 
 - Improve DWARF unwinding on arm64.
 
 - Support Arm CoreSight trace data disassembly in 'perf script' python.
 
 - Fix the build with newer libbpf versions while still supporting older
   distro-released libbpf packages.
 
 - Fix event syntax error caused by ExtSel in the JSON events infra.
 
 - Use stdio interface if slang is not supported in 'perf c2c'.
 
 - Add 'perf test' checking for perf stat CSV output.
 
 - Sync the msr-index.h copy with the kernel sources.
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQR2GiIUctdOfX2qHhGyPKLppCJ+JwUCYpLIVgAKCRCyPKLppCJ+
 Jz/XAP4+JpPf8lI4Rw5FgFV84uRS48EwADPjZFfauNUwUmhMwQEAn/ZzYMg20DyU
 QTYiHN5AFprT5WYGCDAMr6N94/8eEQA=
 =KT5G
 -----END PGP SIGNATURE-----

Merge tag 'perf-tools-for-v5.19-2022-05-28' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull more perf tools updates from Arnaldo Carvalho de Melo:

 - Add BPF based off-CPU profiling

 - Improvements for system-wide recording, especially for Intel PT

 - Improve DWARF unwinding on arm64

 - Support Arm CoreSight trace data disassembly in 'perf script' python

 - Fix the build with newer libbpf versions while still supporting older
   distro-released libbpf packages

 - Fix event syntax error caused by ExtSel in the JSON events infra

 - Use stdio interface if slang is not supported in 'perf c2c'

 - Add 'perf test' checking for perf stat CSV output

 - Sync the msr-index.h copy with the kernel sources

* tag 'perf-tools-for-v5.19-2022-05-28' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (38 commits)
  tools arch x86: Sync the msr-index.h copy with the kernel sources
  perf scripts python: Support Arm CoreSight trace data disassembly
  perf scripting python: Expose dso and map information
  perf jevents: Fix event syntax error caused by ExtSel
  perf tools arm64: Add support for VG register
  perf unwind arm64: Decouple Libunwind register names from Perf
  perf unwind: Use dynamic register set for DWARF unwind
  perf tools arm64: Copy perf_regs.h from the kernel
  perf unwind arm64: Use perf's copy of kernel headers
  perf c2c: Use stdio interface if slang is not supported
  perf test: Add a basic offcpu profiling test
  perf record: Add cgroup support for off-cpu profiling
  perf record: Handle argument change in sched_switch
  perf record: Implement basic filtering for off-cpu
  perf record: Enable off-cpu analysis with BPF
  perf report: Do not extend sample type of bpf-output event
  perf test: Add checking for perf stat CSV output.
  perf tools: Allow system-wide events to keep their own threads
  perf tools: Allow system-wide events to keep their own CPUs
  libperf evsel: Add comments for booleans
  ...
Merged by Linus Torvalds on 2022-05-29 10:10:15 -07:00, commit 09f73a1ab8; 44 changed files with 1594 additions and 197 deletions.

View File

@ -36,6 +36,11 @@ enum perf_event_arm_regs {
PERF_REG_ARM64_LR,
PERF_REG_ARM64_SP,
PERF_REG_ARM64_PC,
PERF_REG_ARM64_MAX,
/* Extended/pseudo registers */
PERF_REG_ARM64_VG = 46, // SVE Vector Granule
PERF_REG_ARM64_MAX = PERF_REG_ARM64_PC + 1,
PERF_REG_ARM64_EXTENDED_MAX = PERF_REG_ARM64_VG + 1
};
#endif /* _ASM_ARM64_PERF_REGS_H */

View File

@ -76,6 +76,8 @@
/* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */
#define MSR_IA32_CORE_CAPS 0x000000cf
#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT 2
#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS BIT(MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT)
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT 5
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT)
@ -154,6 +156,11 @@
#define MSR_IA32_POWER_CTL 0x000001fc
#define MSR_IA32_POWER_CTL_BIT_EE 19
/* Abbreviated from Intel SDM name IA32_INTEGRITY_CAPABILITIES */
#define MSR_INTEGRITY_CAPS 0x000002d9
#define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4
#define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT)
#define MSR_LBR_NHM_FROM 0x00000680
#define MSR_LBR_NHM_TO 0x000006c0
#define MSR_LBR_CORE_FROM 0x00000040
@ -312,6 +319,7 @@
/* Run Time Average Power Limiting (RAPL) Interface */
#define MSR_VR_CURRENT_CONFIG 0x00000601
#define MSR_RAPL_POWER_UNIT 0x00000606
#define MSR_PKG_POWER_LIMIT 0x00000610
@ -502,8 +510,10 @@
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
#define MSR_AMD64_SEV_ES_ENABLED_BIT 1
#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2
#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
@ -524,6 +534,11 @@
#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16)
#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24)
/* AMD Performance Counter Global Status and Control MSRs */
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
@ -688,6 +703,10 @@
#define MSR_IA32_PERF_CTL 0x00000199
#define INTEL_PERF_CTL_MASK 0xffff
/* AMD Branch Sampling configuration */
#define MSR_AMD_DBG_EXTN_CFG 0xc000010f
#define MSR_AMD_SAMP_BR_FROM 0xc0010300
#define MSR_IA32_MPERF 0x000000e7
#define MSR_IA32_APERF 0x000000e8

View File

@ -99,6 +99,10 @@ FEATURE_TESTS_EXTRA := \
clang \
libbpf \
libbpf-btf__load_from_kernel_by_id \
libbpf-bpf_prog_load \
libbpf-bpf_object__next_program \
libbpf-bpf_object__next_map \
libbpf-bpf_create_map \
libpfm4 \
libdebuginfod \
clang-bpf-co-re

View File

@ -58,6 +58,11 @@ FILES= \
test-bpf.bin \
test-libbpf.bin \
test-libbpf-btf__load_from_kernel_by_id.bin \
test-libbpf-bpf_prog_load.bin \
test-libbpf-bpf_map_create.bin \
test-libbpf-bpf_object__next_program.bin \
test-libbpf-bpf_object__next_map.bin \
test-libbpf-btf__raw_data.bin \
test-get_cpuid.bin \
test-sdt.bin \
test-cxx.bin \
@ -291,6 +296,21 @@ $(OUTPUT)test-libbpf.bin:
$(OUTPUT)test-libbpf-btf__load_from_kernel_by_id.bin:
$(BUILD) -lbpf
$(OUTPUT)test-libbpf-bpf_prog_load.bin:
$(BUILD) -lbpf
$(OUTPUT)test-libbpf-bpf_map_create.bin:
$(BUILD) -lbpf
$(OUTPUT)test-libbpf-bpf_object__next_program.bin:
$(BUILD) -lbpf
$(OUTPUT)test-libbpf-bpf_object__next_map.bin:
$(BUILD) -lbpf
$(OUTPUT)test-libbpf-btf__raw_data.bin:
$(BUILD) -lbpf
$(OUTPUT)test-sdt.bin:
$(BUILD)

View File

@ -0,0 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <bpf/bpf.h>
int main(void)
{
return bpf_map_create(0 /* map_type */, NULL /* map_name */, 0 /* key_size */,
0 /* value_size */, 0 /* max_entries */, NULL /* opts */);
}

View File

@ -0,0 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <bpf/libbpf.h>
int main(void)
{
bpf_object__next_map(NULL /* obj */, NULL /* prev */);
return 0;
}

View File

@ -0,0 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <bpf/libbpf.h>
int main(void)
{
bpf_object__next_program(NULL /* obj */, NULL /* prev */);
return 0;
}

View File

@ -0,0 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <bpf/bpf.h>
int main(void)
{
return bpf_prog_load(0 /* prog_type */, NULL /* prog_name */,
NULL /* license */, NULL /* insns */,
0 /* insn_cnt */, NULL /* opts */);
}

View File

@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <bpf/libbpf.h>
#include <bpf/btf.h>
int main(void)
{
return btf__load_from_kernel_by_id(20151128, NULL);
btf__load_from_kernel_by_id(20151128);
return 0;
}

View File

@ -0,0 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <bpf/btf.h>
int main(void)
{
btf__raw_data(NULL /* btf_ro */, NULL /* size */);
return 0;
}

View File

@ -23,6 +23,7 @@
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <api/fd/array.h>
#include "internal.h"
void perf_evlist__init(struct perf_evlist *evlist)
{
@ -39,10 +40,11 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
* We already have cpus for evsel (via PMU sysfs) so
* keep it, if there's no target cpu list defined.
*/
if (!evsel->own_cpus || evlist->has_user_cpus) {
perf_cpu_map__put(evsel->cpus);
evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
} else if (!evsel->system_wide && perf_cpu_map__empty(evlist->user_requested_cpus)) {
if (!evsel->own_cpus ||
(!evsel->system_wide && evlist->has_user_cpus) ||
(!evsel->system_wide &&
!evsel->requires_cpu &&
perf_cpu_map__empty(evlist->user_requested_cpus))) {
perf_cpu_map__put(evsel->cpus);
evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
} else if (evsel->cpus != evsel->own_cpus) {
@ -50,8 +52,11 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
}
if (!evsel->system_wide) {
perf_thread_map__put(evsel->threads);
evsel->threads = perf_thread_map__get(evlist->threads);
}
evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
}
@ -298,7 +303,7 @@ add:
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus);
int nr_cpus = perf_cpu_map__nr(evlist->all_cpus);
int nr_threads = perf_thread_map__nr(evlist->threads);
int nfds = 0;
struct perf_evsel *evsel;
@ -428,9 +433,9 @@ static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_
static int
mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
int idx, struct perf_mmap_param *mp, int cpu_idx,
int thread, int *_output, int *_output_overwrite)
int thread, int *_output, int *_output_overwrite, int *nr_mmaps)
{
struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->user_requested_cpus, cpu_idx);
struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->all_cpus, cpu_idx);
struct perf_evsel *evsel;
int revent;
@ -484,6 +489,8 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
if (ops->mmap(map, mp, *output, evlist_cpu) < 0)
return -1;
*nr_mmaps += 1;
if (!idx)
perf_evlist__set_mmap_first(evlist, map, overwrite);
} else {
@ -512,35 +519,13 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
return 0;
}
static int
mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
struct perf_mmap_param *mp)
{
int thread;
int nr_threads = perf_thread_map__nr(evlist->threads);
for (thread = 0; thread < nr_threads; thread++) {
int output = -1;
int output_overwrite = -1;
if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread,
&output, &output_overwrite))
goto out_unmap;
}
return 0;
out_unmap:
perf_evlist__munmap(evlist);
return -1;
}
static int
mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
struct perf_mmap_param *mp)
{
int nr_threads = perf_thread_map__nr(evlist->threads);
int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus);
int nr_cpus = perf_cpu_map__nr(evlist->all_cpus);
int nr_mmaps = 0;
int cpu, thread;
for (cpu = 0; cpu < nr_cpus; cpu++) {
@ -549,11 +534,14 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
for (thread = 0; thread < nr_threads; thread++) {
if (mmap_per_evsel(evlist, ops, cpu, mp, cpu,
thread, &output, &output_overwrite))
thread, &output, &output_overwrite, &nr_mmaps))
goto out_unmap;
}
}
if (nr_mmaps != evlist->nr_mmaps)
pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps);
return 0;
out_unmap:
@ -565,9 +553,14 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist)
{
int nr_mmaps;
nr_mmaps = perf_cpu_map__nr(evlist->user_requested_cpus);
if (perf_cpu_map__empty(evlist->user_requested_cpus))
nr_mmaps = perf_thread_map__nr(evlist->threads);
/* One for each CPU */
nr_mmaps = perf_cpu_map__nr(evlist->all_cpus);
if (perf_cpu_map__empty(evlist->all_cpus)) {
/* Plus one for each thread */
nr_mmaps += perf_thread_map__nr(evlist->threads);
/* Minus the per-thread CPU (-1) */
nr_mmaps -= 1;
}
return nr_mmaps;
}
@ -577,7 +570,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
struct perf_mmap_param *mp)
{
struct perf_evsel *evsel;
const struct perf_cpu_map *cpus = evlist->user_requested_cpus;
if (!ops || !ops->get || !ops->mmap)
return -EINVAL;
@ -596,9 +588,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
return -ENOMEM;
if (perf_cpu_map__empty(cpus))
return mmap_per_thread(evlist, ops, mp);
return mmap_per_cpu(evlist, ops, mp);
}

View File

@ -49,7 +49,18 @@ struct perf_evsel {
/* parse modifier helper */
int nr_members;
/*
* system_wide is for events that need to be on every CPU, irrespective
* of user requested CPUs or threads. Map propagation will set cpus to
* this event's own_cpus, whereby they will contribute to evlist
* all_cpus.
*/
bool system_wide;
/*
* Some events, for example uncore events, require a CPU.
* i.e. it cannot be the 'any CPU' value of -1.
*/
bool requires_cpu;
int idx;
};

View File

@ -758,6 +758,16 @@ include::intel-hybrid.txt[]
If the URLs are not specified, the value of the DEBUGINFOD_URLS
system environment variable is used.
--off-cpu::
Enable off-cpu profiling with BPF. The BPF program collects task
scheduling information together with a (user) stack trace and saves
it as sample data for a software event named "offcpu-time". The
sample period carries the time the task spent sleeping, in
nanoseconds. Note that, for now, BPF can collect stack traces only
via frame pointers ("fp"), so applications built without frame
pointers may show bogus addresses.
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
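
A minimal usage sketch of the new option described above, assuming perf was built with BUILD_BPF_SKEL=1 and is run with sufficient privileges (the sleep workload is only a placeholder):

    # record off-cpu samples for a short workload, then inspect the result
    perf record --off-cpu -- sleep 3
    perf evlist                  # should list the "offcpu-time" software event
    perf report --stdio          # sample periods hold the off-cpu time in ns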

View File

@ -573,11 +573,36 @@ ifndef NO_LIBELF
ifeq ($(feature-libbpf-btf__load_from_kernel_by_id), 1)
CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
endif
$(call feature_check,libbpf-bpf_prog_load)
ifeq ($(feature-libbpf-bpf_prog_load), 1)
CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD
endif
$(call feature_check,libbpf-bpf_object__next_program)
ifeq ($(feature-libbpf-bpf_object__next_program), 1)
CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM
endif
$(call feature_check,libbpf-bpf_object__next_map)
ifeq ($(feature-libbpf-bpf_object__next_map), 1)
CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_MAP
endif
$(call feature_check,libbpf-btf__raw_data)
ifeq ($(feature-libbpf-btf__raw_data), 1)
CFLAGS += -DHAVE_LIBBPF_BTF__RAW_DATA
endif
$(call feature_check,libbpf-bpf_map_create)
ifeq ($(feature-libbpf-bpf_map_create), 1)
CFLAGS += -DHAVE_LIBBPF_BPF_MAP_CREATE
endif
else
dummy := $(error Error: No libbpf devel library found, please install libbpf-devel);
endif
else
CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD
CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM
CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_MAP
CFLAGS += -DHAVE_LIBBPF_BTF__RAW_DATA
CFLAGS += -DHAVE_LIBBPF_BPF_MAP_CREATE
endif
endif

View File

@ -1038,6 +1038,7 @@ SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp)
SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
SKELETONS += $(SKEL_OUT)/off_cpu.skel.h
$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT):
$(Q)$(MKDIR) -p $@

View File

@ -2,13 +2,19 @@
#include <errno.h>
#include <regex.h>
#include <string.h>
#include <sys/auxv.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
#include "../../../perf-sys.h"
#include "../../../util/debug.h"
#include "../../../util/event.h"
#include "../../../util/perf_regs.h"
#ifndef HWCAP_SVE
#define HWCAP_SVE (1 << 22)
#endif
const struct sample_reg sample_reg_masks[] = {
SMPL_REG(x0, PERF_REG_ARM64_X0),
SMPL_REG(x1, PERF_REG_ARM64_X1),
@ -43,6 +49,7 @@ const struct sample_reg sample_reg_masks[] = {
SMPL_REG(lr, PERF_REG_ARM64_LR),
SMPL_REG(sp, PERF_REG_ARM64_SP),
SMPL_REG(pc, PERF_REG_ARM64_PC),
SMPL_REG(vg, PERF_REG_ARM64_VG),
SMPL_REG_END
};
@ -131,3 +138,34 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
return SDT_ARG_VALID;
}
uint64_t arch__user_reg_mask(void)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
.sample_type = PERF_SAMPLE_REGS_USER,
.disabled = 1,
.exclude_kernel = 1,
.sample_period = 1,
.sample_regs_user = PERF_REGS_MASK
};
int fd;
if (getauxval(AT_HWCAP) & HWCAP_SVE)
attr.sample_regs_user |= SMPL_REG_MASK(PERF_REG_ARM64_VG);
/*
* Check if the pmu supports perf extended regs, before
* returning the register mask to sample.
*/
if (attr.sample_regs_user != PERF_REGS_MASK) {
event_attr_init(&attr);
fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
if (fd != -1) {
close(fd);
return attr.sample_regs_user;
}
}
return PERF_REGS_MASK;
}
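
With the hunk above, perf detects SVE via getauxval(AT_HWCAP) and, when the PMU accepts it, adds the VG pseudo-register to the default user register mask used for DWARF unwinding. A hedged sketch on an SVE-capable arm64 system (the workload name is a placeholder):

    # vg is now sampled automatically, so DWARF unwinding of SVE frames
    # with variable vector lengths has the information it needs
    perf record --call-graph dwarf -- ./sve_workload
    perf report --stdio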

View File

@ -10,77 +10,8 @@
int LIBUNWIND__ARCH_REG_ID(int regnum)
{
switch (regnum) {
case UNW_AARCH64_X0:
return PERF_REG_ARM64_X0;
case UNW_AARCH64_X1:
return PERF_REG_ARM64_X1;
case UNW_AARCH64_X2:
return PERF_REG_ARM64_X2;
case UNW_AARCH64_X3:
return PERF_REG_ARM64_X3;
case UNW_AARCH64_X4:
return PERF_REG_ARM64_X4;
case UNW_AARCH64_X5:
return PERF_REG_ARM64_X5;
case UNW_AARCH64_X6:
return PERF_REG_ARM64_X6;
case UNW_AARCH64_X7:
return PERF_REG_ARM64_X7;
case UNW_AARCH64_X8:
return PERF_REG_ARM64_X8;
case UNW_AARCH64_X9:
return PERF_REG_ARM64_X9;
case UNW_AARCH64_X10:
return PERF_REG_ARM64_X10;
case UNW_AARCH64_X11:
return PERF_REG_ARM64_X11;
case UNW_AARCH64_X12:
return PERF_REG_ARM64_X12;
case UNW_AARCH64_X13:
return PERF_REG_ARM64_X13;
case UNW_AARCH64_X14:
return PERF_REG_ARM64_X14;
case UNW_AARCH64_X15:
return PERF_REG_ARM64_X15;
case UNW_AARCH64_X16:
return PERF_REG_ARM64_X16;
case UNW_AARCH64_X17:
return PERF_REG_ARM64_X17;
case UNW_AARCH64_X18:
return PERF_REG_ARM64_X18;
case UNW_AARCH64_X19:
return PERF_REG_ARM64_X19;
case UNW_AARCH64_X20:
return PERF_REG_ARM64_X20;
case UNW_AARCH64_X21:
return PERF_REG_ARM64_X21;
case UNW_AARCH64_X22:
return PERF_REG_ARM64_X22;
case UNW_AARCH64_X23:
return PERF_REG_ARM64_X23;
case UNW_AARCH64_X24:
return PERF_REG_ARM64_X24;
case UNW_AARCH64_X25:
return PERF_REG_ARM64_X25;
case UNW_AARCH64_X26:
return PERF_REG_ARM64_X26;
case UNW_AARCH64_X27:
return PERF_REG_ARM64_X27;
case UNW_AARCH64_X28:
return PERF_REG_ARM64_X28;
case UNW_AARCH64_X29:
return PERF_REG_ARM64_X29;
case UNW_AARCH64_X30:
return PERF_REG_ARM64_LR;
case UNW_AARCH64_SP:
return PERF_REG_ARM64_SP;
case UNW_AARCH64_PC:
return PERF_REG_ARM64_PC;
default:
pr_err("unwind: invalid reg id %d\n", regnum);
if (regnum < 0 || regnum >= PERF_REG_ARM64_EXTENDED_MAX)
return -EINVAL;
}
return -EINVAL;
return regnum;
}

View File

@ -811,18 +811,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
if (!cpu_wide && perf_can_record_cpu_wide()) {
struct evsel *switch_evsel;
err = parse_events(evlist, "dummy:u", NULL);
if (err)
return err;
switch_evsel = evlist__add_dummy_on_all_cpus(evlist);
if (!switch_evsel)
return -ENOMEM;
switch_evsel = evlist__last(evlist);
switch_evsel->core.attr.freq = 0;
switch_evsel->core.attr.sample_period = 1;
switch_evsel->core.attr.context_switch = 1;
switch_evsel->core.system_wide = true;
switch_evsel->no_aux_samples = true;
switch_evsel->immediate = true;
evsel__set_sample_bit(switch_evsel, TID);
@ -871,20 +864,22 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
/* Add dummy event to keep tracking */
if (opts->full_auxtrace) {
bool need_system_wide_tracking;
struct evsel *tracking_evsel;
err = parse_events(evlist, "dummy:u", NULL);
if (err)
return err;
/*
* User space tasks can migrate between CPUs, so when tracing
* selected CPUs, sideband for all CPUs is still needed.
*/
need_system_wide_tracking = evlist->core.has_user_cpus &&
!intel_pt_evsel->core.attr.exclude_user;
tracking_evsel = evlist__last(evlist);
tracking_evsel = evlist__add_aux_dummy(evlist, need_system_wide_tracking);
if (!tracking_evsel)
return -ENOMEM;
evlist__set_tracking_event(evlist, tracking_evsel);
tracking_evsel->core.attr.freq = 0;
tracking_evsel->core.attr.sample_period = 1;
tracking_evsel->no_aux_samples = true;
if (need_immediate)
tracking_evsel->immediate = true;

View File

@ -2801,9 +2801,7 @@ static int perf_c2c__report(int argc, const char **argv)
"the input file to process"),
OPT_INCR('N', "node-info", &c2c.node_info,
"show extra node info in report (repeat for more info)"),
#ifdef HAVE_SLANG_SUPPORT
OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"),
#endif
OPT_BOOLEAN(0, "stats", &c2c.stats_only,
"Display only statistic tables (implies --stdio)"),
OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full,
@ -2833,6 +2831,10 @@ static int perf_c2c__report(int argc, const char **argv)
if (argc)
usage_with_options(report_c2c_usage, options);
#ifndef HAVE_SLANG_SUPPORT
c2c.use_stdio = true;
#endif
if (c2c.stats_only)
c2c.use_stdio = true;
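
With this change the --stdio option is always accepted, and a perf binary built without libslang now falls back to the stdio interface instead of failing. A small usage sketch (the workload is a placeholder and hardware support for memory sampling is assumed):

    perf c2c record -- sleep 5
    perf c2c report --stdio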

View File

@ -49,6 +49,7 @@
#include "util/clockid.h"
#include "util/pmu-hybrid.h"
#include "util/evlist-hybrid.h"
#include "util/off_cpu.h"
#include "asm/bug.h"
#include "perf.h"
#include "cputopo.h"
@ -162,6 +163,7 @@ struct record {
bool buildid_mmap;
bool timestamp_filename;
bool timestamp_boundary;
bool off_cpu;
struct switch_output switch_output;
unsigned long long samples;
unsigned long output_max_size; /* = 0: unlimited */
@ -869,7 +871,6 @@ static int record__auxtrace_init(struct record *rec __maybe_unused)
static int record__config_text_poke(struct evlist *evlist)
{
struct evsel *evsel;
int err;
/* Nothing to do if text poke is already configured */
evlist__for_each_entry(evlist, evsel) {
@ -877,32 +878,23 @@ static int record__config_text_poke(struct evlist *evlist)
return 0;
}
err = parse_events(evlist, "dummy:u", NULL);
if (err)
return err;
evsel = evlist__add_dummy_on_all_cpus(evlist);
if (!evsel)
return -ENOMEM;
evsel = evlist__last(evlist);
evsel->core.attr.freq = 0;
evsel->core.attr.sample_period = 1;
evsel->core.attr.text_poke = 1;
evsel->core.attr.ksymbol = 1;
evsel->core.system_wide = true;
evsel->no_aux_samples = true;
evsel->immediate = true;
/* Text poke must be collected on all CPUs */
perf_cpu_map__put(evsel->core.own_cpus);
evsel->core.own_cpus = perf_cpu_map__new(NULL);
perf_cpu_map__put(evsel->core.cpus);
evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
evsel__set_sample_bit(evsel, TIME);
return 0;
}
static int record__config_off_cpu(struct record *rec)
{
return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
}
static bool record__kcore_readable(struct machine *machine)
{
char kcore[PATH_MAX];
@ -982,14 +974,20 @@ static void record__thread_data_close_pipes(struct record_thread *thread_data)
}
}
static bool evlist__per_thread(struct evlist *evlist)
{
return cpu_map__is_dummy(evlist->core.user_requested_cpus);
}
static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
{
int m, tm, nr_mmaps = evlist->core.nr_mmaps;
struct mmap *mmap = evlist->mmap;
struct mmap *overwrite_mmap = evlist->overwrite_mmap;
struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
struct perf_cpu_map *cpus = evlist->core.all_cpus;
bool per_thread = evlist__per_thread(evlist);
if (cpu_map__is_dummy(cpus))
if (per_thread)
thread_data->nr_mmaps = nr_mmaps;
else
thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
@ -1010,7 +1008,7 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru
thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
if (cpu_map__is_dummy(cpus) ||
if (per_thread ||
test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
if (thread_data->maps) {
thread_data->maps[tm] = &mmap[m];
@ -1885,7 +1883,7 @@ static int record__synthesize(struct record *rec, bool tail)
return err;
}
err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.user_requested_cpus,
err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
process_synthesized_event, NULL);
if (err < 0) {
pr_err("Couldn't synthesize cpu map.\n");
@ -2600,6 +2598,9 @@ out_free_threads:
} else
status = err;
if (rec->off_cpu)
rec->bytes_written += off_cpu_write(rec->session);
record__synthesize(rec, true);
/* this will be recalculated during process_buildids() */
rec->samples = 0;
@ -3324,6 +3325,7 @@ static struct option __record_options[] = {
OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
"write collected trace data into several data files using parallel threads",
record__parse_threads),
OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
OPT_END()
};
@ -3683,12 +3685,12 @@ static int record__init_thread_default_masks(struct record *rec, struct perf_cpu
static int record__init_thread_masks(struct record *rec)
{
int ret = 0;
struct perf_cpu_map *cpus = rec->evlist->core.user_requested_cpus;
struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
if (!record__threads_enabled(rec))
return record__init_thread_default_masks(rec, cpus);
if (cpu_map__is_dummy(cpus)) {
if (evlist__per_thread(rec->evlist)) {
pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
return -EINVAL;
}
@ -3743,6 +3745,12 @@ int cmd_record(int argc, const char **argv)
set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif
#ifndef HAVE_BPF_SKEL
# define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
# undef set_nobuild
#endif
rec->opts.affinity = PERF_AFFINITY_SYS;
@ -3981,6 +3989,14 @@ int cmd_record(int argc, const char **argv)
}
}
if (rec->off_cpu) {
err = record__config_off_cpu(rec);
if (err) {
pr_err("record__config_off_cpu failed, error %d\n", err);
goto out;
}
}
if (record_opts__config(&rec->opts)) {
err = -EINVAL;
goto out;

View File

@ -382,9 +382,6 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_
if (!counter->supported)
return -ENOENT;
if (counter->core.system_wide)
nthreads = 1;
for (thread = 0; thread < nthreads; thread++) {
struct perf_counts_values *count;
@ -2261,7 +2258,7 @@ static void setup_system_wide(int forks)
struct evsel *counter;
evlist__for_each_entry(evsel_list, counter) {
if (!counter->core.system_wide &&
if (!counter->core.requires_cpu &&
strcmp(counter->name, "duration_time")) {
return;
}

View File

@ -605,7 +605,7 @@ static int json_events(const char *fn,
} else if (json_streq(map, field, "ExtSel")) {
char *code = NULL;
addfield(map, &code, "", "", val);
eventcode |= strtoul(code, NULL, 0) << 21;
eventcode |= strtoul(code, NULL, 0) << 8;
free(code);
} else if (json_streq(map, field, "EventName")) {
addfield(map, &je.name, "", "", val);
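
The fix above moves the ExtSel bit from bit 21 down to bit 8 of the generated event code, which is what the sysfs event format of the affected uncore PMUs expects. A quick shell sanity check with made-up values, assuming an event with EventCode 0x38 and ExtSel 1:

    # should print event=0x138 rather than the old, out-of-range 0x200038
    printf 'event=0x%x\n' $(( (1 << 8) | 0x38 ))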

View File

@ -0,0 +1,272 @@
# SPDX-License-Identifier: GPL-2.0
# arm-cs-trace-disasm.py: ARM CoreSight Trace Dump With Disassembler
#
# Author: Tor Jeremiassen <tor@ti.com>
# Mathieu Poirier <mathieu.poirier@linaro.org>
# Leo Yan <leo.yan@linaro.org>
# Al Grant <Al.Grant@arm.com>
from __future__ import print_function
import os
from os import path
import sys
import re
from subprocess import *
from optparse import OptionParser, make_option
from perf_trace_context import perf_set_itrace_options, \
perf_sample_insn, perf_sample_srccode
# Below are some example commands for using this script.
#
# Output disassembly with objdump:
# perf script -s scripts/python/arm-cs-trace-disasm.py \
# -- -d objdump -k path/to/vmlinux
# Output disassembly with llvm-objdump:
# perf script -s scripts/python/arm-cs-trace-disasm.py \
# -- -d llvm-objdump-11 -k path/to/vmlinux
# Output only source line and symbols:
# perf script -s scripts/python/arm-cs-trace-disasm.py
# Command line parsing.
option_list = [
# formatting options for the bottom entry of the stack
make_option("-k", "--vmlinux", dest="vmlinux_name",
help="Set path to vmlinux file"),
make_option("-d", "--objdump", dest="objdump_name",
help="Set path to objdump executable file"),
make_option("-v", "--verbose", dest="verbose",
action="store_true", default=False,
help="Enable debugging log")
]
parser = OptionParser(option_list=option_list)
(options, args) = parser.parse_args()
# Initialize global dicts and regular expression
disasm_cache = dict()
cpu_data = dict()
disasm_re = re.compile("^\s*([0-9a-fA-F]+):")
disasm_func_re = re.compile("^\s*([0-9a-fA-F]+)\s.*:")
cache_size = 64*1024
glb_source_file_name = None
glb_line_number = None
glb_dso = None
def get_optional(perf_dict, field):
if field in perf_dict:
return perf_dict[field]
return "[unknown]"
def get_offset(perf_dict, field):
if field in perf_dict:
return f"+0x{perf_dict[field]:x}"
return ""
def get_dso_file_path(dso_name, dso_build_id):
if (dso_name == "[kernel.kallsyms]" or dso_name == "vmlinux"):
if (options.vmlinux_name):
return options.vmlinux_name;
else:
return dso_name
if (dso_name == "[vdso]") :
append = "/vdso"
else:
append = "/elf"
dso_path = f"{os.environ['PERF_BUILDID_DIR']}/{dso_name}/{dso_build_id}{append}"
# Replace duplicate slash chars to single slash char
dso_path = dso_path.replace('//', '/', 1)
return dso_path
def read_disam(dso_fname, dso_start, start_addr, stop_addr):
addr_range = str(start_addr) + ":" + str(stop_addr) + ":" + dso_fname
# Don't let the cache get too big, clear it when it hits max size
if (len(disasm_cache) > cache_size):
disasm_cache.clear();
if addr_range in disasm_cache:
disasm_output = disasm_cache[addr_range];
else:
start_addr = start_addr - dso_start;
stop_addr = stop_addr - dso_start;
disasm = [ options.objdump_name, "-d", "-z",
f"--start-address=0x{start_addr:x}",
f"--stop-address=0x{stop_addr:x}" ]
disasm += [ dso_fname ]
disasm_output = check_output(disasm).decode('utf-8').split('\n')
disasm_cache[addr_range] = disasm_output
return disasm_output
def print_disam(dso_fname, dso_start, start_addr, stop_addr):
for line in read_disam(dso_fname, dso_start, start_addr, stop_addr):
m = disasm_func_re.search(line)
if m is None:
m = disasm_re.search(line)
if m is None:
continue
print(f"\t{line}")
def print_sample(sample):
print(f"Sample = {{ cpu: {sample['cpu']:04} addr: 0x{sample['addr']:016x} " \
f"phys_addr: 0x{sample['phys_addr']:016x} ip: 0x{sample['ip']:016x} " \
f"pid: {sample['pid']} tid: {sample['tid']} period: {sample['period']} time: {sample['time']} }}")
def trace_begin():
print('ARM CoreSight Trace Data Assembler Dump')
def trace_end():
print('End')
def trace_unhandled(event_name, context, event_fields_dict):
print(' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())]))
def common_start_str(comm, sample):
sec = int(sample["time"] / 1000000000)
ns = sample["time"] % 1000000000
cpu = sample["cpu"]
pid = sample["pid"]
tid = sample["tid"]
return f"{comm:>16} {pid:>5}/{tid:<5} [{cpu:04}] {sec:9}.{ns:09} "
# This code is copied from intel-pt-events.py for printing source code
# line and symbols.
def print_srccode(comm, param_dict, sample, symbol, dso):
ip = sample["ip"]
if symbol == "[unknown]":
start_str = common_start_str(comm, sample) + ("%x" % ip).rjust(16).ljust(40)
else:
offs = get_offset(param_dict, "symoff")
start_str = common_start_str(comm, sample) + (symbol + offs).ljust(40)
global glb_source_file_name
global glb_line_number
global glb_dso
source_file_name, line_number, source_line = perf_sample_srccode(perf_script_context)
if source_file_name:
if glb_line_number == line_number and glb_source_file_name == source_file_name:
src_str = ""
else:
if len(source_file_name) > 40:
src_file = ("..." + source_file_name[-37:]) + " "
else:
src_file = source_file_name.ljust(41)
if source_line is None:
src_str = src_file + str(line_number).rjust(4) + " <source not found>"
else:
src_str = src_file + str(line_number).rjust(4) + " " + source_line
glb_dso = None
elif dso == glb_dso:
src_str = ""
else:
src_str = dso
glb_dso = dso
glb_line_number = line_number
glb_source_file_name = source_file_name
print(f"{start_str}{src_str}")
def process_event(param_dict):
global cache_size
global options
sample = param_dict["sample"]
comm = param_dict["comm"]
name = param_dict["ev_name"]
dso = get_optional(param_dict, "dso")
dso_bid = get_optional(param_dict, "dso_bid")
dso_start = get_optional(param_dict, "dso_map_start")
dso_end = get_optional(param_dict, "dso_map_end")
symbol = get_optional(param_dict, "symbol")
if (options.verbose == True):
print(f"Event type: {name}")
print_sample(sample)
# If the dso cannot be found, the disassembly cannot be dumped; bail out
if (dso == '[unknown]'):
return
# Validate dso start and end addresses
if ((dso_start == '[unknown]') or (dso_end == '[unknown]')):
print(f"Failed to find valid dso map for dso {dso}")
return
if (name[0:12] == "instructions"):
print_srccode(comm, param_dict, sample, symbol, dso)
return
# Don't proceed if this event is not a branch sample.
if (name[0:8] != "branches"):
return
cpu = sample["cpu"]
ip = sample["ip"]
addr = sample["addr"]
# Initialize CPU data if it's empty, and directly return back
# if this is the first tracing event for this CPU.
if (cpu_data.get(str(cpu) + 'addr') == None):
cpu_data[str(cpu) + 'addr'] = addr
return
# The format for packet is:
#
# +------------+------------+------------+
# sample_prev: | addr | ip | cpu |
# +------------+------------+------------+
# sample_next: | addr | ip | cpu |
# +------------+------------+------------+
#
# We need to combine the two continuous packets to get the instruction
# range for sample_prev::cpu:
#
# [ sample_prev::addr .. sample_next::ip ]
#
# For this purpose, sample_prev::addr is stored into the cpu_data structure
# and read back as 'start_addr' when the next packet comes, and we use
# sample_next::ip to calculate 'stop_addr', adding an extra 4 to 'stop_addr'
# for the sake of objdump so the final assembler dump can include the last
# instruction for sample_next::ip.
start_addr = cpu_data[str(cpu) + 'addr']
stop_addr = ip + 4
# Record for previous sample packet
cpu_data[str(cpu) + 'addr'] = addr
# Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4
if (start_addr == 0 and stop_addr == 4):
print(f"CPU{cpu}: CS_ETM_TRACE_ON packet is inserted")
return
if (start_addr < int(dso_start) or start_addr > int(dso_end)):
print(f"Start address 0x{start_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}")
return
if (stop_addr < int(dso_start) or stop_addr > int(dso_end)):
print(f"Stop address 0x{stop_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}")
return
if (options.objdump_name != None):
# There is no need to subtract a virtual memory offset for disassembly
# of the kernel dso, so in this case we set vm_start to zero.
if (dso == "[kernel.kallsyms]"):
dso_vm_start = 0
else:
dso_vm_start = int(dso_start)
dso_fname = get_dso_file_path(dso, dso_bid)
if path.exists(dso_fname):
print_disam(dso_fname, dso_vm_start, start_addr, stop_addr)
else:
print(f"Failed to find dso {dso} for address range [ 0x{start_addr:x} .. 0x{stop_addr:x} ]")
print_srccode(comm, param_dict, sample, symbol, dso)

View File

@ -0,0 +1,48 @@
#!/usr/bin/python
# SPDX-License-Identifier: GPL-2.0
import argparse
import sys
# Basic sanity check of perf CSV output as specified in the man page.
# Currently just checks the number of fields per line in output.
ap = argparse.ArgumentParser()
ap.add_argument('--no-args', action='store_true')
ap.add_argument('--interval', action='store_true')
ap.add_argument('--system-wide-no-aggr', action='store_true')
ap.add_argument('--system-wide', action='store_true')
ap.add_argument('--event', action='store_true')
ap.add_argument('--per-core', action='store_true')
ap.add_argument('--per-thread', action='store_true')
ap.add_argument('--per-die', action='store_true')
ap.add_argument('--per-node', action='store_true')
ap.add_argument('--per-socket', action='store_true')
ap.add_argument('--separator', default=',', nargs='?')
args = ap.parse_args()
Lines = sys.stdin.readlines()
def check_csv_output(exp):
for line in Lines:
if 'failed' not in line:
count = line.count(args.separator)
if count != exp:
sys.stdout.write(''.join(Lines))
raise RuntimeError(f'wrong number of fields. expected {exp} in {line}')
try:
if args.no_args or args.system_wide or args.event:
expected_items = 6
elif args.interval or args.per_thread or args.system_wide_no_aggr:
expected_items = 7
elif args.per_core or args.per_socket or args.per_node or args.per_die:
expected_items = 8
else:
ap.print_help()
raise RuntimeError('No checking option specified')
check_csv_output(expected_items)
except:
sys.stdout.write('Test failed for input: ' + ''.join(Lines))
raise

View File

@ -0,0 +1,60 @@
#!/bin/sh
# perf record offcpu profiling tests
# SPDX-License-Identifier: GPL-2.0
set -e
err=0
perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
cleanup() {
rm -f ${perfdata}
rm -f ${perfdata}.old
trap - exit term int
}
trap_cleanup() {
cleanup
exit 1
}
trap trap_cleanup exit term int
test_offcpu() {
echo "Basic off-cpu test"
if [ `id -u` != 0 ]
then
echo "Basic off-cpu test [Skipped permission]"
err=2
return
fi
if perf record --off-cpu -o ${perfdata} --quiet true 2>&1 | grep BUILD_BPF_SKEL
then
echo "Basic off-cpu test [Skipped missing BPF support]"
err=2
return
fi
if ! perf record --off-cpu -e dummy -o ${perfdata} sleep 1 2> /dev/null
then
echo "Basic off-cpu test [Failed record]"
err=1
return
fi
if ! perf evlist -i ${perfdata} | grep -q "offcpu-time"
then
echo "Basic off-cpu test [Failed record]"
err=1
return
fi
if ! perf report -i ${perfdata} -q --percent-limit=90 | egrep -q sleep
then
echo "Basic off-cpu test [Failed missing output]"
err=1
return
fi
echo "Basic off-cpu test [Success]"
}
test_offcpu
cleanup
exit $err
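
A hedged sketch of building and running this new test locally; the exact invocation may differ per setup, but BPF skeleton support is required for the non-skipped path:

    cd tools/perf
    make BUILD_BPF_SKEL=1
    sudo ./perf test -v offcpu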

View File

@ -0,0 +1,147 @@
#!/bin/bash
# perf stat CSV output linter
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
# Tests various perf stat CSV output commands for the
# correct number of fields and the CSV separator set to ','.
set -e
pythonchecker=$(dirname $0)/lib/perf_csv_output_lint.py
if [ "x$PYTHON" == "x" ]
then
if which python3 > /dev/null
then
PYTHON=python3
elif which python > /dev/null
then
PYTHON=python
else
echo Skipping test, python not detected please set environment variable PYTHON.
exit 2
fi
fi
# Return true if perf_event_paranoid is > $1 and not running as root.
function ParanoidAndNotRoot()
{
[ $(id -u) != 0 ] && [ $(cat /proc/sys/kernel/perf_event_paranoid) -gt $1 ]
}
check_no_args()
{
echo -n "Checking CSV output: no args "
perf stat -x, true 2>&1 | $PYTHON $pythonchecker --no-args
echo "[Success]"
}
check_system_wide()
{
echo -n "Checking CSV output: system wide "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoid and not root"
return
fi
perf stat -x, -a true 2>&1 | $PYTHON $pythonchecker --system-wide
echo "[Success]"
}
check_system_wide_no_aggr()
{
echo -n "Checking CSV output: system wide "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoid and not root"
return
fi
echo -n "Checking CSV output: system wide no aggregation "
perf stat -x, -A -a --no-merge true 2>&1 | $PYTHON $pythonchecker --system-wide-no-aggr
echo "[Success]"
}
check_interval()
{
echo -n "Checking CSV output: interval "
perf stat -x, -I 1000 true 2>&1 | $PYTHON $pythonchecker --interval
echo "[Success]"
}
check_event()
{
echo -n "Checking CSV output: event "
perf stat -x, -e cpu-clock true 2>&1 | $PYTHON $pythonchecker --event
echo "[Success]"
}
check_per_core()
{
echo -n "Checking CSV output: per core "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoid and not root"
return
fi
perf stat -x, --per-core -a true 2>&1 | $PYTHON $pythonchecker --per-core
echo "[Success]"
}
check_per_thread()
{
echo -n "Checking CSV output: per thread "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoid and not root"
return
fi
perf stat -x, --per-thread -a true 2>&1 | $PYTHON $pythonchecker --per-thread
echo "[Success]"
}
check_per_die()
{
echo -n "Checking CSV output: per die "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoid and not root"
return
fi
perf stat -x, --per-die -a true 2>&1 | $PYTHON $pythonchecker --per-die
echo "[Success]"
}
check_per_node()
{
echo -n "Checking CSV output: per node "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoid and not root"
return
fi
perf stat -x, --per-node -a true 2>&1 | $PYTHON $pythonchecker --per-node
echo "[Success]"
}
check_per_socket()
{
echo -n "Checking CSV output: per socket "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoid and not root"
return
fi
perf stat -x, --per-socket -a true 2>&1 | $PYTHON $pythonchecker --per-socket
echo "[Success]"
}
check_no_args
check_system_wide
check_system_wide_no_aggr
check_interval
check_event
check_per_core
check_per_thread
check_per_die
check_per_node
check_per_socket
exit 0

View File

@ -0,0 +1,71 @@
#!/bin/sh
# Miscellaneous Intel PT testing
# SPDX-License-Identifier: GPL-2.0
set -e
# Skip if no Intel PT
perf list | grep -q 'intel_pt//' || exit 2
skip_cnt=0
ok_cnt=0
err_cnt=0
tmpfile=`mktemp`
perfdatafile=`mktemp`
can_cpu_wide()
{
perf record -o ${tmpfile} -B -N --no-bpf-event -e dummy:u -C $1 true 2>&1 >/dev/null || return 2
return 0
}
test_system_wide_side_band()
{
# Need CPU 0 and CPU 1
can_cpu_wide 0 || return $?
can_cpu_wide 1 || return $?
# Record on CPU 0 a task running on CPU 1
perf record -B -N --no-bpf-event -o ${perfdatafile} -e intel_pt//u -C 0 -- taskset --cpu-list 1 uname
# Should get MMAP events from CPU 1 because they can be needed to decode
mmap_cnt=`perf script -i ${perfdatafile} --no-itrace --show-mmap-events -C 1 2>/dev/null | grep MMAP | wc -l`
if [ ${mmap_cnt} -gt 0 ] ; then
return 0
fi
echo "Failed to record MMAP events on CPU 1 when tracing CPU 0"
return 1
}
count_result()
{
if [ $1 -eq 2 ] ; then
skip_cnt=`expr ${skip_cnt} \+ 1`
return
fi
if [ $1 -eq 0 ] ; then
ok_cnt=`expr ${ok_cnt} \+ 1`
return
fi
err_cnt=`expr ${err_cnt} \+ 1`
}
test_system_wide_side_band
count_result $?
rm -f ${tmpfile}
rm -f ${perfdatafile}
if [ ${err_cnt} -gt 0 ] ; then
exit 1
fi
if [ ${ok_cnt} -gt 0 ] ; then
exit 0
fi
exit 2

View File

@ -147,6 +147,7 @@ perf-$(CONFIG_LIBBPF) += bpf_map.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o
perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
perf-$(CONFIG_LIBELF) += symbol-elf.o
perf-$(CONFIG_LIBELF) += probe-file.o

View File

@ -125,7 +125,7 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
mm->tid = mp->tid;
mm->cpu = mp->cpu.cpu;
if (!mp->len) {
if (!mp->len || !mp->mmap_needed) {
mm->base = NULL;
return 0;
}
@ -168,13 +168,20 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
}
void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
struct evlist *evlist, int idx,
bool per_cpu)
struct evlist *evlist,
struct evsel *evsel, int idx)
{
bool per_cpu = !perf_cpu_map__empty(evlist->core.user_requested_cpus);
mp->mmap_needed = evsel->needs_auxtrace_mmap;
if (!mp->mmap_needed)
return;
mp->idx = idx;
if (per_cpu) {
mp->cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, idx);
mp->cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx);
if (evlist->core.threads)
mp->tid = perf_thread_map__pid(evlist->core.threads, 0);
else

View File

@ -344,6 +344,10 @@ struct auxtrace_mmap {
* @idx: index of this mmap
* @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
* mmap) otherwise %0
* @mmap_needed: set to %false for non-auxtrace events. This is needed because
* auxtrace mmapping is done in the same code path as non-auxtrace
* mmapping but not every evsel that needs non-auxtrace mmapping
* also needs auxtrace mmapping.
* @cpu: cpu number for a per-cpu mmap otherwise %-1
*/
struct auxtrace_mmap_params {
@ -353,6 +357,7 @@ struct auxtrace_mmap_params {
int prot;
int idx;
pid_t tid;
bool mmap_needed;
struct perf_cpu cpu;
};
@ -490,8 +495,8 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
unsigned int auxtrace_pages,
bool auxtrace_overwrite);
void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
struct evlist *evlist, int idx,
bool per_cpu);
struct evlist *evlist,
struct evsel *evsel, int idx);
typedef int (*process_auxtrace_t)(struct perf_tool *tool,
struct mmap *map,
@ -863,8 +868,8 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
unsigned int auxtrace_pages,
bool auxtrace_overwrite);
void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
struct evlist *evlist, int idx,
bool per_cpu);
struct evlist *evlist,
struct evsel *evsel, int idx);
#define ITRACE_HELP ""

View File

@ -35,7 +35,8 @@ struct btf *btf__load_from_kernel_by_id(__u32 id)
}
#endif
int __weak bpf_prog_load(enum bpf_prog_type prog_type,
#ifndef HAVE_LIBBPF_BPF_PROG_LOAD
int bpf_prog_load(enum bpf_prog_type prog_type,
const char *prog_name __maybe_unused,
const char *license,
const struct bpf_insn *insns, size_t insn_cnt,
@ -47,8 +48,10 @@ int __weak bpf_prog_load(enum bpf_prog_type prog_type,
opts->kern_version, opts->log_buf, opts->log_size);
#pragma GCC diagnostic pop
}
#endif
struct bpf_program * __weak
#ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM
struct bpf_program *
bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
{
#pragma GCC diagnostic push
@ -56,8 +59,10 @@ bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
return bpf_program__next(prev, obj);
#pragma GCC diagnostic pop
}
#endif
struct bpf_map * __weak
#ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_MAP
struct bpf_map *
bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
{
#pragma GCC diagnostic push
@ -65,8 +70,10 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
return bpf_map__next(prev, obj);
#pragma GCC diagnostic pop
}
#endif
const void * __weak
#ifndef HAVE_LIBBPF_BTF__RAW_DATA
const void *
btf__raw_data(const struct btf *btf_ro, __u32 *size)
{
#pragma GCC diagnostic push
@ -74,6 +81,7 @@ btf__raw_data(const struct btf *btf_ro, __u32 *size)
return btf__get_raw_data(btf_ro, size);
#pragma GCC diagnostic pop
}
#endif
static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len)
{

View File

@ -312,7 +312,10 @@ static bool bperf_attr_map_compatible(int attr_map_fd)
(map_info.value_size == sizeof(struct perf_event_attr_map_entry));
}
int __weak
#ifndef HAVE_LIBBPF_BPF_MAP_CREATE
LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size,
int value_size, int max_entries, __u32 map_flags);
int
bpf_map_create(enum bpf_map_type map_type,
const char *map_name __maybe_unused,
__u32 key_size,
@ -325,6 +328,7 @@ bpf_map_create(enum bpf_map_type map_type,
return bpf_create_map(map_type, key_size, value_size, max_entries, 0);
#pragma GCC diagnostic pop
}
#endif
static int bperf_lock_attr_map(struct target *target)
{

View File

@ -0,0 +1,338 @@
// SPDX-License-Identifier: GPL-2.0
#include "util/bpf_counter.h"
#include "util/debug.h"
#include "util/evsel.h"
#include "util/evlist.h"
#include "util/off_cpu.h"
#include "util/perf-hooks.h"
#include "util/record.h"
#include "util/session.h"
#include "util/target.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/cgroup.h"
#include <bpf/bpf.h>
#include "bpf_skel/off_cpu.skel.h"
#define MAX_STACKS 32
/* we don't need an actual timestamp, we just want to put the samples last */
#define OFF_CPU_TIMESTAMP (~0ull << 32)
static struct off_cpu_bpf *skel;
struct off_cpu_key {
u32 pid;
u32 tgid;
u32 stack_id;
u32 state;
u64 cgroup_id;
};
union off_cpu_data {
struct perf_event_header hdr;
u64 array[1024 / sizeof(u64)];
};
static int off_cpu_config(struct evlist *evlist)
{
struct evsel *evsel;
struct perf_event_attr attr = {
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_BPF_OUTPUT,
.size = sizeof(attr), /* to capture ABI version */
};
char *evname = strdup(OFFCPU_EVENT);
if (evname == NULL)
return -ENOMEM;
evsel = evsel__new(&attr);
if (!evsel) {
free(evname);
return -ENOMEM;
}
evsel->core.attr.freq = 1;
evsel->core.attr.sample_period = 1;
/* off-cpu analysis depends on stack trace */
evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN;
evlist__add(evlist, evsel);
free(evsel->name);
evsel->name = evname;
return 0;
}
static void off_cpu_start(void *arg)
{
struct evlist *evlist = arg;
/* update task filter for the given workload */
if (!skel->bss->has_cpu && !skel->bss->has_task &&
perf_thread_map__pid(evlist->core.threads, 0) != -1) {
int fd;
u32 pid;
u8 val = 1;
skel->bss->has_task = 1;
fd = bpf_map__fd(skel->maps.task_filter);
pid = perf_thread_map__pid(evlist->core.threads, 0);
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
}
skel->bss->enabled = 1;
}
static void off_cpu_finish(void *arg __maybe_unused)
{
skel->bss->enabled = 0;
off_cpu_bpf__destroy(skel);
}
/* v5.18 kernel added prev_state arg, so it needs to check the signature */
static void check_sched_switch_args(void)
{
const struct btf *btf = bpf_object__btf(skel->obj);
const struct btf_type *t1, *t2, *t3;
u32 type_id;
type_id = btf__find_by_name_kind(btf, "bpf_trace_sched_switch",
BTF_KIND_TYPEDEF);
if ((s32)type_id < 0)
return;
t1 = btf__type_by_id(btf, type_id);
if (t1 == NULL)
return;
t2 = btf__type_by_id(btf, t1->type);
if (t2 == NULL || !btf_is_ptr(t2))
return;
t3 = btf__type_by_id(btf, t2->type);
if (t3 && btf_is_func_proto(t3) && btf_vlen(t3) == 4) {
/* new format: pass prev_state as 4th arg */
skel->rodata->has_prev_state = true;
}
}
int off_cpu_prepare(struct evlist *evlist, struct target *target,
struct record_opts *opts)
{
int err, fd, i;
int ncpus = 1, ntasks = 1, ncgrps = 1;
if (off_cpu_config(evlist) < 0) {
pr_err("Failed to config off-cpu BPF event\n");
return -1;
}
skel = off_cpu_bpf__open();
if (!skel) {
pr_err("Failed to open off-cpu BPF skeleton\n");
return -1;
}
/* don't need to set cpu filter for system-wide mode */
if (target->cpu_list) {
ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
}
if (target__has_task(target)) {
ntasks = perf_thread_map__nr(evlist->core.threads);
bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
}
if (evlist__first(evlist)->cgrp) {
ncgrps = evlist->core.nr_entries - 1; /* excluding a dummy */
bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps);
if (!cgroup_is_v2("perf_event"))
skel->rodata->uses_cgroup_v1 = true;
}
if (opts->record_cgroup) {
skel->rodata->needs_cgroup = true;
if (!cgroup_is_v2("perf_event"))
skel->rodata->uses_cgroup_v1 = true;
}
set_max_rlimit();
check_sched_switch_args();
err = off_cpu_bpf__load(skel);
if (err) {
pr_err("Failed to load off-cpu skeleton\n");
goto out;
}
if (target->cpu_list) {
u32 cpu;
u8 val = 1;
skel->bss->has_cpu = 1;
fd = bpf_map__fd(skel->maps.cpu_filter);
for (i = 0; i < ncpus; i++) {
cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu;
bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
}
}
if (target__has_task(target)) {
u32 pid;
u8 val = 1;
skel->bss->has_task = 1;
fd = bpf_map__fd(skel->maps.task_filter);
for (i = 0; i < ntasks; i++) {
pid = perf_thread_map__pid(evlist->core.threads, i);
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
}
}
if (evlist__first(evlist)->cgrp) {
struct evsel *evsel;
u8 val = 1;
skel->bss->has_cgroup = 1;
fd = bpf_map__fd(skel->maps.cgroup_filter);
evlist__for_each_entry(evlist, evsel) {
struct cgroup *cgrp = evsel->cgrp;
if (cgrp == NULL)
continue;
if (!cgrp->id && read_cgroup_id(cgrp) < 0) {
pr_err("Failed to read cgroup id of %s\n",
cgrp->name);
goto out;
}
bpf_map_update_elem(fd, &cgrp->id, &val, BPF_ANY);
}
}
err = off_cpu_bpf__attach(skel);
if (err) {
pr_err("Failed to attach off-cpu BPF skeleton\n");
goto out;
}
if (perf_hooks__set_hook("record_start", off_cpu_start, evlist) ||
perf_hooks__set_hook("record_end", off_cpu_finish, evlist)) {
pr_err("Failed to attach off-cpu skeleton\n");
goto out;
}
return 0;
out:
off_cpu_bpf__destroy(skel);
return -1;
}
int off_cpu_write(struct perf_session *session)
{
int bytes = 0, size;
int fd, stack;
u64 sample_type, val, sid = 0;
struct evsel *evsel;
struct perf_data_file *file = &session->data->file;
struct off_cpu_key prev, key;
union off_cpu_data data = {
.hdr = {
.type = PERF_RECORD_SAMPLE,
.misc = PERF_RECORD_MISC_USER,
},
};
u64 tstamp = OFF_CPU_TIMESTAMP;
skel->bss->enabled = 0;
evsel = evlist__find_evsel_by_str(session->evlist, OFFCPU_EVENT);
if (evsel == NULL) {
pr_err("%s evsel not found\n", OFFCPU_EVENT);
return 0;
}
sample_type = evsel->core.attr.sample_type;
if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) {
if (evsel->core.id)
sid = evsel->core.id[0];
}
fd = bpf_map__fd(skel->maps.off_cpu);
stack = bpf_map__fd(skel->maps.stacks);
memset(&prev, 0, sizeof(prev));
while (!bpf_map_get_next_key(fd, &prev, &key)) {
int n = 1; /* start from perf_event_header */
int ip_pos = -1;
bpf_map_lookup_elem(fd, &key, &val);
if (sample_type & PERF_SAMPLE_IDENTIFIER)
data.array[n++] = sid;
if (sample_type & PERF_SAMPLE_IP) {
ip_pos = n;
data.array[n++] = 0; /* will be updated */
}
if (sample_type & PERF_SAMPLE_TID)
data.array[n++] = (u64)key.pid << 32 | key.tgid;
if (sample_type & PERF_SAMPLE_TIME)
data.array[n++] = tstamp;
if (sample_type & PERF_SAMPLE_ID)
data.array[n++] = sid;
if (sample_type & PERF_SAMPLE_CPU)
data.array[n++] = 0;
if (sample_type & PERF_SAMPLE_PERIOD)
data.array[n++] = val;
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
int len = 0;
/* data.array[n] is callchain->nr (updated later) */
data.array[n + 1] = PERF_CONTEXT_USER;
data.array[n + 2] = 0;
bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]);
while (data.array[n + 2 + len])
len++;
/* update length of callchain */
data.array[n] = len + 1;
/* update sample ip with the first callchain entry */
if (ip_pos >= 0)
data.array[ip_pos] = data.array[n + 2];
/* calculate sample callchain data array length */
n += len + 2;
}
if (sample_type & PERF_SAMPLE_CGROUP)
data.array[n++] = key.cgroup_id;
/* TODO: handle more sample types */
size = n * sizeof(u64);
data.hdr.size = size;
bytes += size;
if (perf_data_file__write(file, &data, size) < 0) {
pr_err("failed to write perf data, error: %m\n");
return bytes;
}
prev = key;
/* increase dummy timestamp to sort later samples */
tstamp++;
}
return bytes;
}

View File

@ -0,0 +1,229 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2022 Google
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
/* task->flags for off-cpu analysis */
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
/* task->state for off-cpu analysis */
#define TASK_INTERRUPTIBLE 0x0001
#define TASK_UNINTERRUPTIBLE 0x0002
#define MAX_STACKS 32
#define MAX_ENTRIES 102400
struct tstamp_data {
__u32 stack_id;
__u32 state;
__u64 timestamp;
};
struct offcpu_key {
__u32 pid;
__u32 tgid;
__u32 stack_id;
__u32 state;
__u64 cgroup_id;
};
struct {
__uint(type, BPF_MAP_TYPE_STACK_TRACE);
__uint(key_size, sizeof(__u32));
__uint(value_size, MAX_STACKS * sizeof(__u64));
__uint(max_entries, MAX_ENTRIES);
} stacks SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, struct tstamp_data);
} tstamp SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(struct offcpu_key));
__uint(value_size, sizeof(__u64));
__uint(max_entries, MAX_ENTRIES);
} off_cpu SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u8));
__uint(max_entries, 1);
} cpu_filter SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u8));
__uint(max_entries, 1);
} task_filter SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(__u64));
__uint(value_size, sizeof(__u8));
__uint(max_entries, 1);
} cgroup_filter SEC(".maps");
/* old kernel task_struct definition */
struct task_struct___old {
long state;
} __attribute__((preserve_access_index));
int enabled = 0;
int has_cpu = 0;
int has_task = 0;
int has_cgroup = 0;
const volatile bool has_prev_state = false;
const volatile bool needs_cgroup = false;
const volatile bool uses_cgroup_v1 = false;
/*
* Old kernel used to call it task_struct->state and now it's '__state'.
* Use BPF CO-RE "ignored suffix rule" to deal with it like below:
*
* https://nakryiko.com/posts/bpf-core-reference-guide/#handling-incompatible-field-and-type-changes
*/
static inline int get_task_state(struct task_struct *t)
{
if (bpf_core_field_exists(t->__state))
return BPF_CORE_READ(t, __state);
/* recast pointer to capture task_struct___old type for compiler */
struct task_struct___old *t_old = (void *)t;
/* now use old "state" name of the field */
return BPF_CORE_READ(t_old, state);
}
static inline __u64 get_cgroup_id(struct task_struct *t)
{
struct cgroup *cgrp;
if (uses_cgroup_v1)
cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_event_cgrp_id], cgroup);
else
cgrp = BPF_CORE_READ(t, cgroups, dfl_cgrp);
return BPF_CORE_READ(cgrp, kn, id);
}
static inline int can_record(struct task_struct *t, int state)
{
/* kernel threads don't have user stack */
if (t->flags & PF_KTHREAD)
return 0;
if (state != TASK_INTERRUPTIBLE &&
state != TASK_UNINTERRUPTIBLE)
return 0;
if (has_cpu) {
__u32 cpu = bpf_get_smp_processor_id();
__u8 *ok;
ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
if (!ok)
return 0;
}
if (has_task) {
__u8 *ok;
__u32 pid = t->pid;
ok = bpf_map_lookup_elem(&task_filter, &pid);
if (!ok)
return 0;
}
if (has_cgroup) {
__u8 *ok;
__u64 cgrp_id = get_cgroup_id(t);
ok = bpf_map_lookup_elem(&cgroup_filter, &cgrp_id);
if (!ok)
return 0;
}
return 1;
}
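The three filter maps above (cpu_filter, task_filter and cgroup_filter) are only consulted when the corresponding has_* flag is set, and they are expected to be filled from user space before 'enabled' is flipped on. The following is a minimal user-space sketch of that step using plain libbpf calls; the skeleton type name (off_cpu_bpf), the header name and the helper itself are assumptions for illustration, not part of this patch.
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "off_cpu.skel.h"	/* assumed name of the generated skeleton header */
/* illustrative helper: allow one pid through the task filter */
static int filter_add_task(struct off_cpu_bpf *skel, __u32 pid)
{
	__u8 val = 1;
	int fd = bpf_map__fd(skel->maps.task_filter);
	skel->bss->has_task = 1;
	/* can_record() only checks for the key's presence, so any value works */
	return bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
}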
static int off_cpu_stat(u64 *ctx, struct task_struct *prev,
struct task_struct *next, int state)
{
__u64 ts;
__u32 stack_id;
struct tstamp_data *pelem;
ts = bpf_ktime_get_ns();
if (!can_record(prev, state))
goto next;
stack_id = bpf_get_stackid(ctx, &stacks,
BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK);
pelem = bpf_task_storage_get(&tstamp, prev, NULL,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!pelem)
goto next;
pelem->timestamp = ts;
pelem->state = state;
pelem->stack_id = stack_id;
next:
pelem = bpf_task_storage_get(&tstamp, next, NULL, 0);
if (pelem && pelem->timestamp) {
struct offcpu_key key = {
.pid = next->pid,
.tgid = next->tgid,
.stack_id = pelem->stack_id,
.state = pelem->state,
.cgroup_id = needs_cgroup ? get_cgroup_id(next) : 0,
};
__u64 delta = ts - pelem->timestamp;
__u64 *total;
total = bpf_map_lookup_elem(&off_cpu, &key);
if (total)
*total += delta;
else
bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY);
		/* prevent the timestamp from being reused later */
pelem->timestamp = 0;
}
return 0;
}
SEC("tp_btf/sched_switch")
int on_switch(u64 *ctx)
{
struct task_struct *prev, *next;
int prev_state;
if (!enabled)
return 0;
prev = (struct task_struct *)ctx[1];
next = (struct task_struct *)ctx[2];
if (has_prev_state)
prev_state = (int)ctx[3];
else
prev_state = get_task_state(prev);
return off_cpu_stat(ctx, prev, next, prev_state);
}
char LICENSE[] SEC("license") = "Dual BSD/GPL";
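Each update of the off_cpu hash accumulates the blocked time in nanoseconds for a (pid, tgid, stack_id, state, cgroup_id) key; perf later converts those entries into synthesized samples, as the dump loop near the top of this diff shows. Purely as an illustration of the map layout, and not how perf itself consumes it, here is a sketch that walks the map with plain libbpf, assuming 'fd' is the file descriptor of the off_cpu map:
#include <stdio.h>
#include <bpf/bpf.h>
/* illustrative only: print every accumulated off-CPU total */
static void dump_off_cpu_map(int fd)
{
	struct offcpu_key key, *prev = NULL;
	__u64 total;
	while (bpf_map_get_next_key(fd, prev, &key) == 0) {
		if (bpf_map_lookup_elem(fd, &key, &total) == 0)
			printf("pid=%u tgid=%u state=%u cgroup=%llu: %llu ns\n",
			       key.pid, key.tgid, key.state,
			       (unsigned long long)key.cgroup_id,
			       (unsigned long long)total);
		prev = &key;
	}
}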

View File

@ -242,14 +242,20 @@ int __evlist__add_default(struct evlist *evlist, bool precise)
return 0;
}
int evlist__add_dummy(struct evlist *evlist)
static struct evsel *evlist__dummy_event(struct evlist *evlist)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_DUMMY,
.size = sizeof(attr), /* to capture ABI version */
};
struct evsel *evsel = evsel__new_idx(&attr, evlist->core.nr_entries);
return evsel__new_idx(&attr, evlist->core.nr_entries);
}
int evlist__add_dummy(struct evlist *evlist)
{
struct evsel *evsel = evlist__dummy_event(evlist);
if (evsel == NULL)
return -ENOMEM;
@ -258,6 +264,51 @@ int evlist__add_dummy(struct evlist *evlist)
return 0;
}
static void evlist__add_on_all_cpus(struct evlist *evlist, struct evsel *evsel)
{
evsel->core.system_wide = true;
/*
* All CPUs.
*
	 * Note that perf_event_open() does not accept CPUs that are not online,
	 * so in practice this CPU list will contain only the CPUs that are online.
*/
perf_cpu_map__put(evsel->core.own_cpus);
evsel->core.own_cpus = perf_cpu_map__new(NULL);
perf_cpu_map__put(evsel->core.cpus);
evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
/* No threads */
perf_thread_map__put(evsel->core.threads);
evsel->core.threads = perf_thread_map__new_dummy();
evlist__add(evlist, evsel);
}
struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide)
{
struct evsel *evsel = evlist__dummy_event(evlist);
if (!evsel)
return NULL;
evsel->core.attr.exclude_kernel = 1;
evsel->core.attr.exclude_guest = 1;
evsel->core.attr.exclude_hv = 1;
evsel->core.attr.freq = 0;
evsel->core.attr.sample_period = 1;
evsel->no_aux_samples = true;
evsel->name = strdup("dummy:u");
if (system_wide)
evlist__add_on_all_cpus(evlist, evsel);
else
evlist__add(evlist, evsel);
return evsel;
}
static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
{
struct evsel *evsel, *n;
@ -747,15 +798,15 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
static void
perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist,
struct perf_evsel *_evsel __maybe_unused,
struct perf_evsel *_evsel,
struct perf_mmap_param *_mp,
int idx)
{
struct evlist *evlist = container_of(_evlist, struct evlist, core);
struct mmap_params *mp = container_of(_mp, struct mmap_params, core);
bool per_cpu = !perf_cpu_map__empty(_evlist->user_requested_cpus);
struct evsel *evsel = container_of(_evsel, struct evsel, core);
auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, idx, per_cpu);
auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, evsel, idx);
}
static struct perf_mmap*

View File

@ -114,6 +114,11 @@ int arch_evlist__add_default_attrs(struct evlist *evlist);
struct evsel *arch_evlist__leader(struct list_head *list);
int evlist__add_dummy(struct evlist *evlist);
struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide);
static inline struct evsel *evlist__add_dummy_on_all_cpus(struct evlist *evlist)
{
return evlist__add_aux_dummy(evlist, true);
}
int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
evsel__sb_cb_t cb, void *data);
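evlist__add_aux_dummy() gives AUX-area tracers a dummy event for sideband records, and the inline wrapper above pins it to all online CPUs with its own dummy thread map. As a hedged usage sketch (the caller name and its setup are made up here, not taken from this series), a tracer setup path could use it like this:
#include <errno.h>
#include "util/evlist.h"
/* illustrative caller: keep sideband tracking alive, optionally system-wide */
static int add_sideband_dummy(struct evlist *evlist, bool system_wide)
{
	struct evsel *dummy;
	dummy = system_wide ? evlist__add_dummy_on_all_cpus(evlist)
			    : evlist__add_aux_dummy(evlist, false);
	if (!dummy)
		return -ENOMEM;
	/* record task/mmap metadata only on the dummy event */
	dummy->core.attr.task = 1;
	dummy->core.attr.mmap = 1;
	return 0;
}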

View File

@ -296,8 +296,8 @@ struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx)
return NULL;
evsel__init(evsel, attr, idx);
if (evsel__is_bpf_output(evsel)) {
evsel->core.attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
if (evsel__is_bpf_output(evsel) && !attr->sample_type) {
evsel->core.attr.sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
evsel->core.attr.sample_period = 1;
}
@ -409,6 +409,7 @@ struct evsel *evsel__clone(struct evsel *orig)
evsel->core.threads = perf_thread_map__get(orig->core.threads);
evsel->core.nr_members = orig->core.nr_members;
evsel->core.system_wide = orig->core.system_wide;
evsel->core.requires_cpu = orig->core.requires_cpu;
if (orig->name) {
evsel->name = strdup(orig->name);
@ -896,7 +897,7 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
"specifying a subset with --user-regs may render DWARF unwinding unreliable, "
"so the minimal registers set (IP, SP) is explicitly forced.\n");
} else {
attr->sample_regs_user |= PERF_REGS_MASK;
attr->sample_regs_user |= arch__user_reg_mask();
}
attr->sample_stack_user = param->dump_size;
attr->exclude_callchain_user = 1;

View File

@ -24,7 +24,7 @@
#include "unwind.h"
#include "libunwind-aarch64.h"
#define perf_event_arm_regs perf_event_arm64_regs
#include <../../../../arch/arm64/include/uapi/asm/perf_regs.h>
#include <../../../arch/arm64/include/uapi/asm/perf_regs.h>
#undef perf_event_arm_regs
#include "../../arch/arm64/util/unwind-libunwind.c"

View File

@ -62,8 +62,8 @@ void __weak auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp __maybe_u
void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused,
struct evlist *evlist __maybe_unused,
int idx __maybe_unused,
bool per_cpu __maybe_unused)
struct evsel *evsel __maybe_unused,
int idx __maybe_unused)
{
}

tools/perf/util/off_cpu.h (new file)
View File

@ -0,0 +1,29 @@
#ifndef PERF_UTIL_OFF_CPU_H
#define PERF_UTIL_OFF_CPU_H
struct evlist;
struct target;
struct perf_session;
struct record_opts;
#define OFFCPU_EVENT "offcpu-time"
#ifdef HAVE_BPF_SKEL
int off_cpu_prepare(struct evlist *evlist, struct target *target,
struct record_opts *opts);
int off_cpu_write(struct perf_session *session);
#else
static inline int off_cpu_prepare(struct evlist *evlist __maybe_unused,
struct target *target __maybe_unused,
struct record_opts *opts __maybe_unused)
{
return -1;
}
static inline int off_cpu_write(struct perf_session *session __maybe_unused)
{
return -1;
}
#endif
#endif /* PERF_UTIL_OFF_CPU_H */
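When perf is built without HAVE_BPF_SKEL the stubs above simply return -1, so callers can be wired up without extra #ifdefs. A hypothetical caller, sketched here only to show the intended flow and not copied from builtin-record.c:
#include "util/off_cpu.h"
/* illustrative: prepare at record start, flush the map into perf.data at the end */
static int record_off_cpu(struct evlist *evlist, struct target *target,
			  struct record_opts *opts, struct perf_session *session,
			  bool requested)
{
	if (!requested)
		return 0;
	/* off_cpu_prepare() is expected to load the BPF program shown earlier */
	if (off_cpu_prepare(evlist, target, opts) < 0)
		return -1;
	/* ... run the recording session ... */
	/* turns the accumulated map entries into synthesized samples */
	return off_cpu_write(session) < 0 ? -1 : 0;
}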

View File

@ -365,7 +365,7 @@ __add_event(struct list_head *list, int *idx,
(*idx)++;
evsel->core.cpus = cpus;
evsel->core.own_cpus = perf_cpu_map__get(cpus);
evsel->core.system_wide = pmu ? pmu->is_uncore : false;
evsel->core.requires_cpu = pmu ? pmu->is_uncore : false;
evsel->auto_merge_stats = auto_merge_stats;
if (name)

View File

@ -103,6 +103,8 @@ static const char *__perf_reg_name_arm64(int id)
return "lr";
case PERF_REG_ARM64_PC:
return "pc";
case PERF_REG_ARM64_VG:
return "vg";
default:
return NULL;
}

View File

@ -38,5 +38,6 @@ util/units.c
util/affinity.c
util/rwsem.c
util/hashmap.c
util/perf_regs.c
util/pmu-hybrid.c
util/fncache.c

View File

@ -755,12 +755,22 @@ static void set_regs_in_dict(PyObject *dict,
}
static void set_sym_in_dict(PyObject *dict, struct addr_location *al,
const char *dso_field, const char *sym_field,
const char *symoff_field)
const char *dso_field, const char *dso_bid_field,
const char *dso_map_start, const char *dso_map_end,
const char *sym_field, const char *symoff_field)
{
char sbuild_id[SBUILD_ID_SIZE];
if (al->map) {
pydict_set_item_string_decref(dict, dso_field,
_PyUnicode_FromString(al->map->dso->name));
build_id__sprintf(&al->map->dso->bid, sbuild_id);
pydict_set_item_string_decref(dict, dso_bid_field,
_PyUnicode_FromString(sbuild_id));
pydict_set_item_string_decref(dict, dso_map_start,
PyLong_FromUnsignedLong(al->map->start));
pydict_set_item_string_decref(dict, dso_map_end,
PyLong_FromUnsignedLong(al->map->end));
}
if (al->sym) {
pydict_set_item_string_decref(dict, sym_field,
@ -840,7 +850,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
(const char *)sample->raw_data, sample->raw_size));
pydict_set_item_string_decref(dict, "comm",
_PyUnicode_FromString(thread__comm_str(al->thread)));
set_sym_in_dict(dict, al, "dso", "symbol", "symoff");
set_sym_in_dict(dict, al, "dso", "dso_bid", "dso_map_start", "dso_map_end",
"symbol", "symoff");
pydict_set_item_string_decref(dict, "callchain", callchain);
@ -856,7 +867,9 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
if (addr_al) {
pydict_set_item_string_decref(dict_sample, "addr_correlates_sym",
PyBool_FromLong(1));
set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_symbol", "addr_symoff");
set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_dso_bid",
"addr_dso_map_start", "addr_dso_map_end",
"addr_symbol", "addr_symoff");
}
if (sample->flags)