Merge tag 'perf-core-for-mingo-4.17-20180308' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

- Support displaying the IPC/Cycle in the 'annotate' TUI, for systems
  where this info can be obtained, like Intel's >= Skylake (Jin Yao)

- Support wildcards on PMU name in dynamic PMU events (Agustin Vega-Frias)

- Display the PMU name when printing unmerged events in 'stat'
  (Agustin Vega-Frias)

- Auto-merge PMU events created by prefix or glob match
  (Agustin Vega-Frias)

- Fix s390 'call' operations target function annotation (Thomas Richter)

- Handle s390 PC-relative load and store instructions in the augmented
  'annotate' code, used so far in the TUI modes of 'perf report' and
  'perf annotate' (Thomas Richter)

- Provide libtraceevent with a kernel symbol resolver, so that symbols
  in tracepoint fields can be resolved when showing them in tools such
  as 'perf report' (Wang YanQing)

- Refactor the cgroups code to look more like other code in tools/perf,
  using cgroup__{put,get} for refcount operations instead of its
  open-coded equivalent, breaking up larger functions, etc
  (Arnaldo Carvalho de Melo)

- Implement support for the -G/--cgroup target in 'perf trace', allowing
  strace-like tracing (plus other events, backtraces, etc) for cgroups
  (Arnaldo Carvalho de Melo)

- Update the thread shortname in 'perf sched map' when the thread's
  COMM changes (Changbin Du)

- Refcount 'struct mem_info', to better share it over several users,
  avoiding duplicated structs and fixing crashes related to use after
  free (Jiri Olsa)

- Display perf.data version and offsets in 'perf report --header'
  (Jiri Olsa)

- Record the machine's memory topology information in a perf.data
  feature section, to be used by tools such as 'perf c2c' (Jiri Olsa)

- Fix output of forced groups in the header for 'perf report' --stdio
  and --tui (Jiri Olsa)

- Better support the llvm, clang and cxx make tests in the build
  process (Jiri Olsa)

- Streamline the 'struct perf_mmap' methods, storing some info in the
  struct instead of passing it via various methods, shortening their
  signatures (Kan Liang) (see the read-loop sketch below)

- Update the quipper perf.data parser library site information
  (Stephane Eranian)

- Correct perf's man page title markers for asciidoctor (Takashi Iwai)

- Intel PT fixes and refactorings, paving the way for implementing
  support for AUX area sampling (Adrian Hunter)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit fbf8a1e12c
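The 'struct perf_mmap' streamlining mentioned above shows up as the same mechanical change across most of the files below. Condensed from those hunks (this is just the before/after shape of the consumer loop, not a complete program), the callers go from threading the ring positions through every call to letting the struct carry them:

	/* before: every caller passed 'overwrite', 'start' and 'end' around */
	u64 start, end;

	if (perf_mmap__read_init(md, false, &start, &end) < 0)
		continue;
	while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
		/* ... handle the event ... */
		perf_mmap__consume(md, false);
	}
	perf_mmap__read_done(md);

	/* after: 'struct perf_mmap' stores the overwrite flag and positions itself */
	if (perf_mmap__read_init(md) < 0)
		continue;
	while ((event = perf_mmap__read_event(md)) != NULL) {
		/* ... handle the event ... */
		perf_mmap__consume(md);
	}
	perf_mmap__read_done(md);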
@@ -82,7 +82,11 @@ FEATURE_TESTS_EXTRA := \
          liberty-z \
          libunwind-debug-frame \
          libunwind-debug-frame-arm \
-         libunwind-debug-frame-aarch64
+         libunwind-debug-frame-aarch64 \
+         cxx \
+         llvm \
+         llvm-version \
+         clang
 
 FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
 
@@ -54,7 +54,10 @@ FILES= \
          test-jvmti.bin \
          test-sched_getcpu.bin \
          test-setns.bin \
-         test-libopencsd.bin
+         test-libopencsd.bin \
+         test-clang.bin \
+         test-llvm.bin \
+         test-llvm-version.bin
 
 FILES := $(addprefix $(OUTPUT),$(FILES))
 
@@ -257,11 +260,13 @@ $(OUTPUT)test-llvm.bin:
 		-I$(shell $(LLVM_CONFIG) --includedir) \
 		-L$(shell $(LLVM_CONFIG) --libdir) \
 		$(shell $(LLVM_CONFIG) --libs Core BPF) \
-		$(shell $(LLVM_CONFIG) --system-libs)
+		$(shell $(LLVM_CONFIG) --system-libs) \
+		> $(@:.bin=.make.output) 2>&1
 
 $(OUTPUT)test-llvm-version.bin:
 	$(BUILDXX) -std=gnu++11 \
-		-I$(shell $(LLVM_CONFIG) --includedir)
+		-I$(shell $(LLVM_CONFIG) --includedir) \
+		> $(@:.bin=.make.output) 2>&1
 
 $(OUTPUT)test-clang.bin:
 	$(BUILDXX) -std=gnu++11 \
@@ -271,7 +276,8 @@ $(OUTPUT)test-clang.bin:
 		-lclangFrontend -lclangEdit -lclangLex \
 		-lclangAST -Wl,--end-group \
 		$(shell $(LLVM_CONFIG) --libs Core option) \
-		$(shell $(LLVM_CONFIG) --system-libs)
+		$(shell $(LLVM_CONFIG) --system-libs) \
+		> $(@:.bin=.make.output) 2>&1
 
 -include $(OUTPUT)*.d
 
@@ -98,7 +98,7 @@ static inline int test_and_set_bit(int nr, unsigned long *addr)
 
 /**
  * bitmap_alloc - Allocate bitmap
- * @nr: Bit to set
+ * @nbits: Number of bits
  */
 static inline unsigned long *bitmap_alloc(int nbits)
 {
@@ -1,5 +1,5 @@
 perf-data(1)
-==============
+============
 
 NAME
 ----
@@ -1,5 +1,5 @@
 perf-ftrace(1)
-=============
+==============
 
 NAME
 ----
@@ -1,5 +1,5 @@
 perf-kallsyms(1)
-==============
+================
 
 NAME
 ----
@@ -141,7 +141,13 @@ on the first memory controller on socket 0 of a Intel Xeon system
 
 Each memory controller has its own PMU. Measuring the complete system
 bandwidth would require specifying all imc PMUs (see perf list output),
-and adding the values together.
+and adding the values together. To simplify creation of multiple events,
+prefix and glob matching is supported in the PMU name, and the prefix
+'uncore_' is also ignored when performing the match. So the command above
+can be expanded to all memory controllers by using the syntaxes:
+
+	perf stat -C 0 -a imc/cas_count_read/,imc/cas_count_write/ -I 1000 ...
+	perf stat -C 0 -a *imc*/cas_count_read/,*imc*/cas_count_write/ -I 1000 ...
 
 This example measures the combined core power every second
 
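A rough sketch of how such prefix and glob matching against PMU names can work. This is illustrative only: the helper below is hypothetical, not perf's actual matcher (which lives in perf's event parsing code); it just shows the documented behaviour using POSIX fnmatch(3):

	#include <fnmatch.h>
	#include <stdio.h>
	#include <string.h>

	/* Return 1 if 'spec' selects PMU 'name': exact, prefix or glob match,
	 * each retried with a leading "uncore_" in the PMU name ignored. */
	static int pmu_name_matches(const char *name, const char *spec)
	{
		if (!strcmp(name, spec))
			return 1;
		if (!strncmp(name, spec, strlen(spec)))	/* prefix match */
			return 1;
		if (!fnmatch(spec, name, 0))		/* glob match */
			return 1;
		if (!strncmp(name, "uncore_", 7))	/* retry without prefix */
			return pmu_name_matches(name + 7, spec);
		return 0;
	}

	int main(void)
	{
		const char *pmus[] = { "uncore_imc_0", "uncore_imc_1", "cpu" };

		for (unsigned int i = 0; i < sizeof(pmus) / sizeof(pmus[0]); i++)
			printf("%-12s imc:%d *imc*:%d\n", pmus[i],
			       pmu_name_matches(pmus[i], "imc"),
			       pmu_name_matches(pmus[i], "*imc*"));
		return 0;
	}

With these rules both 'imc' and '*imc*' select uncore_imc_0 and uncore_imc_1 but not the cpu PMU, which is what lets the single spec above fan out to every memory controller.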
@@ -1,5 +1,5 @@
 perf-sched(1)
-==============
+=============
 
 NAME
 ----
@@ -1,5 +1,5 @@
 perf-script-perl(1)
-==================
+===================
 
 NAME
 ----
@@ -49,6 +49,13 @@ report::
 	  parameters are defined by corresponding entries in
 	  /sys/bus/event_source/devices/<pmu>/format/*
+
+	Note that the last two syntaxes support prefix and glob matching in
+	the PMU name to simplify creation of events accross multiple instances
+	of the same type of PMU in large systems (e.g. memory controller PMUs).
+	Multiple PMU instances are typical for uncore PMUs, so the prefix
+	'uncore_' is also ignored when performing this match.
+
 
 -i::
 --no-inherit::
 	child tasks do not inherit counters
@@ -260,6 +267,16 @@ taskset.
 --no-merge::
 	Do not merge results from same PMUs.
 
+	When multiple events are created from a single event specification,
+	stat will, by default, aggregate the event counts and show the result
+	in a single row. This option disables that behavior and shows
+	the individual events and counts.
+
+	Multiple events are created from a single event specification when:
+	1. Prefix or glob matching is used for the PMU name.
+	2. Aliases, which are listed immediately after the Kernel PMU events
+	   by perf list, are used.
+
 --smi-cost::
 	Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
 
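To make the merge/--no-merge semantics concrete, here is a small standalone sketch (hypothetical data, not perf code) of the two output modes: by default the instances created from one event specification are aggregated into a single row, while --no-merge prints one row per PMU instance with a uniquified name, as uniquify_event_name() does in the builtin-stat.c hunk further down:

	#include <stdio.h>
	#include <stdint.h>

	struct counter { const char *pmu; const char *event; uint64_t count; };

	int main(void)
	{
		/* Two instances created from the one spec 'imc/cas_count_read/'. */
		struct counter c[] = {
			{ "uncore_imc_0", "cas_count_read", 12345 },
			{ "uncore_imc_1", "cas_count_read", 23456 },
		};

		/* Default: aggregate both instances into one row. */
		printf("%s  %lu\n", c[0].event,
		       (unsigned long)(c[0].count + c[1].count));

		/* --no-merge: one row per instance, name made unique
		 * by prepending the PMU name. */
		for (int i = 0; i < 2; i++)
			printf("%s/%s/  %lu\n", c[i].pmu, c[i].event,
			       (unsigned long)c[i].count);
		return 0;
	}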
@@ -63,6 +63,31 @@ filter out the startup phase of the program, which is often very different.
 --uid=::
 	Record events in threads owned by uid. Name or number.
 
+-G::
+--cgroup::
+	Record events in threads in a cgroup.
+
+	Look for cgroups to set at the /sys/fs/cgroup/perf_event directory, then
+	remove the /sys/fs/cgroup/perf_event/ part and try:
+
+		perf trace -G A -e sched:*switch
+
+	Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc
+	_and_ sched:sched_switch to the 'A' cgroup, while:
+
+		perf trace -e sched:*switch -G A
+
+	will only set the sched:sched_switch event to the 'A' cgroup, all the
+	other events (raw_syscalls:sys_{enter,exit}, etc are left "without"
+	a cgroup (on the root cgroup, sys wide, etc).
+
+	Multiple cgroups:
+
+		perf trace -G A -e sched:*switch -G B
+
+	the syscall ones go to the 'A' cgroup, the sched:sched_switch goes
+	to the 'B' cgroup.
+
 --filter-pids=::
 	Filter out events for these pids and for 'trace' itself (comma separated list).
 
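The "apply the -G cgroup only to events that did not get one" behaviour can be sketched like this. The types here are hypothetical stand-ins; the real implementation is evlist__set_default_cgroup(), visible in the builtin-trace.c hunks further down, which also takes a reference on the passed cgroup:

	struct cgroup;
	struct evsel {
		struct cgroup *cgroup;
		struct evsel *next;
	};

	/* Give every event without an explicit cgroup the default one;
	 * events that already got one from an earlier -G keep theirs.
	 * (Sketch only: the real code also grabs a cgroup reference.) */
	static void set_default_cgroup(struct evsel *list, struct cgroup *dflt)
	{
		for (struct evsel *e = list; e; e = e->next)
			if (!e->cgroup)
				e->cgroup = dflt;
	}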
@@ -485,10 +485,5 @@ in pmu-tools parser. This allows to read perf.data from python and dump it.
 quipper
 
 The quipper C++ parser is available at
-https://chromium.googlesource.com/chromiumos/platform2
+http://github.com/google/perf_data_converter/tree/master/src/quipper
 
-It is under the chromiumos-wide-profiling/ subdirectory. This library can
-convert a perf data file to a protobuf and vice versa.
-
-Unfortunately this parser tends to be many versions behind and may not be able
-to parse data files generated by recent perf.
 
@@ -708,15 +708,15 @@ TAG_FILES= ../../include/uapi/linux/perf_event.h
 
 TAGS:
 	$(QUIET_GEN)$(RM) TAGS; \
-	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs etags -a $(TAG_FILES)
+	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print -o -name '*.cpp' -print | xargs etags -a $(TAG_FILES)
 
 tags:
 	$(QUIET_GEN)$(RM) tags; \
-	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs ctags -a $(TAG_FILES)
+	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print -o -name '*.cpp' -print | xargs ctags -a $(TAG_FILES)
 
 cscope:
 	$(QUIET_GEN)$(RM) cscope*; \
-	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES)
+	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print -o -name '*.cpp' -print | xargs cscope -b $(TAG_FILES)
 
 ### Testing rules
 
@@ -1,6 +1,112 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/compiler.h>
 
+static int s390_call__parse(struct arch *arch, struct ins_operands *ops,
+			    struct map *map)
+{
+	char *endptr, *tok, *name;
+	struct addr_map_symbol target = {
+		.map = map,
+	};
+
+	tok = strchr(ops->raw, ',');
+	if (!tok)
+		return -1;
+
+	ops->target.addr = strtoull(tok + 1, &endptr, 16);
+
+	name = strchr(endptr, '<');
+	if (name == NULL)
+		return -1;
+
+	name++;
+
+	if (arch->objdump.skip_functions_char &&
+	    strchr(name, arch->objdump.skip_functions_char))
+		return -1;
+
+	tok = strchr(name, '>');
+	if (tok == NULL)
+		return -1;
+
+	*tok = '\0';
+	ops->target.name = strdup(name);
+	*tok = '>';
+
+	if (ops->target.name == NULL)
+		return -1;
+	target.addr = map__objdump_2mem(map, ops->target.addr);
+
+	if (map_groups__find_ams(&target) == 0 &&
+	    map__rip_2objdump(target.map, map->map_ip(target.map, target.addr)) == ops->target.addr)
+		ops->target.sym = target.sym;
+
+	return 0;
+}
+
+static int call__scnprintf(struct ins *ins, char *bf, size_t size,
+			   struct ins_operands *ops);
+
+static struct ins_ops s390_call_ops = {
+	.parse	   = s390_call__parse,
+	.scnprintf = call__scnprintf,
+};
+
+static int s390_mov__parse(struct arch *arch __maybe_unused,
+			   struct ins_operands *ops,
+			   struct map *map __maybe_unused)
+{
+	char *s = strchr(ops->raw, ','), *target, *endptr;
+
+	if (s == NULL)
+		return -1;
+
+	*s = '\0';
+	ops->source.raw = strdup(ops->raw);
+	*s = ',';
+
+	if (ops->source.raw == NULL)
+		return -1;
+
+	target = ++s;
+	ops->target.raw = strdup(target);
+	if (ops->target.raw == NULL)
+		goto out_free_source;
+
+	ops->target.addr = strtoull(target, &endptr, 16);
+	if (endptr == target)
+		goto out_free_target;
+
+	s = strchr(endptr, '<');
+	if (s == NULL)
+		goto out_free_target;
+	endptr = strchr(s + 1, '>');
+	if (endptr == NULL)
+		goto out_free_target;
+
+	*endptr = '\0';
+	ops->target.name = strdup(s + 1);
+	*endptr = '>';
+	if (ops->target.name == NULL)
+		goto out_free_target;
+
+	return 0;
+
+out_free_target:
+	zfree(&ops->target.raw);
+out_free_source:
+	zfree(&ops->source.raw);
+	return -1;
+}
+
+static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
+			  struct ins_operands *ops);
+
+static struct ins_ops s390_mov_ops = {
+	.parse	   = s390_mov__parse,
+	.scnprintf = mov__scnprintf,
+};
+
 static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *name)
 {
 	struct ins_ops *ops = NULL;
@@ -14,9 +120,17 @@ static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *na
 	if (!strcmp(name, "bras") ||
 	    !strcmp(name, "brasl") ||
 	    !strcmp(name, "basr"))
-		ops = &call_ops;
+		ops = &s390_call_ops;
 	if (!strcmp(name, "br"))
 		ops = &ret_ops;
+	/* override load/store relative to PC */
+	if (!strcmp(name, "lrl") ||
+	    !strcmp(name, "lgrl") ||
+	    !strcmp(name, "lgfrl") ||
+	    !strcmp(name, "llgfrl") ||
+	    !strcmp(name, "strl") ||
+	    !strcmp(name, "stgrl"))
+		ops = &s390_mov_ops;
 
 	if (ops)
 		arch__associate_ins_ops(arch, name, ops);
 
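Both s390 parsers above scan objdump operand text of the shape "%rN,<hexaddr> <symbol>". A standalone toy (made-up operand string and symbol name, purely illustrative) showing the same strchr()/strtoull() extraction on its own:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	int main(void)
	{
		/* Operand text as objdump might emit it for e.g. 'brasl'
		 * (address and symbol here are invented for the demo). */
		char raw[] = "%r14,3e66 <dummy_func>";
		char *endptr;
		char *tok = strchr(raw, ',');		/* skip the register */

		if (!tok)
			return 1;
		unsigned long long addr = strtoull(tok + 1, &endptr, 16);

		char *name = strchr(endptr, '<');	/* '<symbol>' part */
		char *close = name ? strchr(name + 1, '>') : NULL;
		if (!name || !close)
			return 1;
		*close = '\0';
		printf("target addr: %#llx, target name: %s\n", addr, name + 1);
		return 0;
	}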
@@ -61,7 +61,6 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
 	u64 test_tsc, comm1_tsc, comm2_tsc;
 	u64 test_time, comm1_time = 0, comm2_time = 0;
 	struct perf_mmap *md;
-	u64 end, start;
 
 	threads = thread_map__new(-1, getpid(), UINT_MAX);
 	CHECK_NOT_NULL__(threads);
@@ -112,10 +111,10 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
 
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		md = &evlist->mmap[i];
-		if (perf_mmap__read_init(md, false, &start, &end) < 0)
+		if (perf_mmap__read_init(md) < 0)
 			continue;
 
-		while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+		while ((event = perf_mmap__read_event(md)) != NULL) {
 			struct perf_sample sample;
 
 			if (event->header.type != PERF_RECORD_COMM ||
@@ -134,7 +133,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
 				comm2_time = sample.time;
 		}
 next_event:
-		perf_mmap__consume(md, false);
+		perf_mmap__consume(md);
 		}
 		perf_mmap__read_done(md);
 	}
 
@@ -37,15 +37,11 @@ struct auxtrace_record *auxtrace_record__init_intel(struct perf_evlist *evlist,
 	intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
 	intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
 
-	if (evlist) {
-		evlist__for_each_entry(evlist, evsel) {
-			if (intel_pt_pmu &&
-			    evsel->attr.type == intel_pt_pmu->type)
-				found_pt = true;
-			if (intel_bts_pmu &&
-			    evsel->attr.type == intel_bts_pmu->type)
-				found_bts = true;
-		}
+	evlist__for_each_entry(evlist, evsel) {
+		if (intel_pt_pmu && evsel->attr.type == intel_pt_pmu->type)
+			found_pt = true;
+		if (intel_bts_pmu && evsel->attr.type == intel_bts_pmu->type)
+			found_bts = true;
 	}
 
 	if (found_pt && found_bts) {
 
@@ -44,6 +44,7 @@ struct perf_annotate {
 	bool	   full_paths;
 	bool	   print_line;
 	bool	   skip_missing;
+	bool	   has_br_stack;
 	const char *sym_hist_filter;
 	const char *cpu_list;
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -146,16 +147,73 @@ static void process_branch_stack(struct branch_stack *bs, struct addr_location *
 	free(bi);
 }
 
+static int hist_iter__branch_callback(struct hist_entry_iter *iter,
+				      struct addr_location *al __maybe_unused,
+				      bool single __maybe_unused,
+				      void *arg __maybe_unused)
+{
+	struct hist_entry *he = iter->he;
+	struct branch_info *bi;
+	struct perf_sample *sample = iter->sample;
+	struct perf_evsel *evsel = iter->evsel;
+	int err;
+
+	hist__account_cycles(sample->branch_stack, al, sample, false);
+
+	bi = he->branch_info;
+	err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx);
+
+	if (err)
+		goto out;
+
+	err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx);
+
+out:
+	return err;
+}
+
+static int process_branch_callback(struct perf_evsel *evsel,
+				   struct perf_sample *sample,
+				   struct addr_location *al __maybe_unused,
+				   struct perf_annotate *ann,
+				   struct machine *machine)
+{
+	struct hist_entry_iter iter = {
+		.evsel		= evsel,
+		.sample		= sample,
+		.add_entry_cb	= hist_iter__branch_callback,
+		.hide_unresolved	= symbol_conf.hide_unresolved,
+		.ops		= &hist_iter_branch,
+	};
+
+	struct addr_location a;
+	int ret;
+
+	if (machine__resolve(machine, &a, sample) < 0)
+		return -1;
+
+	if (a.sym == NULL)
+		return 0;
+
+	if (a.map != NULL)
+		a.map->dso->hit = 1;
+
+	ret = hist_entry_iter__add(&iter, &a, PERF_MAX_STACK_DEPTH, ann);
+	return ret;
+}
+
 static int perf_evsel__add_sample(struct perf_evsel *evsel,
 				  struct perf_sample *sample,
 				  struct addr_location *al,
-				  struct perf_annotate *ann)
+				  struct perf_annotate *ann,
+				  struct machine *machine)
 {
 	struct hists *hists = evsel__hists(evsel);
 	struct hist_entry *he;
 	int ret;
 
-	if (ann->sym_hist_filter != NULL &&
+	if ((!ann->has_br_stack || !ui__has_annotation()) &&
+	    ann->sym_hist_filter != NULL &&
 	    (al->sym == NULL ||
 	     strcmp(ann->sym_hist_filter, al->sym->name) != 0)) {
 		/* We're only interested in a symbol named sym_hist_filter */
@@ -178,6 +236,9 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
 	 */
 	process_branch_stack(sample->branch_stack, al, sample);
 
+	if (ann->has_br_stack && ui__has_annotation())
+		return process_branch_callback(evsel, sample, al, ann, machine);
+
 	he = hists__add_entry(hists, al, NULL, NULL, NULL, sample, true);
 	if (he == NULL)
 		return -ENOMEM;
@@ -206,7 +267,8 @@ static int process_sample_event(struct perf_tool *tool,
 	if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap))
 		goto out_put;
 
-	if (!al.filtered && perf_evsel__add_sample(evsel, sample, &al, ann)) {
+	if (!al.filtered &&
+	    perf_evsel__add_sample(evsel, sample, &al, ann, machine)) {
 		pr_warning("problem incrementing symbol count, "
 			   "skipping event\n");
 		ret = -1;
@@ -238,6 +300,10 @@ static void hists__find_annotations(struct hists *hists,
 		if (he->ms.sym == NULL || he->ms.map->dso->annotate_warned)
 			goto find_next;
 
+		if (ann->sym_hist_filter &&
+		    (strcmp(he->ms.sym->name, ann->sym_hist_filter) != 0))
+			goto find_next;
+
 		notes = symbol__annotation(he->ms.sym);
 		if (notes->src == NULL) {
 find_next:
@@ -269,6 +335,7 @@ find_next:
 			nd = rb_next(nd);
 		} else if (use_browser == 1) {
 			key = hist_entry__tui_annotate(he, evsel, NULL);
+
 			switch (key) {
 			case -1:
 				if (!ann->skip_missing)
@@ -489,6 +556,9 @@ int cmd_annotate(int argc, const char **argv)
 	if (annotate.session == NULL)
 		return -1;
 
+	annotate.has_br_stack = perf_header__has_feat(&annotate.session->header,
+						      HEADER_BRANCH_STACK);
+
 	ret = symbol__annotation_init();
 	if (ret < 0)
 		goto out_delete;
@@ -499,9 +569,6 @@ int cmd_annotate(int argc, const char **argv)
 	if (ret < 0)
 		goto out_delete;
 
-	if (setup_sorting(NULL) < 0)
-		usage_with_options(annotate_usage, options);
-
 	if (annotate.use_stdio)
 		use_browser = 0;
 	else if (annotate.use_tui)
@@ -511,6 +578,15 @@ int cmd_annotate(int argc, const char **argv)
 
 	setup_browser(true);
 
+	if (use_browser == 1 && annotate.has_br_stack) {
+		sort__mode = SORT_MODE__BRANCH;
+		if (setup_sorting(annotate.session->evlist) < 0)
+			usage_with_options(annotate_usage, options);
+	} else {
+		if (setup_sorting(NULL) < 0)
+			usage_with_options(annotate_usage, options);
+	}
+
 	ret = __cmd_annotate(&annotate);
 
 out_delete:
 
@@ -237,9 +237,12 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 	if (mi == NULL)
 		return -ENOMEM;
 
-	mi_dup = memdup(mi, sizeof(*mi));
-	if (!mi_dup)
-		goto free_mi;
+	/*
+	 * The mi object is released in hists__add_entry_ops,
+	 * if it gets sorted out into existing data, so we need
+	 * to take the copy now.
+	 */
+	mi_dup = mem_info__get(mi);
 
 	c2c_decode_stats(&stats, mi);
 
@@ -247,7 +250,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 				  &al, NULL, NULL, mi,
 				  sample, true);
 	if (he == NULL)
-		goto free_mi_dup;
+		goto free_mi;
 
 	c2c_he = container_of(he, struct c2c_hist_entry, he);
 	c2c_add_stats(&c2c_he->stats, &stats);
@@ -272,19 +275,15 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 
 		mi = mi_dup;
 
-		mi_dup = memdup(mi, sizeof(*mi));
-		if (!mi_dup)
-			goto free_mi;
-
 		c2c_hists = he__get_c2c_hists(he, c2c.cl_sort, 2);
 		if (!c2c_hists)
-			goto free_mi_dup;
+			goto free_mi;
 
 		he = hists__add_entry_ops(&c2c_hists->hists, &c2c_entry_ops,
 					  &al, NULL, NULL, mi,
 					  sample, true);
 		if (he == NULL)
-			goto free_mi_dup;
+			goto free_mi;
 
 		c2c_he = container_of(he, struct c2c_hist_entry, he);
 		c2c_add_stats(&c2c_he->stats, &stats);
@@ -303,10 +302,9 @@ out:
 	addr_location__put(&al);
 	return ret;
 
-free_mi_dup:
-	free(mi_dup);
 free_mi:
-	free(mi);
+	mem_info__put(mi_dup);
+	mem_info__put(mi);
 	ret = -ENOMEM;
 	goto out;
 }
 
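The builtin-c2c.c change above replaces memdup()+free() pairs with reference counting on the shared mem_info object. A minimal standalone model of that get/put pattern (a plain int counter here for brevity; tools/perf uses refcount_t and mem_info__get()/mem_info__put()):

	#include <stdlib.h>

	struct mem_info_like {
		int data;
		int refcnt;	/* tools/perf uses refcount_t */
	};

	static struct mem_info_like *obj_get(struct mem_info_like *mi)
	{
		if (mi)
			mi->refcnt++;	/* each owner holds one reference */
		return mi;
	}

	static void obj_put(struct mem_info_like *mi)
	{
		if (mi && --mi->refcnt == 0)
			free(mi);	/* last owner frees */
	}

	int main(void)
	{
		struct mem_info_like *mi = calloc(1, sizeof(*mi));

		mi->refcnt = 1;
		struct mem_info_like *dup = obj_get(mi);	/* was: memdup(mi, ...) */

		obj_put(dup);	/* each owner drops its own reference */
		obj_put(mi);	/* the struct is freed exactly once */
		return 0;
	}

Because both "copies" now point at one refcounted object, the use-after-free crashes the commit message mentions (one copy freed while another was still in a hist entry) cannot happen.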
@@ -746,21 +746,20 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
 	struct perf_evlist *evlist = kvm->evlist;
 	union perf_event *event;
 	struct perf_mmap *md;
-	u64 end, start;
 	u64 timestamp;
 	s64 n = 0;
 	int err;
 
 	*mmap_time = ULLONG_MAX;
 	md = &evlist->mmap[idx];
-	err = perf_mmap__read_init(md, false, &start, &end);
+	err = perf_mmap__read_init(md);
 	if (err < 0)
 		return (err == -EAGAIN) ? 0 : -1;
 
-	while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+	while ((event = perf_mmap__read_event(md)) != NULL) {
 		err = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
 		if (err) {
-			perf_mmap__consume(md, false);
+			perf_mmap__consume(md);
 			pr_err("Failed to parse sample\n");
 			return -1;
 		}
@@ -770,7 +769,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
 	 * FIXME: Here we can't consume the event, as perf_session__queue_event will
 	 *        point to it, and it'll get possibly overwritten by the kernel.
 	 */
-	perf_mmap__consume(md, false);
+	perf_mmap__consume(md);
 
 	if (err) {
 		pr_err("Failed to enqueue sample: %d\n", err);
 
@@ -71,7 +71,6 @@ struct record {
 	struct auxtrace_record	*itr;
 	struct perf_evlist	*evlist;
 	struct perf_session	*session;
-	const char		*progname;
 	int			realtime_prio;
 	bool			no_buildid;
 	bool			no_buildid_set;
@@ -274,6 +273,24 @@ static void record__read_auxtrace_snapshot(struct record *rec)
 	}
 }
 
+static int record__auxtrace_init(struct record *rec)
+{
+	int err;
+
+	if (!rec->itr) {
+		rec->itr = auxtrace_record__init(rec->evlist, &err);
+		if (err)
+			return err;
+	}
+
+	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
+					      rec->opts.auxtrace_snapshot_opts);
+	if (err)
+		return err;
+
+	return auxtrace_parse_filters(rec->evlist);
+}
+
 #else
 
 static inline
@@ -294,6 +311,11 @@ int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
 	return 0;
 }
 
+static int record__auxtrace_init(struct record *rec __maybe_unused)
+{
+	return 0;
+}
+
 #endif
 
 static int record__mmap_evlist(struct record *rec,
@@ -510,7 +532,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
 		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
 
 		if (maps[i].base) {
-			if (perf_mmap__push(&maps[i], overwrite, rec, record__pushfn) != 0) {
+			if (perf_mmap__push(&maps[i], rec, record__pushfn) != 0) {
 				rc = -1;
 				goto out;
 			}
@@ -831,7 +853,6 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	int status = 0;
 	unsigned long waking = 0;
 	const bool forks = argc > 0;
-	struct machine *machine;
 	struct perf_tool *tool = &rec->tool;
 	struct record_opts *opts = &rec->opts;
 	struct perf_data *data = &rec->data;
@@ -839,8 +860,6 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	bool disabled = false, draining = false;
 	int fd;
 
-	rec->progname = argv[0];
-
 	atexit(record__sig_exit);
 	signal(SIGCHLD, sig_handler);
 	signal(SIGINT, sig_handler);
@@ -936,8 +955,6 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		goto out_child;
 	}
 
-	machine = &session->machines.host;
-
 	err = record__synthesize(rec, false);
 	if (err < 0)
 		goto out_child;
@@ -965,6 +982,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	 * Let the child rip
 	 */
 	if (forks) {
+		struct machine *machine = &session->machines.host;
 		union perf_event *event;
 		pid_t tgid;
 
@@ -1727,17 +1745,6 @@ int cmd_record(int argc, const char **argv)
 		alarm(rec->switch_output.time);
 	}
 
-	if (!rec->itr) {
-		rec->itr = auxtrace_record__init(rec->evlist, &err);
-		if (err)
-			goto out;
-	}
-
-	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
-					      rec->opts.auxtrace_snapshot_opts);
-	if (err)
-		goto out;
-
 	/*
 	 * Allow aliases to facilitate the lookup of symbols for address
 	 * filters. Refer to auxtrace_parse_filters().
@@ -1746,7 +1753,7 @@ int cmd_record(int argc, const char **argv)
 
 	symbol__init(NULL);
 
-	err = auxtrace_parse_filters(rec->evlist);
+	err = record__auxtrace_init(rec);
 	if (err)
 		goto out;
 
@@ -400,8 +400,10 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
 
 	nr_samples = convert_unit(nr_samples, &unit);
 	ret = fprintf(fp, "# Samples: %lu%c", nr_samples, unit);
-	if (evname != NULL)
-		ret += fprintf(fp, " of event '%s'", evname);
+	if (evname != NULL) {
+		ret += fprintf(fp, " of event%s '%s'",
+			       evsel->nr_members > 1 ? "s" : "", evname);
+	}
 
 	if (rep->time_str)
 		ret += fprintf(fp, " (time slices: %s)", rep->time_str);
@@ -1175,8 +1177,17 @@ repeat:
 	has_br_stack = perf_header__has_feat(&session->header,
 					     HEADER_BRANCH_STACK);
 
-	if (group_set && !session->evlist->nr_groups)
+	/*
+	 * Events in data file are not collect in groups, but we still want
+	 * the group display. Set the artificial group and set the leader's
+	 * forced_leader flag to notify the display code.
+	 */
+	if (group_set && !session->evlist->nr_groups) {
+		struct perf_evsel *leader = perf_evlist__first(session->evlist);
+
 		perf_evlist__set_leader(session->evlist);
+		leader->forced_leader = true;
+	}
 
 	if (itrace_synth_opts.last_branch)
 		has_br_stack = true;
@@ -1337,6 +1348,15 @@ repeat:
 		report.range_num = 1;
 	}
 
+	if (session->tevent.pevent &&
+	    pevent_set_function_resolver(session->tevent.pevent,
+					 machine__resolve_kernel_addr,
+					 &session->machines.host) < 0) {
+		pr_err("%s: failed to set libtraceevent function resolver\n",
+		       __func__);
+		return -1;
+	}
+
 	sort__setup_elide(stdout);
 
 	ret = __cmd_report(&report);
 
@@ -254,6 +254,10 @@ struct thread_runtime {
 	u64 total_delay_time;
 
 	int last_state;
+
+	char shortname[3];
+	bool comm_changed;
+
 	u64 migrations;
 };
 
@@ -897,6 +901,37 @@ struct sort_dimension {
 	struct list_head	list;
 };
 
+/*
+ * handle runtime stats saved per thread
+ */
+static struct thread_runtime *thread__init_runtime(struct thread *thread)
+{
+	struct thread_runtime *r;
+
+	r = zalloc(sizeof(struct thread_runtime));
+	if (!r)
+		return NULL;
+
+	init_stats(&r->run_stats);
+	thread__set_priv(thread, r);
+
+	return r;
+}
+
+static struct thread_runtime *thread__get_runtime(struct thread *thread)
+{
+	struct thread_runtime *tr;
+
+	tr = thread__priv(thread);
+	if (tr == NULL) {
+		tr = thread__init_runtime(thread);
+		if (tr == NULL)
+			pr_debug("Failed to malloc memory for runtime data.\n");
+	}
+
+	return tr;
+}
+
 static int
 thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
 {
@@ -1480,6 +1515,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 {
 	const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
 	struct thread *sched_in;
+	struct thread_runtime *tr;
 	int new_shortname;
 	u64 timestamp0, timestamp = sample->time;
 	s64 delta;
@@ -1519,22 +1555,28 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 	if (sched_in == NULL)
 		return -1;
 
+	tr = thread__get_runtime(sched_in);
+	if (tr == NULL) {
+		thread__put(sched_in);
+		return -1;
+	}
+
 	sched->curr_thread[this_cpu] = thread__get(sched_in);
 
 	printf("  ");
 
 	new_shortname = 0;
-	if (!sched_in->shortname[0]) {
+	if (!tr->shortname[0]) {
 		if (!strcmp(thread__comm_str(sched_in), "swapper")) {
 			/*
 			 * Don't allocate a letter-number for swapper:0
 			 * as a shortname. Instead, we use '.' for it.
 			 */
-			sched_in->shortname[0] = '.';
-			sched_in->shortname[1] = ' ';
+			tr->shortname[0] = '.';
+			tr->shortname[1] = ' ';
 		} else {
-			sched_in->shortname[0] = sched->next_shortname1;
-			sched_in->shortname[1] = sched->next_shortname2;
+			tr->shortname[0] = sched->next_shortname1;
+			tr->shortname[1] = sched->next_shortname2;
 
 			if (sched->next_shortname1 < 'Z') {
 				sched->next_shortname1++;
@@ -1552,6 +1594,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 	for (i = 0; i < cpus_nr; i++) {
 		int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i;
 		struct thread *curr_thread = sched->curr_thread[cpu];
+		struct thread_runtime *curr_tr;
 		const char *pid_color = color;
 		const char *cpu_color = color;
 
@@ -1569,9 +1612,14 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 		else
 			color_fprintf(stdout, cpu_color, "*");
 
-		if (sched->curr_thread[cpu])
-			color_fprintf(stdout, pid_color, "%2s ", sched->curr_thread[cpu]->shortname);
-		else
+		if (sched->curr_thread[cpu]) {
+			curr_tr = thread__get_runtime(sched->curr_thread[cpu]);
+			if (curr_tr == NULL) {
+				thread__put(sched_in);
+				return -1;
+			}
+			color_fprintf(stdout, pid_color, "%2s ", curr_tr->shortname);
+		} else
 			color_fprintf(stdout, color, "   ");
 	}
 
@@ -1580,14 +1628,15 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 
 	timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
 	color_fprintf(stdout, color, "  %12s secs ", stimestamp);
-	if (new_shortname || (verbose > 0 && sched_in->tid)) {
+	if (new_shortname || tr->comm_changed || (verbose > 0 && sched_in->tid)) {
 		const char *pid_color = color;
 
 		if (thread__has_color(sched_in))
 			pid_color = COLOR_PIDS;
 
 		color_fprintf(stdout, pid_color, "%s => %s:%d",
-			      sched_in->shortname, thread__comm_str(sched_in), sched_in->tid);
+			      tr->shortname, thread__comm_str(sched_in), sched_in->tid);
+		tr->comm_changed = false;
 	}
 
 	if (sched->map.comp && new_cpu)
@@ -1691,6 +1740,37 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_
 	return err;
 }
 
+static int perf_sched__process_comm(struct perf_tool *tool __maybe_unused,
+				    union perf_event *event,
+				    struct perf_sample *sample,
+				    struct machine *machine)
+{
+	struct thread *thread;
+	struct thread_runtime *tr;
+	int err;
+
+	err = perf_event__process_comm(tool, event, sample, machine);
+	if (err)
+		return err;
+
+	thread = machine__find_thread(machine, sample->pid, sample->tid);
+	if (!thread) {
+		pr_err("Internal error: can't find thread\n");
+		return -1;
+	}
+
+	tr = thread__get_runtime(thread);
+	if (tr == NULL) {
+		thread__put(thread);
+		return -1;
+	}
+
+	tr->comm_changed = true;
+	thread__put(thread);
+
+	return 0;
+}
+
 static int perf_sched__read_events(struct perf_sched *sched)
 {
 	const struct perf_evsel_str_handler handlers[] = {
@@ -2200,37 +2280,6 @@ static void save_idle_callchain(struct idle_thread_runtime *itr,
 	callchain_cursor__copy(&itr->cursor, &callchain_cursor);
 }
 
-/*
- * handle runtime stats saved per thread
- */
-static struct thread_runtime *thread__init_runtime(struct thread *thread)
-{
-	struct thread_runtime *r;
-
-	r = zalloc(sizeof(struct thread_runtime));
-	if (!r)
-		return NULL;
-
-	init_stats(&r->run_stats);
-	thread__set_priv(thread, r);
-
-	return r;
-}
-
-static struct thread_runtime *thread__get_runtime(struct thread *thread)
-{
-	struct thread_runtime *tr;
-
-	tr = thread__priv(thread);
-	if (tr == NULL) {
-		tr = thread__init_runtime(thread);
-		if (tr == NULL)
-			pr_debug("Failed to malloc memory for runtime data.\n");
-	}
-
-	return tr;
-}
-
 static struct thread *timehist_get_thread(struct perf_sched *sched,
 					  struct perf_sample *sample,
 					  struct machine *machine,
@@ -3291,7 +3340,7 @@ int cmd_sched(int argc, const char **argv)
 	struct perf_sched sched = {
 		.tool = {
 			.sample		 = perf_sched__process_tracepoint_sample,
-			.comm		 = perf_event__process_comm,
+			.comm		 = perf_sched__process_comm,
 			.namespaces	 = perf_event__process_namespaces,
 			.lost		 = perf_event__process_lost,
 			.fork		 = perf_sched__process_fork_event,
 
@@ -1251,6 +1251,31 @@ static void aggr_update_shadow(void)
 	}
 }
 
+static void uniquify_event_name(struct perf_evsel *counter)
+{
+	char *new_name;
+	char *config;
+
+	if (!counter->pmu_name || !strncmp(counter->name, counter->pmu_name,
+					   strlen(counter->pmu_name)))
+		return;
+
+	config = strchr(counter->name, '/');
+	if (config) {
+		if (asprintf(&new_name,
+			     "%s%s", counter->pmu_name, config) > 0) {
+			free(counter->name);
+			counter->name = new_name;
+		}
+	} else {
+		if (asprintf(&new_name,
+			     "%s [%s]", counter->name, counter->pmu_name) > 0) {
+			free(counter->name);
+			counter->name = new_name;
+		}
+	}
+}
+
 static void collect_all_aliases(struct perf_evsel *counter,
 			    void (*cb)(struct perf_evsel *counter, void *data,
 				       bool first),
@@ -1279,7 +1304,9 @@ static bool collect_data(struct perf_evsel *counter,
 	if (counter->merged_stat)
 		return false;
 	cb(counter, data, true);
-	if (!no_merge && counter->auto_merge_stats)
+	if (no_merge)
+		uniquify_event_name(counter);
+	else if (counter->auto_merge_stats)
 		collect_all_aliases(counter, cb, data);
 	return true;
 }
 
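What uniquify_event_name() produces can be seen with a standalone re-implementation of its naming rule (illustrative only; simplified from the function above, without the in-place counter update):

	#define _GNU_SOURCE	/* asprintf */
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* If the event name already starts with the PMU name, keep it.
	 * For "event/config/" style names, prepend the PMU name;
	 * otherwise append " [pmu]". */
	static char *uniquify(const char *name, const char *pmu)
	{
		char *out = NULL;
		const char *config = strchr(name, '/');

		if (!strncmp(name, pmu, strlen(pmu)))
			return strdup(name);
		if (config)
			asprintf(&out, "%s%s", pmu, config);
		else
			asprintf(&out, "%s [%s]", name, pmu);
		return out;
	}

	int main(void)
	{
		printf("%s\n", uniquify("imc/cas_count_read/", "uncore_imc_0"));
		printf("%s\n", uniquify("cas_count_read", "uncore_imc_0"));
		return 0;
	}

This prints "uncore_imc_0/cas_count_read/" and "cas_count_read [uncore_imc_0]", which is how unmerged rows stay distinguishable under --no-merge.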
@@ -817,14 +817,13 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 	struct perf_session *session = top->session;
 	union perf_event *event;
 	struct machine *machine;
-	u64 end, start;
 	int ret;
 
 	md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
-	if (perf_mmap__read_init(md, opts->overwrite, &start, &end) < 0)
+	if (perf_mmap__read_init(md) < 0)
 		return;
 
-	while ((event = perf_mmap__read_event(md, opts->overwrite, &start, end)) != NULL) {
+	while ((event = perf_mmap__read_event(md)) != NULL) {
 		ret = perf_evlist__parse_sample(evlist, event, &sample);
 		if (ret) {
 			pr_err("Can't parse sample, err = %d\n", ret);
@@ -879,7 +878,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 		} else
 			++session->evlist->stats.nr_unknown_events;
 next_event:
-		perf_mmap__consume(md, opts->overwrite);
+		perf_mmap__consume(md);
 	}
 
 	perf_mmap__read_done(md);
 
@@ -19,6 +19,7 @@
 #include <traceevent/event-parse.h>
 #include <api/fs/tracing_path.h>
 #include "builtin.h"
+#include "util/cgroup.h"
 #include "util/color.h"
 #include "util/debug.h"
 #include "util/env.h"
@@ -83,6 +84,7 @@ struct trace {
 	struct perf_evlist	*evlist;
 	struct machine		*host;
 	struct thread		*current;
+	struct cgroup		*cgroup;
 	u64			base_time;
 	FILE			*output;
 	unsigned long		nr_events;
@@ -2370,6 +2372,34 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 					    trace__sched_stat_runtime))
 		goto out_error_sched_stat_runtime;
 
+	/*
+	 * If a global cgroup was set, apply it to all the events without an
+	 * explicit cgroup. I.e.:
+	 *
+	 * 	trace -G A -e sched:*switch
+	 *
+	 * Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc
+	 * _and_ sched:sched_switch to the 'A' cgroup, while:
+	 *
+	 * 	trace -e sched:*switch -G A
+	 *
+	 * will only set the sched:sched_switch event to the 'A' cgroup, all the
+	 * other events (raw_syscalls:sys_{enter,exit}, etc are left "without"
+	 * a cgroup (on the root cgroup, sys wide, etc).
+	 *
+	 * Multiple cgroups:
+	 *
+	 * 	trace -G A -e sched:*switch -G B
+	 *
+	 * the syscall ones go to the 'A' cgroup, the sched:sched_switch goes
+	 * to the 'B' cgroup.
+	 *
+	 * evlist__set_default_cgroup() grabs a reference of the passed cgroup
+	 * only for the evsels still without a cgroup, i.e. evsel->cgroup == NULL.
+	 */
+	if (trace->cgroup)
+		evlist__set_default_cgroup(trace->evlist, trace->cgroup);
+
 	err = perf_evlist__create_maps(evlist, &trace->opts.target);
 	if (err < 0) {
 		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
@@ -2473,13 +2503,12 @@ again:
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		union perf_event *event;
 		struct perf_mmap *md;
-		u64 end, start;
 
 		md = &evlist->mmap[i];
-		if (perf_mmap__read_init(md, false, &start, &end) < 0)
+		if (perf_mmap__read_init(md) < 0)
 			continue;
 
-		while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+		while ((event = perf_mmap__read_event(md)) != NULL) {
 			struct perf_sample sample;
 
 			++trace->nr_events;
@@ -2492,7 +2521,7 @@ again:
 
 			trace__handle_event(trace, event, &sample);
 next_event:
-			perf_mmap__consume(md, false);
+			perf_mmap__consume(md);
 
 			if (interrupted)
 				goto out_disable;
@@ -2540,6 +2569,7 @@ out_delete_evlist:
 	trace__symbols__exit(trace);
 
 	perf_evlist__delete(evlist);
+	cgroup__put(trace->cgroup);
 	trace->evlist = NULL;
 	trace->live = false;
 	return err;
@@ -2979,6 +3009,18 @@ out:
 	return err;
 }
 
+static int trace__parse_cgroups(const struct option *opt, const char *str, int unset)
+{
+	struct trace *trace = opt->value;
+
+	if (!list_empty(&trace->evlist->entries))
+		return parse_cgroups(opt, str, unset);
+
+	trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
+
+	return 0;
+}
+
 int cmd_trace(int argc, const char **argv)
 {
 	const char *trace_usage[] = {
@@ -3069,6 +3111,8 @@ int cmd_trace(int argc, const char **argv)
 		    "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
 	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
 		     "per thread proc mmap processing timeout in ms"),
+	OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
+		     trace__parse_cgroups),
 	OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
 		     "ms to wait before starting measurement after program "
 		     "start"),
@@ -3095,6 +3139,11 @@ int cmd_trace(int argc, const char **argv)
 	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
 				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
 
+	if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
+		usage_with_options_msg(trace_usage, trace_options,
+				       "cgroup monitoring only available in system-wide mode");
+	}
+
 	err = bpf__setup_stdout(trace.evlist);
 	if (err) {
 		bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
 
@@ -33,10 +33,9 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count,
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		struct perf_mmap *map = &evlist->overwrite_mmap[i];
 		union perf_event *event;
-		u64 start, end;
 
-		perf_mmap__read_init(map, true, &start, &end);
-		while ((event = perf_mmap__read_event(map, true, &start, end)) != NULL) {
+		perf_mmap__read_init(map);
+		while ((event = perf_mmap__read_event(map)) != NULL) {
 			const u32 type = event->header.type;
 
 			switch (type) {
 
@@ -177,13 +177,12 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		union perf_event *event;
 		struct perf_mmap *md;
-		u64 end, start;
 
 		md = &evlist->mmap[i];
-		if (perf_mmap__read_init(md, false, &start, &end) < 0)
+		if (perf_mmap__read_init(md) < 0)
 			continue;
 
-		while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+		while ((event = perf_mmap__read_event(md)) != NULL) {
 			const u32 type = event->header.type;
 
 			if (type == PERF_RECORD_SAMPLE)
 
@@ -410,17 +410,16 @@ static int process_events(struct machine *machine, struct perf_evlist *evlist,
 {
 	union perf_event *event;
 	struct perf_mmap *md;
-	u64 end, start;
 	int i, ret;
 
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		md = &evlist->mmap[i];
-		if (perf_mmap__read_init(md, false, &start, &end) < 0)
+		if (perf_mmap__read_init(md) < 0)
 			continue;
 
-		while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+		while ((event = perf_mmap__read_event(md)) != NULL) {
 			ret = process_event(machine, evlist, event, state);
-			perf_mmap__consume(md, false);
+			perf_mmap__consume(md);
 			if (ret < 0)
 				return ret;
 		}
 
@@ -28,21 +28,20 @@ static int find_comm(struct perf_evlist *evlist, const char *comm)
 {
 	union perf_event *event;
 	struct perf_mmap *md;
-	u64 end, start;
 	int i, found;
 
 	found = 0;
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		md = &evlist->mmap[i];
-		if (perf_mmap__read_init(md, false, &start, &end) < 0)
+		if (perf_mmap__read_init(md) < 0)
 			continue;
-		while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+		while ((event = perf_mmap__read_event(md)) != NULL) {
 			if (event->header.type == PERF_RECORD_COMM &&
 			    (pid_t)event->comm.pid == getpid() &&
 			    (pid_t)event->comm.tid == getpid() &&
 			    strcmp(event->comm.comm, comm) == 0)
 				found += 1;
-			perf_mmap__consume(md, false);
+			perf_mmap__consume(md);
 		}
 		perf_mmap__read_done(md);
 	}
 
@@ -39,7 +39,6 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
 	struct perf_evsel *evsels[nsyscalls], *evsel;
 	char sbuf[STRERR_BUFSIZE];
 	struct perf_mmap *md;
-	u64 end, start;
 
 	threads = thread_map__new(-1, getpid(), UINT_MAX);
 	if (threads == NULL) {
@@ -109,10 +108,10 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
 	}
 
 	md = &evlist->mmap[0];
-	if (perf_mmap__read_init(md, false, &start, &end) < 0)
+	if (perf_mmap__read_init(md) < 0)
 		goto out_init;
 
-	while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+	while ((event = perf_mmap__read_event(md)) != NULL) {
 		struct perf_sample sample;
 
 		if (event->header.type != PERF_RECORD_SAMPLE) {
@@ -135,7 +134,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
 			goto out_delete_evlist;
 		}
 		nr_events[evsel->idx]++;
-		perf_mmap__consume(md, false);
+		perf_mmap__consume(md);
 	}
 	perf_mmap__read_done(md);
 
@@ -87,13 +87,12 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		union perf_event *event;
 		struct perf_mmap *md;
-		u64 end, start;
 
 		md = &evlist->mmap[i];
-		if (perf_mmap__read_init(md, false, &start, &end) < 0)
+		if (perf_mmap__read_init(md) < 0)
 			continue;
 
-		while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+		while ((event = perf_mmap__read_event(md)) != NULL) {
 			const u32 type = event->header.type;
 			int tp_flags;
 			struct perf_sample sample;
@@ -101,7 +100,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
 			++nr_events;
 
 			if (type != PERF_RECORD_SAMPLE) {
-				perf_mmap__consume(md, false);
+				perf_mmap__consume(md);
 				continue;
 			}
 
@@ -165,13 +165,12 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		union perf_event *event;
 		struct perf_mmap *md;
-		u64 end, start;
 
 		md = &evlist->mmap[i];
-		if (perf_mmap__read_init(md, false, &start, &end) < 0)
+		if (perf_mmap__read_init(md) < 0)
 			continue;
 
-		while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+		while ((event = perf_mmap__read_event(md)) != NULL) {
 			const u32 type = event->header.type;
 			const char *name = perf_event__name(type);
 
@@ -272,7 +271,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
 				++errs;
 			}
 
-			perf_mmap__consume(md, false);
+			perf_mmap__consume(md);
 		}
 		perf_mmap__read_done(md);
 	}
 
@@ -40,7 +40,6 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
 	struct cpu_map *cpus;
 	struct thread_map *threads;
 	struct perf_mmap *md;
-	u64 end, start;
 
 	attr.sample_freq = 500;
 
@@ -96,10 +95,10 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
 	perf_evlist__disable(evlist);
 
 	md = &evlist->mmap[0];
-	if (perf_mmap__read_init(md, false, &start, &end) < 0)
+	if (perf_mmap__read_init(md) < 0)
 		goto out_init;
 
-	while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+	while ((event = perf_mmap__read_event(md)) != NULL) {
 		struct perf_sample sample;
 
 		if (event->header.type != PERF_RECORD_SAMPLE)
@@ -114,7 +113,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
 		total_periods += sample.period;
 		nr_samples++;
 next_event:
-		perf_mmap__consume(md, false);
+		perf_mmap__consume(md);
 	}
 	perf_mmap__read_done(md);
 
@@ -259,18 +259,17 @@ static int process_events(struct perf_evlist *evlist,
 	LIST_HEAD(events);
 	struct event_node *events_array, *node;
 	struct perf_mmap *md;
-	u64 end, start;
 	int i, ret;
 
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		md = &evlist->mmap[i];
-		if (perf_mmap__read_init(md, false, &start, &end) < 0)
+		if (perf_mmap__read_init(md) < 0)
 			continue;
 
-		while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+		while ((event = perf_mmap__read_event(md)) != NULL) {
 			cnt += 1;
 			ret = add_event(evlist, &events, event);
-			perf_mmap__consume(md, false);
+			perf_mmap__consume(md);
 			if (ret < 0)
 				goto out_free_nodes;
 		}
 
@@ -48,7 +48,6 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
 	struct cpu_map *cpus;
 	struct thread_map *threads;
 	struct perf_mmap *md;
-	u64 end, start;
 
 	signal(SIGCHLD, sig_handler);
 
@@ -113,14 +112,14 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
 
 retry:
 	md = &evlist->mmap[0];
-	if (perf_mmap__read_init(md, false, &start, &end) < 0)
+	if (perf_mmap__read_init(md) < 0)
 		goto out_init;
 
-	while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+	while ((event = perf_mmap__read_event(md)) != NULL) {
 		if (event->header.type == PERF_RECORD_EXIT)
 			nr_exit++;
 
-		perf_mmap__consume(md, false);
+		perf_mmap__consume(md);
 	}
 	perf_mmap__read_done(md);
 
@@ -2261,8 +2261,9 @@ static int perf_evsel_browser_title(struct hist_browser *browser,
 
 	nr_samples = convert_unit(nr_samples, &unit);
 	printed = scnprintf(bf, size,
-			   "Samples: %lu%c of event '%s',%s%sEvent count (approx.): %" PRIu64,
-			   nr_samples, unit, ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events);
+			   "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64,
+			   nr_samples, unit, evsel->nr_members > 1 ? "s" : "",
+			   ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events);
 
 
 	if (hists->uid_filter_str)
 
@@ -248,7 +248,7 @@ static struct ins_ops call_ops = {
 
 bool ins__is_call(const struct ins *ins)
 {
-	return ins->ops == &call_ops;
+	return ins->ops == &call_ops || ins->ops == &s390_call_ops;
 }
 
 static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused)
 
@@ -233,9 +233,9 @@ static void *auxtrace_copy_data(u64 size, struct perf_session *session)
 	return p;
 }
 
-static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
-				       unsigned int idx,
-				       struct auxtrace_buffer *buffer)
+static int auxtrace_queues__queue_buffer(struct auxtrace_queues *queues,
+					 unsigned int idx,
+					 struct auxtrace_buffer *buffer)
 {
 	struct auxtrace_queue *queue;
 	int err;
@@ -286,7 +286,7 @@ static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
 			return -ENOMEM;
 		b->size = BUFFER_LIMIT_FOR_32_BIT;
 		b->consecutive = consecutive;
-		err = auxtrace_queues__add_buffer(queues, idx, b);
+		err = auxtrace_queues__queue_buffer(queues, idx, b);
 		if (err) {
 			auxtrace_buffer__free(b);
 			return err;
@@ -302,11 +302,14 @@ static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
 	return 0;
 }
 
-static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues,
-					     struct perf_session *session,
-					     unsigned int idx,
-					     struct auxtrace_buffer *buffer)
+static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
+				       struct perf_session *session,
+				       unsigned int idx,
+				       struct auxtrace_buffer *buffer,
+				       struct auxtrace_buffer **buffer_ptr)
 {
+	int err;
+
 	if (session->one_mmap) {
 		buffer->data = buffer->data_offset - session->one_mmap_offset +
 			       session->one_mmap_addr;
@@ -317,14 +320,20 @@ static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues,
 		buffer->data_needs_freeing = true;
 	} else if (BITS_PER_LONG == 32 &&
 		   buffer->size > BUFFER_LIMIT_FOR_32_BIT) {
-		int err;
-
 		err = auxtrace_queues__split_buffer(queues, idx, buffer);
 		if (err)
 			return err;
 	}
 
-	return auxtrace_queues__add_buffer(queues, idx, buffer);
+	err = auxtrace_queues__queue_buffer(queues, idx, buffer);
+	if (err)
+		return err;
+
+	/* FIXME: Doesn't work for split buffer */
+	if (buffer_ptr)
+		*buffer_ptr = buffer;
+
+	return 0;
 }
 
 static bool filter_cpu(struct perf_session *session, int cpu)
@@ -359,13 +368,11 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues,
 	buffer->size = event->auxtrace.size;
 	idx = event->auxtrace.idx;
 
-	err = auxtrace_queues__add_event_buffer(queues, session, idx, buffer);
+	err = auxtrace_queues__add_buffer(queues, session, idx, buffer,
+					  buffer_ptr);
 	if (err)
 		goto out_err;
 
-	if (buffer_ptr)
-		*buffer_ptr = buffer;
-
 	return 0;
 
 out_err:
 
@@ -130,6 +130,7 @@ struct auxtrace_index {
 /**
  * struct auxtrace - session callbacks to allow AUX area data decoding.
  * @process_event: lets the decoder see all session events
+ * @process_auxtrace_event: process a PERF_RECORD_AUXTRACE event
  * @flush_events: process any remaining data
  * @free_events: free resources associated with event processing
  * @free: free resources associated with the session

@@ -301,6 +302,7 @@ struct auxtrace_mmap_params {
  * @parse_snapshot_options: parse snapshot options
  * @reference: provide a 64-bit reference number for auxtrace_event
  * @read_finish: called after reading from an auxtrace mmap
+ * @alignment: alignment (if any) for AUX area data
  */
 struct auxtrace_record {
 	int (*recording_options)(struct auxtrace_record *itr,

@@ -71,7 +71,7 @@ cgroupfs_find_mountpoint(char *buf, size_t maxlen)
 	return -1;
 }

-static int open_cgroup(char *name)
+static int open_cgroup(const char *name)
 {
 	char path[PATH_MAX + 1];
 	char mnt[PATH_MAX + 1];

@@ -90,41 +90,64 @@ static int open_cgroup(char *name)
 	return fd;
 }

-static int add_cgroup(struct perf_evlist *evlist, char *str)
+static struct cgroup *evlist__find_cgroup(struct perf_evlist *evlist, const char *str)
 {
 	struct perf_evsel *counter;
-	struct cgroup_sel *cgrp = NULL;
-	int n;
+	struct cgroup *cgrp = NULL;
 	/*
 	 * check if cgrp is already defined, if so we reuse it
 	 */
 	evlist__for_each_entry(evlist, counter) {
-		cgrp = counter->cgrp;
-		if (!cgrp)
+		if (!counter->cgrp)
 			continue;
-		if (!strcmp(cgrp->name, str)) {
-			refcount_inc(&cgrp->refcnt);
+		if (!strcmp(counter->cgrp->name, str)) {
+			cgrp = cgroup__get(counter->cgrp);
 			break;
 		}
-
-		cgrp = NULL;
 	}

-	if (!cgrp) {
-		cgrp = zalloc(sizeof(*cgrp));
-		if (!cgrp)
-			return -1;
+	return cgrp;
+}

-		cgrp->name = str;
-		refcount_set(&cgrp->refcnt, 1);
+static struct cgroup *cgroup__new(const char *name)
+{
+	struct cgroup *cgroup = zalloc(sizeof(*cgroup));

-		cgrp->fd = open_cgroup(str);
-		if (cgrp->fd == -1) {
-			free(cgrp);
-			return -1;
-		}
+	if (cgroup != NULL) {
+		refcount_set(&cgroup->refcnt, 1);
+
+		cgroup->name = strdup(name);
+		if (!cgroup->name)
+			goto out_err;
+		cgroup->fd = open_cgroup(name);
+		if (cgroup->fd == -1)
+			goto out_free_name;
 	}

+	return cgroup;
+
+out_free_name:
+	free(cgroup->name);
+out_err:
+	free(cgroup);
+	return NULL;
+}
+
+struct cgroup *evlist__findnew_cgroup(struct perf_evlist *evlist, const char *name)
+{
+	struct cgroup *cgroup = evlist__find_cgroup(evlist, name);
+
+	return cgroup ?: cgroup__new(name);
+}
+
+static int add_cgroup(struct perf_evlist *evlist, const char *str)
+{
+	struct perf_evsel *counter;
+	struct cgroup *cgrp = evlist__findnew_cgroup(evlist, str);
+	int n;
+
+	if (!cgrp)
+		return -1;
 	/*
 	 * find corresponding event
 	 * if add cgroup N, then need to find event N

@@ -135,30 +158,55 @@ static int add_cgroup(struct perf_evlist *evlist, char *str)
 			goto found;
 		n++;
 	}
-	if (refcount_dec_and_test(&cgrp->refcnt))
-		free(cgrp);
-
+	cgroup__put(cgrp);
 	return -1;
 found:
 	counter->cgrp = cgrp;
 	return 0;
 }

-void close_cgroup(struct cgroup_sel *cgrp)
+static void cgroup__delete(struct cgroup *cgroup)
+{
+	close(cgroup->fd);
+	zfree(&cgroup->name);
+	free(cgroup);
+}
+
+void cgroup__put(struct cgroup *cgrp)
 {
 	if (cgrp && refcount_dec_and_test(&cgrp->refcnt)) {
-		close(cgrp->fd);
-		zfree(&cgrp->name);
-		free(cgrp);
+		cgroup__delete(cgrp);
 	}
 }

-int parse_cgroups(const struct option *opt __maybe_unused, const char *str,
+struct cgroup *cgroup__get(struct cgroup *cgroup)
+{
+	if (cgroup)
+		refcount_inc(&cgroup->refcnt);
+	return cgroup;
+}
+
+static void evsel__set_default_cgroup(struct perf_evsel *evsel, struct cgroup *cgroup)
+{
+	if (evsel->cgrp == NULL)
+		evsel->cgrp = cgroup__get(cgroup);
+}
+
+void evlist__set_default_cgroup(struct perf_evlist *evlist, struct cgroup *cgroup)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel)
+		evsel__set_default_cgroup(evsel, cgroup);
+}
+
+int parse_cgroups(const struct option *opt, const char *str,
 		  int unset __maybe_unused)
 {
 	struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
 	struct perf_evsel *counter;
-	struct cgroup_sel *cgrp = NULL;
+	struct cgroup *cgrp = NULL;
 	const char *p, *e, *eos = str + strlen(str);
 	char *s;
 	int ret, i;

@@ -179,10 +227,9 @@ int parse_cgroups(const struct option *opt __maybe_unused, const char *str,
 		if (!s)
 			return -1;
 		ret = add_cgroup(evlist, s);
-		if (ret) {
-			free(s);
+		free(s);
+		if (ret)
 			return -1;
-		}
 	}
 	/* nr_cgroups is increased even for empty cgroups */
 	nr_cgroups++;

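The get/put pair above brings cgroup handling in line with the refcounting convention already used for other tools/perf objects (thread__get/put, map__get/put): strdup the name so the object owns its data, take a reference when an existing cgroup is reused, and let the last put free it. A minimal standalone sketch of that pattern, using a hypothetical 'struct obj' with a plain int counter (the real code uses refcount_t), not perf code itself:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	struct obj {
		char *name;
		int   refcnt;
	};

	static struct obj *obj__new(const char *name)
	{
		struct obj *o = calloc(1, sizeof(*o));

		if (o != NULL) {
			o->refcnt = 1;		/* creator holds the first reference */
			o->name = strdup(name);	/* object owns its own copy of the name */
			if (!o->name) {
				free(o);
				return NULL;
			}
		}
		return o;
	}

	static struct obj *obj__get(struct obj *o)
	{
		if (o)
			o->refcnt++;		/* another user shares it, no copy made */
		return o;
	}

	static void obj__put(struct obj *o)
	{
		if (o && --o->refcnt == 0) {	/* last user frees */
			free(o->name);
			free(o);
		}
	}

	int main(void)
	{
		struct obj *a = obj__new("cgroup-like");
		struct obj *b = obj__get(a);

		obj__put(a);			/* still alive, b holds a reference */
		printf("%s\n", b->name);
		obj__put(b);			/* freed here */
		return 0;
	}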
@@ -6,7 +6,7 @@

 struct option;

-struct cgroup_sel {
+struct cgroup {
 	char *name;
 	int fd;
 	refcount_t refcnt;

@@ -14,7 +14,16 @@ struct cgroup_sel {

 extern int nr_cgroups; /* number of explicit cgroups defined */
-void close_cgroup(struct cgroup_sel *cgrp);
+
+struct cgroup *cgroup__get(struct cgroup *cgroup);
+void cgroup__put(struct cgroup *cgroup);
+
+struct perf_evlist;
+
+struct cgroup *evlist__findnew_cgroup(struct perf_evlist *evlist, const char *name);
+
+void evlist__set_default_cgroup(struct perf_evlist *evlist, struct cgroup *cgroup);

 int parse_cgroups(const struct option *opt, const char *str, int unset);

 #endif /* __CGROUP_H__ */

@@ -27,6 +27,12 @@ struct numa_node {
 	struct cpu_map	*map;
 };

+struct memory_node {
+	u64		 node;
+	u64		 size;
+	unsigned long	*set;
+};
+
 struct perf_env {
 	char			*hostname;
 	char			*os_release;

@@ -43,6 +49,7 @@ struct perf_env {
 	int			nr_sibling_cores;
 	int			nr_sibling_threads;
 	int			nr_numa_nodes;
+	int			nr_memory_nodes;
 	int			nr_pmu_mappings;
 	int			nr_groups;
 	char			*cmdline;

@@ -54,6 +61,8 @@ struct perf_env {
 	struct cpu_cache_level	*caches;
 	int			 caches_cnt;
 	struct numa_node	*numa_nodes;
+	struct memory_node	*memory_nodes;
+	unsigned long long	 memory_bsize;
 };

 extern struct perf_env perf_env;

@@ -722,7 +722,8 @@ void perf_evlist__munmap(struct perf_evlist *evlist)
 	zfree(&evlist->overwrite_mmap);
 }

-static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist)
+static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist,
+						 bool overwrite)
 {
 	int i;
 	struct perf_mmap *map;

@@ -736,6 +737,7 @@ static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist)

 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		map[i].fd = -1;
+		map[i].overwrite = overwrite;
 		/*
 		 * When the perf_mmap() call is made we grab one refcount, plus
 		 * one extra to let perf_mmap__consume() get the last

@@ -779,7 +781,7 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
 			maps = evlist->overwrite_mmap;

 			if (!maps) {
-				maps = perf_evlist__alloc_mmap(evlist);
+				maps = perf_evlist__alloc_mmap(evlist, true);
 				if (!maps)
 					return -1;
 				evlist->overwrite_mmap = maps;

@@ -1029,7 +1031,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 	struct mmap_params mp;

 	if (!evlist->mmap)
-		evlist->mmap = perf_evlist__alloc_mmap(evlist);
+		evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
 	if (!evlist->mmap)
 		return -ENOMEM;

@@ -244,6 +244,7 @@ void perf_evsel__init(struct perf_evsel *evsel,
 	evsel->metric_name = NULL;
 	evsel->metric_events = NULL;
 	evsel->collect_stat = false;
+	evsel->pmu_name = NULL;
 }

 struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)

@@ -621,22 +622,34 @@ const char *perf_evsel__group_name(struct perf_evsel *evsel)
 	return evsel->group_name ?: "anon group";
 }

+/*
+ * Returns the group details for the specified leader,
+ * with the following rules.
+ *
+ *  For record -e '{cycles,instructions}'
+ *    'anon group { cycles:u, instructions:u }'
+ *
+ *  For record -e 'cycles,instructions' and report --group
+ *    'cycles:u, instructions:u'
+ */
 int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
 {
-	int ret;
+	int ret = 0;
 	struct perf_evsel *pos;
 	const char *group_name = perf_evsel__group_name(evsel);

-	ret = scnprintf(buf, size, "%s", group_name);
+	if (!evsel->forced_leader)
+		ret = scnprintf(buf, size, "%s { ", group_name);

-	ret += scnprintf(buf + ret, size - ret, " { %s",
+	ret += scnprintf(buf + ret, size - ret, "%s",
 			 perf_evsel__name(evsel));

 	for_each_group_member(pos, evsel)
 		ret += scnprintf(buf + ret, size - ret, ", %s",
 				 perf_evsel__name(pos));

-	ret += scnprintf(buf + ret, size - ret, " }");
+	if (!evsel->forced_leader)
+		ret += scnprintf(buf + ret, size - ret, " }");

 	return ret;
 }

@@ -1233,7 +1246,7 @@ void perf_evsel__exit(struct perf_evsel *evsel)
 	perf_evsel__free_fd(evsel);
 	perf_evsel__free_id(evsel);
 	perf_evsel__free_config_terms(evsel);
-	close_cgroup(evsel->cgrp);
+	cgroup__put(evsel->cgrp);
 	cpu_map__put(evsel->cpus);
 	cpu_map__put(evsel->own_cpus);
 	thread_map__put(evsel->threads);

@@ -30,7 +30,7 @@ struct perf_sample_id {
 	u64			period;
 };

-struct cgroup_sel;
+struct cgroup;

 /*
  * The 'struct perf_evsel_config_term' is used to pass event

@@ -107,7 +107,7 @@ struct perf_evsel {
 	struct perf_stat_evsel  *stats;
 	void			*priv;
 	u64			db_id;
-	struct cgroup_sel	*cgrp;
+	struct cgroup		*cgrp;
 	void			*handler;
 	struct cpu_map		*cpus;
 	struct cpu_map		*own_cpus;

@@ -125,6 +125,7 @@ struct perf_evsel {
 	bool			per_pkg;
 	bool			precise_max;
 	bool			ignore_missing_thread;
+	bool			forced_leader;
 	/* parse modifier helper */
 	int			exclude_GH;
 	int			nr_members;

@@ -142,6 +143,7 @@ struct perf_evsel {
 	struct perf_evsel	**metric_events;
 	bool			collect_stat;
 	bool			weak_group;
+	const char		*pmu_name;
 };

 union u64_swap {

@@ -17,6 +17,7 @@
 #include <sys/stat.h>
 #include <sys/utsname.h>
 #include <linux/time64.h>
+#include <dirent.h>

 #include "evlist.h"
 #include "evsel.h"

@@ -37,6 +38,7 @@
 #include "asm/bug.h"
 #include "tool.h"
 #include "time-utils.h"
+#include "units.h"

 #include "sane_ctype.h"

@@ -131,6 +133,25 @@ int do_write(struct feat_fd *ff, const void *buf, size_t size)
 	return __do_write_buf(ff, buf, size);
 }

+/* Return: 0 if succeeded, -ERR if failed. */
+static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size)
+{
+	u64 *p = (u64 *) set;
+	int i, ret;
+
+	ret = do_write(ff, &size, sizeof(size));
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; (u64) i < BITS_TO_U64(size); i++) {
+		ret = do_write(ff, p + i, sizeof(*p));
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
 /* Return: 0 if succeeded, -ERR if failed. */
 int write_padded(struct feat_fd *ff, const void *bf,
 		 size_t count, size_t count_aligned)

@@ -243,6 +264,38 @@ static char *do_read_string(struct feat_fd *ff)
 	return NULL;
 }

+/* Return: 0 if succeeded, -ERR if failed. */
+static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize)
+{
+	unsigned long *set;
+	u64 size, *p;
+	int i, ret;
+
+	ret = do_read_u64(ff, &size);
+	if (ret)
+		return ret;
+
+	set = bitmap_alloc(size);
+	if (!set)
+		return -ENOMEM;
+
+	bitmap_zero(set, size);
+
+	p = (u64 *) set;
+
+	for (i = 0; (u64) i < BITS_TO_U64(size); i++) {
+		ret = do_read_u64(ff, p + i);
+		if (ret < 0) {
+			free(set);
+			return ret;
+		}
+	}
+
+	*pset  = set;
+	*psize = size;
+	return 0;
+}
+
 static int write_tracing_data(struct feat_fd *ff,
 			      struct perf_evlist *evlist)
 {

@@ -1196,6 +1249,176 @@ static int write_sample_time(struct feat_fd *ff,
 			sizeof(evlist->last_sample_time));
 }

+
+static int memory_node__read(struct memory_node *n, unsigned long idx)
+{
+	unsigned int phys, size = 0;
+	char path[PATH_MAX];
+	struct dirent *ent;
+	DIR *dir;
+
+#define for_each_memory(mem, dir)					\
+	while ((ent = readdir(dir)))					\
+		if (strcmp(ent->d_name, ".") &&				\
+		    strcmp(ent->d_name, "..") &&			\
+		    sscanf(ent->d_name, "memory%u", &mem) == 1)
+
+	scnprintf(path, PATH_MAX,
+		  "%s/devices/system/node/node%lu",
+		  sysfs__mountpoint(), idx);
+
+	dir = opendir(path);
+	if (!dir) {
+		pr_warning("failed: can't open memory sysfs data\n");
+		return -1;
+	}
+
+	for_each_memory(phys, dir) {
+		size = max(phys, size);
+	}
+
+	size++;
+
+	n->set = bitmap_alloc(size);
+	if (!n->set) {
+		closedir(dir);
+		return -ENOMEM;
+	}
+
+	bitmap_zero(n->set, size);
+	n->node = idx;
+	n->size = size;
+
+	rewinddir(dir);
+
+	for_each_memory(phys, dir) {
+		set_bit(phys, n->set);
+	}
+
+	closedir(dir);
+	return 0;
+}
+
+static int memory_node__sort(const void *a, const void *b)
+{
+	const struct memory_node *na = a;
+	const struct memory_node *nb = b;
+
+	return na->node - nb->node;
+}
+
+static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp)
+{
+	char path[PATH_MAX];
+	struct dirent *ent;
+	DIR *dir;
+	u64 cnt = 0;
+	int ret = 0;
+
+	scnprintf(path, PATH_MAX, "%s/devices/system/node/",
+		  sysfs__mountpoint());
+
+	dir = opendir(path);
+	if (!dir) {
+		pr_warning("failed: can't open node sysfs data\n");
+		return -1;
+	}
+
+	while (!ret && (ent = readdir(dir))) {
+		unsigned int idx;
+		int r;
+
+		if (!strcmp(ent->d_name, ".") ||
+		    !strcmp(ent->d_name, ".."))
+			continue;
+
+		r = sscanf(ent->d_name, "node%u", &idx);
+		if (r != 1)
+			continue;
+
+		if (WARN_ONCE(cnt >= size,
+			      "failed to write MEM_TOPOLOGY, way too many nodes\n"))
+			return -1;
+
+		ret = memory_node__read(&nodes[cnt++], idx);
+	}
+
+	*cntp = cnt;
+	closedir(dir);
+
+	if (!ret)
+		qsort(nodes, cnt, sizeof(nodes[0]), memory_node__sort);
+
+	return ret;
+}
+
+#define MAX_MEMORY_NODES 2000
+
+/*
+ * The MEM_TOPOLOGY holds the physical memory map for every
+ * node in the system. The format of the data is as follows:
+ *
+ *  0 - version          | for future changes
+ *  8 - block_size_bytes | /sys/devices/system/memory/block_size_bytes
+ * 16 - count            | number of nodes
+ *
+ * For each node we store map of physical indexes for
+ * each node:
+ *
+ * 32 - node id          | node index
+ * 40 - size             | size of bitmap
+ * 48 - bitmap           | bitmap of memory indexes that belongs to node
+ */
+static int write_mem_topology(struct feat_fd *ff __maybe_unused,
+			      struct perf_evlist *evlist __maybe_unused)
+{
+	static struct memory_node nodes[MAX_MEMORY_NODES];
+	u64 bsize, version = 1, i, nr;
+	int ret;
+
+	ret = sysfs__read_xll("devices/system/memory/block_size_bytes",
+			      (unsigned long long *) &bsize);
+	if (ret)
+		return ret;
+
+	ret = build_mem_topology(&nodes[0], MAX_MEMORY_NODES, &nr);
+	if (ret)
+		return ret;
+
+	ret = do_write(ff, &version, sizeof(version));
+	if (ret < 0)
+		goto out;
+
+	ret = do_write(ff, &bsize, sizeof(bsize));
+	if (ret < 0)
+		goto out;
+
+	ret = do_write(ff, &nr, sizeof(nr));
+	if (ret < 0)
+		goto out;
+
+	for (i = 0; i < nr; i++) {
+		struct memory_node *n = &nodes[i];
+
+#define _W(v)							\
+		ret = do_write(ff, &n->v, sizeof(n->v));	\
+		if (ret < 0)					\
+			goto out;
+
+		_W(node)
+		_W(size)
+
+#undef _W
+
+		ret = do_write_bitmap(ff, n->set, n->size);
+		if (ret < 0)
+			goto out;
+	}
+
+out:
+	return ret;
+}
+
 static void print_hostname(struct feat_fd *ff, FILE *fp)
 {
 	fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);

@@ -1543,6 +1766,35 @@ static void print_sample_time(struct feat_fd *ff, FILE *fp)
 	fprintf(fp, "# sample duration : %10.3f ms\n", d);
 }

+static void memory_node__fprintf(struct memory_node *n,
+				 unsigned long long bsize, FILE *fp)
+{
+	char buf_map[100], buf_size[50];
+	unsigned long long size;
+
+	size = bsize * bitmap_weight(n->set, n->size);
+	unit_number__scnprintf(buf_size, 50, size);
+
+	bitmap_scnprintf(n->set, n->size, buf_map, 100);
+	fprintf(fp, "# %3" PRIu64 " [%s]: %s\n", n->node, buf_size, buf_map);
+}
+
+static void print_mem_topology(struct feat_fd *ff, FILE *fp)
+{
+	struct memory_node *nodes;
+	int i, nr;
+
+	nodes = ff->ph->env.memory_nodes;
+	nr    = ff->ph->env.nr_memory_nodes;
+
+	fprintf(fp, "# memory nodes (nr %d, block size 0x%llx):\n",
+		nr, ff->ph->env.memory_bsize);
+
+	for (i = 0; i < nr; i++) {
+		memory_node__fprintf(&nodes[i], ff->ph->env.memory_bsize, fp);
+	}
+}
+
 static int __event_process_build_id(struct build_id_event *bev,
 				    char *filename,
 				    struct perf_session *session)

@@ -2205,6 +2457,58 @@ static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
 	return 0;
 }

+static int process_mem_topology(struct feat_fd *ff,
+				void *data __maybe_unused)
+{
+	struct memory_node *nodes;
+	u64 version, i, nr, bsize;
+	int ret = -1;
+
+	if (do_read_u64(ff, &version))
+		return -1;
+
+	if (version != 1)
+		return -1;
+
+	if (do_read_u64(ff, &bsize))
+		return -1;
+
+	if (do_read_u64(ff, &nr))
+		return -1;
+
+	nodes = zalloc(sizeof(*nodes) * nr);
+	if (!nodes)
+		return -1;
+
+	for (i = 0; i < nr; i++) {
+		struct memory_node n;
+
+#define _R(v)					\
+		if (do_read_u64(ff, &n.v))	\
+			goto out;		\
+
+		_R(node)
+		_R(size)
+
+#undef _R
+
+		if (do_read_bitmap(ff, &n.set, &n.size))
+			goto out;
+
+		nodes[i] = n;
+	}
+
+	ff->ph->env.memory_bsize    = bsize;
+	ff->ph->env.memory_nodes    = nodes;
+	ff->ph->env.nr_memory_nodes = nr;
+	ret = 0;
+
+out:
+	if (ret)
+		free(nodes);
+	return ret;
+}
+
 struct feature_ops {
 	int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
 	void (*print)(struct feat_fd *ff, FILE *fp);

@@ -2263,6 +2567,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
 	FEAT_OPN(STAT,		stat,		false),
 	FEAT_OPN(CACHE,		cache,		true),
 	FEAT_OPR(SAMPLE_TIME,	sample_time,	false),
+	FEAT_OPR(MEM_TOPOLOGY,	mem_topology,	true),
 };

 struct header_print_data {

@@ -2318,7 +2623,12 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
 	if (ret == -1)
 		return -1;

-	fprintf(fp, "# captured on: %s", ctime(&st.st_ctime));
+	fprintf(fp, "# captured on    : %s", ctime(&st.st_ctime));
+
+	fprintf(fp, "# header version : %u\n", header->version);
+	fprintf(fp, "# data offset    : %" PRIu64 "\n", header->data_offset);
+	fprintf(fp, "# data size      : %" PRIu64 "\n", header->data_size);
+	fprintf(fp, "# feat offset    : %" PRIu64 "\n", header->feat_offset);

 	perf_header__process_sections(header, fd, &hd,
 				      perf_file_section__fprintf_info);

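With the MEM_TOPOLOGY feature wired up above, 'perf report --header' prints the recorded memory topology through print_mem_topology(): one line per node with the node id, the total size scaled by unit_number__scnprintf(), and the bitmap of memory block indexes. A hypothetical rendering, with invented values, just to show the shape produced by the fprintf formats above:

	# memory nodes (nr 1, block size 0x8000000):
	#    0 [7G]: 0-23,32-69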
@@ -36,6 +36,7 @@ enum {
 	HEADER_STAT,
 	HEADER_CACHE,
 	HEADER_SAMPLE_TIME,
+	HEADER_MEM_TOPOLOGY,
 	HEADER_LAST_FEATURE,
 	HEADER_FEAT_BITS	= 256,
 };

@@ -536,7 +536,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
 			 * This mem info was allocated from sample__resolve_mem
 			 * and will not be used anymore.
 			 */
-			zfree(&entry->mem_info);
+			mem_info__zput(entry->mem_info);

 			/* If the map of an existing hist_entry has
 			 * become out-of-date due to an exec() or

@@ -1139,7 +1139,7 @@ void hist_entry__delete(struct hist_entry *he)
 	if (he->mem_info) {
 		map__zput(he->mem_info->iaddr.map);
 		map__zput(he->mem_info->daddr.map);
-		zfree(&he->mem_info);
+		mem_info__zput(he->mem_info);
 	}

 	zfree(&he->stat_acc);

@@ -1378,6 +1378,7 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
 	intel_pt_clear_tx_flags(decoder);
 	decoder->have_tma = false;
 	decoder->cbr = 0;
+	decoder->timestamp_insn_cnt = 0;
 	decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
 	decoder->overflow = true;
 	return -EOVERFLOW;

@@ -1616,6 +1617,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
 		case INTEL_PT_PWRX:
 			intel_pt_log("ERROR: Missing TIP after FUP\n");
 			decoder->pkt_state = INTEL_PT_STATE_ERR3;
+			decoder->pkt_step = 0;
 			return -ENOENT;

 		case INTEL_PT_OVF:

@@ -2390,14 +2392,6 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
 	return &decoder->state;
 }

-static bool intel_pt_at_psb(unsigned char *buf, size_t len)
-{
-	if (len < INTEL_PT_PSB_LEN)
-		return false;
-	return memmem(buf, INTEL_PT_PSB_LEN, INTEL_PT_PSB_STR,
-		      INTEL_PT_PSB_LEN);
-}
-
 /**
  * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet.
  * @buf: pointer to buffer pointer

@@ -2486,6 +2480,7 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
  * @buf: buffer
  * @len: size of buffer
  * @tsc: TSC value returned
+ * @rem: returns remaining size when TSC is found
  *
  * Find a TSC packet in @buf and return the TSC value. This function assumes
  * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a

@@ -2493,7 +2488,8 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
  *
 * Return: %true if TSC is found, false otherwise.
 */
-static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc)
+static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc,
+			      size_t *rem)
 {
 	struct intel_pt_pkt packet;
 	int ret;

@@ -2504,6 +2500,7 @@ static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc)
 			return false;
 		if (packet.type == INTEL_PT_TSC) {
 			*tsc = packet.payload;
+			*rem = len;
 			return true;
 		}
 		if (packet.type == INTEL_PT_PSBEND)

@@ -2554,6 +2551,8 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
 * @len_a: size of first buffer
 * @buf_b: second buffer
 * @len_b: size of second buffer
+ * @consecutive: returns true if there is data in buf_b that is consecutive
+ *               to buf_a
 *
 * If the trace contains TSC we can look at the last TSC of @buf_a and the
 * first TSC of @buf_b in order to determine if the buffers overlap, and then

@@ -2566,33 +2565,41 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
 static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
						size_t len_a,
						unsigned char *buf_b,
-						size_t len_b)
+						size_t len_b, bool *consecutive)
 {
 	uint64_t tsc_a, tsc_b;
 	unsigned char *p;
-	size_t len;
+	size_t len, rem_a, rem_b;

 	p = intel_pt_last_psb(buf_a, len_a);
 	if (!p)
 		return buf_b; /* No PSB in buf_a => no overlap */

 	len = len_a - (p - buf_a);
-	if (!intel_pt_next_tsc(p, len, &tsc_a)) {
+	if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a)) {
 		/* The last PSB+ in buf_a is incomplete, so go back one more */
 		len_a -= len;
 		p = intel_pt_last_psb(buf_a, len_a);
 		if (!p)
 			return buf_b; /* No full PSB+ => assume no overlap */
 		len = len_a - (p - buf_a);
-		if (!intel_pt_next_tsc(p, len, &tsc_a))
+		if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a))
 			return buf_b; /* No TSC in buf_a => assume no overlap */
 	}

 	while (1) {
 		/* Ignore PSB+ with no TSC */
-		if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) &&
-		    intel_pt_tsc_cmp(tsc_a, tsc_b) < 0)
-			return buf_b; /* tsc_a < tsc_b => no overlap */
+		if (intel_pt_next_tsc(buf_b, len_b, &tsc_b, &rem_b)) {
+			int cmp = intel_pt_tsc_cmp(tsc_a, tsc_b);
+
+			/* Same TSC, so buffers are consecutive */
+			if (!cmp && rem_b >= rem_a) {
+				*consecutive = true;
+				return buf_b + len_b - (rem_b - rem_a);
+			}
+			if (cmp < 0)
+				return buf_b; /* tsc_a < tsc_b => no overlap */
+		}

 		if (!intel_pt_step_psb(&buf_b, &len_b))
 			return buf_b + len_b; /* No PSB in buf_b => no data */

@@ -2606,6 +2613,8 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
 * @buf_b: second buffer
 * @len_b: size of second buffer
 * @have_tsc: can use TSC packets to detect overlap
+ * @consecutive: returns true if there is data in buf_b that is consecutive
+ *               to buf_a
 *
 * When trace samples or snapshots are recorded there is the possibility that
 * the data overlaps. Note that, for the purposes of decoding, data is only

@@ -2616,7 +2625,7 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
 */
 unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
				     unsigned char *buf_b, size_t len_b,
-				     bool have_tsc)
+				     bool have_tsc, bool *consecutive)
 {
 	unsigned char *found;

@@ -2628,7 +2637,8 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
 		return buf_b; /* No overlap */

 	if (have_tsc) {
-		found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b);
+		found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b,
+						  consecutive);
 		if (found)
 			return found;
 	}

@@ -2643,28 +2653,16 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
 	}

 	/* Now len_b >= len_a */
-	if (len_b > len_a) {
-		/* The leftover buffer 'b' must start at a PSB */
-		while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
-			if (!intel_pt_step_psb(&buf_a, &len_a))
-				return buf_b; /* No overlap */
-		}
-	}
-
 	while (1) {
 		/* Potential overlap so check the bytes */
 		found = memmem(buf_a, len_a, buf_b, len_a);
-		if (found)
+		if (found) {
+			*consecutive = true;
 			return buf_b + len_a;
+		}

 		/* Try again at next PSB in buffer 'a' */
 		if (!intel_pt_step_psb(&buf_a, &len_a))
 			return buf_b; /* No overlap */
-
-		/* The leftover buffer 'b' must start at a PSB */
-		while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
-			if (!intel_pt_step_psb(&buf_a, &len_a))
-				return buf_b; /* No overlap */
-		}
 	}
 }

@@ -117,7 +117,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);

 unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
				     unsigned char *buf_b, size_t len_b,
-				     bool have_tsc);
+				     bool have_tsc, bool *consecutive);

 int intel_pt__strerror(int code, char *buf, size_t buflen);

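A note on the consecutive-buffer arithmetic above: intel_pt_next_tsc() now also reports, via @rem, how many bytes remained in the buffer at the point where the TSC packet was found. When the last TSC of buf_a equals the first TSC of buf_b (cmp == 0), the rem_a bytes that follow that TSC in buf_a duplicate the first rem_a of the rem_b bytes that follow it in buf_b, so only the final rem_b - rem_a bytes of buf_b are new data; that is exactly the position buf_b + len_b - (rem_b - rem_a) that gets returned, and the data there continues buf_a with no gap, hence *consecutive = true. With invented numbers for illustration: len_b = 1000, rem_a = 100, rem_b = 300 gives 200 new bytes, starting at offset 800 into buf_b.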
@@ -132,6 +132,7 @@ struct intel_pt_queue {
	struct intel_pt *pt;
	unsigned int queue_nr;
	struct auxtrace_buffer *buffer;
+	struct auxtrace_buffer *old_buffer;
	void *decoder;
	const struct intel_pt_state *state;
	struct ip_callchain *chain;

@@ -143,6 +144,7 @@ struct intel_pt_queue {
	bool stop;
	bool step_through_buffers;
	bool use_buffer_pid_tid;
+	bool sync_switch;
	pid_t pid, tid;
	int cpu;
	int switch_state;

@@ -207,49 +209,28 @@ static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
 static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
				   struct auxtrace_buffer *b)
 {
+	bool consecutive = false;
	void *start;

	start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
-				      pt->have_tsc);
+				      pt->have_tsc, &consecutive);
	if (!start)
		return -EINVAL;
	b->use_size = b->data + b->size - start;
	b->use_data = start;
+	if (b->use_size && consecutive)
+		b->consecutive = true;
	return 0;
 }

-static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq,
-					struct auxtrace_queue *queue,
-					struct auxtrace_buffer *buffer)
-{
-	if (queue->cpu == -1 && buffer->cpu != -1)
-		ptq->cpu = buffer->cpu;
-
-	ptq->pid = buffer->pid;
-	ptq->tid = buffer->tid;
-
-	intel_pt_log("queue %u cpu %d pid %d tid %d\n",
-		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
-
-	thread__zput(ptq->thread);
-
-	if (ptq->tid != -1) {
-		if (ptq->pid != -1)
-			ptq->thread = machine__findnew_thread(ptq->pt->machine,
-							      ptq->pid,
-							      ptq->tid);
-		else
-			ptq->thread = machine__find_thread(ptq->pt->machine, -1,
-							   ptq->tid);
-	}
-}
-
 /* This function assumes data is processed sequentially only */
 static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
 {
	struct intel_pt_queue *ptq = data;
-	struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer;
+	struct auxtrace_buffer *buffer = ptq->buffer;
+	struct auxtrace_buffer *old_buffer = ptq->old_buffer;
	struct auxtrace_queue *queue;
+	bool might_overlap;

	if (ptq->stop) {
		b->len = 0;

@@ -257,7 +238,7 @@ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
	}

	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
-next:
+
	buffer = auxtrace_buffer__next(queue, buffer);
	if (!buffer) {
		if (old_buffer)

@@ -276,7 +257,8 @@ next:
		return -ENOMEM;
	}

-	if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer &&
+	might_overlap = ptq->pt->snapshot_mode || ptq->pt->sampling_mode;
+	if (might_overlap && !buffer->consecutive && old_buffer &&
	    intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
		return -ENOMEM;

@@ -289,33 +271,24 @@ next:
	}
	b->ref_timestamp = buffer->reference;

-	/*
-	 * If in snapshot mode and the buffer has no usable data, get next
-	 * buffer and again check overlap against old_buffer.
-	 */
-	if (ptq->pt->snapshot_mode && !b->len)
-		goto next;
-
-	if (old_buffer)
-		auxtrace_buffer__drop_data(old_buffer);
-
-	if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
-						      !buffer->consecutive)) {
+	if (!old_buffer || (might_overlap && !buffer->consecutive)) {
		b->consecutive = false;
		b->trace_nr = buffer->buffer_nr + 1;
	} else {
		b->consecutive = true;
	}

-	if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid ||
-					ptq->tid != buffer->tid))
-		intel_pt_use_buffer_pid_tid(ptq, queue, buffer);
-
	if (ptq->step_through_buffers)
		ptq->stop = true;

-	if (!b->len)
+	if (b->len) {
+		if (old_buffer)
+			auxtrace_buffer__drop_data(old_buffer);
+		ptq->old_buffer = buffer;
+	} else {
+		auxtrace_buffer__drop_data(buffer);
		return intel_pt_get_trace(b, data);
+	}

	return 0;
 }

@@ -954,16 +927,15 @@ static int intel_pt_setup_queue(struct intel_pt *pt,
			ptq->cpu = queue->cpu;
		ptq->tid = queue->tid;

-		if (pt->sampling_mode) {
-			if (pt->timeless_decoding)
-				ptq->step_through_buffers = true;
-			if (pt->timeless_decoding || !pt->have_sched_switch)
-				ptq->use_buffer_pid_tid = true;
-		}
+		if (pt->sampling_mode && !pt->snapshot_mode &&
+		    pt->timeless_decoding)
+			ptq->step_through_buffers = true;
+
+		ptq->sync_switch = pt->sync_switch;
	}

	if (!ptq->on_heap &&
-	    (!pt->sync_switch ||
+	    (!ptq->sync_switch ||
	     ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
		const struct intel_pt_state *state;
		int ret;

@@ -1546,7 +1518,7 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
	if (pt->synth_opts.last_branch)
		intel_pt_update_last_branch_rb(ptq);

-	if (!pt->sync_switch)
+	if (!ptq->sync_switch)
		return 0;

	if (intel_pt_is_switch_ip(ptq, state->to_ip)) {

@@ -1627,6 +1599,21 @@ static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
	return switch_ip;
 }

+static void intel_pt_enable_sync_switch(struct intel_pt *pt)
+{
+	unsigned int i;
+
+	pt->sync_switch = true;
+
+	for (i = 0; i < pt->queues.nr_queues; i++) {
+		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
+		struct intel_pt_queue *ptq = queue->priv;
+
+		if (ptq)
+			ptq->sync_switch = true;
+	}
+}
+
 static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
 {
	const struct intel_pt_state *state = ptq->state;

@@ -1643,7 +1630,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
			if (pt->switch_ip) {
				intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
					     pt->switch_ip, pt->ptss_ip);
-				pt->sync_switch = true;
+				intel_pt_enable_sync_switch(pt);
			}
		}
	}

@@ -1659,9 +1646,9 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
		if (state->err) {
			if (state->err == INTEL_PT_ERR_NODATA)
				return 1;
-			if (pt->sync_switch &&
+			if (ptq->sync_switch &&
			    state->from_ip >= pt->kernel_start) {
-				pt->sync_switch = false;
+				ptq->sync_switch = false;
				intel_pt_next_tid(pt, ptq);
			}
			if (pt->synth_opts.errors) {

@@ -1687,7 +1674,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
				 state->timestamp, state->est_timestamp);
			ptq->timestamp = state->est_timestamp;
		/* Use estimated TSC in unknown switch state */
-		} else if (pt->sync_switch &&
+		} else if (ptq->sync_switch &&
			   ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
			   intel_pt_is_switch_ip(ptq, state->to_ip) &&
			   ptq->next_tid == -1) {

@@ -1834,7 +1821,7 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
		return 1;

	ptq = intel_pt_cpu_to_ptq(pt, cpu);
-	if (!ptq)
+	if (!ptq || !ptq->sync_switch)
		return 1;

	switch (ptq->switch_state) {

@@ -2075,9 +2062,6 @@ static int intel_pt_process_auxtrace_event(struct perf_session *session,
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);

-	if (pt->sampling_mode)
-		return 0;
-
	if (!pt->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;

@@ -1697,7 +1697,7 @@ static void ip__resolve_data(struct thread *thread,
 struct mem_info *sample__resolve_mem(struct perf_sample *sample,
				     struct addr_location *al)
 {
-	struct mem_info *mi = zalloc(sizeof(*mi));
+	struct mem_info *mi = mem_info__new();

	if (!mi)
		return NULL;

@@ -75,9 +75,7 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
 *	}
 * perf_mmap__read_done()
 */
-union perf_event *perf_mmap__read_event(struct perf_mmap *map,
-					bool overwrite,
-					u64 *startp, u64 end)
+union perf_event *perf_mmap__read_event(struct perf_mmap *map)
 {
	union perf_event *event;

@@ -87,17 +85,14 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map,
	if (!refcount_read(&map->refcnt))
		return NULL;

-	if (startp == NULL)
-		return NULL;
-
	/* non-overwrite doesn't pause the ringbuffer */
-	if (!overwrite)
-		end = perf_mmap__read_head(map);
+	if (!map->overwrite)
+		map->end = perf_mmap__read_head(map);

-	event = perf_mmap__read(map, startp, end);
+	event = perf_mmap__read(map, &map->start, map->end);

-	if (!overwrite)
-		map->prev = *startp;
+	if (!map->overwrite)
+		map->prev = map->start;

	return event;
 }

@@ -120,9 +115,9 @@ void perf_mmap__put(struct perf_mmap *map)
		perf_mmap__munmap(map);
 }

-void perf_mmap__consume(struct perf_mmap *map, bool overwrite)
+void perf_mmap__consume(struct perf_mmap *map)
 {
-	if (!overwrite) {
+	if (!map->overwrite) {
		u64 old = map->prev;

		perf_mmap__write_tail(map, old);

@@ -240,27 +235,26 @@ static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
 /*
 * Report the start and end of the available data in ringbuffer
 */
-int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
-			 u64 *startp, u64 *endp)
+int perf_mmap__read_init(struct perf_mmap *md)
 {
	u64 head = perf_mmap__read_head(md);
	u64 old = md->prev;
	unsigned char *data = md->base + page_size;
	unsigned long size;

-	*startp = overwrite ? head : old;
-	*endp = overwrite ? old : head;
+	md->start = md->overwrite ? head : old;
+	md->end = md->overwrite ? old : head;

-	if (*startp == *endp)
+	if (md->start == md->end)
		return -EAGAIN;

-	size = *endp - *startp;
+	size = md->end - md->start;
	if (size > (unsigned long)(md->mask) + 1) {
-		if (!overwrite) {
+		if (!md->overwrite) {
			WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

			md->prev = head;
-			perf_mmap__consume(md, overwrite);
+			perf_mmap__consume(md);
			return -EAGAIN;
		}

@@ -268,33 +262,32 @@ int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
		 * Backward ring buffer is full. We still have a chance to read
		 * most of data from it.
		 */
-		if (overwrite_rb_find_range(data, md->mask, head, startp, endp))
+		if (overwrite_rb_find_range(data, md->mask, head, &md->start, &md->end))
			return -EINVAL;
	}

	return 0;
 }

-int perf_mmap__push(struct perf_mmap *md, bool overwrite,
-		    void *to, int push(void *to, void *buf, size_t size))
+int perf_mmap__push(struct perf_mmap *md, void *to,
+		    int push(void *to, void *buf, size_t size))
 {
	u64 head = perf_mmap__read_head(md);
-	u64 end, start;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

-	rc = perf_mmap__read_init(md, overwrite, &start, &end);
+	rc = perf_mmap__read_init(md);
	if (rc < 0)
		return (rc == -EAGAIN) ? 0 : -1;

-	size = end - start;
+	size = md->end - md->start;

-	if ((start & md->mask) + size != (end & md->mask)) {
-		buf = &data[start & md->mask];
-		size = md->mask + 1 - (start & md->mask);
-		start += size;
+	if ((md->start & md->mask) + size != (md->end & md->mask)) {
+		buf = &data[md->start & md->mask];
+		size = md->mask + 1 - (md->start & md->mask);
+		md->start += size;

		if (push(to, buf, size) < 0) {
			rc = -1;

@@ -302,9 +295,9 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite,
		}
	}

-	buf = &data[start & md->mask];
-	size = end - start;
-	start += size;
+	buf = &data[md->start & md->mask];
+	size = md->end - md->start;
+	md->start += size;

	if (push(to, buf, size) < 0) {
		rc = -1;

@@ -312,7 +305,7 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite,
	}

	md->prev = head;
-	perf_mmap__consume(md, overwrite);
+	perf_mmap__consume(md);
 out:
	return rc;
 }

@@ -20,6 +20,9 @@ struct perf_mmap {
	int		 fd;
	refcount_t	 refcnt;
	u64		 prev;
+	u64		 start;
+	u64		 end;
+	bool		 overwrite;
	struct auxtrace_mmap auxtrace_mmap;
	char		 event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
 };

@@ -63,7 +66,7 @@ void perf_mmap__munmap(struct perf_mmap *map);
 void perf_mmap__get(struct perf_mmap *map);
 void perf_mmap__put(struct perf_mmap *map);

-void perf_mmap__consume(struct perf_mmap *map, bool overwrite);
+void perf_mmap__consume(struct perf_mmap *map);

 static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
 {

@@ -86,16 +89,13 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)

 union perf_event *perf_mmap__read_forward(struct perf_mmap *map);

-union perf_event *perf_mmap__read_event(struct perf_mmap *map,
-					bool overwrite,
-					u64 *startp, u64 end);
+union perf_event *perf_mmap__read_event(struct perf_mmap *map);

-int perf_mmap__push(struct perf_mmap *md, bool backward,
-		    void *to, int push(void *to, void *buf, size_t size));
+int perf_mmap__push(struct perf_mmap *md, void *to,
+		    int push(void *to, void *buf, size_t size));

 size_t perf_mmap__mmap_len(struct perf_mmap *map);

-int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
-			 u64 *startp, u64 *endp);
+int perf_mmap__read_init(struct perf_mmap *md);
 void perf_mmap__read_done(struct perf_mmap *map);
 #endif /*__PERF_MMAP_H */

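The net effect of the perf_mmap changes in this series: 'overwrite' is fixed at allocation time (see the evlist.c hunks earlier) and the read window lives in map->start/map->end, so all the reader-side signatures collapse to single-argument forms. The canonical consumer loop, matching what the updated tests above now do (a sketch only; 'md' and 'event' come from the surrounding code exactly as in those tests):

	if (perf_mmap__read_init(md) < 0)
		return;	/* -EAGAIN: no data available right now */

	while ((event = perf_mmap__read_event(md)) != NULL) {
		/* ... process 'event' ... */
		perf_mmap__consume(md);
	}
	perf_mmap__read_done(md);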
@@ -1217,7 +1217,7 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
			 get_config_name(head_config), &config_terms);
 }

-static int __parse_events_add_pmu(struct parse_events_state *parse_state,
+int parse_events_add_pmu(struct parse_events_state *parse_state,
			 struct list_head *list, char *name,
			 struct list_head *head_config, bool auto_merge_stats)
 {

@@ -1247,7 +1247,12 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state,
	if (!head_config) {
		attr.type = pmu->type;
		evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats);
-		return evsel ? 0 : -ENOMEM;
+		if (evsel) {
+			evsel->pmu_name = name;
+			return 0;
+		} else {
+			return -ENOMEM;
+		}
	}

	if (perf_pmu__check_alias(pmu, head_config, &info))

@@ -1276,18 +1281,12 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state,
		evsel->snapshot = info.snapshot;
		evsel->metric_expr = info.metric_expr;
		evsel->metric_name = info.metric_name;
+		evsel->pmu_name = name;
	}

	return evsel ? 0 : -ENOMEM;
 }

-int parse_events_add_pmu(struct parse_events_state *parse_state,
-			 struct list_head *list, char *name,
-			 struct list_head *head_config)
-{
-	return __parse_events_add_pmu(parse_state, list, name, head_config, false);
-}
-
 int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
			       char *str, struct list_head **listp)
 {

@@ -1317,8 +1316,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
			return -1;
		list_add_tail(&term->list, head);

-		if (!__parse_events_add_pmu(parse_state, list,
-					    pmu->name, head, true)) {
+		if (!parse_events_add_pmu(parse_state, list,
+					  pmu->name, head, true)) {
			pr_debug("%s -> %s/%s/\n", str,
				 pmu->name, alias->str);
			ok++;

@@ -167,7 +167,7 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx,
				void *ptr, char *type, u64 len);
 int parse_events_add_pmu(struct parse_events_state *parse_state,
			 struct list_head *list, char *name,
-			 struct list_head *head_config);
+			 struct list_head *head_config, bool auto_merge_stats);

 int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
			       char *str,

@@ -175,7 +175,7 @@ bpf_source	[^,{}]+\.c[a-zA-Z0-9._]*
 num_dec		[0-9]+
 num_hex		0x[a-fA-F0-9]+
 num_raw_hex	[a-fA-F0-9]+
-name		[a-zA-Z_*?][a-zA-Z0-9_*?.]*
+name		[a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]]*
 name_minus	[a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
 drv_cfg_term	[a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
 /* If you add a modifier you need to update check_modifier() */

@@ -8,6 +8,7 @@

 #define YYDEBUG 1

+#include <fnmatch.h>
 #include <linux/compiler.h>
 #include <linux/list.h>
 #include <linux/types.h>

@@ -231,9 +232,13 @@ PE_NAME opt_event_config
		YYABORT;

	ALLOC_LIST(list);
-	if (parse_events_add_pmu(_parse_state, list, $1, $2)) {
+	if (parse_events_add_pmu(_parse_state, list, $1, $2, false)) {
		struct perf_pmu *pmu = NULL;
		int ok = 0;
+		char *pattern;
+
+		if (asprintf(&pattern, "%s*", $1) < 0)
+			YYABORT;

		while ((pmu = perf_pmu__scan(pmu)) != NULL) {
			char *name = pmu->name;

@@ -241,14 +246,19 @@ PE_NAME opt_event_config
			if (!strncmp(name, "uncore_", 7) &&
			    strncmp($1, "uncore_", 7))
				name += 7;
-			if (!strncmp($1, name, strlen($1))) {
-				if (parse_events_copy_term_list(orig_terms, &terms))
+			if (!fnmatch(pattern, name, 0)) {
+				if (parse_events_copy_term_list(orig_terms, &terms)) {
+					free(pattern);
					YYABORT;
-				if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms))
+				}
+				if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true))
					ok++;
				parse_events_terms__delete(terms);
			}
		}

+		free(pattern);
+
		if (!ok)
			YYABORT;
	}

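The grammar now matches PMU names with fnmatch(3) instead of a prefix strncmp(), and the lexer change above admits '[' and ']' in event names, so shell-style globs can select several PMU instances at once, each created as a separate evsel with auto_merge_stats set. As a hypothetical example (event string invented for illustration), something like 'perf stat -a -e uncore_cbox_[01]/config=0x0334/' would now program both uncore_cbox_0 and uncore_cbox_1 and merge their counts. A tiny standalone check of the matching rule itself, using the same call shape as the grammar (pattern is "<name>*"):

	#include <fnmatch.h>
	#include <stdio.h>

	int main(void)
	{
		const char *pmus[] = { "uncore_cbox_0", "uncore_cbox_1", "uncore_imc" };
		const char *pattern = "uncore_cbox*";

		for (int i = 0; i < 3; i++)
			printf("%-16s %s\n", pmus[i],
			       fnmatch(pattern, pmus[i], 0) == 0 ? "matches" : "no match");
		return 0;
	}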
@@ -984,7 +984,6 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
	int sample_id_all = 1, cpu;
	static char *kwlist[] = { "cpu", "sample_id_all", NULL };
	struct perf_mmap *md;
-	u64 end, start;
	int err;

	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist,

@@ -992,10 +991,10 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
		return NULL;

	md = &evlist->mmap[cpu];
-	if (perf_mmap__read_init(md, false, &start, &end) < 0)
+	if (perf_mmap__read_init(md) < 0)
		goto end;

-	event = perf_mmap__read_event(md, false, &start, end);
+	event = perf_mmap__read_event(md);
	if (event != NULL) {
		PyObject *pyevent = pyrf_event__new(event);
		struct pyrf_event *pevent = (struct pyrf_event *)pyevent;

@@ -1013,7 +1012,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
		err = perf_evsel__parse_sample(evsel, event, &pevent->sample);

		/* Consume the event only after we parsed it out. */
-		perf_mmap__consume(md, false);
+		perf_mmap__consume(md);

		if (err)
			return PyErr_Format(PyExc_OSError,

@@ -2221,3 +2221,25 @@ int symbol__config_symfs(const struct option *opt __maybe_unused,
	free(bf);
	return 0;
 }
+
+struct mem_info *mem_info__get(struct mem_info *mi)
+{
+	if (mi)
+		refcount_inc(&mi->refcnt);
+	return mi;
+}
+
+void mem_info__put(struct mem_info *mi)
+{
+	if (mi && refcount_dec_and_test(&mi->refcnt))
+		free(mi);
+}
+
+struct mem_info *mem_info__new(void)
+{
+	struct mem_info *mi = zalloc(sizeof(*mi));
+
+	if (mi)
+		refcount_set(&mi->refcnt, 1);
+	return mi;
+}

@@ -200,9 +200,10 @@ struct branch_info {
 };

 struct mem_info {
-	struct addr_map_symbol iaddr;
-	struct addr_map_symbol daddr;
-	union perf_mem_data_src data_src;
+	struct addr_map_symbol	iaddr;
+	struct addr_map_symbol	daddr;
+	union perf_mem_data_src	data_src;
+	refcount_t		refcnt;
 };

 struct addr_location {

@@ -389,4 +390,16 @@ int sdt_notes__get_count(struct list_head *start);
 #define SDT_NOTE_NAME "stapsdt"
 #define NR_ADDR 3

+struct mem_info *mem_info__new(void);
+struct mem_info *mem_info__get(struct mem_info *mi);
+void   mem_info__put(struct mem_info *mi);
+
+static inline void __mem_info__zput(struct mem_info **mi)
+{
+	mem_info__put(*mi);
+	*mi = NULL;
+}
+
+#define mem_info__zput(mi) __mem_info__zput(&mi)
+
 #endif /* __PERF_SYMBOL */

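mem_info__zput() follows the established map__zput()/thread__zput() convention: drop the reference and NULL the caller's pointer in one step, which is what lets hists__findnew_entry() above discard a shared mem_info without leaving a dangling pointer behind. A minimal standalone sketch of the zput idea, with a hypothetical 'struct box' and a plain int counter standing in for refcount_t (not perf code):

	#include <stdio.h>
	#include <stdlib.h>

	struct box { int refcnt; };

	static void box__put(struct box *b)
	{
		if (b && --b->refcnt == 0)
			free(b);
	}

	/* zput: put the reference AND clear the caller's pointer */
	static inline void __box__zput(struct box **b)
	{
		box__put(*b);
		*b = NULL;
	}

	#define box__zput(b) __box__zput(&(b))

	int main(void)
	{
		struct box *b = calloc(1, sizeof(*b));

		b->refcnt = 1;
		box__zput(b);			/* freed, and b is now NULL... */
		printf("b = %p\n", (void *)b);	/* ...so b cannot dangle */
		return 0;
	}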
@@ -26,7 +26,6 @@ struct thread {
	pid_t			ppid;
	int			cpu;
	refcount_t		refcnt;
-	char			shortname[3];
	bool			comm_set;
	int			comm_len;
	bool			dead; /* if set thread has exited */