perf/core improvements and fixes:
User visible: - Add ability to specify to select which registers to record, to reduce the size of perf.data files, and also allow printing the registers in 'perf script': (Stephane Eranian) # perf record --intr-regs=AX,SP usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.016 MB perf.data (8 samples) ] # perf script -F ip,sym,iregs | tail -5 ffffffff8105f42a native_write_msr_safe AX:0xf SP:0xffff8802629c3c00 ffffffff8105f42a native_write_msr_safe AX:0xf SP:0xffff8802629c3c00 ffffffff81761ac0 _raw_spin_lock AX:0xffff8801bfcf8020 SP:0xffff8802629c3ce8 ffffffff81202bf8 __vma_adjust_trans_huge AX:0x7ffc75200000 SP:0xffff8802629c3b30 ffffffff8122b089 dput AX:0x101 SP:0xffff8802629c3c78 # Infrastructure: - Open event on evsel cpus and threads (Kan Liang) - New bpf API to get name from a BPF object (Wang Nan) Build fixes: - Fix build on powerpc broken by pt/bts (Adrian Hunter) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAABAgAGBQJV5MdfAAoJENZQFvNTUqpAe20QAKItll0MoqX26lDdJCRRdRY9 pzF2sD6qi3AMed6xbr9lLYKYAeYwESt/j6+3JLX6qRTP2VkRcwybvCmzn3MQxS1a AoBzDJtszYA9kreWVX8cAkPbZrNU7KZvHV95vwY1wNb+mcYLb1WszsKapyaDtDAH dtAZcZyEle1YQdwkVQOv2bLtxayZt9gLbTDBuH0v4fQY85xVlEEZ9j+AVsYUG5PT iNB4/f+ibgcC9L3stpt0IgzTpgxXMYP5EpekXdq1fMIthtnpV1kt7lCyzPkVC0Xk KvzuHhdPj2p05xEVgZKCT0yS06PxLbhwHs25ELtfFXlXrYdW7NmkLnyi04k0jo/o YpHfy5tcOJ4v5q60qcJASDDznk1qr4Yk5l+xccv0hknkysjW9NNhBVf5KlylBNJH F6FnXKCiNVC9e5mLuilVTKdTylANiBvapSLnnS9uY/zYGD62+bm6SpZtlRec5vb6 rwiNdNbECDlk/dB3ez8vtuH826Nrz3MZ2JKsa0b6IkbSeT+B0N5T9IREbVZivxOZ 1bQ2JLZuOtJhL7nXYeZyjwjjmqWAyxMdrqUjjqaLWpf+/Ws/kyoRsWwU30pSR29t syKgL0dRth/JtvjNwwWmfH4skm/h9icYq8OjlLzCTQ4L4rEUag/wvxKwUq/AUOIm Zo9q9E+O/1T4URspaoG6 =unM7 -----END PGP SIGNATURE----- Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: User visible changes: - Add ability to specify to select which registers to record, to reduce the size of perf.data files, and also allow printing the registers in 'perf script': (Stephane Eranian) # perf record --intr-regs=AX,SP usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.016 MB perf.data (8 samples) ] # perf script -F ip,sym,iregs | tail -5 ffffffff8105f42a native_write_msr_safe AX:0xf SP:0xffff8802629c3c00 ffffffff8105f42a native_write_msr_safe AX:0xf SP:0xffff8802629c3c00 ffffffff81761ac0 _raw_spin_lock AX:0xffff8801bfcf8020 SP:0xffff8802629c3ce8 ffffffff81202bf8 __vma_adjust_trans_huge AX:0x7ffc75200000 SP:0xffff8802629c3b30 ffffffff8122b089 dput AX:0x101 SP:0xffff8802629c3c78 # Infrastructure changes: - Open event on evsel cpus and threads. (Kan Liang) - Add new bpf API to get name from a BPF object. (Wang Nan) Build fixes: - Fix build on powerpc broken by pt/bts. (Adrian Hunter) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
5320266124
|
@ -880,15 +880,26 @@ struct bpf_object *bpf_object__open(const char *path)
|
|||
}
|
||||
|
||||
struct bpf_object *bpf_object__open_buffer(void *obj_buf,
|
||||
size_t obj_buf_sz)
|
||||
size_t obj_buf_sz,
|
||||
const char *name)
|
||||
{
|
||||
char tmp_name[64];
|
||||
|
||||
/* param validation */
|
||||
if (!obj_buf || obj_buf_sz <= 0)
|
||||
return NULL;
|
||||
|
||||
pr_debug("loading object from buffer\n");
|
||||
if (!name) {
|
||||
snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
|
||||
(unsigned long)obj_buf,
|
||||
(unsigned long)obj_buf_sz);
|
||||
tmp_name[sizeof(tmp_name) - 1] = '\0';
|
||||
name = tmp_name;
|
||||
}
|
||||
pr_debug("loading object '%s' from buffer\n",
|
||||
name);
|
||||
|
||||
return __bpf_object__open("[buffer]", obj_buf, obj_buf_sz);
|
||||
return __bpf_object__open(name, obj_buf, obj_buf_sz);
|
||||
}
|
||||
|
||||
int bpf_object__unload(struct bpf_object *obj)
|
||||
|
@ -975,6 +986,14 @@ bpf_object__next(struct bpf_object *prev)
|
|||
return next;
|
||||
}
|
||||
|
||||
const char *
|
||||
bpf_object__get_name(struct bpf_object *obj)
|
||||
{
|
||||
if (!obj)
|
||||
return NULL;
|
||||
return obj->path;
|
||||
}
|
||||
|
||||
struct bpf_program *
|
||||
bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
|
||||
{
|
||||
|
|
|
@ -28,12 +28,14 @@ struct bpf_object;
|
|||
|
||||
struct bpf_object *bpf_object__open(const char *path);
|
||||
struct bpf_object *bpf_object__open_buffer(void *obj_buf,
|
||||
size_t obj_buf_sz);
|
||||
size_t obj_buf_sz,
|
||||
const char *name);
|
||||
void bpf_object__close(struct bpf_object *object);
|
||||
|
||||
/* Load/unload object into/from kernel */
|
||||
int bpf_object__load(struct bpf_object *obj);
|
||||
int bpf_object__unload(struct bpf_object *obj);
|
||||
const char *bpf_object__get_name(struct bpf_object *obj);
|
||||
|
||||
struct bpf_object *bpf_object__next(struct bpf_object *prev);
|
||||
#define bpf_object__for_each_safe(pos, tmp) \
|
||||
|
|
|
@ -276,7 +276,11 @@ filter out the startup phase of the program, which is often very different.
|
|||
--intr-regs::
|
||||
Capture machine state (registers) at interrupt, i.e., on counter overflows for
|
||||
each sample. List of captured registers depends on the architecture. This option
|
||||
is off by default.
|
||||
is off by default. It is possible to select the registers to sample using their
|
||||
symbolic names, e.g. on x86, ax, si. To list the available registers use
|
||||
--intr-regs=\?. To name registers, pass a comma separated list such as
|
||||
--intr-regs=ax,bx. The list of register is architecture dependent.
|
||||
|
||||
|
||||
--running-time::
|
||||
Record running and enabled time for read events (:S)
|
||||
|
|
|
@ -116,7 +116,7 @@ OPTIONS
|
|||
--fields::
|
||||
Comma separated list of fields to print. Options are:
|
||||
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
|
||||
srcline, period, flags.
|
||||
srcline, period, iregs, flags.
|
||||
Field list can be prepended with the type, trace, sw or hw,
|
||||
to indicate to which event type the field list applies.
|
||||
e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace
|
||||
|
|
|
@ -2,6 +2,7 @@ libperf-y += header.o
|
|||
libperf-y += tsc.o
|
||||
libperf-y += pmu.o
|
||||
libperf-y += kvm-stat.o
|
||||
libperf-y += perf_regs.o
|
||||
|
||||
libperf-$(CONFIG_DWARF) += dwarf-regs.o
|
||||
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
#include "../../perf.h"
|
||||
#include "../../util/perf_regs.h"
|
||||
|
||||
#define REG(n, b) { .name = #n, .mask = 1ULL << (b) }
|
||||
#define REG_END { .name = NULL }
|
||||
const struct sample_reg sample_reg_masks[] = {
|
||||
REG(AX, PERF_REG_X86_AX),
|
||||
REG(BX, PERF_REG_X86_BX),
|
||||
REG(CX, PERF_REG_X86_CX),
|
||||
REG(DX, PERF_REG_X86_DX),
|
||||
REG(SI, PERF_REG_X86_SI),
|
||||
REG(DI, PERF_REG_X86_DI),
|
||||
REG(BP, PERF_REG_X86_BP),
|
||||
REG(SP, PERF_REG_X86_SP),
|
||||
REG(IP, PERF_REG_X86_IP),
|
||||
REG(FLAGS, PERF_REG_X86_FLAGS),
|
||||
REG(CS, PERF_REG_X86_CS),
|
||||
REG(SS, PERF_REG_X86_SS),
|
||||
#ifdef HAVE_ARCH_X86_64_SUPPORT
|
||||
REG(R8, PERF_REG_X86_R8),
|
||||
REG(R9, PERF_REG_X86_R9),
|
||||
REG(R10, PERF_REG_X86_R10),
|
||||
REG(R11, PERF_REG_X86_R11),
|
||||
REG(R12, PERF_REG_X86_R12),
|
||||
REG(R13, PERF_REG_X86_R13),
|
||||
REG(R14, PERF_REG_X86_R14),
|
||||
REG(R15, PERF_REG_X86_R15),
|
||||
#endif
|
||||
REG_END
|
||||
};
|
|
@ -27,8 +27,10 @@
|
|||
#include "util/cpumap.h"
|
||||
#include "util/thread_map.h"
|
||||
#include "util/data.h"
|
||||
#include "util/perf_regs.h"
|
||||
#include "util/auxtrace.h"
|
||||
#include "util/parse-branch-options.h"
|
||||
#include "util/parse-regs-options.h"
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sched.h>
|
||||
|
@ -279,7 +281,7 @@ static int record__open(struct record *rec)
|
|||
|
||||
evlist__for_each(evlist, pos) {
|
||||
try_again:
|
||||
if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
|
||||
if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
|
||||
if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
|
||||
if (verbose)
|
||||
ui__warning("%s\n", msg);
|
||||
|
@ -1080,8 +1082,9 @@ struct option __record_options[] = {
|
|||
"sample transaction flags (special events only)"),
|
||||
OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
|
||||
"use per-thread mmaps"),
|
||||
OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs,
|
||||
"Sample machine registers on interrupt"),
|
||||
OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
|
||||
"sample selected machine registers on interrupt,"
|
||||
" use -I ? to list register names", parse_regs),
|
||||
OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
|
||||
"Record running/enabled time of read (:S) events"),
|
||||
OPT_CALLBACK('k', "clockid", &record.opts,
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "util/exec_cmd.h"
|
||||
#include "util/header.h"
|
||||
#include "util/parse-options.h"
|
||||
#include "util/perf_regs.h"
|
||||
#include "util/session.h"
|
||||
#include "util/tool.h"
|
||||
#include "util/symbol.h"
|
||||
|
@ -46,6 +47,7 @@ enum perf_output_field {
|
|||
PERF_OUTPUT_SYMOFFSET = 1U << 11,
|
||||
PERF_OUTPUT_SRCLINE = 1U << 12,
|
||||
PERF_OUTPUT_PERIOD = 1U << 13,
|
||||
PERF_OUTPUT_IREGS = 1U << 14,
|
||||
};
|
||||
|
||||
struct output_option {
|
||||
|
@ -66,6 +68,7 @@ struct output_option {
|
|||
{.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET},
|
||||
{.str = "srcline", .field = PERF_OUTPUT_SRCLINE},
|
||||
{.str = "period", .field = PERF_OUTPUT_PERIOD},
|
||||
{.str = "iregs", .field = PERF_OUTPUT_IREGS},
|
||||
};
|
||||
|
||||
/* default set to maintain compatibility with current format */
|
||||
|
@ -255,6 +258,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
|
|||
PERF_OUTPUT_PERIOD))
|
||||
return -EINVAL;
|
||||
|
||||
if (PRINT_FIELD(IREGS) &&
|
||||
perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS",
|
||||
PERF_OUTPUT_IREGS))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -352,6 +360,24 @@ out:
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void print_sample_iregs(union perf_event *event __maybe_unused,
|
||||
struct perf_sample *sample,
|
||||
struct thread *thread __maybe_unused,
|
||||
struct perf_event_attr *attr)
|
||||
{
|
||||
struct regs_dump *regs = &sample->intr_regs;
|
||||
uint64_t mask = attr->sample_regs_intr;
|
||||
unsigned i = 0, r;
|
||||
|
||||
if (!regs)
|
||||
return;
|
||||
|
||||
for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
|
||||
u64 val = regs->regs[i++];
|
||||
printf("%5s:0x%"PRIx64" ", perf_reg_name(r), val);
|
||||
}
|
||||
}
|
||||
|
||||
static void print_sample_start(struct perf_sample *sample,
|
||||
struct thread *thread,
|
||||
struct perf_evsel *evsel)
|
||||
|
@ -525,6 +551,9 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
|
|||
PERF_MAX_STACK_DEPTH);
|
||||
}
|
||||
|
||||
if (PRINT_FIELD(IREGS))
|
||||
print_sample_iregs(event, sample, thread, attr);
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
|
@ -1643,7 +1672,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
|
|||
"comma separated output fields prepend with 'type:'. "
|
||||
"Valid types: hw,sw,trace,raw. "
|
||||
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
|
||||
"addr,symoff,period,flags", parse_output_fields),
|
||||
"addr,symoff,period,iregs,flags", parse_output_fields),
|
||||
OPT_BOOLEAN('a', "all-cpus", &system_wide,
|
||||
"system-wide collection from all CPUs"),
|
||||
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
|
||||
|
|
|
@ -54,7 +54,6 @@ struct record_opts {
|
|||
bool sample_time_set;
|
||||
bool callgraph_set;
|
||||
bool period;
|
||||
bool sample_intr_regs;
|
||||
bool running_time;
|
||||
bool full_auxtrace;
|
||||
bool auxtrace_snapshot_mode;
|
||||
|
@ -64,6 +63,7 @@ struct record_opts {
|
|||
unsigned int auxtrace_mmap_pages;
|
||||
unsigned int user_freq;
|
||||
u64 branch_stack;
|
||||
u64 sample_intr_regs;
|
||||
u64 default_interval;
|
||||
u64 user_interval;
|
||||
size_t auxtrace_snapshot_size;
|
||||
|
|
|
@ -26,7 +26,7 @@ static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
|
|||
{
|
||||
struct bpf_object *obj;
|
||||
|
||||
obj = bpf_object__open_buffer(obj_buf, obj_buf_sz);
|
||||
obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, NULL);
|
||||
if (!obj)
|
||||
return -1;
|
||||
bpf_object__close(obj);
|
||||
|
|
|
@ -75,6 +75,7 @@ libperf-y += record.o
|
|||
libperf-y += srcline.o
|
||||
libperf-y += data.o
|
||||
libperf-$(CONFIG_X86) += tsc.o
|
||||
libperf-$(CONFIG_AUXTRACE) += tsc.o
|
||||
libperf-y += cloexec.o
|
||||
libperf-y += thread-stack.o
|
||||
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
|
||||
|
@ -82,6 +83,7 @@ libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
|
|||
libperf-$(CONFIG_AUXTRACE) += intel-pt.o
|
||||
libperf-$(CONFIG_AUXTRACE) += intel-bts.o
|
||||
libperf-y += parse-branch-options.o
|
||||
libperf-y += parse-regs-options.o
|
||||
|
||||
libperf-$(CONFIG_LIBELF) += symbol-elf.o
|
||||
libperf-$(CONFIG_LIBELF) += probe-file.o
|
||||
|
|
|
@ -1181,6 +1181,10 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **e
|
|||
if (evsel->filter == NULL)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* filters only work for tracepoint event, which doesn't have cpu limit.
|
||||
* So evlist and evsel should always be same.
|
||||
*/
|
||||
err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter);
|
||||
if (err) {
|
||||
*err_evsel = evsel;
|
||||
|
|
|
@ -787,7 +787,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
|
|||
perf_evsel__config_callgraph(evsel, opts, &callchain_param);
|
||||
|
||||
if (opts->sample_intr_regs) {
|
||||
attr->sample_regs_intr = PERF_REGS_MASK;
|
||||
attr->sample_regs_intr = opts->sample_intr_regs;
|
||||
perf_evsel__set_sample_bit(evsel, REGS_INTR);
|
||||
}
|
||||
|
||||
|
|
|
@ -146,6 +146,9 @@ static void intel_pt_insn_decoder(struct insn *insn,
|
|||
case 4:
|
||||
intel_pt_insn->rel = bswap_32(insn->immediate.value);
|
||||
break;
|
||||
default:
|
||||
intel_pt_insn->rel = 0;
|
||||
break;
|
||||
}
|
||||
#else
|
||||
intel_pt_insn->rel = insn->immediate.value;
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
#include "perf.h"
|
||||
#include "util/util.h"
|
||||
#include "util/debug.h"
|
||||
#include "util/parse-options.h"
|
||||
#include "util/parse-regs-options.h"
|
||||
|
||||
int
|
||||
parse_regs(const struct option *opt, const char *str, int unset)
|
||||
{
|
||||
uint64_t *mode = (uint64_t *)opt->value;
|
||||
const struct sample_reg *r;
|
||||
char *s, *os = NULL, *p;
|
||||
int ret = -1;
|
||||
|
||||
if (unset)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* cannot set it twice
|
||||
*/
|
||||
if (*mode)
|
||||
return -1;
|
||||
|
||||
/* str may be NULL in case no arg is passed to -I */
|
||||
if (str) {
|
||||
/* because str is read-only */
|
||||
s = os = strdup(str);
|
||||
if (!s)
|
||||
return -1;
|
||||
|
||||
for (;;) {
|
||||
p = strchr(s, ',');
|
||||
if (p)
|
||||
*p = '\0';
|
||||
|
||||
if (!strcmp(s, "?")) {
|
||||
fprintf(stderr, "available registers: ");
|
||||
for (r = sample_reg_masks; r->name; r++) {
|
||||
fprintf(stderr, "%s ", r->name);
|
||||
}
|
||||
fputc('\n', stderr);
|
||||
/* just printing available regs */
|
||||
return -1;
|
||||
}
|
||||
for (r = sample_reg_masks; r->name; r++) {
|
||||
if (!strcasecmp(s, r->name))
|
||||
break;
|
||||
}
|
||||
if (!r->name) {
|
||||
ui__warning("unknown register %s,"
|
||||
" check man page\n", s);
|
||||
goto error;
|
||||
}
|
||||
|
||||
*mode |= r->mask;
|
||||
|
||||
if (!p)
|
||||
break;
|
||||
|
||||
s = p + 1;
|
||||
}
|
||||
}
|
||||
ret = 0;
|
||||
|
||||
/* default to all possible regs */
|
||||
if (*mode == 0)
|
||||
*mode = PERF_REGS_MASK;
|
||||
error:
|
||||
free(os);
|
||||
return ret;
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
#ifndef _PERF_PARSE_REGS_OPTIONS_H
|
||||
#define _PERF_PARSE_REGS_OPTIONS_H 1
|
||||
struct option;
|
||||
int parse_regs(const struct option *opt, const char *str, int unset);
|
||||
#endif /* _PERF_PARSE_REGS_OPTIONS_H */
|
|
@ -5,6 +5,13 @@
|
|||
|
||||
struct regs_dump;
|
||||
|
||||
struct sample_reg {
|
||||
const char *name;
|
||||
uint64_t mask;
|
||||
};
|
||||
|
||||
extern const struct sample_reg sample_reg_masks[];
|
||||
|
||||
#ifdef HAVE_PERF_REGS_SUPPORT
|
||||
#include <perf_regs.h>
|
||||
|
||||
|
|
Loading…
Reference in New Issue