perf tools: Add support for skipping itrace instructions
When using 'perf script' to look at PT traces it is often useful to ignore the initialization code at the beginning.

On larger traces, which may have many millions of instructions in initialization code, doing that in a pipeline can be very slow, with perf script spending a lot of CPU time calling printf and writing data.

This patch adds an extension to the --itrace argument that skips 'n' events (instructions, branches or transactions) at the beginning. This is much more efficient.

v2:
- Add support for BTS (Adrian Hunter)
- Document in itrace.txt
- Fix branch check
- Check transactions and instructions too

Committer note:

To test intel_pt one needs to make sure VT-x isn't active, i.e. stopping KVM guests on the test machine, as described by Andi Kleen at http://lkml.kernel.org/r/20160301234953.GD23621@tassilo.jf.intel.com

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1459187142-20035-1-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
f7380c12ec
commit
d1706b39f0
|
@ -672,6 +672,7 @@ The letters are:
|
|||
d create a debug log
|
||||
g synthesize a call chain (use with i or x)
|
||||
l synthesize last branch entries (use with i or x)
|
||||
s skip initial number of events
|
||||
|
||||
"Instructions" events look like they were recorded by "perf record -e
|
||||
instructions".
|
||||
|
@ -730,6 +731,12 @@ from one sample to the next.
|
|||
|
||||
To disable trace decoding entirely, use the option --no-itrace.
|
||||
|
||||
It is also possible to skip a number of the generated events (instructions, branches, transactions)
|
||||
at the beginning. This is useful to ignore initialization code.
|
||||
|
||||
--itrace=i0nss1000000
|
||||
|
||||
skips the first million instructions.
|
||||
|
||||
dump option
|
||||
-----------
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
d create a debug log
|
||||
g synthesize a call chain (use with i or x)
|
||||
l synthesize last branch entries (use with i or x)
|
||||
s skip initial number of events
|
||||
|
||||
The default is all events i.e. the same as --itrace=ibxe
|
||||
|
||||
|
@ -24,3 +25,10 @@
|
|||
|
||||
Also the number of last branch entries (default 64, max. 1024) for
|
||||
instructions or transactions events can be specified.
|
||||
|
||||
It is also possible to skip a number of the generated events (instructions, branches, transactions)
|
||||
at the beginning. This is useful to ignore initialization code.
|
||||
|
||||
--itrace=i0nss1000000
|
||||
|
||||
skips the first million instructions.
|
||||
|
|
|
@ -940,6 +940,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
|
|||
synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
|
||||
synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
|
||||
synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
|
||||
synth_opts->initial_skip = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1064,6 +1065,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
|
|||
synth_opts->last_branch_sz = val;
|
||||
}
|
||||
break;
|
||||
case 's':
|
||||
synth_opts->initial_skip = strtoul(p, &endptr, 10);
|
||||
if (p == endptr)
|
||||
goto out_err;
|
||||
p = endptr;
|
||||
break;
|
||||
case ' ':
|
||||
case ',':
|
||||
break;
|
||||
|
|
|
@ -68,6 +68,7 @@ enum itrace_period_type {
|
|||
* @last_branch_sz: branch context size
|
||||
* @period: 'instructions' events period
|
||||
* @period_type: 'instructions' events period type
|
||||
* @initial_skip: skip N events at the beginning.
|
||||
*/
|
||||
struct itrace_synth_opts {
|
||||
bool set;
|
||||
|
@ -86,6 +87,7 @@ struct itrace_synth_opts {
|
|||
unsigned int last_branch_sz;
|
||||
unsigned long long period;
|
||||
enum itrace_period_type period_type;
|
||||
unsigned long initial_skip;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
@ -66,6 +66,7 @@ struct intel_bts {
|
|||
u64 branches_id;
|
||||
size_t branches_event_size;
|
||||
bool synth_needs_swap;
|
||||
unsigned long num_events;
|
||||
};
|
||||
|
||||
struct intel_bts_queue {
|
||||
|
@ -275,6 +276,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
|
|||
union perf_event event;
|
||||
struct perf_sample sample = { .ip = 0, };
|
||||
|
||||
if (bts->synth_opts.initial_skip &&
|
||||
bts->num_events++ <= bts->synth_opts.initial_skip)
|
||||
return 0;
|
||||
|
||||
event.sample.header.type = PERF_RECORD_SAMPLE;
|
||||
event.sample.header.misc = PERF_RECORD_MISC_USER;
|
||||
event.sample.header.size = sizeof(struct perf_event_header);
|
||||
|
|
|
@ -100,6 +100,8 @@ struct intel_pt {
|
|||
u64 cyc_bit;
|
||||
u64 noretcomp_bit;
|
||||
unsigned max_non_turbo_ratio;
|
||||
|
||||
unsigned long num_events;
|
||||
};
|
||||
|
||||
enum switch_state {
|
||||
|
@ -972,6 +974,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
|
|||
if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
|
||||
return 0;
|
||||
|
||||
if (pt->synth_opts.initial_skip &&
|
||||
pt->num_events++ < pt->synth_opts.initial_skip)
|
||||
return 0;
|
||||
|
||||
event->sample.header.type = PERF_RECORD_SAMPLE;
|
||||
event->sample.header.misc = PERF_RECORD_MISC_USER;
|
||||
event->sample.header.size = sizeof(struct perf_event_header);
|
||||
|
@ -1029,6 +1035,10 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
|
|||
union perf_event *event = ptq->event_buf;
|
||||
struct perf_sample sample = { .ip = 0, };
|
||||
|
||||
if (pt->synth_opts.initial_skip &&
|
||||
pt->num_events++ < pt->synth_opts.initial_skip)
|
||||
return 0;
|
||||
|
||||
event->sample.header.type = PERF_RECORD_SAMPLE;
|
||||
event->sample.header.misc = PERF_RECORD_MISC_USER;
|
||||
event->sample.header.size = sizeof(struct perf_event_header);
|
||||
|
@ -1087,6 +1097,10 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
|
|||
union perf_event *event = ptq->event_buf;
|
||||
struct perf_sample sample = { .ip = 0, };
|
||||
|
||||
if (pt->synth_opts.initial_skip &&
|
||||
pt->num_events++ < pt->synth_opts.initial_skip)
|
||||
return 0;
|
||||
|
||||
event->sample.header.type = PERF_RECORD_SAMPLE;
|
||||
event->sample.header.misc = PERF_RECORD_MISC_USER;
|
||||
event->sample.header.size = sizeof(struct perf_event_header);
|
||||
|
@ -1199,14 +1213,18 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
|
|||
ptq->have_sample = false;
|
||||
|
||||
if (pt->sample_instructions &&
|
||||
(state->type & INTEL_PT_INSTRUCTION)) {
|
||||
(state->type & INTEL_PT_INSTRUCTION) &&
|
||||
(!pt->synth_opts.initial_skip ||
|
||||
pt->num_events++ >= pt->synth_opts.initial_skip)) {
|
||||
err = intel_pt_synth_instruction_sample(ptq);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
if (pt->sample_transactions &&
|
||||
(state->type & INTEL_PT_TRANSACTION)) {
|
||||
(state->type & INTEL_PT_TRANSACTION) &&
|
||||
(!pt->synth_opts.initial_skip ||
|
||||
pt->num_events++ >= pt->synth_opts.initial_skip)) {
|
||||
err = intel_pt_synth_transaction_sample(ptq);
|
||||
if (err)
|
||||
return err;
|
||||
|
|
Loading…
Reference in New Issue