perf script: Add output of IPC ratio
Add field 'ipc' to display instructions-per-cycle. Example: perf record -e intel_pt/cyc/u ls perf script --insn-trace --xed -F+ipc,-dso,-cpu,-tid ls 2670177.697113434: 7f0dfdbcd090 _start+0x0 mov %rsp, %rdi IPC: 0.00 (1/877) ls 2670177.697113434: 7f0dfdbcd093 _start+0x3 callq 0x7f0dfdbce030 ls 2670177.697113434: 7f0dfdbce030 _dl_start+0x0 pushq %rbp ls 2670177.697113434: 7f0dfdbce031 _dl_start+0x1 mov %rsp, %rbp ls 2670177.697113434: 7f0dfdbce034 _dl_start+0x4 pushq %r15 ls 2670177.697113434: 7f0dfdbce036 _dl_start+0x6 pushq %r14 ls 2670177.697113434: 7f0dfdbce038 _dl_start+0x8 pushq %r13 ls 2670177.697113434: 7f0dfdbce03a _dl_start+0xa pushq %r12 ls 2670177.697113434: 7f0dfdbce03c _dl_start+0xc mov %rdi, %r12 ls 2670177.697113434: 7f0dfdbce03f _dl_start+0xf pushq %rbx ls 2670177.697113434: 7f0dfdbce040 _dl_start+0x10 sub $0x38, %rsp ls 2670177.697113434: 7f0dfdbce044 _dl_start+0x14 rdtsc ls 2670177.697113434: 7f0dfdbce046 _dl_start+0x16 mov %eax, %eax ls 2670177.697113434: 7f0dfdbce048 _dl_start+0x18 shl $0x20, %rdx ls 2670177.697113434: 7f0dfdbce04c _dl_start+0x1c or %rax, %rdx ls 2670177.697114471: 7f0dfdbce04f _dl_start+0x1f movq 0x27e22(%rip), %rax IPC: 0.00 (15/1685) ls 2670177.697116177: 7f0dfdbce056 _dl_start+0x26 movq %rdx, 0x27683(%rip) IPC: 0.00 (1/881) Note, the IPC values are low due to page faults at the beginning of execution. The additional cycles are due to the time to enter the kernel, not the actual kernel page fault handler. Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Link: http://lkml.kernel.org/r/20190520113728.14389-9-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
5b1dc0fd1d
commit
68fb45bf17
|
@ -117,7 +117,7 @@ OPTIONS
|
|||
Comma separated list of fields to print. Options are:
|
||||
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
|
||||
srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
|
||||
brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode.
|
||||
brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode, ipc.
|
||||
Field list can be prepended with the type, trace, sw or hw,
|
||||
to indicate to which event type the field list applies.
|
||||
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
|
||||
|
@ -203,6 +203,9 @@ OPTIONS
|
|||
The synth field is used by synthesized events which may be created when
|
||||
Instruction Trace decoding.
|
||||
|
||||
The ipc (instructions per cycle) field is synthesized and may have a value when
|
||||
Instruction Trace decoding.
|
||||
|
||||
Finally, a user may not set fields to none for all event types.
|
||||
i.e., -F "" is not allowed.
|
||||
|
||||
|
|
|
@ -102,6 +102,7 @@ enum perf_output_field {
|
|||
PERF_OUTPUT_METRIC = 1U << 28,
|
||||
PERF_OUTPUT_MISC = 1U << 29,
|
||||
PERF_OUTPUT_SRCCODE = 1U << 30,
|
||||
PERF_OUTPUT_IPC = 1U << 31,
|
||||
};
|
||||
|
||||
struct output_option {
|
||||
|
@ -139,6 +140,7 @@ struct output_option {
|
|||
{.str = "metric", .field = PERF_OUTPUT_METRIC},
|
||||
{.str = "misc", .field = PERF_OUTPUT_MISC},
|
||||
{.str = "srccode", .field = PERF_OUTPUT_SRCCODE},
|
||||
{.str = "ipc", .field = PERF_OUTPUT_IPC},
|
||||
};
|
||||
|
||||
enum {
|
||||
|
@ -1268,6 +1270,20 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample,
|
|||
return printed;
|
||||
}
|
||||
|
||||
static int perf_sample__fprintf_ipc(struct perf_sample *sample,
|
||||
struct perf_event_attr *attr, FILE *fp)
|
||||
{
|
||||
unsigned int ipc;
|
||||
|
||||
if (!PRINT_FIELD(IPC) || !sample->cyc_cnt || !sample->insn_cnt)
|
||||
return 0;
|
||||
|
||||
ipc = (sample->insn_cnt * 100) / sample->cyc_cnt;
|
||||
|
||||
return fprintf(fp, " \t IPC: %u.%02u (%" PRIu64 "/%" PRIu64 ") ",
|
||||
ipc / 100, ipc % 100, sample->insn_cnt, sample->cyc_cnt);
|
||||
}
|
||||
|
||||
static int perf_sample__fprintf_bts(struct perf_sample *sample,
|
||||
struct perf_evsel *evsel,
|
||||
struct thread *thread,
|
||||
|
@ -1312,6 +1328,8 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
|
|||
printed += perf_sample__fprintf_addr(sample, thread, attr, fp);
|
||||
}
|
||||
|
||||
printed += perf_sample__fprintf_ipc(sample, attr, fp);
|
||||
|
||||
if (print_srcline_last)
|
||||
printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp);
|
||||
|
||||
|
@ -1859,6 +1877,9 @@ static void process_event(struct perf_script *script,
|
|||
|
||||
if (PRINT_FIELD(PHYS_ADDR))
|
||||
fprintf(fp, "%16" PRIx64, sample->phys_addr);
|
||||
|
||||
perf_sample__fprintf_ipc(sample, attr, fp);
|
||||
|
||||
fprintf(fp, "\n");
|
||||
|
||||
if (PRINT_FIELD(SRCCODE)) {
|
||||
|
@ -3433,7 +3454,7 @@ int cmd_script(int argc, const char **argv)
|
|||
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
|
||||
"addr,symoff,srcline,period,iregs,uregs,brstack,"
|
||||
"brstacksym,flags,bpf-output,brstackinsn,brstackoff,"
|
||||
"callindent,insn,insnlen,synth,phys_addr,metric,misc",
|
||||
"callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc",
|
||||
parse_output_fields),
|
||||
OPT_BOOLEAN('a', "all-cpus", &system_wide,
|
||||
"system-wide collection from all CPUs"),
|
||||
|
|
Loading…
Reference in New Issue