perf report: Add branch flag to callchain cursor node

Since the branch ip has been added to the call stack for easier browsing,
this patch adds more branch information to each callchain cursor node: a
flag indicating whether the ip is a branch, and the branch flags themselves.

Then we can tell whether a cursor node represents a branch and which
branch flags it has.

The branch history code has a loop detection pass that removes loops. It
is useful to know how many loop iterations were removed so that, in later
steps, we can compute the average number of iterations.

For example:

Before remove_loops(),
entry0: from = 0x100, to = 0x200
entry1: from = 0x300, to = 0x250
entry2: from = 0x300, to = 0x250
entry3: from = 0x300, to = 0x250
entry4: from = 0x700, to = 0x800

After remove_loops()
entry0: from = 0x100, to = 0x200
entry1: from = 0x300, to = 0x250
entry2: from = 0x700, to = 0x800

The original entry2 and entry3 are removed. So the number of iterations of
the loop (from = 0x300, to = 0x250) is equal to the number of removed
entries + 1 (2 + 1 = 3).

iterations = removed number + 1;
average iterations = Sum(iterations) / number of samples

This formula ignores other cases, for example, iterations that cross
multiple buffers, or a single buffer that contains 2+ loops, because in
practice the approximation is good enough.

Signed-off-by: Yao Jin <yao.jin@linux.intel.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linux-kernel@vger.kernel.org
Cc: Yao Jin <yao.jin@linux.intel.com>
Link: http://lkml.kernel.org/n/1477876794-30749-2-git-send-email-yao.jin@linux.intel.com
[ Renamed 'iter' to 'nr_loop_iter' for clarity ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Jin Yao 2016-10-31 09:19:49 +08:00 committed by Arnaldo Carvalho de Melo
parent 08d090cfed
commit 410024dbbc
3 changed files with 86 additions and 18 deletions

View File

@ -728,7 +728,8 @@ merge_chain_branch(struct callchain_cursor *cursor,
list_for_each_entry_safe(list, next_list, &src->val, list) {
callchain_cursor_append(cursor, list->ip,
list->ms.map, list->ms.sym);
list->ms.map, list->ms.sym,
false, NULL, 0, 0);
list_del(&list->list);
free(list);
}
@ -765,7 +766,9 @@ int callchain_merge(struct callchain_cursor *cursor,
}
int callchain_cursor_append(struct callchain_cursor *cursor,
u64 ip, struct map *map, struct symbol *sym)
u64 ip, struct map *map, struct symbol *sym,
bool branch, struct branch_flags *flags,
int nr_loop_iter, int samples)
{
struct callchain_cursor_node *node = *cursor->last;
@ -780,6 +783,13 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
node->ip = ip;
node->map = map;
node->sym = sym;
node->branch = branch;
node->nr_loop_iter = nr_loop_iter;
node->samples = samples;
if (flags)
memcpy(&node->branch_flags, flags,
sizeof(struct branch_flags));
cursor->nr++;

View File

@ -125,6 +125,10 @@ struct callchain_cursor_node {
u64 ip;
struct map *map;
struct symbol *sym;
bool branch;
struct branch_flags branch_flags;
int nr_loop_iter;
int samples;
struct callchain_cursor_node *next;
};
@ -179,7 +183,9 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
}
int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
struct map *map, struct symbol *sym);
struct map *map, struct symbol *sym,
bool branch, struct branch_flags *flags,
int nr_loop_iter, int samples);
/* Close a cursor writing session. Initialize for the reader */
static inline void callchain_cursor_commit(struct callchain_cursor *cursor)

View File

@ -1616,7 +1616,11 @@ static int add_callchain_ip(struct thread *thread,
struct symbol **parent,
struct addr_location *root_al,
u8 *cpumode,
u64 ip)
u64 ip,
bool branch,
struct branch_flags *flags,
int nr_loop_iter,
int samples)
{
struct addr_location al;
@ -1668,7 +1672,8 @@ static int add_callchain_ip(struct thread *thread,
if (symbol_conf.hide_unresolved && al.sym == NULL)
return 0;
return callchain_cursor_append(cursor, al.addr, al.map, al.sym);
return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
branch, flags, nr_loop_iter, samples);
}
struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
@ -1757,7 +1762,9 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
/* LBR only affects the user callchain */
if (i != chain_nr) {
struct branch_stack *lbr_stack = sample->branch_stack;
int lbr_nr = lbr_stack->nr, j;
int lbr_nr = lbr_stack->nr, j, k;
bool branch;
struct branch_flags *flags;
/*
* LBR callstack can only get user call chain.
* The mix_chain_nr is kernel call chain
@ -1772,23 +1779,41 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
for (j = 0; j < mix_chain_nr; j++) {
int err;
branch = false;
flags = NULL;
if (callchain_param.order == ORDER_CALLEE) {
if (j < i + 1)
ip = chain->ips[j];
else if (j > i + 1)
ip = lbr_stack->entries[j - i - 2].from;
else
else if (j > i + 1) {
k = j - i - 2;
ip = lbr_stack->entries[k].from;
branch = true;
flags = &lbr_stack->entries[k].flags;
} else {
ip = lbr_stack->entries[0].to;
branch = true;
flags = &lbr_stack->entries[0].flags;
}
} else {
if (j < lbr_nr)
ip = lbr_stack->entries[lbr_nr - j - 1].from;
if (j < lbr_nr) {
k = lbr_nr - j - 1;
ip = lbr_stack->entries[k].from;
branch = true;
flags = &lbr_stack->entries[k].flags;
}
else if (j > lbr_nr)
ip = chain->ips[i + 1 - (j - lbr_nr)];
else
else {
ip = lbr_stack->entries[0].to;
branch = true;
flags = &lbr_stack->entries[0].flags;
}
}
err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip);
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
branch, flags, 0, 0);
if (err)
return (err < 0) ? err : 0;
}
@ -1813,6 +1838,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
int i, j, err, nr_entries;
int skip_idx = -1;
int first_call = 0;
int nr_loop_iter;
if (perf_evsel__has_branch_callstack(evsel)) {
err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
@ -1868,14 +1894,37 @@ static int thread__resolve_callchain_sample(struct thread *thread,
be[i] = branch->entries[branch->nr - i - 1];
}
nr_loop_iter = nr;
nr = remove_loops(be, nr);
/*
* Get the number of iterations.
* It's only approximation, but good enough in practice.
*/
if (nr_loop_iter > nr)
nr_loop_iter = nr_loop_iter - nr + 1;
else
nr_loop_iter = 0;
for (i = 0; i < nr; i++) {
err = add_callchain_ip(thread, cursor, parent, root_al,
NULL, be[i].to);
if (i == nr - 1)
err = add_callchain_ip(thread, cursor, parent,
root_al,
NULL, be[i].to,
true, &be[i].flags,
nr_loop_iter, 1);
else
err = add_callchain_ip(thread, cursor, parent,
root_al,
NULL, be[i].to,
true, &be[i].flags,
0, 0);
if (!err)
err = add_callchain_ip(thread, cursor, parent, root_al,
NULL, be[i].from);
NULL, be[i].from,
true, &be[i].flags,
0, 0);
if (err == -EINVAL)
break;
if (err)
@ -1903,7 +1952,9 @@ check_calls:
if (ip < PERF_CONTEXT_MAX)
++nr_entries;
err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip);
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
false, NULL, 0, 0);
if (err)
return (err < 0) ? err : 0;
@ -1919,7 +1970,8 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
if (symbol_conf.hide_unresolved && entry->sym == NULL)
return 0;
return callchain_cursor_append(cursor, entry->ip,
entry->map, entry->sym);
entry->map, entry->sym,
false, NULL, 0, 0);
}
static int thread__resolve_callchain_unwind(struct thread *thread,