/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * thread-stack.h: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 */

#ifndef __PERF_THREAD_STACK_H
#define __PERF_THREAD_STACK_H

#include <sys/types.h>

#include <linux/types.h>

/* Opaque types used only via pointers below; full definitions not needed here. */
struct thread;
struct comm;
struct ip_callchain;
struct symbol;
struct dso;
struct perf_sample;
struct addr_location;
struct call_path;

/*
 * Call/Return flags.
 *
 * CALL_RETURN_NO_CALL: 'return' but no matching 'call'
 * CALL_RETURN_NO_RETURN: 'call' but no matching 'return'
 * CALL_RETURN_NON_CALL: a branch but not a 'call' to the start of a different
 *                       symbol
 */
enum {
	CALL_RETURN_NO_CALL	= 1 << 0,
	CALL_RETURN_NO_RETURN	= 1 << 1,
	CALL_RETURN_NON_CALL	= 1 << 2,
};

/**
|
|
|
|
* struct call_return - paired call/return information.
|
|
|
|
* @thread: thread in which call/return occurred
|
|
|
|
* @comm: comm in which call/return occurred
|
|
|
|
* @cp: call path
|
|
|
|
* @call_time: timestamp of call (if known)
|
|
|
|
* @return_time: timestamp of return (if known)
|
|
|
|
* @branch_count: number of branches seen between call and return
|
2019-05-20 19:37:19 +08:00
|
|
|
* @insn_count: approx. number of instructions between call and return
|
|
|
|
* @cyc_count: approx. number of cycles between call and return
|
2014-10-30 22:09:45 +08:00
|
|
|
* @call_ref: external reference to 'call' sample (e.g. db_id)
|
|
|
|
* @return_ref: external reference to 'return' sample (e.g. db_id)
|
|
|
|
* @db_id: id used for db-export
|
2019-02-28 21:00:24 +08:00
|
|
|
* @parent_db_id: id of parent call used for db-export
|
2014-10-30 22:09:45 +08:00
|
|
|
* @flags: Call/Return flags
|
|
|
|
*/
|
|
|
|
struct call_return {
|
|
|
|
struct thread *thread;
|
|
|
|
struct comm *comm;
|
|
|
|
struct call_path *cp;
|
|
|
|
u64 call_time;
|
|
|
|
u64 return_time;
|
|
|
|
u64 branch_count;
|
2019-05-20 19:37:19 +08:00
|
|
|
u64 insn_count;
|
|
|
|
u64 cyc_count;
|
2014-10-30 22:09:45 +08:00
|
|
|
u64 call_ref;
|
|
|
|
u64 return_ref;
|
|
|
|
u64 db_id;
|
2019-02-28 21:00:24 +08:00
|
|
|
u64 parent_db_id;
|
2014-10-30 22:09:45 +08:00
|
|
|
u32 flags;
|
|
|
|
};
|
|
|
|
|
2016-04-28 16:19:10 +08:00
|
|
|
/**
|
|
|
|
* struct call_return_processor - provides a call-back to consume call-return
|
|
|
|
* information.
|
|
|
|
* @cpr: call path root
|
|
|
|
* @process: call-back that accepts call/return information
|
|
|
|
* @data: anonymous data for call-back
|
|
|
|
*/
|
|
|
|
struct call_return_processor {
|
|
|
|
struct call_path_root *cpr;
|
2019-02-28 21:00:24 +08:00
|
|
|
int (*process)(struct call_return *cr, u64 *parent_db_id, void *data);
|
2016-04-28 16:19:10 +08:00
|
|
|
void *data;
|
|
|
|
};
|
|
|
|
|
2018-12-21 20:06:19 +08:00
|
|
|
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
|
2020-04-29 23:07:43 +08:00
|
|
|
u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
|
|
|
|
unsigned int br_stack_sz, bool mispred_all);
|
2018-12-21 20:06:19 +08:00
|
|
|
void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr);
|
|
|
|
void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain,
|
2018-10-31 17:10:42 +08:00
|
|
|
size_t sz, u64 ip, u64 kernel_start);
|
2020-04-01 18:16:06 +08:00
|
|
|
void thread_stack__sample_late(struct thread *thread, int cpu,
|
|
|
|
struct ip_callchain *chain, size_t sz, u64 ip,
|
|
|
|
u64 kernel_start);
|
2020-04-29 23:07:43 +08:00
|
|
|
void thread_stack__br_sample(struct thread *thread, int cpu,
|
|
|
|
struct branch_stack *dst, unsigned int sz);
|
2020-04-29 23:07:48 +08:00
|
|
|
void thread_stack__br_sample_late(struct thread *thread, int cpu,
|
|
|
|
struct branch_stack *dst, unsigned int sz,
|
|
|
|
u64 sample_ip, u64 kernel_start);
|
2015-05-29 21:33:30 +08:00
|
|
|
int thread_stack__flush(struct thread *thread);
|
2014-10-30 22:09:42 +08:00
|
|
|
void thread_stack__free(struct thread *thread);
|
2018-12-21 20:06:19 +08:00
|
|
|
size_t thread_stack__depth(struct thread *thread, int cpu);
|
2014-10-30 22:09:42 +08:00
|
|
|
|
2014-10-30 22:09:45 +08:00
|
|
|
struct call_return_processor *
|
2019-02-28 21:00:24 +08:00
|
|
|
call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
|
2014-10-30 22:09:45 +08:00
|
|
|
void *data);
|
|
|
|
void call_return_processor__free(struct call_return_processor *crp);
|
|
|
|
int thread_stack__process(struct thread *thread, struct comm *comm,
|
|
|
|
struct perf_sample *sample,
|
|
|
|
struct addr_location *from_al,
|
|
|
|
struct addr_location *to_al, u64 ref,
|
|
|
|
struct call_return_processor *crp);
|
|
|
|
|

#endif