2009-09-25 00:02:18 +08:00
|
|
|
#ifndef __PERF_THREAD_H
|
|
|
|
#define __PERF_THREAD_H
|
|
|
|
|
2009-08-14 18:21:53 +08:00
|
|
|
#include <linux/rbtree.h>
|
|
|
|
#include <unistd.h>
|
2012-10-07 02:43:20 +08:00
|
|
|
#include <sys/types.h>
|
2009-08-14 18:21:53 +08:00
|
|
|
#include "symbol.h"
|
|
|
|
|
2009-12-12 00:50:36 +08:00
|
|
|
struct thread {
|
2010-06-17 19:37:44 +08:00
|
|
|
union {
|
|
|
|
struct rb_node rb_node;
|
|
|
|
struct list_head node;
|
|
|
|
};
|
2009-12-12 00:50:36 +08:00
|
|
|
struct map_groups mg;
|
|
|
|
pid_t pid;
|
2009-09-16 23:40:48 +08:00
|
|
|
char shortname[3];
|
2010-02-20 09:02:07 +08:00
|
|
|
bool comm_set;
|
2009-09-11 18:12:54 +08:00
|
|
|
char *comm;
|
perf tools: Bind callchains to the first sort dimension column
Currently, the callchains are displayed using a constant left
margin. So depending on the current sort dimension
configuration, callchains may appear to be well attached to the
first sort dimension column field which is mostly the case,
except when the first dimension of sorting is done by comm,
because these are right aligned.
This patch binds the callchain to the first letter in the first
column, whatever type of column it is (dso, comm, symbol).
Before:
0.80% perf [k] __lock_acquire
__lock_acquire
lock_acquire
|
|--58.33%-- _spin_lock
| |
| |--28.57%-- inotify_should_send_event
| | fsnotify
| | __fsnotify_parent
After:
0.80% perf [k] __lock_acquire
__lock_acquire
lock_acquire
|
|--58.33%-- _spin_lock
| |
| |--28.57%-- inotify_should_send_event
| | fsnotify
| | __fsnotify_parent
Also, for clarity, we don't put anymore the callchain as is but:
- If we have a top level ancestor in the callchain, start it
with a first ascii hook.
Before:
0.80% perf [kernel] [k] __lock_acquire
__lock_acquire
lock_acquire
|
|--58.33%-- _spin_lock
| |
| |--28.57%-- inotify_should_send_event
| | fsnotify
[..] [..]
After:
0.80% perf [kernel] [k] __lock_acquire
|
--- __lock_acquire
lock_acquire
|
|--58.33%-- _spin_lock
| |
| |--28.57%-- inotify_should_send_event
| | fsnotify
[..] [..]
- Otherwise, if we have several top level ancestors, then
display these like we did before:
1.69% Xorg
|
|--21.21%-- vread_hpet
| 0x7fffd85b46fc
| 0x7fffd85b494d
| 0x7f4fafb4e54d
|
|--15.15%-- exaOffscreenAlloc
|
|--9.09%-- I830WaitLpRing
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
LKML-Reference: <1256246604-17156-2-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-23 05:23:23 +08:00
|
|
|
int comm_len;
|
perf kvm: Events analysis tool
Add 'perf kvm stat' support to analyze kvm vmexit/mmio/ioport smartly
Usage:
- kvm stat
run a command and gather performance counter statistics, it is the alias of
perf stat
- trace kvm events:
perf kvm stat record, or, if other tracepoints are interesting as well, we
can append the events like this:
perf kvm stat record -e timer:* -a
If many guests are running, we can track the specified guest by using -p or
--pid, -a is used to track events generated by all guests.
- show the result:
perf kvm stat report
The output example is following:
13005
13059
total 2 guests are running on the host
Then, track the guest whose pid is 13059:
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.253 MB perf.data.guest (~11065 samples) ]
See the vmexit events:
Analyze events for all VCPUs:
VM-EXIT Samples Samples% Time% Avg time
APIC_ACCESS 460 70.55% 0.01% 22.44us ( +- 1.75% )
HLT 93 14.26% 99.98% 832077.26us ( +- 10.42% )
EXTERNAL_INTERRUPT 64 9.82% 0.00% 35.35us ( +- 14.21% )
PENDING_INTERRUPT 24 3.68% 0.00% 9.29us ( +- 31.39% )
CR_ACCESS 7 1.07% 0.00% 8.12us ( +- 5.76% )
IO_INSTRUCTION 3 0.46% 0.00% 18.00us ( +- 11.79% )
EXCEPTION_NMI 1 0.15% 0.00% 5.83us ( +- -nan% )
Total Samples:652, Total events handled time:77396109.80us.
See the mmio events:
Analyze events for all VCPUs:
MMIO Access Samples Samples% Time% Avg time
0xfee00380:W 387 84.31% 79.28% 8.29us ( +- 3.32% )
0xfee00300:W 24 5.23% 9.96% 16.79us ( +- 1.97% )
0xfee00300:R 24 5.23% 7.83% 13.20us ( +- 3.00% )
0xfee00310:W 24 5.23% 2.93% 4.94us ( +- 3.84% )
Total Samples:459, Total events handled time:4044.59us.
See the ioport event:
Analyze events for all VCPUs:
IO Port Access Samples Samples% Time% Avg time
0xc050:POUT 3 100.00% 100.00% 13.75us ( +- 10.83% )
Total Samples:3, Total events handled time:41.26us.
And, --vcpu is used to track the specified vcpu and --key is used to sort the
result:
Analyze events for VCPU 0:
VM-EXIT Samples Samples% Time% Avg time
HLT 27 13.85% 99.97% 405790.24us ( +- 12.70% )
EXTERNAL_INTERRUPT 13 6.67% 0.00% 27.94us ( +- 22.26% )
APIC_ACCESS 146 74.87% 0.03% 21.69us ( +- 2.91% )
IO_INSTRUCTION 2 1.03% 0.00% 17.77us ( +- 20.56% )
CR_ACCESS 2 1.03% 0.00% 8.55us ( +- 6.47% )
PENDING_INTERRUPT 5 2.56% 0.00% 6.27us ( +- 3.94% )
Total Samples:195, Total events handled time:10959950.90us.
Signed-off-by: Dong Hao <haodong@linux.vnet.ibm.com>
Signed-off-by: Runzhen Wang <runzhen@linux.vnet.ibm.com>
[ Dong Hao <haodong@linux.vnet.ibm.com>
Runzhen Wang <runzhen@linux.vnet.ibm.com>:
- rebase it on current acme's tree
- fix the compiling-error on i386 ]
Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Acked-by: David Ahern <dsahern@gmail.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: kvm@vger.kernel.org
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1347870675-31495-4-git-send-email-haodong@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2012-09-17 16:31:15 +08:00
|
|
|
|
|
|
|
void *priv;
|
2009-08-14 18:21:53 +08:00
|
|
|
};
|
|
|
|
|
2011-11-28 17:56:39 +08:00
|
|
|
struct machine;
|
2010-03-26 06:58:58 +08:00
|
|
|
|
2012-10-07 02:43:20 +08:00
|
|
|
struct thread *thread__new(pid_t pid);
|
2010-07-31 05:28:42 +08:00
|
|
|
void thread__delete(struct thread *self);
|
|
|
|
|
2009-08-14 18:21:53 +08:00
|
|
|
int thread__set_comm(struct thread *self, const char *comm);
|
perf tools: Bind callchains to the first sort dimension column
Currently, the callchains are displayed using a constant left
margin. So depending on the current sort dimension
configuration, callchains may appear to be well attached to the
first sort dimension column field which is mostly the case,
except when the first dimension of sorting is done by comm,
because these are right aligned.
This patch binds the callchain to the first letter in the first
column, whatever type of column it is (dso, comm, symbol).
Before:
0.80% perf [k] __lock_acquire
__lock_acquire
lock_acquire
|
|--58.33%-- _spin_lock
| |
| |--28.57%-- inotify_should_send_event
| | fsnotify
| | __fsnotify_parent
After:
0.80% perf [k] __lock_acquire
__lock_acquire
lock_acquire
|
|--58.33%-- _spin_lock
| |
| |--28.57%-- inotify_should_send_event
| | fsnotify
| | __fsnotify_parent
Also, for clarity, we don't put anymore the callchain as is but:
- If we have a top level ancestor in the callchain, start it
with a first ascii hook.
Before:
0.80% perf [kernel] [k] __lock_acquire
__lock_acquire
lock_acquire
|
|--58.33%-- _spin_lock
| |
| |--28.57%-- inotify_should_send_event
| | fsnotify
[..] [..]
After:
0.80% perf [kernel] [k] __lock_acquire
|
--- __lock_acquire
lock_acquire
|
|--58.33%-- _spin_lock
| |
| |--28.57%-- inotify_should_send_event
| | fsnotify
[..] [..]
- Otherwise, if we have several top level ancestors, then
display these like we did before:
1.69% Xorg
|
|--21.21%-- vread_hpet
| 0x7fffd85b46fc
| 0x7fffd85b494d
| 0x7f4fafb4e54d
|
|--15.15%-- exaOffscreenAlloc
|
|--9.09%-- I830WaitLpRing
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
LKML-Reference: <1256246604-17156-2-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-23 05:23:23 +08:00
|
|
|
int thread__comm_len(struct thread *self);
|
2009-08-14 18:21:53 +08:00
|
|
|
void thread__insert_map(struct thread *self, struct map *map);
|
|
|
|
int thread__fork(struct thread *self, struct thread *parent);
|
2012-12-08 04:39:39 +08:00
|
|
|
size_t thread__fprintf(struct thread *thread, FILE *fp);
|
2009-09-25 00:02:18 +08:00
|
|
|
|
2009-12-12 00:50:36 +08:00
|
|
|
static inline struct map *thread__find_map(struct thread *self,
|
|
|
|
enum map_type type, u64 addr)
|
2009-11-28 02:29:20 +08:00
|
|
|
{
|
2009-12-12 00:50:36 +08:00
|
|
|
return self ? map_groups__find(&self->mg, type, addr) : NULL;
|
2009-11-28 02:29:20 +08:00
|
|
|
}
|
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one will, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-28 02:29:23 +08:00
|
|
|
|
2011-11-28 17:56:39 +08:00
|
|
|
void thread__find_addr_map(struct thread *thread, struct machine *machine,
|
|
|
|
u8 cpumode, enum map_type type, u64 addr,
|
2010-01-15 09:45:29 +08:00
|
|
|
struct addr_location *al);
|
|
|
|
|
2011-11-28 17:56:39 +08:00
|
|
|
void thread__find_addr_location(struct thread *thread, struct machine *machine,
|
|
|
|
u8 cpumode, enum map_type type, u64 addr,
|
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one will, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-28 02:29:23 +08:00
|
|
|
struct addr_location *al,
|
|
|
|
symbol_filter_t filter);
|
2009-09-25 00:02:18 +08:00
|
|
|
#endif /* __PERF_THREAD_H */
|