2009-11-23 22:42:35 +08:00
|
|
|
#include "../../../include/linux/hw_breakpoint.h"
|
2009-05-26 17:10:09 +08:00
|
|
|
#include "util.h"
|
2009-09-05 03:39:51 +08:00
|
|
|
#include "../perf.h"
|
2011-01-12 06:56:53 +08:00
|
|
|
#include "evlist.h"
|
2011-01-04 02:39:04 +08:00
|
|
|
#include "evsel.h"
|
2009-05-26 17:10:09 +08:00
|
|
|
#include "parse-options.h"
|
|
|
|
#include "parse-events.h"
|
|
|
|
#include "exec_cmd.h"
|
2009-06-02 04:50:19 +08:00
|
|
|
#include "string.h"
|
2010-03-26 06:59:00 +08:00
|
|
|
#include "symbol.h"
|
2009-07-22 02:16:29 +08:00
|
|
|
#include "cache.h"
|
2009-09-12 13:52:51 +08:00
|
|
|
#include "header.h"
|
2009-11-08 23:03:07 +08:00
|
|
|
#include "debugfs.h"
|
2012-03-16 03:09:15 +08:00
|
|
|
#include "parse-events-flex.h"
|
2012-03-16 03:09:18 +08:00
|
|
|
#include "pmu.h"
|
2012-03-16 03:09:15 +08:00
|
|
|
|
|
|
|
#define MAX_NAME_LEN 100
|
2009-05-26 17:10:09 +08:00
|
|
|
|
|
|
|
struct event_symbol {
|
2009-08-15 18:26:57 +08:00
|
|
|
u8 type;
|
|
|
|
u64 config;
|
|
|
|
const char *symbol;
|
|
|
|
const char *alias;
|
2009-05-26 17:10:09 +08:00
|
|
|
};
|
|
|
|
|
2012-03-21 02:15:40 +08:00
|
|
|
int parse_events_parse(struct list_head *list, struct list_head *list_tmp,
|
|
|
|
int *idx);
|
2009-09-12 05:19:45 +08:00
|
|
|
|
2009-06-22 19:13:14 +08:00
|
|
|
#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x
|
|
|
|
#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x
|
2009-06-06 15:58:57 +08:00
|
|
|
|
2009-05-26 17:10:09 +08:00
|
|
|
static struct event_symbol event_symbols[] = {
|
2011-04-29 20:41:28 +08:00
|
|
|
{ CHW(CPU_CYCLES), "cpu-cycles", "cycles" },
|
|
|
|
{ CHW(STALLED_CYCLES_FRONTEND), "stalled-cycles-frontend", "idle-cycles-frontend" },
|
|
|
|
{ CHW(STALLED_CYCLES_BACKEND), "stalled-cycles-backend", "idle-cycles-backend" },
|
|
|
|
{ CHW(INSTRUCTIONS), "instructions", "" },
|
|
|
|
{ CHW(CACHE_REFERENCES), "cache-references", "" },
|
|
|
|
{ CHW(CACHE_MISSES), "cache-misses", "" },
|
|
|
|
{ CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" },
|
|
|
|
{ CHW(BRANCH_MISSES), "branch-misses", "" },
|
|
|
|
{ CHW(BUS_CYCLES), "bus-cycles", "" },
|
2011-12-11 07:28:54 +08:00
|
|
|
{ CHW(REF_CPU_CYCLES), "ref-cycles", "" },
|
2011-04-29 20:41:28 +08:00
|
|
|
|
|
|
|
{ CSW(CPU_CLOCK), "cpu-clock", "" },
|
|
|
|
{ CSW(TASK_CLOCK), "task-clock", "" },
|
|
|
|
{ CSW(PAGE_FAULTS), "page-faults", "faults" },
|
|
|
|
{ CSW(PAGE_FAULTS_MIN), "minor-faults", "" },
|
|
|
|
{ CSW(PAGE_FAULTS_MAJ), "major-faults", "" },
|
|
|
|
{ CSW(CONTEXT_SWITCHES), "context-switches", "cs" },
|
|
|
|
{ CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" },
|
|
|
|
{ CSW(ALIGNMENT_FAULTS), "alignment-faults", "" },
|
|
|
|
{ CSW(EMULATION_FAULTS), "emulation-faults", "" },
|
2009-05-26 17:10:09 +08:00
|
|
|
};
|
|
|
|
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
/*
 * Extract bit-field @name from a packed event @config: mask with
 * PERF_EVENT_<name>_MASK, then shift right by PERF_EVENT_<name>_SHIFT.
 *
 * @config is parenthesized so that a compound argument (e.g. "a | b")
 * is masked as a whole instead of being split by operator precedence.
 */
#define __PERF_EVENT_FIELD(config, name) \
	(((config) & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT)
|
2009-05-26 15:17:18 +08:00
|
|
|
|
perf stat: Add stalled cycles to the default output
The new default output looks like this:
Performance counter stats for './loop_1b_instructions':
236.010686 task-clock # 0.996 CPUs utilized
0 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
99 page-faults # 0.000 M/sec
756,487,646 cycles # 3.205 GHz
354,938,996 stalled-cycles # 46.92% of all cycles are idle
1,001,403,797 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn
100,279,773 branches # 424.895 M/sec
12,646 branch-misses # 0.013 % of all branches
0.236902540 seconds time elapsed
We dropped cache-refs and cache-misses and added stalled-cycles - this is a
more generic "how well utilized is the CPU" metric.
If the stalled-cycles ratio is too high then more specific measurements can be
taken to figure out the source of the inefficiency.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-pbpl2l4mn797s69bclfpwkwn@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 11:20:22 +08:00
|
|
|
/* Extract the RAW field of @config (see __PERF_EVENT_FIELD). */
#define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW)
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
/* Extract the CONFIG field of @config (see __PERF_EVENT_FIELD). */
#define PERF_EVENT_CONFIG(config) __PERF_EVENT_FIELD(config, CONFIG)
|
perf stat: Add stalled cycles to the default output
The new default output looks like this:
Performance counter stats for './loop_1b_instructions':
236.010686 task-clock # 0.996 CPUs utilized
0 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
99 page-faults # 0.000 M/sec
756,487,646 cycles # 3.205 GHz
354,938,996 stalled-cycles # 46.92% of all cycles are idle
1,001,403,797 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn
100,279,773 branches # 424.895 M/sec
12,646 branch-misses # 0.013 % of all branches
0.236902540 seconds time elapsed
We dropped cache-refs and cache-misses and added stalled-cycles - this is a
more generic "how well utilized is the CPU" metric.
If the stalled-cycles ratio is too high then more specific measurements can be
taken to figure out the source of the inefficiency.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-pbpl2l4mn797s69bclfpwkwn@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 11:20:22 +08:00
|
|
|
/* Extract the TYPE field of @config (see __PERF_EVENT_FIELD). */
#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE)
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
/* Extract the EVENT field of @config (see __PERF_EVENT_FIELD). */
#define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT)
|
2009-05-26 15:17:18 +08:00
|
|
|
|
perf stat: Analyze front-end and back-end stall counts
Sample output:
Performance counter stats for './loop_1b':
873.691065 task-clock # 1.000 CPUs utilized
1 context-switches # 0.000 M/sec
1 CPU-migrations # 0.000 M/sec
96 page-faults # 0.000 M/sec
2,012,637,222 cycles # 2.304 GHz (66.58%)
1,001,397,911 stalled-cycles-frontend # 49.76% frontend cycles idle (66.58%)
7,523,398 stalled-cycles-backend # 0.37% backend cycles idle (66.76%)
2,004,551,046 instructions # 1.00 insns per cycle
# 0.50 stalled cycles per insn (66.80%)
1,001,304,992 branches # 1146.063 M/sec (66.76%)
39,453 branch-misses # 0.00% of all branches (66.64%)
0.874046121 seconds time elapsed
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n003io7hjpn1dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-29 19:49:08 +08:00
|
|
|
/*
 * Display names of the generic hardware events.  The array is sized by
 * PERF_COUNT_HW_MAX; entry order presumably follows the PERF_COUNT_HW_*
 * enum -- confirm against perf_event.h before reordering or inserting.
 */
static const char *hw_event_names[PERF_COUNT_HW_MAX] = {
	"cycles",
	"instructions",
	"cache-references",
	"cache-misses",
	"branches",
	"branch-misses",
	"bus-cycles",
	"stalled-cycles-frontend",
	"stalled-cycles-backend",
	"ref-cycles",
};
|
|
|
|
|
perf stat: Analyze front-end and back-end stall counts
Sample output:
Performance counter stats for './loop_1b':
873.691065 task-clock # 1.000 CPUs utilized
1 context-switches # 0.000 M/sec
1 CPU-migrations # 0.000 M/sec
96 page-faults # 0.000 M/sec
2,012,637,222 cycles # 2.304 GHz (66.58%)
1,001,397,911 stalled-cycles-frontend # 49.76% frontend cycles idle (66.58%)
7,523,398 stalled-cycles-backend # 0.37% backend cycles idle (66.76%)
2,004,551,046 instructions # 1.00 insns per cycle
# 0.50 stalled cycles per insn (66.80%)
1,001,304,992 branches # 1146.063 M/sec (66.76%)
39,453 branch-misses # 0.00% of all branches (66.64%)
0.874046121 seconds time elapsed
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n003io7hjpn1dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-29 19:49:08 +08:00
|
|
|
/*
 * Display names of the software events.  The array is sized by
 * PERF_COUNT_SW_MAX; entry order presumably follows the PERF_COUNT_SW_*
 * enum -- confirm against perf_event.h before reordering or inserting.
 */
static const char *sw_event_names[PERF_COUNT_SW_MAX] = {
	"cpu-clock",
	"task-clock",
	"page-faults",
	"context-switches",
	"CPU-migrations",
	"minor-faults",
	"major-faults",
	"alignment-faults",
	"emulation-faults",
};
|
|
|
|
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
#define MAX_ALIASES 8
|
|
|
|
|
2011-07-14 04:58:18 +08:00
|
|
|
/*
 * Names of the cache units, one row per unit with up to MAX_ALIASES
 * spellings.  Column 0 is the spelling event_cache_name() prints;
 * unused trailing slots are left NULL by the initializer.
 */
static const char *hw_cache[PERF_COUNT_HW_CACHE_MAX][MAX_ALIASES] = {
	{ "L1-dcache",	"l1-d",		"l1d",		"L1-data"		},
	{ "L1-icache",	"l1-i",		"l1i",		"L1-instruction"	},
	{ "LLC",	"L2"							},
	{ "dTLB",	"d-tlb",	"Data-TLB"				},
	{ "iTLB",	"i-tlb",	"Instruction-TLB"			},
	{ "branch",	"branches",	"bpu",		"btb",		"bpc"	},
	{ "node"								},
};
|
|
|
|
|
2011-07-14 04:58:18 +08:00
|
|
|
/*
 * Names of the cache operations, one row per operation with up to
 * MAX_ALIASES spellings.  event_cache_name() prints column 0 when a
 * result suffix follows and column 1 when it does not.
 */
static const char *hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][MAX_ALIASES] = {
	{ "load",	"loads",	"read"						},
	{ "store",	"stores",	"write"						},
	{ "prefetch",	"prefetches",	"speculative-read",	"speculative-load"	},
};
|
|
|
|
|
2011-07-14 04:58:18 +08:00
|
|
|
static const char *hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
|
|
|
|
[MAX_ALIASES] = {
|
2009-06-25 20:55:22 +08:00
|
|
|
{ "refs", "Reference", "ops", "access", },
|
|
|
|
{ "misses", "miss", },
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
};
|
|
|
|
|
2009-06-25 19:46:07 +08:00
|
|
|
/* Shorthand for the PERF_COUNT_HW_CACHE_* constants. */
#define C(x)		PERF_COUNT_HW_CACHE_##x

/* One bit per cache operation, as stored in hw_cache_stat[]. */
#define CACHE_READ	(1 << C(OP_READ))
#define CACHE_WRITE	(1 << C(OP_WRITE))
#define CACHE_PREFETCH	(1 << C(OP_PREFETCH))

/*
 * Bit for cache operation @x.  The argument is parenthesized so that an
 * expression argument is shifted as a whole.
 */
#define COP(x)		(1 << (x))
|
|
|
|
|
|
|
|
/*
|
|
|
|
* cache operation stat
|
|
|
|
* L1I : Read and prefetch only
|
|
|
|
* ITLB and BPU : Read-only
|
|
|
|
*/
|
|
|
|
static unsigned long hw_cache_stat[C(MAX)] = {
|
|
|
|
[C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
|
|
|
|
[C(L1I)] = (CACHE_READ | CACHE_PREFETCH),
|
|
|
|
[C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
|
|
|
|
[C(DTLB)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
|
|
|
|
[C(ITLB)] = (CACHE_READ),
|
|
|
|
[C(BPU)] = (CACHE_READ),
|
2011-07-14 04:58:18 +08:00
|
|
|
[C(NODE)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
|
2009-06-25 19:46:07 +08:00
|
|
|
};
|
|
|
|
|
2009-09-05 03:39:51 +08:00
|
|
|
/*
 * Iterate over the sub-directories of the open directory @sys_dir,
 * skipping "." and "..".  @sys_dirent (a struct dirent) receives each
 * entry and @sys_next is the readdir_r() result pointer.
 *
 * Expands to "while (...) if (...)": the statement that follows is the
 * loop body, and an "else" placed after it would bind to the hidden if.
 */
#define for_each_subsystem(sys_dir, sys_dirent, sys_next)		       \
	while (!readdir_r(sys_dir, &sys_dirent, &sys_next) && sys_next)	       \
	if (sys_dirent.d_type == DT_DIR &&				       \
	    strcmp(sys_dirent.d_name, ".") != 0 &&			       \
	    strcmp(sys_dirent.d_name, "..") != 0)
|
|
|
|
|
2009-08-06 22:48:54 +08:00
|
|
|
static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
|
|
|
|
{
|
|
|
|
char evt_path[MAXPATHLEN];
|
|
|
|
int fd;
|
|
|
|
|
2011-11-17 00:03:07 +08:00
|
|
|
snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", tracing_events_path,
|
2009-08-06 22:48:54 +08:00
|
|
|
sys_dir->d_name, evt_dir->d_name);
|
|
|
|
fd = open(evt_path, O_RDONLY);
|
|
|
|
if (fd < 0)
|
|
|
|
return -EINVAL;
|
|
|
|
close(fd);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-09-05 03:39:51 +08:00
|
|
|
/*
 * Iterate over the sub-directories of the open directory @evt_dir that
 * are events with an "id" file (see tp_event_has_id()), skipping "." and
 * "..".  @sys_dirent is the enclosing subsystem entry, @evt_dirent
 * receives each entry and @evt_next is the readdir_r() result pointer.
 *
 * Expands to "while (...) if (...)": the statement that follows is the
 * loop body, and an "else" placed after it would bind to the hidden if.
 */
#define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next)	       \
	while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next)	       \
	if (evt_dirent.d_type == DT_DIR &&				       \
	    strcmp(evt_dirent.d_name, ".") != 0 &&			       \
	    strcmp(evt_dirent.d_name, "..") != 0 &&			       \
	    tp_event_has_id(&sys_dirent, &evt_dirent) == 0)
|
2009-07-22 00:20:22 +08:00
|
|
|
|
2009-09-17 16:34:51 +08:00
|
|
|
#define MAX_EVENT_LENGTH 512
|
2009-07-22 00:20:22 +08:00
|
|
|
|
|
|
|
|
2009-08-28 09:09:58 +08:00
|
|
|
/*
 * Find the tracepoint whose "id" file under tracing_events_path holds
 * the value @config.
 *
 * Walks <tracing_events_path>/<subsystem>/<event>/id for every subsystem
 * and event directory and compares the number read from each id file
 * with @config.
 *
 * Returns a newly allocated tracepoint_path whose ->system and ->name
 * are separately allocated strings; the caller frees ->name, ->system
 * and the struct itself.  Returns NULL when the events directory is not
 * usable, no event matches, or an allocation fails.
 */
struct tracepoint_path *tracepoint_id_to_path(u64 config)
{
	struct tracepoint_path *path = NULL;
	DIR *sys_dir, *evt_dir;
	struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent;
	char id_buf[24];
	ssize_t len;
	int fd;
	u64 id;
	char evt_path[MAXPATHLEN];
	char dir_path[MAXPATHLEN];

	if (debugfs_valid_mountpoint(tracing_events_path))
		return NULL;

	sys_dir = opendir(tracing_events_path);
	if (!sys_dir)
		return NULL;

	for_each_subsystem(sys_dir, sys_dirent, sys_next) {

		snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
			 sys_dirent.d_name);
		evt_dir = opendir(dir_path);
		if (!evt_dir)
			continue;

		for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) {

			snprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path,
				 evt_dirent.d_name);
			fd = open(evt_path, O_RDONLY);
			if (fd < 0)
				continue;

			/*
			 * Leave room for the terminator: read() does not
			 * NUL-terminate, and atoll() needs a C string.
			 */
			len = read(fd, id_buf, sizeof(id_buf) - 1);
			close(fd);
			if (len <= 0)
				continue;	/* error or empty id file */
			id_buf[len] = '\0';

			id = atoll(id_buf);
			if (id != config)
				continue;

			/* Found it: the directory handles are done with. */
			closedir(evt_dir);
			closedir(sys_dir);

			path = zalloc(sizeof(*path));
			if (!path)
				return NULL;

			path->system = malloc(MAX_EVENT_LENGTH);
			if (!path->system) {
				free(path);
				return NULL;
			}
			path->name = malloc(MAX_EVENT_LENGTH);
			if (!path->name) {
				free(path->system);
				free(path);
				return NULL;
			}

			/* strncpy() alone does not guarantee termination. */
			strncpy(path->system, sys_dirent.d_name,
				MAX_EVENT_LENGTH);
			path->system[MAX_EVENT_LENGTH - 1] = '\0';
			strncpy(path->name, evt_dirent.d_name,
				MAX_EVENT_LENGTH);
			path->name[MAX_EVENT_LENGTH - 1] = '\0';

			return path;
		}
		closedir(evt_dir);
	}

	closedir(sys_dir);
	return NULL;
}
|
|
|
|
|
|
|
|
#define TP_PATH_LEN (MAX_EVENT_LENGTH * 2 + 1)
|
|
|
|
/*
 * Format the tracepoint with id @config as "<system>:<name>".
 *
 * Returns a pointer to a static buffer that is overwritten by every
 * call; the text is "unknown:unknown" when tracepoint_id_to_path()
 * finds no tracepoint for @config.
 */
static const char *tracepoint_id_to_name(u64 config)
{
	static char buf[TP_PATH_LEN];
	struct tracepoint_path *tp = tracepoint_id_to_path(config);

	if (!tp) {
		snprintf(buf, TP_PATH_LEN, "%s:%s", "unknown", "unknown");
		return buf;
	}

	snprintf(buf, TP_PATH_LEN, "%s:%s", tp->system, tp->name);

	/* The lookup result is owned by us; release all three pieces. */
	free(tp->name);
	free(tp->system);
	free(tp);

	return buf;
}
|
|
|
|
|
2009-06-25 19:46:07 +08:00
|
|
|
/*
 * Tell whether operation @cache_op is listed in hw_cache_stat[] for
 * cache unit @cache_type.
 *
 * Returns 1 when the operation's bit is set for that unit, 0 otherwise.
 * @cache_type is used as an index without a range check here.
 */
static int is_cache_op_valid(u8 cache_type, u8 cache_op)
{
	unsigned long allowed_ops = hw_cache_stat[cache_type];

	return (allowed_ops & COP(cache_op)) ? 1 : 0;
}
|
|
|
|
|
2009-06-25 20:55:22 +08:00
|
|
|
/*
 * Build the display name of a cache event from its three coordinates.
 *
 * With a non-zero @cache_result the name is "<cache>-<op>-<result>",
 * using the first spelling of each part; with @cache_result == 0 it is
 * "<cache>-<op>", using the second spelling of the operation.
 *
 * Returns a pointer to a static buffer that is overwritten by every call.
 * The indices are not range-checked here.
 */
static char *event_cache_name(u8 cache_type, u8 cache_op, u8 cache_result)
{
	static char name[50];
	const char *cache = hw_cache[cache_type][0];

	if (!cache_result) {
		sprintf(name, "%s-%s", cache, hw_cache_op[cache_op][1]);
		return name;
	}

	sprintf(name, "%s-%s-%s", cache, hw_cache_op[cache_op][0],
		hw_cache_result[cache_result][0]);
	return name;
}
|
|
|
|
|
2011-03-10 13:23:28 +08:00
|
|
|
const char *event_type(int type)
|
|
|
|
{
|
|
|
|
switch (type) {
|
|
|
|
case PERF_TYPE_HARDWARE:
|
|
|
|
return "hardware";
|
|
|
|
|
|
|
|
case PERF_TYPE_SOFTWARE:
|
|
|
|
return "software";
|
|
|
|
|
|
|
|
case PERF_TYPE_TRACEPOINT:
|
|
|
|
return "tracepoint";
|
|
|
|
|
|
|
|
case PERF_TYPE_HW_CACHE:
|
|
|
|
return "hardware-cache";
|
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return "unknown";
|
|
|
|
}
|
|
|
|
|
2011-01-04 02:39:04 +08:00
|
|
|
const char *event_name(struct perf_evsel *evsel)
|
2009-05-26 15:17:18 +08:00
|
|
|
{
|
2011-01-04 02:39:04 +08:00
|
|
|
u64 config = evsel->attr.config;
|
|
|
|
int type = evsel->attr.type;
|
2009-08-07 01:40:28 +08:00
|
|
|
|
2011-02-16 21:10:01 +08:00
|
|
|
if (evsel->name)
|
|
|
|
return evsel->name;
|
|
|
|
|
2009-08-07 01:40:28 +08:00
|
|
|
return __event_name(type, config);
|
|
|
|
}
|
|
|
|
|
2009-08-15 18:26:57 +08:00
|
|
|
/*
 * Derive a printable name from a perf event (type, config) pair.
 *
 *  - PERF_TYPE_RAW:        "raw 0x<config in hex>"
 *  - PERF_TYPE_HARDWARE:   entry of hw_event_names[], or "unknown-hardware"
 *  - PERF_TYPE_HW_CACHE:   config is decoded as cache type (bits 0-7),
 *                          operation (bits 8-15) and result (bits 16-23)
 *  - PERF_TYPE_SOFTWARE:   entry of sw_event_names[], or "unknown-software"
 *  - PERF_TYPE_TRACEPOINT: "system:name" looked up from the tracepoint id
 *  - anything else:        "unknown"
 *
 * The raw, cache and tracepoint names live in static buffers that are
 * overwritten by later calls; the remaining results are string constants.
 */
const char *__event_name(int type, u64 config)
{
	static char buf[32];

	if (type == PERF_TYPE_RAW) {
		snprintf(buf, sizeof(buf), "raw 0x%" PRIx64, config);
		return buf;
	}

	switch (type) {
	case PERF_TYPE_HARDWARE:
		if (config < PERF_COUNT_HW_MAX && hw_event_names[config])
			return hw_event_names[config];
		return "unknown-hardware";

	case PERF_TYPE_HW_CACHE: {
		u8 cache_type, cache_op, cache_result;

		/*
		 * The *_MAX constants are the number of table rows (they are
		 * what parse_aliases() receives as its size), so an index
		 * equal to *_MAX is already out of range: reject with >=.
		 */
		cache_type = (config >>  0) & 0xff;
		if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
			return "unknown-ext-hardware-cache-type";

		cache_op = (config >>  8) & 0xff;
		if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
			return "unknown-ext-hardware-cache-op";

		cache_result = (config >> 16) & 0xff;
		if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
			return "unknown-ext-hardware-cache-result";

		if (!is_cache_op_valid(cache_type, cache_op))
			return "invalid-cache";

		return event_cache_name(cache_type, cache_op, cache_result);
	}

	case PERF_TYPE_SOFTWARE:
		if (config < PERF_COUNT_SW_MAX && sw_event_names[config])
			return sw_event_names[config];
		return "unknown-software";

	case PERF_TYPE_TRACEPOINT:
		return tracepoint_id_to_name(config);

	default:
		break;
	}

	return "unknown";
}
|
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
static int add_event(struct list_head *list, int *idx,
|
|
|
|
struct perf_event_attr *attr, char *name)
|
|
|
|
{
|
|
|
|
struct perf_evsel *evsel;
|
|
|
|
|
|
|
|
event_attr_init(attr);
|
|
|
|
|
|
|
|
evsel = perf_evsel__new(attr, (*idx)++);
|
|
|
|
if (!evsel)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
list_add_tail(&evsel->node, list);
|
|
|
|
|
|
|
|
evsel->name = strdup(name);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int parse_aliases(char *str, const char *names[][MAX_ALIASES], int size)
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
{
|
|
|
|
int i, j;
|
2009-07-01 11:04:34 +08:00
|
|
|
int n, longest = -1;
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
|
|
|
|
for (i = 0; i < size; i++) {
|
2009-07-01 11:04:34 +08:00
|
|
|
for (j = 0; j < MAX_ALIASES && names[i][j]; j++) {
|
|
|
|
n = strlen(names[i][j]);
|
2012-03-16 03:09:15 +08:00
|
|
|
if (n > longest && !strncasecmp(str, names[i][j], n))
|
2009-07-01 11:04:34 +08:00
|
|
|
longest = n;
|
|
|
|
}
|
2012-03-16 03:09:15 +08:00
|
|
|
if (longest > 0)
|
2009-07-01 11:04:34 +08:00
|
|
|
return i;
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
}
|
|
|
|
|
2009-06-07 03:04:17 +08:00
|
|
|
return -1;
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
}
|
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
int parse_events_add_cache(struct list_head *list, int *idx,
|
|
|
|
char *type, char *op_result1, char *op_result2)
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
{
|
2012-03-16 03:09:15 +08:00
|
|
|
struct perf_event_attr attr;
|
|
|
|
char name[MAX_NAME_LEN];
|
2009-07-01 11:04:34 +08:00
|
|
|
int cache_type = -1, cache_op = -1, cache_result = -1;
|
2012-03-16 03:09:15 +08:00
|
|
|
char *op_result[2] = { op_result1, op_result2 };
|
|
|
|
int i, n;
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* No fallback - if we cannot get a clear cache type
|
|
|
|
* then bail out:
|
|
|
|
*/
|
2012-03-16 03:09:15 +08:00
|
|
|
cache_type = parse_aliases(type, hw_cache,
|
|
|
|
PERF_COUNT_HW_CACHE_MAX);
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
if (cache_type == -1)
|
2012-03-16 03:09:15 +08:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
n = snprintf(name, MAX_NAME_LEN, "%s", type);
|
2009-07-01 11:04:34 +08:00
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
for (i = 0; (i < 2) && (op_result[i]); i++) {
|
|
|
|
char *str = op_result[i];
|
|
|
|
|
|
|
|
snprintf(name + n, MAX_NAME_LEN - n, "-%s\n", str);
|
2009-07-01 11:04:34 +08:00
|
|
|
|
|
|
|
if (cache_op == -1) {
|
2012-03-16 03:09:15 +08:00
|
|
|
cache_op = parse_aliases(str, hw_cache_op,
|
|
|
|
PERF_COUNT_HW_CACHE_OP_MAX);
|
2009-07-01 11:04:34 +08:00
|
|
|
if (cache_op >= 0) {
|
|
|
|
if (!is_cache_op_valid(cache_type, cache_op))
|
2012-03-16 03:09:15 +08:00
|
|
|
return -EINVAL;
|
2009-07-01 11:04:34 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cache_result == -1) {
|
2012-03-16 03:09:15 +08:00
|
|
|
cache_result = parse_aliases(str, hw_cache_result,
|
2009-07-01 11:04:34 +08:00
|
|
|
PERF_COUNT_HW_CACHE_RESULT_MAX);
|
|
|
|
if (cache_result >= 0)
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Fall back to reads:
|
|
|
|
*/
|
2009-06-07 03:04:17 +08:00
|
|
|
if (cache_op == -1)
|
|
|
|
cache_op = PERF_COUNT_HW_CACHE_OP_READ;
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Fall back to accesses:
|
|
|
|
*/
|
|
|
|
if (cache_result == -1)
|
|
|
|
cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS;
|
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
memset(&attr, 0, sizeof(attr));
|
|
|
|
attr.config = cache_type | (cache_op << 8) | (cache_result << 16);
|
|
|
|
attr.type = PERF_TYPE_HW_CACHE;
|
|
|
|
return add_event(list, idx, &attr, name);
|
2009-09-12 05:19:45 +08:00
|
|
|
}
|
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
static int add_tracepoint(struct list_head *list, int *idx,
|
|
|
|
char *sys_name, char *evt_name)
|
2009-09-12 05:19:45 +08:00
|
|
|
{
|
2012-03-16 03:09:15 +08:00
|
|
|
struct perf_event_attr attr;
|
|
|
|
char name[MAX_NAME_LEN];
|
2009-09-12 05:19:45 +08:00
|
|
|
char evt_path[MAXPATHLEN];
|
|
|
|
char id_buf[4];
|
|
|
|
u64 id;
|
|
|
|
int fd;
|
|
|
|
|
2011-11-17 00:03:07 +08:00
|
|
|
snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", tracing_events_path,
|
2009-09-12 05:19:45 +08:00
|
|
|
sys_name, evt_name);
|
|
|
|
|
|
|
|
fd = open(evt_path, O_RDONLY);
|
|
|
|
if (fd < 0)
|
2012-03-16 03:09:15 +08:00
|
|
|
return -1;
|
2009-09-12 05:19:45 +08:00
|
|
|
|
|
|
|
if (read(fd, id_buf, sizeof(id_buf)) < 0) {
|
|
|
|
close(fd);
|
2012-03-16 03:09:15 +08:00
|
|
|
return -1;
|
2009-09-12 05:19:45 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
close(fd);
|
|
|
|
id = atoll(id_buf);
|
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
memset(&attr, 0, sizeof(attr));
|
|
|
|
attr.config = id;
|
|
|
|
attr.type = PERF_TYPE_TRACEPOINT;
|
|
|
|
attr.sample_type |= PERF_SAMPLE_RAW;
|
|
|
|
attr.sample_type |= PERF_SAMPLE_TIME;
|
|
|
|
attr.sample_type |= PERF_SAMPLE_CPU;
|
|
|
|
attr.sample_period = 1;
|
2010-04-22 00:06:01 +08:00
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
snprintf(name, MAX_NAME_LEN, "%s:%s", sys_name, evt_name);
|
|
|
|
return add_event(list, idx, &attr, name);
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
}
|
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
static int add_tracepoint_multi(struct list_head *list, int *idx,
|
|
|
|
char *sys_name, char *evt_name)
|
2009-09-12 05:19:45 +08:00
|
|
|
{
|
|
|
|
char evt_path[MAXPATHLEN];
|
|
|
|
struct dirent *evt_ent;
|
|
|
|
DIR *evt_dir;
|
2012-03-16 03:09:15 +08:00
|
|
|
int ret = 0;
|
2009-09-12 05:19:45 +08:00
|
|
|
|
2011-11-17 00:03:07 +08:00
|
|
|
snprintf(evt_path, MAXPATHLEN, "%s/%s", tracing_events_path, sys_name);
|
2009-09-12 05:19:45 +08:00
|
|
|
evt_dir = opendir(evt_path);
|
|
|
|
if (!evt_dir) {
|
|
|
|
perror("Can't open event dir");
|
2012-03-16 03:09:15 +08:00
|
|
|
return -1;
|
2009-09-12 05:19:45 +08:00
|
|
|
}
|
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
while (!ret && (evt_ent = readdir(evt_dir))) {
|
2009-09-12 05:19:45 +08:00
|
|
|
if (!strcmp(evt_ent->d_name, ".")
|
|
|
|
|| !strcmp(evt_ent->d_name, "..")
|
|
|
|
|| !strcmp(evt_ent->d_name, "enable")
|
|
|
|
|| !strcmp(evt_ent->d_name, "filter"))
|
|
|
|
continue;
|
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
if (!strglobmatch(evt_ent->d_name, evt_name))
|
2010-01-06 06:47:17 +08:00
|
|
|
continue;
|
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name);
|
2009-09-12 05:19:45 +08:00
|
|
|
}
|
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
return ret;
|
2009-09-12 05:19:45 +08:00
|
|
|
}
|
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
int parse_events_add_tracepoint(struct list_head *list, int *idx,
|
|
|
|
char *sys, char *event)
|
2009-07-22 00:20:22 +08:00
|
|
|
{
|
2012-03-16 03:09:15 +08:00
|
|
|
int ret;
|
2009-07-22 00:20:22 +08:00
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
ret = debugfs_valid_mountpoint(tracing_events_path);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2009-07-22 00:20:22 +08:00
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
return strpbrk(event, "*?") ?
|
|
|
|
add_tracepoint_multi(list, idx, sys, event) :
|
|
|
|
add_tracepoint(list, idx, sys, event);
|
2009-07-22 00:20:22 +08:00
|
|
|
}
|
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
static int
|
|
|
|
parse_breakpoint_type(const char *type, struct perf_event_attr *attr)
|
2009-11-23 22:42:35 +08:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < 3; i++) {
|
2012-03-16 03:09:15 +08:00
|
|
|
if (!type || !type[i])
|
2009-11-23 22:42:35 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
switch (type[i]) {
|
|
|
|
case 'r':
|
|
|
|
attr->bp_type |= HW_BREAKPOINT_R;
|
|
|
|
break;
|
|
|
|
case 'w':
|
|
|
|
attr->bp_type |= HW_BREAKPOINT_W;
|
|
|
|
break;
|
|
|
|
case 'x':
|
|
|
|
attr->bp_type |= HW_BREAKPOINT_X;
|
|
|
|
break;
|
|
|
|
default:
|
2012-03-16 03:09:15 +08:00
|
|
|
return -EINVAL;
|
2009-11-23 22:42:35 +08:00
|
|
|
}
|
|
|
|
}
|
2012-03-16 03:09:15 +08:00
|
|
|
|
2009-11-23 22:42:35 +08:00
|
|
|
if (!attr->bp_type) /* Default */
|
|
|
|
attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
|
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
return 0;
|
2009-11-23 22:42:35 +08:00
|
|
|
}
|
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
int parse_events_add_breakpoint(struct list_head *list, int *idx,
|
|
|
|
void *ptr, char *type)
|
2009-11-23 22:42:35 +08:00
|
|
|
{
|
2012-03-16 03:09:15 +08:00
|
|
|
struct perf_event_attr attr;
|
|
|
|
char name[MAX_NAME_LEN];
|
2009-11-23 22:42:35 +08:00
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
memset(&attr, 0, sizeof(attr));
|
2012-03-21 02:15:39 +08:00
|
|
|
attr.bp_addr = (unsigned long) ptr;
|
2009-11-23 22:42:35 +08:00
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
if (parse_breakpoint_type(type, &attr))
|
|
|
|
return -EINVAL;
|
2009-11-23 22:42:35 +08:00
|
|
|
|
2010-06-25 03:36:19 +08:00
|
|
|
/*
|
|
|
|
* We should find a nice way to override the access length
|
|
|
|
* Provide some defaults for now
|
|
|
|
*/
|
2012-03-16 03:09:15 +08:00
|
|
|
if (attr.bp_type == HW_BREAKPOINT_X)
|
|
|
|
attr.bp_len = sizeof(long);
|
2010-06-25 03:36:19 +08:00
|
|
|
else
|
2012-03-16 03:09:15 +08:00
|
|
|
attr.bp_len = HW_BREAKPOINT_LEN_4;
|
2009-07-01 11:04:34 +08:00
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
attr.type = PERF_TYPE_BREAKPOINT;
|
2011-04-27 09:55:40 +08:00
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
snprintf(name, MAX_NAME_LEN, "mem:%p:%s", ptr, type ? type : "rw");
|
|
|
|
return add_event(list, idx, &attr, name);
|
2009-06-22 19:14:28 +08:00
|
|
|
}
|
|
|
|
|
2012-03-16 03:09:16 +08:00
|
|
|
static int config_term(struct perf_event_attr *attr,
|
|
|
|
struct parse_events__term *term)
|
|
|
|
{
|
|
|
|
switch (term->type) {
|
|
|
|
case PARSE_EVENTS__TERM_TYPE_CONFIG:
|
|
|
|
attr->config = term->val.num;
|
|
|
|
break;
|
|
|
|
case PARSE_EVENTS__TERM_TYPE_CONFIG1:
|
|
|
|
attr->config1 = term->val.num;
|
|
|
|
break;
|
|
|
|
case PARSE_EVENTS__TERM_TYPE_CONFIG2:
|
|
|
|
attr->config2 = term->val.num;
|
|
|
|
break;
|
|
|
|
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
|
|
|
|
attr->sample_period = term->val.num;
|
|
|
|
break;
|
|
|
|
case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
|
|
|
|
/*
|
|
|
|
* TODO uncomment when the field is available
|
|
|
|
* attr->branch_sample_type = term->val.num;
|
|
|
|
*/
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int config_attr(struct perf_event_attr *attr,
|
|
|
|
struct list_head *head, int fail)
|
|
|
|
{
|
|
|
|
struct parse_events__term *term;
|
|
|
|
|
|
|
|
list_for_each_entry(term, head, list)
|
|
|
|
if (config_term(attr, term) && fail)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int parse_events_add_numeric(struct list_head *list, int *idx,
|
|
|
|
unsigned long type, unsigned long config,
|
|
|
|
struct list_head *head_config)
|
2009-05-26 17:10:09 +08:00
|
|
|
{
|
2012-03-16 03:09:15 +08:00
|
|
|
struct perf_event_attr attr;
|
2009-07-01 11:04:34 +08:00
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
memset(&attr, 0, sizeof(attr));
|
|
|
|
attr.type = type;
|
|
|
|
attr.config = config;
|
2012-03-16 03:09:16 +08:00
|
|
|
|
|
|
|
if (head_config &&
|
|
|
|
config_attr(&attr, head_config, 1))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
return add_event(list, idx, &attr,
|
|
|
|
(char *) __event_name(type, config));
|
2009-07-01 11:04:34 +08:00
|
|
|
}
|
2009-05-26 17:10:09 +08:00
|
|
|
|
2012-03-16 03:09:18 +08:00
|
|
|
int parse_events_add_pmu(struct list_head *list, int *idx,
|
|
|
|
char *name, struct list_head *head_config)
|
|
|
|
{
|
|
|
|
struct perf_event_attr attr;
|
|
|
|
struct perf_pmu *pmu;
|
|
|
|
|
|
|
|
pmu = perf_pmu__find(name);
|
|
|
|
if (!pmu)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
memset(&attr, 0, sizeof(attr));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Configure hardcoded terms first, no need to check
|
|
|
|
* return value when called with fail == 0 ;)
|
|
|
|
*/
|
|
|
|
config_attr(&attr, head_config, 0);
|
|
|
|
|
|
|
|
if (perf_pmu__config(pmu, &attr, head_config))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
return add_event(list, idx, &attr, (char *) "pmu");
|
|
|
|
}
|
|
|
|
|
2012-03-21 02:15:40 +08:00
|
|
|
/*
 * Called once a single event definition has been parsed: move the
 * entries of the 'single event' list @list_event to the tail of the
 * 'all events' list @list_all, then re-initialise @list_event so it is
 * empty for the next event definition.
 */
void parse_events_update_lists(struct list_head *list_event,
			       struct list_head *list_all)
{
	list_splice_tail(list_event, list_all);
	INIT_LIST_HEAD(list_event);
}
|
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
int parse_events_modifier(struct list_head *list, char *str)
|
2009-07-01 11:04:34 +08:00
|
|
|
{
|
2012-03-16 03:09:15 +08:00
|
|
|
struct perf_evsel *evsel;
|
2012-01-05 00:54:19 +08:00
|
|
|
int exclude = 0, exclude_GH = 0;
|
|
|
|
int eu = 0, ek = 0, eh = 0, eH = 0, eG = 0, precise = 0;
|
2009-06-06 15:58:57 +08:00
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
if (str == NULL)
|
2009-06-06 15:58:57 +08:00
|
|
|
return 0;
|
2011-04-27 10:06:33 +08:00
|
|
|
|
2009-07-01 11:04:34 +08:00
|
|
|
while (*str) {
|
2010-04-09 05:03:20 +08:00
|
|
|
if (*str == 'u') {
|
|
|
|
if (!exclude)
|
|
|
|
exclude = eu = ek = eh = 1;
|
2009-07-01 11:04:34 +08:00
|
|
|
eu = 0;
|
2010-04-09 05:03:20 +08:00
|
|
|
} else if (*str == 'k') {
|
|
|
|
if (!exclude)
|
|
|
|
exclude = eu = ek = eh = 1;
|
2009-07-01 11:04:34 +08:00
|
|
|
ek = 0;
|
2010-04-09 05:03:20 +08:00
|
|
|
} else if (*str == 'h') {
|
|
|
|
if (!exclude)
|
|
|
|
exclude = eu = ek = eh = 1;
|
2009-07-01 11:04:34 +08:00
|
|
|
eh = 0;
|
2012-01-05 00:54:19 +08:00
|
|
|
} else if (*str == 'G') {
|
|
|
|
if (!exclude_GH)
|
|
|
|
exclude_GH = eG = eH = 1;
|
|
|
|
eG = 0;
|
|
|
|
} else if (*str == 'H') {
|
|
|
|
if (!exclude_GH)
|
|
|
|
exclude_GH = eG = eH = 1;
|
|
|
|
eH = 0;
|
2010-04-09 05:03:20 +08:00
|
|
|
} else if (*str == 'p') {
|
|
|
|
precise++;
|
|
|
|
} else
|
2009-07-01 11:04:34 +08:00
|
|
|
break;
|
2010-04-09 05:03:20 +08:00
|
|
|
|
2009-07-01 11:04:34 +08:00
|
|
|
++str;
|
2009-05-26 15:17:18 +08:00
|
|
|
}
|
2011-04-27 10:06:33 +08:00
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
/*
|
|
|
|
* precise ip:
|
|
|
|
*
|
|
|
|
* 0 - SAMPLE_IP can have arbitrary skid
|
|
|
|
* 1 - SAMPLE_IP must have constant skid
|
|
|
|
* 2 - SAMPLE_IP requested to have 0 skid
|
|
|
|
* 3 - SAMPLE_IP must have 0 skid
|
|
|
|
*
|
|
|
|
* See also PERF_RECORD_MISC_EXACT_IP
|
|
|
|
*/
|
|
|
|
if (precise > 3)
|
|
|
|
return -EINVAL;
|
2011-04-27 10:06:33 +08:00
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
list_for_each_entry(evsel, list, node) {
|
|
|
|
evsel->attr.exclude_user = eu;
|
|
|
|
evsel->attr.exclude_kernel = ek;
|
|
|
|
evsel->attr.exclude_hv = eh;
|
|
|
|
evsel->attr.precise_ip = precise;
|
|
|
|
evsel->attr.exclude_host = eH;
|
|
|
|
evsel->attr.exclude_guest = eG;
|
|
|
|
}
|
2011-04-27 10:06:33 +08:00
|
|
|
|
2009-07-01 11:04:34 +08:00
|
|
|
return 0;
|
|
|
|
}
|
2009-05-26 17:10:09 +08:00
|
|
|
|
2012-03-16 03:09:15 +08:00
|
|
|
/*
 * Parse the event description @str and append the resulting events to
 * @evlist.
 *
 * The string is scanned through a temporary lexer buffer and handed to
 * parse_events_parse(). On success the new entries are spliced onto the
 * tail of @evlist and 0 is returned. On failure a diagnostic is written
 * to stderr and the parser's return value is passed back.
 */
int parse_events(struct perf_evlist *evlist, const char *str, int unset __used)
{
	LIST_HEAD(list);
	LIST_HEAD(list_tmp);
	YY_BUFFER_STATE buffer;
	int ret, idx = evlist->nr_entries;

	buffer = parse_events__scan_string(str);

	ret = parse_events_parse(&list, &list_tmp, &idx);

	parse_events__flush_buffer(buffer);
	parse_events__delete_buffer(buffer);

	if (ret) {
		/*
		 * There are 2 users - builtin-record and builtin-test
		 * objects. Both call perf_evlist__delete in case of error,
		 * so no cleanup is done here.
		 */
		fprintf(stderr, "invalid or unsupported event: '%s'\n", str);
		fprintf(stderr, "Run 'perf list' for a list of valid events\n");
		return ret;
	}

	/* idx was advanced by the parser; the difference is the new count. */
	perf_evlist__splice_list_tail(evlist, &list, idx - evlist->nr_entries);
	return 0;
}
|
|
|
|
|
2011-07-14 17:25:32 +08:00
|
|
|
int parse_events_option(const struct option *opt, const char *str,
|
|
|
|
int unset __used)
|
|
|
|
{
|
|
|
|
struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
|
|
|
|
return parse_events(evlist, str, unset);
|
|
|
|
}
|
|
|
|
|
2011-01-12 06:56:53 +08:00
|
|
|
int parse_filter(const struct option *opt, const char *str,
|
2009-10-15 11:22:07 +08:00
|
|
|
int unset __used)
|
|
|
|
{
|
2011-01-12 06:56:53 +08:00
|
|
|
struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
|
2011-01-04 02:39:04 +08:00
|
|
|
struct perf_evsel *last = NULL;
|
2009-10-15 11:22:07 +08:00
|
|
|
|
2011-01-12 06:56:53 +08:00
|
|
|
if (evlist->nr_entries > 0)
|
|
|
|
last = list_entry(evlist->entries.prev, struct perf_evsel, node);
|
2011-01-04 02:39:04 +08:00
|
|
|
|
|
|
|
if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) {
|
2009-10-15 11:22:07 +08:00
|
|
|
fprintf(stderr,
|
|
|
|
"-F option should follow a -e tracepoint option\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2011-01-04 02:39:04 +08:00
|
|
|
last->filter = strdup(str);
|
|
|
|
if (last->filter == NULL) {
|
2009-10-15 11:22:07 +08:00
|
|
|
fprintf(stderr, "not enough memory to hold filter string\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-06-06 18:24:17 +08:00
|
|
|
static const char * const event_type_descriptors[] = {
|
|
|
|
"Hardware event",
|
|
|
|
"Software event",
|
|
|
|
"Tracepoint event",
|
|
|
|
"Hardware cache event",
|
2009-12-29 16:37:07 +08:00
|
|
|
"Raw hardware event descriptor",
|
|
|
|
"Hardware breakpoint",
|
2009-06-06 18:24:17 +08:00
|
|
|
};
|
|
|
|
|
2009-07-22 00:20:22 +08:00
|
|
|
/*
|
|
|
|
* Print the events from <debugfs_mount_point>/tracing/events
|
|
|
|
*/
|
|
|
|
|
2011-02-18 01:38:58 +08:00
|
|
|
void print_tracepoint_events(const char *subsys_glob, const char *event_glob)
|
2009-07-22 00:20:22 +08:00
|
|
|
{
|
|
|
|
DIR *sys_dir, *evt_dir;
|
|
|
|
struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent;
|
|
|
|
char evt_path[MAXPATHLEN];
|
2009-09-24 21:39:09 +08:00
|
|
|
char dir_path[MAXPATHLEN];
|
2009-07-22 00:20:22 +08:00
|
|
|
|
2011-11-17 00:03:07 +08:00
|
|
|
if (debugfs_valid_mountpoint(tracing_events_path))
|
2009-07-22 00:20:22 +08:00
|
|
|
return;
|
|
|
|
|
2011-11-17 00:03:07 +08:00
|
|
|
sys_dir = opendir(tracing_events_path);
|
2009-07-22 00:20:22 +08:00
|
|
|
if (!sys_dir)
|
2009-09-24 21:39:09 +08:00
|
|
|
return;
|
2009-09-05 03:39:51 +08:00
|
|
|
|
|
|
|
for_each_subsystem(sys_dir, sys_dirent, sys_next) {
|
2011-02-18 01:38:58 +08:00
|
|
|
if (subsys_glob != NULL &&
|
|
|
|
!strglobmatch(sys_dirent.d_name, subsys_glob))
|
|
|
|
continue;
|
2009-09-24 21:39:09 +08:00
|
|
|
|
2011-11-17 00:03:07 +08:00
|
|
|
snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
|
2009-09-24 21:39:09 +08:00
|
|
|
sys_dirent.d_name);
|
|
|
|
evt_dir = opendir(dir_path);
|
|
|
|
if (!evt_dir)
|
2009-09-05 03:39:51 +08:00
|
|
|
continue;
|
2009-09-24 21:39:09 +08:00
|
|
|
|
2009-09-05 03:39:51 +08:00
|
|
|
for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) {
|
2011-02-18 01:38:58 +08:00
|
|
|
if (event_glob != NULL &&
|
|
|
|
!strglobmatch(evt_dirent.d_name, event_glob))
|
|
|
|
continue;
|
|
|
|
|
2009-07-22 00:20:22 +08:00
|
|
|
snprintf(evt_path, MAXPATHLEN, "%s:%s",
|
|
|
|
sys_dirent.d_name, evt_dirent.d_name);
|
2011-04-30 04:52:42 +08:00
|
|
|
printf(" %-50s [%s]\n", evt_path,
|
2009-12-29 16:37:07 +08:00
|
|
|
event_type_descriptors[PERF_TYPE_TRACEPOINT]);
|
2009-07-22 00:20:22 +08:00
|
|
|
}
|
|
|
|
closedir(evt_dir);
|
|
|
|
}
|
|
|
|
closedir(sys_dir);
|
|
|
|
}
|
|
|
|
|
2011-01-04 00:50:45 +08:00
|
|
|
/*
|
|
|
|
* Check whether event is in <debugfs_mount_point>/tracing/events
|
|
|
|
*/
|
|
|
|
|
|
|
|
int is_valid_tracepoint(const char *event_string)
|
|
|
|
{
|
|
|
|
DIR *sys_dir, *evt_dir;
|
|
|
|
struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent;
|
|
|
|
char evt_path[MAXPATHLEN];
|
|
|
|
char dir_path[MAXPATHLEN];
|
|
|
|
|
2011-11-17 00:03:07 +08:00
|
|
|
if (debugfs_valid_mountpoint(tracing_events_path))
|
2011-01-04 00:50:45 +08:00
|
|
|
return 0;
|
|
|
|
|
2011-11-17 00:03:07 +08:00
|
|
|
sys_dir = opendir(tracing_events_path);
|
2011-01-04 00:50:45 +08:00
|
|
|
if (!sys_dir)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
for_each_subsystem(sys_dir, sys_dirent, sys_next) {
|
|
|
|
|
2011-11-17 00:03:07 +08:00
|
|
|
snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
|
2011-01-04 00:50:45 +08:00
|
|
|
sys_dirent.d_name);
|
|
|
|
evt_dir = opendir(dir_path);
|
|
|
|
if (!evt_dir)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) {
|
|
|
|
snprintf(evt_path, MAXPATHLEN, "%s:%s",
|
|
|
|
sys_dirent.d_name, evt_dirent.d_name);
|
|
|
|
if (!strcmp(evt_path, event_string)) {
|
|
|
|
closedir(evt_dir);
|
|
|
|
closedir(sys_dir);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
closedir(evt_dir);
|
|
|
|
}
|
|
|
|
closedir(sys_dir);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-02-18 01:38:58 +08:00
|
|
|
/*
 * Print every entry of event_symbols[] whose type equals @type, one per
 * line, as "<symbol> OR <alias>" (or just "<symbol>" when the alias is
 * empty) followed by the type description.
 */
void print_events_type(u8 type)
{
	char name[64];
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
		struct event_symbol *sym = &event_symbols[i];

		if (sym->type != type)
			continue;

		if (sym->alias[0] != '\0')
			snprintf(name, sizeof(name), "%s OR %s",
				 sym->symbol, sym->alias);
		else
			snprintf(name, sizeof(name), "%s", sym->symbol);

		printf(" %-50s [%s]\n", name,
		       event_type_descriptors[type]);
	}
}
|
|
|
|
|
|
|
|
int print_hwcache_events(const char *event_glob)
|
|
|
|
{
|
|
|
|
unsigned int type, op, i, printed = 0;
|
|
|
|
|
|
|
|
for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
|
|
|
|
for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
|
|
|
|
/* skip invalid cache type */
|
|
|
|
if (!is_cache_op_valid(type, op))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
|
|
|
|
char *name = event_cache_name(type, op, i);
|
|
|
|
|
2011-04-30 04:52:42 +08:00
|
|
|
if (event_glob != NULL && !strglobmatch(name, event_glob))
|
2011-02-18 01:38:58 +08:00
|
|
|
continue;
|
|
|
|
|
2011-04-30 04:52:42 +08:00
|
|
|
printf(" %-50s [%s]\n", name,
|
2011-02-18 01:38:58 +08:00
|
|
|
event_type_descriptors[PERF_TYPE_HW_CACHE]);
|
|
|
|
++printed;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return printed;
|
|
|
|
}
|
|
|
|
|
2009-05-26 17:10:09 +08:00
|
|
|
/*
|
2009-06-06 18:24:17 +08:00
|
|
|
* Print the help text for the event symbols:
|
2009-05-26 17:10:09 +08:00
|
|
|
*/
|
2011-02-18 01:38:58 +08:00
|
|
|
void print_events(const char *event_glob)
|
2009-05-26 17:10:09 +08:00
|
|
|
{
|
2011-02-18 01:38:58 +08:00
|
|
|
unsigned int i, type, prev_type = -1, printed = 0, ntypes_printed = 0;
|
2011-04-30 04:52:42 +08:00
|
|
|
struct event_symbol *syms = event_symbols;
|
|
|
|
char name[MAX_NAME_LEN];
|
2009-05-26 17:10:09 +08:00
|
|
|
|
2009-10-27 08:33:05 +08:00
|
|
|
printf("\n");
|
|
|
|
printf("List of pre-defined events (to be used in -e):\n");
|
2009-05-26 17:10:09 +08:00
|
|
|
|
2009-06-06 18:24:17 +08:00
|
|
|
for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
|
2009-12-29 16:37:07 +08:00
|
|
|
type = syms->type;
|
2009-05-26 17:10:09 +08:00
|
|
|
|
2011-02-18 01:38:58 +08:00
|
|
|
if (type != prev_type && printed) {
|
2009-10-27 08:33:05 +08:00
|
|
|
printf("\n");
|
2011-02-18 01:38:58 +08:00
|
|
|
printed = 0;
|
|
|
|
ntypes_printed++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (event_glob != NULL &&
|
|
|
|
!(strglobmatch(syms->symbol, event_glob) ||
|
|
|
|
(syms->alias && strglobmatch(syms->alias, event_glob))))
|
|
|
|
continue;
|
2009-05-26 17:10:09 +08:00
|
|
|
|
2009-06-22 19:14:28 +08:00
|
|
|
if (strlen(syms->alias))
|
2011-04-30 04:52:42 +08:00
|
|
|
snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias);
|
2009-06-22 19:14:28 +08:00
|
|
|
else
|
2011-04-30 04:52:42 +08:00
|
|
|
strncpy(name, syms->symbol, MAX_NAME_LEN);
|
|
|
|
printf(" %-50s [%s]\n", name,
|
2009-06-06 18:24:17 +08:00
|
|
|
event_type_descriptors[type]);
|
2009-05-26 17:10:09 +08:00
|
|
|
|
2009-06-06 18:24:17 +08:00
|
|
|
prev_type = type;
|
2011-02-18 01:38:58 +08:00
|
|
|
++printed;
|
2009-05-26 17:10:09 +08:00
|
|
|
}
|
|
|
|
|
2011-02-18 01:38:58 +08:00
|
|
|
if (ntypes_printed) {
|
|
|
|
printed = 0;
|
|
|
|
printf("\n");
|
2009-07-01 21:06:18 +08:00
|
|
|
}
|
2011-02-18 01:38:58 +08:00
|
|
|
print_hwcache_events(event_glob);
|
|
|
|
|
|
|
|
if (event_glob != NULL)
|
|
|
|
return;
|
2009-07-01 21:06:18 +08:00
|
|
|
|
2009-10-27 08:33:05 +08:00
|
|
|
printf("\n");
|
2011-04-30 04:52:42 +08:00
|
|
|
printf(" %-50s [%s]\n",
|
2012-03-16 03:09:18 +08:00
|
|
|
"rNNN",
|
|
|
|
event_type_descriptors[PERF_TYPE_RAW]);
|
|
|
|
printf(" %-50s [%s]\n",
|
|
|
|
"cpu/t1=v1[,t2=v2,t3 ...]/modifier",
|
2010-05-08 01:07:05 +08:00
|
|
|
event_type_descriptors[PERF_TYPE_RAW]);
|
2012-03-16 03:09:18 +08:00
|
|
|
printf(" (see 'perf list --help' on how to encode it)\n");
|
2009-10-27 08:33:05 +08:00
|
|
|
printf("\n");
|
2009-06-06 18:24:17 +08:00
|
|
|
|
2011-04-30 04:52:42 +08:00
|
|
|
printf(" %-50s [%s]\n",
|
2009-12-29 16:37:07 +08:00
|
|
|
"mem:<addr>[:access]",
|
|
|
|
event_type_descriptors[PERF_TYPE_BREAKPOINT]);
|
2009-11-23 22:42:35 +08:00
|
|
|
printf("\n");
|
|
|
|
|
2011-02-18 01:38:58 +08:00
|
|
|
print_tracepoint_events(NULL, NULL);
|
2009-05-26 17:10:09 +08:00
|
|
|
}
|
2012-03-16 03:09:16 +08:00
|
|
|
|
|
|
|
int parse_events__is_hardcoded_term(struct parse_events__term *term)
|
|
|
|
{
|
|
|
|
return term->type <= PARSE_EVENTS__TERM_TYPE_HARDCODED_MAX;
|
|
|
|
}
|
|
|
|
|
|
|
|
int parse_events__new_term(struct parse_events__term **_term, int type,
|
|
|
|
char *config, char *str, long num)
|
|
|
|
{
|
|
|
|
struct parse_events__term *term;
|
|
|
|
|
|
|
|
term = zalloc(sizeof(*term));
|
|
|
|
if (!term)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
INIT_LIST_HEAD(&term->list);
|
|
|
|
term->type = type;
|
|
|
|
term->config = config;
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case PARSE_EVENTS__TERM_TYPE_CONFIG:
|
|
|
|
case PARSE_EVENTS__TERM_TYPE_CONFIG1:
|
|
|
|
case PARSE_EVENTS__TERM_TYPE_CONFIG2:
|
|
|
|
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
|
|
|
|
case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
|
|
|
|
case PARSE_EVENTS__TERM_TYPE_NUM:
|
|
|
|
term->val.num = num;
|
|
|
|
break;
|
|
|
|
case PARSE_EVENTS__TERM_TYPE_STR:
|
|
|
|
term->val.str = str;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
*_term = term;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void parse_events__free_terms(struct list_head *terms)
|
|
|
|
{
|
|
|
|
struct parse_events__term *term, *h;
|
|
|
|
|
|
|
|
list_for_each_entry_safe(term, h, terms, list)
|
|
|
|
free(term);
|
|
|
|
|
|
|
|
free(terms);
|
|
|
|
}
|