2009-04-20 21:37:32 +08:00
|
|
|
/*
|
2009-06-03 05:37:05 +08:00
|
|
|
* builtin-stat.c
|
|
|
|
*
|
|
|
|
* Builtin stat command: Give a precise performance counters summary
|
|
|
|
* overview about any workload, CPU or specific PID.
|
|
|
|
*
|
|
|
|
* Sample output:
|
2009-04-20 21:37:32 +08:00
|
|
|
|
2009-06-03 05:37:05 +08:00
|
|
|
$ perf stat ~/hackbench 10
|
|
|
|
Time: 0.104
|
2009-04-20 21:37:32 +08:00
|
|
|
|
2009-06-03 05:37:05 +08:00
|
|
|
Performance counter stats for '/home/mingo/hackbench':
|
2009-04-20 21:37:32 +08:00
|
|
|
|
2009-06-03 05:37:05 +08:00
|
|
|
1255.538611 task clock ticks # 10.143 CPU utilization factor
|
|
|
|
54011 context switches # 0.043 M/sec
|
|
|
|
385 CPU migrations # 0.000 M/sec
|
|
|
|
17755 pagefaults # 0.014 M/sec
|
|
|
|
3808323185 CPU cycles # 3033.219 M/sec
|
|
|
|
1575111190 instructions # 1254.530 M/sec
|
|
|
|
17367895 cache references # 13.833 M/sec
|
|
|
|
7674421 cache misses # 6.112 M/sec
|
2009-04-20 21:37:32 +08:00
|
|
|
|
2009-06-03 05:37:05 +08:00
|
|
|
Wall-clock time elapsed: 123.786620 msecs
|
2009-04-20 21:37:32 +08:00
|
|
|
|
2009-05-26 15:17:18 +08:00
|
|
|
*
|
|
|
|
* Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
|
|
|
|
*
|
|
|
|
* Improvements and fixes by:
|
|
|
|
*
|
|
|
|
* Arjan van de Ven <arjan@linux.intel.com>
|
|
|
|
* Yanmin Zhang <yanmin.zhang@intel.com>
|
|
|
|
* Wu Fengguang <fengguang.wu@intel.com>
|
|
|
|
* Mike Galbraith <efault@gmx.de>
|
|
|
|
* Paul Mackerras <paulus@samba.org>
|
2009-06-27 05:32:07 +08:00
|
|
|
* Jaswinder Singh Rajput <jaswinder@kernel.org>
|
2009-05-26 15:17:18 +08:00
|
|
|
*
|
|
|
|
* Released under the GPL v2. (and only v2, not any later version)
|
2009-04-20 21:37:32 +08:00
|
|
|
*/
|
|
|
|
|
2009-05-24 00:28:58 +08:00
|
|
|
#include "perf.h"
|
2009-05-27 15:10:38 +08:00
|
|
|
#include "builtin.h"
|
2009-04-27 14:02:14 +08:00
|
|
|
#include "util/util.h"
|
2009-05-26 15:17:18 +08:00
|
|
|
#include "util/parse-options.h"
|
|
|
|
#include "util/parse-events.h"
|
2009-08-17 04:05:48 +08:00
|
|
|
#include "util/event.h"
|
2011-01-12 06:56:53 +08:00
|
|
|
#include "util/evlist.h"
|
2011-01-04 02:39:04 +08:00
|
|
|
#include "util/evsel.h"
|
2009-08-17 04:05:48 +08:00
|
|
|
#include "util/debug.h"
|
2011-04-27 11:39:24 +08:00
|
|
|
#include "util/color.h"
|
2009-12-31 16:05:50 +08:00
|
|
|
#include "util/header.h"
|
perf tools: Fix sparse CPU numbering related bugs
At present, the perf subcommands that do system-wide monitoring
(perf stat, perf record and perf top) don't work properly unless
the online cpus are numbered 0, 1, ..., N-1. These tools ask
for the number of online cpus with sysconf(_SC_NPROCESSORS_ONLN)
and then try to create events for cpus 0, 1, ..., N-1.
This creates problems for systems where the online cpus are
numbered sparsely. For example, a POWER6 system in
single-threaded mode (i.e. only running 1 hardware thread per
core) will have only even-numbered cpus online.
This fixes the problem by reading the /sys/devices/system/cpu/online
file to find out which cpus are online. The code that does that is in
tools/perf/util/cpumap.[ch], and consists of a read_cpu_map()
function that sets up a cpumap[] array and returns the number of
online cpus. If /sys/devices/system/cpu/online can't be read or
can't be parsed successfully, it falls back to using sysconf to
ask how many cpus are online and sets up an identity map in cpumap[].
The perf record, perf stat and perf top code then calls
read_cpu_map() in the system-wide monitoring case (instead of
sysconf) and uses cpumap[] to get the cpu numbers to pass to
perf_event_open.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
LKML-Reference: <20100310093609.GA3959@brick.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-10 17:36:09 +08:00
|
|
|
#include "util/cpumap.h"
|
2010-03-18 22:36:05 +08:00
|
|
|
#include "util/thread.h"
|
2011-01-19 01:15:24 +08:00
|
|
|
#include "util/thread_map.h"
|
2009-04-20 21:37:32 +08:00
|
|
|
|
|
|
|
#include <sys/prctl.h>
|
2009-06-13 20:57:28 +08:00
|
|
|
#include <math.h>
|
perf stat: add perf stat -B to pretty print large numbers
It is hard to read very large numbers so provide an option to perf stat
to separate thousands using a separator. The patch leverages the locale
support of stdio. You need to set your LC_NUMERIC appropriately, for
instance LC_NUMERIC=en_US.UTF8. You need to pass -B to activate this
feature. This way existing scripts parsing the output do not need to be
changed. Here is an example.
$ perf stat noploop 2
noploop for 2 seconds
Performance counter stats for 'noploop 2':
1998.347031 task-clock-msecs # 0.998 CPUs
61 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
118 page-faults # 0.000 M/sec
4,138,410,900 cycles # 2070.917 M/sec (scaled from 70.01%)
2,062,650,268 instructions # 0.498 IPC (scaled from 70.01%)
2,057,653,466 branches # 1029.678 M/sec (scaled from 70.01%)
40,267 branch-misses # 0.002 % (scaled from 30.04%)
2,055,961,348 cache-references # 1028.831 M/sec (scaled from 30.03%)
53,725 cache-misses # 0.027 M/sec (scaled from 30.02%)
2.001393933 seconds time elapsed
$ perf stat -B noploop 2
noploop for 2 seconds
Performance counter stats for 'noploop 2':
1998.297883 task-clock-msecs # 0.998 CPUs
59 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
119 page-faults # 0.000 M/sec
4,131,380,160 cycles # 2067.450 M/sec (scaled from 70.01%)
2,059,096,507 instructions # 0.498 IPC (scaled from 70.01%)
2,054,681,303 branches # 1028.216 M/sec (scaled from 70.01%)
25,650 branch-misses # 0.001 % (scaled from 30.05%)
2,056,283,014 cache-references # 1029.017 M/sec (scaled from 30.03%)
47,097 cache-misses # 0.024 M/sec (scaled from 30.02%)
2.001391016 seconds time elapsed
Cc: David S. Miller <davem@davemloft.net>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <4bf28fe8.914ed80a.01ca.fffff5f5@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-05-18 21:00:01 +08:00
|
|
|
#include <locale.h>
|
2009-05-05 23:50:27 +08:00
|
|
|
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Committer note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitly enabled or disabled -B,
add code to disable big_num if the user didn't explicitly set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
/* Single-space separator string; NOTE(review): presumably the fallback field separator for output when -x/--field-separator is not given - users are outside this chunk. */
#define DEFAULT_SEPARATOR " "
|
|
|
|
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
static struct perf_event_attr default_attrs[] = {
|
2009-04-20 21:37:32 +08:00
|
|
|
|
perf stat: Re-align the default_attrs[] array
Clean up the array definition to be vertically aligned.
No functional effects.
Cc: Tim Blechmann <tim@klingt.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4ADC3975.8050109@klingt.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
tools/perf/builtin-stat.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c373683..95a55ea 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -59,6 +59,8 @@ static struct perf_event_attr default_attrs[] = {
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
};
2009-10-19 19:27:08 +08:00
|
|
|
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
|
|
|
|
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
|
|
|
|
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
|
|
|
|
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
|
2009-06-11 20:06:28 +08:00
|
|
|
|
perf stat: Re-align the default_attrs[] array
Clean up the array definition to be vertically aligned.
No functional effects.
Cc: Tim Blechmann <tim@klingt.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4ADC3975.8050109@klingt.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
tools/perf/builtin-stat.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c373683..95a55ea 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -59,6 +59,8 @@ static struct perf_event_attr default_attrs[] = {
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
};
2009-10-19 19:27:08 +08:00
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
|
perf stat: Add stalled cycles to the default output
The new default output looks like this:
Performance counter stats for './loop_1b_instructions':
236.010686 task-clock # 0.996 CPUs utilized
0 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
99 page-faults # 0.000 M/sec
756,487,646 cycles # 3.205 GHz
354,938,996 stalled-cycles # 46.92% of all cycles are idle
1,001,403,797 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn
100,279,773 branches # 424.895 M/sec
12,646 branch-misses # 0.013 % of all branches
0.236902540 seconds time elapsed
We dropped cache-refs and cache-misses and added stalled-cycles - this is a
more generic "how well utilized is the CPU" metric.
If the stalled-cycles ratio is too high then more specific measurements can be
taken to figure out the source of the inefficiency.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-pbpl2l4mn797s69bclfpwkwn@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 11:20:22 +08:00
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES },
|
perf stat: Re-align the default_attrs[] array
Clean up the array definition to be vertically aligned.
No functional effects.
Cc: Tim Blechmann <tim@klingt.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4ADC3975.8050109@klingt.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
tools/perf/builtin-stat.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c373683..95a55ea 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -59,6 +59,8 @@ static struct perf_event_attr default_attrs[] = {
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
};
2009-10-19 19:27:08 +08:00
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
|
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
|
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
|
2009-06-11 20:06:28 +08:00
|
|
|
|
2009-04-20 21:37:32 +08:00
|
|
|
};
|
2009-05-26 15:17:18 +08:00
|
|
|
|
perf stat: Add -d/--detailed flag to run with a lot of events
Add the new -d/--detailed flag, which generates a pretty detailed event list:
Performance counter stats for './hackbench 10' (10 runs):
1514.287888 task-clock # 10.897 CPUs utilized ( +- 3.05% )
39,698 context-switches # 0.026 M/sec ( +- 12.19% )
8,147 CPU-migrations # 0.005 M/sec ( +- 16.55% )
17,918 page-faults # 0.012 M/sec ( +- 0.37% )
2,944,504,050 cycles # 1.944 GHz ( +- 3.89% ) (32.60%)
1,043,971,283 stalled-cycles # 35.45% of all cycles are idle ( +- 5.22% ) (44.48%)
1,655,906,768 instructions # 0.56 insns per cycle
# 0.63 stalled cycles per insn ( +- 1.95% ) (55.09%)
338,832,373 branches # 223.757 M/sec ( +- 1.96% ) (64.47%)
3,892,416 branch-misses # 1.15% of all branches ( +- 5.49% ) (73.12%)
606,410,482 L1-dcache-loads # 400.459 M/sec ( +- 1.29% ) (71.21%)
31,204,395 L1-dcache-load-misses # 5.15% of all L1-dcache hits ( +- 3.04% ) (60.43%)
3,922,751 LLC-loads # 2.590 M/sec ( +- 6.80% ) (46.87%)
5,037,288 LLC-load-misses # 3.327 M/sec ( +- 3.56% ) (13.00%)
0.138966828 seconds time elapsed ( +- 4.11% )
This can be used "at a glance" for narrower analysis.
-d can also be used in addition to other -e events, to further expand an event list.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-cxs98quixs3qyvdqx3goojc4@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 19:50:47 +08:00
|
|
|
/*
|
|
|
|
* Detailed stats:
|
|
|
|
*/
|
|
|
|
static struct perf_event_attr detailed_attrs[] = {
|
|
|
|
|
|
|
|
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
|
|
|
|
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
|
|
|
|
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
|
|
|
|
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
|
|
|
|
|
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
|
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES },
|
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
|
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
|
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
|
|
|
|
|
|
|
|
{ .type = PERF_TYPE_HW_CACHE,
|
|
|
|
.config =
|
|
|
|
PERF_COUNT_HW_CACHE_L1D << 0 |
|
|
|
|
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
|
(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
|
|
|
|
|
|
|
|
{ .type = PERF_TYPE_HW_CACHE,
|
|
|
|
.config =
|
|
|
|
PERF_COUNT_HW_CACHE_L1D << 0 |
|
|
|
|
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
|
(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
|
|
|
|
|
|
|
|
{ .type = PERF_TYPE_HW_CACHE,
|
|
|
|
.config =
|
|
|
|
PERF_COUNT_HW_CACHE_LL << 0 |
|
|
|
|
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
|
(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
|
|
|
|
|
|
|
|
{ .type = PERF_TYPE_HW_CACHE,
|
|
|
|
.config =
|
|
|
|
PERF_COUNT_HW_CACHE_LL << 0 |
|
|
|
|
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
|
(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
|
|
|
|
};
|
|
|
|
|
2011-01-12 06:56:53 +08:00
|
|
|
/* Global perf_evlist pointer with external linkage (not static, unlike the other file-scope state here); it is assigned outside this chunk. */
struct perf_evlist *evsel_list;
|
|
|
|
|
2010-04-13 16:37:33 +08:00
|
|
|
/*
 * File-scope run configuration and state.  Objects with static storage
 * duration start out zeroed, so only the non-zero defaults are spelled out.
 * NOTE(review): the code that sets and reads these is outside this chunk.
 */
static bool			system_wide;
static int			run_idx;
static int			run_count	=  1;
static bool			no_inherit;
static bool			scale		=  true;
static bool			no_aggr;
static pid_t			target_pid	= -1;
static pid_t			target_tid	= -1;
static pid_t			child_pid	= -1;
static bool			null_run;
|
perf stat: Add -d/--detailed flag to run with a lot of events
Add the new -d/--detailed flag, which generates a pretty detailed event list:
Performance counter stats for './hackbench 10' (10 runs):
1514.287888 task-clock # 10.897 CPUs utilized ( +- 3.05% )
39,698 context-switches # 0.026 M/sec ( +- 12.19% )
8,147 CPU-migrations # 0.005 M/sec ( +- 16.55% )
17,918 page-faults # 0.012 M/sec ( +- 0.37% )
2,944,504,050 cycles # 1.944 GHz ( +- 3.89% ) (32.60%)
1,043,971,283 stalled-cycles # 35.45% of all cycles are idle ( +- 5.22% ) (44.48%)
1,655,906,768 instructions # 0.56 insns per cycle
# 0.63 stalled cycles per insn ( +- 1.95% ) (55.09%)
338,832,373 branches # 223.757 M/sec ( +- 1.96% ) (64.47%)
3,892,416 branch-misses # 1.15% of all branches ( +- 5.49% ) (73.12%)
606,410,482 L1-dcache-loads # 400.459 M/sec ( +- 1.29% ) (71.21%)
31,204,395 L1-dcache-load-misses # 5.15% of all L1-dcache hits ( +- 3.04% ) (60.43%)
3,922,751 LLC-loads # 2.590 M/sec ( +- 6.80% ) (46.87%)
5,037,288 LLC-load-misses # 3.327 M/sec ( +- 3.56% ) (13.00%)
0.138966828 seconds time elapsed ( +- 4.11% )
This can be used "at a glance" for narrower analysis.
-d can also be used in addition to other -e events, to further expand an event list.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-cxs98quixs3qyvdqx3goojc4@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 19:50:47 +08:00
|
|
|
/* Off by default; NOTE(review): presumably switched on by -d/--detailed - confirm against the option table. */
static bool			detailed_run;
/* On by default. */
static bool			big_num		=  true;
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Committer note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitly enabled or disabled -B,
add code to disable big_num if the user didn't explicitly set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
/* Starts at -1; NOTE(review): presumably meaning "not set explicitly by the user" - confirm. */
static int			big_num_opt	= -1;
/* NULL until assigned. */
static const char		*cpu_list;
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Committer note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitly enabled or disabled -B,
add code to disable big_num if the user didn't explicitly set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
/* NULL until assigned; NOTE(review): presumably the -x/--field-separator string - confirm. */
static const char		*csv_sep;
/* false until assigned. */
static bool			csv_output;
|
perf stat: add perf stat -B to pretty print large numbers
It is hard to read very large numbers so provide an option to perf stat
to separate thousands using a separator. The patch leverages the locale
support of stdio. You need to set your LC_NUMERIC appropriately, for
instance LC_NUMERIC=en_US.UTF8. You need to pass -B to activate this
feature. This way existing scripts parsing the output do not need to be
changed. Here is an example.
$ perf stat noploop 2
noploop for 2 seconds
Performance counter stats for 'noploop 2':
1998.347031 task-clock-msecs # 0.998 CPUs
61 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
118 page-faults # 0.000 M/sec
4,138,410,900 cycles # 2070.917 M/sec (scaled from 70.01%)
2,062,650,268 instructions # 0.498 IPC (scaled from 70.01%)
2,057,653,466 branches # 1029.678 M/sec (scaled from 70.01%)
40,267 branch-misses # 0.002 % (scaled from 30.04%)
2,055,961,348 cache-references # 1028.831 M/sec (scaled from 30.03%)
53,725 cache-misses # 0.027 M/sec (scaled from 30.02%)
2.001393933 seconds time elapsed
$ perf stat -B noploop 2
noploop for 2 seconds
Performance counter stats for 'noploop 2':
1998.297883 task-clock-msecs # 0.998 CPUs
59 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
119 page-faults # 0.000 M/sec
4,131,380,160 cycles # 2067.450 M/sec (scaled from 70.01%)
2,059,096,507 instructions # 0.498 IPC (scaled from 70.01%)
2,054,681,303 branches # 1028.216 M/sec (scaled from 70.01%)
25,650 branch-misses # 0.001 % (scaled from 30.05%)
2,056,283,014 cache-references # 1029.017 M/sec (scaled from 30.03%)
47,097 cache-misses # 0.024 M/sec (scaled from 30.02%)
2.001391016 seconds time elapsed
Cc: David S. Miller <davem@davemloft.net>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <4bf28fe8.914ed80a.01ca.fffff5f5@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-05-18 21:00:01 +08:00
|
|
|
|
2009-12-31 16:05:50 +08:00
|
|
|
/* Volatile flag, initially 0. NOTE(review): presumably set asynchronously (e.g. from a signal handler) - the writer is not visible in this chunk. */
static volatile int done = 0;
|
|
|
|
|
2009-09-04 21:36:12 +08:00
|
|
|
/*
 * Running accumulator for a stream of samples, maintained incrementally
 * by update_stats() and read by avg_stats() / stddev_stats().
 */
struct stats {
	double n;	/* number of samples folded in so far */
	double mean;	/* running mean of those samples */
	double M2;	/* running sum of squared deviations from the mean */
};
|
2009-06-13 20:57:28 +08:00
|
|
|
|
2011-01-04 02:39:04 +08:00
|
|
|
struct perf_stat {
|
|
|
|
struct stats res_stats[3];
|
|
|
|
};
|
|
|
|
|
2011-01-04 03:45:52 +08:00
|
|
|
static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
|
2011-01-04 02:39:04 +08:00
|
|
|
{
|
2011-01-04 03:45:52 +08:00
|
|
|
evsel->priv = zalloc(sizeof(struct perf_stat));
|
2011-01-04 02:39:04 +08:00
|
|
|
return evsel->priv == NULL ? -ENOMEM : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
|
|
|
|
{
|
|
|
|
free(evsel->priv);
|
|
|
|
evsel->priv = NULL;
|
|
|
|
}
|
|
|
|
|
2009-09-04 21:36:08 +08:00
|
|
|
/*
 * Fold one more sample, @val, into @stats.
 *
 * The count, mean and M2 (sum of squared deviations from the mean) are
 * updated incrementally, so no history of earlier samples is kept.
 */
static void update_stats(struct stats *stats, u64 val)
{
	const double sample = val;
	/* Distance from the mean as it was before this sample. */
	const double dist_before = sample - stats->mean;

	stats->n += 1;
	stats->mean += dist_before / stats->n;
	/* The second factor uses the mean as it is after this sample. */
	stats->M2 += dist_before * (sample - stats->mean);
}
|
|
|
|
|
2009-09-04 21:36:12 +08:00
|
|
|
static double avg_stats(struct stats *stats)
|
|
|
|
{
|
2009-09-04 23:26:26 +08:00
|
|
|
return stats->mean;
|
2009-09-04 21:36:12 +08:00
|
|
|
}
|
2009-06-13 20:57:28 +08:00
|
|
|
|
2009-09-04 21:36:12 +08:00
|
|
|
/*
|
2009-09-04 23:03:13 +08:00
|
|
|
* http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
|
|
|
|
*
|
2009-09-04 23:26:26 +08:00
|
|
|
* (\Sum n_i^2) - ((\Sum n_i)^2)/n
|
|
|
|
* s^2 = -------------------------------
|
|
|
|
* n - 1
|
2009-09-04 23:03:13 +08:00
|
|
|
*
|
|
|
|
* http://en.wikipedia.org/wiki/Stddev
|
|
|
|
*
|
|
|
|
* The std dev of the mean is related to the std dev by:
|
|
|
|
*
|
|
|
|
* s
|
|
|
|
* s_mean = -------
|
|
|
|
* sqrt(n)
|
|
|
|
*
|
2009-09-04 21:36:12 +08:00
|
|
|
*/
|
|
|
|
static double stddev_stats(struct stats *stats)
|
|
|
|
{
|
2009-09-04 23:26:26 +08:00
|
|
|
double variance = stats->M2 / (stats->n - 1);
|
|
|
|
double variance_mean = variance / stats->n;
|
2009-06-13 20:57:28 +08:00
|
|
|
|
2009-09-04 23:03:13 +08:00
|
|
|
return sqrt(variance_mean);
|
2009-09-04 21:36:12 +08:00
|
|
|
}
|
2009-06-13 20:57:28 +08:00
|
|
|
|
2010-11-16 17:05:01 +08:00
|
|
|
/*
 * Shadow statistics (this array and the runtime_*_stats arrays that
 * follow): update_shadow_stats() feeds them from selected events and the
 * print helpers read them back, indexed by their cpu argument, to derive
 * ratios for the printout.
 */
struct stats runtime_nsecs_stats[MAX_NR_CPUS];
|
|
|
|
struct stats runtime_cycles_stats[MAX_NR_CPUS];
|
perf stat: Add stalled cycles accounting, prettify the resulting output
Add stalled cycles accounting and use it to print the "cycles stalled per
instruction" value.
Also change the unit of the cycles output from M/sec to GHz - this is more
intuitive.
Prettify the output to:
Performance counter stats for './loop_1b_instructions':
239.775036 task-clock # 0.997 CPUs utilized
761,903,912 cycles # 3.178 GHz
356,620,620 stalled-cycles # 46.81% of all cycles are idle
1,001,578,351 instructions # 1.31 insns per cycle
# 0.36 stalled cycles per insn
14,782 cache-references # 0.062 M/sec
5,694 cache-misses # 38.520 % of all cache refs
0.240493656 seconds time elapsed
Also adjust the --repeat output to make the percentages align vertically:
Performance counter stats for './loop_1b_instructions' (10 runs):
236.096793 task-clock # 0.997 CPUs utilized ( +- 0.011% )
756,553,086 cycles # 3.204 GHz ( +- 0.002% )
354,942,692 stalled-cycles # 46.92% of all cycles are idle ( +- 0.008% )
1,001,389,700 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn ( +- 0.000% )
10,166 cache-references # 0.043 M/sec ( +- 0.742% )
468 cache-misses # 4.608 % of all cache refs ( +- 13.385% )
0.236874136 seconds time elapsed ( +- 0.01% )
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-uapziqny39601apdmmhoz7hk@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 10:34:16 +08:00
|
|
|
struct stats runtime_stalled_cycles_stats[MAX_NR_CPUS];
|
2010-11-16 17:05:01 +08:00
|
|
|
struct stats runtime_branches_stats[MAX_NR_CPUS];
|
2011-04-27 09:42:18 +08:00
|
|
|
struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
|
perf stat: Print out miss/hit ratio for L1 data-cache events
Print out this kind of l1-dcache-misses percentage:
Performance counter stats for './bw_tcp localhost':
29,956,262,201 cycles # 3.002 GHz (scaled from 85.14%)
8,255,209,558 stalled-cycles # 27.56% of all cycles are idle (scaled from 86.56%)
1,206,130,308 l1-dcache-misses # 40.49% of all L1-dcache hits (scaled from 86.30%)
2,978,756,779 l1-dcache-refs # 298.512 M/sec (scaled from 70.02%)
8,861,956,159 instructions # 0.30 insns per cycle
# 0.93 stalled cycles per insn (scaled from 84.27%)
1,644,306,068 branches # 164.782 M/sec (scaled from 86.43%)
74,778,443 branch-misses # 4.55% of all branches (scaled from 70.69%)
9978.695711 task-clock # 0.693 CPUs utilized
14.404347983 seconds time elapsed
And color the result depending on the severity of cache-trashing.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-54gmz0zymaid84zcs7joq02p@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 19:25:24 +08:00
|
|
|
struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
|
2009-09-04 21:36:12 +08:00
|
|
|
/*
 * Elapsed time of each run: the difference between the two rdclock()
 * readings taken in run_perf_stat().
 */
struct stats walltime_nsecs_stats;
|
2009-05-29 15:10:54 +08:00
|
|
|
|
2011-01-04 03:48:12 +08:00
|
|
|
static int create_perf_stat_counter(struct perf_evsel *evsel)
|
2009-04-20 21:37:32 +08:00
|
|
|
{
|
2011-01-04 02:39:04 +08:00
|
|
|
struct perf_event_attr *attr = &evsel->attr;
|
2009-05-05 23:50:27 +08:00
|
|
|
|
2009-04-20 21:37:32 +08:00
|
|
|
if (scale)
|
2009-06-06 15:58:57 +08:00
|
|
|
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
|
|
|
|
PERF_FORMAT_TOTAL_TIME_RUNNING;
|
2009-04-20 21:37:32 +08:00
|
|
|
|
2011-04-14 22:20:14 +08:00
|
|
|
attr->inherit = !no_inherit;
|
|
|
|
|
2011-01-04 03:48:12 +08:00
|
|
|
if (system_wide)
|
2011-04-14 22:20:14 +08:00
|
|
|
return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false);
|
2011-01-04 03:48:12 +08:00
|
|
|
|
|
|
|
if (target_pid == -1 && target_tid == -1) {
|
|
|
|
attr->disabled = 1;
|
|
|
|
attr->enable_on_exec = 1;
|
2009-04-20 21:37:32 +08:00
|
|
|
}
|
2010-03-23 00:10:28 +08:00
|
|
|
|
2011-04-14 22:20:14 +08:00
|
|
|
return perf_evsel__open_per_thread(evsel, evsel_list->threads, false);
|
2009-04-20 21:37:32 +08:00
|
|
|
}
|
|
|
|
|
2009-05-29 15:10:54 +08:00
|
|
|
/*
|
|
|
|
* Does the counter have nsecs as a unit?
|
|
|
|
*/
|
2011-01-04 02:49:44 +08:00
|
|
|
static inline int nsec_counter(struct perf_evsel *evsel)
|
2009-05-29 15:10:54 +08:00
|
|
|
{
|
2011-01-04 02:49:44 +08:00
|
|
|
if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
|
|
|
|
perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
|
2009-05-29 15:10:54 +08:00
|
|
|
return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-04-27 10:36:37 +08:00
|
|
|
/*
|
|
|
|
* Update various tracking values we maintain to print
|
|
|
|
* more semantic information such as miss/hit ratios,
|
|
|
|
* instruction rates, etc:
|
|
|
|
*/
|
|
|
|
static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
|
|
|
|
{
|
|
|
|
if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
|
|
|
|
update_stats(&runtime_nsecs_stats[0], count[0]);
|
|
|
|
else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
|
|
|
|
update_stats(&runtime_cycles_stats[0], count[0]);
|
perf stat: Add stalled cycles accounting, prettify the resulting output
Add stalled cycles accounting and use it to print the "cycles stalled per
instruction" value.
Also change the unit of the cycles output from M/sec to GHz - this is more
intuitive.
Prettify the output to:
Performance counter stats for './loop_1b_instructions':
239.775036 task-clock # 0.997 CPUs utilized
761,903,912 cycles # 3.178 GHz
356,620,620 stalled-cycles # 46.81% of all cycles are idle
1,001,578,351 instructions # 1.31 insns per cycle
# 0.36 stalled cycles per insn
14,782 cache-references # 0.062 M/sec
5,694 cache-misses # 38.520 % of all cache refs
0.240493656 seconds time elapsed
Also adjust the --repeat output to make the percentages align vertically:
Performance counter stats for './loop_1b_instructions' (10 runs):
236.096793 task-clock # 0.997 CPUs utilized ( +- 0.011% )
756,553,086 cycles # 3.204 GHz ( +- 0.002% )
354,942,692 stalled-cycles # 46.92% of all cycles are idle ( +- 0.008% )
1,001,389,700 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn ( +- 0.000% )
10,166 cache-references # 0.043 M/sec ( +- 0.742% )
468 cache-misses # 4.608 % of all cache refs ( +- 13.385% )
0.236874136 seconds time elapsed ( +- 0.01% )
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-uapziqny39601apdmmhoz7hk@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 10:34:16 +08:00
|
|
|
else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES))
|
|
|
|
update_stats(&runtime_stalled_cycles_stats[0], count[0]);
|
2011-04-27 10:36:37 +08:00
|
|
|
else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
|
|
|
|
update_stats(&runtime_branches_stats[0], count[0]);
|
|
|
|
else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
|
|
|
|
update_stats(&runtime_cacherefs_stats[0], count[0]);
|
perf stat: Print out miss/hit ratio for L1 data-cache events
Print out this kind of l1-dcache-misses percentage:
Performance counter stats for './bw_tcp localhost':
29,956,262,201 cycles # 3.002 GHz (scaled from 85.14%)
8,255,209,558 stalled-cycles # 27.56% of all cycles are idle (scaled from 86.56%)
1,206,130,308 l1-dcache-misses # 40.49% of all L1-dcache hits (scaled from 86.30%)
2,978,756,779 l1-dcache-refs # 298.512 M/sec (scaled from 70.02%)
8,861,956,159 instructions # 0.30 insns per cycle
# 0.93 stalled cycles per insn (scaled from 84.27%)
1,644,306,068 branches # 164.782 M/sec (scaled from 86.43%)
74,778,443 branch-misses # 4.55% of all branches (scaled from 70.69%)
9978.695711 task-clock # 0.693 CPUs utilized
14.404347983 seconds time elapsed
And color the result depending on the severity of cache-trashing.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-54gmz0zymaid84zcs7joq02p@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 19:25:24 +08:00
|
|
|
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
|
|
|
|
update_stats(&runtime_l1_dcache_stats[0], count[0]);
|
2011-04-27 10:36:37 +08:00
|
|
|
}
|
|
|
|
|
2009-05-29 15:10:54 +08:00
|
|
|
/*
|
2009-05-29 15:10:54 +08:00
|
|
|
* Read out the results of a single counter:
|
2010-11-16 17:05:01 +08:00
|
|
|
* aggregate counts across CPUs in system-wide mode
|
2009-05-29 15:10:54 +08:00
|
|
|
*/
|
2011-01-04 03:45:52 +08:00
|
|
|
static int read_counter_aggr(struct perf_evsel *counter)
|
2009-05-29 15:10:54 +08:00
|
|
|
{
|
2011-01-04 02:39:04 +08:00
|
|
|
struct perf_stat *ps = counter->priv;
|
2011-01-04 03:45:52 +08:00
|
|
|
u64 *count = counter->counts->aggr.values;
|
|
|
|
int i;
|
2009-05-29 15:10:54 +08:00
|
|
|
|
2011-01-30 21:59:43 +08:00
|
|
|
if (__perf_evsel__read(counter, evsel_list->cpus->nr,
|
|
|
|
evsel_list->threads->nr, scale) < 0)
|
2011-01-04 03:45:52 +08:00
|
|
|
return -1;
|
2009-09-04 21:36:08 +08:00
|
|
|
|
|
|
|
for (i = 0; i < 3; i++)
|
2011-01-04 02:39:04 +08:00
|
|
|
update_stats(&ps->res_stats[i], count[i]);
|
2009-09-04 21:36:08 +08:00
|
|
|
|
|
|
|
if (verbose) {
|
2011-01-23 06:37:02 +08:00
|
|
|
fprintf(stderr, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
|
|
|
|
event_name(counter), count[0], count[1], count[2]);
|
2009-09-04 21:36:08 +08:00
|
|
|
}
|
|
|
|
|
2009-05-29 15:10:54 +08:00
|
|
|
/*
|
|
|
|
* Save the full runtime - to allow normalization during printout:
|
|
|
|
*/
|
2011-04-27 10:36:37 +08:00
|
|
|
update_shadow_stats(counter, count);
|
2011-01-04 03:45:52 +08:00
|
|
|
|
|
|
|
return 0;
|
2010-11-16 17:05:01 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Read out the results of a single counter:
|
|
|
|
* do not aggregate counts across CPUs in system-wide mode
|
|
|
|
*/
|
2011-01-04 03:45:52 +08:00
|
|
|
static int read_counter(struct perf_evsel *counter)
|
2010-11-16 17:05:01 +08:00
|
|
|
{
|
2011-01-04 03:45:52 +08:00
|
|
|
u64 *count;
|
2010-11-16 17:05:01 +08:00
|
|
|
int cpu;
|
|
|
|
|
2011-01-30 21:59:43 +08:00
|
|
|
for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
|
2011-01-04 03:45:52 +08:00
|
|
|
if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
|
|
|
|
return -1;
|
2010-11-16 17:05:01 +08:00
|
|
|
|
2011-01-04 03:45:52 +08:00
|
|
|
count = counter->counts->cpu[cpu].values;
|
2010-11-16 17:05:01 +08:00
|
|
|
|
2011-04-27 10:36:37 +08:00
|
|
|
update_shadow_stats(counter, count);
|
2010-11-16 17:05:01 +08:00
|
|
|
}
|
2011-01-04 03:45:52 +08:00
|
|
|
|
|
|
|
return 0;
|
2009-05-29 15:10:54 +08:00
|
|
|
}
|
|
|
|
|
2009-07-01 18:37:06 +08:00
|
|
|
/*
 * Perform one measurement run.
 *
 * When a command is given (argc > 0) it is forked first and held back
 * on a pipe until all counters have been opened; otherwise the function
 * sleeps until the 'done' flag is set.  Afterwards every counter is read
 * and closed and the elapsed time is folded into walltime_nsecs_stats.
 *
 * Returns WEXITSTATUS() of the wait status (0 when nothing was forked),
 * or -1 if the event filters could not be set.  Failure to create the
 * pipes, to fork, or to open an event terminates the process.
 */
static int run_perf_stat(int argc __used, const char **argv)
{
	unsigned long long t0, t1;
	struct perf_evsel *counter;
	int status = 0;
	int child_ready_pipe[2], go_pipe[2];
	const bool forks = (argc > 0);
	char buf;

	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
		perror("failed to create pipes");
		exit(1);
	}

	if (forks) {
		if ((child_pid = fork()) < 0) {
			/*
			 * There is no workload to measure; carrying on would
			 * only try to attach the counters to pid -1.
			 */
			perror("failed to fork");
			exit(1);
		}

		if (!child_pid) {
			close(child_ready_pipe[0]);
			close(go_pipe[1]);
			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

			/*
			 * Do a dummy execvp to get the PLT entry resolved,
			 * so we avoid the resolver overhead on the real
			 * execvp call.
			 */
			execvp("", (char **)argv);

			/*
			 * Tell the parent we're ready to go
			 */
			close(child_ready_pipe[1]);

			/*
			 * Wait until the parent tells us to go.
			 */
			if (read(go_pipe[0], &buf, 1) == -1)
				perror("unable to read pipe");

			execvp(argv[0], (char **)argv);

			perror(argv[0]);
			exit(-1);
		}

		if (target_tid == -1 && target_pid == -1 && !system_wide)
			evsel_list->threads->map[0] = child_pid;

		/*
		 * Wait for the child to be ready to exec.
		 */
		close(child_ready_pipe[1]);
		close(go_pipe[0]);
		if (read(child_ready_pipe[0], &buf, 1) == -1)
			perror("unable to read pipe");
		close(child_ready_pipe[0]);
	}

	list_for_each_entry(counter, &evsel_list->entries, node) {
		if (create_perf_stat_counter(counter) < 0) {
			/* errno holds positive E* values */
			if (errno == EPERM || errno == EACCES) {
				error("You may not have permission to collect %sstats.\n"
				      "\t Consider tweaking"
				      " /proc/sys/kernel/perf_event_paranoid or running as root.",
				      system_wide ? "system-wide " : "");
			} else if (errno == ENOENT) {
				error("%s event is not supported. ", event_name(counter));
			} else {
				error("open_counter returned with %d (%s). "
				      "/bin/dmesg may provide additional information.\n",
				      errno, strerror(errno));
			}
			if (child_pid != -1)
				kill(child_pid, SIGTERM);
			die("Not all events could be opened.\n");
			return -1;
		}
	}

	if (perf_evlist__set_filters(evsel_list)) {
		error("failed to set filter with %d (%s)\n", errno,
		      strerror(errno));
		return -1;
	}

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();

	if (forks) {
		/* closing the write end releases the child blocked in read() */
		close(go_pipe[1]);
		wait(&status);
	} else {
		while(!done) sleep(1);
	}

	t1 = rdclock();

	update_stats(&walltime_nsecs_stats, t1 - t0);

	if (no_aggr) {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			read_counter(counter);
			perf_evsel__close_fd(counter, evsel_list->cpus->nr, 1);
		}
	} else {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			read_counter_aggr(counter);
			perf_evsel__close_fd(counter, evsel_list->cpus->nr,
					     evsel_list->threads->nr);
		}
	}

	return WEXITSTATUS(status);
}
|
|
|
|
|
2011-04-27 11:35:39 +08:00
|
|
|
/*
 * Print @total as a percentage of @avg in the " ( +-x.xx% )" form on
 * stderr; 0.00% is printed when @avg is zero.
 */
static void print_noise_pct(double total, double avg)
{
	const double pct = avg ? 100.0*total/avg : 0.0;

	fprintf(stderr, " ( +-%6.2f%% )", pct);
}
|
|
|
|
|
2011-01-04 02:39:04 +08:00
|
|
|
static void print_noise(struct perf_evsel *evsel, double avg)
|
2009-06-13 20:57:28 +08:00
|
|
|
{
|
2011-01-04 02:39:04 +08:00
|
|
|
struct perf_stat *ps;
|
|
|
|
|
2009-09-05 00:23:38 +08:00
|
|
|
if (run_count == 1)
|
|
|
|
return;
|
|
|
|
|
2011-01-04 02:39:04 +08:00
|
|
|
ps = evsel->priv;
|
2011-04-27 11:35:39 +08:00
|
|
|
print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
|
2009-06-13 20:57:28 +08:00
|
|
|
}
|
|
|
|
|
2011-01-04 02:49:44 +08:00
|
|
|
static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
|
2009-06-13 19:35:00 +08:00
|
|
|
{
|
2009-09-04 21:36:12 +08:00
|
|
|
double msecs = avg / 1e6;
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
char cpustr[16] = { '\0', };
|
|
|
|
const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s";
|
2009-06-13 19:35:00 +08:00
|
|
|
|
2010-11-16 17:05:01 +08:00
|
|
|
if (no_aggr)
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
sprintf(cpustr, "CPU%*d%s",
|
|
|
|
csv_output ? 0 : -4,
|
2011-01-30 21:59:43 +08:00
|
|
|
evsel_list->cpus->map[cpu], csv_sep);
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
|
2011-01-04 02:49:44 +08:00
|
|
|
fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel));
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
|
perf tool: Add cgroup support
This patch adds the ability to filter monitoring based on container groups
(cgroups) for both perf stat and perf record. It is possible to monitor
multiple cgroup in parallel. There is one cgroup per event. The cgroups to
monitor are passed via a new -G option followed by a comma separated list of
cgroup names.
The cgroup filesystem has to be mounted. Given a cgroup name, the perf tool
finds the corresponding directory in the cgroup filesystem and opens it. It
then passes that file descriptor to the kernel.
Example:
$ perf stat -B -a -e cycles:u,cycles:u,cycles:u -G test1,,test2 -- sleep 1
Performance counter stats for 'sleep 1':
2,368,667,414 cycles test1
2,369,661,459 cycles
<not counted> cycles test2
1.001856890 seconds time elapsed
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4d590290.825bdf0a.7d0a.4890@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-02-14 17:20:01 +08:00
|
|
|
if (evsel->cgrp)
|
|
|
|
fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name);
|
|
|
|
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
if (csv_output)
|
|
|
|
return;
|
2009-06-13 19:35:00 +08:00
|
|
|
|
2011-01-04 02:49:44 +08:00
|
|
|
if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
|
perf stat: Add stalled cycles accounting, prettify the resulting output
Add stalled cycles accounting and use it to print the "cycles stalled per
instruction" value.
Also change the unit of the cycles output from M/sec to GHz - this is more
intuitive.
Prettify the output to:
Performance counter stats for './loop_1b_instructions':
239.775036 task-clock # 0.997 CPUs utilized
761,903,912 cycles # 3.178 GHz
356,620,620 stalled-cycles # 46.81% of all cycles are idle
1,001,578,351 instructions # 1.31 insns per cycle
# 0.36 stalled cycles per insn
14,782 cache-references # 0.062 M/sec
5,694 cache-misses # 38.520 % of all cache refs
0.240493656 seconds time elapsed
Also adjust the --repeat output to make the percentages align vertically:
Performance counter stats for './loop_1b_instructions' (10 runs):
236.096793 task-clock # 0.997 CPUs utilized ( +- 0.011% )
756,553,086 cycles # 3.204 GHz ( +- 0.002% )
354,942,692 stalled-cycles # 46.92% of all cycles are idle ( +- 0.008% )
1,001,389,700 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn ( +- 0.000% )
10,166 cache-references # 0.043 M/sec ( +- 0.742% )
468 cache-misses # 4.608 % of all cache refs ( +- 13.385% )
0.236874136 seconds time elapsed ( +- 0.01% )
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-uapziqny39601apdmmhoz7hk@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 10:34:16 +08:00
|
|
|
fprintf(stderr, " # %8.3f CPUs utilized ", avg / avg_stats(&walltime_nsecs_stats));
|
2009-06-13 19:35:00 +08:00
|
|
|
}
|
|
|
|
|
2011-04-27 11:39:24 +08:00
|
|
|
/*
 * Annotate a stalled-cycles count with its share of all cycles.
 *
 * @cpu:   index into runtime_cycles_stats[] used as the denominator
 * @evsel: not referenced in the body
 * @avg:   the stalled-cycles value to express as a percentage
 *
 * The percentage is written to stderr, colored by severity.  When the
 * averaged cycle count is zero the percentage is reported as 0.
 */
static void print_stalled_cycles(int cpu, struct perf_evsel *evsel __used, double avg)
{
	const double cycles = avg_stats(&runtime_cycles_stats[cpu]);
	const double pct = cycles ? avg / cycles * 100.0 : 0.0;
	const char *color = PERF_COLOR_NORMAL;

	/* Ascending checks: the highest threshold exceeded decides the color. */
	if (pct > 25.0)
		color = PERF_COLOR_YELLOW;
	if (pct > 50.0)
		color = PERF_COLOR_MAGENTA;
	if (pct > 75.0)
		color = PERF_COLOR_RED;

	fprintf(stderr, " # ");
	color_fprintf(stderr, color, "%5.2f%%", pct);
	fprintf(stderr, " of all cycles are idle ");
}
|
|
|
|
|
2011-04-27 18:16:10 +08:00
|
|
|
/*
 * Annotate a branch-misses count with its share of all branches.
 *
 * @cpu:   index into runtime_branches_stats[] used as the denominator
 * @evsel: not referenced in the body
 * @avg:   the branch-misses value to express as a percentage
 *
 * The percentage is written to stderr, colored by severity.  When the
 * averaged branch count is zero the percentage is reported as 0.
 */
static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
	const double branches = avg_stats(&runtime_branches_stats[cpu]);
	const double pct = branches ? avg / branches * 100.0 : 0.0;
	const char *color = PERF_COLOR_NORMAL;

	/* Ascending checks: the highest threshold exceeded decides the color. */
	if (pct > 5.0)
		color = PERF_COLOR_YELLOW;
	if (pct > 10.0)
		color = PERF_COLOR_MAGENTA;
	if (pct > 20.0)
		color = PERF_COLOR_RED;

	fprintf(stderr, " # ");
	color_fprintf(stderr, color, "%5.2f%%", pct);
	fprintf(stderr, " of all branches ");
}
|
|
|
|
|
perf stat: Print out miss/hit ratio for L1 data-cache events
Print out this kind of l1-dcache-misses percentage:
Performance counter stats for './bw_tcp localhost':
29,956,262,201 cycles # 3.002 GHz (scaled from 85.14%)
8,255,209,558 stalled-cycles # 27.56% of all cycles are idle (scaled from 86.56%)
1,206,130,308 l1-dcache-misses # 40.49% of all L1-dcache hits (scaled from 86.30%)
2,978,756,779 l1-dcache-refs # 298.512 M/sec (scaled from 70.02%)
8,861,956,159 instructions # 0.30 insns per cycle
# 0.93 stalled cycles per insn (scaled from 84.27%)
1,644,306,068 branches # 164.782 M/sec (scaled from 86.43%)
74,778,443 branch-misses # 4.55% of all branches (scaled from 70.69%)
9978.695711 task-clock # 0.693 CPUs utilized
14.404347983 seconds time elapsed
And color the result depending on the severity of cache-thrashing.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-54gmz0zymaid84zcs7joq02p@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 19:25:24 +08:00
|
|
|
/*
 * Annotate an L1 data-cache miss count with its ratio to the value
 * accumulated in runtime_l1_dcache_stats[].
 *
 * @cpu:   index into runtime_l1_dcache_stats[] used as the denominator
 * @evsel: not referenced in the body
 * @avg:   the miss value to express as a percentage
 *
 * The percentage is written to stderr, colored by severity.  When the
 * averaged denominator is zero the percentage is reported as 0.
 */
static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
	const double l1d = avg_stats(&runtime_l1_dcache_stats[cpu]);
	const double pct = l1d ? avg / l1d * 100.0 : 0.0;
	const char *color = PERF_COLOR_NORMAL;

	/* Ascending checks: the highest threshold exceeded decides the color. */
	if (pct > 5.0)
		color = PERF_COLOR_YELLOW;
	if (pct > 10.0)
		color = PERF_COLOR_MAGENTA;
	if (pct > 20.0)
		color = PERF_COLOR_RED;

	fprintf(stderr, " # ");
	color_fprintf(stderr, color, "%5.2f%%", pct);
	fprintf(stderr, " of all L1-dcache hits ");
}
|
|
|
|
|
2011-01-04 02:49:44 +08:00
|
|
|
static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
|
2009-06-13 19:35:00 +08:00
|
|
|
{
|
2009-09-22 20:53:51 +08:00
|
|
|
double total, ratio = 0.0;
|
2010-11-16 17:05:01 +08:00
|
|
|
char cpustr[16] = { '\0', };
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
const char *fmt;
|
|
|
|
|
|
|
|
if (csv_output)
|
|
|
|
fmt = "%s%.0f%s%s";
|
|
|
|
else if (big_num)
|
|
|
|
fmt = "%s%'18.0f%s%-24s";
|
|
|
|
else
|
|
|
|
fmt = "%s%18.0f%s%-24s";
|
2010-11-16 17:05:01 +08:00
|
|
|
|
|
|
|
if (no_aggr)
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
sprintf(cpustr, "CPU%*d%s",
|
|
|
|
csv_output ? 0 : -4,
|
2011-01-30 21:59:43 +08:00
|
|
|
evsel_list->cpus->map[cpu], csv_sep);
|
2010-11-16 17:05:01 +08:00
|
|
|
else
|
|
|
|
cpu = 0;
|
2009-09-22 20:53:51 +08:00
|
|
|
|
2011-01-04 02:49:44 +08:00
|
|
|
fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel));
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
|
perf tool: Add cgroup support
This patch adds the ability to filter monitoring based on container groups
(cgroups) for both perf stat and perf record. It is possible to monitor
multiple cgroup in parallel. There is one cgroup per event. The cgroups to
monitor are passed via a new -G option followed by a comma separated list of
cgroup names.
The cgroup filesystem has to be mounted. Given a cgroup name, the perf tool
finds the corresponding directory in the cgroup filesystem and opens it. It
then passes that file descriptor to the kernel.
Example:
$ perf stat -B -a -e cycles:u,cycles:u,cycles:u -G test1,,test2 -- sleep 1
Performance counter stats for 'sleep 1':
2,368,667,414 cycles test1
2,369,661,459 cycles
<not counted> cycles test2
1.001856890 seconds time elapsed
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4d590290.825bdf0a.7d0a.4890@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-02-14 17:20:01 +08:00
|
|
|
if (evsel->cgrp)
|
|
|
|
fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name);
|
|
|
|
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
if (csv_output)
|
|
|
|
return;
|
2009-06-13 19:35:00 +08:00
|
|
|
|
2011-01-04 02:49:44 +08:00
|
|
|
if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
|
2010-11-16 17:05:01 +08:00
|
|
|
total = avg_stats(&runtime_cycles_stats[cpu]);
|
2009-09-22 20:53:51 +08:00
|
|
|
|
|
|
|
if (total)
|
|
|
|
ratio = avg / total;
|
|
|
|
|
2011-04-28 08:57:53 +08:00
|
|
|
fprintf(stderr, " # %4.2f insns per cycle ", ratio);
|
perf stat: Add stalled cycles accounting, prettify the resulting output
Add stalled cycles accounting and use it to print the "cycles stalled per
instruction" value.
Also change the unit of the cycles output from M/sec to GHz - this is more
intuitive.
Prettify the output to:
Performance counter stats for './loop_1b_instructions':
239.775036 task-clock # 0.997 CPUs utilized
761,903,912 cycles # 3.178 GHz
356,620,620 stalled-cycles # 46.81% of all cycles are idle
1,001,578,351 instructions # 1.31 insns per cycle
# 0.36 stalled cycles per insn
14,782 cache-references # 0.062 M/sec
5,694 cache-misses # 38.520 % of all cache refs
0.240493656 seconds time elapsed
Also adjust the --repeat output to make the percentages align vertically:
Performance counter stats for './loop_1b_instructions' (10 runs):
236.096793 task-clock # 0.997 CPUs utilized ( +- 0.011% )
756,553,086 cycles # 3.204 GHz ( +- 0.002% )
354,942,692 stalled-cycles # 46.92% of all cycles are idle ( +- 0.008% )
1,001,389,700 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn ( +- 0.000% )
10,166 cache-references # 0.043 M/sec ( +- 0.742% )
468 cache-misses # 4.608 % of all cache refs ( +- 13.385% )
0.236874136 seconds time elapsed ( +- 0.01% )
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-uapziqny39601apdmmhoz7hk@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 10:34:16 +08:00
|
|
|
|
|
|
|
total = avg_stats(&runtime_stalled_cycles_stats[cpu]);
|
|
|
|
|
|
|
|
if (total && avg) {
|
|
|
|
ratio = total / avg;
|
|
|
|
fprintf(stderr, "\n # %4.2f stalled cycles per insn", ratio);
|
|
|
|
}
|
|
|
|
|
2011-01-04 02:49:44 +08:00
|
|
|
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
|
2010-11-16 17:05:01 +08:00
|
|
|
runtime_branches_stats[cpu].n != 0) {
|
2011-04-27 18:16:10 +08:00
|
|
|
print_branch_misses(cpu, evsel, avg);
|
perf stat: Print out miss/hit ratio for L1 data-cache events
Print out this kind of l1-dcache-misses percentage:
Performance counter stats for './bw_tcp localhost':
29,956,262,201 cycles # 3.002 GHz (scaled from 85.14%)
8,255,209,558 stalled-cycles # 27.56% of all cycles are idle (scaled from 86.56%)
1,206,130,308 l1-dcache-misses # 40.49% of all L1-dcache hits (scaled from 86.30%)
2,978,756,779 l1-dcache-refs # 298.512 M/sec (scaled from 70.02%)
8,861,956,159 instructions # 0.30 insns per cycle
# 0.93 stalled cycles per insn (scaled from 84.27%)
1,644,306,068 branches # 164.782 M/sec (scaled from 86.43%)
74,778,443 branch-misses # 4.55% of all branches (scaled from 70.69%)
9978.695711 task-clock # 0.693 CPUs utilized
14.404347983 seconds time elapsed
And color the result depending on the severity of cache-trashing.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-54gmz0zymaid84zcs7joq02p@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 19:25:24 +08:00
|
|
|
} else if (
|
|
|
|
evsel->attr.type == PERF_TYPE_HW_CACHE &&
|
|
|
|
evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
|
|
|
|
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
|
|
|
|
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
|
perf stat: Add -d/--detailed flag to run with a lot of events
Add the new -d/--detailed flag, which generates a pretty detailed event list:
Performance counter stats for './hackbench 10' (10 runs):
1514.287888 task-clock # 10.897 CPUs utilized ( +- 3.05% )
39,698 context-switches # 0.026 M/sec ( +- 12.19% )
8,147 CPU-migrations # 0.005 M/sec ( +- 16.55% )
17,918 page-faults # 0.012 M/sec ( +- 0.37% )
2,944,504,050 cycles # 1.944 GHz ( +- 3.89% ) (32.60%)
1,043,971,283 stalled-cycles # 35.45% of all cycles are idle ( +- 5.22% ) (44.48%)
1,655,906,768 instructions # 0.56 insns per cycle
# 0.63 stalled cycles per insn ( +- 1.95% ) (55.09%)
338,832,373 branches # 223.757 M/sec ( +- 1.96% ) (64.47%)
3,892,416 branch-misses # 1.15% of all branches ( +- 5.49% ) (73.12%)
606,410,482 L1-dcache-loads # 400.459 M/sec ( +- 1.29% ) (71.21%)
31,204,395 L1-dcache-load-misses # 5.15% of all L1-dcache hits ( +- 3.04% ) (60.43%)
3,922,751 LLC-loads # 2.590 M/sec ( +- 6.80% ) (46.87%)
5,037,288 LLC-load-misses # 3.327 M/sec ( +- 3.56% ) (13.00%)
0.138966828 seconds time elapsed ( +- 4.11% )
This can be used "at a glance" for narrower analysis.
-d can also be used in addition to other -e events, to further expand an event list.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-cxs98quixs3qyvdqx3goojc4@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 19:50:47 +08:00
|
|
|
runtime_l1_dcache_stats[cpu].n != 0) {
|
perf stat: Print out miss/hit ratio for L1 data-cache events
Print out this kind of l1-dcache-misses percentage:
Performance counter stats for './bw_tcp localhost':
29,956,262,201 cycles # 3.002 GHz (scaled from 85.14%)
8,255,209,558 stalled-cycles # 27.56% of all cycles are idle (scaled from 86.56%)
1,206,130,308 l1-dcache-misses # 40.49% of all L1-dcache hits (scaled from 86.30%)
2,978,756,779 l1-dcache-refs # 298.512 M/sec (scaled from 70.02%)
8,861,956,159 instructions # 0.30 insns per cycle
# 0.93 stalled cycles per insn (scaled from 84.27%)
1,644,306,068 branches # 164.782 M/sec (scaled from 86.43%)
74,778,443 branch-misses # 4.55% of all branches (scaled from 70.69%)
9978.695711 task-clock # 0.693 CPUs utilized
14.404347983 seconds time elapsed
And color the result depending on the severity of cache-trashing.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-54gmz0zymaid84zcs7joq02p@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 19:25:24 +08:00
|
|
|
print_l1_dcache_misses(cpu, evsel, avg);
|
2011-04-27 09:42:18 +08:00
|
|
|
} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
|
|
|
|
runtime_cacherefs_stats[cpu].n != 0) {
|
|
|
|
total = avg_stats(&runtime_cacherefs_stats[cpu]);
|
|
|
|
|
|
|
|
if (total)
|
|
|
|
ratio = avg * 100 / total;
|
|
|
|
|
perf stat: Add stalled cycles accounting, prettify the resulting output
Add stalled cycles accounting and use it to print the "cycles stalled per
instruction" value.
Also change the unit of the cycles output from M/sec to GHz - this is more
intuitive.
Prettify the output to:
Performance counter stats for './loop_1b_instructions':
239.775036 task-clock # 0.997 CPUs utilized
761,903,912 cycles # 3.178 GHz
356,620,620 stalled-cycles # 46.81% of all cycles are idle
1,001,578,351 instructions # 1.31 insns per cycle
# 0.36 stalled cycles per insn
14,782 cache-references # 0.062 M/sec
5,694 cache-misses # 38.520 % of all cache refs
0.240493656 seconds time elapsed
Also adjust the --repeat output to make the percentages align vertically:
Performance counter stats for './loop_1b_instructions' (10 runs):
236.096793 task-clock # 0.997 CPUs utilized ( +- 0.011% )
756,553,086 cycles # 3.204 GHz ( +- 0.002% )
354,942,692 stalled-cycles # 46.92% of all cycles are idle ( +- 0.008% )
1,001,389,700 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn ( +- 0.000% )
10,166 cache-references # 0.043 M/sec ( +- 0.742% )
468 cache-misses # 4.608 % of all cache refs ( +- 13.385% )
0.236874136 seconds time elapsed ( +- 0.01% )
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-uapziqny39601apdmmhoz7hk@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 10:34:16 +08:00
|
|
|
fprintf(stderr, " # %8.3f %% of all cache refs ", ratio);
|
2011-04-27 09:42:18 +08:00
|
|
|
|
perf stat: Add stalled cycles accounting, prettify the resulting output
Add stalled cycles accounting and use it to print the "cycles stalled per
instruction" value.
Also change the unit of the cycles output from M/sec to GHz - this is more
intuitive.
Prettify the output to:
Performance counter stats for './loop_1b_instructions':
239.775036 task-clock # 0.997 CPUs utilized
761,903,912 cycles # 3.178 GHz
356,620,620 stalled-cycles # 46.81% of all cycles are idle
1,001,578,351 instructions # 1.31 insns per cycle
# 0.36 stalled cycles per insn
14,782 cache-references # 0.062 M/sec
5,694 cache-misses # 38.520 % of all cache refs
0.240493656 seconds time elapsed
Also adjust the --repeat output to make the percentages align vertically:
Performance counter stats for './loop_1b_instructions' (10 runs):
236.096793 task-clock # 0.997 CPUs utilized ( +- 0.011% )
756,553,086 cycles # 3.204 GHz ( +- 0.002% )
354,942,692 stalled-cycles # 46.92% of all cycles are idle ( +- 0.008% )
1,001,389,700 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn ( +- 0.000% )
10,166 cache-references # 0.043 M/sec ( +- 0.742% )
468 cache-misses # 4.608 % of all cache refs ( +- 13.385% )
0.236874136 seconds time elapsed ( +- 0.01% )
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-uapziqny39601apdmmhoz7hk@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 10:34:16 +08:00
|
|
|
} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES)) {
|
2011-04-27 11:39:24 +08:00
|
|
|
print_stalled_cycles(cpu, evsel, avg);
|
perf stat: Add stalled cycles accounting, prettify the resulting output
Add stalled cycles accounting and use it to print the "cycles stalled per
instruction" value.
Also change the unit of the cycles output from M/sec to GHz - this is more
intuitive.
Prettify the output to:
Performance counter stats for './loop_1b_instructions':
239.775036 task-clock # 0.997 CPUs utilized
761,903,912 cycles # 3.178 GHz
356,620,620 stalled-cycles # 46.81% of all cycles are idle
1,001,578,351 instructions # 1.31 insns per cycle
# 0.36 stalled cycles per insn
14,782 cache-references # 0.062 M/sec
5,694 cache-misses # 38.520 % of all cache refs
0.240493656 seconds time elapsed
Also adjust the --repeat output to make the percentages align vertically:
Performance counter stats for './loop_1b_instructions' (10 runs):
236.096793 task-clock # 0.997 CPUs utilized ( +- 0.011% )
756,553,086 cycles # 3.204 GHz ( +- 0.002% )
354,942,692 stalled-cycles # 46.92% of all cycles are idle ( +- 0.008% )
1,001,389,700 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn ( +- 0.000% )
10,166 cache-references # 0.043 M/sec ( +- 0.742% )
468 cache-misses # 4.608 % of all cache refs ( +- 13.385% )
0.236874136 seconds time elapsed ( +- 0.01% )
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-uapziqny39601apdmmhoz7hk@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 10:34:16 +08:00
|
|
|
} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
|
2010-11-16 17:05:01 +08:00
|
|
|
total = avg_stats(&runtime_nsecs_stats[cpu]);
|
2009-09-22 20:53:51 +08:00
|
|
|
|
|
|
|
if (total)
|
perf stat: Add stalled cycles accounting, prettify the resulting output
Add stalled cycles accounting and use it to print the "cycles stalled per
instruction" value.
Also change the unit of the cycles output from M/sec to GHz - this is more
intuitive.
Prettify the output to:
Performance counter stats for './loop_1b_instructions':
239.775036 task-clock # 0.997 CPUs utilized
761,903,912 cycles # 3.178 GHz
356,620,620 stalled-cycles # 46.81% of all cycles are idle
1,001,578,351 instructions # 1.31 insns per cycle
# 0.36 stalled cycles per insn
14,782 cache-references # 0.062 M/sec
5,694 cache-misses # 38.520 % of all cache refs
0.240493656 seconds time elapsed
Also adjust the --repeat output to make the percentages align vertically:
Performance counter stats for './loop_1b_instructions' (10 runs):
236.096793 task-clock # 0.997 CPUs utilized ( +- 0.011% )
756,553,086 cycles # 3.204 GHz ( +- 0.002% )
354,942,692 stalled-cycles # 46.92% of all cycles are idle ( +- 0.008% )
1,001,389,700 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn ( +- 0.000% )
10,166 cache-references # 0.043 M/sec ( +- 0.742% )
468 cache-misses # 4.608 % of all cache refs ( +- 13.385% )
0.236874136 seconds time elapsed ( +- 0.01% )
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-uapziqny39601apdmmhoz7hk@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 10:34:16 +08:00
|
|
|
ratio = 1.0 * avg / total;
|
2009-09-22 20:53:51 +08:00
|
|
|
|
perf stat: Add stalled cycles accounting, prettify the resulting output
Add stalled cycles accounting and use it to print the "cycles stalled per
instruction" value.
Also change the unit of the cycles output from M/sec to GHz - this is more
intuitive.
Prettify the output to:
Performance counter stats for './loop_1b_instructions':
239.775036 task-clock # 0.997 CPUs utilized
761,903,912 cycles # 3.178 GHz
356,620,620 stalled-cycles # 46.81% of all cycles are idle
1,001,578,351 instructions # 1.31 insns per cycle
# 0.36 stalled cycles per insn
14,782 cache-references # 0.062 M/sec
5,694 cache-misses # 38.520 % of all cache refs
0.240493656 seconds time elapsed
Also adjust the --repeat output to make the percentages align vertically:
Performance counter stats for './loop_1b_instructions' (10 runs):
236.096793 task-clock # 0.997 CPUs utilized ( +- 0.011% )
756,553,086 cycles # 3.204 GHz ( +- 0.002% )
354,942,692 stalled-cycles # 46.92% of all cycles are idle ( +- 0.008% )
1,001,389,700 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn ( +- 0.000% )
10,166 cache-references # 0.043 M/sec ( +- 0.742% )
468 cache-misses # 4.608 % of all cache refs ( +- 13.385% )
0.236874136 seconds time elapsed ( +- 0.01% )
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-uapziqny39601apdmmhoz7hk@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 10:34:16 +08:00
|
|
|
fprintf(stderr, " # %8.3f GHz ", ratio);
|
|
|
|
} else if (runtime_nsecs_stats[cpu].n != 0) {
|
|
|
|
total = avg_stats(&runtime_nsecs_stats[cpu]);
|
2011-04-24 21:05:10 +08:00
|
|
|
|
|
|
|
if (total)
|
perf stat: Add stalled cycles accounting, prettify the resulting output
Add stalled cycles accounting and use it to print the "cycles stalled per
instruction" value.
Also change the unit of the cycles output from M/sec to GHz - this is more
intuitive.
Prettify the output to:
Performance counter stats for './loop_1b_instructions':
239.775036 task-clock # 0.997 CPUs utilized
761,903,912 cycles # 3.178 GHz
356,620,620 stalled-cycles # 46.81% of all cycles are idle
1,001,578,351 instructions # 1.31 insns per cycle
# 0.36 stalled cycles per insn
14,782 cache-references # 0.062 M/sec
5,694 cache-misses # 38.520 % of all cache refs
0.240493656 seconds time elapsed
Also adjust the --repeat output to make the percentages align vertically:
Performance counter stats for './loop_1b_instructions' (10 runs):
236.096793 task-clock # 0.997 CPUs utilized ( +- 0.011% )
756,553,086 cycles # 3.204 GHz ( +- 0.002% )
354,942,692 stalled-cycles # 46.92% of all cycles are idle ( +- 0.008% )
1,001,389,700 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn ( +- 0.000% )
10,166 cache-references # 0.043 M/sec ( +- 0.742% )
468 cache-misses # 4.608 % of all cache refs ( +- 13.385% )
0.236874136 seconds time elapsed ( +- 0.01% )
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-uapziqny39601apdmmhoz7hk@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 10:34:16 +08:00
|
|
|
ratio = 1000.0 * avg / total;
|
2011-04-24 21:05:10 +08:00
|
|
|
|
perf stat: Add stalled cycles accounting, prettify the resulting output
Add stalled cycles accounting and use it to print the "cycles stalled per
instruction" value.
Also change the unit of the cycles output from M/sec to GHz - this is more
intuitive.
Prettify the output to:
Performance counter stats for './loop_1b_instructions':
239.775036 task-clock # 0.997 CPUs utilized
761,903,912 cycles # 3.178 GHz
356,620,620 stalled-cycles # 46.81% of all cycles are idle
1,001,578,351 instructions # 1.31 insns per cycle
# 0.36 stalled cycles per insn
14,782 cache-references # 0.062 M/sec
5,694 cache-misses # 38.520 % of all cache refs
0.240493656 seconds time elapsed
Also adjust the --repeat output to make the percentages align vertically:
Performance counter stats for './loop_1b_instructions' (10 runs):
236.096793 task-clock # 0.997 CPUs utilized ( +- 0.011% )
756,553,086 cycles # 3.204 GHz ( +- 0.002% )
354,942,692 stalled-cycles # 46.92% of all cycles are idle ( +- 0.008% )
1,001,389,700 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn ( +- 0.000% )
10,166 cache-references # 0.043 M/sec ( +- 0.742% )
468 cache-misses # 4.608 % of all cache refs ( +- 13.385% )
0.236874136 seconds time elapsed ( +- 0.01% )
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-uapziqny39601apdmmhoz7hk@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 10:34:16 +08:00
|
|
|
fprintf(stderr, " # %8.3f M/sec ", ratio);
|
2011-04-27 11:39:24 +08:00
|
|
|
} else {
|
|
|
|
fprintf(stderr, " ");
|
2009-06-13 19:35:00 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-05-29 15:10:54 +08:00
|
|
|
/*
|
|
|
|
* Print out the results of a single counter:
|
2010-11-16 17:05:01 +08:00
|
|
|
* aggregated counts in system-wide mode
|
2009-05-29 15:10:54 +08:00
|
|
|
*/
|
2011-01-04 02:39:04 +08:00
|
|
|
static void print_counter_aggr(struct perf_evsel *counter)
|
2009-05-29 15:10:54 +08:00
|
|
|
{
|
2011-01-04 02:39:04 +08:00
|
|
|
struct perf_stat *ps = counter->priv;
|
|
|
|
double avg = avg_stats(&ps->res_stats[0]);
|
2011-01-04 03:45:52 +08:00
|
|
|
int scaled = counter->counts->scaled;
|
2009-05-29 15:10:54 +08:00
|
|
|
|
|
|
|
if (scaled == -1) {
|
perf tool: Add cgroup support
This patch adds the ability to filter monitoring based on container groups
(cgroups) for both perf stat and perf record. It is possible to monitor
multiple cgroup in parallel. There is one cgroup per event. The cgroups to
monitor are passed via a new -G option followed by a comma separated list of
cgroup names.
The cgroup filesystem has to be mounted. Given a cgroup name, the perf tool
finds the corresponding directory in the cgroup filesystem and opens it. It
then passes that file descriptor to the kernel.
Example:
$ perf stat -B -a -e cycles:u,cycles:u,cycles:u -G test1,,test2 -- sleep 1
Performance counter stats for 'sleep 1':
2,368,667,414 cycles test1
2,369,661,459 cycles
<not counted> cycles test2
1.001856890 seconds time elapsed
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4d590290.825bdf0a.7d0a.4890@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-02-14 17:20:01 +08:00
|
|
|
fprintf(stderr, "%*s%s%*s",
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
csv_output ? 0 : 18,
|
perf tool: Add cgroup support
This patch adds the ability to filter monitoring based on container groups
(cgroups) for both perf stat and perf record. It is possible to monitor
multiple cgroup in parallel. There is one cgroup per event. The cgroups to
monitor are passed via a new -G option followed by a comma separated list of
cgroup names.
The cgroup filesystem has to be mounted. Given a cgroup name, the perf tool
finds the corresponding directory in the cgroup filesystem and opens it. It
then passes that file descriptor to the kernel.
Example:
$ perf stat -B -a -e cycles:u,cycles:u,cycles:u -G test1,,test2 -- sleep 1
Performance counter stats for 'sleep 1':
2,368,667,414 cycles test1
2,369,661,459 cycles
<not counted> cycles test2
1.001856890 seconds time elapsed
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4d590290.825bdf0a.7d0a.4890@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-02-14 17:20:01 +08:00
|
|
|
"<not counted>",
|
|
|
|
csv_sep,
|
|
|
|
csv_output ? 0 : -24,
|
|
|
|
event_name(counter));
|
|
|
|
|
|
|
|
if (counter->cgrp)
|
|
|
|
fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name);
|
|
|
|
|
|
|
|
fputc('\n', stderr);
|
2009-05-29 15:10:54 +08:00
|
|
|
return;
|
|
|
|
}
|
2009-05-29 15:10:54 +08:00
|
|
|
|
2009-06-13 19:35:00 +08:00
|
|
|
if (nsec_counter(counter))
|
2010-11-16 17:05:01 +08:00
|
|
|
nsec_printout(-1, counter, avg);
|
2009-06-13 19:35:00 +08:00
|
|
|
else
|
2010-11-16 17:05:01 +08:00
|
|
|
abs_printout(-1, counter, avg);
|
2009-09-05 00:23:38 +08:00
|
|
|
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
if (csv_output) {
|
|
|
|
fputc('\n', stderr);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2009-09-05 00:23:38 +08:00
|
|
|
print_noise(counter, avg);
|
2009-09-04 21:36:12 +08:00
|
|
|
|
|
|
|
if (scaled) {
|
|
|
|
double avg_enabled, avg_running;
|
|
|
|
|
2011-01-04 02:39:04 +08:00
|
|
|
avg_enabled = avg_stats(&ps->res_stats[1]);
|
|
|
|
avg_running = avg_stats(&ps->res_stats[2]);
|
2009-05-30 18:38:51 +08:00
|
|
|
|
perf stat: Add -d/--detailed flag to run with a lot of events
Add the new -d/--detailed flag, which generates a pretty detailed event list:
Performance counter stats for './hackbench 10' (10 runs):
1514.287888 task-clock # 10.897 CPUs utilized ( +- 3.05% )
39,698 context-switches # 0.026 M/sec ( +- 12.19% )
8,147 CPU-migrations # 0.005 M/sec ( +- 16.55% )
17,918 page-faults # 0.012 M/sec ( +- 0.37% )
2,944,504,050 cycles # 1.944 GHz ( +- 3.89% ) (32.60%)
1,043,971,283 stalled-cycles # 35.45% of all cycles are idle ( +- 5.22% ) (44.48%)
1,655,906,768 instructions # 0.56 insns per cycle
# 0.63 stalled cycles per insn ( +- 1.95% ) (55.09%)
338,832,373 branches # 223.757 M/sec ( +- 1.96% ) (64.47%)
3,892,416 branch-misses # 1.15% of all branches ( +- 5.49% ) (73.12%)
606,410,482 L1-dcache-loads # 400.459 M/sec ( +- 1.29% ) (71.21%)
31,204,395 L1-dcache-load-misses # 5.15% of all L1-dcache hits ( +- 3.04% ) (60.43%)
3,922,751 LLC-loads # 2.590 M/sec ( +- 6.80% ) (46.87%)
5,037,288 LLC-load-misses # 3.327 M/sec ( +- 3.56% ) (13.00%)
0.138966828 seconds time elapsed ( +- 4.11% )
This can be used "at a glance" for narrower analysis.
-d can also be used in addition to other -e events, to further expand an event list.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-cxs98quixs3qyvdqx3goojc4@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 19:50:47 +08:00
|
|
|
fprintf(stderr, " (%.2f%%)", 100 * avg_running / avg_enabled);
|
2009-09-04 21:36:12 +08:00
|
|
|
}
|
2009-05-29 15:10:54 +08:00
|
|
|
fprintf(stderr, "\n");
|
|
|
|
}
|
|
|
|
|
2010-11-16 17:05:01 +08:00
|
|
|
/*
|
|
|
|
* Print out the results of a single counter:
|
|
|
|
* does not use aggregated count in system-wide
|
|
|
|
*/
|
2011-01-04 02:39:04 +08:00
|
|
|
static void print_counter(struct perf_evsel *counter)
|
2010-11-16 17:05:01 +08:00
|
|
|
{
|
|
|
|
u64 ena, run, val;
|
|
|
|
int cpu;
|
|
|
|
|
2011-01-30 21:59:43 +08:00
|
|
|
for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
|
2011-01-04 03:45:52 +08:00
|
|
|
val = counter->counts->cpu[cpu].val;
|
|
|
|
ena = counter->counts->cpu[cpu].ena;
|
|
|
|
run = counter->counts->cpu[cpu].run;
|
2010-11-16 17:05:01 +08:00
|
|
|
if (run == 0 || ena == 0) {
|
perf tool: Add cgroup support
This patch adds the ability to filter monitoring based on container groups
(cgroups) for both perf stat and perf record. It is possible to monitor
multiple cgroup in parallel. There is one cgroup per event. The cgroups to
monitor are passed via a new -G option followed by a comma separated list of
cgroup names.
The cgroup filesystem has to be mounted. Given a cgroup name, the perf tool
finds the corresponding directory in the cgroup filesystem and opens it. It
then passes that file descriptor to the kernel.
Example:
$ perf stat -B -a -e cycles:u,cycles:u,cycles:u -G test1,,test2 -- sleep 1
Performance counter stats for 'sleep 1':
2,368,667,414 cycles test1
2,369,661,459 cycles
<not counted> cycles test2
1.001856890 seconds time elapsed
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4d590290.825bdf0a.7d0a.4890@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-02-14 17:20:01 +08:00
|
|
|
fprintf(stderr, "CPU%*d%s%*s%s%*s",
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
csv_output ? 0 : -4,
|
2011-01-30 21:59:43 +08:00
|
|
|
evsel_list->cpus->map[cpu], csv_sep,
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
csv_output ? 0 : 18,
|
|
|
|
"<not counted>", csv_sep,
|
perf tool: Add cgroup support
This patch adds the ability to filter monitoring based on container groups
(cgroups) for both perf stat and perf record. It is possible to monitor
multiple cgroup in parallel. There is one cgroup per event. The cgroups to
monitor are passed via a new -G option followed by a comma separated list of
cgroup names.
The cgroup filesystem has to be mounted. Given a cgroup name, the perf tool
finds the corresponding directory in the cgroup filesystem and opens it. It
then passes that file descriptor to the kernel.
Example:
$ perf stat -B -a -e cycles:u,cycles:u,cycles:u -G test1,,test2 -- sleep 1
Performance counter stats for 'sleep 1':
2,368,667,414 cycles test1
2,369,661,459 cycles
<not counted> cycles test2
1.001856890 seconds time elapsed
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4d590290.825bdf0a.7d0a.4890@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-02-14 17:20:01 +08:00
|
|
|
csv_output ? 0 : -24,
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
event_name(counter));
|
2010-11-16 17:05:01 +08:00
|
|
|
|
perf tool: Add cgroup support
This patch adds the ability to filter monitoring based on container groups
(cgroups) for both perf stat and perf record. It is possible to monitor
multiple cgroup in parallel. There is one cgroup per event. The cgroups to
monitor are passed via a new -G option followed by a comma separated list of
cgroup names.
The cgroup filesystem has to be mounted. Given a cgroup name, the perf tool
finds the corresponding directory in the cgroup filesystem and opens it. It
then passes that file descriptor to the kernel.
Example:
$ perf stat -B -a -e cycles:u,cycles:u,cycles:u -G test1,,test2 -- sleep 1
Performance counter stats for 'sleep 1':
2,368,667,414 cycles test1
2,369,661,459 cycles
<not counted> cycles test2
1.001856890 seconds time elapsed
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4d590290.825bdf0a.7d0a.4890@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-02-14 17:20:01 +08:00
|
|
|
if (counter->cgrp)
|
|
|
|
fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name);
|
|
|
|
|
|
|
|
fputc('\n', stderr);
|
2010-11-16 17:05:01 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nsec_counter(counter))
|
|
|
|
nsec_printout(cpu, counter, val);
|
|
|
|
else
|
|
|
|
abs_printout(cpu, counter, val);
|
|
|
|
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
if (!csv_output) {
|
|
|
|
print_noise(counter, 1.0);
|
2010-11-16 17:05:01 +08:00
|
|
|
|
perf stat: Add -d/--detailed flag to run with a lot of events
Add the new -d/--detailed flag, which generates a pretty detailed event list:
Performance counter stats for './hackbench 10' (10 runs):
1514.287888 task-clock # 10.897 CPUs utilized ( +- 3.05% )
39,698 context-switches # 0.026 M/sec ( +- 12.19% )
8,147 CPU-migrations # 0.005 M/sec ( +- 16.55% )
17,918 page-faults # 0.012 M/sec ( +- 0.37% )
2,944,504,050 cycles # 1.944 GHz ( +- 3.89% ) (32.60%)
1,043,971,283 stalled-cycles # 35.45% of all cycles are idle ( +- 5.22% ) (44.48%)
1,655,906,768 instructions # 0.56 insns per cycle
# 0.63 stalled cycles per insn ( +- 1.95% ) (55.09%)
338,832,373 branches # 223.757 M/sec ( +- 1.96% ) (64.47%)
3,892,416 branch-misses # 1.15% of all branches ( +- 5.49% ) (73.12%)
606,410,482 L1-dcache-loads # 400.459 M/sec ( +- 1.29% ) (71.21%)
31,204,395 L1-dcache-load-misses # 5.15% of all L1-dcache hits ( +- 3.04% ) (60.43%)
3,922,751 LLC-loads # 2.590 M/sec ( +- 6.80% ) (46.87%)
5,037,288 LLC-load-misses # 3.327 M/sec ( +- 3.56% ) (13.00%)
0.138966828 seconds time elapsed ( +- 4.11% )
This can be used "at a glance" for narrower analysis.
-d can also be used in addition to other -e events, to further expand an event list.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-cxs98quixs3qyvdqx3goojc4@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 19:50:47 +08:00
|
|
|
if (run != ena)
|
|
|
|
fprintf(stderr, " (%.2f%%)", 100.0 * run / ena);
|
2010-11-16 17:05:01 +08:00
|
|
|
}
|
perf tool: Add cgroup support
This patch adds the ability to filter monitoring based on container groups
(cgroups) for both perf stat and perf record. It is possible to monitor
multiple cgroup in parallel. There is one cgroup per event. The cgroups to
monitor are passed via a new -G option followed by a comma separated list of
cgroup names.
The cgroup filesystem has to be mounted. Given a cgroup name, the perf tool
finds the corresponding directory in the cgroup filesystem and opens it. It
then passes that file descriptor to the kernel.
Example:
$ perf stat -B -a -e cycles:u,cycles:u,cycles:u -G test1,,test2 -- sleep 1
Performance counter stats for 'sleep 1':
2,368,667,414 cycles test1
2,369,661,459 cycles
<not counted> cycles test2
1.001856890 seconds time elapsed
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4d590290.825bdf0a.7d0a.4890@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-02-14 17:20:01 +08:00
|
|
|
fputc('\n', stderr);
|
2010-11-16 17:05:01 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-06-13 20:57:28 +08:00
|
|
|
static void print_stat(int argc, const char **argv)
|
|
|
|
{
|
2011-01-04 02:39:04 +08:00
|
|
|
struct perf_evsel *counter;
|
|
|
|
int i;
|
2009-06-13 20:57:28 +08:00
|
|
|
|
2009-04-20 21:37:32 +08:00
|
|
|
fflush(stdout);
|
|
|
|
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
if (!csv_output) {
|
|
|
|
fprintf(stderr, "\n");
|
|
|
|
fprintf(stderr, " Performance counter stats for ");
|
|
|
|
if(target_pid == -1 && target_tid == -1) {
|
|
|
|
fprintf(stderr, "\'%s", argv[0]);
|
|
|
|
for (i = 1; i < argc; i++)
|
|
|
|
fprintf(stderr, " %s", argv[i]);
|
|
|
|
} else if (target_pid != -1)
|
|
|
|
fprintf(stderr, "process id \'%d", target_pid);
|
|
|
|
else
|
|
|
|
fprintf(stderr, "thread id \'%d", target_tid);
|
2009-06-04 01:36:07 +08:00
|
|
|
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
fprintf(stderr, "\'");
|
|
|
|
if (run_count > 1)
|
|
|
|
fprintf(stderr, " (%d runs)", run_count);
|
|
|
|
fprintf(stderr, ":\n\n");
|
|
|
|
}
|
2009-05-29 15:10:54 +08:00
|
|
|
|
2010-11-16 17:05:01 +08:00
|
|
|
if (no_aggr) {
|
2011-01-12 06:56:53 +08:00
|
|
|
list_for_each_entry(counter, &evsel_list->entries, node)
|
2010-11-16 17:05:01 +08:00
|
|
|
print_counter(counter);
|
|
|
|
} else {
|
2011-01-12 06:56:53 +08:00
|
|
|
list_for_each_entry(counter, &evsel_list->entries, node)
|
2010-11-16 17:05:01 +08:00
|
|
|
print_counter_aggr(counter);
|
|
|
|
}
|
2009-04-20 21:37:32 +08:00
|
|
|
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
if (!csv_output) {
|
|
|
|
fprintf(stderr, "\n");
|
|
|
|
fprintf(stderr, " %18.9f seconds time elapsed",
|
|
|
|
avg_stats(&walltime_nsecs_stats)/1e9);
|
|
|
|
if (run_count > 1) {
|
2011-04-27 11:35:39 +08:00
|
|
|
print_noise_pct(stddev_stats(&walltime_nsecs_stats),
|
|
|
|
avg_stats(&walltime_nsecs_stats));
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
}
|
|
|
|
fprintf(stderr, "\n\n");
|
2009-06-27 12:24:32 +08:00
|
|
|
}
|
2009-04-20 21:37:32 +08:00
|
|
|
}
|
|
|
|
|
2009-06-10 21:55:59 +08:00
|
|
|
/*
 * Number of the signal recorded by skip_signal(), or -1 if none was caught.
 * It is written from a signal handler, so it uses volatile sig_atomic_t,
 * the only object type the C standard guarantees a handler may safely store to.
 */
static volatile sig_atomic_t signr = -1;
|
|
|
|
|
2009-05-26 15:17:18 +08:00
|
|
|
static void skip_signal(int signo)
|
2009-04-20 21:37:32 +08:00
|
|
|
{
|
2010-03-18 22:36:03 +08:00
|
|
|
if(child_pid == -1)
|
2009-12-31 16:05:50 +08:00
|
|
|
done = 1;
|
|
|
|
|
2009-06-10 21:55:59 +08:00
|
|
|
signr = signo;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sig_atexit(void)
|
|
|
|
{
|
2009-10-04 08:35:01 +08:00
|
|
|
if (child_pid != -1)
|
|
|
|
kill(child_pid, SIGTERM);
|
|
|
|
|
2009-06-10 21:55:59 +08:00
|
|
|
if (signr == -1)
|
|
|
|
return;
|
|
|
|
|
|
|
|
signal(signr, SIG_DFL);
|
|
|
|
kill(getpid(), signr);
|
2009-05-26 15:17:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * NULL-terminated usage synopsis for 'perf stat'; passed to
 * parse_options() and usage_with_options() in cmd_stat().
 */
static const char * const stat_usage[] = {
	"perf stat [<options>] [<command>]",
	NULL,
};
|
|
|
|
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Committer note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitly enabled or disabled -B,
add code to disable big_num if the user didn't explicitly set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
/*
 * Option callback for -B/--big-num.
 *
 * Records in big_num_opt whether the user asked for the option (1) or
 * negated it (0), so that cmd_stat() can tell an explicit choice apart
 * from the untouched default.
 *
 * @opt, @s: unused.
 * @unset:   non-zero when the option was given in its negated form.
 *
 * Always returns 0.
 */
static int stat__set_big_num(const struct option *opt __used,
			     const char *s __used, int unset)
{
	if (unset)
		big_num_opt = 0;
	else
		big_num_opt = 1;

	return 0;
}
|
|
|
|
|
2009-05-26 15:17:18 +08:00
|
|
|
static const struct option options[] = {
|
2011-01-12 06:56:53 +08:00
|
|
|
OPT_CALLBACK('e', "event", &evsel_list, "event",
|
2009-06-06 18:24:17 +08:00
|
|
|
"event selector. use 'perf list' to list available events",
|
|
|
|
parse_events),
|
2011-03-14 23:40:30 +08:00
|
|
|
OPT_CALLBACK(0, "filter", &evsel_list, "filter",
|
|
|
|
"event filter", parse_filter),
|
2010-05-12 16:40:01 +08:00
|
|
|
OPT_BOOLEAN('i', "no-inherit", &no_inherit,
|
|
|
|
"child tasks do not inherit counters"),
|
2009-05-26 15:17:18 +08:00
|
|
|
OPT_INTEGER('p', "pid", &target_pid,
|
2010-03-18 22:36:05 +08:00
|
|
|
"stat events on existing process id"),
|
|
|
|
OPT_INTEGER('t', "tid", &target_tid,
|
|
|
|
"stat events on existing thread id"),
|
2009-05-26 15:17:18 +08:00
|
|
|
OPT_BOOLEAN('a', "all-cpus", &system_wide,
|
2009-06-24 20:49:34 +08:00
|
|
|
"system-wide collection from all CPUs"),
|
2009-08-07 16:18:39 +08:00
|
|
|
OPT_BOOLEAN('c', "scale", &scale,
|
2009-06-24 20:49:34 +08:00
|
|
|
"scale/normalize counters"),
|
2010-04-13 16:37:33 +08:00
|
|
|
OPT_INCR('v', "verbose", &verbose,
|
2009-06-07 23:06:46 +08:00
|
|
|
"be more verbose (show counter open errors, etc)"),
|
2009-06-13 20:57:28 +08:00
|
|
|
OPT_INTEGER('r', "repeat", &run_count,
|
|
|
|
"repeat command and print average + stddev (max: 100)"),
|
2009-06-27 12:10:30 +08:00
|
|
|
OPT_BOOLEAN('n', "null", &null_run,
|
|
|
|
"null run - dont start any counters"),
|
perf stat: Add -d/--detailed flag to run with a lot of events
Add the new -d/--detailed flag, which generates a pretty detailed event list:
Performance counter stats for './hackbench 10' (10 runs):
1514.287888 task-clock # 10.897 CPUs utilized ( +- 3.05% )
39,698 context-switches # 0.026 M/sec ( +- 12.19% )
8,147 CPU-migrations # 0.005 M/sec ( +- 16.55% )
17,918 page-faults # 0.012 M/sec ( +- 0.37% )
2,944,504,050 cycles # 1.944 GHz ( +- 3.89% ) (32.60%)
1,043,971,283 stalled-cycles # 35.45% of all cycles are idle ( +- 5.22% ) (44.48%)
1,655,906,768 instructions # 0.56 insns per cycle
# 0.63 stalled cycles per insn ( +- 1.95% ) (55.09%)
338,832,373 branches # 223.757 M/sec ( +- 1.96% ) (64.47%)
3,892,416 branch-misses # 1.15% of all branches ( +- 5.49% ) (73.12%)
606,410,482 L1-dcache-loads # 400.459 M/sec ( +- 1.29% ) (71.21%)
31,204,395 L1-dcache-load-misses # 5.15% of all L1-dcache hits ( +- 3.04% ) (60.43%)
3,922,751 LLC-loads # 2.590 M/sec ( +- 6.80% ) (46.87%)
5,037,288 LLC-load-misses # 3.327 M/sec ( +- 3.56% ) (13.00%)
0.138966828 seconds time elapsed ( +- 4.11% )
This can be used "at a glance" for narrower analysis.
-d can also be used in addition to other -e events, to further expand an event list.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-cxs98quixs3qyvdqx3goojc4@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 19:50:47 +08:00
|
|
|
OPT_BOOLEAN('d', "detailed", &detailed_run,
|
|
|
|
"detailed run - start a lot of events"),
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
|
|
|
|
"print large numbers with thousands\' separators",
|
|
|
|
stat__set_big_num),
|
2010-05-28 18:00:01 +08:00
|
|
|
OPT_STRING('C', "cpu", &cpu_list, "cpu",
|
|
|
|
"list of cpus to monitor in system-wide"),
|
2010-11-16 17:05:01 +08:00
|
|
|
OPT_BOOLEAN('A', "no-aggr", &no_aggr,
|
|
|
|
"disable CPU count aggregation"),
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
OPT_STRING('x', "field-separator", &csv_sep, "separator",
|
|
|
|
"print counts with custom separator"),
|
perf tool: Add cgroup support
This patch adds the ability to filter monitoring based on container groups
(cgroups) for both perf stat and perf record. It is possible to monitor
multiple cgroup in parallel. There is one cgroup per event. The cgroups to
monitor are passed via a new -G option followed by a comma separated list of
cgroup names.
The cgroup filesystem has to be mounted. Given a cgroup name, the perf tool
finds the corresponding directory in the cgroup filesystem and opens it. It
then passes that file descriptor to the kernel.
Example:
$ perf stat -B -a -e cycles:u,cycles:u,cycles:u -G test1,,test2 -- sleep 1
Performance counter stats for 'sleep 1':
2,368,667,414 cycles test1
2,369,661,459 cycles
<not counted> cycles test2
1.001856890 seconds time elapsed
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4d590290.825bdf0a.7d0a.4890@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-02-14 17:20:01 +08:00
|
|
|
OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
|
|
|
|
"monitor event in cgroup name only",
|
|
|
|
parse_cgroups),
|
2009-05-26 15:17:18 +08:00
|
|
|
OPT_END()
|
|
|
|
};
|
|
|
|
|
2009-07-01 18:37:06 +08:00
|
|
|
int cmd_stat(int argc, const char **argv, const char *prefix __used)
|
2009-05-26 15:17:18 +08:00
|
|
|
{
|
2011-01-04 02:39:04 +08:00
|
|
|
struct perf_evsel *pos;
|
|
|
|
int status = -ENOMEM;
|
2009-06-13 20:57:28 +08:00
|
|
|
|
perf stat: add perf stat -B to pretty print large numbers
It is hard to read very large numbers so provide an option to perf stat
to separate thousands using a separator. The patch leverages the locale
support of stdio. You need to set your LC_NUMERIC appropriately, for
instance LC_NUMERIC=en_US.UTF8. You need to pass -B to activate this
feature. This way existing scripts parsing the output do not need to be
changed. Here is an example.
$ perf stat noploop 2
noploop for 2 seconds
Performance counter stats for 'noploop 2':
1998.347031 task-clock-msecs # 0.998 CPUs
61 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
118 page-faults # 0.000 M/sec
4,138,410,900 cycles # 2070.917 M/sec (scaled from 70.01%)
2,062,650,268 instructions # 0.498 IPC (scaled from 70.01%)
2,057,653,466 branches # 1029.678 M/sec (scaled from 70.01%)
40,267 branch-misses # 0.002 % (scaled from 30.04%)
2,055,961,348 cache-references # 1028.831 M/sec (scaled from 30.03%)
53,725 cache-misses # 0.027 M/sec (scaled from 30.02%)
2.001393933 seconds time elapsed
$ perf stat -B noploop 2
noploop for 2 seconds
Performance counter stats for 'noploop 2':
1998.297883 task-clock-msecs # 0.998 CPUs
59 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
119 page-faults # 0.000 M/sec
4,131,380,160 cycles # 2067.450 M/sec (scaled from 70.01%)
2,059,096,507 instructions # 0.498 IPC (scaled from 70.01%)
2,054,681,303 branches # 1028.216 M/sec (scaled from 70.01%)
25,650 branch-misses # 0.001 % (scaled from 30.05%)
2,056,283,014 cache-references # 1029.017 M/sec (scaled from 30.03%)
47,097 cache-misses # 0.024 M/sec (scaled from 30.02%)
2.001391016 seconds time elapsed
Cc: David S. Miller <davem@davemloft.net>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <4bf28fe8.914ed80a.01ca.fffff5f5@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-05-18 21:00:01 +08:00
|
|
|
setlocale(LC_ALL, "");
|
|
|
|
|
2011-01-30 21:59:43 +08:00
|
|
|
evsel_list = perf_evlist__new(NULL, NULL);
|
2011-01-12 06:56:53 +08:00
|
|
|
if (evsel_list == NULL)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2009-07-22 21:04:12 +08:00
|
|
|
argc = parse_options(argc, argv, options, stat_usage,
|
|
|
|
PARSE_OPT_STOP_AT_NON_OPTION);
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Committer note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitly enabled or disabled -B,
add code to disable big_num if the user didn't explicitly set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-02 00:49:05 +08:00
|
|
|
|
|
|
|
if (csv_sep)
|
|
|
|
csv_output = true;
|
|
|
|
else
|
|
|
|
csv_sep = DEFAULT_SEPARATOR;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* let the spreadsheet do the pretty-printing
|
|
|
|
*/
|
|
|
|
if (csv_output) {
|
|
|
|
/* User explicitly passed -B? */
|
|
|
|
if (big_num_opt == 1) {
|
|
|
|
fprintf(stderr, "-B option not supported with -x\n");
|
|
|
|
usage_with_options(stat_usage, options);
|
|
|
|
} else /* Nope, so disable big number formatting */
|
|
|
|
big_num = false;
|
|
|
|
} else if (big_num_opt == 0) /* User passed --no-big-num */
|
|
|
|
big_num = false;
|
|
|
|
|
2010-03-18 22:36:05 +08:00
|
|
|
if (!argc && target_pid == -1 && target_tid == -1)
|
2009-05-26 15:17:18 +08:00
|
|
|
usage_with_options(stat_usage, options);
|
2009-09-04 21:36:08 +08:00
|
|
|
if (run_count <= 0)
|
2009-06-13 20:57:28 +08:00
|
|
|
usage_with_options(stat_usage, options);
|
2009-04-20 21:37:32 +08:00
|
|
|
|
perf tool: Add cgroup support
This patch adds the ability to filter monitoring based on container groups
(cgroups) for both perf stat and perf record. It is possible to monitor
multiple cgroup in parallel. There is one cgroup per event. The cgroups to
monitor are passed via a new -G option followed by a comma separated list of
cgroup names.
The cgroup filesystem has to be mounted. Given a cgroup name, the perf tool
finds the corresponding directory in the cgroup filesystem and opens it. It
then passes that file descriptor to the kernel.
Example:
$ perf stat -B -a -e cycles:u,cycles:u,cycles:u -G test1,,test2 -- sleep 1
Performance counter stats for 'sleep 1':
2,368,667,414 cycles test1
2,369,661,459 cycles
<not counted> cycles test2
1.001856890 seconds time elapsed
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4d590290.825bdf0a.7d0a.4890@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-02-14 17:20:01 +08:00
|
|
|
/* no_aggr, cgroup are for system-wide only */
|
|
|
|
if ((no_aggr || nr_cgroups) && !system_wide) {
|
|
|
|
fprintf(stderr, "both cgroup and no-aggregation "
|
|
|
|
"modes only available in system-wide mode\n");
|
|
|
|
|
2010-11-16 17:05:01 +08:00
|
|
|
usage_with_options(stat_usage, options);
|
perf tool: Add cgroup support
This patch adds the ability to filter monitoring based on container groups
(cgroups) for both perf stat and perf record. It is possible to monitor
multiple cgroups in parallel. There is one cgroup per event. The cgroups to
monitor are passed via a new -G option followed by a comma separated list of
cgroup names.
The cgroup filesystem has to be mounted. Given a cgroup name, the perf tool
finds the corresponding directory in the cgroup filesystem and opens it. It
then passes that file descriptor to the kernel.
Example:
$ perf stat -B -a -e cycles:u,cycles:u,cycles:u -G test1,,test2 -- sleep 1
Performance counter stats for 'sleep 1':
2,368,667,414 cycles test1
2,369,661,459 cycles
<not counted> cycles test2
1.001856890 seconds time elapsed
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4d590290.825bdf0a.7d0a.4890@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-02-14 17:20:01 +08:00
|
|
|
}
|
2010-11-16 17:05:01 +08:00
|
|
|
|
2009-06-28 02:19:09 +08:00
|
|
|
/* Add the detailed_attrs events to the event list when detailed_run is set */
|
perf stat: Add -d/--detailed flag to run with a lot of events
Add the new -d/--detailed flag, which generates a pretty detailed event list:
Performance counter stats for './hackbench 10' (10 runs):
1514.287888 task-clock # 10.897 CPUs utilized ( +- 3.05% )
39,698 context-switches # 0.026 M/sec ( +- 12.19% )
8,147 CPU-migrations # 0.005 M/sec ( +- 16.55% )
17,918 page-faults # 0.012 M/sec ( +- 0.37% )
2,944,504,050 cycles # 1.944 GHz ( +- 3.89% ) (32.60%)
1,043,971,283 stalled-cycles # 35.45% of all cycles are idle ( +- 5.22% ) (44.48%)
1,655,906,768 instructions # 0.56 insns per cycle
# 0.63 stalled cycles per insn ( +- 1.95% ) (55.09%)
338,832,373 branches # 223.757 M/sec ( +- 1.96% ) (64.47%)
3,892,416 branch-misses # 1.15% of all branches ( +- 5.49% ) (73.12%)
606,410,482 L1-dcache-loads # 400.459 M/sec ( +- 1.29% ) (71.21%)
31,204,395 L1-dcache-load-misses # 5.15% of all L1-dcache hits ( +- 3.04% ) (60.43%)
3,922,751 LLC-loads # 2.590 M/sec ( +- 6.80% ) (46.87%)
5,037,288 LLC-load-misses # 3.327 M/sec ( +- 3.56% ) (13.00%)
0.138966828 seconds time elapsed ( +- 4.11% )
This can be used "at a glance" for narrower analysis.
-d can also be used in addition to other -e events, to further expand an event list.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-cxs98quixs3qyvdqx3goojc4@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 19:50:47 +08:00
|
|
|
if (detailed_run) {
|
|
|
|
size_t c;
|
|
|
|
|
|
|
|
for (c = 0; c < ARRAY_SIZE(detailed_attrs); ++c) {
|
|
|
|
pos = perf_evsel__new(&detailed_attrs[c], c);
|
|
|
|
if (pos == NULL)
|
|
|
|
goto out;
|
|
|
|
perf_evlist__add(evsel_list, pos);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* Set attrs and nr_counters if no event is selected and !null_run */
|
|
|
|
if (!detailed_run && !null_run && !evsel_list->nr_entries) {
|
2011-01-04 02:39:04 +08:00
|
|
|
size_t c;
|
|
|
|
|
|
|
|
for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
|
2011-01-12 06:56:53 +08:00
|
|
|
pos = perf_evsel__new(&default_attrs[c], c);
|
2011-01-04 02:39:04 +08:00
|
|
|
if (pos == NULL)
|
|
|
|
goto out;
|
2011-01-12 06:56:53 +08:00
|
|
|
perf_evlist__add(evsel_list, pos);
|
2011-01-04 02:39:04 +08:00
|
|
|
}
|
2009-06-28 02:19:09 +08:00
|
|
|
}
|
2009-04-20 21:37:32 +08:00
|
|
|
|
2011-01-04 03:53:33 +08:00
|
|
|
if (target_pid != -1)
|
|
|
|
target_tid = target_pid;
|
|
|
|
|
2011-01-30 21:59:43 +08:00
|
|
|
evsel_list->threads = thread_map__new(target_pid, target_tid);
|
|
|
|
if (evsel_list->threads == NULL) {
|
2011-01-04 03:53:33 +08:00
|
|
|
pr_err("Problems finding threads of monitor\n");
|
|
|
|
usage_with_options(stat_usage, options);
|
|
|
|
}
|
|
|
|
|
perf tools: Fix sparse CPU numbering related bugs
At present, the perf subcommands that do system-wide monitoring
(perf stat, perf record and perf top) don't work properly unless
the online cpus are numbered 0, 1, ..., N-1. These tools ask
for the number of online cpus with sysconf(_SC_NPROCESSORS_ONLN)
and then try to create events for cpus 0, 1, ..., N-1.
This creates problems for systems where the online cpus are
numbered sparsely. For example, a POWER6 system in
single-threaded mode (i.e. only running 1 hardware thread per
core) will have only even-numbered cpus online.
This fixes the problem by reading the /sys/devices/system/cpu/online
file to find out which cpus are online. The code that does that is in
tools/perf/util/cpumap.[ch], and consists of a read_cpu_map()
function that sets up a cpumap[] array and returns the number of
online cpus. If /sys/devices/system/cpu/online can't be read or
can't be parsed successfully, it falls back to using sysconf to
ask how many cpus are online and sets up an identity map in cpumap[].
The perf record, perf stat and perf top code then calls
read_cpu_map() in the system-wide monitoring case (instead of
sysconf) and uses cpumap[] to get the cpu numbers to pass to
perf_event_open.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
LKML-Reference: <20100310093609.GA3959@brick.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-10 17:36:09 +08:00
|
|
|
if (system_wide)
|
2011-01-30 21:59:43 +08:00
|
|
|
evsel_list->cpus = cpu_map__new(cpu_list);
|
perf tools: Fix sparse CPU numbering related bugs
At present, the perf subcommands that do system-wide monitoring
(perf stat, perf record and perf top) don't work properly unless
the online cpus are numbered 0, 1, ..., N-1. These tools ask
for the number of online cpus with sysconf(_SC_NPROCESSORS_ONLN)
and then try to create events for cpus 0, 1, ..., N-1.
This creates problems for systems where the online cpus are
numbered sparsely. For example, a POWER6 system in
single-threaded mode (i.e. only running 1 hardware thread per
core) will have only even-numbered cpus online.
This fixes the problem by reading the /sys/devices/system/cpu/online
file to find out which cpus are online. The code that does that is in
tools/perf/util/cpumap.[ch], and consists of a read_cpu_map()
function that sets up a cpumap[] array and returns the number of
online cpus. If /sys/devices/system/cpu/online can't be read or
can't be parsed successfully, it falls back to using sysconf to
ask how many cpus are online and sets up an identity map in cpumap[].
The perf record, perf stat and perf top code then calls
read_cpu_map() in the system-wide monitoring case (instead of
sysconf) and uses cpumap[] to get the cpu numbers to pass to
perf_event_open.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
LKML-Reference: <20100310093609.GA3959@brick.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-10 17:36:09 +08:00
|
|
|
else
|
2011-01-30 21:59:43 +08:00
|
|
|
evsel_list->cpus = cpu_map__dummy_new();
|
2009-04-20 21:37:32 +08:00
|
|
|
|
2011-01-30 21:59:43 +08:00
|
|
|
if (evsel_list->cpus == NULL) {
|
2011-01-04 03:49:48 +08:00
|
|
|
perror("failed to parse CPUs map");
|
2010-05-28 18:00:01 +08:00
|
|
|
usage_with_options(stat_usage, options);
|
2011-01-04 03:49:48 +08:00
|
|
|
return -1;
|
|
|
|
}
|
2010-05-28 18:00:01 +08:00
|
|
|
|
2011-01-12 06:56:53 +08:00
|
|
|
list_for_each_entry(pos, &evsel_list->entries, node) {
|
2011-01-04 03:45:52 +08:00
|
|
|
if (perf_evsel__alloc_stat_priv(pos) < 0 ||
|
2011-01-30 21:59:43 +08:00
|
|
|
perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0 ||
|
|
|
|
perf_evsel__alloc_fd(pos, evsel_list->cpus->nr, evsel_list->threads->nr) < 0)
|
2011-01-04 02:39:04 +08:00
|
|
|
goto out_free_fd;
|
2010-03-18 22:36:05 +08:00
|
|
|
}
|
|
|
|
|
2009-05-15 17:03:23 +08:00
|
|
|
/*
|
|
|
|
* We don't want to block the signals - that would cause
|
|
|
|
* child tasks to inherit that and Ctrl-C would not work.
|
|
|
|
* What we want is for Ctrl-C to work in the exec()-ed
|
|
|
|
* task, but being ignored by perf stat itself:
|
|
|
|
*/
|
2009-06-10 21:55:59 +08:00
|
|
|
atexit(sig_atexit);
|
2009-05-15 17:03:23 +08:00
|
|
|
signal(SIGINT, skip_signal);
|
|
|
|
signal(SIGALRM, skip_signal);
|
|
|
|
signal(SIGABRT, skip_signal);
|
|
|
|
|
2009-06-13 20:57:28 +08:00
|
|
|
status = 0;
|
|
|
|
for (run_idx = 0; run_idx < run_count; run_idx++) {
|
|
|
|
if (run_count != 1 && verbose)
|
2009-06-24 20:49:34 +08:00
|
|
|
fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1);
|
2009-06-13 20:57:28 +08:00
|
|
|
status = run_perf_stat(argc, argv);
|
|
|
|
}
|
|
|
|
|
2010-03-23 00:10:28 +08:00
|
|
|
if (status != -1)
|
|
|
|
print_stat(argc, argv);
|
2011-01-04 02:39:04 +08:00
|
|
|
out_free_fd:
|
2011-01-12 06:56:53 +08:00
|
|
|
list_for_each_entry(pos, &evsel_list->entries, node)
|
2011-01-04 02:39:04 +08:00
|
|
|
perf_evsel__free_stat_priv(pos);
|
2011-01-30 21:59:43 +08:00
|
|
|
perf_evlist__delete_maps(evsel_list);
|
2011-02-02 02:18:10 +08:00
|
|
|
out:
|
|
|
|
perf_evlist__delete(evsel_list);
|
2009-06-13 20:57:28 +08:00
|
|
|
return status;
|
2009-04-20 21:37:32 +08:00
|
|
|
}
|