Merge branch 'perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux-2.6 into perf/core

This commit is contained in:
Ingo Molnar 2010-12-02 11:20:11 +01:00
commit e4b546a364
15 changed files with 324 additions and 68 deletions

View File

@ -24,12 +24,47 @@ OPTIONS
--input=::
Input file name. (default: perf.data)
-d::
--dsos=<dso[,dso...]>::
Only consider symbols in these dsos.
-s::
--symbol=<symbol>::
Symbol to annotate.
-f::
--force::
Don't complain, do it.
-v::
--verbose::
Be more verbose. (Show symbol address, etc)
-D::
--dump-raw-trace::
Dump raw trace in ASCII.
-k::
--vmlinux=<file>::
vmlinux pathname.
-m::
--modules::
Load module symbols. WARNING: use only with -k and LIVE kernel.
-l::
--print-line::
Print matching source lines (may be slow).
-P::
--full-paths::
Don't shorten the displayed pathnames.
--stdio:: Use the stdio interface.
--tui:: Use the TUI interface. Use of --tui requires a tty; if one is not
present, as when piping to other commands, the stdio interface is
used. This interface starts by centering on the line with more
samples, TAB/UNTAB cycles thru the lines with more samples.
samples, TAB/UNTAB cycles through the lines with more samples.
SEE ALSO
--------

View File

@ -18,6 +18,9 @@ perf report.
OPTIONS
-------
-H::
--with-hits::
Show only DSOs with hits.
-i::
--input=::
Input file name. (default: perf.data)

View File

@ -19,6 +19,18 @@ If no parameters are passed it will assume perf.data.old and perf.data.
OPTIONS
-------
-M::
--displacement::
Show position displacement relative to baseline.
-D::
--dump-raw-trace::
Dump raw trace in ASCII.
-m::
--modules::
Load module symbols. WARNING: use only with -k and LIVE kernel
-d::
--dsos=::
Only consider symbols in these dsos. CSV that understands
@ -42,7 +54,7 @@ OPTIONS
--field-separator=::
Use a special separator character and don't pad with spaces, replacing
all occurances of this separator in symbol names (and other output)
all occurrences of this separator in symbol names (and other output)
with a '.' character, which is thus the only invalid separator.
-v::
@ -50,6 +62,11 @@ OPTIONS
Be verbose, for instance, show the raw counts in addition to the
diff.
-f::
--force::
Don't complain, do it.
SEE ALSO
--------
linkperf:perf-record[1]

View File

@ -22,7 +22,7 @@ There are a couple of variants of perf kvm:
a performance counter profile of guest os in realtime
of an arbitrary workload.
'perf kvm record <command>' to record the performance couinter profile
'perf kvm record <command>' to record the performance counter profile
of an arbitrary workload and save it into a perf data file. If both
--host and --guest are input, the perf data file name is perf.data.kvm.
If there is no --host but --guest, the file name is perf.data.guest.
@ -40,6 +40,12 @@ There are a couple of variants of perf kvm:
OPTIONS
-------
-i::
--input=::
Input file name.
-o::
--output::
Output file name.
--host=::
Collect host side performance profile.
--guest=::

View File

@ -24,6 +24,21 @@ and statistics with this 'perf lock' command.
'perf lock report' reports statistical data.
OPTIONS
-------
-i::
--input=<file>::
Input file name.
-v::
--verbose::
Be more verbose (show symbol address, etc).
-D::
--dump-raw-trace::
Dump raw trace in ASCII.
SEE ALSO
--------
linkperf:perf[1]

View File

@ -115,7 +115,7 @@ Each probe argument follows below syntax.
LINE SYNTAX
-----------
Line range is descripted by following syntax.
Line range is described by following syntax.
"FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]"

View File

@ -39,15 +39,24 @@ OPTIONS
be passed as follows: '\mem:addr[:[r][w][x]]'.
If you want to profile read-write accesses in 0x1000, just set
'mem:0x1000:rw'.
--filter=<filter>::
Event filter.
-a::
System-wide collection.
--all-cpus::
System-wide collection from all CPUs.
-l::
Scale counter values.
-p::
--pid=::
Record events on existing pid.
Record events on existing process ID.
-t::
--tid=::
Record events on existing thread ID.
-r::
--realtime=::
@ -109,8 +118,8 @@ Collect raw sample records from all opened counters (default for tracepoint coun
-C::
--cpu::
Collect samples only on the list of cpus provided. Multiple CPUs can be provided as a
comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
In per-thread mode with inheritance mode on (default), samples are captured only when
the thread executes on the designated CPUs. Default is to monitor all CPUs.

View File

@ -20,6 +20,11 @@ OPTIONS
-i::
--input=::
Input file name. (default: perf.data)
-v::
--verbose::
Be more verbose. (show symbol address, etc)
-d::
--dsos=::
Only consider symbols in these dsos. CSV that understands
@ -27,6 +32,10 @@ OPTIONS
-n::
--show-nr-samples::
Show the number of samples for each symbol
--showcpuutilization::
Show sample percentage for different cpu modes.
-T::
--threads::
Show per-thread event counters
@ -39,12 +48,24 @@ OPTIONS
Only consider these symbols. CSV that understands
file://filename entries.
-U::
--hide-unresolved::
Only display entries resolved to a symbol.
-s::
--sort=::
Sort by key(s): pid, comm, dso, symbol, parent.
-p::
--parent=<regex>::
regex filter to identify parent, see: '--sort parent'
-x::
--exclude-other::
Only display entries with parent-match.
-w::
--field-width=::
--column-widths=<width[,width...]>::
Force each column width to the provided list, for large terminal
readability.
@ -52,19 +73,26 @@ OPTIONS
--field-separator=::
Use a special separator character and don't pad with spaces, replacing
all occurances of this separator in symbol names (and other output)
all occurrences of this separator in symbol names (and other output)
with a '.' character, which is thus the only invalid separator.
-D::
--dump-raw-trace::
Dump raw trace in ASCII.
-g [type,min]::
--call-graph::
Display callchains using type and min percent threshold.
Display call chains using type and min percent threshold.
type can be either:
- flat: single column, linear exposure of callchains.
- flat: single column, linear exposure of call chains.
- graph: use a graph tree, displaying absolute overhead rates.
- fractal: like graph, but displays relative rates. Each branch of
the tree is considered as a new profiled object. +
Default: fractal,0.5.
--pretty=<key>::
Pretty printing style. key: normal, raw
--stdio:: Use the stdio interface.
--tui:: Use the TUI interface, that is integrated with annotate and allows
@ -72,6 +100,19 @@ OPTIONS
requires a tty, if one is not present, as when piping to other
commands, the stdio interface is used.
-k::
--vmlinux=<file>::
vmlinux pathname
-m::
--modules::
Load module symbols. WARNING: This should only be used with -k and
a LIVE kernel.
-f::
--force::
Don't complain, do it.
SEE ALSO
--------
linkperf:perf-stat[1]

View File

@ -8,11 +8,11 @@ perf-sched - Tool to trace/measure scheduler properties (latencies)
SYNOPSIS
--------
[verse]
'perf sched' {record|latency|replay|trace}
'perf sched' {record|latency|map|replay|trace}
DESCRIPTION
-----------
There are four variants of perf sched:
There are five variants of perf sched:
'perf sched record <command>' to record the scheduling events
of an arbitrary workload.
@ -30,8 +30,22 @@ There are four variants of perf sched:
of the workload as it occurred when it was recorded - and can repeat
it a number of times, measuring its performance.)
'perf sched map' to print a textual context-switching outline of
workload captured via perf sched record. Columns stand for
individual CPUs, and the two-letter shortcuts stand for tasks that
are running on a CPU. A '*' denotes the CPU that had the event, and
a dot signals an idle CPU.
OPTIONS
-------
-i::
--input=<file>::
Input file name. (default: perf.data)
-v::
--verbose::
Be more verbose. (show symbol address, etc)
-D::
--dump-raw-trace=::
Display verbose dump of the sched data.

View File

@ -104,6 +104,13 @@ OPTIONS
normally don't - this option allows the latter to be run in
system-wide mode.
-i::
--input=::
Input file name.
-d::
--debug-mode::
Do various checks like samples ordering and lost events.
SEE ALSO
--------

View File

@ -8,8 +8,8 @@ perf-stat - Run a command and gather performance counter statistics
SYNOPSIS
--------
[verse]
'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] <command>
'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] -- <command> [<options>]
'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command>
'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
DESCRIPTION
-----------
@ -35,21 +35,33 @@ OPTIONS
child tasks do not inherit counters
-p::
--pid=<pid>::
stat events on existing pid
stat events on existing process id
-t::
--tid=<tid>::
stat events on existing thread id
-a::
system-wide collection
--all-cpus::
system-wide collection from all CPUs
-c::
scale counter values
--scale::
scale/normalize counter values
-r::
--repeat=<n>::
repeat command and print average + stddev (max: 100)
-B::
--big-num::
print large numbers with thousands' separators according to locale
-C::
--cpu=::
Count only on the list of cpus provided. Multiple CPUs can be provided as a
comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
Count only on the list of CPUs provided. Multiple CPUs can be provided as a
comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
In per-thread mode, this option is ignored. The -a option is still necessary
to activate system-wide monitoring. Default is to count on all CPUs.
@ -58,6 +70,19 @@ to activate system-wide monitoring. Default is to count on all CPUs.
Do not aggregate counts across all monitored CPUs in system-wide mode (-a).
This option is only valid in system-wide mode.
-n::
--null::
null run - don't start any counters
-v::
--verbose::
be more verbose (show counter open errors, etc)
-x SEP::
--field-separator SEP::
print counts using a CSV-style output to make it easy to import directly into
spreadsheets. Columns are separated by the string specified in SEP.
EXAMPLES
--------

View File

@ -12,7 +12,7 @@ SYNOPSIS
DESCRIPTION
-----------
This command does assorted sanity tests, initially thru linked routines but
This command does assorted sanity tests, initially through linked routines but
also will look for a directory with more tests in the form of scripts.
OPTIONS

View File

@ -12,7 +12,7 @@ SYNOPSIS
DESCRIPTION
-----------
This command generates and displays a performance counter profile in realtime.
This command generates and displays a performance counter profile in real time.
OPTIONS
@ -27,8 +27,8 @@ OPTIONS
-C <cpu-list>::
--cpu=<cpu>::
Monitor only on the list of cpus provided. Multiple CPUs can be provided as a
comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
Default is to monitor all CPUs.
-d <seconds>::
@ -50,6 +50,10 @@ Default is to monitor all CPUS.
--count-filter=<count>::
Only display functions with more events than this.
-g::
--group::
Put the counters into a counter group.
-F <freq>::
--freq=<freq>::
Profile at this frequency.
@ -68,7 +72,11 @@ Default is to monitor all CPUS.
-p <pid>::
--pid=<pid>::
Profile events on existing pid.
Profile events on existing process ID.
-t <tid>::
--tid=<tid>::
Profile events on existing thread ID.
-r <priority>::
--realtime=<priority>::
@ -78,6 +86,18 @@ Default is to monitor all CPUS.
--sym-annotate=<symbol>::
Annotate this symbol.
-K::
--hide_kernel_symbols::
Hide kernel symbols.
-U::
--hide_user_symbols::
Hide user symbols.
-D::
--dump-symtab::
Dump the symbol table used for profiling.
-v::
--verbose::
Be more verbose (show counter open errors, etc).

View File

@ -173,7 +173,7 @@ static const char * const diff_usage[] = {
static const struct option options[] = {
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
OPT_BOOLEAN('m', "displacement", &show_displacement,
OPT_BOOLEAN('M', "displacement", &show_displacement,
"Show position displacement relative to baseline"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),

View File

@ -52,6 +52,8 @@
#include <math.h>
#include <locale.h>
#define DEFAULT_SEPARATOR " "
static struct perf_event_attr default_attrs[] = {
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
@ -82,8 +84,11 @@ static pid_t *all_tids = NULL;
static int thread_num = 0;
static pid_t child_pid = -1;
static bool null_run = false;
static bool big_num = false;
static bool big_num = true;
static int big_num_opt = -1;
static const char *cpu_list;
static const char *csv_sep = NULL;
static bool csv_output = false;
static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
@ -449,12 +454,18 @@ static void print_noise(int counter, double avg)
static void nsec_printout(int cpu, int counter, double avg)
{
double msecs = avg / 1e6;
char cpustr[16] = { '\0', };
const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s";
if (no_aggr)
fprintf(stderr, "CPU%-4d %18.6f %-24s",
cpumap[cpu], msecs, event_name(counter));
else
fprintf(stderr, " %18.6f %-24s", msecs, event_name(counter));
sprintf(cpustr, "CPU%*d%s",
csv_output ? 0 : -4,
cpumap[cpu], csv_sep);
fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(counter));
if (csv_output)
return;
if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) {
fprintf(stderr, " # %10.3f CPUs ",
@ -466,18 +477,26 @@ static void abs_printout(int cpu, int counter, double avg)
{
double total, ratio = 0.0;
char cpustr[16] = { '\0', };
const char *fmt;
if (csv_output)
fmt = "%s%.0f%s%s";
else if (big_num)
fmt = "%s%'18.0f%s%-24s";
else
fmt = "%s%18.0f%s%-24s";
if (no_aggr)
sprintf(cpustr, "CPU%-4d", cpumap[cpu]);
sprintf(cpustr, "CPU%*d%s",
csv_output ? 0 : -4,
cpumap[cpu], csv_sep);
else
cpu = 0;
if (big_num)
fprintf(stderr, "%s %'18.0f %-24s",
cpustr, avg, event_name(counter));
else
fprintf(stderr, "%s %18.0f %-24s",
cpustr, avg, event_name(counter));
fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(counter));
if (csv_output)
return;
if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
total = avg_stats(&runtime_cycles_stats[cpu]);
@ -515,8 +534,9 @@ static void print_counter_aggr(int counter)
int scaled = event_scaled[counter];
if (scaled == -1) {
fprintf(stderr, " %18s %-24s\n",
"<not counted>", event_name(counter));
fprintf(stderr, "%*s%s%-24s\n",
csv_output ? 0 : 18,
"<not counted>", csv_sep, event_name(counter));
return;
}
@ -525,6 +545,11 @@ static void print_counter_aggr(int counter)
else
abs_printout(-1, counter, avg);
if (csv_output) {
fputc('\n', stderr);
return;
}
print_noise(counter, avg);
if (scaled) {
@ -554,8 +579,12 @@ static void print_counter(int counter)
ena = cpu_counts[cpu][counter].ena;
run = cpu_counts[cpu][counter].run;
if (run == 0 || ena == 0) {
fprintf(stderr, "CPU%-4d %18s %-24s", cpumap[cpu],
"<not counted>", event_name(counter));
fprintf(stderr, "CPU%*d%s%*s%s%-24s",
csv_output ? 0 : -4,
cpumap[cpu], csv_sep,
csv_output ? 0 : 18,
"<not counted>", csv_sep,
event_name(counter));
fprintf(stderr, "\n");
continue;
@ -566,12 +595,14 @@ static void print_counter(int counter)
else
abs_printout(cpu, counter, val);
if (!csv_output) {
print_noise(counter, 1.0);
if (run != ena) {
fprintf(stderr, " (scaled from %.2f%%)",
100.0 * run / ena);
}
}
fprintf(stderr, "\n");
}
}
@ -582,6 +613,7 @@ static void print_stat(int argc, const char **argv)
fflush(stdout);
if (!csv_output) {
fprintf(stderr, "\n");
fprintf(stderr, " Performance counter stats for ");
if(target_pid == -1 && target_tid == -1) {
@ -597,6 +629,7 @@ static void print_stat(int argc, const char **argv)
if (run_count > 1)
fprintf(stderr, " (%d runs)", run_count);
fprintf(stderr, ":\n\n");
}
if (no_aggr) {
for (counter = 0; counter < nr_counters; counter++)
@ -606,6 +639,7 @@ static void print_stat(int argc, const char **argv)
print_counter_aggr(counter);
}
if (!csv_output) {
fprintf(stderr, "\n");
fprintf(stderr, " %18.9f seconds time elapsed",
avg_stats(&walltime_nsecs_stats)/1e9);
@ -615,6 +649,7 @@ static void print_stat(int argc, const char **argv)
avg_stats(&walltime_nsecs_stats));
}
fprintf(stderr, "\n\n");
}
}
static volatile int signr = -1;
@ -644,6 +679,13 @@ static const char * const stat_usage[] = {
NULL
};
/*
 * Option callback registered for -B/--big-num in options[].
 *
 * Records the user's explicit choice in big_num_opt: 0 when 'unset' is
 * non-zero, 1 otherwise. big_num_opt is initialised to -1, so cmd_stat()
 * can tell "option not given" apart from either explicit setting when it
 * later decides the final value of big_num.
 *
 * 'opt' and 's' are unused. Always returns 0.
 *
 * NOTE(review): 'unset' is presumably set by the option parser for the
 * negated --no-big-num form - confirm against parse-options.
 */
static int stat__set_big_num(const struct option *opt __used,
const char *s __used, int unset)
{
big_num_opt = unset ? 0 : 1;
return 0;
}
static const struct option options[] = {
OPT_CALLBACK('e', "event", NULL, "event",
"event selector. use 'perf list' to list available events",
@ -664,12 +706,15 @@ static const struct option options[] = {
"repeat command and print average + stddev (max: 100)"),
OPT_BOOLEAN('n', "null", &null_run,
"null run - dont start any counters"),
OPT_BOOLEAN('B', "big-num", &big_num,
"print large numbers with thousands\' separators"),
OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
"print large numbers with thousands\' separators",
stat__set_big_num),
OPT_STRING('C', "cpu", &cpu_list, "cpu",
"list of cpus to monitor in system-wide"),
OPT_BOOLEAN('A', "no-aggr", &no_aggr,
"disable CPU count aggregation"),
OPT_STRING('x', "field-separator", &csv_sep, "separator",
"print counts with custom separator"),
OPT_END()
};
@ -682,6 +727,25 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
argc = parse_options(argc, argv, options, stat_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
if (csv_sep)
csv_output = true;
else
csv_sep = DEFAULT_SEPARATOR;
/*
* let the spreadsheet do the pretty-printing
*/
if (csv_output) {
/* User explicitly passed -B? */
if (big_num_opt == 1) {
fprintf(stderr, "-B option not supported with -x\n");
usage_with_options(stat_usage, options);
} else /* Nope, so disable big number formatting */
big_num = false;
} else if (big_num_opt == 0) /* User passed --no-big-num */
big_num = false;
if (!argc && target_pid == -1 && target_tid == -1)
usage_with_options(stat_usage, options);
if (run_count <= 0)