perf record: Allow filtering perf's pid via --exclude-perf

This patch allows 'perf record' to exclude events issued by perf itself
via the '--exclude-perf' option.

Before this patch, when doing something like:

 # perf record -a -e syscalls:sys_enter_write <cmd>

One could easily get a result like this:

 # /tmp/perf report --stdio
 ...
  # Overhead  Command  Shared Object       Symbol
  # ........  .......  ..................  ....................
  #
      99.99%  perf     libpthread-2.18.so  [.] __write_nocancel
      0.01%   ls       libc-2.18.so        [.] write
      0.01%   sshd     libc-2.18.so        [.] write
 ...

Where most events are generated by perf itself.

A shell trick can be done to filter perf itself out:

 # cat << EOF > ./tmp
 > #!/bin/sh
 > exec perf record -e ... --filter="common_pid != \$\$" -a sleep 10
 > EOF
 # chmod a+x ./tmp
 # ./tmp

However, doing so is user unfriendly.

This patch extracts the evsel iteration framework introduced by the patch
'perf record: Apply filter to all events in a glob matching' into
foreach_evsel_in_last_glob(), and makes the exclude_perf() function append
a new filter expression to each evsel selected by a '-e' selector.

To avoid losing filters if the user passes '--filter' after '--exclude-perf',
this patch uses perf_evsel__append_filter() in both cases, instead of
perf_evsel__set_filter(), which removes the old filter. As a side effect, it
is now possible to use multiple '--filter' options for one selector. They
are combined with '&&'.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1436513770-8896-2-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Wang Nan 2015-07-10 07:36:10 +00:00 committed by Arnaldo Carvalho de Melo
parent 15bfd2cc10
commit 4ba1faa19f
4 changed files with 84 additions and 14 deletions

View File

@ -69,7 +69,16 @@ OPTIONS
"perf report" to view group events together.
--filter=<filter>::
Event filter.
Event filter. This option should follow an event selector (-e) which
selects tracepoint event(s). Multiple '--filter' options are combined
using '&&'.
--exclude-perf::
Don't record events issued by perf itself. This option should follow
an event selector (-e) which selects tracepoint event(s). It adds a
filter expression 'common_pid != $PERFPID' to filters. If other
'--filter' options exist, the new filter expression will be combined
with them by '&&'.
-a::
--all-cpus::

View File

@ -992,6 +992,9 @@ struct option __record_options[] = {
parse_events_option),
OPT_CALLBACK(0, "filter", &record.evlist, "filter",
"event filter", parse_filter),
OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
NULL, "don't record events from perf itself",
exclude_perf),
OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
"record events on existing process id"),
OPT_STRING('t', "tid", &record.opts.target.tid, "tid",

View File

@ -1167,27 +1167,24 @@ int parse_events_option(const struct option *opt, const char *str,
return ret;
}
int parse_filter(const struct option *opt, const char *str,
int unset __maybe_unused)
static int
foreach_evsel_in_last_glob(struct perf_evlist *evlist,
int (*func)(struct perf_evsel *evsel,
const void *arg),
const void *arg)
{
struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
struct perf_evsel *last = NULL;
int err;
if (evlist->nr_entries > 0)
last = perf_evlist__last(evlist);
do {
if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) {
fprintf(stderr,
"--filter option should follow a -e tracepoint option\n");
err = (*func)(last, arg);
if (err)
return -1;
}
if (perf_evsel__set_filter(last, str) < 0) {
fprintf(stderr,
"not enough memory to hold filter string\n");
return -1;
}
if (!last)
return 0;
if (last->node.prev == &evlist->entries)
return 0;
@ -1197,6 +1194,66 @@ int parse_filter(const struct option *opt, const char *str,
return 0;
}
/*
 * Per-evsel callback used by parse_filter(): add the filter expression
 * passed through @arg to @evsel, joined to whatever is already there
 * with "&&".
 *
 * @evsel must be a non-NULL tracepoint event; anything else is rejected.
 *
 * Returns 0 on success, -1 after printing a diagnostic on stderr.
 */
static int set_filter(struct perf_evsel *evsel, const void *arg)
{
	const char *filter_expr = arg;
	const char *errmsg;

	if (!evsel || evsel->attr.type != PERF_TYPE_TRACEPOINT)
		errmsg = "--filter option should follow a -e tracepoint option\n";
	else if (perf_evsel__append_filter(evsel, "&&", filter_expr) < 0)
		errmsg = "not enough memory to hold filter string\n";
	else
		return 0;

	/* Single failure exit: report which of the two checks tripped. */
	fprintf(stderr, "%s", errmsg);
	return -1;
}
/*
 * Option callback for '--filter'.
 *
 * @opt->value holds the address of the evlist pointer; @str is the filter
 * expression given on the command line.  The expression is handed to
 * set_filter() via foreach_evsel_in_last_glob().
 *
 * Returns whatever foreach_evsel_in_last_glob() returns.
 */
int parse_filter(const struct option *opt, const char *str,
		 int unset __maybe_unused)
{
	struct perf_evlist **evlist_ref = (struct perf_evlist **)opt->value;

	return foreach_evsel_in_last_glob(*evlist_ref, set_filter, str);
}
/*
 * Per-evsel callback used by exclude_perf(): add the expression
 * "common_pid != <pid of this process>" to @evsel's filter, joined to
 * whatever is already there with "&&".
 *
 * @evsel must be a non-NULL tracepoint event; @arg is unused.
 *
 * Returns 0 on success, -1 after printing a diagnostic on stderr.
 */
static int add_exclude_perf_filter(struct perf_evsel *evsel,
				   const void *arg __maybe_unused)
{
	char pid_filter[64];
	const char *errmsg;

	if (!evsel || evsel->attr.type != PERF_TYPE_TRACEPOINT) {
		errmsg = "--exclude-perf option should follow a -e tracepoint option\n";
		goto fail;
	}

	/* Build the expression from our own pid. */
	snprintf(pid_filter, sizeof(pid_filter), "common_pid != %d", getpid());

	if (perf_evsel__append_filter(evsel, "&&", pid_filter) >= 0)
		return 0;

	errmsg = "not enough memory to hold filter string\n";
fail:
	fprintf(stderr, "%s", errmsg);
	return -1;
}
/*
 * Option callback for '--exclude-perf' (an option that takes no argument).
 *
 * @opt->value holds the address of the evlist pointer.  Runs
 * add_exclude_perf_filter() through foreach_evsel_in_last_glob(), with no
 * extra argument.
 *
 * Returns whatever foreach_evsel_in_last_glob() returns.
 */
int exclude_perf(const struct option *opt,
		 const char *arg __maybe_unused,
		 int unset __maybe_unused)
{
	struct perf_evlist **evlist_ref = (struct perf_evlist **)opt->value;

	return foreach_evsel_in_last_glob(*evlist_ref,
					  add_exclude_perf_filter, NULL);
}
static const char * const event_type_descriptors[] = {
"Hardware event",
"Software event",

View File

@ -34,6 +34,7 @@ extern int parse_events(struct perf_evlist *evlist, const char *str,
struct parse_events_error *error);
extern int parse_events_terms(struct list_head *terms, const char *str);
extern int parse_filter(const struct option *opt, const char *str, int unset);
extern int exclude_perf(const struct option *opt, const char *arg, int unset);
#define EVENTS_HELP_MAX (128*1024)