perf/core improvements and fixes:

User visible:
 
 - Do event name substring search as last resort in 'perf list'.
   (Arnaldo Carvalho de Melo)
 
   E.g.:
 
    # perf list clock
 
    List of pre-defined events (to be used in -e):
 
      cpu-clock                                          [Software event]
      task-clock                                         [Software event]
 
      uncore_cbox_0/clockticks/                          [Kernel PMU event]
      uncore_cbox_1/clockticks/                          [Kernel PMU event]
 
      kvm:kvm_pvclock_update                             [Tracepoint event]
      kvm:kvm_update_master_clock                        [Tracepoint event]
      power:clock_disable                                [Tracepoint event]
      power:clock_enable                                 [Tracepoint event]
      power:clock_set_rate                               [Tracepoint event]
      syscalls:sys_enter_clock_adjtime                   [Tracepoint event]
      syscalls:sys_enter_clock_getres                    [Tracepoint event]
      syscalls:sys_enter_clock_gettime                   [Tracepoint event]
      syscalls:sys_enter_clock_nanosleep                 [Tracepoint event]
      syscalls:sys_enter_clock_settime                   [Tracepoint event]
      syscalls:sys_exit_clock_adjtime                    [Tracepoint event]
      syscalls:sys_exit_clock_getres                     [Tracepoint event]
      syscalls:sys_exit_clock_gettime                    [Tracepoint event]
      syscalls:sys_exit_clock_nanosleep                  [Tracepoint event]
      syscalls:sys_exit_clock_settime                    [Tracepoint event]
 
 - Reduce min 'perf stat --interval-print/-I' to 10ms (Kan Liang)
 
   perf stat --interval in action:
 
   # perf stat -e cycles -I 50 -a usleep $((200 * 1000))
   print interval < 100ms. The overhead percentage could be high in some cases. Please proceed with caution.
   #   time                    counts unit events
       0.050233636         48,240,396      cycles
       0.100557098         35,492,594      cycles
       0.150804687         39,295,112      cycles
       0.201032269         33,101,961      cycles
       0.201980732            786,379      cycles
   #
 
 - Allow for max_stack greater than PERF_MAX_STACK_DEPTH, as when
   synthesizing callchains from Intel PT data (Adrian Hunter)
 
 - Allow probing on kmodules without DWARF (Masami Hiramatsu)
 
 - Fix a segfault when processing a perf.data file with callchains using
   "perf report --call-graph none" (Namhyung Kim)
 
 - Fix unresolved COMMs in 'perf top' when -s comm is used (Namhyung Kim)
 
 - Register idle thread in 'perf top' (Namhyung Kim)
 
 - Change 'record.samples' type to unsigned long long, fixing output of
   number of samples in 32-bit architectures (Yang Shi)
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJWDufPAAoJENZQFvNTUqpAxRQQAKW+jGA1GbO4ix9UFWwZ4z/n
 2h/0M8Z6+OxctHw03fnSBqteaoQddWRYGc7mxSR/FivaVtqTw5uWenDCt5BZTzKM
 +mZaQ7q6hlHEC1q9R3WSG0/LE2VdB7sek1EPLmQgAXRO9D3QHG1n8RktZ/Ujvblc
 xV10MwXfbclbe947SkTY81/RsQJoSpYzl/4VxiD/D4SwAFvWgj8H4OxAvO8VA6Eg
 iEWP30/JD5kKoXgm2QtP26yCBrf2QjRRof8qK61zfoX4eoOzP5eWfUzNh3IBCufH
 2cahWPmBqaStq/0RidHUvo/B09STgn4QAP72Z3jmbi20tPtWNh6vqSIdCQlqHbBU
 eysgySJ9f3+lx/2JO3nY0y5wAfmsgVqxkxiu4RSVkcS2sUE0X3UjaQ2vnD2yJ/LO
 EcnhofqD74qd9Dsp7tZUm+6V3V9rCPt0MvPGBC0EaGWy4rFhZUceiOcB05xmpQ+F
 jHb/nsRpuYzGaQGZ+Z9L44r+o4sD5h5iuj8+A2sstTyfW+z/tO5MGNOBam5bw/O+
 WEy01IYiN3DqyrZa5V2lAXgQ7NcwRYFLfUMafntlSQG+aQG16X5XCDOf168Y48I2
 jhaV0dZc4UThwPWSkMVApDzHDVF7xHoqlclK65Xca9kd1M9giAe36SP0mJuRjcfY
 RAxtXB26dwEqGGbME5se
 =RrfP
 -----END PGP SIGNATURE-----

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

 - Do an event name substring search as a last resort in 'perf list'.
   (Arnaldo Carvalho de Melo)

   E.g.:

    # perf list clock

    List of pre-defined events (to be used in -e):

     cpu-clock                                          [Software event]
     task-clock                                         [Software event]

     uncore_cbox_0/clockticks/                          [Kernel PMU event]
     uncore_cbox_1/clockticks/                          [Kernel PMU event]

     kvm:kvm_pvclock_update                             [Tracepoint event]
     kvm:kvm_update_master_clock                        [Tracepoint event]
     power:clock_disable                                [Tracepoint event]
     power:clock_enable                                 [Tracepoint event]
     power:clock_set_rate                               [Tracepoint event]
     syscalls:sys_enter_clock_adjtime                   [Tracepoint event]
     syscalls:sys_enter_clock_getres                    [Tracepoint event]
     syscalls:sys_enter_clock_gettime                   [Tracepoint event]
     syscalls:sys_enter_clock_nanosleep                 [Tracepoint event]
     syscalls:sys_enter_clock_settime                   [Tracepoint event]
     syscalls:sys_exit_clock_adjtime                    [Tracepoint event]
     syscalls:sys_exit_clock_getres                     [Tracepoint event]
     syscalls:sys_exit_clock_gettime                    [Tracepoint event]
     syscalls:sys_exit_clock_nanosleep                  [Tracepoint event]
     syscalls:sys_exit_clock_settime                    [Tracepoint event]

 - Reduce the minimum 'perf stat --interval-print/-I' interval from 100ms
   to 10ms. (Kan Liang)

   perf stat --interval in action:

   # perf stat -e cycles -I 50 -a usleep $((200 * 1000))
   print interval < 100ms. The overhead percentage could be high in some cases. Please proceed with caution.
   #   time                    counts unit events
       0.050233636         48,240,396      cycles
       0.100557098         35,492,594      cycles
       0.150804687         39,295,112      cycles
       0.201032269         33,101,961      cycles
       0.201980732            786,379      cycles
   #

 - Allow for max_stack greater than PERF_MAX_STACK_DEPTH, as when
   synthesizing callchains from Intel PT data. (Adrian Hunter)

 - Allow probing on kmodules without DWARF. (Masami Hiramatsu)

 - Fix a segfault when processing a perf.data file with callchains using
   "perf report --call-graph none". (Namhyung Kim)

 - Fix unresolved COMMs in 'perf top' when -s comm is used. (Namhyung Kim)

 - Register idle thread in 'perf top'. (Namhyung Kim)

 - Change 'record.samples' type to unsigned long long, fixing the output of
   the number of samples on 32-bit architectures. (Yang Shi)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2015-10-03 08:20:14 +02:00
commit e3b0ac1b7a
13 changed files with 65 additions and 26 deletions

View File

@ -125,6 +125,8 @@ To limit the list use:
. If none of the above is matched, it will apply the supplied glob to all
events, printing the ones that match.
. As a last resort, it will do a substring search in all event names.
One or more types can be used at the same time, listing the events for the
types specified.

View File

@ -128,8 +128,9 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m
-I msecs::
--interval-print msecs::
Print count deltas every N milliseconds (minimum: 100ms)
example: perf stat -I 1000 -e cycles -a sleep 5
Print count deltas every N milliseconds (minimum: 10ms)
The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution.
example: 'perf stat -I 1000 -e cycles -a sleep 5'
--per-socket::
Aggregate counts per processor socket for system-wide mode measurements. This

View File

@ -45,6 +45,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
}
for (i = 0; i < argc; ++i) {
char *sep, *s;
if (strcmp(argv[i], "tracepoint") == 0)
print_tracepoint_events(NULL, NULL, raw_dump);
else if (strcmp(argv[i], "hw") == 0 ||
@ -60,8 +62,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
print_hwcache_events(NULL, raw_dump);
else if (strcmp(argv[i], "pmu") == 0)
print_pmu_events(NULL, raw_dump);
else {
char *sep = strchr(argv[i], ':'), *s;
else if ((sep = strchr(argv[i], ':')) != NULL) {
int sep_idx;
if (sep == NULL) {
@ -76,6 +77,19 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
s[sep_idx] = '\0';
print_tracepoint_events(s, s + sep_idx + 1, raw_dump);
free(s);
} else {
if (asprintf(&s, "*%s*", argv[i]) < 0) {
printf("Critical: Not enough memory! Trying to continue...\n");
continue;
}
print_symbol_events(s, PERF_TYPE_HARDWARE,
event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump);
print_symbol_events(s, PERF_TYPE_SOFTWARE,
event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump);
print_hwcache_events(s, raw_dump);
print_pmu_events(s, raw_dump);
print_tracepoint_events(NULL, s, raw_dump);
free(s);
}
}
return 0;

View File

@ -182,10 +182,8 @@ static int opt_set_target(const struct option *opt, const char *str,
if (str) {
if (!strcmp(opt->long_name, "exec"))
params.uprobes = true;
#ifdef HAVE_DWARF_SUPPORT
else if (!strcmp(opt->long_name, "module"))
params.uprobes = false;
#endif
else
return ret;
@ -490,9 +488,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
"file", "vmlinux pathname"),
OPT_STRING('s', "source", &symbol_conf.source_prefix,
"directory", "path to kernel source"),
OPT_CALLBACK('m', "module", NULL, "modname|path",
"target module name (for online) or path (for offline)",
opt_set_target),
OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines,
"Don't search inlined functions"),
#endif
@ -509,6 +504,9 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
opt_set_filter),
OPT_CALLBACK('x', "exec", NULL, "executable|path",
"target executable name or path", opt_set_target),
OPT_CALLBACK('m', "module", NULL, "modname|path",
"target module name (for online) or path (for offline)",
opt_set_target),
OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
"Enable symbol demangling"),
OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,

View File

@ -49,7 +49,7 @@ struct record {
int realtime_prio;
bool no_buildid;
bool no_buildid_cache;
long samples;
unsigned long long samples;
};
static int record__write(struct record *rec, void *bf, size_t size)
@ -637,17 +637,25 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
* Let the child rip
*/
if (forks) {
union perf_event event;
union perf_event *event;
event = malloc(sizeof(event->comm) + machine->id_hdr_size);
if (event == NULL) {
err = -ENOMEM;
goto out_child;
}
/*
* Some H/W events are generated before COMM event
* which is emitted during exec(), so perf script
* cannot see a correct process name for those events.
* Synthesize COMM event to prevent it.
*/
perf_event__synthesize_comm(tool, &event,
perf_event__synthesize_comm(tool, event,
rec->evlist->workload.pid,
process_synthesized_event,
machine);
free(event);
perf_evlist__start_workload(rec->evlist);
}
@ -659,7 +667,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
auxtrace_snapshot_enabled = 1;
for (;;) {
int hits = rec->samples;
unsigned long long hits = rec->samples;
if (record__mmap_read_all(rec) < 0) {
auxtrace_snapshot_enabled = 0;

View File

@ -1179,7 +1179,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_STRING(0, "post", &post_cmd, "command",
"command to run after to the measured command"),
OPT_UINTEGER('I', "interval-print", &stat_config.interval,
"print counts at regular interval in ms (>= 100)"),
"print counts at regular interval in ms (>= 10)"),
OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
"aggregate counts per processor socket", AGGR_SOCKET),
OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
@ -1332,9 +1332,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
thread_map__read_comms(evsel_list->threads);
if (interval && interval < 100) {
pr_err("print interval must be >= 100ms\n");
parse_options_usage(stat_usage, options, "I", 1);
goto out;
if (interval < 10) {
pr_err("print interval must be >= 10ms\n");
parse_options_usage(stat_usage, options, "I", 1);
goto out;
} else
pr_warning("print interval < 100ms. "
"The overhead percentage could be high in some cases. "
"Please proceed with caution.\n");
}
if (perf_evlist__alloc_stats(evsel_list, interval))

View File

@ -857,9 +857,12 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
* TODO: we don't process guest user from host side
* except simple counting.
*/
/* Fall thru */
default:
goto next_event;
default:
if (event->header.type == PERF_RECORD_SAMPLE)
goto next_event;
machine = &session->machines.host;
break;
}
@ -961,6 +964,9 @@ static int __cmd_top(struct perf_top *top)
if (ret)
goto out_delete;
if (perf_session__register_idle_thread(top->session) == NULL)
goto out_delete;
machine__synthesize_threads(&top->session->machines.host, &opts->target,
top->evlist->threads, false, opts->proc_map_timeout);

View File

@ -1151,7 +1151,7 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
struct perf_evsel *evsel = hists_to_evsel(hists);
bool use_callchain;
if (evsel && !symbol_conf.show_ref_callgraph)
if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph)
use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN;
else
use_callchain = symbol_conf.use_callchain;

View File

@ -1831,7 +1831,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
}
check_calls:
if (chain->nr > PERF_MAX_STACK_DEPTH) {
if (chain->nr > PERF_MAX_STACK_DEPTH && (int)chain->nr > max_stack) {
pr_warning("corrupted callchain. skipping...\n");
return 0;
}

View File

@ -1008,7 +1008,8 @@ void print_pmu_events(const char *event_glob, bool name_only)
goto out_enomem;
j++;
}
if (pmu->selectable) {
if (pmu->selectable &&
(event_glob == NULL || strglobmatch(pmu->name, event_glob))) {
char *s;
if (asprintf(&s, "%s//", pmu->name) < 0)
goto out_enomem;

View File

@ -2543,7 +2543,8 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
goto out;
}
if (!pev->uprobes && !pp->retprobe) {
/* Note that the symbols in the kmodule are not relocated */
if (!pev->uprobes && !pp->retprobe && !pev->target) {
reloc_sym = kernel_get_ref_reloc_sym();
if (!reloc_sym) {
pr_warning("Relocated base symbol is not found!\n");
@ -2580,8 +2581,9 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
}
/* Add one probe point */
tp->address = map->unmap_ip(map, sym->start) + pp->offset;
/* If we found a wrong one, mark it by NULL symbol */
if (!pev->uprobes &&
/* Check the kprobe (not in module) is within .text */
if (!pev->uprobes && !pev->target &&
kprobe_warn_out_range(sym->name, tp->address)) {
tp->symbol = NULL; /* Skip it */
skipped++;

View File

@ -1311,7 +1311,7 @@ struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
return machine__findnew_thread(&session->machines.host, -1, pid);
}
static struct thread *perf_session__register_idle_thread(struct perf_session *session)
struct thread *perf_session__register_idle_thread(struct perf_session *session)
{
struct thread *thread;

View File

@ -89,6 +89,8 @@ struct machine *perf_session__findnew_machine(struct perf_session *session, pid_
}
struct thread *perf_session__findnew(struct perf_session *session, pid_t pid);
struct thread *perf_session__register_idle_thread(struct perf_session *session);
size_t perf_session__fprintf(struct perf_session *session, FILE *fp);
size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp);