Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

  - IPC and cycle accounting in 'perf annotate'. (Andi Kleen)

  - Display cycles in branch sort mode in 'perf report'. (Andi Kleen)

  - Add total time column to 'perf trace' syscall stats summary. (Milian Wolff)

Infrastructure changes:

  - PMU helpers to use in Intel PT. (Adrian Hunter)

  - Fix perf-with-kcore script not to split args with spaces. (Adrian Hunter)

  - Add empty Build files for some more architectures. (Ben Hutchings)

  - Move 'perf stat' config variables to a struct to allow using some
    of its functions in more places. (Jiri Olsa)

  - Add DWARF register names for 'xtensa' arch. (Max Filippov)

  - Implement BPF programs attached to uprobes. (Wang Nan)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit 1354ac6ad8 by Ingo Molnar, 2015-08-07 09:11:30 +02:00
39 changed files with 716 additions and 225 deletions


@ -243,6 +243,7 @@ enum {
TRACE_EVENT_FL_USE_CALL_FILTER_BIT,
TRACE_EVENT_FL_TRACEPOINT_BIT,
TRACE_EVENT_FL_KPROBE_BIT,
TRACE_EVENT_FL_UPROBE_BIT,
};
/*
@ -257,6 +258,7 @@ enum {
* USE_CALL_FILTER - For trace internal events, don't use file filter
* TRACEPOINT - Event is a tracepoint
* KPROBE - Event is a kprobe
* UPROBE - Event is a uprobe
*/
enum {
TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
@ -267,8 +269,11 @@ enum {
TRACE_EVENT_FL_USE_CALL_FILTER = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT),
TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT),
TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT),
};
#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)
struct trace_event_call {
struct list_head list;
struct trace_event_class *class;
@ -542,7 +547,7 @@ event_trigger_unlock_commit_regs(struct trace_event_file *file,
event_triggers_post_call(file, tt);
}
#ifdef CONFIG_BPF_SYSCALL
#ifdef CONFIG_BPF_EVENTS
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
#else
static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)


@ -6846,8 +6846,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
if (event->tp_event->prog)
return -EEXIST;
if (!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE))
/* bpf programs can only be attached to kprobes */
if (!(event->tp_event->flags & TRACE_EVENT_FL_UKPROBE))
/* bpf programs can only be attached to u/kprobes */
return -EINVAL;
prog = bpf_prog_get(prog_fd);
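
With TRACE_EVENT_FL_UKPROBE accepted here, user space can attach a BPF
program to a uprobe the same way it already could for kprobes. A minimal
sketch of the calling sequence, assuming the uprobe event was already
created through tracefs (uprobe_events) and that prog_fd refers to a loaded
BPF_PROG_TYPE_KPROBE program; attach_bpf_to_uprobe() and its parameters are
illustrative, not part of this patch:

	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	static int attach_bpf_to_uprobe(int tracepoint_id, int prog_fd)
	{
		struct perf_event_attr attr;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.type = PERF_TYPE_TRACEPOINT; /* uprobes are exposed as tracepoints */
		attr.size = sizeof(attr);
		attr.config = tracepoint_id;      /* from .../events/uprobes/<name>/id */

		fd = syscall(__NR_perf_event_open, &attr,
			     -1 /* pid: all */, 0 /* cpu 0 */, -1 /* group */, 0);
		if (fd < 0)
			return -1;

		/* Before this change the kernel rejected this for uprobes with -EINVAL. */
		if (ioctl(fd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
			close(fd);
			return -1;
		}
		return fd;
	}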


@ -434,7 +434,7 @@ config UPROBE_EVENT
config BPF_EVENTS
depends on BPF_SYSCALL
depends on KPROBE_EVENT
depends on KPROBE_EVENT || UPROBE_EVENT
bool
default y
help


@ -1095,11 +1095,15 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
{
struct trace_event_call *call = &tu->tp.call;
struct uprobe_trace_entry_head *entry;
struct bpf_prog *prog = call->prog;
struct hlist_head *head;
void *data;
int size, esize;
int rctx;
if (prog && !trace_call_bpf(prog, regs))
return;
esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
size = esize + tu->tp.size + dsize;
@ -1289,6 +1293,7 @@ static int register_uprobe_event(struct trace_uprobe *tu)
return -ENODEV;
}
call->flags = TRACE_EVENT_FL_UPROBE;
call->class->reg = trace_uprobe_register;
call->data = tu;
ret = trace_add_event_call(call);


@ -109,6 +109,7 @@ OPTIONS
- mispredict: "N" for predicted branch, "Y" for mispredicted branch
- in_tx: branch in TSX transaction
- abort: TSX transaction abort.
- cycles: Cycles in basic block
And default sort keys are changed to comm, dso_from, symbol_from, dso_to
and symbol_to, see '--branch-stack'.
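
A hedged usage sketch (not from this patch): the cycles key only carries
data on hardware whose branch records include cycle counts, and it needs
branch-stack sampling at record time:

	perf record -b -- ./workload
	perf report --sort symbol_from,symbol_to,cycles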


@ -208,6 +208,27 @@ Default is to monitor all CPUS.
This option sets the time out limit. The default value is 500 ms.
-b::
--branch-any::
Enable taken branch stack sampling. Any type of taken branch may be sampled.
This is a shortcut for --branch-filter any. See --branch-filter for more info.
-j::
--branch-filter::
Enable taken branch stack sampling. Each sample captures a series of consecutive
taken branches. The number of branches captured with each sample depends on the
underlying hardware, the type of branches of interest, and the executed code.
It is possible to select the types of branches captured by enabling filters.
For a full list of modifiers please see the perf record manpage.
The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
The privilege levels may be omitted, in which case, the privilege levels of the associated
event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
levels are subject to permissions. When sampling on multiple events, branch stack sampling
is enabled for all the sampling events. The sampled branch type is the same for all events.
The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
Note that this feature may not be available on all processors.
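
Illustrative invocations of the two new options (not from this patch):

	perf top -b            # shortcut for --branch-filter any
	perf top -j cond,u     # user-level conditional branches only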
INTERACTIVE PROMPTING KEYS
--------------------------


@ -0,0 +1 @@
# empty


@ -0,0 +1 @@
# empty


@ -0,0 +1 @@
# empty


@ -0,0 +1 @@
libperf-y += util/


@ -0,0 +1,3 @@
ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1
endif


@ -0,0 +1 @@
libperf-$(CONFIG_DWARF) += dwarf-regs.o


@ -0,0 +1,25 @@
/*
* Mapping of DWARF debug register numbers into register names.
*
* Copyright (c) 2015 Cadence Design Systems Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <stddef.h>
#include <dwarf-regs.h>
#define XTENSA_MAX_REGS 16
const char *xtensa_regs_table[XTENSA_MAX_REGS] = {
"a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
"a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15",
};
const char *get_arch_regstr(unsigned int n)
{
return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL;
}


@ -187,6 +187,7 @@ find_next:
* symbol, free he->ms.sym->src to signal we already
* processed this symbol.
*/
zfree(&notes->src->cycles_hist);
zfree(&notes->src);
}
}


@ -53,6 +53,7 @@ struct report {
bool mem_mode;
bool header;
bool header_only;
bool nonany_branch_mode;
int max_stack;
struct perf_read_values show_threads_values;
const char *pretty_printing_style;
@ -102,6 +103,9 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
if (!ui__has_annotation())
return 0;
hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
rep->nonany_branch_mode);
if (sort__mode == SORT_MODE__BRANCH) {
bi = he->branch_info;
err = addr_map_symbol__inc_samples(&bi->from, evsel->idx);
@ -258,6 +262,12 @@ static int report__setup_sample_type(struct report *rep)
else
callchain_param.record_mode = CALLCHAIN_FP;
}
/* ??? handle more cases than just ANY? */
if (!(perf_evlist__combined_branch_type(session->evlist) &
PERF_SAMPLE_BRANCH_ANY))
rep->nonany_branch_mode = true;
return 0;
}


@ -101,8 +101,6 @@ static struct target target = {
static int run_count = 1;
static bool no_inherit = false;
static bool scale = true;
static enum aggr_mode aggr_mode = AGGR_GLOBAL;
static volatile pid_t child_pid = -1;
static bool null_run = false;
static int detailed_run = 0;
@ -112,11 +110,9 @@ static int big_num_opt = -1;
static const char *csv_sep = NULL;
static bool csv_output = false;
static bool group = false;
static FILE *output = NULL;
static const char *pre_cmd = NULL;
static const char *post_cmd = NULL;
static bool sync_run = false;
static unsigned int interval = 0;
static unsigned int initial_delay = 0;
static unsigned int unit_width = 4; /* strlen("unit") */
static bool forever = false;
@ -126,6 +122,11 @@ static int (*aggr_get_id)(struct cpu_map *m, int cpu);
static volatile int done = 0;
static struct perf_stat_config stat_config = {
.aggr_mode = AGGR_GLOBAL,
.scale = true,
};
static inline void diff_timespec(struct timespec *r, struct timespec *a,
struct timespec *b)
{
@ -148,7 +149,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
{
struct perf_event_attr *attr = &evsel->attr;
if (scale)
if (stat_config.scale)
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING;
@ -178,142 +179,6 @@ static inline int nsec_counter(struct perf_evsel *evsel)
return 0;
}
static void zero_per_pkg(struct perf_evsel *counter)
{
if (counter->per_pkg_mask)
memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
}
static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
{
unsigned long *mask = counter->per_pkg_mask;
struct cpu_map *cpus = perf_evsel__cpus(counter);
int s;
*skip = false;
if (!counter->per_pkg)
return 0;
if (cpu_map__empty(cpus))
return 0;
if (!mask) {
mask = zalloc(MAX_NR_CPUS);
if (!mask)
return -ENOMEM;
counter->per_pkg_mask = mask;
}
s = cpu_map__get_socket(cpus, cpu);
if (s < 0)
return -1;
*skip = test_and_set_bit(s, mask) == 1;
return 0;
}
static int
process_counter_values(struct perf_evsel *evsel, int cpu, int thread,
struct perf_counts_values *count)
{
struct perf_counts_values *aggr = &evsel->counts->aggr;
static struct perf_counts_values zero;
bool skip = false;
if (check_per_pkg(evsel, cpu, &skip)) {
pr_err("failed to read per-pkg counter\n");
return -1;
}
if (skip)
count = &zero;
switch (aggr_mode) {
case AGGR_THREAD:
case AGGR_CORE:
case AGGR_SOCKET:
case AGGR_NONE:
if (!evsel->snapshot)
perf_evsel__compute_deltas(evsel, cpu, thread, count);
perf_counts_values__scale(count, scale, NULL);
if (aggr_mode == AGGR_NONE)
perf_stat__update_shadow_stats(evsel, count->values, cpu);
break;
case AGGR_GLOBAL:
aggr->val += count->val;
if (scale) {
aggr->ena += count->ena;
aggr->run += count->run;
}
default:
break;
}
return 0;
}
static int process_counter_maps(struct perf_evsel *counter)
{
int nthreads = thread_map__nr(counter->threads);
int ncpus = perf_evsel__nr_cpus(counter);
int cpu, thread;
if (counter->system_wide)
nthreads = 1;
for (thread = 0; thread < nthreads; thread++) {
for (cpu = 0; cpu < ncpus; cpu++) {
if (process_counter_values(counter, cpu, thread,
perf_counts(counter->counts, cpu, thread)))
return -1;
}
}
return 0;
}
static int process_counter(struct perf_evsel *counter)
{
struct perf_counts_values *aggr = &counter->counts->aggr;
struct perf_stat *ps = counter->priv;
u64 *count = counter->counts->aggr.values;
int i, ret;
aggr->val = aggr->ena = aggr->run = 0;
init_stats(ps->res_stats);
if (counter->per_pkg)
zero_per_pkg(counter);
ret = process_counter_maps(counter);
if (ret)
return ret;
if (aggr_mode != AGGR_GLOBAL)
return 0;
if (!counter->snapshot)
perf_evsel__compute_deltas(counter, -1, -1, aggr);
perf_counts_values__scale(aggr, scale, &counter->counts->scaled);
for (i = 0; i < 3; i++)
update_stats(&ps->res_stats[i], count[i]);
if (verbose) {
fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
perf_evsel__name(counter), count[0], count[1], count[2]);
}
/*
* Save the full runtime - to allow normalization during printout:
*/
perf_stat__update_shadow_stats(counter, count, 0);
return 0;
}
/*
* Read out the results of a single counter:
* do not aggregate counts across CPUs in system-wide mode
@ -351,7 +216,7 @@ static void read_counters(bool close_counters)
if (read_counter(counter))
pr_warning("failed to read counter %s\n", counter->name);
if (process_counter(counter))
if (perf_stat_process_counter(&stat_config, counter))
pr_warning("failed to process counter %s\n", counter->name);
if (close_counters) {
@ -402,6 +267,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf
static int __run_perf_stat(int argc, const char **argv)
{
int interval = stat_config.interval;
char msg[512];
unsigned long long t0, t1;
struct perf_evsel *counter;
@ -545,13 +411,13 @@ static int run_perf_stat(int argc, const char **argv)
static void print_running(u64 run, u64 ena)
{
if (csv_output) {
fprintf(output, "%s%" PRIu64 "%s%.2f",
fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
csv_sep,
run,
csv_sep,
ena ? 100.0 * run / ena : 100.0);
} else if (run != ena) {
fprintf(output, " (%.2f%%)", 100.0 * run / ena);
fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena);
}
}
@ -560,9 +426,9 @@ static void print_noise_pct(double total, double avg)
double pct = rel_stddev_stats(total, avg);
if (csv_output)
fprintf(output, "%s%.2f%%", csv_sep, pct);
fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
else if (pct)
fprintf(output, " ( +-%6.2f%% )", pct);
fprintf(stat_config.output, " ( +-%6.2f%% )", pct);
}
static void print_noise(struct perf_evsel *evsel, double avg)
@ -578,9 +444,9 @@ static void print_noise(struct perf_evsel *evsel, double avg)
static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
{
switch (aggr_mode) {
switch (stat_config.aggr_mode) {
case AGGR_CORE:
fprintf(output, "S%d-C%*d%s%*d%s",
fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
cpu_map__id_to_socket(id),
csv_output ? 0 : -8,
cpu_map__id_to_cpu(id),
@ -590,7 +456,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
csv_sep);
break;
case AGGR_SOCKET:
fprintf(output, "S%*d%s%*d%s",
fprintf(stat_config.output, "S%*d%s%*d%s",
csv_output ? 0 : -5,
id,
csv_sep,
@ -599,12 +465,12 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
csv_sep);
break;
case AGGR_NONE:
fprintf(output, "CPU%*d%s",
fprintf(stat_config.output, "CPU%*d%s",
csv_output ? 0 : -4,
perf_evsel__cpus(evsel)->map[id], csv_sep);
break;
case AGGR_THREAD:
fprintf(output, "%*s-%*d%s",
fprintf(stat_config.output, "%*s-%*d%s",
csv_output ? 0 : 16,
thread_map__comm(evsel->threads, id),
csv_output ? 0 : -8,
@ -619,6 +485,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
{
FILE *output = stat_config.output;
double msecs = avg / 1e6;
const char *fmt_v, *fmt_n;
char name[25];
@ -643,7 +510,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
if (evsel->cgrp)
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
if (csv_output || interval)
if (csv_output || stat_config.interval)
return;
if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
@ -655,6 +522,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
{
FILE *output = stat_config.output;
double sc = evsel->scale;
const char *fmt;
int cpu = cpu_map__id_to_cpu(id);
@ -670,7 +538,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
aggr_printout(evsel, id, nr);
if (aggr_mode == AGGR_GLOBAL)
if (stat_config.aggr_mode == AGGR_GLOBAL)
cpu = 0;
fprintf(output, fmt, avg, csv_sep);
@ -685,14 +553,16 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
if (evsel->cgrp)
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
if (csv_output || interval)
if (csv_output || stat_config.interval)
return;
perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode);
perf_stat__print_shadow_stats(output, evsel, avg, cpu,
stat_config.aggr_mode);
}
static void print_aggr(char *prefix)
{
FILE *output = stat_config.output;
struct perf_evsel *counter;
int cpu, cpu2, s, s2, id, nr;
double uval;
@ -761,6 +631,7 @@ static void print_aggr(char *prefix)
static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
{
FILE *output = stat_config.output;
int nthreads = thread_map__nr(counter->threads);
int ncpus = cpu_map__nr(counter->cpus);
int cpu, thread;
@ -799,6 +670,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
*/
static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
{
FILE *output = stat_config.output;
struct perf_stat *ps = counter->priv;
double avg = avg_stats(&ps->res_stats[0]);
int scaled = counter->counts->scaled;
@ -850,6 +722,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
*/
static void print_counter(struct perf_evsel *counter, char *prefix)
{
FILE *output = stat_config.output;
u64 ena, run, val;
double uval;
int cpu;
@ -904,12 +777,13 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
static void print_interval(char *prefix, struct timespec *ts)
{
FILE *output = stat_config.output;
static int num_print_interval;
sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
if (num_print_interval == 0 && !csv_output) {
switch (aggr_mode) {
switch (stat_config.aggr_mode) {
case AGGR_SOCKET:
fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit");
break;
@ -934,6 +808,7 @@ static void print_interval(char *prefix, struct timespec *ts)
static void print_header(int argc, const char **argv)
{
FILE *output = stat_config.output;
int i;
fflush(stdout);
@ -963,6 +838,8 @@ static void print_header(int argc, const char **argv)
static void print_footer(void)
{
FILE *output = stat_config.output;
if (!null_run)
fprintf(output, "\n");
fprintf(output, " %17.9f seconds time elapsed",
@ -977,6 +854,7 @@ static void print_footer(void)
static void print_counters(struct timespec *ts, int argc, const char **argv)
{
int interval = stat_config.interval;
struct perf_evsel *counter;
char buf[64], *prefix = NULL;
@ -985,7 +863,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
else
print_header(argc, argv);
switch (aggr_mode) {
switch (stat_config.aggr_mode) {
case AGGR_CORE:
case AGGR_SOCKET:
print_aggr(prefix);
@ -1009,14 +887,14 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
if (!interval && !csv_output)
print_footer();
fflush(output);
fflush(stat_config.output);
}
static volatile int signr = -1;
static void skip_signal(int signo)
{
if ((child_pid == -1) || interval)
if ((child_pid == -1) || stat_config.interval)
done = 1;
signr = signo;
@ -1064,7 +942,7 @@ static int stat__set_big_num(const struct option *opt __maybe_unused,
static int perf_stat_init_aggr_mode(void)
{
switch (aggr_mode) {
switch (stat_config.aggr_mode) {
case AGGR_SOCKET:
if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
perror("cannot build socket map");
@ -1270,7 +1148,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
"system-wide collection from all CPUs"),
OPT_BOOLEAN('g', "group", &group,
"put the counters into a counter group"),
OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_INTEGER('r', "repeat", &run_count,
@ -1286,7 +1164,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
stat__set_big_num),
OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
"list of cpus to monitor in system-wide"),
OPT_SET_UINT('A', "no-aggr", &aggr_mode,
OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
"disable CPU count aggregation", AGGR_NONE),
OPT_STRING('x', "field-separator", &csv_sep, "separator",
"print counts with custom separator"),
@ -1300,13 +1178,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
"command to run prior to the measured command"),
OPT_STRING(0, "post", &post_cmd, "command",
"command to run after to the measured command"),
OPT_UINTEGER('I', "interval-print", &interval,
OPT_UINTEGER('I', "interval-print", &stat_config.interval,
"print counts at regular interval in ms (>= 100)"),
OPT_SET_UINT(0, "per-socket", &aggr_mode,
OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
"aggregate counts per processor socket", AGGR_SOCKET),
OPT_SET_UINT(0, "per-core", &aggr_mode,
OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
"aggregate counts per physical processor core", AGGR_CORE),
OPT_SET_UINT(0, "per-thread", &aggr_mode,
OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
"aggregate counts per thread", AGGR_THREAD),
OPT_UINTEGER('D', "delay", &initial_delay,
"ms to wait before starting measurement after program start"),
@ -1318,6 +1196,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
};
int status = -EINVAL, run_idx;
const char *mode;
FILE *output = stderr;
unsigned int interval;
setlocale(LC_ALL, "");
@ -1328,7 +1208,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
argc = parse_options(argc, argv, options, stat_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
output = stderr;
interval = stat_config.interval;
if (output_name && strcmp(output_name, "-"))
output = NULL;
@ -1365,6 +1246,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
}
}
stat_config.output = output;
if (csv_sep) {
csv_output = true;
if (!strcmp(csv_sep, "\\t"))
@ -1399,7 +1282,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
run_count = 1;
}
if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
fprintf(stderr, "The --per-thread option is only available "
"when monitoring via -p -t options.\n");
parse_options_usage(NULL, options, "p", 1);
@ -1411,7 +1294,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
* no_aggr, cgroup are for system-wide only
* --per-thread is aggregated per thread, we don't mix it with cpu mode
*/
if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) &&
if (((stat_config.aggr_mode != AGGR_GLOBAL &&
stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
!target__has_cpu(&target)) {
fprintf(stderr, "both cgroup and no-aggregation "
"modes only available in system-wide mode\n");
@ -1444,7 +1328,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
* Initialize thread_map with comm names,
* so we could print it out on output.
*/
if (aggr_mode == AGGR_THREAD)
if (stat_config.aggr_mode == AGGR_THREAD)
thread_map__read_comms(evsel_list->threads);
if (interval && interval < 100) {


@ -40,6 +40,7 @@
#include "util/xyarray.h"
#include "util/sort.h"
#include "util/intlist.h"
#include "util/parse-branch-options.h"
#include "arch/common.h"
#include "util/debug.h"
@ -695,6 +696,8 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
perf_top__record_precise_ip(top, he, evsel->idx, ip);
}
hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
!(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY));
return 0;
}
@ -1171,6 +1174,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
"don't try to adjust column width, use these fixed values"),
OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
OPT_CALLBACK_NOOPT('b', "branch-any", &opts->branch_stack,
"branch any", "sample any taken branches",
parse_branch_stack),
OPT_CALLBACK('j', "branch-filter", &opts->branch_stack,
"branch filter mask", "branch stack filter modes",
parse_branch_stack),
OPT_END()
};
const char * const top_usage[] = {


@ -2773,9 +2773,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
printed += fprintf(fp, "\n");
printed += fprintf(fp, " syscall calls min avg max stddev\n");
printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
printed += fprintf(fp, " syscall calls total min avg max stddev\n");
printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
/* each int_node is a syscall */
while (inode) {
@ -2792,8 +2792,8 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
sc = &trace->syscalls.table[inode->i];
printed += fprintf(fp, " %-15s", sc->name);
printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
n, min, avg);
printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
n, avg * n, min, avg);
printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
}
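
A note on the hunk above: the new 'total' column is not accumulated
separately, it is derived as avg * n. Since avg is the running mean of the
per-call durations, this recovers their sum exactly, up to floating-point
rounding.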


@ -50,7 +50,7 @@ copy_kcore()
fi
rm -f perf.data.junk
("$PERF" record -o perf.data.junk $PERF_OPTIONS -- sleep 60) >/dev/null 2>/dev/null &
("$PERF" record -o perf.data.junk "${PERF_OPTIONS[@]}" -- sleep 60) >/dev/null 2>/dev/null &
PERF_PID=$!
# Need to make sure that perf has started
@ -160,18 +160,18 @@ record()
echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2
fi
if echo "$PERF_OPTIONS" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then
if echo "${PERF_OPTIONS[@]}" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then
echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2
fi
if echo "$PERF_OPTIONS" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then
if echo "${PERF_OPTIONS[@]}" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then
if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then
echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2
fi
if echo "$PERF_OPTIONS" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then
if echo "${PERF_OPTIONS[@]}" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then
true
elif echo "$PERF_OPTIONS" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then
elif echo "${PERF_OPTIONS[@]}" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then
true
elif [ ! -r /sys/kernel/debug -o ! -x /sys/kernel/debug ] ; then
echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2
@ -193,8 +193,8 @@ record()
mkdir "$PERF_DATA_DIR"
echo "$PERF record -o $PERF_DATA_DIR/perf.data $PERF_OPTIONS -- $*"
"$PERF" record -o "$PERF_DATA_DIR/perf.data" $PERF_OPTIONS -- $* || true
echo "$PERF record -o $PERF_DATA_DIR/perf.data ${PERF_OPTIONS[@]} -- $@"
"$PERF" record -o "$PERF_DATA_DIR/perf.data" "${PERF_OPTIONS[@]}" -- "$@" || true
if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then
exit 1
@ -209,8 +209,8 @@ subcommand()
{
find_perf
check_buildid_cache_permissions
echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $*"
"$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" $*
echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $@"
"$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" "$@"
}
if [ "$1" = "fix_buildid_cache_permissions" ] ; then
@ -234,7 +234,7 @@ fi
case "$PERF_SUB_COMMAND" in
"record")
while [ "$1" != "--" ] ; do
PERF_OPTIONS+="$1 "
PERF_OPTIONS+=("$1")
shift || break
done
if [ "$1" != "--" ] ; then
@ -242,16 +242,16 @@ case "$PERF_SUB_COMMAND" in
usage
fi
shift
record $*
record "$@"
;;
"script")
subcommand $*
subcommand "$@"
;;
"report")
subcommand $*
subcommand "$@"
;;
"inject")
subcommand $*
subcommand "$@"
;;
*)
usage
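
The array conversion matters whenever a recorded option value contains
spaces. A tiny illustration (bash, not part of the script):

	opts="--filter 'arg > 1' -e probe:foo"
	printf '<%s>\n' $opts               # word-split into 6 mangled pieces
	opts_arr=(--filter 'arg > 1' -e probe:foo)
	printf '<%s>\n' "${opts_arr[@]}"    # 4 arguments, filter intact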


@ -16,6 +16,9 @@ struct disasm_line_samples {
u64 nr;
};
#define IPC_WIDTH 6
#define CYCLES_WIDTH 6
struct browser_disasm_line {
struct rb_node rb_node;
u32 idx;
@ -53,6 +56,7 @@ struct annotate_browser {
int max_jump_sources;
int nr_jumps;
bool searching_backwards;
bool have_cycles;
u8 addr_width;
u8 jumps_width;
u8 target_width;
@ -96,6 +100,15 @@ static int annotate_browser__set_jumps_percent_color(struct annotate_browser *br
return ui_browser__set_color(&browser->b, color);
}
static int annotate_browser__pcnt_width(struct annotate_browser *ab)
{
int w = 7 * ab->nr_events;
if (ab->have_cycles)
w += IPC_WIDTH + CYCLES_WIDTH;
return w;
}
static void annotate_browser__write(struct ui_browser *browser, void *entry, int row)
{
struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
@ -106,7 +119,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
(!current_entry || (browser->use_navkeypressed &&
!browser->navkeypressed)));
int width = browser->width, printed;
int i, pcnt_width = 7 * ab->nr_events;
int i, pcnt_width = annotate_browser__pcnt_width(ab);
double percent_max = 0.0;
char bf[256];
@ -116,19 +129,34 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
}
if (dl->offset != -1 && percent_max != 0.0) {
for (i = 0; i < ab->nr_events; i++) {
ui_browser__set_percent_color(browser,
bdl->samples[i].percent,
current_entry);
if (annotate_browser__opts.show_total_period)
slsmg_printf("%6" PRIu64 " ",
bdl->samples[i].nr);
else
slsmg_printf("%6.2f ", bdl->samples[i].percent);
if (percent_max != 0.0) {
for (i = 0; i < ab->nr_events; i++) {
ui_browser__set_percent_color(browser,
bdl->samples[i].percent,
current_entry);
if (annotate_browser__opts.show_total_period)
slsmg_printf("%6" PRIu64 " ",
bdl->samples[i].nr);
else
slsmg_printf("%6.2f ", bdl->samples[i].percent);
}
} else {
slsmg_write_nstring(" ", 7 * ab->nr_events);
}
} else {
ui_browser__set_percent_color(browser, 0, current_entry);
slsmg_write_nstring(" ", pcnt_width);
slsmg_write_nstring(" ", 7 * ab->nr_events);
}
if (ab->have_cycles) {
if (dl->ipc)
slsmg_printf("%*.2f ", IPC_WIDTH - 1, dl->ipc);
else
slsmg_write_nstring(" ", IPC_WIDTH);
if (dl->cycles)
slsmg_printf("%*" PRIu64 " ",
CYCLES_WIDTH - 1, dl->cycles);
else
slsmg_write_nstring(" ", CYCLES_WIDTH);
}
SLsmg_write_char(' ');
@ -231,7 +259,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
unsigned int from, to;
struct map_symbol *ms = ab->b.priv;
struct symbol *sym = ms->sym;
u8 pcnt_width = 7;
u8 pcnt_width = annotate_browser__pcnt_width(ab);
/* PLT symbols contain external offsets */
if (strstr(sym->name, "@plt"))
@ -255,8 +283,6 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
to = (u64)btarget->idx;
}
pcnt_width *= ab->nr_events;
ui_browser__set_color(browser, HE_COLORSET_CODE);
__ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width,
from, to);
@ -266,9 +292,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser)
{
struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
int ret = ui_browser__list_head_refresh(browser);
int pcnt_width;
pcnt_width = 7 * ab->nr_events;
int pcnt_width = annotate_browser__pcnt_width(ab);
if (annotate_browser__opts.jump_arrows)
annotate_browser__draw_current_jump(browser);
@ -390,7 +414,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
max_percent = bpos->samples[i].percent;
}
if (max_percent < 0.01) {
if (max_percent < 0.01 && pos->ipc == 0) {
RB_CLEAR_NODE(&bpos->rb_node);
continue;
}
@ -869,6 +893,75 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
return map_symbol__tui_annotate(&he->ms, evsel, hbt);
}
static unsigned count_insn(struct annotate_browser *browser, u64 start, u64 end)
{
unsigned n_insn = 0;
u64 offset;
for (offset = start; offset <= end; offset++) {
if (browser->offsets[offset])
n_insn++;
}
return n_insn;
}
static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end,
struct cyc_hist *ch)
{
unsigned n_insn;
u64 offset;
n_insn = count_insn(browser, start, end);
if (n_insn && ch->num && ch->cycles) {
float ipc = n_insn / ((double)ch->cycles / (double)ch->num);
/* Hide data when there are too many overlaps. */
if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2)
return;
for (offset = start; offset <= end; offset++) {
struct disasm_line *dl = browser->offsets[offset];
if (dl)
dl->ipc = ipc;
}
}
}
/*
* This should probably be in util/annotate.c to share with the tty
* annotate, but right now we need the per-byte offsets arrays,
* which are only here.
*/
static void annotate__compute_ipc(struct annotate_browser *browser, size_t size,
struct symbol *sym)
{
u64 offset;
struct annotation *notes = symbol__annotation(sym);
if (!notes->src || !notes->src->cycles_hist)
return;
pthread_mutex_lock(&notes->lock);
for (offset = 0; offset < size; ++offset) {
struct cyc_hist *ch;
ch = &notes->src->cycles_hist[offset];
if (ch && ch->cycles) {
struct disasm_line *dl;
if (ch->have_start)
count_and_fill(browser, ch->start, offset, ch);
dl = browser->offsets[offset];
if (dl && ch->num_aggr)
dl->cycles = ch->cycles_aggr / ch->num_aggr;
browser->have_cycles = true;
}
}
pthread_mutex_unlock(&notes->lock);
}
static void annotate_browser__mark_jump_targets(struct annotate_browser *browser,
size_t size)
{
@ -991,6 +1084,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
}
annotate_browser__mark_jump_targets(&browser, size);
annotate__compute_ipc(&browser, size, sym);
browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size);
browser.max_addr_width = hex_width(sym->end);
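
To make the IPC computation in count_and_fill() above concrete: ipc =
n_insn / (cycles / num). A basic block covering 8 instructions whose
terminating branch was sampled 4 times with 16 cycles in total averages
16 / 4 = 4 cycles per traversal, so each disasm_line in the block gets an
IPC of 8 / 4 = 2.0.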


@ -473,17 +473,73 @@ int symbol__alloc_hist(struct symbol *sym)
return 0;
}
/* The cycles histogram is lazily allocated. */
static int symbol__alloc_hist_cycles(struct symbol *sym)
{
struct annotation *notes = symbol__annotation(sym);
const size_t size = symbol__size(sym);
notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist));
if (notes->src->cycles_hist == NULL)
return -1;
return 0;
}
void symbol__annotate_zero_histograms(struct symbol *sym)
{
struct annotation *notes = symbol__annotation(sym);
pthread_mutex_lock(&notes->lock);
if (notes->src != NULL)
if (notes->src != NULL) {
memset(notes->src->histograms, 0,
notes->src->nr_histograms * notes->src->sizeof_sym_hist);
if (notes->src->cycles_hist)
memset(notes->src->cycles_hist, 0,
symbol__size(sym) * sizeof(struct cyc_hist));
}
pthread_mutex_unlock(&notes->lock);
}
static int __symbol__account_cycles(struct annotation *notes,
u64 start,
unsigned offset, unsigned cycles,
unsigned have_start)
{
struct cyc_hist *ch;
ch = notes->src->cycles_hist;
/*
* For now we can only account one basic block per
* final jump. But multiple could be overlapping.
* Always account the longest one. So when
* a shorter one has already been seen, throw it away.
*
* We separately always account the full cycles.
*/
ch[offset].num_aggr++;
ch[offset].cycles_aggr += cycles;
if (!have_start && ch[offset].have_start)
return 0;
if (ch[offset].num) {
if (have_start && (!ch[offset].have_start ||
ch[offset].start > start)) {
ch[offset].have_start = 0;
ch[offset].cycles = 0;
ch[offset].num = 0;
if (ch[offset].reset < 0xffff)
ch[offset].reset++;
} else if (have_start &&
ch[offset].start < start)
return 0;
}
ch[offset].have_start = have_start;
ch[offset].start = start;
ch[offset].cycles += cycles;
ch[offset].num++;
return 0;
}
static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
struct annotation *notes, int evidx, u64 addr)
{
@ -506,7 +562,7 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
return 0;
}
static struct annotation *symbol__get_annotation(struct symbol *sym)
static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles)
{
struct annotation *notes = symbol__annotation(sym);
@ -514,6 +570,10 @@ static struct annotation *symbol__get_annotation(struct symbol *sym)
if (symbol__alloc_hist(sym) < 0)
return NULL;
}
if (!notes->src->cycles_hist && cycles) {
if (symbol__alloc_hist_cycles(sym) < 0)
return NULL;
}
return notes;
}
@ -524,12 +584,73 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
if (sym == NULL)
return 0;
notes = symbol__get_annotation(sym);
notes = symbol__get_annotation(sym, false);
if (notes == NULL)
return -ENOMEM;
return __symbol__inc_addr_samples(sym, map, notes, evidx, addr);
}
static int symbol__account_cycles(u64 addr, u64 start,
struct symbol *sym, unsigned cycles)
{
struct annotation *notes;
unsigned offset;
if (sym == NULL)
return 0;
notes = symbol__get_annotation(sym, true);
if (notes == NULL)
return -ENOMEM;
if (addr < sym->start || addr >= sym->end)
return -ERANGE;
if (start) {
if (start < sym->start || start >= sym->end)
return -ERANGE;
if (start >= addr)
start = 0;
}
offset = addr - sym->start;
return __symbol__account_cycles(notes,
start ? start - sym->start : 0,
offset, cycles,
!!start);
}
int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
struct addr_map_symbol *start,
unsigned cycles)
{
unsigned long saddr = 0;
int err;
if (!cycles)
return 0;
/*
* Only set start when IPC can be computed. We can only
* compute it when the basic block is completely in a single
* function.
* Special case when the jump is elsewhere, but
* it starts on the function start.
*/
if (start &&
(start->sym == ams->sym ||
(ams->sym &&
start->addr == ams->sym->start + ams->map->start)))
saddr = start->al_addr;
if (saddr == 0)
pr_debug2("BB with bad start: addr %lx start %lx sym %lx saddr %lx\n",
ams->addr,
start ? start->addr : 0,
ams->sym ? ams->sym->start + ams->map->start : 0,
saddr);
err = symbol__account_cycles(ams->al_addr, saddr, ams->sym, cycles);
if (err)
pr_debug2("account_cycles failed %d\n", err);
return err;
}
int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx)
{
return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr);


@ -59,6 +59,8 @@ struct disasm_line {
char *name;
struct ins *ins;
int line_nr;
float ipc;
u64 cycles;
struct ins_operands ops;
};
@ -79,6 +81,17 @@ struct sym_hist {
u64 addr[0];
};
struct cyc_hist {
u64 start;
u64 cycles;
u64 cycles_aggr;
u32 num;
u32 num_aggr;
u8 have_start;
/* 1 byte padding */
u16 reset;
};
struct source_line_samples {
double percent;
double percent_sum;
@ -97,6 +110,7 @@ struct source_line {
* @histogram: Array of addr hit histograms per event being monitored
* @lines: If 'print_lines' is specified, per source code line percentages
* @source: source parsed from a disassembler like objdump -dS
* @cycles_hist: Average cycles per basic block
*
* lines is allocated, percentages calculated and all sorted by percentage
* when the annotation is about to be presented, so the percentages are for
@ -109,6 +123,7 @@ struct annotated_source {
struct source_line *lines;
int nr_histograms;
int sizeof_sym_hist;
struct cyc_hist *cycles_hist;
struct sym_hist histograms[0];
};
@ -130,6 +145,10 @@ static inline struct annotation *symbol__annotation(struct symbol *sym)
int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx);
int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
struct addr_map_symbol *start,
unsigned cycles);
int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);
int symbol__alloc_hist(struct symbol *sym);


@ -942,6 +942,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
struct itrace_synth_opts *synth_opts = opt->value;
const char *p;
char *endptr;
bool period_type_set = false;
synth_opts->set = true;
@ -970,10 +971,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
case 'i':
synth_opts->period_type =
PERF_ITRACE_PERIOD_INSTRUCTIONS;
period_type_set = true;
break;
case 't':
synth_opts->period_type =
PERF_ITRACE_PERIOD_TICKS;
period_type_set = true;
break;
case 'm':
synth_opts->period *= 1000;
@ -986,6 +989,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
goto out_err;
synth_opts->period_type =
PERF_ITRACE_PERIOD_NANOSECS;
period_type_set = true;
break;
case '\0':
goto out;
@ -1039,7 +1043,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
}
out:
if (synth_opts->instructions) {
if (!synth_opts->period_type)
if (!period_type_set)
synth_opts->period_type =
PERF_ITRACE_DEFAULT_PERIOD_TYPE;
if (!synth_opts->period)
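
The separate period_type_set flag is needed because the old check relied on
the enum value itself: PERF_ITRACE_PERIOD_INSTRUCTIONS is, as the first
enumerator, presumably 0, so '!synth_opts->period_type' could not
distinguish an explicit 'i' from no period type having been given at all.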


@ -36,6 +36,11 @@ static int _eprintf(int level, int var, const char *fmt, va_list args)
return ret;
}
int veprintf(int level, int var, const char *fmt, va_list args)
{
return _eprintf(level, var, fmt, args);
}
int eprintf(int level, int var, const char *fmt, ...)
{
va_list args;


@ -50,6 +50,7 @@ void pr_stat(const char *fmt, ...);
int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4)));
int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5)));
int veprintf(int level, int var, const char *fmt, va_list args);
int perf_debug_option(const char *str);


@ -134,7 +134,8 @@ struct branch_flags {
u64 predicted:1;
u64 in_tx:1;
u64 abort:1;
u64 reserved:60;
u64 cycles:16;
u64 reserved:44;
};
struct branch_entry {
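
The new 16-bit cycles field is carved out of the previously reserved space
(60 = 16 + 44), so the four single-bit flags, cycles and reserved still add
up to a full 64-bit word.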


@ -1273,6 +1273,16 @@ u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
return __perf_evlist__combined_sample_type(evlist);
}
u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
{
struct perf_evsel *evsel;
u64 branch_type = 0;
evlist__for_each(evlist, evsel)
branch_type |= evsel->attr.branch_sample_type;
return branch_type;
}
bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
{
struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;


@ -165,6 +165,7 @@ void perf_evlist__set_leader(struct perf_evlist *evlist);
u64 perf_evlist__read_format(struct perf_evlist *evlist);
u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist);
u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist);
u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist);
bool perf_evlist__sample_id_all(struct perf_evlist *evlist);
u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist);


@ -618,7 +618,8 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a
* and not events sampled. Thus we use a pseudo period of 1.
*/
he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL,
1, 1, 0, true);
1, bi->flags.cycles ? bi->flags.cycles : 1,
0, true);
if (he == NULL)
return -ENOMEM;
@ -1414,6 +1415,39 @@ int hists__link(struct hists *leader, struct hists *other)
return 0;
}
void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
struct perf_sample *sample, bool nonany_branch_mode)
{
struct branch_info *bi;
/* If we have branch cycles always annotate them. */
if (bs && bs->nr && bs->entries[0].flags.cycles) {
int i;
bi = sample__resolve_bstack(sample, al);
if (bi) {
struct addr_map_symbol *prev = NULL;
/*
* Ignore errors, still want to process the
* other entries.
*
* For non-standard branch modes always
* force no IPC (prev == NULL)
*
* Note that perf stores branches reversed from
* program order!
*/
for (i = bs->nr - 1; i >= 0; i--) {
addr_map_symbol__account_cycles(&bi[i].from,
nonany_branch_mode ? NULL : prev,
bi[i].flags.cycles);
prev = &bi[i].to;
}
free(bi);
}
}
}
size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp)
{


@ -47,6 +47,7 @@ enum hist_column {
HISTC_MEM_SNOOP,
HISTC_MEM_DCACHELINE,
HISTC_TRANSACTION,
HISTC_CYCLES,
HISTC_NR_COLS, /* Last entry */
};
@ -349,6 +350,9 @@ static inline int script_browse(const char *script_opt __maybe_unused)
unsigned int hists__sort_list_width(struct hists *hists);
void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
struct perf_sample *sample, bool nonany_branch_mode);
struct option;
int parse_filter_percentage(const struct option *opt __maybe_unused,
const char *arg, int unset __maybe_unused);


@ -1168,7 +1168,7 @@ static void parse_events_print_error(struct parse_events_error *err,
* Maximum error index indent, we will cut
* the event string if it's bigger.
*/
int max_err_idx = 10;
int max_err_idx = 13;
/*
* Let's be specific with the message when


@ -542,7 +542,7 @@ struct perf_pmu *perf_pmu__find(const char *name)
}
static struct perf_pmu_format *
pmu_find_format(struct list_head *formats, char *name)
pmu_find_format(struct list_head *formats, const char *name)
{
struct perf_pmu_format *format;
@ -553,6 +553,21 @@ pmu_find_format(struct list_head *formats, char *name)
return NULL;
}
__u64 perf_pmu__format_bits(struct list_head *formats, const char *name)
{
struct perf_pmu_format *format = pmu_find_format(formats, name);
__u64 bits = 0;
int fbit;
if (!format)
return 0;
for_each_set_bit(fbit, format->bits, PERF_PMU_FORMAT_BITS)
bits |= 1ULL << fbit;
return bits;
}
/*
* Sets value based on the format definition (format parameter)
* and unformatted value (value parameter).
@ -574,6 +589,18 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v,
}
}
static __u64 pmu_format_max_value(const unsigned long *format)
{
int w;
w = bitmap_weight(format, PERF_PMU_FORMAT_BITS);
if (!w)
return 0;
if (w < 64)
return (1ULL << w) - 1;
return -1;
}
/*
* Term is a string term, and might be a param-term. Try to look up its value
* in the remaining terms.
@ -647,7 +674,7 @@ static int pmu_config_term(struct list_head *formats,
{
struct perf_pmu_format *format;
__u64 *vp;
__u64 val;
__u64 val, max_val;
/*
* If this is a parameter we've already used for parameterized-eval,
@ -713,6 +740,22 @@ static int pmu_config_term(struct list_head *formats,
} else
return -EINVAL;
max_val = pmu_format_max_value(format->bits);
if (val > max_val) {
if (err) {
err->idx = term->err_val;
if (asprintf(&err->str,
"value too big for format, maximum is %llu",
(unsigned long long)max_val) < 0)
err->str = strdup("value too big for format");
return -EINVAL;
}
/*
* Assume we don't care if !err, in which case the value will be
* silently truncated.
*/
}
pmu_format_value(format->bits, val, vp, zero);
return 0;
}
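
A worked example (hypothetical PMU, not from this patch): if a PMU's
format/event file reads config:0-7, the field spans 8 bits and
pmu_format_max_value() returns (1ULL << 8) - 1 = 255; a term such as
event=0x1ff now fails with "value too big for format, maximum is 255"
instead of being silently truncated to 0xff.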


@ -54,6 +54,7 @@ int perf_pmu__config_terms(struct list_head *formats,
struct perf_event_attr *attr,
struct list_head *head_terms,
bool zero, struct parse_events_error *error);
__u64 perf_pmu__format_bits(struct list_head *formats, const char *name);
int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
struct perf_pmu_info *info);
struct list_head *perf_pmu__alias(struct perf_pmu *pmu,


@ -106,6 +106,8 @@ struct variable_list {
struct strlist *vars; /* Available variables */
};
struct map;
/* Command string to events */
extern int parse_perf_probe_command(const char *cmd,
struct perf_probe_event *pev);


@ -784,10 +784,18 @@ static void branch_stack__printf(struct perf_sample *sample)
printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr);
for (i = 0; i < sample->branch_stack->nr; i++)
printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n",
i, sample->branch_stack->entries[i].from,
sample->branch_stack->entries[i].to);
for (i = 0; i < sample->branch_stack->nr; i++) {
struct branch_entry *e = &sample->branch_stack->entries[i];
printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
i, e->from, e->to,
e->flags.cycles,
e->flags.mispred ? "M" : " ",
e->flags.predicted ? "P" : " ",
e->flags.abort ? "A" : " ",
e->flags.in_tx ? "T" : " ",
(unsigned)e->flags.reserved);
}
}
static void regs_dump__printf(u64 mask, u64 *regs)


@ -9,7 +9,7 @@ regex_t parent_regex;
const char default_parent_pattern[] = "^sys_|^do_page_fault";
const char *parent_pattern = default_parent_pattern;
const char default_sort_order[] = "comm,dso,symbol";
const char default_branch_sort_order[] = "comm,dso_from,symbol_from,dso_to,symbol_to";
const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
const char default_top_sort_order[] = "dso,symbol";
const char default_diff_sort_order[] = "dso,symbol";
@ -526,6 +526,29 @@ static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf,
return repsep_snprintf(bf, size, "%-*.*s", width, width, out);
}
static int64_t
sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right)
{
return left->branch_info->flags.cycles -
right->branch_info->flags.cycles;
}
static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width)
{
if (he->branch_info->flags.cycles == 0)
return repsep_snprintf(bf, size, "%-*s", width, "-");
return repsep_snprintf(bf, size, "%-*hd", width,
he->branch_info->flags.cycles);
}
struct sort_entry sort_cycles = {
.se_header = "Basic Block Cycles",
.se_cmp = sort__cycles_cmp,
.se_snprintf = hist_entry__cycles_snprintf,
.se_width_idx = HISTC_CYCLES,
};
/* --sort daddr_sym */
static int64_t
sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
@ -1190,6 +1213,7 @@ static struct sort_dimension bstack_sort_dimensions[] = {
DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
DIM(SORT_IN_TX, "in_tx", sort_in_tx),
DIM(SORT_ABORT, "abort", sort_abort),
DIM(SORT_CYCLES, "cycles", sort_cycles),
};
#undef DIM


@ -185,6 +185,7 @@ enum sort_type {
SORT_MISPREDICT,
SORT_ABORT,
SORT_IN_TX,
SORT_CYCLES,
/* memory mode specific sort keys */
__SORT_MEMORY_MODE,


@ -238,3 +238,142 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist)
perf_evsel__reset_counts(evsel);
}
}
static void zero_per_pkg(struct perf_evsel *counter)
{
if (counter->per_pkg_mask)
memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
}
static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
{
unsigned long *mask = counter->per_pkg_mask;
struct cpu_map *cpus = perf_evsel__cpus(counter);
int s;
*skip = false;
if (!counter->per_pkg)
return 0;
if (cpu_map__empty(cpus))
return 0;
if (!mask) {
mask = zalloc(MAX_NR_CPUS);
if (!mask)
return -ENOMEM;
counter->per_pkg_mask = mask;
}
s = cpu_map__get_socket(cpus, cpu);
if (s < 0)
return -1;
*skip = test_and_set_bit(s, mask) == 1;
return 0;
}
static int
process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel,
int cpu, int thread,
struct perf_counts_values *count)
{
struct perf_counts_values *aggr = &evsel->counts->aggr;
static struct perf_counts_values zero;
bool skip = false;
if (check_per_pkg(evsel, cpu, &skip)) {
pr_err("failed to read per-pkg counter\n");
return -1;
}
if (skip)
count = &zero;
switch (config->aggr_mode) {
case AGGR_THREAD:
case AGGR_CORE:
case AGGR_SOCKET:
case AGGR_NONE:
if (!evsel->snapshot)
perf_evsel__compute_deltas(evsel, cpu, thread, count);
perf_counts_values__scale(count, config->scale, NULL);
if (config->aggr_mode == AGGR_NONE)
perf_stat__update_shadow_stats(evsel, count->values, cpu);
break;
case AGGR_GLOBAL:
aggr->val += count->val;
if (config->scale) {
aggr->ena += count->ena;
aggr->run += count->run;
}
default:
break;
}
return 0;
}
static int process_counter_maps(struct perf_stat_config *config,
struct perf_evsel *counter)
{
int nthreads = thread_map__nr(counter->threads);
int ncpus = perf_evsel__nr_cpus(counter);
int cpu, thread;
if (counter->system_wide)
nthreads = 1;
for (thread = 0; thread < nthreads; thread++) {
for (cpu = 0; cpu < ncpus; cpu++) {
if (process_counter_values(config, counter, cpu, thread,
perf_counts(counter->counts, cpu, thread)))
return -1;
}
}
return 0;
}
int perf_stat_process_counter(struct perf_stat_config *config,
struct perf_evsel *counter)
{
struct perf_counts_values *aggr = &counter->counts->aggr;
struct perf_stat *ps = counter->priv;
u64 *count = counter->counts->aggr.values;
int i, ret;
aggr->val = aggr->ena = aggr->run = 0;
init_stats(ps->res_stats);
if (counter->per_pkg)
zero_per_pkg(counter);
ret = process_counter_maps(config, counter);
if (ret)
return ret;
if (config->aggr_mode != AGGR_GLOBAL)
return 0;
if (!counter->snapshot)
perf_evsel__compute_deltas(counter, -1, -1, aggr);
perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
for (i = 0; i < 3; i++)
update_stats(&ps->res_stats[i], count[i]);
if (verbose) {
fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
perf_evsel__name(counter), count[0], count[1], count[2]);
}
/*
* Save the full runtime - to allow normalization during printout:
*/
perf_stat__update_shadow_stats(counter, count, 0);
return 0;
}


@ -50,6 +50,13 @@ struct perf_counts {
struct xyarray *values;
};
struct perf_stat_config {
enum aggr_mode aggr_mode;
bool scale;
FILE *output;
unsigned int interval;
};
static inline struct perf_counts_values*
perf_counts(struct perf_counts *counts, int cpu, int thread)
{
@ -109,4 +116,7 @@ int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw);
int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
void perf_evlist__free_stats(struct perf_evlist *evlist);
void perf_evlist__reset_stats(struct perf_evlist *evlist);
int perf_stat_process_counter(struct perf_stat_config *config,
struct perf_evsel *counter);
#endif
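
The struct makes the counter-processing path reusable outside builtin-stat.
A minimal sketch of the new calling convention, mirroring what
builtin-stat.c does after this change ('counter' is assumed to be an
already-read struct perf_evsel):

	struct perf_stat_config config = {
		.aggr_mode = AGGR_GLOBAL,	/* aggregate across all CPUs */
		.scale	   = true,		/* scale by time enabled/running */
		.output	   = stderr,
		.interval  = 0,			/* no interval printing */
	};

	if (perf_stat_process_counter(&config, counter))
		pr_warning("failed to process counter %s\n", counter->name);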