perf/core improvements and fixes:
User visible: - IPC and cycle accounting in 'perf annotate' (Andi Kleen) - Display cycles in branch sort mode in 'perf report' (Andi Kleen) - Add total time column to 'perf trace' syscall stats summary (Milian Woff) Infrastructure: - PMU helpers to use in Intel PT (Adrian Hunter) - Fix perf-with-kcore script not to split args with spaces (Adrian Hunter) - Add empty Build files for some more architectures (Ben Hutchings) - Move 'perf stat' config variables to a struct to allow using some of its functions in more places (Jiri Olsa) - Add DWARF register names for 'xtensa' arch (Max Filippov) - Implement BPF programs attached to uprobes (Wang Nan) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAABAgAGBQJVxA9EAAoJENZQFvNTUqpAP88P/0I8l7DJrD4e2PpwSIuwUPII kq8fYMJ4OpR2XiGjyny68iZmnASIon+cV6AoidZ27eqG/+qmhzgv9nCjMlPUIpAT KcilxUXjOc2xba8nUdrNRKHKdcxcvp5iuw1dXkfJuf5U5l7cSTv5tko6vDaA6ngH bpmn8wa73ajRwtTErgBSJAwQVMPzo9Ods/FLeZK6t0hYNYNN9ISp7pq+0RhEnNVb gtlE1/DgGccsTs9NDWQqi3bmvCVsVMhaeWLDyCBjx/cwkwuhdhYHAfs8Llmse+51 7adaqFQ7BZMS/8wXCUwCNnIMBBURpQodW/3H//GQ8CtBSNRt+EX8u0zHs+aJj/NR JqlRVOxhFFJU3E/67HDnU1IM9ANQYZq2JomQ1B+PJTUBZvUaBQtKfFlfKhI36o21 2Sv/fsOjcZLJePBPeUVgjCmBvc0vAUBHPN23wHMyP8o6I6NTmTb3LvomZGaO7Af5 HuebGfd92ahVPT1/h3y5lVDnjiNYikoNKJdDh8JiTTbuj8LtvhHN/o5AeAP3Ig2H kJEWMbSuDCdUPRGYeW4z3aDDP0/vxEH8+kXWoTSwORVZcXXbg38eoYcOPqfQQWQt 80+Rf3sTNt8RCoKWJ0AS+nS/S2HWHrJ5G4DIdc1ldm+zL6ElkOkPVm1W7EqvCBd6 tZP13miwhMxritYGX7pM =b2ho -----END PGP SIGNATURE----- Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: User visible changes: - IPC and cycle accounting in 'perf annotate'. (Andi Kleen) - Display cycles in branch sort mode in 'perf report'. (Andi Kleen) - Add total time column to 'perf trace' syscall stats summary. (Milian Woff) Infrastructure changes: - PMU helpers to use in Intel PT. 
(Adrian Hunter) - Fix perf-with-kcore script not to split args with spaces. (Adrian Hunter) - Add empty Build files for some more architectures. (Ben Hutchings) - Move 'perf stat' config variables to a struct to allow using some of its functions in more places. (Jiri Olsa) - Add DWARF register names for 'xtensa' arch. (Max Filippov) - Implement BPF programs attached to uprobes. (Wang Nan) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
1354ac6ad8
|
@ -243,6 +243,7 @@ enum {
|
|||
TRACE_EVENT_FL_USE_CALL_FILTER_BIT,
|
||||
TRACE_EVENT_FL_TRACEPOINT_BIT,
|
||||
TRACE_EVENT_FL_KPROBE_BIT,
|
||||
TRACE_EVENT_FL_UPROBE_BIT,
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -257,6 +258,7 @@ enum {
|
|||
* USE_CALL_FILTER - For trace internal events, don't use file filter
|
||||
* TRACEPOINT - Event is a tracepoint
|
||||
* KPROBE - Event is a kprobe
|
||||
* UPROBE - Event is a uprobe
|
||||
*/
|
||||
enum {
|
||||
TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
|
||||
|
@ -267,8 +269,11 @@ enum {
|
|||
TRACE_EVENT_FL_USE_CALL_FILTER = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT),
|
||||
TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
|
||||
TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT),
|
||||
TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT),
|
||||
};
|
||||
|
||||
#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)
|
||||
|
||||
struct trace_event_call {
|
||||
struct list_head list;
|
||||
struct trace_event_class *class;
|
||||
|
@ -542,7 +547,7 @@ event_trigger_unlock_commit_regs(struct trace_event_file *file,
|
|||
event_triggers_post_call(file, tt);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
#ifdef CONFIG_BPF_EVENTS
|
||||
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
|
||||
#else
|
||||
static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
|
||||
|
|
|
@ -6846,8 +6846,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
|
|||
if (event->tp_event->prog)
|
||||
return -EEXIST;
|
||||
|
||||
if (!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE))
|
||||
/* bpf programs can only be attached to kprobes */
|
||||
if (!(event->tp_event->flags & TRACE_EVENT_FL_UKPROBE))
|
||||
/* bpf programs can only be attached to u/kprobes */
|
||||
return -EINVAL;
|
||||
|
||||
prog = bpf_prog_get(prog_fd);
|
||||
|
|
|
@ -434,7 +434,7 @@ config UPROBE_EVENT
|
|||
|
||||
config BPF_EVENTS
|
||||
depends on BPF_SYSCALL
|
||||
depends on KPROBE_EVENT
|
||||
depends on KPROBE_EVENT || UPROBE_EVENT
|
||||
bool
|
||||
default y
|
||||
help
|
||||
|
|
|
@ -1095,11 +1095,15 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
|
|||
{
|
||||
struct trace_event_call *call = &tu->tp.call;
|
||||
struct uprobe_trace_entry_head *entry;
|
||||
struct bpf_prog *prog = call->prog;
|
||||
struct hlist_head *head;
|
||||
void *data;
|
||||
int size, esize;
|
||||
int rctx;
|
||||
|
||||
if (prog && !trace_call_bpf(prog, regs))
|
||||
return;
|
||||
|
||||
esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
|
||||
|
||||
size = esize + tu->tp.size + dsize;
|
||||
|
@ -1289,6 +1293,7 @@ static int register_uprobe_event(struct trace_uprobe *tu)
|
|||
return -ENODEV;
|
||||
}
|
||||
|
||||
call->flags = TRACE_EVENT_FL_UPROBE;
|
||||
call->class->reg = trace_uprobe_register;
|
||||
call->data = tu;
|
||||
ret = trace_add_event_call(call);
|
||||
|
|
|
@ -109,6 +109,7 @@ OPTIONS
|
|||
- mispredict: "N" for predicted branch, "Y" for mispredicted branch
|
||||
- in_tx: branch in TSX transaction
|
||||
- abort: TSX transaction abort.
|
||||
- cycles: Cycles in basic block
|
||||
|
||||
And default sort keys are changed to comm, dso_from, symbol_from, dso_to
|
||||
and symbol_to, see '--branch-stack'.
|
||||
|
|
|
@ -208,6 +208,27 @@ Default is to monitor all CPUS.
|
|||
This option sets the time out limit. The default value is 500 ms.
|
||||
|
||||
|
||||
-b::
|
||||
--branch-any::
|
||||
Enable taken branch stack sampling. Any type of taken branch may be sampled.
|
||||
This is a shortcut for --branch-filter any. See --branch-filter for more infos.
|
||||
|
||||
-j::
|
||||
--branch-filter::
|
||||
Enable taken branch stack sampling. Each sample captures a series of consecutive
|
||||
taken branches. The number of branches captured with each sample depends on the
|
||||
underlying hardware, the type of branches of interest, and the executed code.
|
||||
It is possible to select the types of branches captured by enabling filters.
|
||||
For a full list of modifiers please see the perf record manpage.
|
||||
|
||||
The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
|
||||
The privilege levels may be omitted, in which case, the privilege levels of the associated
|
||||
event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
|
||||
levels are subject to permissions. When sampling on multiple events, branch stack sampling
|
||||
is enabled for all the sampling events. The sampled branch type is the same for all events.
|
||||
The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
|
||||
Note that this feature may not be available on all processors.
|
||||
|
||||
INTERACTIVE PROMPTING KEYS
|
||||
--------------------------
|
||||
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
# empty
|
|
@ -0,0 +1 @@
|
|||
# empty
|
|
@ -0,0 +1 @@
|
|||
# empty
|
|
@ -0,0 +1 @@
|
|||
libperf-y += util/
|
|
@ -0,0 +1,3 @@
|
|||
ifndef NO_DWARF
|
||||
PERF_HAVE_DWARF_REGS := 1
|
||||
endif
|
|
@ -0,0 +1 @@
|
|||
libperf-$(CONFIG_DWARF) += dwarf-regs.o
|
|
@ -0,0 +1,25 @@
|
|||
/*
|
||||
* Mapping of DWARF debug register numbers into register names.
|
||||
*
|
||||
* Copyright (c) 2015 Cadence Design Systems Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <dwarf-regs.h>
|
||||
|
||||
#define XTENSA_MAX_REGS 16
|
||||
|
||||
const char *xtensa_regs_table[XTENSA_MAX_REGS] = {
|
||||
"a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
|
||||
"a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15",
|
||||
};
|
||||
|
||||
const char *get_arch_regstr(unsigned int n)
|
||||
{
|
||||
return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL;
|
||||
}
|
|
@ -187,6 +187,7 @@ find_next:
|
|||
* symbol, free he->ms.sym->src to signal we already
|
||||
* processed this symbol.
|
||||
*/
|
||||
zfree(¬es->src->cycles_hist);
|
||||
zfree(¬es->src);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -53,6 +53,7 @@ struct report {
|
|||
bool mem_mode;
|
||||
bool header;
|
||||
bool header_only;
|
||||
bool nonany_branch_mode;
|
||||
int max_stack;
|
||||
struct perf_read_values show_threads_values;
|
||||
const char *pretty_printing_style;
|
||||
|
@ -102,6 +103,9 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
|
|||
if (!ui__has_annotation())
|
||||
return 0;
|
||||
|
||||
hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
|
||||
rep->nonany_branch_mode);
|
||||
|
||||
if (sort__mode == SORT_MODE__BRANCH) {
|
||||
bi = he->branch_info;
|
||||
err = addr_map_symbol__inc_samples(&bi->from, evsel->idx);
|
||||
|
@ -258,6 +262,12 @@ static int report__setup_sample_type(struct report *rep)
|
|||
else
|
||||
callchain_param.record_mode = CALLCHAIN_FP;
|
||||
}
|
||||
|
||||
/* ??? handle more cases than just ANY? */
|
||||
if (!(perf_evlist__combined_branch_type(session->evlist) &
|
||||
PERF_SAMPLE_BRANCH_ANY))
|
||||
rep->nonany_branch_mode = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -101,8 +101,6 @@ static struct target target = {
|
|||
|
||||
static int run_count = 1;
|
||||
static bool no_inherit = false;
|
||||
static bool scale = true;
|
||||
static enum aggr_mode aggr_mode = AGGR_GLOBAL;
|
||||
static volatile pid_t child_pid = -1;
|
||||
static bool null_run = false;
|
||||
static int detailed_run = 0;
|
||||
|
@ -112,11 +110,9 @@ static int big_num_opt = -1;
|
|||
static const char *csv_sep = NULL;
|
||||
static bool csv_output = false;
|
||||
static bool group = false;
|
||||
static FILE *output = NULL;
|
||||
static const char *pre_cmd = NULL;
|
||||
static const char *post_cmd = NULL;
|
||||
static bool sync_run = false;
|
||||
static unsigned int interval = 0;
|
||||
static unsigned int initial_delay = 0;
|
||||
static unsigned int unit_width = 4; /* strlen("unit") */
|
||||
static bool forever = false;
|
||||
|
@ -126,6 +122,11 @@ static int (*aggr_get_id)(struct cpu_map *m, int cpu);
|
|||
|
||||
static volatile int done = 0;
|
||||
|
||||
static struct perf_stat_config stat_config = {
|
||||
.aggr_mode = AGGR_GLOBAL,
|
||||
.scale = true,
|
||||
};
|
||||
|
||||
static inline void diff_timespec(struct timespec *r, struct timespec *a,
|
||||
struct timespec *b)
|
||||
{
|
||||
|
@ -148,7 +149,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
|
|||
{
|
||||
struct perf_event_attr *attr = &evsel->attr;
|
||||
|
||||
if (scale)
|
||||
if (stat_config.scale)
|
||||
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
|
||||
PERF_FORMAT_TOTAL_TIME_RUNNING;
|
||||
|
||||
|
@ -178,142 +179,6 @@ static inline int nsec_counter(struct perf_evsel *evsel)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void zero_per_pkg(struct perf_evsel *counter)
|
||||
{
|
||||
if (counter->per_pkg_mask)
|
||||
memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
|
||||
}
|
||||
|
||||
static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
|
||||
{
|
||||
unsigned long *mask = counter->per_pkg_mask;
|
||||
struct cpu_map *cpus = perf_evsel__cpus(counter);
|
||||
int s;
|
||||
|
||||
*skip = false;
|
||||
|
||||
if (!counter->per_pkg)
|
||||
return 0;
|
||||
|
||||
if (cpu_map__empty(cpus))
|
||||
return 0;
|
||||
|
||||
if (!mask) {
|
||||
mask = zalloc(MAX_NR_CPUS);
|
||||
if (!mask)
|
||||
return -ENOMEM;
|
||||
|
||||
counter->per_pkg_mask = mask;
|
||||
}
|
||||
|
||||
s = cpu_map__get_socket(cpus, cpu);
|
||||
if (s < 0)
|
||||
return -1;
|
||||
|
||||
*skip = test_and_set_bit(s, mask) == 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
process_counter_values(struct perf_evsel *evsel, int cpu, int thread,
|
||||
struct perf_counts_values *count)
|
||||
{
|
||||
struct perf_counts_values *aggr = &evsel->counts->aggr;
|
||||
static struct perf_counts_values zero;
|
||||
bool skip = false;
|
||||
|
||||
if (check_per_pkg(evsel, cpu, &skip)) {
|
||||
pr_err("failed to read per-pkg counter\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (skip)
|
||||
count = &zero;
|
||||
|
||||
switch (aggr_mode) {
|
||||
case AGGR_THREAD:
|
||||
case AGGR_CORE:
|
||||
case AGGR_SOCKET:
|
||||
case AGGR_NONE:
|
||||
if (!evsel->snapshot)
|
||||
perf_evsel__compute_deltas(evsel, cpu, thread, count);
|
||||
perf_counts_values__scale(count, scale, NULL);
|
||||
if (aggr_mode == AGGR_NONE)
|
||||
perf_stat__update_shadow_stats(evsel, count->values, cpu);
|
||||
break;
|
||||
case AGGR_GLOBAL:
|
||||
aggr->val += count->val;
|
||||
if (scale) {
|
||||
aggr->ena += count->ena;
|
||||
aggr->run += count->run;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int process_counter_maps(struct perf_evsel *counter)
|
||||
{
|
||||
int nthreads = thread_map__nr(counter->threads);
|
||||
int ncpus = perf_evsel__nr_cpus(counter);
|
||||
int cpu, thread;
|
||||
|
||||
if (counter->system_wide)
|
||||
nthreads = 1;
|
||||
|
||||
for (thread = 0; thread < nthreads; thread++) {
|
||||
for (cpu = 0; cpu < ncpus; cpu++) {
|
||||
if (process_counter_values(counter, cpu, thread,
|
||||
perf_counts(counter->counts, cpu, thread)))
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int process_counter(struct perf_evsel *counter)
|
||||
{
|
||||
struct perf_counts_values *aggr = &counter->counts->aggr;
|
||||
struct perf_stat *ps = counter->priv;
|
||||
u64 *count = counter->counts->aggr.values;
|
||||
int i, ret;
|
||||
|
||||
aggr->val = aggr->ena = aggr->run = 0;
|
||||
init_stats(ps->res_stats);
|
||||
|
||||
if (counter->per_pkg)
|
||||
zero_per_pkg(counter);
|
||||
|
||||
ret = process_counter_maps(counter);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (aggr_mode != AGGR_GLOBAL)
|
||||
return 0;
|
||||
|
||||
if (!counter->snapshot)
|
||||
perf_evsel__compute_deltas(counter, -1, -1, aggr);
|
||||
perf_counts_values__scale(aggr, scale, &counter->counts->scaled);
|
||||
|
||||
for (i = 0; i < 3; i++)
|
||||
update_stats(&ps->res_stats[i], count[i]);
|
||||
|
||||
if (verbose) {
|
||||
fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
|
||||
perf_evsel__name(counter), count[0], count[1], count[2]);
|
||||
}
|
||||
|
||||
/*
|
||||
* Save the full runtime - to allow normalization during printout:
|
||||
*/
|
||||
perf_stat__update_shadow_stats(counter, count, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read out the results of a single counter:
|
||||
* do not aggregate counts across CPUs in system-wide mode
|
||||
|
@ -351,7 +216,7 @@ static void read_counters(bool close_counters)
|
|||
if (read_counter(counter))
|
||||
pr_warning("failed to read counter %s\n", counter->name);
|
||||
|
||||
if (process_counter(counter))
|
||||
if (perf_stat_process_counter(&stat_config, counter))
|
||||
pr_warning("failed to process counter %s\n", counter->name);
|
||||
|
||||
if (close_counters) {
|
||||
|
@ -402,6 +267,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf
|
|||
|
||||
static int __run_perf_stat(int argc, const char **argv)
|
||||
{
|
||||
int interval = stat_config.interval;
|
||||
char msg[512];
|
||||
unsigned long long t0, t1;
|
||||
struct perf_evsel *counter;
|
||||
|
@ -545,13 +411,13 @@ static int run_perf_stat(int argc, const char **argv)
|
|||
static void print_running(u64 run, u64 ena)
|
||||
{
|
||||
if (csv_output) {
|
||||
fprintf(output, "%s%" PRIu64 "%s%.2f",
|
||||
fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
|
||||
csv_sep,
|
||||
run,
|
||||
csv_sep,
|
||||
ena ? 100.0 * run / ena : 100.0);
|
||||
} else if (run != ena) {
|
||||
fprintf(output, " (%.2f%%)", 100.0 * run / ena);
|
||||
fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -560,9 +426,9 @@ static void print_noise_pct(double total, double avg)
|
|||
double pct = rel_stddev_stats(total, avg);
|
||||
|
||||
if (csv_output)
|
||||
fprintf(output, "%s%.2f%%", csv_sep, pct);
|
||||
fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
|
||||
else if (pct)
|
||||
fprintf(output, " ( +-%6.2f%% )", pct);
|
||||
fprintf(stat_config.output, " ( +-%6.2f%% )", pct);
|
||||
}
|
||||
|
||||
static void print_noise(struct perf_evsel *evsel, double avg)
|
||||
|
@ -578,9 +444,9 @@ static void print_noise(struct perf_evsel *evsel, double avg)
|
|||
|
||||
static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
|
||||
{
|
||||
switch (aggr_mode) {
|
||||
switch (stat_config.aggr_mode) {
|
||||
case AGGR_CORE:
|
||||
fprintf(output, "S%d-C%*d%s%*d%s",
|
||||
fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
|
||||
cpu_map__id_to_socket(id),
|
||||
csv_output ? 0 : -8,
|
||||
cpu_map__id_to_cpu(id),
|
||||
|
@ -590,7 +456,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
|
|||
csv_sep);
|
||||
break;
|
||||
case AGGR_SOCKET:
|
||||
fprintf(output, "S%*d%s%*d%s",
|
||||
fprintf(stat_config.output, "S%*d%s%*d%s",
|
||||
csv_output ? 0 : -5,
|
||||
id,
|
||||
csv_sep,
|
||||
|
@ -599,12 +465,12 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
|
|||
csv_sep);
|
||||
break;
|
||||
case AGGR_NONE:
|
||||
fprintf(output, "CPU%*d%s",
|
||||
fprintf(stat_config.output, "CPU%*d%s",
|
||||
csv_output ? 0 : -4,
|
||||
perf_evsel__cpus(evsel)->map[id], csv_sep);
|
||||
break;
|
||||
case AGGR_THREAD:
|
||||
fprintf(output, "%*s-%*d%s",
|
||||
fprintf(stat_config.output, "%*s-%*d%s",
|
||||
csv_output ? 0 : 16,
|
||||
thread_map__comm(evsel->threads, id),
|
||||
csv_output ? 0 : -8,
|
||||
|
@ -619,6 +485,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
|
|||
|
||||
static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
|
||||
{
|
||||
FILE *output = stat_config.output;
|
||||
double msecs = avg / 1e6;
|
||||
const char *fmt_v, *fmt_n;
|
||||
char name[25];
|
||||
|
@ -643,7 +510,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
|
|||
if (evsel->cgrp)
|
||||
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
|
||||
|
||||
if (csv_output || interval)
|
||||
if (csv_output || stat_config.interval)
|
||||
return;
|
||||
|
||||
if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
|
||||
|
@ -655,6 +522,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
|
|||
|
||||
static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
|
||||
{
|
||||
FILE *output = stat_config.output;
|
||||
double sc = evsel->scale;
|
||||
const char *fmt;
|
||||
int cpu = cpu_map__id_to_cpu(id);
|
||||
|
@ -670,7 +538,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
|
|||
|
||||
aggr_printout(evsel, id, nr);
|
||||
|
||||
if (aggr_mode == AGGR_GLOBAL)
|
||||
if (stat_config.aggr_mode == AGGR_GLOBAL)
|
||||
cpu = 0;
|
||||
|
||||
fprintf(output, fmt, avg, csv_sep);
|
||||
|
@ -685,14 +553,16 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
|
|||
if (evsel->cgrp)
|
||||
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
|
||||
|
||||
if (csv_output || interval)
|
||||
if (csv_output || stat_config.interval)
|
||||
return;
|
||||
|
||||
perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode);
|
||||
perf_stat__print_shadow_stats(output, evsel, avg, cpu,
|
||||
stat_config.aggr_mode);
|
||||
}
|
||||
|
||||
static void print_aggr(char *prefix)
|
||||
{
|
||||
FILE *output = stat_config.output;
|
||||
struct perf_evsel *counter;
|
||||
int cpu, cpu2, s, s2, id, nr;
|
||||
double uval;
|
||||
|
@ -761,6 +631,7 @@ static void print_aggr(char *prefix)
|
|||
|
||||
static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
|
||||
{
|
||||
FILE *output = stat_config.output;
|
||||
int nthreads = thread_map__nr(counter->threads);
|
||||
int ncpus = cpu_map__nr(counter->cpus);
|
||||
int cpu, thread;
|
||||
|
@ -799,6 +670,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
|
|||
*/
|
||||
static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
|
||||
{
|
||||
FILE *output = stat_config.output;
|
||||
struct perf_stat *ps = counter->priv;
|
||||
double avg = avg_stats(&ps->res_stats[0]);
|
||||
int scaled = counter->counts->scaled;
|
||||
|
@ -850,6 +722,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
|
|||
*/
|
||||
static void print_counter(struct perf_evsel *counter, char *prefix)
|
||||
{
|
||||
FILE *output = stat_config.output;
|
||||
u64 ena, run, val;
|
||||
double uval;
|
||||
int cpu;
|
||||
|
@ -904,12 +777,13 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
|
|||
|
||||
static void print_interval(char *prefix, struct timespec *ts)
|
||||
{
|
||||
FILE *output = stat_config.output;
|
||||
static int num_print_interval;
|
||||
|
||||
sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
|
||||
|
||||
if (num_print_interval == 0 && !csv_output) {
|
||||
switch (aggr_mode) {
|
||||
switch (stat_config.aggr_mode) {
|
||||
case AGGR_SOCKET:
|
||||
fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit");
|
||||
break;
|
||||
|
@ -934,6 +808,7 @@ static void print_interval(char *prefix, struct timespec *ts)
|
|||
|
||||
static void print_header(int argc, const char **argv)
|
||||
{
|
||||
FILE *output = stat_config.output;
|
||||
int i;
|
||||
|
||||
fflush(stdout);
|
||||
|
@ -963,6 +838,8 @@ static void print_header(int argc, const char **argv)
|
|||
|
||||
static void print_footer(void)
|
||||
{
|
||||
FILE *output = stat_config.output;
|
||||
|
||||
if (!null_run)
|
||||
fprintf(output, "\n");
|
||||
fprintf(output, " %17.9f seconds time elapsed",
|
||||
|
@ -977,6 +854,7 @@ static void print_footer(void)
|
|||
|
||||
static void print_counters(struct timespec *ts, int argc, const char **argv)
|
||||
{
|
||||
int interval = stat_config.interval;
|
||||
struct perf_evsel *counter;
|
||||
char buf[64], *prefix = NULL;
|
||||
|
||||
|
@ -985,7 +863,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
|
|||
else
|
||||
print_header(argc, argv);
|
||||
|
||||
switch (aggr_mode) {
|
||||
switch (stat_config.aggr_mode) {
|
||||
case AGGR_CORE:
|
||||
case AGGR_SOCKET:
|
||||
print_aggr(prefix);
|
||||
|
@ -1009,14 +887,14 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
|
|||
if (!interval && !csv_output)
|
||||
print_footer();
|
||||
|
||||
fflush(output);
|
||||
fflush(stat_config.output);
|
||||
}
|
||||
|
||||
static volatile int signr = -1;
|
||||
|
||||
static void skip_signal(int signo)
|
||||
{
|
||||
if ((child_pid == -1) || interval)
|
||||
if ((child_pid == -1) || stat_config.interval)
|
||||
done = 1;
|
||||
|
||||
signr = signo;
|
||||
|
@ -1064,7 +942,7 @@ static int stat__set_big_num(const struct option *opt __maybe_unused,
|
|||
|
||||
static int perf_stat_init_aggr_mode(void)
|
||||
{
|
||||
switch (aggr_mode) {
|
||||
switch (stat_config.aggr_mode) {
|
||||
case AGGR_SOCKET:
|
||||
if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
|
||||
perror("cannot build socket map");
|
||||
|
@ -1270,7 +1148,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
|
|||
"system-wide collection from all CPUs"),
|
||||
OPT_BOOLEAN('g', "group", &group,
|
||||
"put the counters into a counter group"),
|
||||
OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
|
||||
OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
|
||||
OPT_INCR('v', "verbose", &verbose,
|
||||
"be more verbose (show counter open errors, etc)"),
|
||||
OPT_INTEGER('r', "repeat", &run_count,
|
||||
|
@ -1286,7 +1164,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
|
|||
stat__set_big_num),
|
||||
OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
|
||||
"list of cpus to monitor in system-wide"),
|
||||
OPT_SET_UINT('A', "no-aggr", &aggr_mode,
|
||||
OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
|
||||
"disable CPU count aggregation", AGGR_NONE),
|
||||
OPT_STRING('x', "field-separator", &csv_sep, "separator",
|
||||
"print counts with custom separator"),
|
||||
|
@ -1300,13 +1178,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
|
|||
"command to run prior to the measured command"),
|
||||
OPT_STRING(0, "post", &post_cmd, "command",
|
||||
"command to run after to the measured command"),
|
||||
OPT_UINTEGER('I', "interval-print", &interval,
|
||||
OPT_UINTEGER('I', "interval-print", &stat_config.interval,
|
||||
"print counts at regular interval in ms (>= 100)"),
|
||||
OPT_SET_UINT(0, "per-socket", &aggr_mode,
|
||||
OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
|
||||
"aggregate counts per processor socket", AGGR_SOCKET),
|
||||
OPT_SET_UINT(0, "per-core", &aggr_mode,
|
||||
OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
|
||||
"aggregate counts per physical processor core", AGGR_CORE),
|
||||
OPT_SET_UINT(0, "per-thread", &aggr_mode,
|
||||
OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
|
||||
"aggregate counts per thread", AGGR_THREAD),
|
||||
OPT_UINTEGER('D', "delay", &initial_delay,
|
||||
"ms to wait before starting measurement after program start"),
|
||||
|
@ -1318,6 +1196,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
|
|||
};
|
||||
int status = -EINVAL, run_idx;
|
||||
const char *mode;
|
||||
FILE *output = stderr;
|
||||
unsigned int interval;
|
||||
|
||||
setlocale(LC_ALL, "");
|
||||
|
||||
|
@ -1328,7 +1208,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
|
|||
argc = parse_options(argc, argv, options, stat_usage,
|
||||
PARSE_OPT_STOP_AT_NON_OPTION);
|
||||
|
||||
output = stderr;
|
||||
interval = stat_config.interval;
|
||||
|
||||
if (output_name && strcmp(output_name, "-"))
|
||||
output = NULL;
|
||||
|
||||
|
@ -1365,6 +1246,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
|
|||
}
|
||||
}
|
||||
|
||||
stat_config.output = output;
|
||||
|
||||
if (csv_sep) {
|
||||
csv_output = true;
|
||||
if (!strcmp(csv_sep, "\\t"))
|
||||
|
@ -1399,7 +1282,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
|
|||
run_count = 1;
|
||||
}
|
||||
|
||||
if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
|
||||
if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
|
||||
fprintf(stderr, "The --per-thread option is only available "
|
||||
"when monitoring via -p -t options.\n");
|
||||
parse_options_usage(NULL, options, "p", 1);
|
||||
|
@ -1411,7 +1294,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
|
|||
* no_aggr, cgroup are for system-wide only
|
||||
* --per-thread is aggregated per thread, we dont mix it with cpu mode
|
||||
*/
|
||||
if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) &&
|
||||
if (((stat_config.aggr_mode != AGGR_GLOBAL &&
|
||||
stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
|
||||
!target__has_cpu(&target)) {
|
||||
fprintf(stderr, "both cgroup and no-aggregation "
|
||||
"modes only available in system-wide mode\n");
|
||||
|
@ -1444,7 +1328,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
|
|||
* Initialize thread_map with comm names,
|
||||
* so we could print it out on output.
|
||||
*/
|
||||
if (aggr_mode == AGGR_THREAD)
|
||||
if (stat_config.aggr_mode == AGGR_THREAD)
|
||||
thread_map__read_comms(evsel_list->threads);
|
||||
|
||||
if (interval && interval < 100) {
|
||||
|
|
|
@ -40,6 +40,7 @@
|
|||
#include "util/xyarray.h"
|
||||
#include "util/sort.h"
|
||||
#include "util/intlist.h"
|
||||
#include "util/parse-branch-options.h"
|
||||
#include "arch/common.h"
|
||||
|
||||
#include "util/debug.h"
|
||||
|
@ -695,6 +696,8 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
|
|||
perf_top__record_precise_ip(top, he, evsel->idx, ip);
|
||||
}
|
||||
|
||||
hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
|
||||
!(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1171,6 +1174,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
|
|||
"don't try to adjust column width, use these fixed values"),
|
||||
OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout,
|
||||
"per thread proc mmap processing timeout in ms"),
|
||||
OPT_CALLBACK_NOOPT('b', "branch-any", &opts->branch_stack,
|
||||
"branch any", "sample any taken branches",
|
||||
parse_branch_stack),
|
||||
OPT_CALLBACK('j', "branch-filter", &opts->branch_stack,
|
||||
"branch filter mask", "branch stack filter modes",
|
||||
parse_branch_stack),
|
||||
OPT_END()
|
||||
};
|
||||
const char * const top_usage[] = {
|
||||
|
|
|
@ -2773,9 +2773,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
|
|||
|
||||
printed += fprintf(fp, "\n");
|
||||
|
||||
printed += fprintf(fp, " syscall calls min avg max stddev\n");
|
||||
printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
|
||||
printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
|
||||
printed += fprintf(fp, " syscall calls total min avg max stddev\n");
|
||||
printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
|
||||
printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
|
||||
|
||||
/* each int_node is a syscall */
|
||||
while (inode) {
|
||||
|
@ -2792,8 +2792,8 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
|
|||
|
||||
sc = &trace->syscalls.table[inode->i];
|
||||
printed += fprintf(fp, " %-15s", sc->name);
|
||||
printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
|
||||
n, min, avg);
|
||||
printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
|
||||
n, avg * n, min, avg);
|
||||
printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
|
||||
}
|
||||
|
||||
|
|
|
@ -50,7 +50,7 @@ copy_kcore()
|
|||
fi
|
||||
|
||||
rm -f perf.data.junk
|
||||
("$PERF" record -o perf.data.junk $PERF_OPTIONS -- sleep 60) >/dev/null 2>/dev/null &
|
||||
("$PERF" record -o perf.data.junk "${PERF_OPTIONS[@]}" -- sleep 60) >/dev/null 2>/dev/null &
|
||||
PERF_PID=$!
|
||||
|
||||
# Need to make sure that perf has started
|
||||
|
@ -160,18 +160,18 @@ record()
|
|||
echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2
|
||||
fi
|
||||
|
||||
if echo "$PERF_OPTIONS" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then
|
||||
if echo "${PERF_OPTIONS[@]}" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then
|
||||
echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2
|
||||
fi
|
||||
|
||||
if echo "$PERF_OPTIONS" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then
|
||||
if echo "${PERF_OPTIONS[@]}" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then
|
||||
if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then
|
||||
echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2
|
||||
fi
|
||||
|
||||
if echo "$PERF_OPTIONS" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then
|
||||
if echo "${PERF_OPTIONS[@]}" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then
|
||||
true
|
||||
elif echo "$PERF_OPTIONS" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then
|
||||
elif echo "${PERF_OPTIONS[@]}" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then
|
||||
true
|
||||
elif [ ! -r /sys/kernel/debug -o ! -x /sys/kernel/debug ] ; then
|
||||
echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2
|
||||
|
@ -193,8 +193,8 @@ record()
|
|||
|
||||
mkdir "$PERF_DATA_DIR"
|
||||
|
||||
echo "$PERF record -o $PERF_DATA_DIR/perf.data $PERF_OPTIONS -- $*"
|
||||
"$PERF" record -o "$PERF_DATA_DIR/perf.data" $PERF_OPTIONS -- $* || true
|
||||
echo "$PERF record -o $PERF_DATA_DIR/perf.data ${PERF_OPTIONS[@]} -- $@"
|
||||
"$PERF" record -o "$PERF_DATA_DIR/perf.data" "${PERF_OPTIONS[@]}" -- "$@" || true
|
||||
|
||||
if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then
|
||||
exit 1
|
||||
|
@ -209,8 +209,8 @@ subcommand()
|
|||
{
|
||||
find_perf
|
||||
check_buildid_cache_permissions
|
||||
echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $*"
|
||||
"$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" $*
|
||||
echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $@"
|
||||
"$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" "$@"
|
||||
}
|
||||
|
||||
if [ "$1" = "fix_buildid_cache_permissions" ] ; then
|
||||
|
@ -234,7 +234,7 @@ fi
|
|||
case "$PERF_SUB_COMMAND" in
|
||||
"record")
|
||||
while [ "$1" != "--" ] ; do
|
||||
PERF_OPTIONS+="$1 "
|
||||
PERF_OPTIONS+=("$1")
|
||||
shift || break
|
||||
done
|
||||
if [ "$1" != "--" ] ; then
|
||||
|
@ -242,16 +242,16 @@ case "$PERF_SUB_COMMAND" in
|
|||
usage
|
||||
fi
|
||||
shift
|
||||
record $*
|
||||
record "$@"
|
||||
;;
|
||||
"script")
|
||||
subcommand $*
|
||||
subcommand "$@"
|
||||
;;
|
||||
"report")
|
||||
subcommand $*
|
||||
subcommand "$@"
|
||||
;;
|
||||
"inject")
|
||||
subcommand $*
|
||||
subcommand "$@"
|
||||
;;
|
||||
*)
|
||||
usage
|
||||
|
|
|
@ -16,6 +16,9 @@ struct disasm_line_samples {
|
|||
u64 nr;
|
||||
};
|
||||
|
||||
#define IPC_WIDTH 6
|
||||
#define CYCLES_WIDTH 6
|
||||
|
||||
struct browser_disasm_line {
|
||||
struct rb_node rb_node;
|
||||
u32 idx;
|
||||
|
@ -53,6 +56,7 @@ struct annotate_browser {
|
|||
int max_jump_sources;
|
||||
int nr_jumps;
|
||||
bool searching_backwards;
|
||||
bool have_cycles;
|
||||
u8 addr_width;
|
||||
u8 jumps_width;
|
||||
u8 target_width;
|
||||
|
@ -96,6 +100,15 @@ static int annotate_browser__set_jumps_percent_color(struct annotate_browser *br
|
|||
return ui_browser__set_color(&browser->b, color);
|
||||
}
|
||||
|
||||
static int annotate_browser__pcnt_width(struct annotate_browser *ab)
|
||||
{
|
||||
int w = 7 * ab->nr_events;
|
||||
|
||||
if (ab->have_cycles)
|
||||
w += IPC_WIDTH + CYCLES_WIDTH;
|
||||
return w;
|
||||
}
|
||||
|
||||
static void annotate_browser__write(struct ui_browser *browser, void *entry, int row)
|
||||
{
|
||||
struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
|
||||
|
@ -106,7 +119,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
|
|||
(!current_entry || (browser->use_navkeypressed &&
|
||||
!browser->navkeypressed)));
|
||||
int width = browser->width, printed;
|
||||
int i, pcnt_width = 7 * ab->nr_events;
|
||||
int i, pcnt_width = annotate_browser__pcnt_width(ab);
|
||||
double percent_max = 0.0;
|
||||
char bf[256];
|
||||
|
||||
|
@ -116,19 +129,34 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
|
|||
}
|
||||
|
||||
if (dl->offset != -1 && percent_max != 0.0) {
|
||||
for (i = 0; i < ab->nr_events; i++) {
|
||||
ui_browser__set_percent_color(browser,
|
||||
bdl->samples[i].percent,
|
||||
current_entry);
|
||||
if (annotate_browser__opts.show_total_period)
|
||||
slsmg_printf("%6" PRIu64 " ",
|
||||
bdl->samples[i].nr);
|
||||
else
|
||||
slsmg_printf("%6.2f ", bdl->samples[i].percent);
|
||||
if (percent_max != 0.0) {
|
||||
for (i = 0; i < ab->nr_events; i++) {
|
||||
ui_browser__set_percent_color(browser,
|
||||
bdl->samples[i].percent,
|
||||
current_entry);
|
||||
if (annotate_browser__opts.show_total_period)
|
||||
slsmg_printf("%6" PRIu64 " ",
|
||||
bdl->samples[i].nr);
|
||||
else
|
||||
slsmg_printf("%6.2f ", bdl->samples[i].percent);
|
||||
}
|
||||
} else {
|
||||
slsmg_write_nstring(" ", 7 * ab->nr_events);
|
||||
}
|
||||
} else {
|
||||
ui_browser__set_percent_color(browser, 0, current_entry);
|
||||
slsmg_write_nstring(" ", pcnt_width);
|
||||
slsmg_write_nstring(" ", 7 * ab->nr_events);
|
||||
}
|
||||
if (ab->have_cycles) {
|
||||
if (dl->ipc)
|
||||
slsmg_printf("%*.2f ", IPC_WIDTH - 1, dl->ipc);
|
||||
else
|
||||
slsmg_write_nstring(" ", IPC_WIDTH);
|
||||
if (dl->cycles)
|
||||
slsmg_printf("%*" PRIu64 " ",
|
||||
CYCLES_WIDTH - 1, dl->cycles);
|
||||
else
|
||||
slsmg_write_nstring(" ", CYCLES_WIDTH);
|
||||
}
|
||||
|
||||
SLsmg_write_char(' ');
|
||||
|
@ -231,7 +259,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
|
|||
unsigned int from, to;
|
||||
struct map_symbol *ms = ab->b.priv;
|
||||
struct symbol *sym = ms->sym;
|
||||
u8 pcnt_width = 7;
|
||||
u8 pcnt_width = annotate_browser__pcnt_width(ab);
|
||||
|
||||
/* PLT symbols contain external offsets */
|
||||
if (strstr(sym->name, "@plt"))
|
||||
|
@ -255,8 +283,6 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
|
|||
to = (u64)btarget->idx;
|
||||
}
|
||||
|
||||
pcnt_width *= ab->nr_events;
|
||||
|
||||
ui_browser__set_color(browser, HE_COLORSET_CODE);
|
||||
__ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width,
|
||||
from, to);
|
||||
|
@ -266,9 +292,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser)
|
|||
{
|
||||
struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
|
||||
int ret = ui_browser__list_head_refresh(browser);
|
||||
int pcnt_width;
|
||||
|
||||
pcnt_width = 7 * ab->nr_events;
|
||||
int pcnt_width = annotate_browser__pcnt_width(ab);
|
||||
|
||||
if (annotate_browser__opts.jump_arrows)
|
||||
annotate_browser__draw_current_jump(browser);
|
||||
|
@ -390,7 +414,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
|
|||
max_percent = bpos->samples[i].percent;
|
||||
}
|
||||
|
||||
if (max_percent < 0.01) {
|
||||
if (max_percent < 0.01 && pos->ipc == 0) {
|
||||
RB_CLEAR_NODE(&bpos->rb_node);
|
||||
continue;
|
||||
}
|
||||
|
@ -869,6 +893,75 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
|
|||
return map_symbol__tui_annotate(&he->ms, evsel, hbt);
|
||||
}
|
||||
|
||||
|
||||
static unsigned count_insn(struct annotate_browser *browser, u64 start, u64 end)
|
||||
{
|
||||
unsigned n_insn = 0;
|
||||
u64 offset;
|
||||
|
||||
for (offset = start; offset <= end; offset++) {
|
||||
if (browser->offsets[offset])
|
||||
n_insn++;
|
||||
}
|
||||
return n_insn;
|
||||
}
|
||||
|
||||
static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end,
|
||||
struct cyc_hist *ch)
|
||||
{
|
||||
unsigned n_insn;
|
||||
u64 offset;
|
||||
|
||||
n_insn = count_insn(browser, start, end);
|
||||
if (n_insn && ch->num && ch->cycles) {
|
||||
float ipc = n_insn / ((double)ch->cycles / (double)ch->num);
|
||||
|
||||
/* Hide data when there are too many overlaps. */
|
||||
if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2)
|
||||
return;
|
||||
|
||||
for (offset = start; offset <= end; offset++) {
|
||||
struct disasm_line *dl = browser->offsets[offset];
|
||||
|
||||
if (dl)
|
||||
dl->ipc = ipc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This should probably be in util/annotate.c to share with the tty
|
||||
* annotate, but right now we need the per byte offsets arrays,
|
||||
* which are only here.
|
||||
*/
|
||||
static void annotate__compute_ipc(struct annotate_browser *browser, size_t size,
|
||||
struct symbol *sym)
|
||||
{
|
||||
u64 offset;
|
||||
struct annotation *notes = symbol__annotation(sym);
|
||||
|
||||
if (!notes->src || !notes->src->cycles_hist)
|
||||
return;
|
||||
|
||||
pthread_mutex_lock(¬es->lock);
|
||||
for (offset = 0; offset < size; ++offset) {
|
||||
struct cyc_hist *ch;
|
||||
|
||||
ch = ¬es->src->cycles_hist[offset];
|
||||
if (ch && ch->cycles) {
|
||||
struct disasm_line *dl;
|
||||
|
||||
if (ch->have_start)
|
||||
count_and_fill(browser, ch->start, offset, ch);
|
||||
dl = browser->offsets[offset];
|
||||
if (dl && ch->num_aggr)
|
||||
dl->cycles = ch->cycles_aggr / ch->num_aggr;
|
||||
browser->have_cycles = true;
|
||||
}
|
||||
}
|
||||
pthread_mutex_unlock(¬es->lock);
|
||||
}
|
||||
|
||||
static void annotate_browser__mark_jump_targets(struct annotate_browser *browser,
|
||||
size_t size)
|
||||
{
|
||||
|
@ -991,6 +1084,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
|
|||
}
|
||||
|
||||
annotate_browser__mark_jump_targets(&browser, size);
|
||||
annotate__compute_ipc(&browser, size, sym);
|
||||
|
||||
browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size);
|
||||
browser.max_addr_width = hex_width(sym->end);
|
||||
|
|
|
@ -473,17 +473,73 @@ int symbol__alloc_hist(struct symbol *sym)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* The cycles histogram is lazily allocated. */
|
||||
static int symbol__alloc_hist_cycles(struct symbol *sym)
|
||||
{
|
||||
struct annotation *notes = symbol__annotation(sym);
|
||||
const size_t size = symbol__size(sym);
|
||||
|
||||
notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist));
|
||||
if (notes->src->cycles_hist == NULL)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void symbol__annotate_zero_histograms(struct symbol *sym)
|
||||
{
|
||||
struct annotation *notes = symbol__annotation(sym);
|
||||
|
||||
pthread_mutex_lock(¬es->lock);
|
||||
if (notes->src != NULL)
|
||||
if (notes->src != NULL) {
|
||||
memset(notes->src->histograms, 0,
|
||||
notes->src->nr_histograms * notes->src->sizeof_sym_hist);
|
||||
if (notes->src->cycles_hist)
|
||||
memset(notes->src->cycles_hist, 0,
|
||||
symbol__size(sym) * sizeof(struct cyc_hist));
|
||||
}
|
||||
pthread_mutex_unlock(¬es->lock);
|
||||
}
|
||||
|
||||
static int __symbol__account_cycles(struct annotation *notes,
|
||||
u64 start,
|
||||
unsigned offset, unsigned cycles,
|
||||
unsigned have_start)
|
||||
{
|
||||
struct cyc_hist *ch;
|
||||
|
||||
ch = notes->src->cycles_hist;
|
||||
/*
|
||||
* For now we can only account one basic block per
|
||||
* final jump. But multiple could be overlapping.
|
||||
* Always account the longest one. So when
|
||||
* a shorter one has been already seen throw it away.
|
||||
*
|
||||
* We separately always account the full cycles.
|
||||
*/
|
||||
ch[offset].num_aggr++;
|
||||
ch[offset].cycles_aggr += cycles;
|
||||
|
||||
if (!have_start && ch[offset].have_start)
|
||||
return 0;
|
||||
if (ch[offset].num) {
|
||||
if (have_start && (!ch[offset].have_start ||
|
||||
ch[offset].start > start)) {
|
||||
ch[offset].have_start = 0;
|
||||
ch[offset].cycles = 0;
|
||||
ch[offset].num = 0;
|
||||
if (ch[offset].reset < 0xffff)
|
||||
ch[offset].reset++;
|
||||
} else if (have_start &&
|
||||
ch[offset].start < start)
|
||||
return 0;
|
||||
}
|
||||
ch[offset].have_start = have_start;
|
||||
ch[offset].start = start;
|
||||
ch[offset].cycles += cycles;
|
||||
ch[offset].num++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
|
||||
struct annotation *notes, int evidx, u64 addr)
|
||||
{
|
||||
|
@ -506,7 +562,7 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static struct annotation *symbol__get_annotation(struct symbol *sym)
|
||||
static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles)
|
||||
{
|
||||
struct annotation *notes = symbol__annotation(sym);
|
||||
|
||||
|
@ -514,6 +570,10 @@ static struct annotation *symbol__get_annotation(struct symbol *sym)
|
|||
if (symbol__alloc_hist(sym) < 0)
|
||||
return NULL;
|
||||
}
|
||||
if (!notes->src->cycles_hist && cycles) {
|
||||
if (symbol__alloc_hist_cycles(sym) < 0)
|
||||
return NULL;
|
||||
}
|
||||
return notes;
|
||||
}
|
||||
|
||||
|
@ -524,12 +584,73 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
|
|||
|
||||
if (sym == NULL)
|
||||
return 0;
|
||||
notes = symbol__get_annotation(sym);
|
||||
notes = symbol__get_annotation(sym, false);
|
||||
if (notes == NULL)
|
||||
return -ENOMEM;
|
||||
return __symbol__inc_addr_samples(sym, map, notes, evidx, addr);
|
||||
}
|
||||
|
||||
static int symbol__account_cycles(u64 addr, u64 start,
|
||||
struct symbol *sym, unsigned cycles)
|
||||
{
|
||||
struct annotation *notes;
|
||||
unsigned offset;
|
||||
|
||||
if (sym == NULL)
|
||||
return 0;
|
||||
notes = symbol__get_annotation(sym, true);
|
||||
if (notes == NULL)
|
||||
return -ENOMEM;
|
||||
if (addr < sym->start || addr >= sym->end)
|
||||
return -ERANGE;
|
||||
|
||||
if (start) {
|
||||
if (start < sym->start || start >= sym->end)
|
||||
return -ERANGE;
|
||||
if (start >= addr)
|
||||
start = 0;
|
||||
}
|
||||
offset = addr - sym->start;
|
||||
return __symbol__account_cycles(notes,
|
||||
start ? start - sym->start : 0,
|
||||
offset, cycles,
|
||||
!!start);
|
||||
}
|
||||
|
||||
int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
|
||||
struct addr_map_symbol *start,
|
||||
unsigned cycles)
|
||||
{
|
||||
unsigned long saddr = 0;
|
||||
int err;
|
||||
|
||||
if (!cycles)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Only set start when IPC can be computed. We can only
|
||||
* compute it when the basic block is completely in a single
|
||||
* function.
|
||||
* Special case the case when the jump is elsewhere, but
|
||||
* it starts on the function start.
|
||||
*/
|
||||
if (start &&
|
||||
(start->sym == ams->sym ||
|
||||
(ams->sym &&
|
||||
start->addr == ams->sym->start + ams->map->start)))
|
||||
saddr = start->al_addr;
|
||||
if (saddr == 0)
|
||||
pr_debug2("BB with bad start: addr %lx start %lx sym %lx saddr %lx\n",
|
||||
ams->addr,
|
||||
start ? start->addr : 0,
|
||||
ams->sym ? ams->sym->start + ams->map->start : 0,
|
||||
saddr);
|
||||
err = symbol__account_cycles(ams->al_addr, saddr, ams->sym, cycles);
|
||||
if (err)
|
||||
pr_debug2("account_cycles failed %d\n", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx)
|
||||
{
|
||||
return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr);
|
||||
|
|
|
@ -59,6 +59,8 @@ struct disasm_line {
|
|||
char *name;
|
||||
struct ins *ins;
|
||||
int line_nr;
|
||||
float ipc;
|
||||
u64 cycles;
|
||||
struct ins_operands ops;
|
||||
};
|
||||
|
||||
|
@ -79,6 +81,17 @@ struct sym_hist {
|
|||
u64 addr[0];
|
||||
};
|
||||
|
||||
struct cyc_hist {
|
||||
u64 start;
|
||||
u64 cycles;
|
||||
u64 cycles_aggr;
|
||||
u32 num;
|
||||
u32 num_aggr;
|
||||
u8 have_start;
|
||||
/* 1 byte padding */
|
||||
u16 reset;
|
||||
};
|
||||
|
||||
struct source_line_samples {
|
||||
double percent;
|
||||
double percent_sum;
|
||||
|
@ -97,6 +110,7 @@ struct source_line {
|
|||
* @histogram: Array of addr hit histograms per event being monitored
|
||||
* @lines: If 'print_lines' is specified, per source code line percentages
|
||||
* @source: source parsed from a disassembler like objdump -dS
|
||||
* @cyc_hist: Average cycles per basic block
|
||||
*
|
||||
* lines is allocated, percentages calculated and all sorted by percentage
|
||||
* when the annotation is about to be presented, so the percentages are for
|
||||
|
@ -109,6 +123,7 @@ struct annotated_source {
|
|||
struct source_line *lines;
|
||||
int nr_histograms;
|
||||
int sizeof_sym_hist;
|
||||
struct cyc_hist *cycles_hist;
|
||||
struct sym_hist histograms[0];
|
||||
};
|
||||
|
||||
|
@ -130,6 +145,10 @@ static inline struct annotation *symbol__annotation(struct symbol *sym)
|
|||
|
||||
int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx);
|
||||
|
||||
int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
|
||||
struct addr_map_symbol *start,
|
||||
unsigned cycles);
|
||||
|
||||
int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);
|
||||
|
||||
int symbol__alloc_hist(struct symbol *sym);
|
||||
|
|
|
@ -942,6 +942,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
|
|||
struct itrace_synth_opts *synth_opts = opt->value;
|
||||
const char *p;
|
||||
char *endptr;
|
||||
bool period_type_set = false;
|
||||
|
||||
synth_opts->set = true;
|
||||
|
||||
|
@ -970,10 +971,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
|
|||
case 'i':
|
||||
synth_opts->period_type =
|
||||
PERF_ITRACE_PERIOD_INSTRUCTIONS;
|
||||
period_type_set = true;
|
||||
break;
|
||||
case 't':
|
||||
synth_opts->period_type =
|
||||
PERF_ITRACE_PERIOD_TICKS;
|
||||
period_type_set = true;
|
||||
break;
|
||||
case 'm':
|
||||
synth_opts->period *= 1000;
|
||||
|
@ -986,6 +989,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
|
|||
goto out_err;
|
||||
synth_opts->period_type =
|
||||
PERF_ITRACE_PERIOD_NANOSECS;
|
||||
period_type_set = true;
|
||||
break;
|
||||
case '\0':
|
||||
goto out;
|
||||
|
@ -1039,7 +1043,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
|
|||
}
|
||||
out:
|
||||
if (synth_opts->instructions) {
|
||||
if (!synth_opts->period_type)
|
||||
if (!period_type_set)
|
||||
synth_opts->period_type =
|
||||
PERF_ITRACE_DEFAULT_PERIOD_TYPE;
|
||||
if (!synth_opts->period)
|
||||
|
|
|
@ -36,6 +36,11 @@ static int _eprintf(int level, int var, const char *fmt, va_list args)
|
|||
return ret;
|
||||
}
|
||||
|
||||
int veprintf(int level, int var, const char *fmt, va_list args)
|
||||
{
|
||||
return _eprintf(level, var, fmt, args);
|
||||
}
|
||||
|
||||
int eprintf(int level, int var, const char *fmt, ...)
|
||||
{
|
||||
va_list args;
|
||||
|
|
|
@ -50,6 +50,7 @@ void pr_stat(const char *fmt, ...);
|
|||
|
||||
int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4)));
|
||||
int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5)));
|
||||
int veprintf(int level, int var, const char *fmt, va_list args);
|
||||
|
||||
int perf_debug_option(const char *str);
|
||||
|
||||
|
|
|
@ -134,7 +134,8 @@ struct branch_flags {
|
|||
u64 predicted:1;
|
||||
u64 in_tx:1;
|
||||
u64 abort:1;
|
||||
u64 reserved:60;
|
||||
u64 cycles:16;
|
||||
u64 reserved:44;
|
||||
};
|
||||
|
||||
struct branch_entry {
|
||||
|
|
|
@ -1273,6 +1273,16 @@ u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
|
|||
return __perf_evlist__combined_sample_type(evlist);
|
||||
}
|
||||
|
||||
u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
|
||||
{
|
||||
struct perf_evsel *evsel;
|
||||
u64 branch_type = 0;
|
||||
|
||||
evlist__for_each(evlist, evsel)
|
||||
branch_type |= evsel->attr.branch_sample_type;
|
||||
return branch_type;
|
||||
}
|
||||
|
||||
bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
|
||||
{
|
||||
struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
|
||||
|
|
|
@ -165,6 +165,7 @@ void perf_evlist__set_leader(struct perf_evlist *evlist);
|
|||
u64 perf_evlist__read_format(struct perf_evlist *evlist);
|
||||
u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist);
|
||||
u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist);
|
||||
u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist);
|
||||
bool perf_evlist__sample_id_all(struct perf_evlist *evlist);
|
||||
u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist);
|
||||
|
||||
|
|
|
@ -618,7 +618,8 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a
|
|||
* and not events sampled. Thus we use a pseudo period of 1.
|
||||
*/
|
||||
he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL,
|
||||
1, 1, 0, true);
|
||||
1, bi->flags.cycles ? bi->flags.cycles : 1,
|
||||
0, true);
|
||||
if (he == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -1414,6 +1415,39 @@ int hists__link(struct hists *leader, struct hists *other)
|
|||
return 0;
|
||||
}
|
||||
|
||||
void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
|
||||
struct perf_sample *sample, bool nonany_branch_mode)
|
||||
{
|
||||
struct branch_info *bi;
|
||||
|
||||
/* If we have branch cycles always annotate them. */
|
||||
if (bs && bs->nr && bs->entries[0].flags.cycles) {
|
||||
int i;
|
||||
|
||||
bi = sample__resolve_bstack(sample, al);
|
||||
if (bi) {
|
||||
struct addr_map_symbol *prev = NULL;
|
||||
|
||||
/*
|
||||
* Ignore errors, still want to process the
|
||||
* other entries.
|
||||
*
|
||||
* For non standard branch modes always
|
||||
* force no IPC (prev == NULL)
|
||||
*
|
||||
* Note that perf stores branches reversed from
|
||||
* program order!
|
||||
*/
|
||||
for (i = bs->nr - 1; i >= 0; i--) {
|
||||
addr_map_symbol__account_cycles(&bi[i].from,
|
||||
nonany_branch_mode ? NULL : prev,
|
||||
bi[i].flags.cycles);
|
||||
prev = &bi[i].to;
|
||||
}
|
||||
free(bi);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp)
|
||||
{
|
||||
|
|
|
@ -47,6 +47,7 @@ enum hist_column {
|
|||
HISTC_MEM_SNOOP,
|
||||
HISTC_MEM_DCACHELINE,
|
||||
HISTC_TRANSACTION,
|
||||
HISTC_CYCLES,
|
||||
HISTC_NR_COLS, /* Last entry */
|
||||
};
|
||||
|
||||
|
@ -349,6 +350,9 @@ static inline int script_browse(const char *script_opt __maybe_unused)
|
|||
|
||||
unsigned int hists__sort_list_width(struct hists *hists);
|
||||
|
||||
void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
|
||||
struct perf_sample *sample, bool nonany_branch_mode);
|
||||
|
||||
struct option;
|
||||
int parse_filter_percentage(const struct option *opt __maybe_unused,
|
||||
const char *arg, int unset __maybe_unused);
|
||||
|
|
|
@ -1168,7 +1168,7 @@ static void parse_events_print_error(struct parse_events_error *err,
|
|||
* Maximum error index indent, we will cut
|
||||
* the event string if it's bigger.
|
||||
*/
|
||||
int max_err_idx = 10;
|
||||
int max_err_idx = 13;
|
||||
|
||||
/*
|
||||
* Let's be specific with the message when
|
||||
|
|
|
@ -542,7 +542,7 @@ struct perf_pmu *perf_pmu__find(const char *name)
|
|||
}
|
||||
|
||||
static struct perf_pmu_format *
|
||||
pmu_find_format(struct list_head *formats, char *name)
|
||||
pmu_find_format(struct list_head *formats, const char *name)
|
||||
{
|
||||
struct perf_pmu_format *format;
|
||||
|
||||
|
@ -553,6 +553,21 @@ pmu_find_format(struct list_head *formats, char *name)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
__u64 perf_pmu__format_bits(struct list_head *formats, const char *name)
|
||||
{
|
||||
struct perf_pmu_format *format = pmu_find_format(formats, name);
|
||||
__u64 bits = 0;
|
||||
int fbit;
|
||||
|
||||
if (!format)
|
||||
return 0;
|
||||
|
||||
for_each_set_bit(fbit, format->bits, PERF_PMU_FORMAT_BITS)
|
||||
bits |= 1ULL << fbit;
|
||||
|
||||
return bits;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sets value based on the format definition (format parameter)
|
||||
* and unformated value (value parameter).
|
||||
|
@ -574,6 +589,18 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v,
|
|||
}
|
||||
}
|
||||
|
||||
static __u64 pmu_format_max_value(const unsigned long *format)
|
||||
{
|
||||
int w;
|
||||
|
||||
w = bitmap_weight(format, PERF_PMU_FORMAT_BITS);
|
||||
if (!w)
|
||||
return 0;
|
||||
if (w < 64)
|
||||
return (1ULL << w) - 1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Term is a string term, and might be a param-term. Try to look up it's value
|
||||
* in the remaining terms.
|
||||
|
@ -647,7 +674,7 @@ static int pmu_config_term(struct list_head *formats,
|
|||
{
|
||||
struct perf_pmu_format *format;
|
||||
__u64 *vp;
|
||||
__u64 val;
|
||||
__u64 val, max_val;
|
||||
|
||||
/*
|
||||
* If this is a parameter we've already used for parameterized-eval,
|
||||
|
@ -713,6 +740,22 @@ static int pmu_config_term(struct list_head *formats,
|
|||
} else
|
||||
return -EINVAL;
|
||||
|
||||
max_val = pmu_format_max_value(format->bits);
|
||||
if (val > max_val) {
|
||||
if (err) {
|
||||
err->idx = term->err_val;
|
||||
if (asprintf(&err->str,
|
||||
"value too big for format, maximum is %llu",
|
||||
(unsigned long long)max_val) < 0)
|
||||
err->str = strdup("value too big for format");
|
||||
return -EINVAL;
|
||||
}
|
||||
/*
|
||||
* Assume we don't care if !err, in which case the value will be
|
||||
* silently truncated.
|
||||
*/
|
||||
}
|
||||
|
||||
pmu_format_value(format->bits, val, vp, zero);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -54,6 +54,7 @@ int perf_pmu__config_terms(struct list_head *formats,
|
|||
struct perf_event_attr *attr,
|
||||
struct list_head *head_terms,
|
||||
bool zero, struct parse_events_error *error);
|
||||
__u64 perf_pmu__format_bits(struct list_head *formats, const char *name);
|
||||
int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
|
||||
struct perf_pmu_info *info);
|
||||
struct list_head *perf_pmu__alias(struct perf_pmu *pmu,
|
||||
|
|
|
@ -106,6 +106,8 @@ struct variable_list {
|
|||
struct strlist *vars; /* Available variables */
|
||||
};
|
||||
|
||||
struct map;
|
||||
|
||||
/* Command string to events */
|
||||
extern int parse_perf_probe_command(const char *cmd,
|
||||
struct perf_probe_event *pev);
|
||||
|
|
|
@ -784,10 +784,18 @@ static void branch_stack__printf(struct perf_sample *sample)
|
|||
|
||||
printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr);
|
||||
|
||||
for (i = 0; i < sample->branch_stack->nr; i++)
|
||||
printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n",
|
||||
i, sample->branch_stack->entries[i].from,
|
||||
sample->branch_stack->entries[i].to);
|
||||
for (i = 0; i < sample->branch_stack->nr; i++) {
|
||||
struct branch_entry *e = &sample->branch_stack->entries[i];
|
||||
|
||||
printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
|
||||
i, e->from, e->to,
|
||||
e->flags.cycles,
|
||||
e->flags.mispred ? "M" : " ",
|
||||
e->flags.predicted ? "P" : " ",
|
||||
e->flags.abort ? "A" : " ",
|
||||
e->flags.in_tx ? "T" : " ",
|
||||
(unsigned)e->flags.reserved);
|
||||
}
|
||||
}
|
||||
|
||||
static void regs_dump__printf(u64 mask, u64 *regs)
|
||||
|
|
|
@ -9,7 +9,7 @@ regex_t parent_regex;
|
|||
const char default_parent_pattern[] = "^sys_|^do_page_fault";
|
||||
const char *parent_pattern = default_parent_pattern;
|
||||
const char default_sort_order[] = "comm,dso,symbol";
|
||||
const char default_branch_sort_order[] = "comm,dso_from,symbol_from,dso_to,symbol_to";
|
||||
const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
|
||||
const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
|
||||
const char default_top_sort_order[] = "dso,symbol";
|
||||
const char default_diff_sort_order[] = "dso,symbol";
|
||||
|
@ -526,6 +526,29 @@ static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf,
|
|||
return repsep_snprintf(bf, size, "%-*.*s", width, width, out);
|
||||
}
|
||||
|
||||
static int64_t
|
||||
sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right)
|
||||
{
|
||||
return left->branch_info->flags.cycles -
|
||||
right->branch_info->flags.cycles;
|
||||
}
|
||||
|
||||
static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf,
|
||||
size_t size, unsigned int width)
|
||||
{
|
||||
if (he->branch_info->flags.cycles == 0)
|
||||
return repsep_snprintf(bf, size, "%-*s", width, "-");
|
||||
return repsep_snprintf(bf, size, "%-*hd", width,
|
||||
he->branch_info->flags.cycles);
|
||||
}
|
||||
|
||||
struct sort_entry sort_cycles = {
|
||||
.se_header = "Basic Block Cycles",
|
||||
.se_cmp = sort__cycles_cmp,
|
||||
.se_snprintf = hist_entry__cycles_snprintf,
|
||||
.se_width_idx = HISTC_CYCLES,
|
||||
};
|
||||
|
||||
/* --sort daddr_sym */
|
||||
static int64_t
|
||||
sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
|
||||
|
@ -1190,6 +1213,7 @@ static struct sort_dimension bstack_sort_dimensions[] = {
|
|||
DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
|
||||
DIM(SORT_IN_TX, "in_tx", sort_in_tx),
|
||||
DIM(SORT_ABORT, "abort", sort_abort),
|
||||
DIM(SORT_CYCLES, "cycles", sort_cycles),
|
||||
};
|
||||
|
||||
#undef DIM
|
||||
|
|
|
@ -185,6 +185,7 @@ enum sort_type {
|
|||
SORT_MISPREDICT,
|
||||
SORT_ABORT,
|
||||
SORT_IN_TX,
|
||||
SORT_CYCLES,
|
||||
|
||||
/* memory mode specific sort keys */
|
||||
__SORT_MEMORY_MODE,
|
||||
|
|
|
@ -238,3 +238,142 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist)
|
|||
perf_evsel__reset_counts(evsel);
|
||||
}
|
||||
}
|
||||
|
||||
static void zero_per_pkg(struct perf_evsel *counter)
|
||||
{
|
||||
if (counter->per_pkg_mask)
|
||||
memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
|
||||
}
|
||||
|
||||
static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
|
||||
{
|
||||
unsigned long *mask = counter->per_pkg_mask;
|
||||
struct cpu_map *cpus = perf_evsel__cpus(counter);
|
||||
int s;
|
||||
|
||||
*skip = false;
|
||||
|
||||
if (!counter->per_pkg)
|
||||
return 0;
|
||||
|
||||
if (cpu_map__empty(cpus))
|
||||
return 0;
|
||||
|
||||
if (!mask) {
|
||||
mask = zalloc(MAX_NR_CPUS);
|
||||
if (!mask)
|
||||
return -ENOMEM;
|
||||
|
||||
counter->per_pkg_mask = mask;
|
||||
}
|
||||
|
||||
s = cpu_map__get_socket(cpus, cpu);
|
||||
if (s < 0)
|
||||
return -1;
|
||||
|
||||
*skip = test_and_set_bit(s, mask) == 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel,
|
||||
int cpu, int thread,
|
||||
struct perf_counts_values *count)
|
||||
{
|
||||
struct perf_counts_values *aggr = &evsel->counts->aggr;
|
||||
static struct perf_counts_values zero;
|
||||
bool skip = false;
|
||||
|
||||
if (check_per_pkg(evsel, cpu, &skip)) {
|
||||
pr_err("failed to read per-pkg counter\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (skip)
|
||||
count = &zero;
|
||||
|
||||
switch (config->aggr_mode) {
|
||||
case AGGR_THREAD:
|
||||
case AGGR_CORE:
|
||||
case AGGR_SOCKET:
|
||||
case AGGR_NONE:
|
||||
if (!evsel->snapshot)
|
||||
perf_evsel__compute_deltas(evsel, cpu, thread, count);
|
||||
perf_counts_values__scale(count, config->scale, NULL);
|
||||
if (config->aggr_mode == AGGR_NONE)
|
||||
perf_stat__update_shadow_stats(evsel, count->values, cpu);
|
||||
break;
|
||||
case AGGR_GLOBAL:
|
||||
aggr->val += count->val;
|
||||
if (config->scale) {
|
||||
aggr->ena += count->ena;
|
||||
aggr->run += count->run;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int process_counter_maps(struct perf_stat_config *config,
|
||||
struct perf_evsel *counter)
|
||||
{
|
||||
int nthreads = thread_map__nr(counter->threads);
|
||||
int ncpus = perf_evsel__nr_cpus(counter);
|
||||
int cpu, thread;
|
||||
|
||||
if (counter->system_wide)
|
||||
nthreads = 1;
|
||||
|
||||
for (thread = 0; thread < nthreads; thread++) {
|
||||
for (cpu = 0; cpu < ncpus; cpu++) {
|
||||
if (process_counter_values(config, counter, cpu, thread,
|
||||
perf_counts(counter->counts, cpu, thread)))
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Process all readings of one counter for this round: reset the
 * aggregate, fold in every (cpu, thread) value, and — in AGGR_GLOBAL
 * mode only — scale the aggregate, record it in the running stats and
 * update the shadow stats used for ratio printing.
 *
 * Returns 0 on success or a negative error from the per-value pass.
 */
int perf_stat_process_counter(struct perf_stat_config *config,
			      struct perf_evsel *counter)
{
	struct perf_counts_values *aggr = &counter->counts->aggr;
	struct perf_stat *ps = counter->priv;
	u64 *count = counter->counts->aggr.values;	/* {val, ena, run} triple */
	int i, ret;

	/* Start this round from a clean aggregate. */
	aggr->val = aggr->ena = aggr->run = 0;
	/*
	 * NOTE(review): this initializes only res_stats[0], while the loop
	 * below updates res_stats[0..2] — TODO confirm the remaining
	 * elements are initialized elsewhere (e.g. at alloc time).
	 */
	init_stats(ps->res_stats);

	/* Forget which sockets were counted in previous rounds. */
	if (counter->per_pkg)
		zero_per_pkg(counter);

	ret = process_counter_maps(config, counter);
	if (ret)
		return ret;

	/* Everything below only applies to the global aggregate. */
	if (config->aggr_mode != AGGR_GLOBAL)
		return 0;

	if (!counter->snapshot)
		perf_evsel__compute_deltas(counter, -1, -1, aggr);
	perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);

	/* Accumulate val/ena/run into the per-counter running statistics. */
	for (i = 0; i < 3; i++)
		update_stats(&ps->res_stats[i], count[i]);

	if (verbose) {
		fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
			perf_evsel__name(counter), count[0], count[1], count[2]);
	}

	/*
	 * Save the full runtime - to allow normalization during printout:
	 */
	perf_stat__update_shadow_stats(counter, count, 0);

	return 0;
}
|
||||
|
|
|
@ -50,6 +50,13 @@ struct perf_counts {
|
|||
struct xyarray *values;
|
||||
};
|
||||
|
||||
/* Knobs controlling how 'perf stat' aggregates and reports counter values. */
struct perf_stat_config {
	enum aggr_mode aggr_mode;	/* aggregation level (global, socket, core, thread, none) */
	bool scale;	/* scale counts by enabled/running times */
	FILE *output;	/* stream results are printed to */
	unsigned int interval;	/* interval-mode period — presumably milliseconds; TODO confirm against -I option */
};
|
||||
|
||||
static inline struct perf_counts_values*
|
||||
perf_counts(struct perf_counts *counts, int cpu, int thread)
|
||||
{
|
||||
|
@ -109,4 +116,7 @@ int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw);
|
|||
int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
|
||||
void perf_evlist__free_stats(struct perf_evlist *evlist);
|
||||
void perf_evlist__reset_stats(struct perf_evlist *evlist);
|
||||
|
||||
int perf_stat_process_counter(struct perf_stat_config *config,
|
||||
struct perf_evsel *counter);
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue