Merge branch 'perf/hw-branch-sampling' into perf/core
Merge reason: The 'perf record -b' hardware branch sampling feature is ready for upstream. Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
commit
bea95c152d
|
@ -685,6 +685,10 @@ static int alpha_pmu_event_init(struct perf_event *event)
|
|||
{
|
||||
int err;
|
||||
|
||||
/* does not support taken branch sampling */
|
||||
if (has_branch_stack(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
switch (event->attr.type) {
|
||||
case PERF_TYPE_RAW:
|
||||
case PERF_TYPE_HARDWARE:
|
||||
|
|
|
@ -539,6 +539,10 @@ static int armpmu_event_init(struct perf_event *event)
|
|||
int err = 0;
|
||||
atomic_t *active_events = &armpmu->active_events;
|
||||
|
||||
/* does not support taken branch sampling */
|
||||
if (has_branch_stack(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (armpmu->map_event(event) == -ENOENT)
|
||||
return -ENOENT;
|
||||
|
||||
|
|
|
@ -606,6 +606,10 @@ static int mipspmu_event_init(struct perf_event *event)
|
|||
{
|
||||
int err = 0;
|
||||
|
||||
/* does not support taken branch sampling */
|
||||
if (has_branch_stack(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
switch (event->attr.type) {
|
||||
case PERF_TYPE_RAW:
|
||||
case PERF_TYPE_HARDWARE:
|
||||
|
|
|
@ -1084,6 +1084,10 @@ static int power_pmu_event_init(struct perf_event *event)
|
|||
if (!ppmu)
|
||||
return -ENOENT;
|
||||
|
||||
/* does not support taken branch sampling */
|
||||
if (has_branch_stack(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
switch (event->attr.type) {
|
||||
case PERF_TYPE_HARDWARE:
|
||||
ev = event->attr.config;
|
||||
|
|
|
@ -310,6 +310,10 @@ static int sh_pmu_event_init(struct perf_event *event)
|
|||
{
|
||||
int err;
|
||||
|
||||
/* does not support taken branch sampling */
|
||||
if (has_branch_stack(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
switch (event->attr.type) {
|
||||
case PERF_TYPE_RAW:
|
||||
case PERF_TYPE_HW_CACHE:
|
||||
|
|
|
@ -1105,6 +1105,10 @@ static int sparc_pmu_event_init(struct perf_event *event)
|
|||
if (atomic_read(&nmi_active) < 0)
|
||||
return -ENODEV;
|
||||
|
||||
/* does not support taken branch sampling */
|
||||
if (has_branch_stack(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
switch (attr->type) {
|
||||
case PERF_TYPE_HARDWARE:
|
||||
if (attr->config >= sparc_pmu->max_events)
|
||||
|
|
|
@ -56,6 +56,13 @@
|
|||
#define MSR_OFFCORE_RSP_0 0x000001a6
|
||||
#define MSR_OFFCORE_RSP_1 0x000001a7
|
||||
|
||||
#define MSR_LBR_SELECT 0x000001c8
|
||||
#define MSR_LBR_TOS 0x000001c9
|
||||
#define MSR_LBR_NHM_FROM 0x00000680
|
||||
#define MSR_LBR_NHM_TO 0x000006c0
|
||||
#define MSR_LBR_CORE_FROM 0x00000040
|
||||
#define MSR_LBR_CORE_TO 0x00000060
|
||||
|
||||
#define MSR_IA32_PEBS_ENABLE 0x000003f1
|
||||
#define MSR_IA32_DS_AREA 0x00000600
|
||||
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
|
||||
|
|
|
@ -353,6 +353,36 @@ int x86_setup_perfctr(struct perf_event *event)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* check that branch_sample_type is compatible with
|
||||
* settings needed for precise_ip > 1 which implies
|
||||
* using the LBR to capture ALL taken branches at the
|
||||
* priv levels of the measurement
|
||||
*/
|
||||
static inline int precise_br_compat(struct perf_event *event)
|
||||
{
|
||||
u64 m = event->attr.branch_sample_type;
|
||||
u64 b = 0;
|
||||
|
||||
/* must capture all branches */
|
||||
if (!(m & PERF_SAMPLE_BRANCH_ANY))
|
||||
return 0;
|
||||
|
||||
m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
|
||||
|
||||
if (!event->attr.exclude_user)
|
||||
b |= PERF_SAMPLE_BRANCH_USER;
|
||||
|
||||
if (!event->attr.exclude_kernel)
|
||||
b |= PERF_SAMPLE_BRANCH_KERNEL;
|
||||
|
||||
/*
|
||||
* ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
|
||||
*/
|
||||
|
||||
return m == b;
|
||||
}
|
||||
|
||||
int x86_pmu_hw_config(struct perf_event *event)
|
||||
{
|
||||
if (event->attr.precise_ip) {
|
||||
|
@ -369,6 +399,36 @@ int x86_pmu_hw_config(struct perf_event *event)
|
|||
|
||||
if (event->attr.precise_ip > precise)
|
||||
return -EOPNOTSUPP;
|
||||
/*
|
||||
* check that PEBS LBR correction does not conflict with
|
||||
* whatever the user is asking with attr->branch_sample_type
|
||||
*/
|
||||
if (event->attr.precise_ip > 1) {
|
||||
u64 *br_type = &event->attr.branch_sample_type;
|
||||
|
||||
if (has_branch_stack(event)) {
|
||||
if (!precise_br_compat(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
/* branch_sample_type is compatible */
|
||||
|
||||
} else {
|
||||
/*
|
||||
* user did not specify branch_sample_type
|
||||
*
|
||||
* For PEBS fixups, we capture all
|
||||
* the branches at the priv level of the
|
||||
* event.
|
||||
*/
|
||||
*br_type = PERF_SAMPLE_BRANCH_ANY;
|
||||
|
||||
if (!event->attr.exclude_user)
|
||||
*br_type |= PERF_SAMPLE_BRANCH_USER;
|
||||
|
||||
if (!event->attr.exclude_kernel)
|
||||
*br_type |= PERF_SAMPLE_BRANCH_KERNEL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -426,6 +486,10 @@ static int __x86_pmu_event_init(struct perf_event *event)
|
|||
/* mark unused */
|
||||
event->hw.extra_reg.idx = EXTRA_REG_NONE;
|
||||
|
||||
/* mark not used */
|
||||
event->hw.extra_reg.idx = EXTRA_REG_NONE;
|
||||
event->hw.branch_reg.idx = EXTRA_REG_NONE;
|
||||
|
||||
return x86_pmu.hw_config(event);
|
||||
}
|
||||
|
||||
|
@ -1607,25 +1671,32 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
|
|||
NULL,
|
||||
};
|
||||
|
||||
static void x86_pmu_flush_branch_stack(void)
|
||||
{
|
||||
if (x86_pmu.flush_branch_stack)
|
||||
x86_pmu.flush_branch_stack();
|
||||
}
|
||||
|
||||
static struct pmu pmu = {
|
||||
.pmu_enable = x86_pmu_enable,
|
||||
.pmu_disable = x86_pmu_disable,
|
||||
.pmu_enable = x86_pmu_enable,
|
||||
.pmu_disable = x86_pmu_disable,
|
||||
|
||||
.attr_groups = x86_pmu_attr_groups,
|
||||
|
||||
.event_init = x86_pmu_event_init,
|
||||
|
||||
.add = x86_pmu_add,
|
||||
.del = x86_pmu_del,
|
||||
.start = x86_pmu_start,
|
||||
.stop = x86_pmu_stop,
|
||||
.read = x86_pmu_read,
|
||||
.add = x86_pmu_add,
|
||||
.del = x86_pmu_del,
|
||||
.start = x86_pmu_start,
|
||||
.stop = x86_pmu_stop,
|
||||
.read = x86_pmu_read,
|
||||
|
||||
.start_txn = x86_pmu_start_txn,
|
||||
.cancel_txn = x86_pmu_cancel_txn,
|
||||
.commit_txn = x86_pmu_commit_txn,
|
||||
|
||||
.event_idx = x86_pmu_event_idx,
|
||||
.flush_branch_stack = x86_pmu_flush_branch_stack,
|
||||
};
|
||||
|
||||
void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
|
||||
|
|
|
@ -33,6 +33,7 @@ enum extra_reg_type {
|
|||
|
||||
EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
|
||||
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
|
||||
EXTRA_REG_LBR = 2, /* lbr_select */
|
||||
|
||||
EXTRA_REG_MAX /* number of entries needed */
|
||||
};
|
||||
|
@ -130,6 +131,8 @@ struct cpu_hw_events {
|
|||
void *lbr_context;
|
||||
struct perf_branch_stack lbr_stack;
|
||||
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
|
||||
struct er_account *lbr_sel;
|
||||
u64 br_sel;
|
||||
|
||||
/*
|
||||
* Intel host/guest exclude bits
|
||||
|
@ -344,6 +347,7 @@ struct x86_pmu {
|
|||
void (*cpu_starting)(int cpu);
|
||||
void (*cpu_dying)(int cpu);
|
||||
void (*cpu_dead)(int cpu);
|
||||
void (*flush_branch_stack)(void);
|
||||
|
||||
/*
|
||||
* Intel Arch Perfmon v2+
|
||||
|
@ -365,6 +369,8 @@ struct x86_pmu {
|
|||
*/
|
||||
unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
|
||||
int lbr_nr; /* hardware stack size */
|
||||
u64 lbr_sel_mask; /* LBR_SELECT valid bits */
|
||||
const int *lbr_sel_map; /* lbr_select mappings */
|
||||
|
||||
/*
|
||||
* Extra registers for events
|
||||
|
@ -478,6 +484,15 @@ extern struct event_constraint emptyconstraint;
|
|||
|
||||
extern struct event_constraint unconstrained;
|
||||
|
||||
/*
 * Tell whether @ip is treated as a kernel address: above PAGE_OFFSET
 * on 32-bit, negative when viewed as a signed long otherwise.
 */
static inline bool kernel_ip(unsigned long ip)
{
	bool in_kernel;

#ifdef CONFIG_X86_32
	in_kernel = ip > PAGE_OFFSET;
#else
	in_kernel = (long)ip < 0;
#endif

	return in_kernel;
}
|
||||
|
||||
#ifdef CONFIG_CPU_SUP_AMD
|
||||
|
||||
int amd_pmu_init(void);
|
||||
|
@ -558,6 +573,10 @@ void intel_pmu_lbr_init_nhm(void);
|
|||
|
||||
void intel_pmu_lbr_init_atom(void);
|
||||
|
||||
void intel_pmu_lbr_init_snb(void);
|
||||
|
||||
int intel_pmu_setup_lbr_filter(struct perf_event *event);
|
||||
|
||||
int p4_pmu_init(void);
|
||||
|
||||
int p6_pmu_init(void);
|
||||
|
|
|
@ -139,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event)
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (has_branch_stack(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (event->attr.exclude_host && event->attr.exclude_guest)
|
||||
/*
|
||||
* When HO == GO == 1 the hardware treats that as GO == HO == 0
|
||||
|
|
|
@ -728,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids
|
|||
},
|
||||
};
|
||||
|
||||
static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
|
||||
{
|
||||
/* user explicitly requested branch sampling */
|
||||
if (has_branch_stack(event))
|
||||
return true;
|
||||
|
||||
/* implicit branch sampling to correct PEBS skid */
|
||||
if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void intel_pmu_disable_all(void)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
||||
|
@ -882,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event)
|
|||
cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
|
||||
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
|
||||
|
||||
/*
|
||||
* must disable before any actual event
|
||||
* because any event may be combined with LBR
|
||||
*/
|
||||
if (intel_pmu_needs_lbr_smpl(event))
|
||||
intel_pmu_lbr_disable(event);
|
||||
|
||||
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
|
||||
intel_pmu_disable_fixed(hwc);
|
||||
return;
|
||||
|
@ -936,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
|
|||
intel_pmu_enable_bts(hwc->config);
|
||||
return;
|
||||
}
|
||||
/*
|
||||
* must be enabled before any actual event
|
||||
* because any event may be combined with LBR
|
||||
*/
|
||||
if (intel_pmu_needs_lbr_smpl(event))
|
||||
intel_pmu_lbr_enable(event);
|
||||
|
||||
if (event->attr.exclude_host)
|
||||
cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
|
||||
|
@ -1058,6 +1084,9 @@ again:
|
|||
|
||||
data.period = event->hw.last_period;
|
||||
|
||||
if (has_branch_stack(event))
|
||||
data.br_stack = &cpuc->lbr_stack;
|
||||
|
||||
if (perf_event_overflow(event, &data, regs))
|
||||
x86_pmu_stop(event, 0);
|
||||
}
|
||||
|
@ -1124,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
|
|||
*/
|
||||
static struct event_constraint *
|
||||
__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
|
||||
struct perf_event *event)
|
||||
struct perf_event *event,
|
||||
struct hw_perf_event_extra *reg)
|
||||
{
|
||||
struct event_constraint *c = &emptyconstraint;
|
||||
struct hw_perf_event_extra *reg = &event->hw.extra_reg;
|
||||
struct er_account *era;
|
||||
unsigned long flags;
|
||||
int orig_idx = reg->idx;
|
||||
|
||||
/* already allocated shared msr */
|
||||
if (reg->alloc)
|
||||
return &unconstrained;
|
||||
return NULL; /* call x86_get_event_constraint() */
|
||||
|
||||
again:
|
||||
era = &cpuc->shared_regs->regs[reg->idx];
|
||||
|
@ -1157,14 +1186,10 @@ again:
|
|||
reg->alloc = 1;
|
||||
|
||||
/*
|
||||
* All events using extra_reg are unconstrained.
|
||||
* Avoids calling x86_get_event_constraints()
|
||||
*
|
||||
* Must revisit if extra_reg controlling events
|
||||
* ever have constraints. Worst case we go through
|
||||
* the regular event constraint table.
|
||||
* need to call x86_get_event_constraint()
|
||||
* to check if associated event has constraints
|
||||
*/
|
||||
c = &unconstrained;
|
||||
c = NULL;
|
||||
} else if (intel_try_alt_er(event, orig_idx)) {
|
||||
raw_spin_unlock_irqrestore(&era->lock, flags);
|
||||
goto again;
|
||||
|
@ -1201,11 +1226,23 @@ static struct event_constraint *
|
|||
intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
|
||||
struct perf_event *event)
|
||||
{
|
||||
struct event_constraint *c = NULL;
|
||||
|
||||
if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
|
||||
c = __intel_shared_reg_get_constraints(cpuc, event);
|
||||
struct event_constraint *c = NULL, *d;
|
||||
struct hw_perf_event_extra *xreg, *breg;
|
||||
|
||||
xreg = &event->hw.extra_reg;
|
||||
if (xreg->idx != EXTRA_REG_NONE) {
|
||||
c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
|
||||
if (c == &emptyconstraint)
|
||||
return c;
|
||||
}
|
||||
breg = &event->hw.branch_reg;
|
||||
if (breg->idx != EXTRA_REG_NONE) {
|
||||
d = __intel_shared_reg_get_constraints(cpuc, event, breg);
|
||||
if (d == &emptyconstraint) {
|
||||
__intel_shared_reg_put_constraints(cpuc, xreg);
|
||||
c = d;
|
||||
}
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
|
@ -1253,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
|
|||
reg = &event->hw.extra_reg;
|
||||
if (reg->idx != EXTRA_REG_NONE)
|
||||
__intel_shared_reg_put_constraints(cpuc, reg);
|
||||
|
||||
reg = &event->hw.branch_reg;
|
||||
if (reg->idx != EXTRA_REG_NONE)
|
||||
__intel_shared_reg_put_constraints(cpuc, reg);
|
||||
}
|
||||
|
||||
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
|
||||
|
@ -1295,6 +1336,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
|
|||
event->hw.config = alt_config;
|
||||
}
|
||||
|
||||
if (intel_pmu_needs_lbr_smpl(event)) {
|
||||
ret = intel_pmu_setup_lbr_filter(event);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (event->attr.type != PERF_TYPE_RAW)
|
||||
return 0;
|
||||
|
||||
|
@ -1433,7 +1480,7 @@ static int intel_pmu_cpu_prepare(int cpu)
|
|||
{
|
||||
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
|
||||
|
||||
if (!x86_pmu.extra_regs)
|
||||
if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
|
||||
return NOTIFY_OK;
|
||||
|
||||
cpuc->shared_regs = allocate_shared_regs(cpu);
|
||||
|
@ -1455,22 +1502,28 @@ static void intel_pmu_cpu_starting(int cpu)
|
|||
*/
|
||||
intel_pmu_lbr_reset();
|
||||
|
||||
if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
|
||||
cpuc->lbr_sel = NULL;
|
||||
|
||||
if (!cpuc->shared_regs)
|
||||
return;
|
||||
|
||||
for_each_cpu(i, topology_thread_cpumask(cpu)) {
|
||||
struct intel_shared_regs *pc;
|
||||
if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
|
||||
for_each_cpu(i, topology_thread_cpumask(cpu)) {
|
||||
struct intel_shared_regs *pc;
|
||||
|
||||
pc = per_cpu(cpu_hw_events, i).shared_regs;
|
||||
if (pc && pc->core_id == core_id) {
|
||||
cpuc->kfree_on_online = cpuc->shared_regs;
|
||||
cpuc->shared_regs = pc;
|
||||
break;
|
||||
pc = per_cpu(cpu_hw_events, i).shared_regs;
|
||||
if (pc && pc->core_id == core_id) {
|
||||
cpuc->kfree_on_online = cpuc->shared_regs;
|
||||
cpuc->shared_regs = pc;
|
||||
break;
|
||||
}
|
||||
}
|
||||
cpuc->shared_regs->core_id = core_id;
|
||||
cpuc->shared_regs->refcnt++;
|
||||
}
|
||||
|
||||
cpuc->shared_regs->core_id = core_id;
|
||||
cpuc->shared_regs->refcnt++;
|
||||
if (x86_pmu.lbr_sel_map)
|
||||
cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
|
||||
}
|
||||
|
||||
static void intel_pmu_cpu_dying(int cpu)
|
||||
|
@ -1488,6 +1541,18 @@ static void intel_pmu_cpu_dying(int cpu)
|
|||
fini_debug_store_on_cpu(cpu);
|
||||
}
|
||||
|
||||
static void intel_pmu_flush_branch_stack(void)
|
||||
{
|
||||
/*
|
||||
* Intel LBR does not tag entries with the
|
||||
* PID of the current task, then we need to
|
||||
* flush it on ctxsw
|
||||
* For now, we simply reset it
|
||||
*/
|
||||
if (x86_pmu.lbr_nr)
|
||||
intel_pmu_lbr_reset();
|
||||
}
|
||||
|
||||
static __initconst const struct x86_pmu intel_pmu = {
|
||||
.name = "Intel",
|
||||
.handle_irq = intel_pmu_handle_irq,
|
||||
|
@ -1515,6 +1580,7 @@ static __initconst const struct x86_pmu intel_pmu = {
|
|||
.cpu_starting = intel_pmu_cpu_starting,
|
||||
.cpu_dying = intel_pmu_cpu_dying,
|
||||
.guest_get_msrs = intel_guest_get_msrs,
|
||||
.flush_branch_stack = intel_pmu_flush_branch_stack,
|
||||
};
|
||||
|
||||
static __init void intel_clovertown_quirk(void)
|
||||
|
@ -1745,7 +1811,7 @@ __init int intel_pmu_init(void)
|
|||
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
|
||||
sizeof(hw_cache_event_ids));
|
||||
|
||||
intel_pmu_lbr_init_nhm();
|
||||
intel_pmu_lbr_init_snb();
|
||||
|
||||
x86_pmu.event_constraints = intel_snb_event_constraints;
|
||||
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include <linux/slab.h>
|
||||
|
||||
#include <asm/perf_event.h>
|
||||
#include <asm/insn.h>
|
||||
|
||||
#include "perf_event.h"
|
||||
|
||||
|
@ -439,9 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event)
|
|||
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
|
||||
|
||||
cpuc->pebs_enabled |= 1ULL << hwc->idx;
|
||||
|
||||
if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
|
||||
intel_pmu_lbr_enable(event);
|
||||
}
|
||||
|
||||
void intel_pmu_pebs_disable(struct perf_event *event)
|
||||
|
@ -454,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
|
|||
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
|
||||
|
||||
hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
|
||||
|
||||
if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
|
||||
intel_pmu_lbr_disable(event);
|
||||
}
|
||||
|
||||
void intel_pmu_pebs_enable_all(void)
|
||||
|
@ -475,17 +470,6 @@ void intel_pmu_pebs_disable_all(void)
|
|||
wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
|
||||
}
|
||||
|
||||
#include <asm/insn.h>
|
||||
|
||||
static inline bool kernel_ip(unsigned long ip)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
return ip > PAGE_OFFSET;
|
||||
#else
|
||||
return (long)ip < 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
||||
|
@ -572,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
|
|||
* both formats and we don't use the other fields in this
|
||||
* routine.
|
||||
*/
|
||||
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
||||
struct pebs_record_core *pebs = __pebs;
|
||||
struct perf_sample_data data;
|
||||
struct pt_regs regs;
|
||||
|
@ -602,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
|
|||
else
|
||||
regs.flags &= ~PERF_EFLAGS_EXACT;
|
||||
|
||||
if (has_branch_stack(event))
|
||||
data.br_stack = &cpuc->lbr_stack;
|
||||
|
||||
if (perf_event_overflow(event, &data, &regs))
|
||||
x86_pmu_stop(event, 0);
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include <asm/perf_event.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/insn.h>
|
||||
|
||||
#include "perf_event.h"
|
||||
|
||||
|
@ -13,6 +14,100 @@ enum {
|
|||
LBR_FORMAT_EIP_FLAGS = 0x03,
|
||||
};
|
||||
|
||||
/*
|
||||
* Intel LBR_SELECT bits
|
||||
* Intel Vol3a, April 2011, Section 16.7 Table 16-10
|
||||
*
|
||||
* Hardware branch filter (not available on all CPUs)
|
||||
*/
|
||||
#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */
|
||||
#define LBR_USER_BIT 1 /* do not capture at ring > 0 */
|
||||
#define LBR_JCC_BIT 2 /* do not capture conditional branches */
|
||||
#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */
|
||||
#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */
|
||||
#define LBR_RETURN_BIT 5 /* do not capture near returns */
|
||||
#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */
|
||||
#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */
|
||||
#define LBR_FAR_BIT 8 /* do not capture far branches */
|
||||
|
||||
#define LBR_KERNEL (1 << LBR_KERNEL_BIT)
|
||||
#define LBR_USER (1 << LBR_USER_BIT)
|
||||
#define LBR_JCC (1 << LBR_JCC_BIT)
|
||||
#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT)
|
||||
#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT)
|
||||
#define LBR_RETURN (1 << LBR_RETURN_BIT)
|
||||
#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT)
|
||||
#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT)
|
||||
#define LBR_FAR (1 << LBR_FAR_BIT)
|
||||
|
||||
#define LBR_PLM (LBR_KERNEL | LBR_USER)
|
||||
|
||||
#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */
|
||||
#define LBR_NOT_SUPP -1 /* LBR filter not supported */
|
||||
#define LBR_IGN 0 /* ignored */
|
||||
|
||||
#define LBR_ANY \
|
||||
(LBR_JCC |\
|
||||
LBR_REL_CALL |\
|
||||
LBR_IND_CALL |\
|
||||
LBR_RETURN |\
|
||||
LBR_REL_JMP |\
|
||||
LBR_IND_JMP |\
|
||||
LBR_FAR)
|
||||
|
||||
#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
|
||||
|
||||
#define for_each_branch_sample_type(x) \
|
||||
for ((x) = PERF_SAMPLE_BRANCH_USER; \
|
||||
(x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
|
||||
|
||||
/*
|
||||
* x86 control flow change classification
|
||||
* x86 control flow changes include branches, interrupts, traps, faults
|
||||
*/
|
||||
enum {
|
||||
X86_BR_NONE = 0, /* unknown */
|
||||
|
||||
X86_BR_USER = 1 << 0, /* branch target is user */
|
||||
X86_BR_KERNEL = 1 << 1, /* branch target is kernel */
|
||||
|
||||
X86_BR_CALL = 1 << 2, /* call */
|
||||
X86_BR_RET = 1 << 3, /* return */
|
||||
X86_BR_SYSCALL = 1 << 4, /* syscall */
|
||||
X86_BR_SYSRET = 1 << 5, /* syscall return */
|
||||
X86_BR_INT = 1 << 6, /* sw interrupt */
|
||||
X86_BR_IRET = 1 << 7, /* return from interrupt */
|
||||
X86_BR_JCC = 1 << 8, /* conditional */
|
||||
X86_BR_JMP = 1 << 9, /* jump */
|
||||
X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
|
||||
X86_BR_IND_CALL = 1 << 11,/* indirect calls */
|
||||
};
|
||||
|
||||
#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
|
||||
|
||||
#define X86_BR_ANY \
|
||||
(X86_BR_CALL |\
|
||||
X86_BR_RET |\
|
||||
X86_BR_SYSCALL |\
|
||||
X86_BR_SYSRET |\
|
||||
X86_BR_INT |\
|
||||
X86_BR_IRET |\
|
||||
X86_BR_JCC |\
|
||||
X86_BR_JMP |\
|
||||
X86_BR_IRQ |\
|
||||
X86_BR_IND_CALL)
|
||||
|
||||
#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
|
||||
|
||||
#define X86_BR_ANY_CALL \
|
||||
(X86_BR_CALL |\
|
||||
X86_BR_IND_CALL |\
|
||||
X86_BR_SYSCALL |\
|
||||
X86_BR_IRQ |\
|
||||
X86_BR_INT)
|
||||
|
||||
static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
|
||||
|
||||
/*
|
||||
* We only support LBR implementations that have FREEZE_LBRS_ON_PMI
|
||||
* otherwise it becomes near impossible to get a reliable stack.
|
||||
|
@ -21,6 +116,10 @@ enum {
|
|||
static void __intel_pmu_lbr_enable(void)
|
||||
{
|
||||
u64 debugctl;
|
||||
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
||||
|
||||
if (cpuc->lbr_sel)
|
||||
wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);
|
||||
|
||||
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
|
||||
debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
|
||||
|
@ -76,11 +175,11 @@ void intel_pmu_lbr_enable(struct perf_event *event)
|
|||
* Reset the LBR stack if we changed task context to
|
||||
* avoid data leaks.
|
||||
*/
|
||||
|
||||
if (event->ctx->task && cpuc->lbr_context != event->ctx) {
|
||||
intel_pmu_lbr_reset();
|
||||
cpuc->lbr_context = event->ctx;
|
||||
}
|
||||
cpuc->br_sel = event->hw.branch_reg.reg;
|
||||
|
||||
cpuc->lbr_users++;
|
||||
}
|
||||
|
@ -95,8 +194,11 @@ void intel_pmu_lbr_disable(struct perf_event *event)
|
|||
cpuc->lbr_users--;
|
||||
WARN_ON_ONCE(cpuc->lbr_users < 0);
|
||||
|
||||
if (cpuc->enabled && !cpuc->lbr_users)
|
||||
if (cpuc->enabled && !cpuc->lbr_users) {
|
||||
__intel_pmu_lbr_disable();
|
||||
/* avoid stale pointer */
|
||||
cpuc->lbr_context = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void intel_pmu_lbr_enable_all(void)
|
||||
|
@ -115,6 +217,9 @@ void intel_pmu_lbr_disable_all(void)
|
|||
__intel_pmu_lbr_disable();
|
||||
}
|
||||
|
||||
/*
|
||||
* TOS = most recently recorded branch
|
||||
*/
|
||||
static inline u64 intel_pmu_lbr_tos(void)
|
||||
{
|
||||
u64 tos;
|
||||
|
@ -142,15 +247,15 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
|
|||
|
||||
rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
|
||||
|
||||
cpuc->lbr_entries[i].from = msr_lastbranch.from;
|
||||
cpuc->lbr_entries[i].to = msr_lastbranch.to;
|
||||
cpuc->lbr_entries[i].flags = 0;
|
||||
cpuc->lbr_entries[i].from = msr_lastbranch.from;
|
||||
cpuc->lbr_entries[i].to = msr_lastbranch.to;
|
||||
cpuc->lbr_entries[i].mispred = 0;
|
||||
cpuc->lbr_entries[i].predicted = 0;
|
||||
cpuc->lbr_entries[i].reserved = 0;
|
||||
}
|
||||
cpuc->lbr_stack.nr = i;
|
||||
}
|
||||
|
||||
#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
|
||||
|
||||
/*
|
||||
* Due to lack of segmentation in Linux the effective address (offset)
|
||||
* is the same as the linear address, allowing us to merge the LIP and EIP
|
||||
|
@ -165,19 +270,22 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
|
|||
|
||||
for (i = 0; i < x86_pmu.lbr_nr; i++) {
|
||||
unsigned long lbr_idx = (tos - i) & mask;
|
||||
u64 from, to, flags = 0;
|
||||
u64 from, to, mis = 0, pred = 0;
|
||||
|
||||
rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
|
||||
rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
|
||||
|
||||
if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
|
||||
flags = !!(from & LBR_FROM_FLAG_MISPRED);
|
||||
mis = !!(from & LBR_FROM_FLAG_MISPRED);
|
||||
pred = !mis;
|
||||
from = (u64)((((s64)from) << 1) >> 1);
|
||||
}
|
||||
|
||||
cpuc->lbr_entries[i].from = from;
|
||||
cpuc->lbr_entries[i].to = to;
|
||||
cpuc->lbr_entries[i].flags = flags;
|
||||
cpuc->lbr_entries[i].from = from;
|
||||
cpuc->lbr_entries[i].to = to;
|
||||
cpuc->lbr_entries[i].mispred = mis;
|
||||
cpuc->lbr_entries[i].predicted = pred;
|
||||
cpuc->lbr_entries[i].reserved = 0;
|
||||
}
|
||||
cpuc->lbr_stack.nr = i;
|
||||
}
|
||||
|
@ -193,28 +301,404 @@ void intel_pmu_lbr_read(void)
|
|||
intel_pmu_lbr_read_32(cpuc);
|
||||
else
|
||||
intel_pmu_lbr_read_64(cpuc);
|
||||
|
||||
intel_pmu_lbr_filter(cpuc);
|
||||
}
|
||||
|
||||
/*
|
||||
* SW filter is used:
|
||||
* - in case there is no HW filter
|
||||
* - in case the HW filter has errata or limitations
|
||||
*/
|
||||
static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
|
||||
{
|
||||
u64 br_type = event->attr.branch_sample_type;
|
||||
int mask = 0;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_USER)
|
||||
mask |= X86_BR_USER;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
|
||||
mask |= X86_BR_KERNEL;
|
||||
|
||||
/* we ignore BRANCH_HV here */
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_ANY)
|
||||
mask |= X86_BR_ANY;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
|
||||
mask |= X86_BR_ANY_CALL;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
|
||||
mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
|
||||
mask |= X86_BR_IND_CALL;
|
||||
/*
|
||||
* stash actual user request into reg, it may
|
||||
* be used by fixup code for some CPU
|
||||
*/
|
||||
event->hw.branch_reg.reg = mask;
|
||||
}
|
||||
|
||||
/*
|
||||
* setup the HW LBR filter
|
||||
* Used only when available, may not be enough to disambiguate
|
||||
* all branches, may need the help of the SW filter
|
||||
*/
|
||||
static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event_extra *reg;
|
||||
u64 br_type = event->attr.branch_sample_type;
|
||||
u64 mask = 0, m;
|
||||
u64 v;
|
||||
|
||||
for_each_branch_sample_type(m) {
|
||||
if (!(br_type & m))
|
||||
continue;
|
||||
|
||||
v = x86_pmu.lbr_sel_map[m];
|
||||
if (v == LBR_NOT_SUPP)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (v != LBR_IGN)
|
||||
mask |= v;
|
||||
}
|
||||
reg = &event->hw.branch_reg;
|
||||
reg->idx = EXTRA_REG_LBR;
|
||||
|
||||
/* LBR_SELECT operates in suppress mode so invert mask */
|
||||
reg->config = ~mask & x86_pmu.lbr_sel_mask;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int intel_pmu_setup_lbr_filter(struct perf_event *event)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* no LBR on this PMU
|
||||
*/
|
||||
if (!x86_pmu.lbr_nr)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
/*
|
||||
* setup SW LBR filter
|
||||
*/
|
||||
intel_pmu_setup_sw_lbr_filter(event);
|
||||
|
||||
/*
|
||||
* setup HW LBR filter, if any
|
||||
*/
|
||||
if (x86_pmu.lbr_sel_map)
|
||||
ret = intel_pmu_setup_hw_lbr_filter(event);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* return the type of control flow change at address "from"
|
||||
* intruction is not necessarily a branch (in case of interrupt).
|
||||
*
|
||||
* The branch type returned also includes the priv level of the
|
||||
* target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
|
||||
*
|
||||
* If a branch type is unknown OR the instruction cannot be
|
||||
* decoded (e.g., text page not present), then X86_BR_NONE is
|
||||
* returned.
|
||||
*/
|
||||
static int branch_type(unsigned long from, unsigned long to)
|
||||
{
|
||||
struct insn insn;
|
||||
void *addr;
|
||||
int bytes, size = MAX_INSN_SIZE;
|
||||
int ret = X86_BR_NONE;
|
||||
int ext, to_plm, from_plm;
|
||||
u8 buf[MAX_INSN_SIZE];
|
||||
int is64 = 0;
|
||||
|
||||
to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
|
||||
from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
|
||||
|
||||
/*
|
||||
* maybe zero if lbr did not fill up after a reset by the time
|
||||
* we get a PMU interrupt
|
||||
*/
|
||||
if (from == 0 || to == 0)
|
||||
return X86_BR_NONE;
|
||||
|
||||
if (from_plm == X86_BR_USER) {
|
||||
/*
|
||||
* can happen if measuring at the user level only
|
||||
* and we interrupt in a kernel thread, e.g., idle.
|
||||
*/
|
||||
if (!current->mm)
|
||||
return X86_BR_NONE;
|
||||
|
||||
/* may fail if text not present */
|
||||
bytes = copy_from_user_nmi(buf, (void __user *)from, size);
|
||||
if (bytes != size)
|
||||
return X86_BR_NONE;
|
||||
|
||||
addr = buf;
|
||||
} else
|
||||
addr = (void *)from;
|
||||
|
||||
/*
|
||||
* decoder needs to know the ABI especially
|
||||
* on 64-bit systems running 32-bit apps
|
||||
*/
|
||||
#ifdef CONFIG_X86_64
|
||||
is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
|
||||
#endif
|
||||
insn_init(&insn, addr, is64);
|
||||
insn_get_opcode(&insn);
|
||||
|
||||
switch (insn.opcode.bytes[0]) {
|
||||
case 0xf:
|
||||
switch (insn.opcode.bytes[1]) {
|
||||
case 0x05: /* syscall */
|
||||
case 0x34: /* sysenter */
|
||||
ret = X86_BR_SYSCALL;
|
||||
break;
|
||||
case 0x07: /* sysret */
|
||||
case 0x35: /* sysexit */
|
||||
ret = X86_BR_SYSRET;
|
||||
break;
|
||||
case 0x80 ... 0x8f: /* conditional */
|
||||
ret = X86_BR_JCC;
|
||||
break;
|
||||
default:
|
||||
ret = X86_BR_NONE;
|
||||
}
|
||||
break;
|
||||
case 0x70 ... 0x7f: /* conditional */
|
||||
ret = X86_BR_JCC;
|
||||
break;
|
||||
case 0xc2: /* near ret */
|
||||
case 0xc3: /* near ret */
|
||||
case 0xca: /* far ret */
|
||||
case 0xcb: /* far ret */
|
||||
ret = X86_BR_RET;
|
||||
break;
|
||||
case 0xcf: /* iret */
|
||||
ret = X86_BR_IRET;
|
||||
break;
|
||||
case 0xcc ... 0xce: /* int */
|
||||
ret = X86_BR_INT;
|
||||
break;
|
||||
case 0xe8: /* call near rel */
|
||||
case 0x9a: /* call far absolute */
|
||||
ret = X86_BR_CALL;
|
||||
break;
|
||||
case 0xe0 ... 0xe3: /* loop jmp */
|
||||
ret = X86_BR_JCC;
|
||||
break;
|
||||
case 0xe9 ... 0xeb: /* jmp */
|
||||
ret = X86_BR_JMP;
|
||||
break;
|
||||
case 0xff: /* call near absolute, call far absolute ind */
|
||||
insn_get_modrm(&insn);
|
||||
ext = (insn.modrm.bytes[0] >> 3) & 0x7;
|
||||
switch (ext) {
|
||||
case 2: /* near ind call */
|
||||
case 3: /* far ind call */
|
||||
ret = X86_BR_IND_CALL;
|
||||
break;
|
||||
case 4:
|
||||
case 5:
|
||||
ret = X86_BR_JMP;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ret = X86_BR_NONE;
|
||||
}
|
||||
/*
|
||||
* interrupts, traps, faults (and thus ring transition) may
|
||||
* occur on any instructions. Thus, to classify them correctly,
|
||||
* we need to first look at the from and to priv levels. If they
|
||||
* are different and to is in the kernel, then it indicates
|
||||
* a ring transition. If the from instruction is not a ring
|
||||
* transition instr (syscall, systenter, int), then it means
|
||||
* it was a irq, trap or fault.
|
||||
*
|
||||
* we have no way of detecting kernel to kernel faults.
|
||||
*/
|
||||
if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
|
||||
&& ret != X86_BR_SYSCALL && ret != X86_BR_INT)
|
||||
ret = X86_BR_IRQ;
|
||||
|
||||
/*
|
||||
* branch priv level determined by target as
|
||||
* is done by HW when LBR_SELECT is implemented
|
||||
*/
|
||||
if (ret != X86_BR_NONE)
|
||||
ret |= to_plm;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* implement actual branch filter based on user demand.
|
||||
* Hardware may not exactly satisfy that request, thus
|
||||
* we need to inspect opcodes. Mismatched branches are
|
||||
* discarded. Therefore, the number of branches returned
|
||||
* in PERF_SAMPLE_BRANCH_STACK sample may vary.
|
||||
*/
|
||||
static void
|
||||
intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
|
||||
{
|
||||
u64 from, to;
|
||||
int br_sel = cpuc->br_sel;
|
||||
int i, j, type;
|
||||
bool compress = false;
|
||||
|
||||
/* if sampling all branches, then nothing to filter */
|
||||
if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
|
||||
return;
|
||||
|
||||
for (i = 0; i < cpuc->lbr_stack.nr; i++) {
|
||||
|
||||
from = cpuc->lbr_entries[i].from;
|
||||
to = cpuc->lbr_entries[i].to;
|
||||
|
||||
type = branch_type(from, to);
|
||||
|
||||
/* if type does not correspond, then discard */
|
||||
if (type == X86_BR_NONE || (br_sel & type) != type) {
|
||||
cpuc->lbr_entries[i].from = 0;
|
||||
compress = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!compress)
|
||||
return;
|
||||
|
||||
/* remove all entries with from=0 */
|
||||
for (i = 0; i < cpuc->lbr_stack.nr; ) {
|
||||
if (!cpuc->lbr_entries[i].from) {
|
||||
j = i;
|
||||
while (++j < cpuc->lbr_stack.nr)
|
||||
cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
|
||||
cpuc->lbr_stack.nr--;
|
||||
if (!cpuc->lbr_entries[i].from)
|
||||
continue;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Map interface branch filters onto LBR filters
|
||||
*/
|
||||
static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
|
||||
[PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,
|
||||
[PERF_SAMPLE_BRANCH_USER] = LBR_USER,
|
||||
[PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,
|
||||
[PERF_SAMPLE_BRANCH_HV] = LBR_IGN,
|
||||
[PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP
|
||||
| LBR_IND_JMP | LBR_FAR,
|
||||
/*
|
||||
* NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
|
||||
*/
|
||||
[PERF_SAMPLE_BRANCH_ANY_CALL] =
|
||||
LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
|
||||
/*
|
||||
* NHM/WSM erratum: must include IND_JMP to capture IND_CALL
|
||||
*/
|
||||
[PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
|
||||
};
|
||||
|
||||
static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
|
||||
[PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,
|
||||
[PERF_SAMPLE_BRANCH_USER] = LBR_USER,
|
||||
[PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,
|
||||
[PERF_SAMPLE_BRANCH_HV] = LBR_IGN,
|
||||
[PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR,
|
||||
[PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL
|
||||
| LBR_FAR,
|
||||
[PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL,
|
||||
};
|
||||
|
||||
/* core */
|
||||
void intel_pmu_lbr_init_core(void)
|
||||
{
|
||||
x86_pmu.lbr_nr = 4;
|
||||
x86_pmu.lbr_tos = 0x01c9;
|
||||
x86_pmu.lbr_from = 0x40;
|
||||
x86_pmu.lbr_to = 0x60;
|
||||
x86_pmu.lbr_tos = MSR_LBR_TOS;
|
||||
x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
|
||||
x86_pmu.lbr_to = MSR_LBR_CORE_TO;
|
||||
|
||||
/*
|
||||
* SW branch filter usage:
|
||||
* - compensate for lack of HW filter
|
||||
*/
|
||||
pr_cont("4-deep LBR, ");
|
||||
}
|
||||
|
||||
/* nehalem/westmere */
|
||||
void intel_pmu_lbr_init_nhm(void)
|
||||
{
|
||||
x86_pmu.lbr_nr = 16;
|
||||
x86_pmu.lbr_tos = 0x01c9;
|
||||
x86_pmu.lbr_from = 0x680;
|
||||
x86_pmu.lbr_to = 0x6c0;
|
||||
x86_pmu.lbr_tos = MSR_LBR_TOS;
|
||||
x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
|
||||
x86_pmu.lbr_to = MSR_LBR_NHM_TO;
|
||||
|
||||
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
|
||||
x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
|
||||
|
||||
/*
|
||||
* SW branch filter usage:
|
||||
* - workaround LBR_SEL errata (see above)
|
||||
* - support syscall, sysret capture.
|
||||
* That requires LBR_FAR but that means far
|
||||
* jmp need to be filtered out
|
||||
*/
|
||||
pr_cont("16-deep LBR, ");
|
||||
}
|
||||
|
||||
/* sandy bridge */
|
||||
void intel_pmu_lbr_init_snb(void)
|
||||
{
|
||||
x86_pmu.lbr_nr = 16;
|
||||
x86_pmu.lbr_tos = MSR_LBR_TOS;
|
||||
x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
|
||||
x86_pmu.lbr_to = MSR_LBR_NHM_TO;
|
||||
|
||||
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
|
||||
x86_pmu.lbr_sel_map = snb_lbr_sel_map;
|
||||
|
||||
/*
|
||||
* SW branch filter usage:
|
||||
* - support syscall, sysret capture.
|
||||
* That requires LBR_FAR but that means far
|
||||
* jmp need to be filtered out
|
||||
*/
|
||||
pr_cont("16-deep LBR, ");
|
||||
}
|
||||
|
||||
/* atom */
|
||||
void intel_pmu_lbr_init_atom(void)
|
||||
{
|
||||
/*
|
||||
* only models starting at stepping 10 seems
|
||||
* to have an operational LBR which can freeze
|
||||
* on PMU interrupt
|
||||
*/
|
||||
if (boot_cpu_data.x86_mask < 10) {
|
||||
pr_cont("LBR disabled due to erratum");
|
||||
return;
|
||||
}
|
||||
|
||||
x86_pmu.lbr_nr = 8;
|
||||
x86_pmu.lbr_tos = 0x01c9;
|
||||
x86_pmu.lbr_from = 0x40;
|
||||
x86_pmu.lbr_to = 0x60;
|
||||
x86_pmu.lbr_tos = MSR_LBR_TOS;
|
||||
x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
|
||||
x86_pmu.lbr_to = MSR_LBR_CORE_TO;
|
||||
|
||||
/*
|
||||
* SW branch filter usage:
|
||||
* - compensate for lack of HW filter
|
||||
*/
|
||||
pr_cont("8-deep LBR, ");
|
||||
}
|
||||
|
|
|
@ -129,10 +129,39 @@ enum perf_event_sample_format {
|
|||
PERF_SAMPLE_PERIOD = 1U << 8,
|
||||
PERF_SAMPLE_STREAM_ID = 1U << 9,
|
||||
PERF_SAMPLE_RAW = 1U << 10,
|
||||
PERF_SAMPLE_BRANCH_STACK = 1U << 11,
|
||||
|
||||
PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */
|
||||
PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */
|
||||
};
|
||||
|
||||
/*
|
||||
* values to program into branch_sample_type when PERF_SAMPLE_BRANCH_STACK is set
|
||||
*
|
||||
* If the user does not pass priv level information via branch_sample_type,
|
||||
* the kernel uses the event's priv level. Branch and event priv levels do
|
||||
* not have to match. Branch priv level is checked for permissions.
|
||||
*
|
||||
* The branch types can be combined, however BRANCH_ANY covers all types
|
||||
* of branches and therefore it supersedes all the other types.
|
||||
*/
|
||||
enum perf_branch_sample_type {
	/* privilege levels of the branches to capture */
	PERF_SAMPLE_BRANCH_USER		= 1U << 0,	/* user level */
	PERF_SAMPLE_BRANCH_KERNEL	= 1U << 1,	/* kernel level */
	PERF_SAMPLE_BRANCH_HV		= 1U << 2,	/* hypervisor level */

	/* kinds of branches to capture */
	PERF_SAMPLE_BRANCH_ANY		= 1U << 3,	/* every branch type */
	PERF_SAMPLE_BRANCH_ANY_CALL	= 1U << 4,	/* all call branches */
	PERF_SAMPLE_BRANCH_ANY_RETURN	= 1U << 5,	/* all return branches */
	PERF_SAMPLE_BRANCH_IND_CALL	= 1U << 6,	/* indirect calls only */

	/* first unused bit; not part of the ABI */
	PERF_SAMPLE_BRANCH_MAX		= 1U << 7,
};

/* every privilege-level bit of enum perf_branch_sample_type */
#define PERF_SAMPLE_BRANCH_PLM_ALL \
	(PERF_SAMPLE_BRANCH_USER | \
	 PERF_SAMPLE_BRANCH_KERNEL | \
	 PERF_SAMPLE_BRANCH_HV)
|
||||
|
||||
/*
|
||||
* The format of the data returned by read() on a perf event fd,
|
||||
* as specified by attr.read_format:
|
||||
|
@ -163,6 +192,8 @@ enum perf_event_read_format {
|
|||
};
|
||||
|
||||
#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
|
||||
#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */
|
||||
#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
|
||||
|
||||
/*
|
||||
* Hardware event_id to monitor via a performance monitoring event:
|
||||
|
@ -240,6 +271,7 @@ struct perf_event_attr {
|
|||
__u64 bp_len;
|
||||
__u64 config2; /* extension of config1 */
|
||||
};
|
||||
__u64 branch_sample_type; /* enum perf_branch_sample_type */
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -458,6 +490,8 @@ enum perf_event_type {
|
|||
*
|
||||
* { u32 size;
|
||||
* char data[size];}&& PERF_SAMPLE_RAW
|
||||
*
|
||||
* { u64 nr;
* { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
|
||||
* };
|
||||
*/
|
||||
PERF_RECORD_SAMPLE = 9,
|
||||
|
@ -530,12 +564,34 @@ struct perf_raw_record {
|
|||
void *data;
|
||||
};
|
||||
|
||||
/*
|
||||
* single taken branch record layout:
|
||||
*
|
||||
* from: source instruction (may not always be a branch insn)
|
||||
* to: branch target
|
||||
* mispred: branch target was mispredicted
|
||||
* predicted: branch target was predicted
|
||||
*
|
||||
* support for mispred, predicted is optional. In case it
|
||||
* is not supported mispred = predicted = 0.
|
||||
*/
|
||||
struct perf_branch_entry {
|
||||
__u64 from;
|
||||
__u64 to;
|
||||
__u64 flags;
|
||||
__u64 from;
|
||||
__u64 to;
|
||||
__u64 mispred:1, /* target mispredicted */
|
||||
predicted:1,/* target predicted */
|
||||
reserved:62;
|
||||
};
|
||||
|
||||
/*
|
||||
* branch stack layout:
|
||||
* nr: number of taken branches stored in entries[]
|
||||
*
|
||||
* Note that nr can vary from sample to sample
|
||||
* branches (to, from) are stored from most recent
|
||||
* to least recent, i.e., entries[0] contains the most
|
||||
* recent branch.
|
||||
*/
|
||||
struct perf_branch_stack {
|
||||
__u64 nr;
|
||||
struct perf_branch_entry entries[0];
|
||||
|
@ -566,7 +622,9 @@ struct hw_perf_event {
|
|||
unsigned long event_base;
|
||||
int idx;
|
||||
int last_cpu;
|
||||
|
||||
struct hw_perf_event_extra extra_reg;
|
||||
struct hw_perf_event_extra branch_reg;
|
||||
};
|
||||
struct { /* software */
|
||||
struct hrtimer hrtimer;
|
||||
|
@ -690,6 +748,11 @@ struct pmu {
|
|||
* if no implementation is provided it will default to: event->hw.idx + 1.
|
||||
*/
|
||||
int (*event_idx) (struct perf_event *event); /*optional */
|
||||
|
||||
/*
|
||||
* flush branch stack on context-switches (needed in cpu-wide mode)
|
||||
*/
|
||||
void (*flush_branch_stack) (void);
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -923,7 +986,8 @@ struct perf_event_context {
|
|||
u64 parent_gen;
|
||||
u64 generation;
|
||||
int pin_count;
|
||||
int nr_cgroups; /* cgroup events present */
|
||||
int nr_cgroups; /* cgroup evts */
|
||||
int nr_branch_stack; /* branch_stack evt */
|
||||
struct rcu_head rcu_head;
|
||||
};
|
||||
|
||||
|
@ -988,6 +1052,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
|
|||
extern u64 perf_event_read_value(struct perf_event *event,
|
||||
u64 *enabled, u64 *running);
|
||||
|
||||
|
||||
struct perf_sample_data {
|
||||
u64 type;
|
||||
|
||||
|
@ -1007,12 +1072,14 @@ struct perf_sample_data {
|
|||
u64 period;
|
||||
struct perf_callchain_entry *callchain;
|
||||
struct perf_raw_record *raw;
|
||||
struct perf_branch_stack *br_stack;
|
||||
};
|
||||
|
||||
static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
{
	/* optional payloads start out absent */
	data->br_stack = NULL;
	data->raw = NULL;

	/* caller-supplied sample address */
	data->addr = addr;
}
|
||||
|
||||
extern void perf_output_sample(struct perf_output_handle *handle,
|
||||
|
@ -1151,6 +1218,11 @@ extern void perf_bp_event(struct perf_event *event, void *data);
|
|||
# define perf_instruction_pointer(regs) instruction_pointer(regs)
|
||||
#endif
|
||||
|
||||
static inline bool has_branch_stack(struct perf_event *event)
|
||||
{
|
||||
return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
|
||||
}
|
||||
|
||||
extern int perf_output_begin(struct perf_output_handle *handle,
|
||||
struct perf_event *event, unsigned int size);
|
||||
extern void perf_output_end(struct perf_output_handle *handle);
|
||||
|
|
|
@ -118,6 +118,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
|
|||
PERF_FLAG_FD_OUTPUT |\
|
||||
PERF_FLAG_PID_CGROUP)
|
||||
|
||||
/*
|
||||
* branch priv levels that need permission checks
|
||||
*/
|
||||
#define PERF_SAMPLE_BRANCH_PERM_PLM \
|
||||
(PERF_SAMPLE_BRANCH_KERNEL |\
|
||||
PERF_SAMPLE_BRANCH_HV)
|
||||
|
||||
enum event_type_t {
|
||||
EVENT_FLEXIBLE = 0x1,
|
||||
EVENT_PINNED = 0x2,
|
||||
|
@ -130,6 +137,7 @@ enum event_type_t {
|
|||
*/
|
||||
struct static_key_deferred perf_sched_events __read_mostly;
|
||||
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
|
||||
static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
|
||||
|
||||
static atomic_t nr_mmap_events __read_mostly;
|
||||
static atomic_t nr_comm_events __read_mostly;
|
||||
|
@ -881,6 +889,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
|
|||
if (is_cgroup_event(event))
|
||||
ctx->nr_cgroups++;
|
||||
|
||||
if (has_branch_stack(event))
|
||||
ctx->nr_branch_stack++;
|
||||
|
||||
list_add_rcu(&event->event_entry, &ctx->event_list);
|
||||
if (!ctx->nr_events)
|
||||
perf_pmu_rotate_start(ctx->pmu);
|
||||
|
@ -1020,6 +1031,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
|
|||
cpuctx->cgrp = NULL;
|
||||
}
|
||||
|
||||
if (has_branch_stack(event))
|
||||
ctx->nr_branch_stack--;
|
||||
|
||||
ctx->nr_events--;
|
||||
if (event->attr.inherit_stat)
|
||||
ctx->nr_stat--;
|
||||
|
@ -2194,6 +2208,66 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
|
|||
perf_pmu_rotate_start(ctx->pmu);
|
||||
}
|
||||
|
||||
/*
|
||||
* When sampling the branch stack in system-wide, it may be necessary
|
||||
* to flush the stack on context switch. This happens when the branch
|
||||
* stack does not tag its entries with the pid of the current task.
|
||||
* Otherwise it becomes impossible to associate a branch entry with a
|
||||
* task. This ambiguity is more likely to appear when the branch stack
|
||||
* supports priv level filtering and the user sets it to monitor only
|
||||
* at the user level (which could be a useful measurement in system-wide
|
||||
* mode). In that case, the risk is high of having a branch stack with
|
||||
* branches from multiple tasks. Flushing may mean dropping the existing
|
||||
* entries or stashing them somewhere in the PMU specific code layer.
|
||||
*
|
||||
* This function provides the context switch callback to the lower code
|
||||
* layer. It is invoked ONLY when there is at least one system-wide context
|
||||
* with at least one active event using taken branch sampling.
|
||||
*/
|
||||
static void perf_branch_stack_sched_in(struct task_struct *prev,
|
||||
struct task_struct *task)
|
||||
{
|
||||
struct perf_cpu_context *cpuctx;
|
||||
struct pmu *pmu;
|
||||
unsigned long flags;
|
||||
|
||||
/* no need to flush branch stack if not changing task */
|
||||
if (prev == task)
|
||||
return;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
list_for_each_entry_rcu(pmu, &pmus, entry) {
|
||||
cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
|
||||
|
||||
/*
|
||||
* check if the context has at least one
|
||||
* event using PERF_SAMPLE_BRANCH_STACK
|
||||
*/
|
||||
if (cpuctx->ctx.nr_branch_stack > 0
|
||||
&& pmu->flush_branch_stack) {
|
||||
|
||||
pmu = cpuctx->ctx.pmu;
|
||||
|
||||
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
|
||||
|
||||
perf_pmu_disable(pmu);
|
||||
|
||||
pmu->flush_branch_stack();
|
||||
|
||||
perf_pmu_enable(pmu);
|
||||
|
||||
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
|
||||
}
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from scheduler to add the events of the current task
|
||||
* with interrupts disabled.
|
||||
|
@ -2225,6 +2299,10 @@ void __perf_event_task_sched_in(struct task_struct *prev,
|
|||
*/
|
||||
if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
|
||||
perf_cgroup_sched_in(prev, task);
|
||||
|
||||
/* check for system-wide branch_stack events */
|
||||
if (atomic_read(&__get_cpu_var(perf_branch_stack_events)))
|
||||
perf_branch_stack_sched_in(prev, task);
|
||||
}
|
||||
|
||||
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
|
||||
|
@ -2791,6 +2869,14 @@ static void free_event(struct perf_event *event)
|
|||
atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
|
||||
static_key_slow_dec_deferred(&perf_sched_events);
|
||||
}
|
||||
|
||||
if (has_branch_stack(event)) {
|
||||
static_key_slow_dec_deferred(&perf_sched_events);
|
||||
/* is system-wide event */
|
||||
if (!(event->attach_state & PERF_ATTACH_TASK))
|
||||
atomic_dec(&per_cpu(perf_branch_stack_events,
|
||||
event->cpu));
|
||||
}
|
||||
}
|
||||
|
||||
if (event->rb) {
|
||||
|
@ -3907,6 +3993,24 @@ void perf_output_sample(struct perf_output_handle *handle,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
|
||||
if (data->br_stack) {
|
||||
size_t size;
|
||||
|
||||
size = data->br_stack->nr
|
||||
* sizeof(struct perf_branch_entry);
|
||||
|
||||
perf_output_put(handle, data->br_stack->nr);
|
||||
perf_output_copy(handle, data->br_stack->entries, size);
|
||||
} else {
|
||||
/*
|
||||
* we always store at least the value of nr
|
||||
*/
|
||||
u64 nr = 0;
|
||||
perf_output_put(handle, nr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void perf_prepare_sample(struct perf_event_header *header,
|
||||
|
@ -3949,6 +4053,15 @@ void perf_prepare_sample(struct perf_event_header *header,
|
|||
WARN_ON_ONCE(size & (sizeof(u64)-1));
|
||||
header->size += size;
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
|
||||
int size = sizeof(u64); /* nr */
|
||||
if (data->br_stack) {
|
||||
size += data->br_stack->nr
|
||||
* sizeof(struct perf_branch_entry);
|
||||
}
|
||||
header->size += size;
|
||||
}
|
||||
}
|
||||
|
||||
static void perf_event_output(struct perf_event *event,
|
||||
|
@ -5010,6 +5123,12 @@ static int perf_swevent_init(struct perf_event *event)
|
|||
if (event->attr.type != PERF_TYPE_SOFTWARE)
|
||||
return -ENOENT;
|
||||
|
||||
/*
|
||||
* no branch sampling for software events
|
||||
*/
|
||||
if (has_branch_stack(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
switch (event_id) {
|
||||
case PERF_COUNT_SW_CPU_CLOCK:
|
||||
case PERF_COUNT_SW_TASK_CLOCK:
|
||||
|
@ -5120,6 +5239,12 @@ static int perf_tp_event_init(struct perf_event *event)
|
|||
if (event->attr.type != PERF_TYPE_TRACEPOINT)
|
||||
return -ENOENT;
|
||||
|
||||
/*
|
||||
* no branch sampling for tracepoint events
|
||||
*/
|
||||
if (has_branch_stack(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
err = perf_trace_init(event);
|
||||
if (err)
|
||||
return err;
|
||||
|
@ -5345,6 +5470,12 @@ static int cpu_clock_event_init(struct perf_event *event)
|
|||
if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
|
||||
return -ENOENT;
|
||||
|
||||
/*
|
||||
* no branch sampling for software events
|
||||
*/
|
||||
if (has_branch_stack(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
perf_swevent_init_hrtimer(event);
|
||||
|
||||
return 0;
|
||||
|
@ -5419,6 +5550,12 @@ static int task_clock_event_init(struct perf_event *event)
|
|||
if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
|
||||
return -ENOENT;
|
||||
|
||||
/*
|
||||
* no branch sampling for software events
|
||||
*/
|
||||
if (has_branch_stack(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
perf_swevent_init_hrtimer(event);
|
||||
|
||||
return 0;
|
||||
|
@ -5866,6 +6003,12 @@ done:
|
|||
return ERR_PTR(err);
|
||||
}
|
||||
}
|
||||
if (has_branch_stack(event)) {
|
||||
static_key_slow_inc(&perf_sched_events.key);
|
||||
if (!(event->attach_state & PERF_ATTACH_TASK))
|
||||
atomic_inc(&per_cpu(perf_branch_stack_events,
|
||||
event->cpu));
|
||||
}
|
||||
}
|
||||
|
||||
return event;
|
||||
|
@ -5935,6 +6078,40 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
|
|||
if (attr->read_format & ~(PERF_FORMAT_MAX-1))
|
||||
return -EINVAL;
|
||||
|
||||
if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
|
||||
u64 mask = attr->branch_sample_type;
|
||||
|
||||
/* only using defined bits */
|
||||
if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1))
|
||||
return -EINVAL;
|
||||
|
||||
/* at least one branch bit must be set */
|
||||
if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL))
|
||||
return -EINVAL;
|
||||
|
||||
/* kernel level capture: check permissions */
|
||||
if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
|
||||
&& perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
|
||||
return -EACCES;
|
||||
|
||||
/* propagate priv level, when not set for branch */
|
||||
if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) {
|
||||
|
||||
/* exclude_kernel checked on syscall entry */
|
||||
if (!attr->exclude_kernel)
|
||||
mask |= PERF_SAMPLE_BRANCH_KERNEL;
|
||||
|
||||
if (!attr->exclude_user)
|
||||
mask |= PERF_SAMPLE_BRANCH_USER;
|
||||
|
||||
if (!attr->exclude_hv)
|
||||
mask |= PERF_SAMPLE_BRANCH_HV;
|
||||
/*
|
||||
* adjust user setting (for HW filter setup)
|
||||
*/
|
||||
attr->branch_sample_type = mask;
|
||||
}
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
|
||||
|
|
|
@ -581,6 +581,12 @@ static int hw_breakpoint_event_init(struct perf_event *bp)
|
|||
if (bp->attr.type != PERF_TYPE_BREAKPOINT)
|
||||
return -ENOENT;
|
||||
|
||||
/*
|
||||
* no branch sampling for breakpoint events
|
||||
*/
|
||||
if (has_branch_stack(bp))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
err = register_perf_hw_breakpoint(bp);
|
||||
if (err)
|
||||
return err;
|
||||
|
|
|
@ -152,6 +152,36 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha
|
|||
corresponding events, i.e., they always refer to events defined earlier on the command
|
||||
line.
|
||||
|
||||
-b::
|
||||
--branch-any::
|
||||
Enable taken branch stack sampling. Any type of taken branch may be sampled.
|
||||
This is a shortcut for --branch-filter any. See --branch-filter for more information.
|
||||
|
||||
-j::
|
||||
--branch-filter::
|
||||
Enable taken branch stack sampling. Each sample captures a series of consecutive
|
||||
taken branches. The number of branches captured with each sample depends on the
|
||||
underlying hardware, the type of branches of interest, and the executed code.
|
||||
It is possible to select the types of branches captured by enabling filters. The
|
||||
following filters are defined:
|
||||
|
||||
- any: any type of branches
|
||||
- any_call: any function call or system call
|
||||
- any_ret: any function return or system call return
|
||||
- ind_call: any indirect branch
|
||||
- u: only when the branch target is at the user level
|
||||
- k: only when the branch target is in the kernel
|
||||
- hv: only when the target is at the hypervisor level
|
||||
|
||||
+
|
||||
The option requires at least one branch type among any, any_call, any_ret, ind_call.
|
||||
The privilege levels may be omitted, in which case, the privilege levels of the associated
|
||||
event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
|
||||
levels are subject to permissions. When sampling on multiple events, branch stack sampling
|
||||
is enabled for all the sampling events. The sampled branch type is the same for all events.
|
||||
The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
|
||||
Note that this feature may not be available on all processors.
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
linkperf:perf-stat[1], linkperf:perf-list[1]
|
||||
|
|
|
@ -153,6 +153,16 @@ OPTIONS
|
|||
information which may be very large and thus may clutter the display.
|
||||
It currently includes: cpu and numa topology of the host system.
|
||||
|
||||
-b::
|
||||
--branch-stack::
|
||||
Use the addresses of sampled taken branches instead of the instruction
|
||||
address to build the histograms. To generate meaningful output, the
|
||||
perf.data file must have been obtained using perf record -b or
|
||||
perf record --branch-filter xxx where xxx is a branch filter option.
|
||||
perf report is able to auto-detect whether a perf.data file contains
|
||||
branch stacks and it will automatically switch to the branch view mode,
|
||||
unless --no-branch-stack is used.
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
linkperf:perf-stat[1], linkperf:perf-annotate[1]
|
||||
|
|
|
@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
|
|||
if (!have_tracepoints(&evsel_list->entries))
|
||||
perf_header__clear_feat(&session->header, HEADER_TRACE_INFO);
|
||||
|
||||
if (!rec->opts.branch_stack)
|
||||
perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
|
||||
|
||||
if (!rec->file_new) {
|
||||
err = perf_session__read_header(session, output);
|
||||
if (err < 0)
|
||||
|
@ -638,6 +641,90 @@ out_delete_session:
|
|||
return err;
|
||||
}
|
||||
|
||||
#define BRANCH_OPT(n, m) \
|
||||
{ .name = n, .mode = (m) }
|
||||
|
||||
#define BRANCH_END { .name = NULL }
|
||||
|
||||
struct branch_mode {
|
||||
const char *name;
|
||||
int mode;
|
||||
};
|
||||
|
||||
static const struct branch_mode branch_modes[] = {
|
||||
BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
|
||||
BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
|
||||
BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
|
||||
BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
|
||||
BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
|
||||
BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
|
||||
BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
|
||||
BRANCH_END
|
||||
};
|
||||
|
||||
static int
|
||||
parse_branch_stack(const struct option *opt, const char *str, int unset)
|
||||
{
|
||||
#define ONLY_PLM \
|
||||
(PERF_SAMPLE_BRANCH_USER |\
|
||||
PERF_SAMPLE_BRANCH_KERNEL |\
|
||||
PERF_SAMPLE_BRANCH_HV)
|
||||
|
||||
uint64_t *mode = (uint64_t *)opt->value;
|
||||
const struct branch_mode *br;
|
||||
char *s, *os = NULL, *p;
|
||||
int ret = -1;
|
||||
|
||||
if (unset)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* cannot set it twice, -b + --branch-filter for instance
|
||||
*/
|
||||
if (*mode)
|
||||
return -1;
|
||||
|
||||
/* str may be NULL in case no arg is passed to -b */
|
||||
if (str) {
|
||||
/* because str is read-only */
|
||||
s = os = strdup(str);
|
||||
if (!s)
|
||||
return -1;
|
||||
|
||||
for (;;) {
|
||||
p = strchr(s, ',');
|
||||
if (p)
|
||||
*p = '\0';
|
||||
|
||||
for (br = branch_modes; br->name; br++) {
|
||||
if (!strcasecmp(s, br->name))
|
||||
break;
|
||||
}
|
||||
if (!br->name) {
|
||||
ui__warning("unknown branch filter %s,"
|
||||
" check man page\n", s);
|
||||
goto error;
|
||||
}
|
||||
|
||||
*mode |= br->mode;
|
||||
|
||||
if (!p)
|
||||
break;
|
||||
|
||||
s = p + 1;
|
||||
}
|
||||
}
|
||||
ret = 0;
|
||||
|
||||
/* default to any branch */
|
||||
if ((*mode & ~ONLY_PLM) == 0) {
|
||||
*mode = PERF_SAMPLE_BRANCH_ANY;
|
||||
}
|
||||
error:
|
||||
free(os);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char * const record_usage[] = {
|
||||
"perf record [<options>] [<command>]",
|
||||
"perf record [<options>] -- <command> [<options>]",
|
||||
|
@ -727,6 +814,14 @@ const struct option record_options[] = {
|
|||
"monitor event in cgroup name only",
|
||||
parse_cgroups),
|
||||
OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
|
||||
|
||||
OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
|
||||
"branch any", "sample any taken branches",
|
||||
parse_branch_stack),
|
||||
|
||||
OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
|
||||
"branch filter mask", "branch stack filter modes",
|
||||
parse_branch_stack),
|
||||
OPT_END()
|
||||
};
|
||||
|
||||
|
|
|
@ -53,6 +53,82 @@ struct perf_report {
|
|||
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
|
||||
};
|
||||
|
||||
static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
|
||||
struct addr_location *al,
|
||||
struct perf_sample *sample,
|
||||
struct perf_evsel *evsel,
|
||||
struct machine *machine)
|
||||
{
|
||||
struct perf_report *rep = container_of(tool, struct perf_report, tool);
|
||||
struct symbol *parent = NULL;
|
||||
int err = 0;
|
||||
unsigned i;
|
||||
struct hist_entry *he;
|
||||
struct branch_info *bi, *bx;
|
||||
|
||||
if ((sort__has_parent || symbol_conf.use_callchain)
|
||||
&& sample->callchain) {
|
||||
err = machine__resolve_callchain(machine, evsel, al->thread,
|
||||
sample->callchain, &parent);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
bi = machine__resolve_bstack(machine, al->thread,
|
||||
sample->branch_stack);
|
||||
if (!bi)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < sample->branch_stack->nr; i++) {
|
||||
if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
|
||||
continue;
|
||||
/*
|
||||
* The report shows the percentage of total branches captured
|
||||
* and not events sampled. Thus we use a pseudo period of 1.
|
||||
*/
|
||||
he = __hists__add_branch_entry(&evsel->hists, al, parent,
|
||||
&bi[i], 1);
|
||||
if (he) {
|
||||
struct annotation *notes;
|
||||
err = -ENOMEM;
|
||||
bx = he->branch_info;
|
||||
if (bx->from.sym && use_browser > 0) {
|
||||
notes = symbol__annotation(bx->from.sym);
|
||||
if (!notes->src
|
||||
&& symbol__alloc_hist(bx->from.sym) < 0)
|
||||
goto out;
|
||||
|
||||
err = symbol__inc_addr_samples(bx->from.sym,
|
||||
bx->from.map,
|
||||
evsel->idx,
|
||||
bx->from.al_addr);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (bx->to.sym && use_browser > 0) {
|
||||
notes = symbol__annotation(bx->to.sym);
|
||||
if (!notes->src
|
||||
&& symbol__alloc_hist(bx->to.sym) < 0)
|
||||
goto out;
|
||||
|
||||
err = symbol__inc_addr_samples(bx->to.sym,
|
||||
bx->to.map,
|
||||
evsel->idx,
|
||||
bx->to.al_addr);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
evsel->hists.stats.total_period += 1;
|
||||
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
|
||||
err = 0;
|
||||
} else
|
||||
return -ENOMEM;
|
||||
}
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
|
||||
struct addr_location *al,
|
||||
struct perf_sample *sample,
|
||||
|
@ -126,14 +202,21 @@ static int process_sample_event(struct perf_tool *tool,
|
|||
if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
|
||||
return 0;
|
||||
|
||||
if (al.map != NULL)
|
||||
al.map->dso->hit = 1;
|
||||
if (sort__branch_mode == 1) {
|
||||
if (perf_report__add_branch_hist_entry(tool, &al, sample,
|
||||
evsel, machine)) {
|
||||
pr_debug("problem adding lbr entry, skipping event\n");
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
if (al.map != NULL)
|
||||
al.map->dso->hit = 1;
|
||||
|
||||
if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) {
|
||||
pr_debug("problem incrementing symbol period, skipping event\n");
|
||||
return -1;
|
||||
if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) {
|
||||
pr_debug("problem incrementing symbol period, skipping event\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -188,6 +271,15 @@ static int perf_report__setup_sample_type(struct perf_report *rep)
|
|||
}
|
||||
}
|
||||
|
||||
if (sort__branch_mode == 1) {
|
||||
if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) {
|
||||
fprintf(stderr, "selected -b but no branch data."
|
||||
" Did you call perf record without"
|
||||
" -b?\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -246,7 +338,7 @@ static int __cmd_report(struct perf_report *rep)
|
|||
{
|
||||
int ret = -EINVAL;
|
||||
u64 nr_samples;
|
||||
struct perf_session *session;
|
||||
struct perf_session *session = rep->session;
|
||||
struct perf_evsel *pos;
|
||||
struct map *kernel_map;
|
||||
struct kmap *kernel_kmap;
|
||||
|
@ -254,13 +346,6 @@ static int __cmd_report(struct perf_report *rep)
|
|||
|
||||
signal(SIGINT, sig_handler);
|
||||
|
||||
session = perf_session__new(rep->input_name, O_RDONLY,
|
||||
rep->force, false, &rep->tool);
|
||||
if (session == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
rep->session = session;
|
||||
|
||||
if (rep->cpu_list) {
|
||||
ret = perf_session__cpu_bitmap(session, rep->cpu_list,
|
||||
rep->cpu_bitmap);
|
||||
|
@ -427,9 +512,19 @@ setup:
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
parse_branch_mode(const struct option *opt __used, const char *str __used, int unset)
|
||||
{
|
||||
sort__branch_mode = !unset;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmd_report(int argc, const char **argv, const char *prefix __used)
|
||||
{
|
||||
struct perf_session *session;
|
||||
struct stat st;
|
||||
bool has_br_stack = false;
|
||||
int ret = -1;
|
||||
char callchain_default_opt[] = "fractal,0.5,callee";
|
||||
const char * const report_usage[] = {
|
||||
"perf report [<options>]",
|
||||
|
@ -477,7 +572,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
|
|||
OPT_BOOLEAN(0, "stdio", &report.use_stdio,
|
||||
"Use the stdio interface"),
|
||||
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
|
||||
"sort by key(s): pid, comm, dso, symbol, parent"),
|
||||
"sort by key(s): pid, comm, dso, symbol, parent, dso_to,"
|
||||
" dso_from, symbol_to, symbol_from, mispredict"),
|
||||
OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
|
||||
"Show sample percentage for different cpu modes"),
|
||||
OPT_STRING('p', "parent", &parent_pattern, "regex",
|
||||
|
@ -517,6 +613,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
|
|||
"Specify disassembler style (e.g. -M intel for intel syntax)"),
|
||||
OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
|
||||
"Show a column with the sum of periods"),
|
||||
OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "",
|
||||
"use branch records for histogram filling", parse_branch_mode),
|
||||
OPT_END()
|
||||
};
|
||||
|
||||
|
@ -536,11 +634,36 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
|
|||
else
|
||||
report.input_name = "perf.data";
|
||||
}
|
||||
session = perf_session__new(report.input_name, O_RDONLY,
|
||||
report.force, false, &report.tool);
|
||||
if (session == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
if (strcmp(report.input_name, "-") != 0)
|
||||
report.session = session;
|
||||
|
||||
has_br_stack = perf_header__has_feat(&session->header,
|
||||
HEADER_BRANCH_STACK);
|
||||
|
||||
if (sort__branch_mode == -1 && has_br_stack)
|
||||
sort__branch_mode = 1;
|
||||
|
||||
/* sort__branch_mode could be 0 if --no-branch-stack */
|
||||
if (sort__branch_mode == 1) {
|
||||
/*
|
||||
* if no sort_order is provided, then specify
|
||||
* branch-mode specific order
|
||||
*/
|
||||
if (sort_order == default_sort_order)
|
||||
sort_order = "comm,dso_from,symbol_from,"
|
||||
"dso_to,symbol_to";
|
||||
|
||||
}
|
||||
|
||||
if (strcmp(report.input_name, "-") != 0) {
|
||||
setup_browser(true);
|
||||
else
|
||||
} else {
|
||||
use_browser = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Only in the newt browser we are doing integrated annotation,
|
||||
|
@ -568,13 +691,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
|
|||
}
|
||||
|
||||
if (symbol__init() < 0)
|
||||
return -1;
|
||||
goto error;
|
||||
|
||||
setup_sorting(report_usage, options);
|
||||
|
||||
if (parent_pattern != default_parent_pattern) {
|
||||
if (sort_dimension__add("parent") < 0)
|
||||
return -1;
|
||||
goto error;
|
||||
|
||||
/*
|
||||
* Only show the parent fields if we explicitly
|
||||
|
@ -592,9 +715,20 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
|
|||
if (argc)
|
||||
usage_with_options(report_usage, options);
|
||||
|
||||
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
|
||||
sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
|
||||
sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
|
||||
|
||||
return __cmd_report(&report);
|
||||
if (sort__branch_mode == 1) {
|
||||
sort_entry__setup_elide(&sort_dso_from, symbol_conf.dso_from_list, "dso_from", stdout);
|
||||
sort_entry__setup_elide(&sort_dso_to, symbol_conf.dso_to_list, "dso_to", stdout);
|
||||
sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout);
|
||||
sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout);
|
||||
} else {
|
||||
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
|
||||
sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
|
||||
}
|
||||
|
||||
ret = __cmd_report(&report);
|
||||
error:
|
||||
perf_session__delete(session);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -179,6 +179,23 @@ struct ip_callchain {
|
|||
u64 ips[0];
|
||||
};
|
||||
|
||||
/*
 * Flag bits attached to one branch record; the three bitfields together
 * fill exactly one u64.
 * NOTE(review): presumably mirrors the flag word the kernel emits for
 * PERF_SAMPLE_BRANCH_STACK entries - confirm against perf_event.h.
 */
struct branch_flags {
|
||||
u64 mispred:1;
|
||||
u64 predicted:1;
|
||||
u64 reserved:62;
|
||||
};
|
||||
|
||||
/*
 * One branch record: source address, target address and its flags.
 * perf_event__parse_sample() sizes the sample payload as
 * nr * sizeof(struct branch_entry), so this layout must match the
 * on-file record layout.
 */
struct branch_entry {
|
||||
u64 from;
|
||||
u64 to;
|
||||
struct branch_flags flags;
|
||||
};
|
||||
|
||||
/*
 * Branch stack carried by a sample: a count followed by 'nr' entries.
 * perf_event__parse_sample() points this structure directly at the raw
 * sample data rather than copying it.
 */
struct branch_stack {
|
||||
u64 nr;
|
||||
struct branch_entry entries[0];
|
||||
};
|
||||
|
||||
extern bool perf_host, perf_guest;
|
||||
extern const char perf_version_string[];
|
||||
|
||||
|
@ -205,6 +222,7 @@ struct perf_record_opts {
|
|||
unsigned int freq;
|
||||
unsigned int mmap_pages;
|
||||
unsigned int user_freq;
|
||||
int branch_stack;
|
||||
u64 default_interval;
|
||||
u64 user_interval;
|
||||
const char *cpu_list;
|
||||
|
|
|
@ -81,6 +81,7 @@ struct perf_sample {
|
|||
u32 raw_size;
|
||||
void *raw_data;
|
||||
struct ip_callchain *callchain;
|
||||
struct branch_stack *branch_stack;
|
||||
};
|
||||
|
||||
#define BUILD_ID_SIZE 20
|
||||
|
|
|
@ -126,6 +126,10 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
|
|||
attr->watermark = 0;
|
||||
attr->wakeup_events = 1;
|
||||
}
|
||||
if (opts->branch_stack) {
|
||||
attr->sample_type |= PERF_SAMPLE_BRANCH_STACK;
|
||||
attr->branch_sample_type = opts->branch_stack;
|
||||
}
|
||||
|
||||
attr->mmap = track;
|
||||
attr->comm = track;
|
||||
|
@ -576,6 +580,16 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
|
|||
data->raw_data = (void *) pdata;
|
||||
}
|
||||
|
||||
if (type & PERF_SAMPLE_BRANCH_STACK) {
|
||||
u64 sz;
|
||||
|
||||
data->branch_stack = (struct branch_stack *)array;
|
||||
array++; /* nr */
|
||||
|
||||
sz = data->branch_stack->nr * sizeof(struct branch_entry);
|
||||
sz /= sizeof(u64);
|
||||
array += sz;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -1023,6 +1023,12 @@ write_it:
|
|||
return do_write_string(fd, buffer);
|
||||
}
|
||||
|
||||
/*
 * Writer for the HEADER_BRANCH_STACK feature. It writes nothing and
 * uses none of its arguments; it always returns 0.
 */
static int write_branch_stack(int fd __used, struct perf_header *h __used,
|
||||
struct perf_evlist *evlist __used)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void print_hostname(struct perf_header *ph, int fd, FILE *fp)
|
||||
{
|
||||
char *str = do_read_string(fd, ph);
|
||||
|
@ -1144,8 +1150,9 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp)
|
|||
uint64_t id;
|
||||
void *buf = NULL;
|
||||
char *str;
|
||||
u32 nre, sz, nr, i, j, msz;
|
||||
int ret;
|
||||
u32 nre, sz, nr, i, j;
|
||||
ssize_t ret;
|
||||
size_t msz;
|
||||
|
||||
/* number of events */
|
||||
ret = read(fd, &nre, sizeof(nre));
|
||||
|
@ -1162,25 +1169,23 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp)
|
|||
if (ph->needs_swap)
|
||||
sz = bswap_32(sz);
|
||||
|
||||
/*
|
||||
* ensure it is at least to our ABI rev
|
||||
*/
|
||||
if (sz < (u32)sizeof(attr))
|
||||
goto error;
|
||||
|
||||
memset(&attr, 0, sizeof(attr));
|
||||
|
||||
/* read entire region to sync up to next field */
|
||||
/* buffer to hold on file attr struct */
|
||||
buf = malloc(sz);
|
||||
if (!buf)
|
||||
goto error;
|
||||
|
||||
msz = sizeof(attr);
|
||||
if (sz < msz)
|
||||
if (sz < (ssize_t)msz)
|
||||
msz = sz;
|
||||
|
||||
for (i = 0 ; i < nre; i++) {
|
||||
|
||||
/*
|
||||
* must read entire on-file attr struct to
|
||||
* sync up with layout.
|
||||
*/
|
||||
ret = read(fd, buf, sz);
|
||||
if (ret != (ssize_t)sz)
|
||||
goto error;
|
||||
|
@ -1316,6 +1321,12 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp)
|
|||
free(str);
|
||||
}
|
||||
|
||||
/*
 * Printer for the HEADER_BRANCH_STACK feature: emits a fixed one-line
 * notice on @fp. The header and file descriptor are not consulted.
 */
static void print_branch_stack(struct perf_header *ph __used, int fd __used,
			       FILE *fp)
{
	fputs("# contains samples with branch stack\n", fp);
}
|
||||
|
||||
static int __event_process_build_id(struct build_id_event *bev,
|
||||
char *filename,
|
||||
struct perf_session *session)
|
||||
|
@ -1520,6 +1531,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
|
|||
FEAT_OPA(HEADER_CMDLINE, cmdline),
|
||||
FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology),
|
||||
FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology),
|
||||
FEAT_OPA(HEADER_BRANCH_STACK, branch_stack),
|
||||
};
|
||||
|
||||
struct header_print_data {
|
||||
|
@ -1804,35 +1816,101 @@ out_free:
|
|||
return err;
|
||||
}
|
||||
|
||||
static int check_magic_endian(u64 *magic, struct perf_file_header *header,
|
||||
struct perf_header *ph)
|
||||
/*
 * perf_event_attr sizes of the known file ABI revisions, indexed by ABI
 * number. The trailing 0 is the sentinel that ends the scan in
 * try_all_file_abis().
 */
static const int attr_file_abi_sizes[] = {
|
||||
[0] = PERF_ATTR_SIZE_VER0,
|
||||
[1] = PERF_ATTR_SIZE_VER1,
|
||||
0,
|
||||
};
|
||||
|
||||
/*
|
||||
* In the legacy file format, the magic number is not used to encode endianness.
|
||||
* hdr_sz was used to encode endianness. But given that hdr_sz can vary based
|
||||
* on ABI revisions, we need to try all combinations for all endianness to
|
||||
* detect the endianness.
|
||||
*/
|
||||
static int try_all_file_abis(uint64_t hdr_sz, struct perf_header *ph)
|
||||
{
|
||||
uint64_t ref_size, attr_size;
|
||||
int i;
|
||||
|
||||
for (i = 0 ; attr_file_abi_sizes[i]; i++) {
|
||||
ref_size = attr_file_abi_sizes[i]
|
||||
+ sizeof(struct perf_file_section);
|
||||
if (hdr_sz != ref_size) {
|
||||
attr_size = bswap_64(hdr_sz);
|
||||
if (attr_size != ref_size)
|
||||
continue;
|
||||
|
||||
ph->needs_swap = true;
|
||||
}
|
||||
pr_debug("ABI%d perf.data file detected, need_swap=%d\n",
|
||||
i,
|
||||
ph->needs_swap);
|
||||
return 0;
|
||||
}
|
||||
/* could not determine endianness */
|
||||
return -1;
|
||||
}
|
||||
|
||||
#define PERF_PIPE_HDR_VER0 16
|
||||
|
||||
/*
 * Pipe header sizes of the known pipe ABI revisions, indexed by ABI
 * number. The trailing 0 is the sentinel that ends the scan in
 * try_all_pipe_abis().
 */
static const size_t attr_pipe_abi_sizes[] = {
|
||||
[0] = PERF_PIPE_HDR_VER0,
|
||||
0,
|
||||
};
|
||||
|
||||
/*
|
||||
* In the legacy pipe format, there is an implicit assumption that endianness
|
||||
* between host recording the samples, and host parsing the samples is the
|
||||
* same. This is not always the case given that the pipe output may always be
|
||||
* redirected into a file and analyzed on a different machine with possibly a
|
||||
* different endianness and perf_event ABI revisions in the perf tool itself.
|
||||
*/
|
||||
static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph)
|
||||
{
|
||||
u64 attr_size;
|
||||
int i;
|
||||
|
||||
for (i = 0 ; attr_pipe_abi_sizes[i]; i++) {
|
||||
if (hdr_sz != attr_pipe_abi_sizes[i]) {
|
||||
attr_size = bswap_64(hdr_sz);
|
||||
if (attr_size != hdr_sz)
|
||||
continue;
|
||||
|
||||
ph->needs_swap = true;
|
||||
}
|
||||
pr_debug("Pipe ABI%d perf.data file detected\n", i);
|
||||
return 0;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int check_magic_endian(u64 magic, uint64_t hdr_sz,
|
||||
bool is_pipe, struct perf_header *ph)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* check for legacy format */
|
||||
ret = memcmp(magic, __perf_magic1, sizeof(*magic));
|
||||
ret = memcmp(&magic, __perf_magic1, sizeof(magic));
|
||||
if (ret == 0) {
|
||||
pr_debug("legacy perf.data format\n");
|
||||
if (!header)
|
||||
return -1;
|
||||
if (is_pipe)
|
||||
return try_all_pipe_abis(hdr_sz, ph);
|
||||
|
||||
if (header->attr_size != sizeof(struct perf_file_attr)) {
|
||||
u64 attr_size = bswap_64(header->attr_size);
|
||||
|
||||
if (attr_size != sizeof(struct perf_file_attr))
|
||||
return -1;
|
||||
|
||||
ph->needs_swap = true;
|
||||
}
|
||||
return 0;
|
||||
return try_all_file_abis(hdr_sz, ph);
|
||||
}
|
||||
/*
|
||||
* the new magic number serves two purposes:
|
||||
* - unique number to identify actual perf.data files
|
||||
* - encode endianness of file
|
||||
*/
|
||||
|
||||
/* check magic number with same endianness */
|
||||
if (*magic == __perf_magic2)
|
||||
/* check magic number with one endianness */
|
||||
if (magic == __perf_magic2)
|
||||
return 0;
|
||||
|
||||
/* check magic number but opposite endianness */
|
||||
if (*magic != __perf_magic2_sw)
|
||||
/* check magic number with opposite endianness */
|
||||
if (magic != __perf_magic2_sw)
|
||||
return -1;
|
||||
|
||||
ph->needs_swap = true;
|
||||
|
@ -1851,8 +1929,11 @@ int perf_file_header__read(struct perf_file_header *header,
|
|||
if (ret <= 0)
|
||||
return -1;
|
||||
|
||||
if (check_magic_endian(&header->magic, header, ph) < 0)
|
||||
if (check_magic_endian(header->magic,
|
||||
header->attr_size, false, ph) < 0) {
|
||||
pr_debug("magic/endian check failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (ph->needs_swap) {
|
||||
mem_bswap_64(header, offsetof(struct perf_file_header,
|
||||
|
@ -1939,21 +2020,17 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
|
|||
if (ret <= 0)
|
||||
return -1;
|
||||
|
||||
if (check_magic_endian(&header->magic, NULL, ph) < 0)
|
||||
if (check_magic_endian(header->magic, header->size, true, ph) < 0) {
|
||||
pr_debug("endian/magic failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (ph->needs_swap)
|
||||
header->size = bswap_64(header->size);
|
||||
|
||||
if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0)
|
||||
return -1;
|
||||
|
||||
if (header->size != sizeof(*header)) {
|
||||
u64 size = bswap_64(header->size);
|
||||
|
||||
if (size != sizeof(*header))
|
||||
return -1;
|
||||
|
||||
ph->needs_swap = true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1973,6 +2050,52 @@ static int perf_header__read_pipe(struct perf_session *session, int fd)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int read_attr(int fd, struct perf_header *ph,
|
||||
struct perf_file_attr *f_attr)
|
||||
{
|
||||
struct perf_event_attr *attr = &f_attr->attr;
|
||||
size_t sz, left;
|
||||
size_t our_sz = sizeof(f_attr->attr);
|
||||
int ret;
|
||||
|
||||
memset(f_attr, 0, sizeof(*f_attr));
|
||||
|
||||
/* read minimal guaranteed structure */
|
||||
ret = readn(fd, attr, PERF_ATTR_SIZE_VER0);
|
||||
if (ret <= 0) {
|
||||
pr_debug("cannot read %d bytes of header attr\n",
|
||||
PERF_ATTR_SIZE_VER0);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* on file perf_event_attr size */
|
||||
sz = attr->size;
|
||||
|
||||
if (ph->needs_swap)
|
||||
sz = bswap_32(sz);
|
||||
|
||||
if (sz == 0) {
|
||||
/* assume ABI0 */
|
||||
sz = PERF_ATTR_SIZE_VER0;
|
||||
} else if (sz > our_sz) {
|
||||
pr_debug("file uses a more recent and unsupported ABI"
|
||||
" (%zu bytes extra)\n", sz - our_sz);
|
||||
return -1;
|
||||
}
|
||||
/* what we have not yet read and that we know about */
|
||||
left = sz - PERF_ATTR_SIZE_VER0;
|
||||
if (left) {
|
||||
void *ptr = attr;
|
||||
ptr += PERF_ATTR_SIZE_VER0;
|
||||
|
||||
ret = readn(fd, ptr, left);
|
||||
}
|
||||
/* read perf_file_section, ids are read in caller */
|
||||
ret = readn(fd, &f_attr->ids, sizeof(f_attr->ids));
|
||||
|
||||
return ret <= 0 ? -1 : 0;
|
||||
}
|
||||
|
||||
int perf_session__read_header(struct perf_session *session, int fd)
|
||||
{
|
||||
struct perf_header *header = &session->header;
|
||||
|
@ -1988,19 +2111,17 @@ int perf_session__read_header(struct perf_session *session, int fd)
|
|||
if (session->fd_pipe)
|
||||
return perf_header__read_pipe(session, fd);
|
||||
|
||||
if (perf_file_header__read(&f_header, header, fd) < 0) {
|
||||
pr_debug("incompatible file format\n");
|
||||
if (perf_file_header__read(&f_header, header, fd) < 0)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
nr_attrs = f_header.attrs.size / sizeof(f_attr);
|
||||
nr_attrs = f_header.attrs.size / f_header.attr_size;
|
||||
lseek(fd, f_header.attrs.offset, SEEK_SET);
|
||||
|
||||
for (i = 0; i < nr_attrs; i++) {
|
||||
struct perf_evsel *evsel;
|
||||
off_t tmp;
|
||||
|
||||
if (readn(fd, &f_attr, sizeof(f_attr)) <= 0)
|
||||
if (read_attr(fd, header, &f_attr) < 0)
|
||||
goto out_errno;
|
||||
|
||||
if (header->needs_swap)
|
||||
|
|
|
@ -27,7 +27,7 @@ enum {
|
|||
HEADER_EVENT_DESC,
|
||||
HEADER_CPU_TOPOLOGY,
|
||||
HEADER_NUMA_TOPOLOGY,
|
||||
|
||||
HEADER_BRANCH_STACK,
|
||||
HEADER_LAST_FEATURE,
|
||||
HEADER_FEAT_BITS = 256,
|
||||
};
|
||||
|
|
|
@ -50,21 +50,25 @@ static void hists__reset_col_len(struct hists *hists)
|
|||
hists__set_col_len(hists, col, 0);
|
||||
}
|
||||
|
||||
static void hists__set_unres_dso_col_len(struct hists *hists, int dso)
|
||||
{
|
||||
const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
|
||||
|
||||
if (hists__col_len(hists, dso) < unresolved_col_width &&
|
||||
!symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
|
||||
!symbol_conf.dso_list)
|
||||
hists__set_col_len(hists, dso, unresolved_col_width);
|
||||
}
|
||||
|
||||
static void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
|
||||
{
|
||||
const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
|
||||
u16 len;
|
||||
|
||||
if (h->ms.sym)
|
||||
hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen);
|
||||
else {
|
||||
const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
|
||||
|
||||
if (hists__col_len(hists, HISTC_DSO) < unresolved_col_width &&
|
||||
!symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
|
||||
!symbol_conf.dso_list)
|
||||
hists__set_col_len(hists, HISTC_DSO,
|
||||
unresolved_col_width);
|
||||
}
|
||||
hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen + 4);
|
||||
else
|
||||
hists__set_unres_dso_col_len(hists, HISTC_DSO);
|
||||
|
||||
len = thread__comm_len(h->thread);
|
||||
if (hists__new_col_len(hists, HISTC_COMM, len))
|
||||
|
@ -74,6 +78,37 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
|
|||
len = dso__name_len(h->ms.map->dso);
|
||||
hists__new_col_len(hists, HISTC_DSO, len);
|
||||
}
|
||||
|
||||
if (h->branch_info) {
|
||||
int symlen;
|
||||
/*
|
||||
* +4 accounts for '[x] ' priv level info
|
||||
* +2 account of 0x prefix on raw addresses
|
||||
*/
|
||||
if (h->branch_info->from.sym) {
|
||||
symlen = (int)h->branch_info->from.sym->namelen + 4;
|
||||
hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
|
||||
|
||||
symlen = dso__name_len(h->branch_info->from.map->dso);
|
||||
hists__new_col_len(hists, HISTC_DSO_FROM, symlen);
|
||||
} else {
|
||||
symlen = unresolved_col_width + 4 + 2;
|
||||
hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
|
||||
hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM);
|
||||
}
|
||||
|
||||
if (h->branch_info->to.sym) {
|
||||
symlen = (int)h->branch_info->to.sym->namelen + 4;
|
||||
hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
|
||||
|
||||
symlen = dso__name_len(h->branch_info->to.map->dso);
|
||||
hists__new_col_len(hists, HISTC_DSO_TO, symlen);
|
||||
} else {
|
||||
symlen = unresolved_col_width + 4 + 2;
|
||||
hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
|
||||
hists__set_unres_dso_col_len(hists, HISTC_DSO_TO);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void hist_entry__add_cpumode_period(struct hist_entry *he,
|
||||
|
@ -195,26 +230,14 @@ static u8 symbol__parent_filter(const struct symbol *parent)
|
|||
return 0;
|
||||
}
|
||||
|
||||
struct hist_entry *__hists__add_entry(struct hists *hists,
|
||||
static struct hist_entry *add_hist_entry(struct hists *hists,
|
||||
struct hist_entry *entry,
|
||||
struct addr_location *al,
|
||||
struct symbol *sym_parent, u64 period)
|
||||
u64 period)
|
||||
{
|
||||
struct rb_node **p;
|
||||
struct rb_node *parent = NULL;
|
||||
struct hist_entry *he;
|
||||
struct hist_entry entry = {
|
||||
.thread = al->thread,
|
||||
.ms = {
|
||||
.map = al->map,
|
||||
.sym = al->sym,
|
||||
},
|
||||
.cpu = al->cpu,
|
||||
.ip = al->addr,
|
||||
.level = al->level,
|
||||
.period = period,
|
||||
.parent = sym_parent,
|
||||
.filtered = symbol__parent_filter(sym_parent),
|
||||
};
|
||||
int cmp;
|
||||
|
||||
pthread_mutex_lock(&hists->lock);
|
||||
|
@ -225,7 +248,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
|
|||
parent = *p;
|
||||
he = rb_entry(parent, struct hist_entry, rb_node_in);
|
||||
|
||||
cmp = hist_entry__cmp(&entry, he);
|
||||
cmp = hist_entry__cmp(entry, he);
|
||||
|
||||
if (!cmp) {
|
||||
he->period += period;
|
||||
|
@ -239,7 +262,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
|
|||
p = &(*p)->rb_right;
|
||||
}
|
||||
|
||||
he = hist_entry__new(&entry);
|
||||
he = hist_entry__new(entry);
|
||||
if (!he)
|
||||
goto out_unlock;
|
||||
|
||||
|
@ -252,6 +275,51 @@ out_unlock:
|
|||
return he;
|
||||
}
|
||||
|
||||
struct hist_entry *__hists__add_branch_entry(struct hists *self,
|
||||
struct addr_location *al,
|
||||
struct symbol *sym_parent,
|
||||
struct branch_info *bi,
|
||||
u64 period)
|
||||
{
|
||||
struct hist_entry entry = {
|
||||
.thread = al->thread,
|
||||
.ms = {
|
||||
.map = bi->to.map,
|
||||
.sym = bi->to.sym,
|
||||
},
|
||||
.cpu = al->cpu,
|
||||
.ip = bi->to.addr,
|
||||
.level = al->level,
|
||||
.period = period,
|
||||
.parent = sym_parent,
|
||||
.filtered = symbol__parent_filter(sym_parent),
|
||||
.branch_info = bi,
|
||||
};
|
||||
|
||||
return add_hist_entry(self, &entry, al, period);
|
||||
}
|
||||
|
||||
/*
 * Add (or accumulate into) a histogram entry for a resolved sample
 * location.
 *
 * Every location field (thread, map, symbol, cpu, ip, level) comes from
 * @al; no branch_info is attached. Returns whatever add_hist_entry()
 * returns.
 */
struct hist_entry *__hists__add_entry(struct hists *self,
				      struct addr_location *al,
				      struct symbol *sym_parent, u64 period)
{
	struct hist_entry entry = {
		.thread	= al->thread,
		.cpu	= al->cpu,
		.ip	= al->addr,
		.level	= al->level,
		.period	= period,
		.parent	= sym_parent,
	};

	entry.ms.map = al->map;
	entry.ms.sym = al->sym;
	entry.filtered = symbol__parent_filter(sym_parent);

	return add_hist_entry(self, &entry, al, period);
}
|
||||
|
||||
int64_t
|
||||
hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
|
||||
{
|
||||
|
|
|
@ -42,6 +42,11 @@ enum hist_column {
|
|||
HISTC_COMM,
|
||||
HISTC_PARENT,
|
||||
HISTC_CPU,
|
||||
HISTC_MISPREDICT,
|
||||
HISTC_SYMBOL_FROM,
|
||||
HISTC_SYMBOL_TO,
|
||||
HISTC_DSO_FROM,
|
||||
HISTC_DSO_TO,
|
||||
HISTC_NR_COLS, /* Last entry */
|
||||
};
|
||||
|
||||
|
@ -74,6 +79,12 @@ int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size,
|
|||
struct hists *hists);
|
||||
void hist_entry__free(struct hist_entry *);
|
||||
|
||||
struct hist_entry *__hists__add_branch_entry(struct hists *self,
|
||||
struct addr_location *al,
|
||||
struct symbol *sym_parent,
|
||||
struct branch_info *bi,
|
||||
u64 period);
|
||||
|
||||
void hists__output_resort(struct hists *self);
|
||||
void hists__output_resort_threaded(struct hists *hists);
|
||||
void hists__collapse_resort(struct hists *self);
|
||||
|
|
|
@ -24,7 +24,7 @@ static int perf_session__open(struct perf_session *self, bool force)
|
|||
self->fd = STDIN_FILENO;
|
||||
|
||||
if (perf_session__read_header(self, self->fd) < 0)
|
||||
pr_err("incompatible file format");
|
||||
pr_err("incompatible file format (rerun with -v to learn more)");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -56,7 +56,7 @@ static int perf_session__open(struct perf_session *self, bool force)
|
|||
}
|
||||
|
||||
if (perf_session__read_header(self, self->fd) < 0) {
|
||||
pr_err("incompatible file format");
|
||||
pr_err("incompatible file format (rerun with -v to learn more)");
|
||||
goto out_close;
|
||||
}
|
||||
|
||||
|
@ -229,6 +229,64 @@ static bool symbol__match_parent_regex(struct symbol *sym)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * CPU modes that ip__resolve_ams() tries, in this order, when resolving
 * a branch stack address.
 */
static const u8 cpumodes[] = {
|
||||
PERF_RECORD_MISC_USER,
|
||||
PERF_RECORD_MISC_KERNEL,
|
||||
PERF_RECORD_MISC_GUEST_USER,
|
||||
PERF_RECORD_MISC_GUEST_KERNEL
|
||||
};
|
||||
#define NCPUMODES (sizeof(cpumodes)/sizeof(u8))
|
||||
|
||||
static void ip__resolve_ams(struct machine *self, struct thread *thread,
|
||||
struct addr_map_symbol *ams,
|
||||
u64 ip)
|
||||
{
|
||||
struct addr_location al;
|
||||
size_t i;
|
||||
u8 m;
|
||||
|
||||
memset(&al, 0, sizeof(al));
|
||||
|
||||
for (i = 0; i < NCPUMODES; i++) {
|
||||
m = cpumodes[i];
|
||||
/*
|
||||
* We cannot use the header.misc hint to determine whether a
|
||||
* branch stack address is user, kernel, guest, hypervisor.
|
||||
* Branches may straddle the kernel/user/hypervisor boundaries.
|
||||
* Thus, we have to try consecutively until we find a match
|
||||
* or else, the symbol is unknown
|
||||
*/
|
||||
thread__find_addr_location(thread, self, m, MAP__FUNCTION,
|
||||
ip, &al, NULL);
|
||||
if (al.sym)
|
||||
goto found;
|
||||
}
|
||||
found:
|
||||
ams->addr = ip;
|
||||
ams->al_addr = al.addr;
|
||||
ams->sym = al.sym;
|
||||
ams->map = al.map;
|
||||
}
|
||||
|
||||
struct branch_info *machine__resolve_bstack(struct machine *self,
|
||||
struct thread *thr,
|
||||
struct branch_stack *bs)
|
||||
{
|
||||
struct branch_info *bi;
|
||||
unsigned int i;
|
||||
|
||||
bi = calloc(bs->nr, sizeof(struct branch_info));
|
||||
if (!bi)
|
||||
return NULL;
|
||||
|
||||
for (i = 0; i < bs->nr; i++) {
|
||||
ip__resolve_ams(self, thr, &bi[i].to, bs->entries[i].to);
|
||||
ip__resolve_ams(self, thr, &bi[i].from, bs->entries[i].from);
|
||||
bi[i].flags = bs->entries[i].flags;
|
||||
}
|
||||
return bi;
|
||||
}
|
||||
|
||||
int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel,
|
||||
struct thread *thread,
|
||||
struct ip_callchain *chain,
|
||||
|
@ -697,6 +755,18 @@ static void callchain__printf(struct perf_sample *sample)
|
|||
i, sample->callchain->ips[i]);
|
||||
}
|
||||
|
||||
static void branch_stack__printf(struct perf_sample *sample)
|
||||
{
|
||||
uint64_t i;
|
||||
|
||||
printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr);
|
||||
|
||||
for (i = 0; i < sample->branch_stack->nr; i++)
|
||||
printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n",
|
||||
i, sample->branch_stack->entries[i].from,
|
||||
sample->branch_stack->entries[i].to);
|
||||
}
|
||||
|
||||
static void perf_session__print_tstamp(struct perf_session *session,
|
||||
union perf_event *event,
|
||||
struct perf_sample *sample)
|
||||
|
@ -744,6 +814,9 @@ static void dump_sample(struct perf_session *session, union perf_event *event,
|
|||
|
||||
if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
|
||||
callchain__printf(sample);
|
||||
|
||||
if (session->sample_type & PERF_SAMPLE_BRANCH_STACK)
|
||||
branch_stack__printf(sample);
|
||||
}
|
||||
|
||||
static struct machine *
|
||||
|
|
|
@ -73,6 +73,10 @@ int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel
|
|||
struct ip_callchain *chain,
|
||||
struct symbol **parent);
|
||||
|
||||
struct branch_info *machine__resolve_bstack(struct machine *self,
|
||||
struct thread *thread,
|
||||
struct branch_stack *bs);
|
||||
|
||||
bool perf_session__has_traces(struct perf_session *self, const char *msg);
|
||||
|
||||
void mem_bswap_64(void *src, int byte_size);
|
||||
|
|
|
@ -8,6 +8,7 @@ const char default_sort_order[] = "comm,dso,symbol";
|
|||
const char *sort_order = default_sort_order;
|
||||
int sort__need_collapse = 0;
|
||||
int sort__has_parent = 0;
|
||||
int sort__branch_mode = -1; /* -1 = means not set */
|
||||
|
||||
enum sort_type sort__first_dimension;
|
||||
|
||||
|
@ -94,21 +95,10 @@ static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf,
|
|||
return repsep_snprintf(bf, size, "%*s", width, self->thread->comm);
|
||||
}
|
||||
|
||||
struct sort_entry sort_comm = {
|
||||
.se_header = "Command",
|
||||
.se_cmp = sort__comm_cmp,
|
||||
.se_collapse = sort__comm_collapse,
|
||||
.se_snprintf = hist_entry__comm_snprintf,
|
||||
.se_width_idx = HISTC_COMM,
|
||||
};
|
||||
|
||||
/* --sort dso */
|
||||
|
||||
static int64_t
|
||||
sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
|
||||
static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
|
||||
{
|
||||
struct dso *dso_l = left->ms.map ? left->ms.map->dso : NULL;
|
||||
struct dso *dso_r = right->ms.map ? right->ms.map->dso : NULL;
|
||||
struct dso *dso_l = map_l ? map_l->dso : NULL;
|
||||
struct dso *dso_r = map_r ? map_r->dso : NULL;
|
||||
const char *dso_name_l, *dso_name_r;
|
||||
|
||||
if (!dso_l || !dso_r)
|
||||
|
@ -125,18 +115,87 @@ sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
|
|||
return strcmp(dso_name_l, dso_name_r);
|
||||
}
|
||||
|
||||
static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
|
||||
size_t size, unsigned int width)
|
||||
/*
 * Sort key descriptor for the "Command" column: compare, collapse and
 * print callbacks, plus the column width slot (HISTC_COMM).
 */
struct sort_entry sort_comm = {
|
||||
.se_header = "Command",
|
||||
.se_cmp = sort__comm_cmp,
|
||||
.se_collapse = sort__comm_collapse,
|
||||
.se_snprintf = hist_entry__comm_snprintf,
|
||||
.se_width_idx = HISTC_COMM,
|
||||
};
|
||||
|
||||
/* --sort dso */
|
||||
|
||||
static int64_t
|
||||
sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
|
||||
{
|
||||
if (self->ms.map && self->ms.map->dso) {
|
||||
const char *dso_name = !verbose ? self->ms.map->dso->short_name :
|
||||
self->ms.map->dso->long_name;
|
||||
return _sort__dso_cmp(left->ms.map, right->ms.map);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Compare two symbols for sorting.
 *
 * If either symbol is NULL the result comes from cmp_null(). The same
 * symbol on both sides compares equal. Otherwise the result is the
 * difference of the symbols' start addresses (right minus left).
 *
 * @ip_l and @ip_r do not affect the result: once both symbols are known
 * to be non-NULL they are overwritten with the start addresses.
 */
static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r,
			      u64 ip_l, u64 ip_r)
{
	if (!sym_l || !sym_r)
		return cmp_null(sym_l, sym_r);

	if (sym_l == sym_r)
		return 0;

	/* both symbols are non-NULL from here on */
	ip_l = sym_l->start;
	ip_r = sym_r->start;

	return (int64_t)(ip_r - ip_l);
}
|
||||
|
||||
static int _hist_entry__dso_snprintf(struct map *map, char *bf,
|
||||
size_t size, unsigned int width)
|
||||
{
|
||||
if (map && map->dso) {
|
||||
const char *dso_name = !verbose ? map->dso->short_name :
|
||||
map->dso->long_name;
|
||||
return repsep_snprintf(bf, size, "%-*s", width, dso_name);
|
||||
}
|
||||
|
||||
return repsep_snprintf(bf, size, "%-*s", width, "[unknown]");
|
||||
}
|
||||
|
||||
static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
|
||||
size_t size, unsigned int width)
|
||||
{
|
||||
return _hist_entry__dso_snprintf(self->ms.map, bf, size, width);
|
||||
}
|
||||
|
||||
static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
|
||||
u64 ip, char level, char *bf, size_t size,
|
||||
unsigned int width __used)
|
||||
{
|
||||
size_t ret = 0;
|
||||
|
||||
if (verbose) {
|
||||
char o = map ? dso__symtab_origin(map->dso) : '!';
|
||||
ret += repsep_snprintf(bf, size, "%-#*llx %c ",
|
||||
BITS_PER_LONG / 4, ip, o);
|
||||
}
|
||||
|
||||
ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
|
||||
if (sym)
|
||||
ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
|
||||
width - ret,
|
||||
sym->name);
|
||||
else {
|
||||
size_t len = BITS_PER_LONG / 4;
|
||||
ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
|
||||
len, ip);
|
||||
ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
|
||||
width - ret, "");
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
struct sort_entry sort_dso = {
|
||||
.se_header = "Shared Object",
|
||||
.se_cmp = sort__dso_cmp,
|
||||
|
@ -144,8 +203,14 @@ struct sort_entry sort_dso = {
|
|||
.se_width_idx = HISTC_DSO,
|
||||
};
|
||||
|
||||
/* --sort symbol */
|
||||
/* Print the symbol column of @self from the entry's own map, symbol, ip and level. */
static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
				    size_t size, unsigned int width __used)
{
	struct map *map = self->ms.map;
	struct symbol *sym = self->ms.sym;

	return _hist_entry__sym_snprintf(map, sym, self->ip,
					 self->level, bf, size, width);
}
|
||||
|
||||
/* --sort symbol */
|
||||
static int64_t
|
||||
sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
|
||||
{
|
||||
|
@ -163,31 +228,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
|
|||
ip_l = left->ms.sym->start;
|
||||
ip_r = right->ms.sym->start;
|
||||
|
||||
return (int64_t)(ip_r - ip_l);
|
||||
}
|
||||
|
||||
static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
|
||||
size_t size, unsigned int width __used)
|
||||
{
|
||||
size_t ret = 0;
|
||||
|
||||
if (verbose) {
|
||||
char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!';
|
||||
ret += repsep_snprintf(bf, size, "%-#*llx %c ",
|
||||
BITS_PER_LONG / 4, self->ip, o);
|
||||
}
|
||||
|
||||
if (!sort_dso.elide)
|
||||
ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", self->level);
|
||||
|
||||
if (self->ms.sym)
|
||||
ret += repsep_snprintf(bf + ret, size - ret, "%s",
|
||||
self->ms.sym->name);
|
||||
else
|
||||
ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx",
|
||||
BITS_PER_LONG / 4, self->ip);
|
||||
|
||||
return ret;
|
||||
return _sort__sym_cmp(left->ms.sym, right->ms.sym, ip_l, ip_r);
|
||||
}
|
||||
|
||||
struct sort_entry sort_sym = {
|
||||
|
@ -246,19 +287,155 @@ struct sort_entry sort_cpu = {
|
|||
.se_width_idx = HISTC_CPU,
|
||||
};
|
||||
|
||||
static int64_t
|
||||
sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right)
|
||||
{
|
||||
return _sort__dso_cmp(left->branch_info->from.map,
|
||||
right->branch_info->from.map);
|
||||
}
|
||||
|
||||
static int hist_entry__dso_from_snprintf(struct hist_entry *self, char *bf,
|
||||
size_t size, unsigned int width)
|
||||
{
|
||||
return _hist_entry__dso_snprintf(self->branch_info->from.map,
|
||||
bf, size, width);
|
||||
}
|
||||
|
||||
struct sort_entry sort_dso_from = {
|
||||
.se_header = "Source Shared Object",
|
||||
.se_cmp = sort__dso_from_cmp,
|
||||
.se_snprintf = hist_entry__dso_from_snprintf,
|
||||
.se_width_idx = HISTC_DSO_FROM,
|
||||
};
|
||||
|
||||
static int64_t
|
||||
sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
|
||||
{
|
||||
return _sort__dso_cmp(left->branch_info->to.map,
|
||||
right->branch_info->to.map);
|
||||
}
|
||||
|
||||
static int hist_entry__dso_to_snprintf(struct hist_entry *self, char *bf,
|
||||
size_t size, unsigned int width)
|
||||
{
|
||||
return _hist_entry__dso_snprintf(self->branch_info->to.map,
|
||||
bf, size, width);
|
||||
}
|
||||
|
||||
static int64_t
|
||||
sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
|
||||
{
|
||||
struct addr_map_symbol *from_l = &left->branch_info->from;
|
||||
struct addr_map_symbol *from_r = &right->branch_info->from;
|
||||
|
||||
if (!from_l->sym && !from_r->sym)
|
||||
return right->level - left->level;
|
||||
|
||||
return _sort__sym_cmp(from_l->sym, from_r->sym, from_l->addr,
|
||||
from_r->addr);
|
||||
}
|
||||
|
||||
static int64_t
|
||||
sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right)
|
||||
{
|
||||
struct addr_map_symbol *to_l = &left->branch_info->to;
|
||||
struct addr_map_symbol *to_r = &right->branch_info->to;
|
||||
|
||||
if (!to_l->sym && !to_r->sym)
|
||||
return right->level - left->level;
|
||||
|
||||
return _sort__sym_cmp(to_l->sym, to_r->sym, to_l->addr, to_r->addr);
|
||||
}
|
||||
|
||||
static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf,
|
||||
size_t size, unsigned int width __used)
|
||||
{
|
||||
struct addr_map_symbol *from = &self->branch_info->from;
|
||||
return _hist_entry__sym_snprintf(from->map, from->sym, from->addr,
|
||||
self->level, bf, size, width);
|
||||
|
||||
}
|
||||
|
||||
static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf,
|
||||
size_t size, unsigned int width __used)
|
||||
{
|
||||
struct addr_map_symbol *to = &self->branch_info->to;
|
||||
return _hist_entry__sym_snprintf(to->map, to->sym, to->addr,
|
||||
self->level, bf, size, width);
|
||||
|
||||
}
|
||||
|
||||
struct sort_entry sort_dso_to = {
|
||||
.se_header = "Target Shared Object",
|
||||
.se_cmp = sort__dso_to_cmp,
|
||||
.se_snprintf = hist_entry__dso_to_snprintf,
|
||||
.se_width_idx = HISTC_DSO_TO,
|
||||
};
|
||||
|
||||
struct sort_entry sort_sym_from = {
|
||||
.se_header = "Source Symbol",
|
||||
.se_cmp = sort__sym_from_cmp,
|
||||
.se_snprintf = hist_entry__sym_from_snprintf,
|
||||
.se_width_idx = HISTC_SYMBOL_FROM,
|
||||
};
|
||||
|
||||
struct sort_entry sort_sym_to = {
|
||||
.se_header = "Target Symbol",
|
||||
.se_cmp = sort__sym_to_cmp,
|
||||
.se_snprintf = hist_entry__sym_to_snprintf,
|
||||
.se_width_idx = HISTC_SYMBOL_TO,
|
||||
};
|
||||
|
||||
static int64_t
|
||||
sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right)
|
||||
{
|
||||
const unsigned char mp = left->branch_info->flags.mispred !=
|
||||
right->branch_info->flags.mispred;
|
||||
const unsigned char p = left->branch_info->flags.predicted !=
|
||||
right->branch_info->flags.predicted;
|
||||
|
||||
return mp || p;
|
||||
}
|
||||
|
||||
static int hist_entry__mispredict_snprintf(struct hist_entry *self, char *bf,
|
||||
size_t size, unsigned int width){
|
||||
static const char *out = "N/A";
|
||||
|
||||
if (self->branch_info->flags.predicted)
|
||||
out = "N";
|
||||
else if (self->branch_info->flags.mispred)
|
||||
out = "Y";
|
||||
|
||||
return repsep_snprintf(bf, size, "%-*s", width, out);
|
||||
}
|
||||
|
||||
struct sort_entry sort_mispredict = {
|
||||
.se_header = "Branch Mispredicted",
|
||||
.se_cmp = sort__mispredict_cmp,
|
||||
.se_snprintf = hist_entry__mispredict_snprintf,
|
||||
.se_width_idx = HISTC_MISPREDICT,
|
||||
};
|
||||
|
||||
/* One selectable sort key: its name and the sort_entry implementing it. */
struct sort_dimension {
	const char		*name;	/* key name matched against the user's token */
	struct sort_entry	*entry;	/* sort entry queued when the key is chosen */
	int			taken;	/* NOTE(review): presumably set once the key is added - confirm */
};
|
||||
|
||||
#define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
|
||||
|
||||
static struct sort_dimension sort_dimensions[] = {
|
||||
{ .name = "pid", .entry = &sort_thread, },
|
||||
{ .name = "comm", .entry = &sort_comm, },
|
||||
{ .name = "dso", .entry = &sort_dso, },
|
||||
{ .name = "symbol", .entry = &sort_sym, },
|
||||
{ .name = "parent", .entry = &sort_parent, },
|
||||
{ .name = "cpu", .entry = &sort_cpu, },
|
||||
DIM(SORT_PID, "pid", sort_thread),
|
||||
DIM(SORT_COMM, "comm", sort_comm),
|
||||
DIM(SORT_DSO, "dso", sort_dso),
|
||||
DIM(SORT_DSO_FROM, "dso_from", sort_dso_from),
|
||||
DIM(SORT_DSO_TO, "dso_to", sort_dso_to),
|
||||
DIM(SORT_SYM, "symbol", sort_sym),
|
||||
DIM(SORT_SYM_TO, "symbol_from", sort_sym_from),
|
||||
DIM(SORT_SYM_FROM, "symbol_to", sort_sym_to),
|
||||
DIM(SORT_PARENT, "parent", sort_parent),
|
||||
DIM(SORT_CPU, "cpu", sort_cpu),
|
||||
DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
|
||||
};
|
||||
|
||||
int sort_dimension__add(const char *tok)
|
||||
|
@ -270,7 +447,6 @@ int sort_dimension__add(const char *tok)
|
|||
|
||||
if (strncasecmp(tok, sd->name, strlen(tok)))
|
||||
continue;
|
||||
|
||||
if (sd->entry == &sort_parent) {
|
||||
int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
|
||||
if (ret) {
|
||||
|
@ -302,6 +478,16 @@ int sort_dimension__add(const char *tok)
|
|||
sort__first_dimension = SORT_PARENT;
|
||||
else if (!strcmp(sd->name, "cpu"))
|
||||
sort__first_dimension = SORT_CPU;
|
||||
else if (!strcmp(sd->name, "symbol_from"))
|
||||
sort__first_dimension = SORT_SYM_FROM;
|
||||
else if (!strcmp(sd->name, "symbol_to"))
|
||||
sort__first_dimension = SORT_SYM_TO;
|
||||
else if (!strcmp(sd->name, "dso_from"))
|
||||
sort__first_dimension = SORT_DSO_FROM;
|
||||
else if (!strcmp(sd->name, "dso_to"))
|
||||
sort__first_dimension = SORT_DSO_TO;
|
||||
else if (!strcmp(sd->name, "mispredict"))
|
||||
sort__first_dimension = SORT_MISPREDICT;
|
||||
}
|
||||
|
||||
list_add_tail(&sd->entry->list, &hist_entry__sort_list);
|
||||
|
@ -309,7 +495,6 @@ int sort_dimension__add(const char *tok)
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -ESRCH;
|
||||
}
|
||||
|
||||
|
|
|
@ -31,11 +31,16 @@ extern const char *parent_pattern;
|
|||
extern const char default_sort_order[];
|
||||
extern int sort__need_collapse;
|
||||
extern int sort__has_parent;
|
||||
extern int sort__branch_mode;
|
||||
extern char *field_sep;
|
||||
extern struct sort_entry sort_comm;
|
||||
extern struct sort_entry sort_dso;
|
||||
extern struct sort_entry sort_sym;
|
||||
extern struct sort_entry sort_parent;
|
||||
extern struct sort_entry sort_dso_from;
|
||||
extern struct sort_entry sort_dso_to;
|
||||
extern struct sort_entry sort_sym_from;
|
||||
extern struct sort_entry sort_sym_to;
|
||||
extern enum sort_type sort__first_dimension;
|
||||
|
||||
/**
|
||||
|
@ -72,6 +77,7 @@ struct hist_entry {
|
|||
struct hist_entry *pair;
|
||||
struct rb_root sorted_chain;
|
||||
};
|
||||
struct branch_info *branch_info;
|
||||
struct callchain_root callchain[0];
|
||||
};
|
||||
|
||||
|
@ -82,6 +88,11 @@ enum sort_type {
|
|||
SORT_SYM,
|
||||
SORT_PARENT,
|
||||
SORT_CPU,
|
||||
SORT_DSO_FROM,
|
||||
SORT_DSO_TO,
|
||||
SORT_SYM_FROM,
|
||||
SORT_SYM_TO,
|
||||
SORT_MISPREDICT,
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include "map.h"
|
||||
#include "../perf.h"
|
||||
#include <linux/list.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <stdio.h>
|
||||
|
@ -96,7 +97,11 @@ struct symbol_conf {
|
|||
*col_width_list_str;
|
||||
struct strlist *dso_list,
|
||||
*comm_list,
|
||||
*sym_list;
|
||||
*sym_list,
|
||||
*dso_from_list,
|
||||
*dso_to_list,
|
||||
*sym_from_list,
|
||||
*sym_to_list;
|
||||
const char *symfs;
|
||||
};
|
||||
|
||||
|
@ -120,6 +125,19 @@ struct map_symbol {
|
|||
bool has_children;
|
||||
};
|
||||
|
||||
struct addr_map_symbol {
|
||||
struct map *map;
|
||||
struct symbol *sym;
|
||||
u64 addr;
|
||||
u64 al_addr;
|
||||
};
|
||||
|
||||
struct branch_info {
|
||||
struct addr_map_symbol from;
|
||||
struct addr_map_symbol to;
|
||||
struct branch_flags flags;
|
||||
};
|
||||
|
||||
struct addr_location {
|
||||
struct thread *thread;
|
||||
struct map *map;
|
||||
|
|
|
@ -805,8 +805,11 @@ static struct hist_browser *hist_browser__new(struct hists *hists)
|
|||
self->hists = hists;
|
||||
self->b.refresh = hist_browser__refresh;
|
||||
self->b.seek = ui_browser__hists_seek;
|
||||
self->b.use_navkeypressed = true,
|
||||
self->has_symbols = sort_sym.list.next != NULL;
|
||||
self->b.use_navkeypressed = true;
|
||||
if (sort__branch_mode == 1)
|
||||
self->has_symbols = sort_sym_from.list.next != NULL;
|
||||
else
|
||||
self->has_symbols = sort_sym.list.next != NULL;
|
||||
}
|
||||
|
||||
return self;
|
||||
|
@ -853,6 +856,16 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size,
|
|||
return printed;
|
||||
}
|
||||
|
||||
/*
 * Release the first @n strings of the popup menu @options array and reset
 * each released slot to NULL, so freeing the same slots again is harmless.
 */
static inline void free_popup_options(char **options, int n)
{
	char **slot = options;
	int remaining = n;

	while (remaining-- > 0) {
		free(*slot);
		*slot++ = NULL;
	}
}
|
||||
|
||||
static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
|
||||
const char *helpline, const char *ev_name,
|
||||
bool left_exits,
|
||||
|
@ -861,7 +874,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
|
|||
{
|
||||
struct hists *self = &evsel->hists;
|
||||
struct hist_browser *browser = hist_browser__new(self);
|
||||
struct branch_info *bi;
|
||||
struct pstack *fstack;
|
||||
char *options[16];
|
||||
int nr_options = 0;
|
||||
int key = -1;
|
||||
|
||||
if (browser == NULL)
|
||||
|
@ -873,13 +889,16 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
|
|||
|
||||
ui_helpline__push(helpline);
|
||||
|
||||
memset(options, 0, sizeof(options));
|
||||
|
||||
while (1) {
|
||||
const struct thread *thread = NULL;
|
||||
const struct dso *dso = NULL;
|
||||
char *options[16];
|
||||
int nr_options = 0, choice = 0, i,
|
||||
int choice = 0,
|
||||
annotate = -2, zoom_dso = -2, zoom_thread = -2,
|
||||
browse_map = -2;
|
||||
annotate_f = -2, annotate_t = -2, browse_map = -2;
|
||||
|
||||
nr_options = 0;
|
||||
|
||||
key = hist_browser__run(browser, ev_name, timer, arg, delay_secs);
|
||||
|
||||
|
@ -887,7 +906,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
|
|||
thread = hist_browser__selected_thread(browser);
|
||||
dso = browser->selection->map ? browser->selection->map->dso : NULL;
|
||||
}
|
||||
|
||||
switch (key) {
|
||||
case K_TAB:
|
||||
case K_UNTAB:
|
||||
|
@ -902,7 +920,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
|
|||
if (!browser->has_symbols) {
|
||||
ui_browser__warning(&browser->b, delay_secs * 2,
|
||||
"Annotation is only available for symbolic views, "
|
||||
"include \"sym\" in --sort to use it.");
|
||||
"include \"sym*\" in --sort to use it.");
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -972,12 +990,34 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
|
|||
if (!browser->has_symbols)
|
||||
goto add_exit_option;
|
||||
|
||||
if (browser->selection != NULL &&
|
||||
browser->selection->sym != NULL &&
|
||||
!browser->selection->map->dso->annotate_warned &&
|
||||
asprintf(&options[nr_options], "Annotate %s",
|
||||
browser->selection->sym->name) > 0)
|
||||
annotate = nr_options++;
|
||||
if (sort__branch_mode == 1) {
|
||||
bi = browser->he_selection->branch_info;
|
||||
if (browser->selection != NULL &&
|
||||
bi &&
|
||||
bi->from.sym != NULL &&
|
||||
!bi->from.map->dso->annotate_warned &&
|
||||
asprintf(&options[nr_options], "Annotate %s",
|
||||
bi->from.sym->name) > 0)
|
||||
annotate_f = nr_options++;
|
||||
|
||||
if (browser->selection != NULL &&
|
||||
bi &&
|
||||
bi->to.sym != NULL &&
|
||||
!bi->to.map->dso->annotate_warned &&
|
||||
(bi->to.sym != bi->from.sym ||
|
||||
bi->to.map->dso != bi->from.map->dso) &&
|
||||
asprintf(&options[nr_options], "Annotate %s",
|
||||
bi->to.sym->name) > 0)
|
||||
annotate_t = nr_options++;
|
||||
} else {
|
||||
|
||||
if (browser->selection != NULL &&
|
||||
browser->selection->sym != NULL &&
|
||||
!browser->selection->map->dso->annotate_warned &&
|
||||
asprintf(&options[nr_options], "Annotate %s",
|
||||
browser->selection->sym->name) > 0)
|
||||
annotate = nr_options++;
|
||||
}
|
||||
|
||||
if (thread != NULL &&
|
||||
asprintf(&options[nr_options], "Zoom %s %s(%d) thread",
|
||||
|
@ -998,25 +1038,39 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
|
|||
browse_map = nr_options++;
|
||||
add_exit_option:
|
||||
options[nr_options++] = (char *)"Exit";
|
||||
|
||||
retry_popup_menu:
|
||||
choice = ui__popup_menu(nr_options, options);
|
||||
|
||||
for (i = 0; i < nr_options - 1; ++i)
|
||||
free(options[i]);
|
||||
|
||||
if (choice == nr_options - 1)
|
||||
break;
|
||||
|
||||
if (choice == -1)
|
||||
if (choice == -1) {
|
||||
free_popup_options(options, nr_options - 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (choice == annotate) {
|
||||
if (choice == annotate || choice == annotate_t || choice == annotate_f) {
|
||||
struct hist_entry *he;
|
||||
int err;
|
||||
do_annotate:
|
||||
he = hist_browser__selected_entry(browser);
|
||||
if (he == NULL)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* we stash the branch_info symbol + map into the
|
||||
* ms so we don't have to rewrite all the annotation
|
||||
* code to use branch_info.
|
||||
* in branch mode, the ms struct is not used
|
||||
*/
|
||||
if (choice == annotate_f) {
|
||||
he->ms.sym = he->branch_info->from.sym;
|
||||
he->ms.map = he->branch_info->from.map;
|
||||
} else if (choice == annotate_t) {
|
||||
he->ms.sym = he->branch_info->to.sym;
|
||||
he->ms.map = he->branch_info->to.map;
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't let this be freed, say, by hists__decay_entry.
|
||||
*/
|
||||
|
@ -1024,9 +1078,18 @@ do_annotate:
|
|||
err = hist_entry__tui_annotate(he, evsel->idx,
|
||||
timer, arg, delay_secs);
|
||||
he->used = false;
|
||||
/*
|
||||
* offer option to annotate the other branch source or target
|
||||
* (if they exist) when returning from annotate
|
||||
*/
|
||||
if ((err == 'q' || err == CTRL('c'))
|
||||
&& annotate_t != -2 && annotate_f != -2)
|
||||
goto retry_popup_menu;
|
||||
|
||||
ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
|
||||
if (err)
|
||||
ui_browser__handle_resize(&browser->b);
|
||||
|
||||
} else if (choice == browse_map)
|
||||
map__browse(browser->selection->map);
|
||||
else if (choice == zoom_dso) {
|
||||
|
@ -1072,6 +1135,7 @@ out_free_stack:
|
|||
pstack__delete(fstack);
|
||||
out:
|
||||
hist_browser__delete(browser);
|
||||
free_popup_options(options, nr_options - 1);
|
||||
return key;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue