Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (27 commits) perf_counter: Zero dead bytes from ftrace raw samples size alignment perf_counter: Subtract the buffer size field from the event record size perf_counter: Require CAP_SYS_ADMIN for raw tracepoint data perf_counter: Correct PERF_SAMPLE_RAW output perf tools: callchain: Fix bad rounding of minimum rate perf_counter tools: Fix libbfd detection for systems with libz dependency perf: "Longum est iter per praecepta, breve et efficax per exempla" perf_counter: Fix a race on perf_counter_ctx perf_counter: Fix tracepoint sampling to be part of generic sampling perf_counter: Work around gcc warning by initializing tracepoint record unconditionally perf tools: callchain: Fix sum of percentages to be 100% by displaying amount of ignored chains in fractal mode perf tools: callchain: Fix 'perf report' display to be callchain by default perf tools: callchain: Fix spurious 'perf report' warnings: ignore empty callchains perf record: Fix the -A UI for empty or non-existent perf.data perf util: Fix do_read() to fail on EOF instead of busy-looping perf list: Fix the output to not include tracepoints without an id perf_counter/powerpc: Fix oops on cpus without perf_counter hardware support perf stat: Fix tool option consistency: rename -S/--scale to -c/--scale perf report: Add debug help for the finding of symbol bugs - show the symtab origin (DSO, build-id, kernel, etc) perf report: Fix per task mult-counter stat reporting ...
This commit is contained in:
commit
d00aa6695b
|
@ -518,6 +518,8 @@ void hw_perf_disable(void)
|
|||
struct cpu_hw_counters *cpuhw;
|
||||
unsigned long flags;
|
||||
|
||||
if (!ppmu)
|
||||
return;
|
||||
local_irq_save(flags);
|
||||
cpuhw = &__get_cpu_var(cpu_hw_counters);
|
||||
|
||||
|
@ -572,6 +574,8 @@ void hw_perf_enable(void)
|
|||
int n_lim;
|
||||
int idx;
|
||||
|
||||
if (!ppmu)
|
||||
return;
|
||||
local_irq_save(flags);
|
||||
cpuhw = &__get_cpu_var(cpu_hw_counters);
|
||||
if (!cpuhw->disabled) {
|
||||
|
@ -737,6 +741,8 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
|
|||
long i, n, n0;
|
||||
struct perf_counter *sub;
|
||||
|
||||
if (!ppmu)
|
||||
return 0;
|
||||
cpuhw = &__get_cpu_var(cpu_hw_counters);
|
||||
n0 = cpuhw->n_counters;
|
||||
n = collect_events(group_leader, ppmu->n_counter - n0,
|
||||
|
@ -1281,6 +1287,8 @@ void hw_perf_counter_setup(int cpu)
|
|||
{
|
||||
struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu);
|
||||
|
||||
if (!ppmu)
|
||||
return;
|
||||
memset(cpuhw, 0, sizeof(*cpuhw));
|
||||
cpuhw->mmcr[0] = MMCR0_FC;
|
||||
}
|
||||
|
|
|
@ -121,7 +121,7 @@ enum perf_counter_sample_format {
|
|||
PERF_SAMPLE_CPU = 1U << 7,
|
||||
PERF_SAMPLE_PERIOD = 1U << 8,
|
||||
PERF_SAMPLE_STREAM_ID = 1U << 9,
|
||||
PERF_SAMPLE_TP_RECORD = 1U << 10,
|
||||
PERF_SAMPLE_RAW = 1U << 10,
|
||||
|
||||
PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */
|
||||
};
|
||||
|
@ -369,6 +369,8 @@ enum perf_event_type {
|
|||
*
|
||||
* { u64 nr,
|
||||
* u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
|
||||
* { u32 size;
|
||||
* char data[size];}&& PERF_SAMPLE_RAW
|
||||
* };
|
||||
*/
|
||||
PERF_EVENT_SAMPLE = 9,
|
||||
|
@ -414,9 +416,9 @@ struct perf_callchain_entry {
|
|||
__u64 ip[PERF_MAX_STACK_DEPTH];
|
||||
};
|
||||
|
||||
struct perf_tracepoint_record {
|
||||
int size;
|
||||
char *record;
|
||||
struct perf_raw_record {
|
||||
u32 size;
|
||||
void *data;
|
||||
};
|
||||
|
||||
struct task_struct;
|
||||
|
@ -687,7 +689,7 @@ struct perf_sample_data {
|
|||
struct pt_regs *regs;
|
||||
u64 addr;
|
||||
u64 period;
|
||||
void *private;
|
||||
struct perf_raw_record *raw;
|
||||
};
|
||||
|
||||
extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
|
||||
|
|
|
@ -637,12 +637,20 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
|
|||
* pc = preempt_count();
|
||||
*
|
||||
* __data_size = ftrace_get_offsets_<call>(&__data_offsets, args);
|
||||
* __entry_size = __data_size + sizeof(*entry);
|
||||
*
|
||||
* // Below we want to get the aligned size by taking into account
|
||||
* // the u32 field that will later store the buffer size
|
||||
* __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),
|
||||
* sizeof(u64));
|
||||
* __entry_size -= sizeof(u32);
|
||||
*
|
||||
* do {
|
||||
* char raw_data[__entry_size]; <- allocate our sample in the stack
|
||||
* struct trace_entry *ent;
|
||||
*
|
||||
* zero dead bytes from alignment to avoid stack leak to userspace:
|
||||
*
|
||||
* *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
|
||||
* entry = (struct ftrace_raw_<call> *)raw_data;
|
||||
* ent = &entry->ent;
|
||||
* tracing_generic_entry_update(ent, irq_flags, pc);
|
||||
|
@ -685,12 +693,15 @@ static void ftrace_profile_##call(proto) \
|
|||
pc = preempt_count(); \
|
||||
\
|
||||
__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
|
||||
__entry_size = ALIGN(__data_size + sizeof(*entry), sizeof(u64));\
|
||||
__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
|
||||
sizeof(u64)); \
|
||||
__entry_size -= sizeof(u32); \
|
||||
\
|
||||
do { \
|
||||
char raw_data[__entry_size]; \
|
||||
struct trace_entry *ent; \
|
||||
\
|
||||
*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
|
||||
entry = (struct ftrace_raw_##call *)raw_data; \
|
||||
ent = &entry->ent; \
|
||||
tracing_generic_entry_update(ent, irq_flags, pc); \
|
||||
|
|
|
@ -2646,7 +2646,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
|
|||
u64 counter;
|
||||
} group_entry;
|
||||
struct perf_callchain_entry *callchain = NULL;
|
||||
struct perf_tracepoint_record *tp;
|
||||
int callchain_size = 0;
|
||||
u64 time;
|
||||
struct {
|
||||
|
@ -2715,9 +2714,16 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
|
|||
header.size += sizeof(u64);
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_TP_RECORD) {
|
||||
tp = data->private;
|
||||
header.size += tp->size;
|
||||
if (sample_type & PERF_SAMPLE_RAW) {
|
||||
int size = sizeof(u32);
|
||||
|
||||
if (data->raw)
|
||||
size += data->raw->size;
|
||||
else
|
||||
size += sizeof(u32);
|
||||
|
||||
WARN_ON_ONCE(size & (sizeof(u64)-1));
|
||||
header.size += size;
|
||||
}
|
||||
|
||||
ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
|
||||
|
@ -2783,8 +2789,21 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
|
|||
}
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_TP_RECORD)
|
||||
perf_output_copy(&handle, tp->record, tp->size);
|
||||
if (sample_type & PERF_SAMPLE_RAW) {
|
||||
if (data->raw) {
|
||||
perf_output_put(&handle, data->raw->size);
|
||||
perf_output_copy(&handle, data->raw->data, data->raw->size);
|
||||
} else {
|
||||
struct {
|
||||
u32 size;
|
||||
u32 data;
|
||||
} raw = {
|
||||
.size = sizeof(u32),
|
||||
.data = 0,
|
||||
};
|
||||
perf_output_put(&handle, raw);
|
||||
}
|
||||
}
|
||||
|
||||
perf_output_end(&handle);
|
||||
}
|
||||
|
@ -2849,7 +2868,8 @@ perf_counter_read_event(struct perf_counter *counter,
|
|||
*/
|
||||
|
||||
struct perf_task_event {
|
||||
struct task_struct *task;
|
||||
struct task_struct *task;
|
||||
struct perf_counter_context *task_ctx;
|
||||
|
||||
struct {
|
||||
struct perf_event_header header;
|
||||
|
@ -2909,24 +2929,23 @@ static void perf_counter_task_ctx(struct perf_counter_context *ctx,
|
|||
static void perf_counter_task_event(struct perf_task_event *task_event)
|
||||
{
|
||||
struct perf_cpu_context *cpuctx;
|
||||
struct perf_counter_context *ctx;
|
||||
struct perf_counter_context *ctx = task_event->task_ctx;
|
||||
|
||||
cpuctx = &get_cpu_var(perf_cpu_context);
|
||||
perf_counter_task_ctx(&cpuctx->ctx, task_event);
|
||||
put_cpu_var(perf_cpu_context);
|
||||
|
||||
rcu_read_lock();
|
||||
/*
|
||||
* doesn't really matter which of the child contexts the
|
||||
* events ends up in.
|
||||
*/
|
||||
ctx = rcu_dereference(current->perf_counter_ctxp);
|
||||
if (!ctx)
|
||||
ctx = rcu_dereference(task_event->task->perf_counter_ctxp);
|
||||
if (ctx)
|
||||
perf_counter_task_ctx(ctx, task_event);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void perf_counter_task(struct task_struct *task, int new)
|
||||
static void perf_counter_task(struct task_struct *task,
|
||||
struct perf_counter_context *task_ctx,
|
||||
int new)
|
||||
{
|
||||
struct perf_task_event task_event;
|
||||
|
||||
|
@ -2936,8 +2955,9 @@ static void perf_counter_task(struct task_struct *task, int new)
|
|||
return;
|
||||
|
||||
task_event = (struct perf_task_event){
|
||||
.task = task,
|
||||
.event = {
|
||||
.task = task,
|
||||
.task_ctx = task_ctx,
|
||||
.event = {
|
||||
.header = {
|
||||
.type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT,
|
||||
.misc = 0,
|
||||
|
@ -2955,7 +2975,7 @@ static void perf_counter_task(struct task_struct *task, int new)
|
|||
|
||||
void perf_counter_fork(struct task_struct *task)
|
||||
{
|
||||
perf_counter_task(task, 1);
|
||||
perf_counter_task(task, NULL, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -3344,87 +3364,81 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi,
|
|||
* Generic software counter infrastructure
|
||||
*/
|
||||
|
||||
static void perf_swcounter_update(struct perf_counter *counter)
|
||||
/*
|
||||
* We directly increment counter->count and keep a second value in
|
||||
* counter->hw.period_left to count intervals. This period counter
|
||||
* is kept in the range [-sample_period, 0] so that we can use the
|
||||
* sign as trigger.
|
||||
*/
|
||||
|
||||
static u64 perf_swcounter_set_period(struct perf_counter *counter)
|
||||
{
|
||||
struct hw_perf_counter *hwc = &counter->hw;
|
||||
u64 prev, now;
|
||||
s64 delta;
|
||||
u64 period = hwc->last_period;
|
||||
u64 nr, offset;
|
||||
s64 old, val;
|
||||
|
||||
hwc->last_period = hwc->sample_period;
|
||||
|
||||
again:
|
||||
prev = atomic64_read(&hwc->prev_count);
|
||||
now = atomic64_read(&hwc->count);
|
||||
if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev)
|
||||
old = val = atomic64_read(&hwc->period_left);
|
||||
if (val < 0)
|
||||
return 0;
|
||||
|
||||
nr = div64_u64(period + val, period);
|
||||
offset = nr * period;
|
||||
val -= offset;
|
||||
if (atomic64_cmpxchg(&hwc->period_left, old, val) != old)
|
||||
goto again;
|
||||
|
||||
delta = now - prev;
|
||||
|
||||
atomic64_add(delta, &counter->count);
|
||||
atomic64_sub(delta, &hwc->period_left);
|
||||
}
|
||||
|
||||
static void perf_swcounter_set_period(struct perf_counter *counter)
|
||||
{
|
||||
struct hw_perf_counter *hwc = &counter->hw;
|
||||
s64 left = atomic64_read(&hwc->period_left);
|
||||
s64 period = hwc->sample_period;
|
||||
|
||||
if (unlikely(left <= -period)) {
|
||||
left = period;
|
||||
atomic64_set(&hwc->period_left, left);
|
||||
hwc->last_period = period;
|
||||
}
|
||||
|
||||
if (unlikely(left <= 0)) {
|
||||
left += period;
|
||||
atomic64_add(period, &hwc->period_left);
|
||||
hwc->last_period = period;
|
||||
}
|
||||
|
||||
atomic64_set(&hwc->prev_count, -left);
|
||||
atomic64_set(&hwc->count, -left);
|
||||
}
|
||||
|
||||
static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
|
||||
{
|
||||
enum hrtimer_restart ret = HRTIMER_RESTART;
|
||||
struct perf_sample_data data;
|
||||
struct perf_counter *counter;
|
||||
u64 period;
|
||||
|
||||
counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
|
||||
counter->pmu->read(counter);
|
||||
|
||||
data.addr = 0;
|
||||
data.regs = get_irq_regs();
|
||||
/*
|
||||
* In case we exclude kernel IPs or are somehow not in interrupt
|
||||
* context, provide the next best thing, the user IP.
|
||||
*/
|
||||
if ((counter->attr.exclude_kernel || !data.regs) &&
|
||||
!counter->attr.exclude_user)
|
||||
data.regs = task_pt_regs(current);
|
||||
|
||||
if (data.regs) {
|
||||
if (perf_counter_overflow(counter, 0, &data))
|
||||
ret = HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
period = max_t(u64, 10000, counter->hw.sample_period);
|
||||
hrtimer_forward_now(hrtimer, ns_to_ktime(period));
|
||||
|
||||
return ret;
|
||||
return nr;
|
||||
}
|
||||
|
||||
static void perf_swcounter_overflow(struct perf_counter *counter,
|
||||
int nmi, struct perf_sample_data *data)
|
||||
{
|
||||
data->period = counter->hw.last_period;
|
||||
struct hw_perf_counter *hwc = &counter->hw;
|
||||
u64 overflow;
|
||||
|
||||
perf_swcounter_update(counter);
|
||||
perf_swcounter_set_period(counter);
|
||||
if (perf_counter_overflow(counter, nmi, data))
|
||||
/* soft-disable the counter */
|
||||
;
|
||||
data->period = counter->hw.last_period;
|
||||
overflow = perf_swcounter_set_period(counter);
|
||||
|
||||
if (hwc->interrupts == MAX_INTERRUPTS)
|
||||
return;
|
||||
|
||||
for (; overflow; overflow--) {
|
||||
if (perf_counter_overflow(counter, nmi, data)) {
|
||||
/*
|
||||
* We inhibit the overflow from happening when
|
||||
* hwc->interrupts == MAX_INTERRUPTS.
|
||||
*/
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void perf_swcounter_unthrottle(struct perf_counter *counter)
|
||||
{
|
||||
/*
|
||||
* Nothing to do, we already reset hwc->interrupts.
|
||||
*/
|
||||
}
|
||||
|
||||
static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
|
||||
int nmi, struct perf_sample_data *data)
|
||||
{
|
||||
struct hw_perf_counter *hwc = &counter->hw;
|
||||
|
||||
atomic64_add(nr, &counter->count);
|
||||
|
||||
if (!hwc->sample_period)
|
||||
return;
|
||||
|
||||
if (!data->regs)
|
||||
return;
|
||||
|
||||
if (!atomic64_add_negative(nr, &hwc->period_left))
|
||||
perf_swcounter_overflow(counter, nmi, data);
|
||||
}
|
||||
|
||||
static int perf_swcounter_is_counting(struct perf_counter *counter)
|
||||
|
@ -3488,15 +3502,6 @@ static int perf_swcounter_match(struct perf_counter *counter,
|
|||
return 1;
|
||||
}
|
||||
|
||||
static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
|
||||
int nmi, struct perf_sample_data *data)
|
||||
{
|
||||
int neg = atomic64_add_negative(nr, &counter->hw.count);
|
||||
|
||||
if (counter->hw.sample_period && !neg && data->regs)
|
||||
perf_swcounter_overflow(counter, nmi, data);
|
||||
}
|
||||
|
||||
static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
|
||||
enum perf_type_id type,
|
||||
u32 event, u64 nr, int nmi,
|
||||
|
@ -3575,26 +3580,65 @@ void __perf_swcounter_event(u32 event, u64 nr, int nmi,
|
|||
|
||||
static void perf_swcounter_read(struct perf_counter *counter)
|
||||
{
|
||||
perf_swcounter_update(counter);
|
||||
}
|
||||
|
||||
static int perf_swcounter_enable(struct perf_counter *counter)
|
||||
{
|
||||
perf_swcounter_set_period(counter);
|
||||
struct hw_perf_counter *hwc = &counter->hw;
|
||||
|
||||
if (hwc->sample_period) {
|
||||
hwc->last_period = hwc->sample_period;
|
||||
perf_swcounter_set_period(counter);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void perf_swcounter_disable(struct perf_counter *counter)
|
||||
{
|
||||
perf_swcounter_update(counter);
|
||||
}
|
||||
|
||||
static const struct pmu perf_ops_generic = {
|
||||
.enable = perf_swcounter_enable,
|
||||
.disable = perf_swcounter_disable,
|
||||
.read = perf_swcounter_read,
|
||||
.unthrottle = perf_swcounter_unthrottle,
|
||||
};
|
||||
|
||||
/*
|
||||
* hrtimer based swcounter callback
|
||||
*/
|
||||
|
||||
static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
|
||||
{
|
||||
enum hrtimer_restart ret = HRTIMER_RESTART;
|
||||
struct perf_sample_data data;
|
||||
struct perf_counter *counter;
|
||||
u64 period;
|
||||
|
||||
counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
|
||||
counter->pmu->read(counter);
|
||||
|
||||
data.addr = 0;
|
||||
data.regs = get_irq_regs();
|
||||
/*
|
||||
* In case we exclude kernel IPs or are somehow not in interrupt
|
||||
* context, provide the next best thing, the user IP.
|
||||
*/
|
||||
if ((counter->attr.exclude_kernel || !data.regs) &&
|
||||
!counter->attr.exclude_user)
|
||||
data.regs = task_pt_regs(current);
|
||||
|
||||
if (data.regs) {
|
||||
if (perf_counter_overflow(counter, 0, &data))
|
||||
ret = HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
period = max_t(u64, 10000, counter->hw.sample_period);
|
||||
hrtimer_forward_now(hrtimer, ns_to_ktime(period));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Software counter: cpu wall time clock
|
||||
*/
|
||||
|
@ -3715,15 +3759,15 @@ static const struct pmu perf_ops_task_clock = {
|
|||
void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record,
|
||||
int entry_size)
|
||||
{
|
||||
struct perf_tracepoint_record tp = {
|
||||
struct perf_raw_record raw = {
|
||||
.size = entry_size,
|
||||
.record = record,
|
||||
.data = record,
|
||||
};
|
||||
|
||||
struct perf_sample_data data = {
|
||||
.regs = get_irq_regs(),
|
||||
.addr = addr,
|
||||
.private = &tp,
|
||||
.raw = &raw,
|
||||
};
|
||||
|
||||
if (!data.regs)
|
||||
|
@ -3743,6 +3787,14 @@ static void tp_perf_counter_destroy(struct perf_counter *counter)
|
|||
|
||||
static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
|
||||
{
|
||||
/*
|
||||
* Raw tracepoint data is a severe data leak, only allow root to
|
||||
* have these.
|
||||
*/
|
||||
if ((counter->attr.sample_type & PERF_SAMPLE_RAW) &&
|
||||
!capable(CAP_SYS_ADMIN))
|
||||
return ERR_PTR(-EPERM);
|
||||
|
||||
if (ftrace_profile_enable(counter->attr.config))
|
||||
return NULL;
|
||||
|
||||
|
@ -4285,7 +4337,7 @@ void perf_counter_exit_task(struct task_struct *child)
|
|||
unsigned long flags;
|
||||
|
||||
if (likely(!child->perf_counter_ctxp)) {
|
||||
perf_counter_task(child, 0);
|
||||
perf_counter_task(child, NULL, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -4305,6 +4357,7 @@ void perf_counter_exit_task(struct task_struct *child)
|
|||
* incremented the context's refcount before we do put_ctx below.
|
||||
*/
|
||||
spin_lock(&child_ctx->lock);
|
||||
child->perf_counter_ctxp = NULL;
|
||||
/*
|
||||
* If this context is a clone; unclone it so it can't get
|
||||
* swapped to another process while we're removing all
|
||||
|
@ -4318,9 +4371,7 @@ void perf_counter_exit_task(struct task_struct *child)
|
|||
* won't get any samples after PERF_EVENT_EXIT. We can however still
|
||||
* get a few PERF_EVENT_READ events.
|
||||
*/
|
||||
perf_counter_task(child, 0);
|
||||
|
||||
child->perf_counter_ctxp = NULL;
|
||||
perf_counter_task(child, child_ctx, 0);
|
||||
|
||||
/*
|
||||
* We can recurse on the same lock type through:
|
||||
|
|
|
@ -0,0 +1,225 @@
|
|||
|
||||
------------------------------
|
||||
****** perf by examples ******
|
||||
------------------------------
|
||||
|
||||
[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ]
|
||||
|
||||
|
||||
First, discovery/enumeration of available counters can be done via
|
||||
'perf list':
|
||||
|
||||
titan:~> perf list
|
||||
[...]
|
||||
kmem:kmalloc [Tracepoint event]
|
||||
kmem:kmem_cache_alloc [Tracepoint event]
|
||||
kmem:kmalloc_node [Tracepoint event]
|
||||
kmem:kmem_cache_alloc_node [Tracepoint event]
|
||||
kmem:kfree [Tracepoint event]
|
||||
kmem:kmem_cache_free [Tracepoint event]
|
||||
kmem:mm_page_free_direct [Tracepoint event]
|
||||
kmem:mm_pagevec_free [Tracepoint event]
|
||||
kmem:mm_page_alloc [Tracepoint event]
|
||||
kmem:mm_page_alloc_zone_locked [Tracepoint event]
|
||||
kmem:mm_page_pcpu_drain [Tracepoint event]
|
||||
kmem:mm_page_alloc_extfrag [Tracepoint event]
|
||||
|
||||
Then any (or all) of the above event sources can be activated and
|
||||
measured. For example the page alloc/free properties of a 'hackbench
|
||||
run' are:
|
||||
|
||||
titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc
|
||||
-e kmem:mm_pagevec_free -e kmem:mm_page_free_direct ./hackbench 10
|
||||
Time: 0.575
|
||||
|
||||
Performance counter stats for './hackbench 10':
|
||||
|
||||
13857 kmem:mm_page_pcpu_drain
|
||||
27576 kmem:mm_page_alloc
|
||||
6025 kmem:mm_pagevec_free
|
||||
20934 kmem:mm_page_free_direct
|
||||
|
||||
0.613972165 seconds time elapsed
|
||||
|
||||
You can observe the statistical properties as well, by using the
|
||||
'repeat the workload N times' feature of perf stat:
|
||||
|
||||
titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e
|
||||
kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
|
||||
kmem:mm_page_free_direct ./hackbench 10
|
||||
Time: 0.627
|
||||
Time: 0.644
|
||||
Time: 0.564
|
||||
Time: 0.559
|
||||
Time: 0.626
|
||||
|
||||
Performance counter stats for './hackbench 10' (5 runs):
|
||||
|
||||
12920 kmem:mm_page_pcpu_drain ( +- 3.359% )
|
||||
25035 kmem:mm_page_alloc ( +- 3.783% )
|
||||
6104 kmem:mm_pagevec_free ( +- 0.934% )
|
||||
18376 kmem:mm_page_free_direct ( +- 4.941% )
|
||||
|
||||
0.643954516 seconds time elapsed ( +- 2.363% )
|
||||
|
||||
Furthermore, these tracepoints can be used to sample the workload as
|
||||
well. For example the page allocations done by a 'git gc' can be
|
||||
captured the following way:
|
||||
|
||||
titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc
|
||||
Counting objects: 1148, done.
|
||||
Delta compression using up to 2 threads.
|
||||
Compressing objects: 100% (450/450), done.
|
||||
Writing objects: 100% (1148/1148), done.
|
||||
Total 1148 (delta 690), reused 1148 (delta 690)
|
||||
[ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ]
|
||||
|
||||
To check which functions generated page allocations:
|
||||
|
||||
titan:~/git> perf report
|
||||
# Samples: 10646
|
||||
#
|
||||
# Overhead Command Shared Object
|
||||
# ........ ............... ..........................
|
||||
#
|
||||
23.57% git-repack /lib64/libc-2.5.so
|
||||
21.81% git /lib64/libc-2.5.so
|
||||
14.59% git ./git
|
||||
11.79% git-repack ./git
|
||||
7.12% git /lib64/ld-2.5.so
|
||||
3.16% git-repack /lib64/libpthread-2.5.so
|
||||
2.09% git-repack /bin/bash
|
||||
1.97% rm /lib64/libc-2.5.so
|
||||
1.39% mv /lib64/ld-2.5.so
|
||||
1.37% mv /lib64/libc-2.5.so
|
||||
1.12% git-repack /lib64/ld-2.5.so
|
||||
0.95% rm /lib64/ld-2.5.so
|
||||
0.90% git-update-serv /lib64/libc-2.5.so
|
||||
0.73% git-update-serv /lib64/ld-2.5.so
|
||||
0.68% perf /lib64/libpthread-2.5.so
|
||||
0.64% git-repack /usr/lib64/libz.so.1.2.3
|
||||
|
||||
Or to see it on a more fine-grained level:
|
||||
|
||||
titan:~/git> perf report --sort comm,dso,symbol
|
||||
# Samples: 10646
|
||||
#
|
||||
# Overhead Command Shared Object Symbol
|
||||
# ........ ............... .......................... ......
|
||||
#
|
||||
9.35% git-repack ./git [.] insert_obj_hash
|
||||
9.12% git ./git [.] insert_obj_hash
|
||||
7.31% git /lib64/libc-2.5.so [.] memcpy
|
||||
6.34% git-repack /lib64/libc-2.5.so [.] _int_malloc
|
||||
6.24% git-repack /lib64/libc-2.5.so [.] memcpy
|
||||
5.82% git-repack /lib64/libc-2.5.so [.] __GI___fork
|
||||
5.47% git /lib64/libc-2.5.so [.] _int_malloc
|
||||
2.99% git /lib64/libc-2.5.so [.] memset
|
||||
|
||||
Furthermore, call-graph sampling can be done too, of page
|
||||
allocations - to see precisely what kind of page allocations there
|
||||
are:
|
||||
|
||||
titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc
|
||||
Counting objects: 1148, done.
|
||||
Delta compression using up to 2 threads.
|
||||
Compressing objects: 100% (450/450), done.
|
||||
Writing objects: 100% (1148/1148), done.
|
||||
Total 1148 (delta 690), reused 1148 (delta 690)
|
||||
[ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ]
|
||||
|
||||
titan:~/git> perf report -g
|
||||
# Samples: 10686
|
||||
#
|
||||
# Overhead Command Shared Object
|
||||
# ........ ............... ..........................
|
||||
#
|
||||
23.25% git-repack /lib64/libc-2.5.so
|
||||
|
|
||||
|--50.00%-- _int_free
|
||||
|
|
||||
|--37.50%-- __GI___fork
|
||||
| make_child
|
||||
|
|
||||
|--12.50%-- ptmalloc_unlock_all2
|
||||
| make_child
|
||||
|
|
||||
--6.25%-- __GI_strcpy
|
||||
21.61% git /lib64/libc-2.5.so
|
||||
|
|
||||
|--30.00%-- __GI_read
|
||||
| |
|
||||
| --83.33%-- git_config_from_file
|
||||
| git_config
|
||||
| |
|
||||
[...]
|
||||
|
||||
Or you can observe the whole system's page allocations for 10
|
||||
seconds:
|
||||
|
||||
titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e
|
||||
kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
|
||||
kmem:mm_page_free_direct sleep 10
|
||||
|
||||
Performance counter stats for 'sleep 10':
|
||||
|
||||
171585 kmem:mm_page_pcpu_drain
|
||||
322114 kmem:mm_page_alloc
|
||||
73623 kmem:mm_pagevec_free
|
||||
254115 kmem:mm_page_free_direct
|
||||
|
||||
10.000591410 seconds time elapsed
|
||||
|
||||
Or observe how fluctuating the page allocations are, via statistical
|
||||
analysis done over ten 1-second intervals:
|
||||
|
||||
titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e
|
||||
kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
|
||||
kmem:mm_page_free_direct sleep 1
|
||||
|
||||
Performance counter stats for 'sleep 1' (10 runs):
|
||||
|
||||
17254 kmem:mm_page_pcpu_drain ( +- 3.709% )
|
||||
34394 kmem:mm_page_alloc ( +- 4.617% )
|
||||
7509 kmem:mm_pagevec_free ( +- 4.820% )
|
||||
25653 kmem:mm_page_free_direct ( +- 3.672% )
|
||||
|
||||
1.058135029 seconds time elapsed ( +- 3.089% )
|
||||
|
||||
Or you can annotate the recorded 'git gc' run on a per symbol basis
|
||||
and check which instructions/source-code generated page allocations:
|
||||
|
||||
titan:~/git> perf annotate __GI___fork
|
||||
------------------------------------------------
|
||||
Percent | Source code & Disassembly of libc-2.5.so
|
||||
------------------------------------------------
|
||||
:
|
||||
:
|
||||
: Disassembly of section .plt:
|
||||
: Disassembly of section .text:
|
||||
:
|
||||
: 00000031a2e95560 <__fork>:
|
||||
[...]
|
||||
0.00 : 31a2e95602: b8 38 00 00 00 mov $0x38,%eax
|
||||
0.00 : 31a2e95607: 0f 05 syscall
|
||||
83.42 : 31a2e95609: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax
|
||||
0.00 : 31a2e9560f: 0f 87 4d 01 00 00 ja 31a2e95762 <__fork+0x202>
|
||||
0.00 : 31a2e95615: 85 c0 test %eax,%eax
|
||||
|
||||
( this shows that 83.42% of __GI___fork's page allocations come from
|
||||
the 0x38 system call it performs. )
|
||||
|
||||
etc. etc. - a lot more is possible. I could list a dozen
|
||||
other different usecases straight away - none of which is
|
||||
possible via /proc/vmstat.
|
||||
|
||||
/proc/vmstat is not in the same league really, in terms of
|
||||
expressive power of system analysis and performance
|
||||
analysis.
|
||||
|
||||
All that the above results needed were those new tracepoints
|
||||
in include/tracing/events/kmem.h.
|
||||
|
||||
Ingo
|
||||
|
||||
|
|
@ -40,7 +40,7 @@ OPTIONS
|
|||
-a::
|
||||
system-wide collection
|
||||
|
||||
-S::
|
||||
-c::
|
||||
scale counter values
|
||||
|
||||
EXAMPLES
|
||||
|
|
|
@ -3,36 +3,122 @@ perf-top(1)
|
|||
|
||||
NAME
|
||||
----
|
||||
perf-top - Run a command and profile it
|
||||
perf-top - System profiling tool.
|
||||
|
||||
SYNOPSIS
|
||||
--------
|
||||
[verse]
|
||||
'perf top' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
|
||||
'perf top' [-e <EVENT> | --event=EVENT] [<options>]
|
||||
|
||||
DESCRIPTION
|
||||
-----------
|
||||
This command runs a command and gathers a performance counter profile
|
||||
from it.
|
||||
This command generates and displays a performance counter profile in realtime.
|
||||
|
||||
|
||||
OPTIONS
|
||||
-------
|
||||
<command>...::
|
||||
Any command you can specify in a shell.
|
||||
-a::
|
||||
--all-cpus::
|
||||
System-wide collection. (default)
|
||||
|
||||
-e::
|
||||
--event=::
|
||||
-c <count>::
|
||||
--count=<count>::
|
||||
Event period to sample.
|
||||
|
||||
-C <cpu>::
|
||||
--CPU=<cpu>::
|
||||
CPU to profile.
|
||||
|
||||
-d <seconds>::
|
||||
--delay=<seconds>::
|
||||
Number of seconds to delay between refreshes.
|
||||
|
||||
-e <event>::
|
||||
--event=<event>::
|
||||
Select the PMU event. Selection can be a symbolic event name
|
||||
(use 'perf list' to list all events) or a raw PMU
|
||||
event (eventsel+umask) in the form of rNNN where NNN is a
|
||||
hexadecimal event descriptor.
|
||||
hexadecimal event descriptor.
|
||||
|
||||
-a::
|
||||
system-wide collection
|
||||
-E <entries>::
|
||||
--entries=<entries>::
|
||||
Display this many functions.
|
||||
|
||||
-f <count>::
|
||||
--count-filter=<count>::
|
||||
Only display functions with more events than this.
|
||||
|
||||
-F <freq>::
|
||||
--freq=<freq>::
|
||||
Profile at this frequency.
|
||||
|
||||
-i::
|
||||
--inherit::
|
||||
Child tasks inherit counters, only makes sense with -p option.
|
||||
|
||||
-k <path>::
|
||||
--vmlinux=<path>::
|
||||
Path to vmlinux. Required for annotation functionality.
|
||||
|
||||
-m <pages>::
|
||||
--mmap-pages=<pages>::
|
||||
Number of mmapped data pages.
|
||||
|
||||
-p <pid>::
|
||||
--pid=<pid>::
|
||||
Profile events on existing pid.
|
||||
|
||||
-r <priority>::
|
||||
--realtime=<priority>::
|
||||
Collect data with this RT SCHED_FIFO priority.
|
||||
|
||||
-s <symbol>::
|
||||
--sym-annotate=<symbol>::
|
||||
Annotate this symbol. Requires -k option.
|
||||
|
||||
-v::
|
||||
--verbose::
|
||||
Be more verbose (show counter open errors, etc).
|
||||
|
||||
-z::
|
||||
--zero::
|
||||
Zero history across display updates.
|
||||
|
||||
INTERACTIVE PROMPTING KEYS
|
||||
--------------------------
|
||||
|
||||
[d]::
|
||||
Display refresh delay.
|
||||
|
||||
[e]::
|
||||
Number of entries to display.
|
||||
|
||||
[E]::
|
||||
Event to display when multiple counters are active.
|
||||
|
||||
[f]::
|
||||
Profile display filter (>= hit count).
|
||||
|
||||
[F]::
|
||||
Annotation display filter (>= % of total).
|
||||
|
||||
[s]::
|
||||
Annotate symbol.
|
||||
|
||||
[S]::
|
||||
Stop annotation, return to full profile display.
|
||||
|
||||
[w]::
|
||||
Toggle between weighted sum and individual count[E]r profile.
|
||||
|
||||
[z]::
|
||||
Toggle event count zeroing across display updates.
|
||||
|
||||
[qQ]::
|
||||
Quit.
|
||||
|
||||
Pressing any unmapped key displays a menu, and prompts for input.
|
||||
|
||||
-l::
|
||||
scale counter values
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
|
|
|
@ -387,10 +387,14 @@ else
|
|||
|
||||
has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
|
||||
|
||||
has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
|
||||
|
||||
ifeq ($(has_bfd),y)
|
||||
EXTLIBS += -lbfd
|
||||
else ifeq ($(has_bfd_iberty),y)
|
||||
EXTLIBS += -lbfd -liberty
|
||||
else ifeq ($(has_bfd_iberty_z),y)
|
||||
EXTLIBS += -lbfd -liberty -lz
|
||||
else
|
||||
msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
|
||||
BASIC_CFLAGS += -DNO_DEMANGLE
|
||||
|
|
|
@ -525,10 +525,14 @@ static int __cmd_record(int argc, const char **argv)
|
|||
signal(SIGCHLD, sig_handler);
|
||||
signal(SIGINT, sig_handler);
|
||||
|
||||
if (!stat(output_name, &st) && !force && !append_file) {
|
||||
fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
|
||||
output_name);
|
||||
exit(-1);
|
||||
if (!stat(output_name, &st) && st.st_size) {
|
||||
if (!force && !append_file) {
|
||||
fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
|
||||
output_name);
|
||||
exit(-1);
|
||||
}
|
||||
} else {
|
||||
append_file = 0;
|
||||
}
|
||||
|
||||
flags = O_CREAT|O_RDWR;
|
||||
|
|
|
@ -68,7 +68,7 @@ static int callchain;
|
|||
|
||||
static
|
||||
struct callchain_param callchain_param = {
|
||||
.mode = CHAIN_GRAPH_ABS,
|
||||
.mode = CHAIN_GRAPH_REL,
|
||||
.min_percent = 0.5
|
||||
};
|
||||
|
||||
|
@ -112,7 +112,9 @@ struct read_event {
|
|||
struct perf_event_header header;
|
||||
u32 pid,tid;
|
||||
u64 value;
|
||||
u64 format[3];
|
||||
u64 time_enabled;
|
||||
u64 time_running;
|
||||
u64 id;
|
||||
};
|
||||
|
||||
typedef union event_union {
|
||||
|
@ -698,7 +700,8 @@ sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used)
|
|||
size_t ret = 0;
|
||||
|
||||
if (verbose)
|
||||
ret += repsep_fprintf(fp, "%#018llx ", (u64)self->ip);
|
||||
ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip,
|
||||
dso__symtab_origin(self->dso));
|
||||
|
||||
ret += repsep_fprintf(fp, "[%c] ", self->level);
|
||||
if (self->sym) {
|
||||
|
@ -888,6 +891,21 @@ ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static struct symbol *rem_sq_bracket;
|
||||
static struct callchain_list rem_hits;
|
||||
|
||||
static void init_rem_hits(void)
|
||||
{
|
||||
rem_sq_bracket = malloc(sizeof(*rem_sq_bracket) + 6);
|
||||
if (!rem_sq_bracket) {
|
||||
fprintf(stderr, "Not enough memory to display remaining hits\n");
|
||||
return;
|
||||
}
|
||||
|
||||
strcpy(rem_sq_bracket->name, "[...]");
|
||||
rem_hits.sym = rem_sq_bracket;
|
||||
}
|
||||
|
||||
static size_t
|
||||
callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
|
||||
u64 total_samples, int depth, int depth_mask)
|
||||
|
@ -897,25 +915,34 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
|
|||
struct callchain_list *chain;
|
||||
int new_depth_mask = depth_mask;
|
||||
u64 new_total;
|
||||
u64 remaining;
|
||||
size_t ret = 0;
|
||||
int i;
|
||||
|
||||
if (callchain_param.mode == CHAIN_GRAPH_REL)
|
||||
new_total = self->cumul_hit;
|
||||
new_total = self->children_hit;
|
||||
else
|
||||
new_total = total_samples;
|
||||
|
||||
remaining = new_total;
|
||||
|
||||
node = rb_first(&self->rb_root);
|
||||
while (node) {
|
||||
u64 cumul;
|
||||
|
||||
child = rb_entry(node, struct callchain_node, rb_node);
|
||||
cumul = cumul_hits(child);
|
||||
remaining -= cumul;
|
||||
|
||||
/*
|
||||
* The depth mask manages the output of pipes that show
|
||||
* the depth. We don't want to keep the pipes of the current
|
||||
* level for the last child of this depth
|
||||
* level for the last child of this depth.
|
||||
* Except if we have remaining filtered hits. They will
|
||||
* supersede the last child
|
||||
*/
|
||||
next = rb_next(node);
|
||||
if (!next)
|
||||
if (!next && (callchain_param.mode != CHAIN_GRAPH_REL || !remaining))
|
||||
new_depth_mask &= ~(1 << (depth - 1));
|
||||
|
||||
/*
|
||||
|
@ -930,7 +957,7 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
|
|||
ret += ipchain__fprintf_graph(fp, chain, depth,
|
||||
new_depth_mask, i++,
|
||||
new_total,
|
||||
child->cumul_hit);
|
||||
cumul);
|
||||
}
|
||||
ret += callchain__fprintf_graph(fp, child, new_total,
|
||||
depth + 1,
|
||||
|
@ -938,6 +965,19 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
|
|||
node = next;
|
||||
}
|
||||
|
||||
if (callchain_param.mode == CHAIN_GRAPH_REL &&
|
||||
remaining && remaining != new_total) {
|
||||
|
||||
if (!rem_sq_bracket)
|
||||
return ret;
|
||||
|
||||
new_depth_mask &= ~(1 << (depth - 1));
|
||||
|
||||
ret += ipchain__fprintf_graph(fp, &rem_hits, depth,
|
||||
new_depth_mask, 0, new_total,
|
||||
remaining);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1358,6 +1398,8 @@ static size_t output__fprintf(FILE *fp, u64 total_samples)
|
|||
unsigned int width;
|
||||
char *col_width = col_width_list_str;
|
||||
|
||||
init_rem_hits();
|
||||
|
||||
fprintf(fp, "# Samples: %Ld\n", (u64)total_samples);
|
||||
fprintf(fp, "#\n");
|
||||
|
||||
|
@ -1429,6 +1471,8 @@ print_entries:
|
|||
}
|
||||
fprintf(fp, "\n");
|
||||
|
||||
free(rem_sq_bracket);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1690,14 +1734,37 @@ static void trace_event(event_t *event)
|
|||
dprintf(".\n");
|
||||
}
|
||||
|
||||
static struct perf_header *header;
|
||||
|
||||
/*
 * Search every attribute of the global perf header for one that lists @id
 * among its ids.
 *
 * Returns a pointer to the matching perf_counter_attr, or NULL if no
 * attribute carries that id.
 */
static struct perf_counter_attr *perf_header__find_attr(u64 id)
{
	int attr_idx = 0;

	while (attr_idx < header->attrs) {
		struct perf_header_attr *candidate = header->attr[attr_idx];
		int id_idx = 0;

		while (id_idx < candidate->ids) {
			if (candidate->id[id_idx] == id)
				return &candidate->attr;
			id_idx++;
		}
		attr_idx++;
	}

	return NULL;
}
|
||||
|
||||
static int
|
||||
process_read_event(event_t *event, unsigned long offset, unsigned long head)
|
||||
{
|
||||
dprintf("%p [%p]: PERF_EVENT_READ: %d %d %Lu\n",
|
||||
struct perf_counter_attr *attr = perf_header__find_attr(event->read.id);
|
||||
|
||||
dprintf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n",
|
||||
(void *)(offset + head),
|
||||
(void *)(long)(event->header.size),
|
||||
event->read.pid,
|
||||
event->read.tid,
|
||||
attr ? __event_name(attr->type, attr->config)
|
||||
: "FAIL",
|
||||
event->read.value);
|
||||
|
||||
return 0;
|
||||
|
@ -1743,8 +1810,6 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static struct perf_header *header;
|
||||
|
||||
static u64 perf_header__sample_type(void)
|
||||
{
|
||||
u64 sample_type = 0;
|
||||
|
@ -1812,6 +1877,13 @@ static int __cmd_report(void)
|
|||
" -g?\n");
|
||||
exit(-1);
|
||||
}
|
||||
} else if (callchain_param.mode != CHAIN_NONE && !callchain) {
|
||||
callchain = 1;
|
||||
if (register_callchain_param(&callchain_param) < 0) {
|
||||
fprintf(stderr, "Can't register callchain"
|
||||
" params\n");
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
if (load_kernel() < 0) {
|
||||
|
@ -1950,6 +2022,13 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
|
|||
else if (!strncmp(tok, "fractal", strlen(arg)))
|
||||
callchain_param.mode = CHAIN_GRAPH_REL;
|
||||
|
||||
else if (!strncmp(tok, "none", strlen(arg))) {
|
||||
callchain_param.mode = CHAIN_NONE;
|
||||
callchain = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
else
|
||||
return -1;
|
||||
|
||||
|
|
|
@ -496,7 +496,7 @@ static const struct option options[] = {
|
|||
"stat events on existing pid"),
|
||||
OPT_BOOLEAN('a', "all-cpus", &system_wide,
|
||||
"system-wide collection from all CPUs"),
|
||||
OPT_BOOLEAN('S', "scale", &scale,
|
||||
OPT_BOOLEAN('c', "scale", &scale,
|
||||
"scale/normalize counters"),
|
||||
OPT_BOOLEAN('v', "verbose", &verbose,
|
||||
"be more verbose (show counter open errors, etc)"),
|
||||
|
|
|
@ -31,6 +31,8 @@
|
|||
#include <fcntl.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <termios.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <time.h>
|
||||
|
@ -54,7 +56,7 @@ static int system_wide = 0;
|
|||
|
||||
static int default_interval = 100000;
|
||||
|
||||
static u64 count_filter = 5;
|
||||
static int count_filter = 5;
|
||||
static int print_entries = 15;
|
||||
|
||||
static int target_pid = -1;
|
||||
|
@ -69,14 +71,27 @@ static int freq = 0;
|
|||
static int verbose = 0;
|
||||
static char *vmlinux = NULL;
|
||||
|
||||
static char *sym_filter;
|
||||
static unsigned long filter_start;
|
||||
static unsigned long filter_end;
|
||||
|
||||
static int delay_secs = 2;
|
||||
static int zero;
|
||||
static int dump_symtab;
|
||||
|
||||
/*
|
||||
* Source
|
||||
*/
|
||||
|
||||
/*
 * One line of objdump output for the annotated symbol, kept in a singly
 * linked list hanging off sym_entry::lines.
 */
struct source_line {
|
||||
u64 eip; /* address parsed from the start of the line; left 0 when no address is recognised */
|
||||
unsigned long count[MAX_COUNTERS]; /* hits recorded at this address, indexed by counter */
|
||||
char *line; /* text of the line as read by getline(), trailing newline removed */
|
||||
struct source_line *next; /* next line of output, NULL at the end of the list */
|
||||
};
|
||||
|
||||
static char *sym_filter = NULL;
|
||||
struct sym_entry *sym_filter_entry = NULL;
|
||||
static int sym_pcnt_filter = 5;
|
||||
static int sym_counter = 0;
|
||||
static int display_weighted = -1;
|
||||
|
||||
/*
|
||||
* Symbols
|
||||
*/
|
||||
|
@ -91,9 +106,237 @@ struct sym_entry {
|
|||
unsigned long snap_count;
|
||||
double weight;
|
||||
int skip;
|
||||
struct source_line *source;
|
||||
struct source_line *lines;
|
||||
struct source_line **lines_tail;
|
||||
pthread_mutex_t source_lock;
|
||||
};
|
||||
|
||||
struct sym_entry *sym_filter_entry;
|
||||
/*
|
||||
* Source functions
|
||||
*/
|
||||
|
||||
static void parse_source(struct sym_entry *syme)
|
||||
{
|
||||
struct symbol *sym;
|
||||
struct module *module;
|
||||
struct section *section = NULL;
|
||||
FILE *file;
|
||||
char command[PATH_MAX*2], *path = vmlinux;
|
||||
u64 start, end, len;
|
||||
|
||||
if (!syme)
|
||||
return;
|
||||
|
||||
if (syme->lines) {
|
||||
pthread_mutex_lock(&syme->source_lock);
|
||||
goto out_assign;
|
||||
}
|
||||
|
||||
sym = (struct symbol *)(syme + 1);
|
||||
module = sym->module;
|
||||
|
||||
if (module)
|
||||
path = module->path;
|
||||
if (!path)
|
||||
return;
|
||||
|
||||
start = sym->obj_start;
|
||||
if (!start)
|
||||
start = sym->start;
|
||||
|
||||
if (module) {
|
||||
section = module->sections->find_section(module->sections, ".text");
|
||||
if (section)
|
||||
start -= section->vma;
|
||||
}
|
||||
|
||||
end = start + sym->end - sym->start + 1;
|
||||
len = sym->end - sym->start;
|
||||
|
||||
sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path);
|
||||
|
||||
file = popen(command, "r");
|
||||
if (!file)
|
||||
return;
|
||||
|
||||
pthread_mutex_lock(&syme->source_lock);
|
||||
syme->lines_tail = &syme->lines;
|
||||
while (!feof(file)) {
|
||||
struct source_line *src;
|
||||
size_t dummy = 0;
|
||||
char *c;
|
||||
|
||||
src = malloc(sizeof(struct source_line));
|
||||
assert(src != NULL);
|
||||
memset(src, 0, sizeof(struct source_line));
|
||||
|
||||
if (getline(&src->line, &dummy, file) < 0)
|
||||
break;
|
||||
if (!src->line)
|
||||
break;
|
||||
|
||||
c = strchr(src->line, '\n');
|
||||
if (c)
|
||||
*c = 0;
|
||||
|
||||
src->next = NULL;
|
||||
*syme->lines_tail = src;
|
||||
syme->lines_tail = &src->next;
|
||||
|
||||
if (strlen(src->line)>8 && src->line[8] == ':') {
|
||||
src->eip = strtoull(src->line, NULL, 16);
|
||||
if (section)
|
||||
src->eip += section->vma;
|
||||
}
|
||||
if (strlen(src->line)>8 && src->line[16] == ':') {
|
||||
src->eip = strtoull(src->line, NULL, 16);
|
||||
if (section)
|
||||
src->eip += section->vma;
|
||||
}
|
||||
}
|
||||
pclose(file);
|
||||
out_assign:
|
||||
sym_filter_entry = syme;
|
||||
pthread_mutex_unlock(&syme->source_lock);
|
||||
}
|
||||
|
||||
static void __zero_source_counters(struct sym_entry *syme)
|
||||
{
|
||||
int i;
|
||||
struct source_line *line;
|
||||
|
||||
line = syme->lines;
|
||||
while (line) {
|
||||
for (i = 0; i < nr_counters; i++)
|
||||
line->count[i] = 0;
|
||||
line = line->next;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Credit a sample at address @ip to the matching source line of @syme for
 * the given @counter.
 *
 * Nothing is recorded unless @syme is the symbol currently being annotated,
 * its source lock can be taken without blocking, and syme->source is set.
 * The line list is walked in order and the walk stops at the first line
 * whose address is not below @ip.
 */
static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
{
	if (sym_filter_entry != syme)
		return;

	/* Never block the sampling path: skip the sample if the lock is busy. */
	if (pthread_mutex_trylock(&syme->source_lock) != 0)
		return;

	if (syme->source != NULL) {
		struct source_line *cur = syme->lines;

		while (cur != NULL && cur->eip < ip)
			cur = cur->next;

		if (cur != NULL && cur->eip == ip)
			cur->count[counter]++;
	}

	pthread_mutex_unlock(&syme->source_lock);
}
|
||||
|
||||
static void lookup_sym_source(struct sym_entry *syme)
|
||||
{
|
||||
struct symbol *symbol = (struct symbol *)(syme + 1);
|
||||
struct source_line *line;
|
||||
char pattern[PATH_MAX];
|
||||
char *idx;
|
||||
|
||||
sprintf(pattern, "<%s>:", symbol->name);
|
||||
|
||||
if (symbol->module) {
|
||||
idx = strstr(pattern, "\t");
|
||||
if (idx)
|
||||
*idx = 0;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&syme->source_lock);
|
||||
for (line = syme->lines; line; line = line->next) {
|
||||
if (strstr(line->line, pattern)) {
|
||||
syme->source = line;
|
||||
break;
|
||||
}
|
||||
}
|
||||
pthread_mutex_unlock(&syme->source_lock);
|
||||
}
|
||||
|
||||
static void show_lines(struct source_line *queue, int count, int total)
|
||||
{
|
||||
int i;
|
||||
struct source_line *line;
|
||||
|
||||
line = queue;
|
||||
for (i = 0; i < count; i++) {
|
||||
float pcnt = 100.0*(float)line->count[sym_counter]/(float)total;
|
||||
|
||||
printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line);
|
||||
line = line->next;
|
||||
}
|
||||
}
|
||||
|
||||
#define TRACE_COUNT 3
|
||||
|
||||
static void show_details(struct sym_entry *syme)
|
||||
{
|
||||
struct symbol *symbol;
|
||||
struct source_line *line;
|
||||
struct source_line *line_queue = NULL;
|
||||
int displayed = 0;
|
||||
int line_queue_count = 0, total = 0, more = 0;
|
||||
|
||||
if (!syme)
|
||||
return;
|
||||
|
||||
if (!syme->source)
|
||||
lookup_sym_source(syme);
|
||||
|
||||
if (!syme->source)
|
||||
return;
|
||||
|
||||
symbol = (struct symbol *)(syme + 1);
|
||||
printf("Showing %s for %s\n", event_name(sym_counter), symbol->name);
|
||||
printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter);
|
||||
|
||||
pthread_mutex_lock(&syme->source_lock);
|
||||
line = syme->source;
|
||||
while (line) {
|
||||
total += line->count[sym_counter];
|
||||
line = line->next;
|
||||
}
|
||||
|
||||
line = syme->source;
|
||||
while (line) {
|
||||
float pcnt = 0.0;
|
||||
|
||||
if (!line_queue_count)
|
||||
line_queue = line;
|
||||
line_queue_count++;
|
||||
|
||||
if (line->count[sym_counter])
|
||||
pcnt = 100.0 * line->count[sym_counter] / (float)total;
|
||||
if (pcnt >= (float)sym_pcnt_filter) {
|
||||
if (displayed <= print_entries)
|
||||
show_lines(line_queue, line_queue_count, total);
|
||||
else more++;
|
||||
displayed += line_queue_count;
|
||||
line_queue_count = 0;
|
||||
line_queue = NULL;
|
||||
} else if (line_queue_count > TRACE_COUNT) {
|
||||
line_queue = line_queue->next;
|
||||
line_queue_count--;
|
||||
}
|
||||
|
||||
line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8;
|
||||
line = line->next;
|
||||
}
|
||||
pthread_mutex_unlock(&syme->source_lock);
|
||||
if (more)
|
||||
printf("%d lines not displayed, maybe increase display entries [e]\n", more);
|
||||
}
|
||||
|
||||
struct dso *kernel_dso;
|
||||
|
||||
|
@ -112,6 +355,9 @@ static double sym_weight(const struct sym_entry *sym)
|
|||
double weight = sym->snap_count;
|
||||
int counter;
|
||||
|
||||
if (!display_weighted)
|
||||
return weight;
|
||||
|
||||
for (counter = 1; counter < nr_counters-1; counter++)
|
||||
weight *= sym->count[counter];
|
||||
|
||||
|
@ -159,7 +405,7 @@ static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
|
|||
static void print_sym_table(void)
|
||||
{
|
||||
int printed = 0, j;
|
||||
int counter;
|
||||
int counter, snap = !display_weighted ? sym_counter : 0;
|
||||
float samples_per_sec = samples/delay_secs;
|
||||
float ksamples_per_sec = (samples-userspace_samples)/delay_secs;
|
||||
float sum_ksamples = 0.0;
|
||||
|
@ -175,7 +421,7 @@ static void print_sym_table(void)
|
|||
pthread_mutex_unlock(&active_symbols_lock);
|
||||
|
||||
list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
|
||||
syme->snap_count = syme->count[0];
|
||||
syme->snap_count = syme->count[snap];
|
||||
if (syme->snap_count != 0) {
|
||||
syme->weight = sym_weight(syme);
|
||||
rb_insert_active_sym(&tmp, syme);
|
||||
|
@ -195,7 +441,7 @@ static void print_sym_table(void)
|
|||
samples_per_sec,
|
||||
100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
|
||||
|
||||
if (nr_counters == 1) {
|
||||
if (nr_counters == 1 || !display_weighted) {
|
||||
printf("%Ld", (u64)attrs[0].sample_period);
|
||||
if (freq)
|
||||
printf("Hz ");
|
||||
|
@ -203,7 +449,9 @@ static void print_sym_table(void)
|
|||
printf(" ");
|
||||
}
|
||||
|
||||
for (counter = 0; counter < nr_counters; counter++) {
|
||||
if (!display_weighted)
|
||||
printf("%s", event_name(sym_counter));
|
||||
else for (counter = 0; counter < nr_counters; counter++) {
|
||||
if (counter)
|
||||
printf("/");
|
||||
|
||||
|
@ -228,6 +476,11 @@ static void print_sym_table(void)
|
|||
|
||||
printf("------------------------------------------------------------------------------\n\n");
|
||||
|
||||
if (sym_filter_entry) {
|
||||
show_details(sym_filter_entry);
|
||||
return;
|
||||
}
|
||||
|
||||
if (nr_counters == 1)
|
||||
printf(" samples pcnt");
|
||||
else
|
||||
|
@ -242,13 +495,13 @@ static void print_sym_table(void)
|
|||
struct symbol *sym = (struct symbol *)(syme + 1);
|
||||
double pcnt;
|
||||
|
||||
if (++printed > print_entries || syme->snap_count < count_filter)
|
||||
if (++printed > print_entries || (int)syme->snap_count < count_filter)
|
||||
continue;
|
||||
|
||||
pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
|
||||
sum_ksamples));
|
||||
|
||||
if (nr_counters == 1)
|
||||
if (nr_counters == 1 || !display_weighted)
|
||||
printf("%20.2f - ", syme->weight);
|
||||
else
|
||||
printf("%9.1f %10ld - ", syme->weight, syme->snap_count);
|
||||
|
@ -261,19 +514,250 @@ static void print_sym_table(void)
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Print "@msg: " and read one line from stdin.  If the line consists only of
 * decimal digits and the value fits in an int, store it in *@target;
 * otherwise leave *@target untouched.
 *
 * An empty line contains no non-digit and therefore stores 0.
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;
	size_t dummy = 0;
	unsigned long parsed;
	int value;

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;	/* getline may have allocated buf even on failure */

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	for (p = buf; *p; p++) {
		/* ctype functions need an unsigned char value */
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}

	parsed = strtoul(buf, NULL, 10);
	value = (int)parsed;
	/* Reject anything that does not survive the narrowing to int. */
	if (value < 0 || (unsigned long)value != parsed)
		goto out_free;

	*target = value;
out_free:
	free(buf);
}
|
||||
|
||||
/*
 * Prompt with @msg for a percentage and store it in *@target when it lies in
 * 0..100.  Input rejected by prompt_integer leaves the local default of 0 in
 * place, which is in range and is therefore stored.
 */
static void prompt_percent(int *target, const char *msg)
{
	int pct = 0;

	prompt_integer(&pct, msg);

	if (pct < 0 || pct > 100)
		return;

	*target = pct;
}
|
||||
|
||||
static void prompt_symbol(struct sym_entry **target, const char *msg)
|
||||
{
|
||||
char *buf = malloc(0), *p;
|
||||
struct sym_entry *syme = *target, *n, *found = NULL;
|
||||
size_t dummy = 0;
|
||||
|
||||
/* zero counters of active symbol */
|
||||
if (syme) {
|
||||
pthread_mutex_lock(&syme->source_lock);
|
||||
__zero_source_counters(syme);
|
||||
*target = NULL;
|
||||
pthread_mutex_unlock(&syme->source_lock);
|
||||
}
|
||||
|
||||
fprintf(stdout, "\n%s: ", msg);
|
||||
if (getline(&buf, &dummy, stdin) < 0)
|
||||
goto out_free;
|
||||
|
||||
p = strchr(buf, '\n');
|
||||
if (p)
|
||||
*p = 0;
|
||||
|
||||
pthread_mutex_lock(&active_symbols_lock);
|
||||
syme = list_entry(active_symbols.next, struct sym_entry, node);
|
||||
pthread_mutex_unlock(&active_symbols_lock);
|
||||
|
||||
list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
|
||||
struct symbol *sym = (struct symbol *)(syme + 1);
|
||||
|
||||
if (!strcmp(buf, sym->name)) {
|
||||
found = syme;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
fprintf(stderr, "Sorry, %s is not active.\n", sym_filter);
|
||||
sleep(1);
|
||||
return;
|
||||
} else
|
||||
parse_source(found);
|
||||
|
||||
out_free:
|
||||
free(buf);
|
||||
}
|
||||
|
||||
static void print_mapped_keys(void)
|
||||
{
|
||||
char *name = NULL;
|
||||
|
||||
if (sym_filter_entry) {
|
||||
struct symbol *sym = (struct symbol *)(sym_filter_entry+1);
|
||||
name = sym->name;
|
||||
}
|
||||
|
||||
fprintf(stdout, "\nMapped keys:\n");
|
||||
fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs);
|
||||
fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries);
|
||||
|
||||
if (nr_counters > 1)
|
||||
fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter));
|
||||
|
||||
fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter);
|
||||
|
||||
if (vmlinux) {
|
||||
fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
|
||||
fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL");
|
||||
fprintf(stdout, "\t[S] stop annotation.\n");
|
||||
}
|
||||
|
||||
if (nr_counters > 1)
|
||||
fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0);
|
||||
|
||||
fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0);
|
||||
fprintf(stdout, "\t[qQ] quit.\n");
|
||||
}
|
||||
|
||||
static int key_mapped(int c)
|
||||
{
|
||||
switch (c) {
|
||||
case 'd':
|
||||
case 'e':
|
||||
case 'f':
|
||||
case 'z':
|
||||
case 'q':
|
||||
case 'Q':
|
||||
return 1;
|
||||
case 'E':
|
||||
case 'w':
|
||||
return nr_counters > 1 ? 1 : 0;
|
||||
case 'F':
|
||||
case 's':
|
||||
case 'S':
|
||||
return vmlinux ? 1 : 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void handle_keypress(int c)
|
||||
{
|
||||
if (!key_mapped(c)) {
|
||||
struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
|
||||
struct termios tc, save;
|
||||
|
||||
print_mapped_keys();
|
||||
fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
|
||||
fflush(stdout);
|
||||
|
||||
tcgetattr(0, &save);
|
||||
tc = save;
|
||||
tc.c_lflag &= ~(ICANON | ECHO);
|
||||
tc.c_cc[VMIN] = 0;
|
||||
tc.c_cc[VTIME] = 0;
|
||||
tcsetattr(0, TCSANOW, &tc);
|
||||
|
||||
poll(&stdin_poll, 1, -1);
|
||||
c = getc(stdin);
|
||||
|
||||
tcsetattr(0, TCSAFLUSH, &save);
|
||||
if (!key_mapped(c))
|
||||
return;
|
||||
}
|
||||
|
||||
switch (c) {
|
||||
case 'd':
|
||||
prompt_integer(&delay_secs, "Enter display delay");
|
||||
break;
|
||||
case 'e':
|
||||
prompt_integer(&print_entries, "Enter display entries (lines)");
|
||||
break;
|
||||
case 'E':
|
||||
if (nr_counters > 1) {
|
||||
int i;
|
||||
|
||||
fprintf(stderr, "\nAvailable events:");
|
||||
for (i = 0; i < nr_counters; i++)
|
||||
fprintf(stderr, "\n\t%d %s", i, event_name(i));
|
||||
|
||||
prompt_integer(&sym_counter, "Enter details event counter");
|
||||
|
||||
if (sym_counter >= nr_counters) {
|
||||
fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0));
|
||||
sym_counter = 0;
|
||||
sleep(1);
|
||||
}
|
||||
} else sym_counter = 0;
|
||||
break;
|
||||
case 'f':
|
||||
prompt_integer(&count_filter, "Enter display event count filter");
|
||||
break;
|
||||
case 'F':
|
||||
prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
|
||||
break;
|
||||
case 'q':
|
||||
case 'Q':
|
||||
printf("exiting.\n");
|
||||
exit(0);
|
||||
case 's':
|
||||
prompt_symbol(&sym_filter_entry, "Enter details symbol");
|
||||
break;
|
||||
case 'S':
|
||||
if (!sym_filter_entry)
|
||||
break;
|
||||
else {
|
||||
struct sym_entry *syme = sym_filter_entry;
|
||||
|
||||
pthread_mutex_lock(&syme->source_lock);
|
||||
sym_filter_entry = NULL;
|
||||
__zero_source_counters(syme);
|
||||
pthread_mutex_unlock(&syme->source_lock);
|
||||
}
|
||||
break;
|
||||
case 'w':
|
||||
display_weighted = ~display_weighted;
|
||||
break;
|
||||
case 'z':
|
||||
zero = ~zero;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void *display_thread(void *arg __used)
|
||||
{
|
||||
struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
|
||||
int delay_msecs = delay_secs * 1000;
|
||||
struct termios tc, save;
|
||||
int delay_msecs, c;
|
||||
|
||||
printf("PerfTop refresh period: %d seconds\n", delay_secs);
|
||||
tcgetattr(0, &save);
|
||||
tc = save;
|
||||
tc.c_lflag &= ~(ICANON | ECHO);
|
||||
tc.c_cc[VMIN] = 0;
|
||||
tc.c_cc[VTIME] = 0;
|
||||
|
||||
repeat:
|
||||
delay_msecs = delay_secs * 1000;
|
||||
tcsetattr(0, TCSANOW, &tc);
|
||||
/* trash return*/
|
||||
getc(stdin);
|
||||
|
||||
do {
|
||||
print_sym_table();
|
||||
} while (!poll(&stdin_poll, 1, delay_msecs) == 1);
|
||||
|
||||
printf("key pressed - exiting.\n");
|
||||
exit(0);
|
||||
c = getc(stdin);
|
||||
tcsetattr(0, TCSAFLUSH, &save);
|
||||
|
||||
handle_keypress(c);
|
||||
goto repeat;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
@ -293,7 +777,6 @@ static const char *skip_symbols[] = {
|
|||
|
||||
static int symbol_filter(struct dso *self, struct symbol *sym)
|
||||
{
|
||||
static int filter_match;
|
||||
struct sym_entry *syme;
|
||||
const char *name = sym->name;
|
||||
int i;
|
||||
|
@ -315,6 +798,10 @@ static int symbol_filter(struct dso *self, struct symbol *sym)
|
|||
return 1;
|
||||
|
||||
syme = dso__sym_priv(self, sym);
|
||||
pthread_mutex_init(&syme->source_lock, NULL);
|
||||
if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter))
|
||||
sym_filter_entry = syme;
|
||||
|
||||
for (i = 0; skip_symbols[i]; i++) {
|
||||
if (!strcmp(skip_symbols[i], name)) {
|
||||
syme->skip = 1;
|
||||
|
@ -322,29 +809,6 @@ static int symbol_filter(struct dso *self, struct symbol *sym)
|
|||
}
|
||||
}
|
||||
|
||||
if (filter_match == 1) {
|
||||
filter_end = sym->start;
|
||||
filter_match = -1;
|
||||
if (filter_end - filter_start > 10000) {
|
||||
fprintf(stderr,
|
||||
"hm, too large filter symbol <%s> - skipping.\n",
|
||||
sym_filter);
|
||||
fprintf(stderr, "symbol filter start: %016lx\n",
|
||||
filter_start);
|
||||
fprintf(stderr, " end: %016lx\n",
|
||||
filter_end);
|
||||
filter_end = filter_start = 0;
|
||||
sym_filter = NULL;
|
||||
sleep(1);
|
||||
}
|
||||
}
|
||||
|
||||
if (filter_match == 0 && sym_filter && !strcmp(name, sym_filter)) {
|
||||
filter_match = 1;
|
||||
filter_start = sym->start;
|
||||
}
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -380,8 +844,6 @@ out_delete_dso:
|
|||
return -1;
|
||||
}
|
||||
|
||||
#define TRACE_COUNT 3
|
||||
|
||||
/*
|
||||
* Binary search in the histogram table and record the hit:
|
||||
*/
|
||||
|
@ -394,6 +856,7 @@ static void record_ip(u64 ip, int counter)
|
|||
|
||||
if (!syme->skip) {
|
||||
syme->count[counter]++;
|
||||
record_precise_ip(syme, counter, ip);
|
||||
pthread_mutex_lock(&active_symbols_lock);
|
||||
if (list_empty(&syme->node) || !syme->node.next)
|
||||
__list_insert_active_sym(syme);
|
||||
|
@ -690,8 +1153,8 @@ static const struct option options[] = {
|
|||
"put the counters into a counter group"),
|
||||
OPT_BOOLEAN('i', "inherit", &inherit,
|
||||
"child tasks inherit counters"),
|
||||
OPT_STRING('s', "sym-filter", &sym_filter, "pattern",
|
||||
"only display symbols matchig this pattern"),
|
||||
OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
|
||||
"symbol to annotate - requires -k option"),
|
||||
OPT_BOOLEAN('z', "zero", &zero,
|
||||
"zero history across updates"),
|
||||
OPT_INTEGER('F', "freq", &freq,
|
||||
|
@ -734,6 +1197,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
|
|||
delay_secs = 1;
|
||||
|
||||
parse_symbols();
|
||||
parse_source(sym_filter_entry);
|
||||
|
||||
/*
|
||||
* Fill in the ones not specifically initialized via -c:
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
#include <errno.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "callchain.h"
|
||||
|
||||
|
@ -26,10 +27,14 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
|
|||
struct rb_node **p = &root->rb_node;
|
||||
struct rb_node *parent = NULL;
|
||||
struct callchain_node *rnode;
|
||||
u64 chain_cumul = cumul_hits(chain);
|
||||
|
||||
while (*p) {
|
||||
u64 rnode_cumul;
|
||||
|
||||
parent = *p;
|
||||
rnode = rb_entry(parent, struct callchain_node, rb_node);
|
||||
rnode_cumul = cumul_hits(rnode);
|
||||
|
||||
switch (mode) {
|
||||
case CHAIN_FLAT:
|
||||
|
@ -40,7 +45,7 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
|
|||
break;
|
||||
case CHAIN_GRAPH_ABS: /* Falldown */
|
||||
case CHAIN_GRAPH_REL:
|
||||
if (rnode->cumul_hit < chain->cumul_hit)
|
||||
if (rnode_cumul < chain_cumul)
|
||||
p = &(*p)->rb_left;
|
||||
else
|
||||
p = &(*p)->rb_right;
|
||||
|
@ -87,7 +92,7 @@ static void __sort_chain_graph_abs(struct callchain_node *node,
|
|||
|
||||
chain_for_each_child(child, node) {
|
||||
__sort_chain_graph_abs(child, min_hit);
|
||||
if (child->cumul_hit >= min_hit)
|
||||
if (cumul_hits(child) >= min_hit)
|
||||
rb_insert_callchain(&node->rb_root, child,
|
||||
CHAIN_GRAPH_ABS);
|
||||
}
|
||||
|
@ -108,11 +113,11 @@ static void __sort_chain_graph_rel(struct callchain_node *node,
|
|||
u64 min_hit;
|
||||
|
||||
node->rb_root = RB_ROOT;
|
||||
min_hit = node->cumul_hit * min_percent / 100.0;
|
||||
min_hit = ceil(node->children_hit * min_percent);
|
||||
|
||||
chain_for_each_child(child, node) {
|
||||
__sort_chain_graph_rel(child, min_percent);
|
||||
if (child->cumul_hit >= min_hit)
|
||||
if (cumul_hits(child) >= min_hit)
|
||||
rb_insert_callchain(&node->rb_root, child,
|
||||
CHAIN_GRAPH_REL);
|
||||
}
|
||||
|
@ -122,7 +127,7 @@ static void
|
|||
sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root,
|
||||
u64 min_hit __used, struct callchain_param *param)
|
||||
{
|
||||
__sort_chain_graph_rel(chain_root, param->min_percent);
|
||||
__sort_chain_graph_rel(chain_root, param->min_percent / 100.0);
|
||||
rb_root->rb_node = chain_root->rb_root.rb_node;
|
||||
}
|
||||
|
||||
|
@ -211,7 +216,8 @@ add_child(struct callchain_node *parent, struct ip_callchain *chain,
|
|||
new = create_child(parent, false);
|
||||
fill_node(new, chain, start, syms);
|
||||
|
||||
new->cumul_hit = new->hit = 1;
|
||||
new->children_hit = 0;
|
||||
new->hit = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -241,7 +247,8 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain,
|
|||
|
||||
/* split the hits */
|
||||
new->hit = parent->hit;
|
||||
new->cumul_hit = parent->cumul_hit;
|
||||
new->children_hit = parent->children_hit;
|
||||
parent->children_hit = cumul_hits(new);
|
||||
new->val_nr = parent->val_nr - idx_local;
|
||||
parent->val_nr = idx_local;
|
||||
|
||||
|
@ -249,6 +256,7 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain,
|
|||
if (idx_total < chain->nr) {
|
||||
parent->hit = 0;
|
||||
add_child(parent, chain, idx_total, syms);
|
||||
parent->children_hit++;
|
||||
} else {
|
||||
parent->hit = 1;
|
||||
}
|
||||
|
@ -269,13 +277,13 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain,
|
|||
unsigned int ret = __append_chain(rnode, chain, start, syms);
|
||||
|
||||
if (!ret)
|
||||
goto cumul;
|
||||
goto inc_children_hit;
|
||||
}
|
||||
/* nothing in children, add to the current node */
|
||||
add_child(root, chain, start, syms);
|
||||
|
||||
cumul:
|
||||
root->cumul_hit++;
|
||||
inc_children_hit:
|
||||
root->children_hit++;
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -317,8 +325,6 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain,
|
|||
/* we match 100% of the path, increment the hit */
|
||||
if (i - start == root->val_nr && i == chain->nr) {
|
||||
root->hit++;
|
||||
root->cumul_hit++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -331,5 +337,7 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain,
|
|||
void append_chain(struct callchain_node *root, struct ip_callchain *chain,
|
||||
struct symbol **syms)
|
||||
{
|
||||
if (!chain->nr)
|
||||
return;
|
||||
__append_chain_children(root, chain, syms, 0);
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include "symbol.h"
|
||||
|
||||
enum chain_mode {
|
||||
CHAIN_NONE,
|
||||
CHAIN_FLAT,
|
||||
CHAIN_GRAPH_ABS,
|
||||
CHAIN_GRAPH_REL
|
||||
|
@ -21,7 +22,7 @@ struct callchain_node {
|
|||
struct rb_root rb_root; /* sorted tree of children */
|
||||
unsigned int val_nr;
|
||||
u64 hit;
|
||||
u64 cumul_hit; /* hit + hits of children */
|
||||
u64 children_hit;
|
||||
};
|
||||
|
||||
struct callchain_param;
|
||||
|
@ -48,6 +49,11 @@ static inline void callchain_init(struct callchain_node *node)
|
|||
INIT_LIST_HEAD(&node->val);
|
||||
}
|
||||
|
||||
static inline u64 cumul_hits(struct callchain_node *node)
|
||||
{
|
||||
return node->hit + node->children_hit;
|
||||
}
|
||||
|
||||
int register_callchain_param(struct callchain_param *param);
|
||||
void append_chain(struct callchain_node *root, struct ip_callchain *chain,
|
||||
struct symbol **syms);
|
||||
|
|
|
@ -185,6 +185,8 @@ static void do_read(int fd, void *buf, size_t size)
|
|||
|
||||
if (ret < 0)
|
||||
die("failed to read");
|
||||
if (ret == 0)
|
||||
die("failed to read: missing data");
|
||||
|
||||
size -= ret;
|
||||
buf += ret;
|
||||
|
@ -213,9 +215,10 @@ struct perf_header *perf_header__read(int fd)
|
|||
|
||||
for (i = 0; i < nr_attrs; i++) {
|
||||
struct perf_header_attr *attr;
|
||||
off_t tmp = lseek(fd, 0, SEEK_CUR);
|
||||
off_t tmp;
|
||||
|
||||
do_read(fd, &f_attr, sizeof(f_attr));
|
||||
tmp = lseek(fd, 0, SEEK_CUR);
|
||||
|
||||
attr = perf_header_attr__new(&f_attr.attr);
|
||||
|
||||
|
|
|
@ -121,13 +121,29 @@ static unsigned long hw_cache_stat[C(MAX)] = {
|
|||
(strcmp(sys_dirent.d_name, ".")) && \
|
||||
(strcmp(sys_dirent.d_name, "..")))
|
||||
|
||||
static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
|
||||
{
|
||||
char evt_path[MAXPATHLEN];
|
||||
int fd;
|
||||
|
||||
snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path,
|
||||
sys_dir->d_name, evt_dir->d_name);
|
||||
fd = open(evt_path, O_RDONLY);
|
||||
if (fd < 0)
|
||||
return -EINVAL;
|
||||
close(fd);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, file, st) \
|
||||
while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \
|
||||
if (snprintf(file, MAXPATHLEN, "%s/%s/%s", debugfs_path, \
|
||||
sys_dirent.d_name, evt_dirent.d_name) && \
|
||||
(!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \
|
||||
(strcmp(evt_dirent.d_name, ".")) && \
|
||||
(strcmp(evt_dirent.d_name, "..")))
|
||||
(strcmp(evt_dirent.d_name, "..")) && \
|
||||
(!tp_event_has_id(&sys_dirent, &evt_dirent)))
|
||||
|
||||
#define MAX_EVENT_LENGTH 30
|
||||
|
||||
|
@ -223,9 +239,15 @@ char *event_name(int counter)
|
|||
{
|
||||
u64 config = attrs[counter].config;
|
||||
int type = attrs[counter].type;
|
||||
|
||||
return __event_name(type, config);
|
||||
}
|
||||
|
||||
char *__event_name(int type, u64 config)
|
||||
{
|
||||
static char buf[32];
|
||||
|
||||
if (attrs[counter].type == PERF_TYPE_RAW) {
|
||||
if (type == PERF_TYPE_RAW) {
|
||||
sprintf(buf, "raw 0x%llx", config);
|
||||
return buf;
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@ extern int nr_counters;
|
|||
extern struct perf_counter_attr attrs[MAX_COUNTERS];
|
||||
|
||||
extern char *event_name(int ctr);
|
||||
extern char *__event_name(int type, u64 config);
|
||||
|
||||
extern int parse_events(const struct option *opt, const char *str, int unset);
|
||||
|
||||
|
|
|
@ -24,6 +24,16 @@ const char *sym_hist_filter;
|
|||
#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
|
||||
#endif
|
||||
|
||||
/*
 * Where a DSO's symbol table came from.
 *
 * The numeric order is significant: dso__load() starts at
 * DSO__ORIG_FEDORA - 1 and increments the origin to try each candidate in
 * turn, and dso__symtab_origin() uses the value as a table index.
 */
enum dso_origin {
	DSO__ORIG_KERNEL    = 0,	/* set by dso__load_kernel() on success */
	DSO__ORIG_JAVA_JIT  = 1,	/* "/tmp/perf-" map file */
	DSO__ORIG_FEDORA    = 2,	/* /usr/lib/debug<name>.debug */
	DSO__ORIG_UBUNTU    = 3,	/* /usr/lib/debug<name> */
	DSO__ORIG_BUILDID   = 4,	/* path derived from the build-id */
	DSO__ORIG_DSO       = 5,	/* the DSO file itself */
	DSO__ORIG_NOT_FOUND = 6,	/* initial value set by dso__new() */
};
|
||||
|
||||
static struct symbol *symbol__new(u64 start, u64 len,
|
||||
const char *name, unsigned int priv_size,
|
||||
u64 obj_start, int verbose)
|
||||
|
@ -81,6 +91,7 @@ struct dso *dso__new(const char *name, unsigned int sym_priv_size)
|
|||
self->sym_priv_size = sym_priv_size;
|
||||
self->find_symbol = dso__find_symbol;
|
||||
self->slen_calculated = 0;
|
||||
self->origin = DSO__ORIG_NOT_FOUND;
|
||||
}
|
||||
|
||||
return self;
|
||||
|
@ -710,7 +721,7 @@ static char *dso__read_build_id(struct dso *self, int verbose)
|
|||
++raw;
|
||||
bid += 2;
|
||||
}
|
||||
if (verbose)
|
||||
if (verbose >= 2)
|
||||
printf("%s(%s): %s\n", __func__, self->name, build_id);
|
||||
out_elf_end:
|
||||
elf_end(elf);
|
||||
|
@ -720,11 +731,26 @@ out:
|
|||
return build_id;
|
||||
}
|
||||
|
||||
char dso__symtab_origin(const struct dso *self)
|
||||
{
|
||||
static const char origin[] = {
|
||||
[DSO__ORIG_KERNEL] = 'k',
|
||||
[DSO__ORIG_JAVA_JIT] = 'j',
|
||||
[DSO__ORIG_FEDORA] = 'f',
|
||||
[DSO__ORIG_UBUNTU] = 'u',
|
||||
[DSO__ORIG_BUILDID] = 'b',
|
||||
[DSO__ORIG_DSO] = 'd',
|
||||
};
|
||||
|
||||
if (self == NULL || self->origin == DSO__ORIG_NOT_FOUND)
|
||||
return '!';
|
||||
return origin[self->origin];
|
||||
}
|
||||
|
||||
int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
|
||||
{
|
||||
int size = PATH_MAX;
|
||||
char *name = malloc(size), *build_id = NULL;
|
||||
int variant = 0;
|
||||
int ret = -1;
|
||||
int fd;
|
||||
|
||||
|
@ -733,19 +759,26 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
|
|||
|
||||
self->adjust_symbols = 0;
|
||||
|
||||
if (strncmp(self->name, "/tmp/perf-", 10) == 0)
|
||||
return dso__load_perf_map(self, filter, verbose);
|
||||
if (strncmp(self->name, "/tmp/perf-", 10) == 0) {
|
||||
ret = dso__load_perf_map(self, filter, verbose);
|
||||
self->origin = ret > 0 ? DSO__ORIG_JAVA_JIT :
|
||||
DSO__ORIG_NOT_FOUND;
|
||||
return ret;
|
||||
}
|
||||
|
||||
self->origin = DSO__ORIG_FEDORA - 1;
|
||||
|
||||
more:
|
||||
do {
|
||||
switch (variant) {
|
||||
case 0: /* Fedora */
|
||||
self->origin++;
|
||||
switch (self->origin) {
|
||||
case DSO__ORIG_FEDORA:
|
||||
snprintf(name, size, "/usr/lib/debug%s.debug", self->name);
|
||||
break;
|
||||
case 1: /* Ubuntu */
|
||||
case DSO__ORIG_UBUNTU:
|
||||
snprintf(name, size, "/usr/lib/debug%s", self->name);
|
||||
break;
|
||||
case 2:
|
||||
case DSO__ORIG_BUILDID:
|
||||
build_id = dso__read_build_id(self, verbose);
|
||||
if (build_id != NULL) {
|
||||
snprintf(name, size,
|
||||
|
@ -754,16 +787,15 @@ more:
|
|||
free(build_id);
|
||||
break;
|
||||
}
|
||||
variant++;
|
||||
self->origin++;
|
||||
/* Fall thru */
|
||||
case 3: /* Sane people */
|
||||
case DSO__ORIG_DSO:
|
||||
snprintf(name, size, "%s", self->name);
|
||||
break;
|
||||
|
||||
default:
|
||||
goto out;
|
||||
}
|
||||
variant++;
|
||||
|
||||
fd = open(name, O_RDONLY);
|
||||
} while (fd < 0);
|
||||
|
@ -899,6 +931,9 @@ int dso__load_kernel(struct dso *self, const char *vmlinux,
|
|||
if (err <= 0)
|
||||
err = dso__load_kallsyms(self, filter, verbose);
|
||||
|
||||
if (err > 0)
|
||||
self->origin = DSO__ORIG_KERNEL;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
|
|
@ -26,6 +26,7 @@ struct dso {
|
|||
unsigned int sym_priv_size;
|
||||
unsigned char adjust_symbols;
|
||||
unsigned char slen_calculated;
|
||||
unsigned char origin;
|
||||
char name[0];
|
||||
};
|
||||
|
||||
|
@ -49,6 +50,7 @@ int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose);
|
|||
int dso__load(struct dso *self, symbol_filter_t filter, int verbose);
|
||||
|
||||
size_t dso__fprintf(struct dso *self, FILE *fp);
|
||||
char dso__symtab_origin(const struct dso *self);
|
||||
|
||||
void symbol__init(void);
|
||||
#endif /* _PERF_SYMBOL_ */
|
||||
|
|
Loading…
Reference in New Issue