perf_counter: minimize context time updates

Push the update_context_time() calls up the stack so that we get fewer
invocations and thereby less noisy output:

before:

 # ./perfstat -e 1:0 -e 1:1 -e 1:1 -e 1:1 -l ls > /dev/null

 Performance counter stats for 'ls':

      10.163691  cpu clock ticks      (msecs)  (scaled from 98.94%)
      10.215360  task clock ticks     (msecs)  (scaled from 98.18%)
      10.185549  task clock ticks     (msecs)  (scaled from 98.53%)
      10.183581  task clock ticks     (msecs)  (scaled from 98.71%)

 Wall-clock time elapsed:    11.912858 msecs

after:

 # ./perfstat -e 1:0 -e 1:1 -e 1:1 -e 1:1 -l ls > /dev/null

 Performance counter stats for 'ls':

       9.316630  cpu clock ticks      (msecs)
       9.280789  task clock ticks     (msecs)
       9.280789  task clock ticks     (msecs)
       9.280789  task clock ticks     (msecs)

 Wall-clock time elapsed:     9.574872 msecs

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094518.618876874@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Peter Zijlstra 2009-04-06 11:45:13 +02:00 committed by Ingo Molnar
parent 849691a6cd
commit bce379bf35
1 changed files with 5 additions and 2 deletions

View File

@ -319,6 +319,8 @@ static void __perf_counter_disable(void *info)
spin_lock_irqsave(&ctx->lock, flags); spin_lock_irqsave(&ctx->lock, flags);
update_context_time(ctx);
/* /*
* If the counter is on, turn it off. * If the counter is on, turn it off.
* If it is in error state, leave it in error state. * If it is in error state, leave it in error state.
@ -797,6 +799,8 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
if (likely(!cpuctx->task_ctx)) if (likely(!cpuctx->task_ctx))
return; return;
update_context_time(ctx);
regs = task_pt_regs(task); regs = task_pt_regs(task);
perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs); perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs);
__perf_counter_sched_out(ctx, cpuctx); __perf_counter_sched_out(ctx, cpuctx);
@ -2336,7 +2340,6 @@ static void task_clock_perf_counter_update(struct perf_counter *counter)
u64 prev, now; u64 prev, now;
s64 delta; s64 delta;
update_context_time(counter->ctx);
now = counter->ctx->time; now = counter->ctx->time;
prev = atomic64_xchg(&counter->hw.prev_count, now); prev = atomic64_xchg(&counter->hw.prev_count, now);
@ -2349,7 +2352,6 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter)
struct hw_perf_counter *hwc = &counter->hw; struct hw_perf_counter *hwc = &counter->hw;
u64 now; u64 now;
update_context_time(counter->ctx);
now = counter->ctx->time; now = counter->ctx->time;
atomic64_set(&hwc->prev_count, now); atomic64_set(&hwc->prev_count, now);
@ -2372,6 +2374,7 @@ static void task_clock_perf_counter_disable(struct perf_counter *counter)
static void task_clock_perf_counter_read(struct perf_counter *counter) static void task_clock_perf_counter_read(struct perf_counter *counter)
{ {
update_context_time(counter->ctx);
task_clock_perf_counter_update(counter); task_clock_perf_counter_update(counter);
} }