perf/core: Fix perf_event_read()
perf_event_read() has a number of issues regarding the timekeeping bits:

 - The IPI didn't update group times when it found INACTIVE.

 - The direct call would not re-check ->state after taking ctx->lock, which can result in ->count and timestamps getting out of sync.

And we can make use of the ordering introduced for perf_event_stop() to make it more accurate for ACTIVE.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
7f0ec32526
commit
0c1cbc18df
|
@ -2081,8 +2081,9 @@ event_sched_in(struct perf_event *event,
|
||||||
|
|
||||||
WRITE_ONCE(event->oncpu, smp_processor_id());
|
WRITE_ONCE(event->oncpu, smp_processor_id());
|
||||||
/*
|
/*
|
||||||
* Order event::oncpu write to happen before the ACTIVE state
|
* Order event::oncpu write to happen before the ACTIVE state is
|
||||||
* is visible.
|
* visible. This allows perf_event_{stop,read}() to observe the correct
|
||||||
|
* ->oncpu if it sees ACTIVE.
|
||||||
*/
|
*/
|
||||||
smp_wmb();
|
smp_wmb();
|
||||||
WRITE_ONCE(event->state, PERF_EVENT_STATE_ACTIVE);
|
WRITE_ONCE(event->state, PERF_EVENT_STATE_ACTIVE);
|
||||||
|
@ -3638,12 +3639,16 @@ static void __perf_event_read(void *info)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
raw_spin_lock(&ctx->lock);
|
raw_spin_lock(&ctx->lock);
|
||||||
if (ctx->is_active) {
|
if (ctx->is_active & EVENT_TIME) {
|
||||||
update_context_time(ctx);
|
update_context_time(ctx);
|
||||||
update_cgrp_time_from_event(event);
|
update_cgrp_time_from_event(event);
|
||||||
}
|
}
|
||||||
|
|
||||||
update_event_times(event);
|
if (!data->group)
|
||||||
|
update_event_times(event);
|
||||||
|
else
|
||||||
|
update_group_times(event);
|
||||||
|
|
||||||
if (event->state != PERF_EVENT_STATE_ACTIVE)
|
if (event->state != PERF_EVENT_STATE_ACTIVE)
|
||||||
goto unlock;
|
goto unlock;
|
||||||
|
|
||||||
|
@ -3658,7 +3663,6 @@ static void __perf_event_read(void *info)
|
||||||
pmu->read(event);
|
pmu->read(event);
|
||||||
|
|
||||||
list_for_each_entry(sub, &event->sibling_list, group_entry) {
|
list_for_each_entry(sub, &event->sibling_list, group_entry) {
|
||||||
update_event_times(sub);
|
|
||||||
if (sub->state == PERF_EVENT_STATE_ACTIVE) {
|
if (sub->state == PERF_EVENT_STATE_ACTIVE) {
|
||||||
/*
|
/*
|
||||||
* Use sibling's PMU rather than @event's since
|
* Use sibling's PMU rather than @event's since
|
||||||
|
@ -3748,23 +3752,35 @@ out:
|
||||||
|
|
||||||
static int perf_event_read(struct perf_event *event, bool group)
|
static int perf_event_read(struct perf_event *event, bool group)
|
||||||
{
|
{
|
||||||
|
enum perf_event_state state = READ_ONCE(event->state);
|
||||||
int event_cpu, ret = 0;
|
int event_cpu, ret = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If event is enabled and currently active on a CPU, update the
|
* If event is enabled and currently active on a CPU, update the
|
||||||
* value in the event structure:
|
* value in the event structure:
|
||||||
*/
|
*/
|
||||||
if (event->state == PERF_EVENT_STATE_ACTIVE) {
|
again:
|
||||||
struct perf_read_data data = {
|
if (state == PERF_EVENT_STATE_ACTIVE) {
|
||||||
.event = event,
|
struct perf_read_data data;
|
||||||
.group = group,
|
|
||||||
.ret = 0,
|
/*
|
||||||
};
|
* Orders the ->state and ->oncpu loads such that if we see
|
||||||
|
* ACTIVE we must also see the right ->oncpu.
|
||||||
|
*
|
||||||
|
* Matches the smp_wmb() from event_sched_in().
|
||||||
|
*/
|
||||||
|
smp_rmb();
|
||||||
|
|
||||||
event_cpu = READ_ONCE(event->oncpu);
|
event_cpu = READ_ONCE(event->oncpu);
|
||||||
if ((unsigned)event_cpu >= nr_cpu_ids)
|
if ((unsigned)event_cpu >= nr_cpu_ids)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
data = (struct perf_read_data){
|
||||||
|
.event = event,
|
||||||
|
.group = group,
|
||||||
|
.ret = 0,
|
||||||
|
};
|
||||||
|
|
||||||
preempt_disable();
|
preempt_disable();
|
||||||
event_cpu = __perf_event_read_cpu(event, event_cpu);
|
event_cpu = __perf_event_read_cpu(event, event_cpu);
|
||||||
|
|
||||||
|
@ -3781,20 +3797,27 @@ static int perf_event_read(struct perf_event *event, bool group)
|
||||||
(void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1);
|
(void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1);
|
||||||
preempt_enable();
|
preempt_enable();
|
||||||
ret = data.ret;
|
ret = data.ret;
|
||||||
} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
|
|
||||||
|
} else if (state == PERF_EVENT_STATE_INACTIVE) {
|
||||||
struct perf_event_context *ctx = event->ctx;
|
struct perf_event_context *ctx = event->ctx;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
raw_spin_lock_irqsave(&ctx->lock, flags);
|
raw_spin_lock_irqsave(&ctx->lock, flags);
|
||||||
|
state = event->state;
|
||||||
|
if (state != PERF_EVENT_STATE_INACTIVE) {
|
||||||
|
raw_spin_unlock_irqrestore(&ctx->lock, flags);
|
||||||
|
goto again;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* may read while context is not active
|
* May read while context is not active (e.g., thread is
|
||||||
* (e.g., thread is blocked), in that case
|
* blocked), in that case we cannot update context time
|
||||||
* we cannot update context time
|
|
||||||
*/
|
*/
|
||||||
if (ctx->is_active) {
|
if (ctx->is_active & EVENT_TIME) {
|
||||||
update_context_time(ctx);
|
update_context_time(ctx);
|
||||||
update_cgrp_time_from_event(event);
|
update_cgrp_time_from_event(event);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (group)
|
if (group)
|
||||||
update_group_times(event);
|
update_group_times(event);
|
||||||
else
|
else
|
||||||
|
|
Loading…
Reference in New Issue