perf/x86: Fix shared register mutual exclusion enforcement

This patch fixes a problem with the shared registers mutual
exclusion code and incremental event scheduling by the
generic perf_event code.

There was a bug whereby the mutual exclusion on the shared
registers was not enforced because of incremental scheduling
abort due to event constraints. As an example on Intel
Nehalem, consider the following events:

group1= L1D_CACHE_LD:E_STATE,OFFCORE_RESPONSE_0:PF_RFO,L1D_CACHE_LD:I_STATE
group2= L1D_CACHE_LD:I_STATE

The L1D_CACHE_LD event can only be measured by 2 counters. Yet, there
are 3 instances here. The first group can be scheduled and is committed.
Then, the generic code tries to schedule group2 and this fails (because
there is no more counter to support the 3rd instance of L1D_CACHE_LD).
But in x86_schedule_events() error path, put_event_contraints() is invoked
on ALL the events and not just the ones that just failed. That causes the
"lock" on the shared offcore_response MSR to be released. Yet the first group
is actually scheduled and is exposed to reprogramming of that shared msr by
the sibling HT thread. In other words, there is no guarantee on what is
measured.

This patch fixes the problem by tagging committed events with the
PERF_X86_EVENT_COMMITTED tag. In the error path of x86_schedule_events(),
only the events NOT tagged have their constraint released. The tag
is eventually removed when the event in descheduled.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20130620164254.GA3556@quad
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Stephane Eranian 2013-06-20 18:42:54 +02:00 committed by Ingo Molnar
parent 069e0c3c40
commit 2f7f73a520
3 changed files with 27 additions and 4 deletions

View File

@ -726,6 +726,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{ {
struct event_constraint *c; struct event_constraint *c;
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
struct perf_event *e;
int i, wmin, wmax, num = 0; int i, wmin, wmax, num = 0;
struct hw_perf_event *hwc; struct hw_perf_event *hwc;
@ -769,14 +770,32 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
num = perf_assign_events(cpuc->event_list, n, wmin, num = perf_assign_events(cpuc->event_list, n, wmin,
wmax, assign); wmax, assign);
/*
* Mark the event as committed, so we do not put_constraint()
* in case new events are added and fail scheduling.
*/
if (!num && assign) {
for (i = 0; i < n; i++) {
e = cpuc->event_list[i];
e->hw.flags |= PERF_X86_EVENT_COMMITTED;
}
}
/* /*
* scheduling failed or is just a simulation, * scheduling failed or is just a simulation,
* free resources if necessary * free resources if necessary
*/ */
if (!assign || num) { if (!assign || num) {
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
e = cpuc->event_list[i];
/*
* do not put_constraint() on comitted events,
* because they are good to go
*/
if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
continue;
if (x86_pmu.put_event_constraints) if (x86_pmu.put_event_constraints)
x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]); x86_pmu.put_event_constraints(cpuc, e);
} }
} }
return num ? -EINVAL : 0; return num ? -EINVAL : 0;
@ -1155,6 +1174,11 @@ static void x86_pmu_del(struct perf_event *event, int flags)
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int i; int i;
/*
* event is descheduled
*/
event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
/* /*
* If we're called during a txn, we don't need to do anything. * If we're called during a txn, we don't need to do anything.
* The events never got scheduled and ->cancel_txn will truncate * The events never got scheduled and ->cancel_txn will truncate

View File

@ -63,11 +63,12 @@ struct event_constraint {
int flags; int flags;
}; };
/* /*
* struct event_constraint flags * struct hw_perf_event.flags flags
*/ */
#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */ #define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */
#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */ #define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */
#define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style st data sampling */ #define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style st data sampling */
#define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */
struct amd_nb { struct amd_nb {
int nb_id; /* NorthBridge id */ int nb_id; /* NorthBridge id */

View File

@ -1450,7 +1450,6 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
if (x86_pmu.event_constraints) { if (x86_pmu.event_constraints) {
for_each_event_constraint(c, x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) {
if ((event->hw.config & c->cmask) == c->code) { if ((event->hw.config & c->cmask) == c->code) {
/* hw.flags zeroed at initialization */
event->hw.flags |= c->flags; event->hw.flags |= c->flags;
return c; return c;
} }
@ -1498,7 +1497,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
static void intel_put_event_constraints(struct cpu_hw_events *cpuc, static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event) struct perf_event *event)
{ {
event->hw.flags = 0;
intel_put_shared_regs_event_constraints(cpuc, event); intel_put_shared_regs_event_constraints(cpuc, event);
} }