Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar:
 "The biggest chunk of the changes are two regression fixes: a HT
  workaround fix and an event-group scheduling fix. It's been verified
  with 5 days of fuzzer testing.

  Other fixes:

   - eBPF fix
   - a BIOS breakage detection fix
   - PMU driver fixes"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel/pt: Fix a refactoring bug
  perf/x86: Tweak broken BIOS rules during check_hw_exists()
  perf/x86/intel/pt: Untangle pt_buffer_reset_markers()
  perf: Disallow sparse AUX allocations for non-SG PMUs in overwrite mode
  perf/x86: Improve HT workaround GP counter constraint
  perf/x86: Fix event/group validation
  perf: Fix race in BPF program unregister
This commit is contained in:
commit
a0e9c6efa5
|
@ -190,6 +190,7 @@ static bool check_hw_exists(void)
|
||||||
u64 val, val_fail, val_new= ~0;
|
u64 val, val_fail, val_new= ~0;
|
||||||
int i, reg, reg_fail, ret = 0;
|
int i, reg, reg_fail, ret = 0;
|
||||||
int bios_fail = 0;
|
int bios_fail = 0;
|
||||||
|
int reg_safe = -1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check to see if the BIOS enabled any of the counters, if so
|
* Check to see if the BIOS enabled any of the counters, if so
|
||||||
|
@ -204,6 +205,8 @@ static bool check_hw_exists(void)
|
||||||
bios_fail = 1;
|
bios_fail = 1;
|
||||||
val_fail = val;
|
val_fail = val;
|
||||||
reg_fail = reg;
|
reg_fail = reg;
|
||||||
|
} else {
|
||||||
|
reg_safe = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -221,12 +224,23 @@ static bool check_hw_exists(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If all the counters are enabled, the below test will always
|
||||||
|
* fail. The tools will also become useless in this scenario.
|
||||||
|
* Just fail and disable the hardware counters.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (reg_safe == -1) {
|
||||||
|
reg = reg_safe;
|
||||||
|
goto msr_fail;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Read the current value, change it and read it back to see if it
|
* Read the current value, change it and read it back to see if it
|
||||||
* matches, this is needed to detect certain hardware emulators
|
* matches, this is needed to detect certain hardware emulators
|
||||||
* (qemu/kvm) that don't trap on the MSR access and always return 0s.
|
* (qemu/kvm) that don't trap on the MSR access and always return 0s.
|
||||||
*/
|
*/
|
||||||
reg = x86_pmu_event_addr(0);
|
reg = x86_pmu_event_addr(reg_safe);
|
||||||
if (rdmsrl_safe(reg, &val))
|
if (rdmsrl_safe(reg, &val))
|
||||||
goto msr_fail;
|
goto msr_fail;
|
||||||
val ^= 0xffffUL;
|
val ^= 0xffffUL;
|
||||||
|
@ -611,6 +625,7 @@ struct sched_state {
|
||||||
int event; /* event index */
|
int event; /* event index */
|
||||||
int counter; /* counter index */
|
int counter; /* counter index */
|
||||||
int unassigned; /* number of events to be assigned left */
|
int unassigned; /* number of events to be assigned left */
|
||||||
|
int nr_gp; /* number of GP counters used */
|
||||||
unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
|
unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -620,27 +635,29 @@ struct sched_state {
|
||||||
struct perf_sched {
|
struct perf_sched {
|
||||||
int max_weight;
|
int max_weight;
|
||||||
int max_events;
|
int max_events;
|
||||||
struct perf_event **events;
|
int max_gp;
|
||||||
struct sched_state state;
|
|
||||||
int saved_states;
|
int saved_states;
|
||||||
|
struct event_constraint **constraints;
|
||||||
|
struct sched_state state;
|
||||||
struct sched_state saved[SCHED_STATES_MAX];
|
struct sched_state saved[SCHED_STATES_MAX];
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize interator that runs through all events and counters.
|
* Initialize interator that runs through all events and counters.
|
||||||
*/
|
*/
|
||||||
static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
|
static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
|
||||||
int num, int wmin, int wmax)
|
int num, int wmin, int wmax, int gpmax)
|
||||||
{
|
{
|
||||||
int idx;
|
int idx;
|
||||||
|
|
||||||
memset(sched, 0, sizeof(*sched));
|
memset(sched, 0, sizeof(*sched));
|
||||||
sched->max_events = num;
|
sched->max_events = num;
|
||||||
sched->max_weight = wmax;
|
sched->max_weight = wmax;
|
||||||
sched->events = events;
|
sched->max_gp = gpmax;
|
||||||
|
sched->constraints = constraints;
|
||||||
|
|
||||||
for (idx = 0; idx < num; idx++) {
|
for (idx = 0; idx < num; idx++) {
|
||||||
if (events[idx]->hw.constraint->weight == wmin)
|
if (constraints[idx]->weight == wmin)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -687,7 +704,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
|
||||||
if (sched->state.event >= sched->max_events)
|
if (sched->state.event >= sched->max_events)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
c = sched->events[sched->state.event]->hw.constraint;
|
c = sched->constraints[sched->state.event];
|
||||||
/* Prefer fixed purpose counters */
|
/* Prefer fixed purpose counters */
|
||||||
if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
|
if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
|
||||||
idx = INTEL_PMC_IDX_FIXED;
|
idx = INTEL_PMC_IDX_FIXED;
|
||||||
|
@ -696,11 +713,16 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Grab the first unused counter starting with idx */
|
/* Grab the first unused counter starting with idx */
|
||||||
idx = sched->state.counter;
|
idx = sched->state.counter;
|
||||||
for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
|
for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
|
||||||
if (!__test_and_set_bit(idx, sched->state.used))
|
if (!__test_and_set_bit(idx, sched->state.used)) {
|
||||||
|
if (sched->state.nr_gp++ >= sched->max_gp)
|
||||||
|
return false;
|
||||||
|
|
||||||
goto done;
|
goto done;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
@ -745,7 +767,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)
|
||||||
if (sched->state.weight > sched->max_weight)
|
if (sched->state.weight > sched->max_weight)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
c = sched->events[sched->state.event]->hw.constraint;
|
c = sched->constraints[sched->state.event];
|
||||||
} while (c->weight != sched->state.weight);
|
} while (c->weight != sched->state.weight);
|
||||||
|
|
||||||
sched->state.counter = 0; /* start with first counter */
|
sched->state.counter = 0; /* start with first counter */
|
||||||
|
@ -756,12 +778,12 @@ static bool perf_sched_next_event(struct perf_sched *sched)
|
||||||
/*
|
/*
|
||||||
* Assign a counter for each event.
|
* Assign a counter for each event.
|
||||||
*/
|
*/
|
||||||
int perf_assign_events(struct perf_event **events, int n,
|
int perf_assign_events(struct event_constraint **constraints, int n,
|
||||||
int wmin, int wmax, int *assign)
|
int wmin, int wmax, int gpmax, int *assign)
|
||||||
{
|
{
|
||||||
struct perf_sched sched;
|
struct perf_sched sched;
|
||||||
|
|
||||||
perf_sched_init(&sched, events, n, wmin, wmax);
|
perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
if (!perf_sched_find_counter(&sched))
|
if (!perf_sched_find_counter(&sched))
|
||||||
|
@ -788,9 +810,9 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
|
||||||
x86_pmu.start_scheduling(cpuc);
|
x86_pmu.start_scheduling(cpuc);
|
||||||
|
|
||||||
for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
|
for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
|
||||||
hwc = &cpuc->event_list[i]->hw;
|
cpuc->event_constraint[i] = NULL;
|
||||||
c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
|
c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
|
||||||
hwc->constraint = c;
|
cpuc->event_constraint[i] = c;
|
||||||
|
|
||||||
wmin = min(wmin, c->weight);
|
wmin = min(wmin, c->weight);
|
||||||
wmax = max(wmax, c->weight);
|
wmax = max(wmax, c->weight);
|
||||||
|
@ -801,7 +823,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < n; i++) {
|
for (i = 0; i < n; i++) {
|
||||||
hwc = &cpuc->event_list[i]->hw;
|
hwc = &cpuc->event_list[i]->hw;
|
||||||
c = hwc->constraint;
|
c = cpuc->event_constraint[i];
|
||||||
|
|
||||||
/* never assigned */
|
/* never assigned */
|
||||||
if (hwc->idx == -1)
|
if (hwc->idx == -1)
|
||||||
|
@ -821,9 +843,26 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* slow path */
|
/* slow path */
|
||||||
if (i != n)
|
if (i != n) {
|
||||||
unsched = perf_assign_events(cpuc->event_list, n, wmin,
|
int gpmax = x86_pmu.num_counters;
|
||||||
wmax, assign);
|
|
||||||
|
/*
|
||||||
|
* Do not allow scheduling of more than half the available
|
||||||
|
* generic counters.
|
||||||
|
*
|
||||||
|
* This helps avoid counter starvation of sibling thread by
|
||||||
|
* ensuring at most half the counters cannot be in exclusive
|
||||||
|
* mode. There is no designated counters for the limits. Any
|
||||||
|
* N/2 counters can be used. This helps with events with
|
||||||
|
* specific counter constraints.
|
||||||
|
*/
|
||||||
|
if (is_ht_workaround_enabled() && !cpuc->is_fake &&
|
||||||
|
READ_ONCE(cpuc->excl_cntrs->exclusive_present))
|
||||||
|
gpmax /= 2;
|
||||||
|
|
||||||
|
unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
|
||||||
|
wmax, gpmax, assign);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In case of success (unsched = 0), mark events as committed,
|
* In case of success (unsched = 0), mark events as committed,
|
||||||
|
@ -840,7 +879,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
|
||||||
e = cpuc->event_list[i];
|
e = cpuc->event_list[i];
|
||||||
e->hw.flags |= PERF_X86_EVENT_COMMITTED;
|
e->hw.flags |= PERF_X86_EVENT_COMMITTED;
|
||||||
if (x86_pmu.commit_scheduling)
|
if (x86_pmu.commit_scheduling)
|
||||||
x86_pmu.commit_scheduling(cpuc, e, assign[i]);
|
x86_pmu.commit_scheduling(cpuc, i, assign[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1292,8 +1331,10 @@ static void x86_pmu_del(struct perf_event *event, int flags)
|
||||||
x86_pmu.put_event_constraints(cpuc, event);
|
x86_pmu.put_event_constraints(cpuc, event);
|
||||||
|
|
||||||
/* Delete the array entry. */
|
/* Delete the array entry. */
|
||||||
while (++i < cpuc->n_events)
|
while (++i < cpuc->n_events) {
|
||||||
cpuc->event_list[i-1] = cpuc->event_list[i];
|
cpuc->event_list[i-1] = cpuc->event_list[i];
|
||||||
|
cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
|
||||||
|
}
|
||||||
--cpuc->n_events;
|
--cpuc->n_events;
|
||||||
|
|
||||||
perf_event_update_userpage(event);
|
perf_event_update_userpage(event);
|
||||||
|
|
|
@ -74,6 +74,7 @@ struct event_constraint {
|
||||||
#define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */
|
#define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */
|
||||||
#define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */
|
#define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */
|
||||||
#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */
|
#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */
|
||||||
|
#define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */
|
||||||
|
|
||||||
|
|
||||||
struct amd_nb {
|
struct amd_nb {
|
||||||
|
@ -134,8 +135,6 @@ enum intel_excl_state_type {
|
||||||
struct intel_excl_states {
|
struct intel_excl_states {
|
||||||
enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
|
enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
|
||||||
enum intel_excl_state_type state[X86_PMC_IDX_MAX];
|
enum intel_excl_state_type state[X86_PMC_IDX_MAX];
|
||||||
int num_alloc_cntrs;/* #counters allocated */
|
|
||||||
int max_alloc_cntrs;/* max #counters allowed */
|
|
||||||
bool sched_started; /* true if scheduling has started */
|
bool sched_started; /* true if scheduling has started */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -144,6 +143,11 @@ struct intel_excl_cntrs {
|
||||||
|
|
||||||
struct intel_excl_states states[2];
|
struct intel_excl_states states[2];
|
||||||
|
|
||||||
|
union {
|
||||||
|
u16 has_exclusive[2];
|
||||||
|
u32 exclusive_present;
|
||||||
|
};
|
||||||
|
|
||||||
int refcnt; /* per-core: #HT threads */
|
int refcnt; /* per-core: #HT threads */
|
||||||
unsigned core_id; /* per-core: core id */
|
unsigned core_id; /* per-core: core id */
|
||||||
};
|
};
|
||||||
|
@ -172,7 +176,11 @@ struct cpu_hw_events {
|
||||||
added in the current transaction */
|
added in the current transaction */
|
||||||
int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
|
int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
|
||||||
u64 tags[X86_PMC_IDX_MAX];
|
u64 tags[X86_PMC_IDX_MAX];
|
||||||
|
|
||||||
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
|
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
|
||||||
|
struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
|
||||||
|
|
||||||
|
int n_excl; /* the number of exclusive events */
|
||||||
|
|
||||||
unsigned int group_flag;
|
unsigned int group_flag;
|
||||||
int is_fake;
|
int is_fake;
|
||||||
|
@ -519,9 +527,7 @@ struct x86_pmu {
|
||||||
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
|
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
|
||||||
struct perf_event *event);
|
struct perf_event *event);
|
||||||
|
|
||||||
void (*commit_scheduling)(struct cpu_hw_events *cpuc,
|
void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
|
||||||
struct perf_event *event,
|
|
||||||
int cntr);
|
|
||||||
|
|
||||||
void (*start_scheduling)(struct cpu_hw_events *cpuc);
|
void (*start_scheduling)(struct cpu_hw_events *cpuc);
|
||||||
|
|
||||||
|
@ -717,8 +723,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
|
||||||
|
|
||||||
void x86_pmu_enable_all(int added);
|
void x86_pmu_enable_all(int added);
|
||||||
|
|
||||||
int perf_assign_events(struct perf_event **events, int n,
|
int perf_assign_events(struct event_constraint **constraints, int n,
|
||||||
int wmin, int wmax, int *assign);
|
int wmin, int wmax, int gpmax, int *assign);
|
||||||
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
|
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
|
||||||
|
|
||||||
void x86_pmu_stop(struct perf_event *event, int flags);
|
void x86_pmu_stop(struct perf_event *event, int flags);
|
||||||
|
@ -929,4 +935,8 @@ static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int is_ht_workaround_enabled(void)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
#endif /* CONFIG_CPU_SUP_INTEL */
|
#endif /* CONFIG_CPU_SUP_INTEL */
|
||||||
|
|
|
@ -1923,7 +1923,6 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
|
||||||
xl = &excl_cntrs->states[tid];
|
xl = &excl_cntrs->states[tid];
|
||||||
|
|
||||||
xl->sched_started = true;
|
xl->sched_started = true;
|
||||||
xl->num_alloc_cntrs = 0;
|
|
||||||
/*
|
/*
|
||||||
* lock shared state until we are done scheduling
|
* lock shared state until we are done scheduling
|
||||||
* in stop_event_scheduling()
|
* in stop_event_scheduling()
|
||||||
|
@ -2000,6 +1999,11 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
|
||||||
* across HT threads
|
* across HT threads
|
||||||
*/
|
*/
|
||||||
is_excl = c->flags & PERF_X86_EVENT_EXCL;
|
is_excl = c->flags & PERF_X86_EVENT_EXCL;
|
||||||
|
if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
|
||||||
|
event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
|
||||||
|
if (!cpuc->n_excl++)
|
||||||
|
WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* xl = state of current HT
|
* xl = state of current HT
|
||||||
|
@ -2008,18 +2012,6 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
|
||||||
xl = &excl_cntrs->states[tid];
|
xl = &excl_cntrs->states[tid];
|
||||||
xlo = &excl_cntrs->states[o_tid];
|
xlo = &excl_cntrs->states[o_tid];
|
||||||
|
|
||||||
/*
|
|
||||||
* do not allow scheduling of more than max_alloc_cntrs
|
|
||||||
* which is set to half the available generic counters.
|
|
||||||
* this helps avoid counter starvation of sibling thread
|
|
||||||
* by ensuring at most half the counters cannot be in
|
|
||||||
* exclusive mode. There is not designated counters for the
|
|
||||||
* limits. Any N/2 counters can be used. This helps with
|
|
||||||
* events with specifix counter constraints
|
|
||||||
*/
|
|
||||||
if (xl->num_alloc_cntrs++ == xl->max_alloc_cntrs)
|
|
||||||
return &emptyconstraint;
|
|
||||||
|
|
||||||
cx = c;
|
cx = c;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -2106,7 +2098,7 @@ static struct event_constraint *
|
||||||
intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
|
intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
|
||||||
struct perf_event *event)
|
struct perf_event *event)
|
||||||
{
|
{
|
||||||
struct event_constraint *c1 = event->hw.constraint;
|
struct event_constraint *c1 = cpuc->event_constraint[idx];
|
||||||
struct event_constraint *c2;
|
struct event_constraint *c2;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -2150,6 +2142,11 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
|
||||||
|
|
||||||
xl = &excl_cntrs->states[tid];
|
xl = &excl_cntrs->states[tid];
|
||||||
xlo = &excl_cntrs->states[o_tid];
|
xlo = &excl_cntrs->states[o_tid];
|
||||||
|
if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
|
||||||
|
hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
|
||||||
|
if (!--cpuc->n_excl)
|
||||||
|
WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* put_constraint may be called from x86_schedule_events()
|
* put_constraint may be called from x86_schedule_events()
|
||||||
|
@ -2188,8 +2185,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
|
||||||
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
|
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
|
||||||
struct perf_event *event)
|
struct perf_event *event)
|
||||||
{
|
{
|
||||||
struct event_constraint *c = event->hw.constraint;
|
|
||||||
|
|
||||||
intel_put_shared_regs_event_constraints(cpuc, event);
|
intel_put_shared_regs_event_constraints(cpuc, event);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -2197,19 +2192,14 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
|
||||||
* all events are subject to and must call the
|
* all events are subject to and must call the
|
||||||
* put_excl_constraints() routine
|
* put_excl_constraints() routine
|
||||||
*/
|
*/
|
||||||
if (c && cpuc->excl_cntrs)
|
if (cpuc->excl_cntrs)
|
||||||
intel_put_excl_constraints(cpuc, event);
|
intel_put_excl_constraints(cpuc, event);
|
||||||
|
|
||||||
/* cleanup dynamic constraint */
|
|
||||||
if (c && (c->flags & PERF_X86_EVENT_DYNAMIC))
|
|
||||||
event->hw.constraint = NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void intel_commit_scheduling(struct cpu_hw_events *cpuc,
|
static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
|
||||||
struct perf_event *event, int cntr)
|
|
||||||
{
|
{
|
||||||
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
|
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
|
||||||
struct event_constraint *c = event->hw.constraint;
|
struct event_constraint *c = cpuc->event_constraint[idx];
|
||||||
struct intel_excl_states *xlo, *xl;
|
struct intel_excl_states *xlo, *xl;
|
||||||
int tid = cpuc->excl_thread_id;
|
int tid = cpuc->excl_thread_id;
|
||||||
int o_tid = 1 - tid;
|
int o_tid = 1 - tid;
|
||||||
|
@ -2639,8 +2629,6 @@ static void intel_pmu_cpu_starting(int cpu)
|
||||||
cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
|
cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
|
||||||
|
|
||||||
if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
|
if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
|
||||||
int h = x86_pmu.num_counters >> 1;
|
|
||||||
|
|
||||||
for_each_cpu(i, topology_thread_cpumask(cpu)) {
|
for_each_cpu(i, topology_thread_cpumask(cpu)) {
|
||||||
struct intel_excl_cntrs *c;
|
struct intel_excl_cntrs *c;
|
||||||
|
|
||||||
|
@ -2654,11 +2642,6 @@ static void intel_pmu_cpu_starting(int cpu)
|
||||||
}
|
}
|
||||||
cpuc->excl_cntrs->core_id = core_id;
|
cpuc->excl_cntrs->core_id = core_id;
|
||||||
cpuc->excl_cntrs->refcnt++;
|
cpuc->excl_cntrs->refcnt++;
|
||||||
/*
|
|
||||||
* set hard limit to half the number of generic counters
|
|
||||||
*/
|
|
||||||
cpuc->excl_cntrs->states[0].max_alloc_cntrs = h;
|
|
||||||
cpuc->excl_cntrs->states[1].max_alloc_cntrs = h;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -706,9 +706,9 @@ void intel_pmu_pebs_disable(struct perf_event *event)
|
||||||
|
|
||||||
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
|
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
|
||||||
|
|
||||||
if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT)
|
if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
|
||||||
cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
|
cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
|
||||||
else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST)
|
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
|
||||||
cpuc->pebs_enabled &= ~(1ULL << 63);
|
cpuc->pebs_enabled &= ~(1ULL << 63);
|
||||||
|
|
||||||
if (cpuc->enabled)
|
if (cpuc->enabled)
|
||||||
|
|
|
@ -151,7 +151,7 @@ static int __init pt_pmu_hw_init(void)
|
||||||
|
|
||||||
de_attr->attr.attr.name = pt_caps[i].name;
|
de_attr->attr.attr.name = pt_caps[i].name;
|
||||||
|
|
||||||
sysfs_attr_init(&de_attrs->attr.attr);
|
sysfs_attr_init(&de_attr->attr.attr);
|
||||||
|
|
||||||
de_attr->attr.attr.mode = S_IRUGO;
|
de_attr->attr.attr.mode = S_IRUGO;
|
||||||
de_attr->attr.show = pt_cap_show;
|
de_attr->attr.show = pt_cap_show;
|
||||||
|
@ -615,7 +615,8 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
|
||||||
struct perf_output_handle *handle)
|
struct perf_output_handle *handle)
|
||||||
|
|
||||||
{
|
{
|
||||||
unsigned long idx, npages, end;
|
unsigned long head = local64_read(&buf->head);
|
||||||
|
unsigned long idx, npages, wakeup;
|
||||||
|
|
||||||
if (buf->snapshot)
|
if (buf->snapshot)
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -634,17 +635,26 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
|
||||||
buf->topa_index[buf->stop_pos]->stop = 0;
|
buf->topa_index[buf->stop_pos]->stop = 0;
|
||||||
buf->topa_index[buf->intr_pos]->intr = 0;
|
buf->topa_index[buf->intr_pos]->intr = 0;
|
||||||
|
|
||||||
if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
|
/* how many pages till the STOP marker */
|
||||||
npages = (handle->size + 1) >> PAGE_SHIFT;
|
npages = handle->size >> PAGE_SHIFT;
|
||||||
end = (local64_read(&buf->head) >> PAGE_SHIFT) + npages;
|
|
||||||
/*if (end > handle->wakeup >> PAGE_SHIFT)
|
/* if it's on a page boundary, fill up one more page */
|
||||||
end = handle->wakeup >> PAGE_SHIFT;*/
|
if (!offset_in_page(head + handle->size + 1))
|
||||||
idx = end & (buf->nr_pages - 1);
|
npages++;
|
||||||
buf->stop_pos = idx;
|
|
||||||
idx = (local64_read(&buf->head) >> PAGE_SHIFT) + npages - 1;
|
idx = (head >> PAGE_SHIFT) + npages;
|
||||||
idx &= buf->nr_pages - 1;
|
idx &= buf->nr_pages - 1;
|
||||||
buf->intr_pos = idx;
|
buf->stop_pos = idx;
|
||||||
}
|
|
||||||
|
wakeup = handle->wakeup >> PAGE_SHIFT;
|
||||||
|
|
||||||
|
/* in the worst case, wake up the consumer one page before hard stop */
|
||||||
|
idx = (head >> PAGE_SHIFT) + npages - 1;
|
||||||
|
if (idx > wakeup)
|
||||||
|
idx = wakeup;
|
||||||
|
|
||||||
|
idx &= buf->nr_pages - 1;
|
||||||
|
buf->intr_pos = idx;
|
||||||
|
|
||||||
buf->topa_index[buf->stop_pos]->stop = 1;
|
buf->topa_index[buf->stop_pos]->stop = 1;
|
||||||
buf->topa_index[buf->intr_pos]->intr = 1;
|
buf->topa_index[buf->intr_pos]->intr = 1;
|
||||||
|
|
|
@ -365,9 +365,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
|
||||||
bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
|
bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
|
||||||
|
|
||||||
for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
|
for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
|
||||||
hwc = &box->event_list[i]->hw;
|
|
||||||
c = uncore_get_event_constraint(box, box->event_list[i]);
|
c = uncore_get_event_constraint(box, box->event_list[i]);
|
||||||
hwc->constraint = c;
|
box->event_constraint[i] = c;
|
||||||
wmin = min(wmin, c->weight);
|
wmin = min(wmin, c->weight);
|
||||||
wmax = max(wmax, c->weight);
|
wmax = max(wmax, c->weight);
|
||||||
}
|
}
|
||||||
|
@ -375,7 +374,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
|
||||||
/* fastpath, try to reuse previous register */
|
/* fastpath, try to reuse previous register */
|
||||||
for (i = 0; i < n; i++) {
|
for (i = 0; i < n; i++) {
|
||||||
hwc = &box->event_list[i]->hw;
|
hwc = &box->event_list[i]->hw;
|
||||||
c = hwc->constraint;
|
c = box->event_constraint[i];
|
||||||
|
|
||||||
/* never assigned */
|
/* never assigned */
|
||||||
if (hwc->idx == -1)
|
if (hwc->idx == -1)
|
||||||
|
@ -395,8 +394,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
|
||||||
}
|
}
|
||||||
/* slow path */
|
/* slow path */
|
||||||
if (i != n)
|
if (i != n)
|
||||||
ret = perf_assign_events(box->event_list, n,
|
ret = perf_assign_events(box->event_constraint, n,
|
||||||
wmin, wmax, assign);
|
wmin, wmax, n, assign);
|
||||||
|
|
||||||
if (!assign || ret) {
|
if (!assign || ret) {
|
||||||
for (i = 0; i < n; i++)
|
for (i = 0; i < n; i++)
|
||||||
|
|
|
@ -97,6 +97,7 @@ struct intel_uncore_box {
|
||||||
atomic_t refcnt;
|
atomic_t refcnt;
|
||||||
struct perf_event *events[UNCORE_PMC_IDX_MAX];
|
struct perf_event *events[UNCORE_PMC_IDX_MAX];
|
||||||
struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
|
struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
|
||||||
|
struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX];
|
||||||
unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
|
unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
|
||||||
u64 tags[UNCORE_PMC_IDX_MAX];
|
u64 tags[UNCORE_PMC_IDX_MAX];
|
||||||
struct pci_dev *pci_dev;
|
struct pci_dev *pci_dev;
|
||||||
|
|
|
@ -92,8 +92,6 @@ struct hw_perf_event_extra {
|
||||||
int idx; /* index in shared_regs->regs[] */
|
int idx; /* index in shared_regs->regs[] */
|
||||||
};
|
};
|
||||||
|
|
||||||
struct event_constraint;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct hw_perf_event - performance event hardware details:
|
* struct hw_perf_event - performance event hardware details:
|
||||||
*/
|
*/
|
||||||
|
@ -112,8 +110,6 @@ struct hw_perf_event {
|
||||||
|
|
||||||
struct hw_perf_event_extra extra_reg;
|
struct hw_perf_event_extra extra_reg;
|
||||||
struct hw_perf_event_extra branch_reg;
|
struct hw_perf_event_extra branch_reg;
|
||||||
|
|
||||||
struct event_constraint *constraint;
|
|
||||||
};
|
};
|
||||||
struct { /* software */
|
struct { /* software */
|
||||||
struct hrtimer hrtimer;
|
struct hrtimer hrtimer;
|
||||||
|
|
|
@ -3442,7 +3442,6 @@ static void free_event_rcu(struct rcu_head *head)
|
||||||
if (event->ns)
|
if (event->ns)
|
||||||
put_pid_ns(event->ns);
|
put_pid_ns(event->ns);
|
||||||
perf_event_free_filter(event);
|
perf_event_free_filter(event);
|
||||||
perf_event_free_bpf_prog(event);
|
|
||||||
kfree(event);
|
kfree(event);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3573,6 +3572,8 @@ static void __free_event(struct perf_event *event)
|
||||||
put_callchain_buffers();
|
put_callchain_buffers();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
perf_event_free_bpf_prog(event);
|
||||||
|
|
||||||
if (event->destroy)
|
if (event->destroy)
|
||||||
event->destroy(event);
|
event->destroy(event);
|
||||||
|
|
||||||
|
|
|
@ -493,6 +493,20 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
|
||||||
rb->aux_pages[rb->aux_nr_pages] = page_address(page++);
|
rb->aux_pages[rb->aux_nr_pages] = page_address(page++);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In overwrite mode, PMUs that don't support SG may not handle more
|
||||||
|
* than one contiguous allocation, since they rely on PMI to do double
|
||||||
|
* buffering. In this case, the entire buffer has to be one contiguous
|
||||||
|
* chunk.
|
||||||
|
*/
|
||||||
|
if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) &&
|
||||||
|
overwrite) {
|
||||||
|
struct page *page = virt_to_page(rb->aux_pages[0]);
|
||||||
|
|
||||||
|
if (page_private(page) != max_order)
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages,
|
rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages,
|
||||||
overwrite);
|
overwrite);
|
||||||
if (!rb->aux_priv)
|
if (!rb->aux_priv)
|
||||||
|
|
Loading…
Reference in New Issue