perf/x86/intel: Fix unwind errors from PEBS entries (mk-II)

Vince reported the perf_fuzzer giving various unwinder warnings and
Josh reported:

> Deja vu.  Most of these are related to perf PEBS, similar to the
> following issue:
>
>   b8000586c9 ("perf/x86/intel: Cure bogus unwind from PEBS entries")
>
> This is basically the ORC version of that.  setup_pebs_sample_data() is
> assembling a franken-pt_regs which ORC isn't happy about.  RIP is
> inconsistent with some of the other registers (like RSP and RBP).

And where the previous unwinder only needed BP and SP, ORC also
requires IP. But we cannot spoof IP, because then the sample will get
displaced, entirely negating the point of PEBS.

So cure the whole thing differently by doing the unwind early; this
does, however, require a means to communicate that we did the unwind
early. We (ab)use an unused sample_type bit for this, which we set on
events that fill out data->callchain before the normal
perf_prepare_sample().

Debugged-by: Josh Poimboeuf <jpoimboe@redhat.com>
Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Tested-by: Josh Poimboeuf <jpoimboe@redhat.com>
Tested-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Peter Zijlstra 2018-05-10 15:48:41 +02:00 committed by Ingo Molnar
parent 4799f6856f
commit 6cbc304f2f
5 changed files with 21 additions and 16 deletions

View File

@ -2997,6 +2997,9 @@ static int intel_pmu_hw_config(struct perf_event *event)
} }
if (x86_pmu.pebs_aliases) if (x86_pmu.pebs_aliases)
x86_pmu.pebs_aliases(event); x86_pmu.pebs_aliases(event);
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
} }
if (needs_branch_stack(event)) { if (needs_branch_stack(event)) {

View File

@ -1185,17 +1185,21 @@ static void setup_pebs_sample_data(struct perf_event *event,
data->data_src.val = val; data->data_src.val = val;
} }
/*
* We must however always use iregs for the unwinder to stay sane; the
* record BP,SP,IP can point into thin air when the record is from a
* previous PMI context or an (I)RET happend between the record and
* PMI.
*/
if (sample_type & PERF_SAMPLE_CALLCHAIN)
data->callchain = perf_callchain(event, iregs);
/* /*
* We use the interrupt regs as a base because the PEBS record does not * We use the interrupt regs as a base because the PEBS record does not
* contain a full regs set, specifically it seems to lack segment * contain a full regs set, specifically it seems to lack segment
* descriptors, which get used by things like user_mode(). * descriptors, which get used by things like user_mode().
* *
* In the simple case fix up only the IP for PERF_SAMPLE_IP. * In the simple case fix up only the IP for PERF_SAMPLE_IP.
*
* We must however always use BP,SP from iregs for the unwinder to stay
* sane; the record BP,SP can point into thin air when the record is
* from a previous PMI context or an (I)RET happend between the record
* and PMI.
*/ */
*regs = *iregs; *regs = *iregs;
@ -1214,15 +1218,8 @@ static void setup_pebs_sample_data(struct perf_event *event,
regs->si = pebs->si; regs->si = pebs->si;
regs->di = pebs->di; regs->di = pebs->di;
/* regs->bp = pebs->bp;
* Per the above; only set BP,SP if we don't need callchains. regs->sp = pebs->sp;
*
* XXX: does this make sense?
*/
if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
regs->bp = pebs->bp;
regs->sp = pebs->sp;
}
#ifndef CONFIG_X86_32 #ifndef CONFIG_X86_32
regs->r8 = pebs->r8; regs->r8 = pebs->r8;

View File

@ -1130,6 +1130,7 @@ extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct
extern struct perf_callchain_entry * extern struct perf_callchain_entry *
get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
u32 max_stack, bool crosstask, bool add_mark); u32 max_stack, bool crosstask, bool add_mark);
extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs);
extern int get_callchain_buffers(int max_stack); extern int get_callchain_buffers(int max_stack);
extern void put_callchain_buffers(void); extern void put_callchain_buffers(void);

View File

@ -143,6 +143,8 @@ enum perf_event_sample_format {
PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_PHYS_ADDR = 1U << 19,
PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */ PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */
__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63,
}; };
/* /*

View File

@ -6343,7 +6343,7 @@ static u64 perf_virt_to_phys(u64 virt)
static struct perf_callchain_entry __empty_callchain = { .nr = 0, }; static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
static struct perf_callchain_entry * struct perf_callchain_entry *
perf_callchain(struct perf_event *event, struct pt_regs *regs) perf_callchain(struct perf_event *event, struct pt_regs *regs)
{ {
bool kernel = !event->attr.exclude_callchain_kernel; bool kernel = !event->attr.exclude_callchain_kernel;
@ -6382,7 +6382,9 @@ void perf_prepare_sample(struct perf_event_header *header,
if (sample_type & PERF_SAMPLE_CALLCHAIN) { if (sample_type & PERF_SAMPLE_CALLCHAIN) {
int size = 1; int size = 1;
data->callchain = perf_callchain(event, regs); if (!(sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
data->callchain = perf_callchain(event, regs);
size += data->callchain->nr; size += data->callchain->nr;
header->size += size * sizeof(u64); header->size += size * sizeof(u64);