perf/core improvements and fixes:
. Add libdw DWARF post unwind support for ARM (Jean Pihet) . Consolidate types.h for ARM and ARM64 (Jean Pihet) . Fix possible null pointer dereference in session.c (Masanari Iida) . Cleanup, remove unused variables in map_switch_event() (Dongsheng Yang) . Remove nr_state_machine_bugs in perf latency (Dongsheng Yang) . Remove usage of trace_sched_wakeup(.success) (Peter Zijlstra) Signed-off-by: Jiri Olsa <jolsa@kernel.org> -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAABAgAGBQJTefKDAAoJEPZqUSBWB3s9KhsP/A+vkQ2ClJsCTK9ADSRA1NSx wnsXjE4UzgjQeaqSz+6tm/2BbckHhkzt2hcrQ690Gm0cqvY+WJ8FNxxiEmFXDgFG TCISeYjqpao7V1XgQX+deyIJSYD3TP/EzTUbpoJWZXIImKGs4mCP/D5C+5SNnXm2 NREvZeHErqcrRiha6Qa9AwdoVG/uQAQKaZeEQKIMIhH7aiqgRTbGrNbobcZ5R9bT dZCE1Zq3ns5/Vco7HAsu0Ij6SvmETNb2OPdGrBb7zr2tNRXFKVyZAGfG+rN8G915 Dt5fioE3s0kQMYpq8PHDcbkBH1HNbfz+QujJp7OKThC0zl6lH8Tyx8Oj2L8C7UyI J1vL87HLjxRvTb1EgTw7vR+n2te4vNgTd1TdXjAQyxhTU0V2AnhI+8ZQQUoKPvOX exQlt9ftL8IEC4EyKCyx6B5dRNIK7RVLFPKevf7a39QVEtqTbutMqu9u0I/+1PxZ s/v3sgLKKzVLjo3Bfsne0ZMJ+a9eJ8vz2vi63UCw44QaFXCl1VmMvoZaJxOMGnvN Mjlg5Vlgl0tSGToJkq0nF7mlb9iKrAYsUUWO9ND7Ya4B8C79u482oJDXI0YqH76g rCQlPwLbAcJF17+9z9kol2RvJolFgQnnZ52ypSYErJoKSMTdVhZ0HaaAsO4R6uKz RoHcyunF7ZNAmeOnzOg5 =l453 -----END PGP SIGNATURE----- Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf into perf/core Pull perf/core improvements and fixes from Jiri Olsa: * Add libdw DWARF post unwind support for ARM (Jean Pihet) * Consolidate types.h for ARM and ARM64 (Jean Pihet) * Fix possible null pointer dereference in session.c (Masanari Iida) * Cleanup, remove unused variables in map_switch_event() (Dongsheng Yang) * Remove nr_state_machine_bugs in perf latency (Dongsheng Yang) * Remove usage of trace_sched_wakeup(.success) (Peter Zijlstra) Signed-off-by: Jiri Olsa <jolsa@kernel.org> Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
6480c56130
|
@ -411,7 +411,7 @@ LIB_OBJS += $(OUTPUT)tests/code-reading.o
|
|||
LIB_OBJS += $(OUTPUT)tests/sample-parsing.o
|
||||
LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o
|
||||
ifndef NO_DWARF_UNWIND
|
||||
ifeq ($(ARCH),x86)
|
||||
ifeq ($(ARCH),$(filter $(ARCH),x86 arm))
|
||||
LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o
|
||||
endif
|
||||
endif
|
||||
|
|
|
@ -5,3 +5,10 @@ endif
|
|||
ifndef NO_LIBUNWIND
|
||||
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
|
||||
endif
|
||||
ifndef NO_LIBDW_DWARF_UNWIND
|
||||
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o
|
||||
endif
|
||||
ifndef NO_DWARF_UNWIND
|
||||
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
|
||||
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o
|
||||
endif
|
||||
|
|
|
@ -2,10 +2,15 @@
|
|||
#define ARCH_PERF_REGS_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "../../util/types.h"
|
||||
#include <linux/types.h>
|
||||
#include <asm/perf_regs.h>
|
||||
|
||||
void perf_regs_load(u64 *regs);
|
||||
|
||||
#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM_MAX) - 1)
|
||||
#define PERF_REGS_MAX PERF_REG_ARM_MAX
|
||||
#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
|
||||
|
||||
#define PERF_REG_IP PERF_REG_ARM_PC
|
||||
#define PERF_REG_SP PERF_REG_ARM_SP
|
||||
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
#include <string.h>
|
||||
#include "perf_regs.h"
|
||||
#include "thread.h"
|
||||
#include "map.h"
|
||||
#include "event.h"
|
||||
#include "tests/tests.h"
|
||||
|
||||
#define STACK_SIZE 8192
|
||||
|
||||
/*
 * Copy the top of the user stack into sample->user_stack.
 *
 * @sample: sample whose user_stack data/size fields are filled in on success.
 * @thread: thread whose map groups are searched for the mapping holding SP.
 * @regs:   register dump indexed by PERF_REG_ARM_*; only the SP slot is read.
 *
 * At most STACK_SIZE bytes are copied, starting at SP and never going past
 * the end of the mapping that contains SP.  On success the malloc'ed copy is
 * stored in sample->user_stack.data and is not freed here.
 *
 * Returns 0 on success, -1 if the buffer cannot be allocated or no mapping
 * contains SP.
 */
static int sample_ustack(struct perf_sample *sample,
			 struct thread *thread, u64 *regs)
{
	struct stack_dump *stack = &sample->user_stack;
	struct map *map;
	unsigned long sp;
	u64 stack_size, *buf;

	buf = malloc(STACK_SIZE);
	if (!buf) {
		/* This buffer holds the stack copy, not the registers. */
		pr_debug("failed to allocate sample ustack data\n");
		return -1;
	}

	sp = (unsigned long) regs[PERF_REG_ARM_SP];

	map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
	if (!map) {
		pr_debug("failed to get stack map\n");
		free(buf);
		return -1;
	}

	/* Bytes available between SP and the end of its mapping, capped. */
	stack_size = map->end - sp;
	if (stack_size > STACK_SIZE)
		stack_size = STACK_SIZE;

	memcpy(buf, (void *) sp, stack_size);
	stack->data = (char *) buf;
	stack->size = stack_size;
	return 0;
}
|
||||
|
||||
int test__arch_unwind_sample(struct perf_sample *sample,
|
||||
struct thread *thread)
|
||||
{
|
||||
struct regs_dump *regs = &sample->user_regs;
|
||||
u64 *buf;
|
||||
|
||||
buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
|
||||
if (!buf) {
|
||||
pr_debug("failed to allocate sample uregs data\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
perf_regs_load(buf);
|
||||
regs->abi = PERF_SAMPLE_REGS_ABI;
|
||||
regs->regs = buf;
|
||||
regs->mask = PERF_REGS_MASK;
|
||||
|
||||
return sample_ustack(sample, thread, buf);
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
#include <linux/linkage.h>
|
||||
|
||||
#define R0 0x00
|
||||
#define R1 0x08
|
||||
#define R2 0x10
|
||||
#define R3 0x18
|
||||
#define R4 0x20
|
||||
#define R5 0x28
|
||||
#define R6 0x30
|
||||
#define R7 0x38
|
||||
#define R8 0x40
|
||||
#define R9 0x48
|
||||
#define SL 0x50
|
||||
#define FP 0x58
|
||||
#define IP 0x60
|
||||
#define SP 0x68
|
||||
#define LR 0x70
|
||||
#define PC 0x78
|
||||
|
||||
/*
|
||||
* Implementation of void perf_regs_load(u64 *regs);
|
||||
*
|
||||
* This function fills in the 'regs' buffer from the actual register values,
|
||||
* in the way the perf built-in unwinding test expects them:
|
||||
* - the PC at the time of the call to this function. Since this function
|
||||
* is called using a bl instruction, the PC value is taken from LR.
|
||||
* The built-in unwinding test then unwinds the call stack from the dwarf
|
||||
* information in unwind__get_entries.
|
||||
*
|
||||
* Notes:
|
||||
* - the 8-byte stride in the register offsets comes from the fact
|
||||
* that the registers are stored in a u64 array (u64 *regs),
|
||||
* - the regs buffer needs to be zeroed before the call to this function,
|
||||
* in this case using a calloc in dwarf-unwind.c.
|
||||
*/
|
||||
|
||||
.text
|
||||
.type perf_regs_load,%function
|
||||
ENTRY(perf_regs_load)
// r0 is the base address for every store below, i.e. the 'regs' buffer;
// it is never modified, so the value saved in the R0 slot is that address.
// Each register goes to its own slot at an 8-byte stride (offsets R0..PC).
|
||||
str r0, [r0, #R0]
|
||||
str r1, [r0, #R1]
|
||||
str r2, [r0, #R2]
|
||||
str r3, [r0, #R3]
|
||||
str r4, [r0, #R4]
|
||||
str r5, [r0, #R5]
|
||||
str r6, [r0, #R6]
|
||||
str r7, [r0, #R7]
|
||||
str r8, [r0, #R8]
|
||||
str r9, [r0, #R9]
|
||||
|
||||
str sl, [r0, #SL]
|
||||
str fp, [r0, #FP]
|
||||
str ip, [r0, #IP]
|
||||
str sp, [r0, #SP]
|
||||
str lr, [r0, #LR]
|
||||
// LR is stored twice: once in its own slot and once in the PC slot.
|
||||
str lr, [r0, #PC] // store pc as lr in order to skip the call
|
||||
// to this function
|
||||
// Return to the caller.
|
||||
mov pc, lr
|
||||
ENDPROC(perf_regs_load)
|
|
@ -0,0 +1,36 @@
|
|||
#include <elfutils/libdwfl.h>
|
||||
#include "../../util/unwind-libdw.h"
|
||||
#include "../../util/perf_regs.h"
|
||||
|
||||
/*
 * Hand the sampled ARM user registers to libdwfl as the initial register
 * state of @thread.
 *
 * @thread: libdwfl thread being set up.
 * @arg:    struct unwind_info; the registers come from
 *          ui->sample->user_regs.
 *
 * Registers 0..15 passed to dwfl_thread_state_registers() are taken, in
 * order, from the perf registers R0-R10, FP, IP, SP, LR and PC.  Each value
 * starts at 0 and the return value of perf_reg_value() is ignored.
 *
 * Returns the result of dwfl_thread_state_registers().
 */
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
	/* perf register id to read for each register index, 0 through 15. */
	static const int perf_reg_ids[] = {
		PERF_REG_ARM_R0,  PERF_REG_ARM_R1,  PERF_REG_ARM_R2,
		PERF_REG_ARM_R3,  PERF_REG_ARM_R4,  PERF_REG_ARM_R5,
		PERF_REG_ARM_R6,  PERF_REG_ARM_R7,  PERF_REG_ARM_R8,
		PERF_REG_ARM_R9,  PERF_REG_ARM_R10, PERF_REG_ARM_FP,
		PERF_REG_ARM_IP,  PERF_REG_ARM_SP,  PERF_REG_ARM_LR,
		PERF_REG_ARM_PC,
	};
	struct unwind_info *ui = arg;
	struct regs_dump *user_regs = &ui->sample->user_regs;
	Dwarf_Word dwarf_regs[PERF_REG_ARM_MAX];
	unsigned int idx;

	for (idx = 0; idx < sizeof(perf_reg_ids) / sizeof(perf_reg_ids[0]); idx++) {
		Dwarf_Word val = 0;

		perf_reg_value(&val, user_regs, perf_reg_ids[idx]);
		dwarf_regs[idx] = val;
	}

	return dwfl_thread_state_registers(thread, 0, PERF_REG_ARM_MAX,
					   dwarf_regs);
}
|
|
@ -2,7 +2,7 @@
|
|||
#define ARCH_PERF_REGS_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "../../util/types.h"
|
||||
#include <linux/types.h>
|
||||
#include <asm/perf_regs.h>
|
||||
|
||||
#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM64_MAX) - 1)
|
||||
|
|
|
@ -149,7 +149,6 @@ struct perf_sched {
|
|||
unsigned long nr_runs;
|
||||
unsigned long nr_timestamps;
|
||||
unsigned long nr_unordered_timestamps;
|
||||
unsigned long nr_state_machine_bugs;
|
||||
unsigned long nr_context_switch_bugs;
|
||||
unsigned long nr_events;
|
||||
unsigned long nr_lost_chunks;
|
||||
|
@ -1007,17 +1006,12 @@ static int latency_wakeup_event(struct perf_sched *sched,
|
|||
struct perf_sample *sample,
|
||||
struct machine *machine)
|
||||
{
|
||||
const u32 pid = perf_evsel__intval(evsel, sample, "pid"),
|
||||
success = perf_evsel__intval(evsel, sample, "success");
|
||||
const u32 pid = perf_evsel__intval(evsel, sample, "pid");
|
||||
struct work_atoms *atoms;
|
||||
struct work_atom *atom;
|
||||
struct thread *wakee;
|
||||
u64 timestamp = sample->time;
|
||||
|
||||
/* Note for later, it may be interesting to observe the failing cases */
|
||||
if (!success)
|
||||
return 0;
|
||||
|
||||
wakee = machine__findnew_thread(machine, 0, pid);
|
||||
atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
|
||||
if (!atoms) {
|
||||
|
@ -1037,12 +1031,18 @@ static int latency_wakeup_event(struct perf_sched *sched,
|
|||
atom = list_entry(atoms->work_list.prev, struct work_atom, list);
|
||||
|
||||
/*
|
||||
* As we do not guarantee the wakeup event happens when
|
||||
* task is out of run queue, also may happen when task is
|
||||
* on run queue and wakeup only changes ->state to TASK_RUNNING,
|
||||
* then we should not set the ->wake_up_time when wake up a
|
||||
* task which is on run queue.
|
||||
*
|
||||
* You WILL be missing events if you've recorded only
|
||||
* one CPU, or are only looking at only one, so don't
|
||||
* make useless noise.
|
||||
* skip in this case.
|
||||
*/
|
||||
if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING)
|
||||
sched->nr_state_machine_bugs++;
|
||||
return 0;
|
||||
|
||||
sched->nr_timestamps++;
|
||||
if (atom->sched_out_time > timestamp) {
|
||||
|
@ -1266,9 +1266,8 @@ static int process_sched_wakeup_event(struct perf_tool *tool,
|
|||
static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
|
||||
struct perf_sample *sample, struct machine *machine)
|
||||
{
|
||||
const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
|
||||
next_pid = perf_evsel__intval(evsel, sample, "next_pid");
|
||||
struct thread *sched_out __maybe_unused, *sched_in;
|
||||
const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
|
||||
struct thread *sched_in;
|
||||
int new_shortname;
|
||||
u64 timestamp0, timestamp = sample->time;
|
||||
s64 delta;
|
||||
|
@ -1291,7 +1290,6 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
|
|||
return -1;
|
||||
}
|
||||
|
||||
sched_out = machine__findnew_thread(machine, 0, prev_pid);
|
||||
sched_in = machine__findnew_thread(machine, 0, next_pid);
|
||||
|
||||
sched->curr_thread[this_cpu] = sched_in;
|
||||
|
@ -1501,14 +1499,6 @@ static void print_bad_events(struct perf_sched *sched)
|
|||
(double)sched->nr_lost_events/(double)sched->nr_events * 100.0,
|
||||
sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks);
|
||||
}
|
||||
if (sched->nr_state_machine_bugs && sched->nr_timestamps) {
|
||||
printf(" INFO: %.3f%% state machine bugs (%ld out of %ld)",
|
||||
(double)sched->nr_state_machine_bugs/(double)sched->nr_timestamps*100.0,
|
||||
sched->nr_state_machine_bugs, sched->nr_timestamps);
|
||||
if (sched->nr_lost_events)
|
||||
printf(" (due to lost events?)");
|
||||
printf("\n");
|
||||
}
|
||||
if (sched->nr_context_switch_bugs && sched->nr_timestamps) {
|
||||
printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)",
|
||||
(double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0,
|
||||
|
|
|
@ -40,11 +40,11 @@ ifeq ($(ARCH),arm64)
|
|||
LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
|
||||
endif
|
||||
|
||||
# So far there's only x86 libdw unwind support merged in perf.
|
||||
# So far there's only x86 and arm libdw unwind support merged in perf.
|
||||
# Disable it on all other architectures in case libdw unwind
|
||||
# support is detected in system. Add supported architectures
|
||||
# to the check.
|
||||
ifneq ($(ARCH),x86)
|
||||
ifneq ($(ARCH),$(filter $(ARCH),x86 arm))
|
||||
NO_LIBDW_DWARF_UNWIND := 1
|
||||
endif
|
||||
|
||||
|
|
|
@ -115,7 +115,7 @@ static struct test {
|
|||
.desc = "Test parsing with no sample_id_all bit set",
|
||||
.func = test__parse_no_sample_id_all,
|
||||
},
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
|
||||
#ifdef HAVE_DWARF_UNWIND_SUPPORT
|
||||
{
|
||||
.desc = "Test dwarf unwind",
|
||||
|
|
|
@ -74,9 +74,6 @@ int test__perf_evsel__tp_sched_test(void)
|
|||
if (perf_evsel__test_field(evsel, "prio", 4, true))
|
||||
ret = -1;
|
||||
|
||||
if (perf_evsel__test_field(evsel, "success", 4, true))
|
||||
ret = -1;
|
||||
|
||||
if (perf_evsel__test_field(evsel, "target_cpu", 4, true))
|
||||
ret = -1;
|
||||
|
||||
|
|
|
@ -45,7 +45,7 @@ int test__hists_filter(void);
|
|||
int test__mmap_thread_lookup(void);
|
||||
int test__thread_mg_share(void);
|
||||
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
|
||||
#ifdef HAVE_DWARF_UNWIND_SUPPORT
|
||||
struct thread;
|
||||
struct perf_sample;
|
||||
|
|
|
@ -1625,13 +1625,14 @@ out_delete_map:
|
|||
void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
|
||||
bool full)
|
||||
{
|
||||
int fd = perf_data_file__fd(session->file);
|
||||
struct stat st;
|
||||
int ret;
|
||||
int fd, ret;
|
||||
|
||||
if (session == NULL || fp == NULL)
|
||||
return;
|
||||
|
||||
fd = perf_data_file__fd(session->file);
|
||||
|
||||
ret = fstat(fd, &st);
|
||||
if (ret == -1)
|
||||
return;
|
||||
|
|
Loading…
Reference in New Issue