perf record: Implement basic filtering for off-cpu
It should honor cpu and task filtering with -a, -C or -p, -t options. Committer testing: # perf record --off-cpu --cpu 1 perf bench sched messaging -l 1000 # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 1.722 [sec] [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 1.446 MB perf.data (7248 samples) ] # # perf script | head -20 perf 97164 [001] 38287.696761: 1 cycles: ffffffffb6070174 native_write_msr+0x4 (vmlinux) perf 97164 [001] 38287.696764: 1 cycles: ffffffffb6070174 native_write_msr+0x4 (vmlinux) perf 97164 [001] 38287.696765: 9 cycles: ffffffffb6070174 native_write_msr+0x4 (vmlinux) perf 97164 [001] 38287.696767: 212 cycles: ffffffffb6070176 native_write_msr+0x6 (vmlinux) perf 97164 [001] 38287.696768: 5130 cycles: ffffffffb6070176 native_write_msr+0x6 (vmlinux) perf 97164 [001] 38287.696770: 123063 cycles: ffffffffb6e0011e syscall_return_via_sysret+0x38 (vmlinux) perf 97164 [001] 38287.696803: 2292748 cycles: ffffffffb636c82d __fput+0xad (vmlinux) swapper 0 [001] 38287.702852: 1927474 cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux) :97513 97513 [001] 38287.767207: 1172536 cycles: ffffffffb612ff65 newidle_balance+0x5 (vmlinux) swapper 0 [001] 38287.769567: 1073081 cycles: ffffffffb618216d ktime_get_mono_fast_ns+0xd (vmlinux) :97533 97533 [001] 38287.770962: 984460 cycles: ffffffffb65b2900 selinux_socket_sendmsg+0x0 (vmlinux) :97540 97540 [001] 38287.772242: 883462 cycles: ffffffffb6d0bf59 irqentry_exit_to_user_mode+0x9 (vmlinux) swapper 0 [001] 38287.773633: 741963 cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux) :97552 97552 [001] 38287.774539: 606680 cycles: ffffffffb62eda0a page_add_file_rmap+0x7a (vmlinux) :97556 97556 [001] 38287.775333: 502254 cycles: ffffffffb634f964 get_obj_cgroup_from_current+0xc4 (vmlinux) :97561 97561 [001] 38287.776163: 427891 cycles: ffffffffb61b1522 
cgroup_rstat_updated+0x22 (vmlinux) swapper 0 [001] 38287.776854: 359030 cycles: ffffffffb612fc5e load_balance+0x9ce (vmlinux) :97567 97567 [001] 38287.777312: 330371 cycles: ffffffffb6a8d8d0 skb_set_owner_w+0x0 (vmlinux) :97566 97566 [001] 38287.777589: 311622 cycles: ffffffffb614a7a8 native_queued_spin_lock_slowpath+0x148 (vmlinux) :97512 97512 [001] 38287.777671: 307851 cycles: ffffffffb62e0f35 find_vma+0x55 (vmlinux) # # perf record --off-cpu --cpu 4 perf bench sched messaging -l 1000 # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 1.613 [sec] [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 1.415 MB perf.data (6729 samples) ] # perf script | head -20 perf 97650 [004] 38323.728036: 1 cycles: ffffffffb6070174 native_write_msr+0x4 (vmlinux) perf 97650 [004] 38323.728040: 1 cycles: ffffffffb6070174 native_write_msr+0x4 (vmlinux) perf 97650 [004] 38323.728041: 9 cycles: ffffffffb6070174 native_write_msr+0x4 (vmlinux) perf 97650 [004] 38323.728042: 208 cycles: ffffffffb6070176 native_write_msr+0x6 (vmlinux) perf 97650 [004] 38323.728044: 5026 cycles: ffffffffb6070176 native_write_msr+0x6 (vmlinux) perf 97650 [004] 38323.728046: 119970 cycles: ffffffffb6d0bebc syscall_exit_to_user_mode+0x1c (vmlinux) perf 97650 [004] 38323.728078: 2190103 cycles: 54b756 perf_tool__process_synth_event+0x16 (/home/acme/bin/perf) swapper 0 [004] 38323.783357: 1593139
cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux) swapper 0 [004] 38323.785352: 1593139
cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux) swapper 0 [004] 38323.797330: 1418936 cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux) swapper 0 [004] 38323.802350: 1418936 cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux) swapper 0 [004] 38323.806333: 1418936 cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux) :97996 97996 [004] 38323.807145: 1418936 cycles: 7f5db9be6917 [unknown] ([unknown]) :97959 97959 [004] 38323.807730: 1445074 cycles: ffffffffb6329d36 memcg_slab_post_alloc_hook+0x146 (vmlinux) :97959 97959 [004] 38323.808103: 1341584 cycles: ffffffffb62fd90f get_page_from_freelist+0x112f (vmlinux) :97959 97959 [004] 38323.808451: 1227537 cycles: ffffffffb65b2905 selinux_socket_sendmsg+0x5 (vmlinux) :97959 97959 [004] 38323.808768: 1184321 cycles: ffffffffb6d1ba35 _raw_spin_lock_irqsave+0x15 (vmlinux) :97959 97959 [004] 38323.809073: 1153017 cycles: ffffffffb6a8d92d skb_set_owner_w+0x5d (vmlinux) :97959 97959 [004] 38323.809402: 1126875 cycles: ffffffffb6329c64 memcg_slab_post_alloc_hook+0x74 (vmlinux) :97959 97959 [004] 38323.809695: 1073248 cycles: ffffffffb6e0001d entry_SYSCALL_64+0x1d (vmlinux) # Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Blake Jones <blakejones@google.com> Cc: Hao Luo <haoluo@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Milian Wolff <milian.wolff@kdab.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <songliubraving@fb.com> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20220518224725.742882-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
edc41a1099
commit
10742d0c07
|
@ -892,7 +892,7 @@ static int record__config_text_poke(struct evlist *evlist)
|
|||
|
||||
/*
 * Configure off-cpu profiling for this record session by calling
 * off_cpu_prepare() with the session's evlist and its target options,
 * and return whatever off_cpu_prepare() returns.
 */
static int record__config_off_cpu(struct record *rec)
|
||||
{
|
||||
return off_cpu_prepare(rec->evlist);
|
||||
return off_cpu_prepare(rec->evlist, &rec->opts.target);
|
||||
}
|
||||
|
||||
static bool record__kcore_readable(struct machine *machine)
|
||||
|
|
|
@ -6,6 +6,9 @@
|
|||
#include "util/off_cpu.h"
|
||||
#include "util/perf-hooks.h"
|
||||
#include "util/session.h"
|
||||
#include "util/target.h"
|
||||
#include "util/cpumap.h"
|
||||
#include "util/thread_map.h"
|
||||
#include <bpf/bpf.h>
|
||||
|
||||
#include "bpf_skel/off_cpu.skel.h"
|
||||
|
@ -60,8 +63,23 @@ static int off_cpu_config(struct evlist *evlist)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Callback registered for the "record_start" perf hook.
 *
 * @arg: the evlist that off_cpu_prepare() passed when registering the hook.
 *
 * Optionally installs a task filter for the first thread in the evlist's
 * thread map, then sets the BPF-side 'enabled' flag.
 */
static void off_cpu_start(void *arg __maybe_unused)
|
||||
static void off_cpu_start(void *arg)
|
||||
{
|
||||
struct evlist *evlist = arg;
|
||||
|
||||
/*
 * update task filter for the given workload
 *
 * Only done when neither a cpu filter nor a task filter has been set
 * up already and the first thread map entry holds a pid other than -1.
 */
|
||||
if (!skel->bss->has_cpu && !skel->bss->has_task &&
|
||||
perf_thread_map__pid(evlist->core.threads, 0) != -1) {
|
||||
int fd;
|
||||
u32 pid;
|
||||
u8 val = 1;
|
||||
|
||||
/* make the BPF side consult task_filter from now on */
skel->bss->has_task = 1;
|
||||
fd = bpf_map__fd(skel->maps.task_filter);
|
||||
pid = perf_thread_map__pid(evlist->core.threads, 0);
|
||||
/* NOTE(review): the return value of the map update is not checked */
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
|
||||
}
|
||||
|
||||
skel->bss->enabled = 1;
|
||||
}
|
||||
|
||||
|
@ -71,31 +89,75 @@ static void off_cpu_finish(void *arg __maybe_unused)
|
|||
off_cpu_bpf__destroy(skel);
|
||||
}
|
||||
|
||||
int off_cpu_prepare(struct evlist *evlist)
|
||||
int off_cpu_prepare(struct evlist *evlist, struct target *target)
|
||||
{
|
||||
int err;
|
||||
int err, fd, i;
|
||||
int ncpus = 1, ntasks = 1;
|
||||
|
||||
if (off_cpu_config(evlist) < 0) {
|
||||
pr_err("Failed to config off-cpu BPF event\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
set_max_rlimit();
|
||||
|
||||
skel = off_cpu_bpf__open_and_load();
|
||||
skel = off_cpu_bpf__open();
|
||||
if (!skel) {
|
||||
pr_err("Failed to open off-cpu BPF skeleton\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* don't need to set cpu filter for system-wide mode */
|
||||
if (target->cpu_list) {
|
||||
ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
|
||||
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
|
||||
}
|
||||
|
||||
if (target__has_task(target)) {
|
||||
ntasks = perf_thread_map__nr(evlist->core.threads);
|
||||
bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
|
||||
}
|
||||
|
||||
set_max_rlimit();
|
||||
|
||||
err = off_cpu_bpf__load(skel);
|
||||
if (err) {
|
||||
pr_err("Failed to load off-cpu skeleton\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (target->cpu_list) {
|
||||
u32 cpu;
|
||||
u8 val = 1;
|
||||
|
||||
skel->bss->has_cpu = 1;
|
||||
fd = bpf_map__fd(skel->maps.cpu_filter);
|
||||
|
||||
for (i = 0; i < ncpus; i++) {
|
||||
cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu;
|
||||
bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
|
||||
}
|
||||
}
|
||||
|
||||
if (target__has_task(target)) {
|
||||
u32 pid;
|
||||
u8 val = 1;
|
||||
|
||||
skel->bss->has_task = 1;
|
||||
fd = bpf_map__fd(skel->maps.task_filter);
|
||||
|
||||
for (i = 0; i < ntasks; i++) {
|
||||
pid = perf_thread_map__pid(evlist->core.threads, i);
|
||||
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
|
||||
}
|
||||
}
|
||||
|
||||
err = off_cpu_bpf__attach(skel);
|
||||
if (err) {
|
||||
pr_err("Failed to attach off-cpu BPF skeleton\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (perf_hooks__set_hook("record_start", off_cpu_start, NULL) ||
|
||||
perf_hooks__set_hook("record_end", off_cpu_finish, NULL)) {
|
||||
if (perf_hooks__set_hook("record_start", off_cpu_start, evlist) ||
|
||||
perf_hooks__set_hook("record_end", off_cpu_finish, evlist)) {
|
||||
pr_err("Failed to attach off-cpu skeleton\n");
|
||||
goto out;
|
||||
}
|
||||
|
|
|
@ -49,12 +49,28 @@ struct {
|
|||
__uint(max_entries, MAX_ENTRIES);
|
||||
} off_cpu SEC(".maps");
|
||||
|
||||
/*
 * Hash map keyed by a 32-bit CPU number with a one-byte value.
 * can_record() only checks whether the current CPU is present as a key
 * when 'has_cpu' is set.  The max_entries of 1 is the default; user space
 * resizes it with bpf_map__set_max_entries() when a cpu list is given.
 */
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__uint(key_size, sizeof(__u32));
|
||||
__uint(value_size, sizeof(__u8));
|
||||
__uint(max_entries, 1);
|
||||
} cpu_filter SEC(".maps");
|
||||
|
||||
/*
 * Hash map keyed by a 32-bit pid with a one-byte value.
 * can_record() only checks whether the task's pid is present as a key
 * when 'has_task' is set.  The max_entries of 1 is the default; user space
 * resizes it with bpf_map__set_max_entries() when the target has tasks.
 */
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__uint(key_size, sizeof(__u32));
|
||||
__uint(value_size, sizeof(__u8));
|
||||
__uint(max_entries, 1);
|
||||
} task_filter SEC(".maps");
|
||||
|
||||
/* old kernel task_struct definition */
|
||||
struct task_struct___old {
|
||||
long state;
|
||||
} __attribute__((preserve_access_index));
|
||||
|
||||
int enabled = 0;
|
||||
int has_cpu = 0;
|
||||
int has_task = 0;
|
||||
|
||||
/*
|
||||
* Old kernel used to call it task_struct->state and now it's '__state'.
|
||||
|
@ -74,6 +90,37 @@ static inline int get_task_state(struct task_struct *t)
|
|||
return BPF_CORE_READ(t_old, state);
|
||||
}
|
||||
|
||||
/*
 * Decide whether an off-cpu sample may be recorded for a task.
 *
 * @t:     task to check (on_switch() passes the previous task)
 * @state: task state value to check
 *
 * Returns 1 when every check below passes, 0 as soon as one fails.
 */
static inline int can_record(struct task_struct *t, int state)
|
||||
{
|
||||
/* kernel threads don't have user stack */
|
||||
if (t->flags & PF_KTHREAD)
|
||||
return 0;
|
||||
|
||||
/* only the interruptible and uninterruptible states are accepted */
if (state != TASK_INTERRUPTIBLE &&
|
||||
state != TASK_UNINTERRUPTIBLE)
|
||||
return 0;
|
||||
|
||||
/* with a cpu filter active, the current CPU must be a key in cpu_filter */
if (has_cpu) {
|
||||
__u32 cpu = bpf_get_smp_processor_id();
|
||||
__u8 *ok;
|
||||
|
||||
ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
|
||||
if (!ok)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* with a task filter active, the task's pid must be a key in task_filter */
if (has_task) {
|
||||
__u8 *ok;
|
||||
__u32 pid = t->pid;
|
||||
|
||||
ok = bpf_map_lookup_elem(&task_filter, &pid);
|
||||
if (!ok)
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
SEC("tp_btf/sched_switch")
|
||||
int on_switch(u64 *ctx)
|
||||
{
|
||||
|
@ -92,10 +139,7 @@ int on_switch(u64 *ctx)
|
|||
|
||||
ts = bpf_ktime_get_ns();
|
||||
|
||||
if (prev->flags & PF_KTHREAD)
|
||||
goto next;
|
||||
if (state != TASK_INTERRUPTIBLE &&
|
||||
state != TASK_UNINTERRUPTIBLE)
|
||||
if (!can_record(prev, state))
|
||||
goto next;
|
||||
|
||||
stack_id = bpf_get_stackid(ctx, &stacks,
|
||||
|
|
|
@ -2,15 +2,17 @@
|
|||
#define PERF_UTIL_OFF_CPU_H
|
||||
|
||||
struct evlist;
|
||||
struct target;
|
||||
struct perf_session;
|
||||
|
||||
#define OFFCPU_EVENT "offcpu-time"
|
||||
|
||||
#ifdef HAVE_BPF_SKEL
|
||||
int off_cpu_prepare(struct evlist *evlist);
|
||||
int off_cpu_prepare(struct evlist *evlist, struct target *target);
|
||||
int off_cpu_write(struct perf_session *session);
|
||||
#else
|
||||
/*
 * Fallback used when HAVE_BPF_SKEL is not defined: both arguments are
 * ignored and the call always fails with -1.
 */
static inline int off_cpu_prepare(struct evlist *evlist __maybe_unused)
|
||||
static inline int off_cpu_prepare(struct evlist *evlist __maybe_unused,
|
||||
struct target *target __maybe_unused)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue