perf lock contention: Implement -t/--threads option for BPF
The BPF mode did not show the per-thread stats properly. Use the task's thread ID (PID) as the key instead of stack_id, and add a task_data map to save task comm names.

  $ sudo ./perf lock con -abt -E 5 sleep 1
   contended   total wait     max wait     avg wait          pid   comm

           1    740.66 ms    740.66 ms    740.66 ms         1950   nv_queue
           3    305.50 ms    298.19 ms    101.83 ms         1884   nvidia-modeset/
           1     25.14 us     25.14 us     25.14 us      2725038   EventManager_De
          12     23.09 us      9.30 us      1.92 us            0   swapper
           1     20.18 us     20.18 us     20.18 us      2725033   EventManager_De

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Blake Jones <blakejones@google.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221209190727.759804-3-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
fd507d3e35
commit
eca949b2b4
|
@ -12,6 +12,7 @@
|
|||
#include "util/target.h"
|
||||
#include "util/callchain.h"
|
||||
#include "util/lock-contention.h"
|
||||
#include "util/bpf_skel/lock_data.h"
|
||||
|
||||
#include <subcmd/pager.h>
|
||||
#include <subcmd/parse-options.h>
|
||||
|
@ -61,11 +62,7 @@ static int max_stack_depth = CONTENTION_STACK_DEPTH;
|
|||
static int stack_skip = CONTENTION_STACK_SKIP;
|
||||
static int print_nr_entries = INT_MAX / 2;
|
||||
|
||||
static enum {
|
||||
LOCK_AGGR_ADDR,
|
||||
LOCK_AGGR_TASK,
|
||||
LOCK_AGGR_CALLER,
|
||||
} aggr_mode = LOCK_AGGR_ADDR;
|
||||
static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
|
||||
|
||||
static struct thread_stat *thread_stat_find(u32 tid)
|
||||
{
|
||||
|
@ -1619,6 +1616,7 @@ static int __cmd_contention(int argc, const char **argv)
|
|||
.map_nr_entries = bpf_map_entries,
|
||||
.max_stack = max_stack_depth,
|
||||
.stack_skip = stack_skip,
|
||||
.aggr_mode = show_thread_stats ? LOCK_AGGR_TASK : LOCK_AGGR_CALLER,
|
||||
};
|
||||
|
||||
session = perf_session__new(use_bpf ? NULL : &data, &eops);
|
||||
|
@ -1691,11 +1689,6 @@ static int __cmd_contention(int argc, const char **argv)
|
|||
if (select_key(true))
|
||||
goto out_delete;
|
||||
|
||||
if (show_thread_stats)
|
||||
aggr_mode = LOCK_AGGR_TASK;
|
||||
else
|
||||
aggr_mode = LOCK_AGGR_CALLER;
|
||||
|
||||
if (use_bpf) {
|
||||
lock_contention_start();
|
||||
if (argc)
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include "util/map.h"
|
||||
#include "util/symbol.h"
|
||||
#include "util/target.h"
|
||||
#include "util/thread.h"
|
||||
#include "util/thread_map.h"
|
||||
#include "util/lock-contention.h"
|
||||
#include <linux/zalloc.h>
|
||||
|
@ -30,10 +31,17 @@ int lock_contention_prepare(struct lock_contention *con)
|
|||
}
|
||||
|
||||
bpf_map__set_value_size(skel->maps.stacks, con->max_stack * sizeof(u64));
|
||||
bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
|
||||
bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries);
|
||||
bpf_map__set_max_entries(skel->maps.tstamp, con->map_nr_entries);
|
||||
|
||||
if (con->aggr_mode == LOCK_AGGR_TASK) {
|
||||
bpf_map__set_max_entries(skel->maps.task_data, con->map_nr_entries);
|
||||
bpf_map__set_max_entries(skel->maps.stacks, 1);
|
||||
} else {
|
||||
bpf_map__set_max_entries(skel->maps.task_data, 1);
|
||||
bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
|
||||
}
|
||||
|
||||
if (target__has_cpu(target))
|
||||
ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
|
||||
if (target__has_task(target))
|
||||
|
@ -82,7 +90,9 @@ int lock_contention_prepare(struct lock_contention *con)
|
|||
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
|
||||
}
|
||||
|
||||
/* these don't work well if in the rodata section */
|
||||
skel->bss->stack_skip = con->stack_skip;
|
||||
skel->bss->aggr_mode = con->aggr_mode;
|
||||
|
||||
lock_contention_bpf__attach(skel);
|
||||
return 0;
|
||||
|
@ -102,7 +112,7 @@ int lock_contention_stop(void)
|
|||
|
||||
int lock_contention_read(struct lock_contention *con)
|
||||
{
|
||||
int fd, stack, err = 0;
|
||||
int fd, stack, task_fd, err = 0;
|
||||
struct contention_key *prev_key, key;
|
||||
struct contention_data data = {};
|
||||
struct lock_stat *st = NULL;
|
||||
|
@ -112,6 +122,7 @@ int lock_contention_read(struct lock_contention *con)
|
|||
|
||||
fd = bpf_map__fd(skel->maps.lock_stat);
|
||||
stack = bpf_map__fd(skel->maps.stacks);
|
||||
task_fd = bpf_map__fd(skel->maps.task_data);
|
||||
|
||||
con->lost = skel->bss->lost;
|
||||
|
||||
|
@ -119,6 +130,13 @@ int lock_contention_read(struct lock_contention *con)
|
|||
if (stack_trace == NULL)
|
||||
return -1;
|
||||
|
||||
if (con->aggr_mode == LOCK_AGGR_TASK) {
|
||||
struct thread *idle = __machine__findnew_thread(machine,
|
||||
/*pid=*/0,
|
||||
/*tid=*/0);
|
||||
thread__set_comm(idle, "swapper", /*timestamp=*/0);
|
||||
}
|
||||
|
||||
prev_key = NULL;
|
||||
while (!bpf_map_get_next_key(fd, prev_key, &key)) {
|
||||
struct map *kmap;
|
||||
|
@ -143,6 +161,22 @@ int lock_contention_read(struct lock_contention *con)
|
|||
|
||||
st->flags = data.flags;
|
||||
|
||||
if (con->aggr_mode == LOCK_AGGR_TASK) {
|
||||
struct contention_task_data task;
|
||||
struct thread *t;
|
||||
|
||||
st->addr = key.stack_or_task_id;
|
||||
|
||||
/* do not update idle comm which contains CPU number */
|
||||
if (st->addr) {
|
||||
bpf_map_lookup_elem(task_fd, &key, &task);
|
||||
t = __machine__findnew_thread(machine, /*pid=*/-1,
|
||||
key.stack_or_task_id);
|
||||
thread__set_comm(t, task.comm, /*timestamp=*/0);
|
||||
}
|
||||
goto next;
|
||||
}
|
||||
|
||||
bpf_map_lookup_elem(stack, &key, stack_trace);
|
||||
|
||||
/* skip lock internal functions */
|
||||
|
@ -175,7 +209,7 @@ int lock_contention_read(struct lock_contention *con)
|
|||
if (st->callstack == NULL)
|
||||
break;
|
||||
}
|
||||
|
||||
next:
|
||||
hlist_add_head(&st->hash_entry, con->result);
|
||||
prev_key = &key;
|
||||
|
||||
|
|
|
@ -41,6 +41,13 @@ struct {
|
|||
__uint(max_entries, MAX_ENTRIES);
|
||||
} lock_stat SEC(".maps");
|
||||
|
||||
/*
 * task_data: hash map from a __u32 key to struct contention_task_data.
 *
 * update_task_data() inserts entries keyed by pid, storing the task's
 * comm name.  MAX_ENTRIES is only the compiled-in size;
 * lock_contention_prepare() sets max_entries to con->map_nr_entries in
 * LOCK_AGGR_TASK mode and to 1 otherwise.
 */
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__uint(key_size, sizeof(__u32));
|
||||
__uint(value_size, sizeof(struct contention_task_data));
|
||||
__uint(max_entries, MAX_ENTRIES);
|
||||
} task_data SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__uint(key_size, sizeof(__u32));
|
||||
|
@ -61,6 +68,9 @@ int has_cpu;
|
|||
int has_task;
|
||||
int stack_skip;
|
||||
|
||||
/* determine the key of lock stat */
|
||||
int aggr_mode;
|
||||
|
||||
/* error stat */
|
||||
int lost;
|
||||
|
||||
|
@ -87,6 +97,19 @@ static inline int can_record(void)
|
|||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Store a comm name for @pid in the task_data map if the map has no
 * entry for that pid yet.
 *
 * The name comes from bpf_get_current_comm(), so it is the comm of the
 * task running this program rather than a lookup by @pid.
 * NOTE(review): presumably callers always pass the current task's pid,
 * so the two match - confirm against contention_end().
 */
static inline void update_task_data(__u32 pid)
|
||||
{
|
||||
struct contention_task_data *p;
|
||||
|
||||
/* an existing entry is left untouched */
p = bpf_map_lookup_elem(&task_data, &pid);
|
||||
if (p == NULL) {
|
||||
struct contention_task_data data;
|
||||
|
||||
bpf_get_current_comm(data.comm, sizeof(data.comm));
|
||||
/*
 * BPF_NOEXIST: insert only, never overwrite an existing entry.
 * The return value is ignored, so a failed insert is not reported.
 */
bpf_map_update_elem(&task_data, &pid, &data, BPF_NOEXIST);
|
||||
}
|
||||
}
|
||||
|
||||
SEC("tp_btf/contention_begin")
|
||||
int contention_begin(u64 *ctx)
|
||||
{
|
||||
|
@ -115,10 +138,14 @@ int contention_begin(u64 *ctx)
|
|||
pelem->timestamp = bpf_ktime_get_ns();
|
||||
pelem->lock = (__u64)ctx[0];
|
||||
pelem->flags = (__u32)ctx[1];
|
||||
pelem->stack_id = bpf_get_stackid(ctx, &stacks, BPF_F_FAST_STACK_CMP | stack_skip);
|
||||
|
||||
if (pelem->stack_id < 0)
|
||||
lost++;
|
||||
if (aggr_mode == LOCK_AGGR_CALLER) {
|
||||
pelem->stack_id = bpf_get_stackid(ctx, &stacks,
|
||||
BPF_F_FAST_STACK_CMP | stack_skip);
|
||||
if (pelem->stack_id < 0)
|
||||
lost++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -141,7 +168,13 @@ int contention_end(u64 *ctx)
|
|||
|
||||
duration = bpf_ktime_get_ns() - pelem->timestamp;
|
||||
|
||||
key.stack_or_task_id = pelem->stack_id;
|
||||
if (aggr_mode == LOCK_AGGR_CALLER) {
|
||||
key.stack_or_task_id = pelem->stack_id;
|
||||
} else {
|
||||
key.stack_or_task_id = pid;
|
||||
update_task_data(pid);
|
||||
}
|
||||
|
||||
data = bpf_map_lookup_elem(&lock_stat, &key);
|
||||
if (!data) {
|
||||
struct contention_data first = {
|
||||
|
|
|
@ -117,6 +117,7 @@ struct lock_contention {
|
|||
int lost;
|
||||
int max_stack;
|
||||
int stack_skip;
|
||||
int aggr_mode;
|
||||
};
|
||||
|
||||
#ifdef HAVE_BPF_SKEL
|
||||
|
|
Loading…
Reference in New Issue