Merge branch 'bpf-stacktrace-map-next-key-support'

Yonghong Song says:

====================
The patch set implements the bpf syscall command BPF_MAP_GET_NEXT_KEY
for the stacktrace map. Patch #1 is the core implementation,
and Patch #2 adds a bpf selftest in the tools/testing/selftests/bpf
directory. Please see the individual patch comments for details.

Changelog:
  v1 -> v2:
   - For an invalid key (key pointer is non-NULL), set next_key to the first valid key.
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
Daniel Borkmann 2018-01-06 23:52:23 +01:00
commit 9be99badee
4 changed files with 216 additions and 3 deletions

View File

@ -226,9 +226,33 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
return 0;
}
static int stack_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
static int stack_map_get_next_key(struct bpf_map *map, void *key,
void *next_key)
{
return -EINVAL;
struct bpf_stack_map *smap = container_of(map,
struct bpf_stack_map, map);
u32 id;
WARN_ON_ONCE(!rcu_read_lock_held());
if (!key) {
id = 0;
} else {
id = *(u32 *)key;
if (id >= smap->n_buckets || !smap->buckets[id])
id = 0;
else
id++;
}
while (id < smap->n_buckets && !smap->buckets[id])
id++;
if (id >= smap->n_buckets)
return -ENOENT;
*(u32 *)next_key = id;
return 0;
}
static int stack_map_update_elem(struct bpf_map *map, void *key, void *value,

View File

@ -19,7 +19,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
test_l4lb_noinline.o test_xdp_noinline.o
test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o
TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
test_offload.py

View File

@ -837,6 +837,132 @@ static void test_tp_attach_query(void)
free(query);
}
/*
 * Check that every key found while iterating map1 can also be looked up
 * in map2.
 *
 * Returns 0 when all keys of map1 are present in map2 and the iteration
 * ended with errno == ENOENT. Returns the failing call's error when the
 * first key cannot be fetched (this includes an empty map1) or when a
 * lookup in map2 fails, and -1 when the iteration stopped with any other
 * errno.
 */
static int compare_map_keys(int map1_fd, int map2_fd)
{
	/* Scratch space for looked-up values; the contents are never read. */
	char scratch[PERF_MAX_STACK_DEPTH * sizeof(__u64)];
	__u32 cur, nxt;
	int ret;

	ret = bpf_map_get_next_key(map1_fd, NULL, &cur);
	if (ret)
		return ret;

	for (;;) {
		ret = bpf_map_lookup_elem(map2_fd, &cur, scratch);
		if (ret)
			return ret;

		if (bpf_map_get_next_key(map1_fd, &cur, &nxt) != 0)
			break;
		cur = nxt;
	}

	/* Only "no more keys" counts as a clean end of the walk. */
	return errno == ENOENT ? 0 : -1;
}
/*
 * Load ./test_stacktrace_map.o, attach it to the sched/sched_switch
 * tracepoint through a perf event opened on CPU 0, let it collect stack
 * ids for one second, then stop collection via control_map and verify that
 * stackid_hmap and stackmap contain exactly the same set of keys.
 *
 * Results are reported through CHECK(); all acquired resources are released
 * on every exit path.
 */
static void test_stacktrace_map(void)
{
	int control_map_fd, stackid_hmap_fd, stackmap_fd;
	const char *file = "./test_stacktrace_map.o";
	int bytes, efd, err, pmu_fd, prog_fd;
	struct perf_event_attr attr = {};
	/*
	 * NOTE(review): duration is not read in this function; presumably
	 * the CHECK() macro references it - confirm before removing.
	 */
	__u32 key, val, duration = 0;
	struct bpf_object *obj;
	char buf[256];

	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
		goto out;

	/* Get the ID for the sched/sched_switch tracepoint */
	snprintf(buf, sizeof(buf),
		 "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
	efd = open(buf, O_RDONLY, 0);
	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
		goto close_prog;

	bytes = read(efd, buf, sizeof(buf));
	close(efd);
	if (CHECK(bytes <= 0 || (size_t)bytes >= sizeof(buf),
		  "read", "bytes %d errno %d\n", bytes, errno))
		goto close_prog;
	/*
	 * read() does not terminate the buffer, and buf still holds the tail
	 * of the path written above; terminate it so strtol() only ever sees
	 * the bytes that were actually read.
	 */
	buf[bytes] = '\0';

	/* Open the perf event and attach bpf program */
	attr.config = strtol(buf, NULL, 0);
	attr.type = PERF_TYPE_TRACEPOINT;
	attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
	attr.sample_period = 1;
	attr.wakeup_events = 1;
	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
			 0 /* cpu 0 */, -1 /* group id */,
			 0 /* flags */);
	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
		  pmu_fd, errno))
		goto close_prog;

	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
		  err, errno))
		goto close_pmu;

	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
		  err, errno))
		goto disable_pmu;

	/* find map fds; on failure report the fd that was returned */
	control_map_fd = bpf_find_map(__func__, obj, "control_map");
	if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
		  "err %d errno %d\n", control_map_fd, errno))
		goto disable_pmu;

	stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
	if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
		  "err %d errno %d\n", stackid_hmap_fd, errno))
		goto disable_pmu;

	stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
	if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
		  stackmap_fd, errno))
		goto disable_pmu;

	/* give some time for bpf program run */
	sleep(1);

	/*
	 * Disable stack trace collection. If this fails the two maps keep
	 * changing underneath the comparison below, so treat it as an error.
	 */
	key = 0;
	val = 1;
	err = bpf_map_update_elem(control_map_fd, &key, &val, 0);
	if (CHECK(err, "bpf_map_update_elem control_map",
		  "err %d errno %d\n", err, errno))
		goto disable_pmu;

	/* for every element in stackid_hmap, we can find a corresponding one
	 * in stackmap, and vice versa.
	 */
	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
	if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
		  "err %d errno %d\n", err, errno))
		goto disable_pmu;

	err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
	if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
		  "err %d errno %d\n", err, errno))
		; /* fall through */

disable_pmu:
	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE, 0);

close_pmu:
	close(pmu_fd);

close_prog:
	bpf_object__close(obj);

out:
	return;
}
int main(void)
{
struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
@ -852,6 +978,7 @@ int main(void)
test_pkt_md_access();
test_obj_name();
test_tp_attach_query();
test_stacktrace_map();
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;

View File

@ -0,0 +1,62 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
#include <linux/bpf.h>
#include "bpf_helpers.h"
#ifndef PERF_MAX_STACK_DEPTH
#define PERF_MAX_STACK_DEPTH 127
#endif
/*
 * Single-slot array used as an on/off switch for collection: oncpu() reads
 * slot 0 and returns early when the stored value is non-zero.
 */
struct bpf_map_def SEC("maps") control_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(__u32),
.max_entries = 1,
};
/*
 * Hash map keyed by stack id. oncpu() inserts one entry (value 0) for every
 * stack id that bpf_get_stackid() returns, so its key set is expected to
 * match that of stackmap.
 */
struct bpf_map_def SEC("maps") stackid_hmap = {
.type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(__u32),
.value_size = sizeof(__u32),
.max_entries = 10000,
};
/*
 * Stack trace map passed to bpf_get_stackid() in oncpu(). Each value has
 * room for PERF_MAX_STACK_DEPTH 64-bit entries; max_entries is kept equal
 * to that of stackid_hmap.
 */
struct bpf_map_def SEC("maps") stackmap = {
.type = BPF_MAP_TYPE_STACK_TRACE,
.key_size = sizeof(__u32),
.value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
.max_entries = 10000,
};
/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format
 *
 * Context layout handed to oncpu(). Field order and sizes are meant to
 * mirror that format file, so do not reorder or retype members.
 * NOTE(review): "pad" presumably covers the common tracepoint header
 * fields - confirm against the format file.
 */
struct sched_switch_args {
unsigned long long pad;
char prev_comm[16];
int prev_pid;
int prev_prio;
long long prev_state;
char next_comm[16];
int next_pid;
int next_prio;
};
/*
 * sched_switch tracepoint handler: unless collection has been switched off
 * through control_map[0], record the current stack in stackmap and remember
 * its id as a key in stackid_hmap. Always returns 0.
 */
SEC("tracepoint/sched/sched_switch")
int oncpu(struct sched_switch_args *ctx)
{
	__u32 ctl_slot = 0, marker = 0;
	__u32 *stop_flag;
	__u32 stack_id;

	stop_flag = bpf_map_lookup_elem(&control_map, &ctl_slot);
	if (stop_flag && *stop_flag)
		return 0; /* skip if non-zero *stop_flag */

	/* The size of stackmap and stackid_hmap should be the same */
	stack_id = bpf_get_stackid(ctx, &stackmap, 0);
	if ((int)stack_id < 0)
		return 0; /* negative id: nothing to record */

	bpf_map_update_elem(&stackid_hmap, &stack_id, &marker, 0);
	return 0;
}
/* License string, emitted into the object's "license" section. */
char _license[] SEC("license") = "GPL";
__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */