Merge branch 'bpf_enable_stats'
Song Liu says: ==================== run_time_ns is a useful stat for BPF programs. However, it is gated by sysctl kernel.bpf_stats_enabled. When multiple user space tools are toggling kernel.bpf_stats_enabled at the same time, they may confuse each other. Solve this problem with a new BPF command BPF_ENABLE_STATS. Changes v8 => v9: 1. Clean up in selftest (Andrii). 2. Not using static variable in test program (Andrii). Changes v7 => v8: 1. Change name BPF_STATS_RUNTIME_CNT => BPF_STATS_RUN_TIME (Alexei). 2. Add CHECK_ATTR to bpf_enable_stats() (Alexei). 3. Rebase (Andrii). 4. Simplify the selftest (Alexei). Changes v6 => v7: 1. Add test to verify run_cnt matches count measured by the program. Changes v5 => v6: 1. Simplify test program (Yonghong). 2. Rebase (with some conflicts). Changes v4 => v5: 1. Use memset to zero bpf_attr in bpf_enable_stats() (Andrii). Changes v3 => v4: 1. Add libbpf support and selftest; 2. Avoid cleaning trailing space. Changes v2 => v3: 1. Rename the command to BPF_ENABLE_STATS, and make it extendible. 2. fix commit log; 3. remove unnecessary headers. ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
commit
3dbb5b5040
|
@ -987,6 +987,7 @@ _out: \
|
||||||
|
|
||||||
#ifdef CONFIG_BPF_SYSCALL
|
#ifdef CONFIG_BPF_SYSCALL
|
||||||
DECLARE_PER_CPU(int, bpf_prog_active);
|
DECLARE_PER_CPU(int, bpf_prog_active);
|
||||||
|
extern struct mutex bpf_stats_enabled_mutex;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Block execution of BPF programs attached to instrumentation (perf,
|
* Block execution of BPF programs attached to instrumentation (perf,
|
||||||
|
|
|
@ -115,6 +115,7 @@ enum bpf_cmd {
|
||||||
BPF_LINK_UPDATE,
|
BPF_LINK_UPDATE,
|
||||||
BPF_LINK_GET_FD_BY_ID,
|
BPF_LINK_GET_FD_BY_ID,
|
||||||
BPF_LINK_GET_NEXT_ID,
|
BPF_LINK_GET_NEXT_ID,
|
||||||
|
BPF_ENABLE_STATS,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum bpf_map_type {
|
enum bpf_map_type {
|
||||||
|
@ -390,6 +391,12 @@ enum {
|
||||||
*/
|
*/
|
||||||
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
|
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
|
||||||
|
|
||||||
|
/* type for BPF_ENABLE_STATS */
|
||||||
|
enum bpf_stats_type {
|
||||||
|
/* enabled run_time_ns and run_cnt */
|
||||||
|
BPF_STATS_RUN_TIME = 0,
|
||||||
|
};
|
||||||
|
|
||||||
enum bpf_stack_build_id_status {
|
enum bpf_stack_build_id_status {
|
||||||
/* user space need an empty entry to identify end of a trace */
|
/* user space need an empty entry to identify end of a trace */
|
||||||
BPF_STACK_BUILD_ID_EMPTY = 0,
|
BPF_STACK_BUILD_ID_EMPTY = 0,
|
||||||
|
@ -601,6 +608,10 @@ union bpf_attr {
|
||||||
__u32 old_prog_fd;
|
__u32 old_prog_fd;
|
||||||
} link_update;
|
} link_update;
|
||||||
|
|
||||||
|
struct { /* struct used by BPF_ENABLE_STATS command */
|
||||||
|
__u32 type;
|
||||||
|
} enable_stats;
|
||||||
|
|
||||||
} __attribute__((aligned(8)));
|
} __attribute__((aligned(8)));
|
||||||
|
|
||||||
/* The description below is an attempt at providing documentation to eBPF
|
/* The description below is an attempt at providing documentation to eBPF
|
||||||
|
|
|
@ -3872,6 +3872,60 @@ static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
|
||||||
return fd;
|
return fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DEFINE_MUTEX(bpf_stats_enabled_mutex);
|
||||||
|
|
||||||
|
static int bpf_stats_release(struct inode *inode, struct file *file)
|
||||||
|
{
|
||||||
|
mutex_lock(&bpf_stats_enabled_mutex);
|
||||||
|
static_key_slow_dec(&bpf_stats_enabled_key.key);
|
||||||
|
mutex_unlock(&bpf_stats_enabled_mutex);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct file_operations bpf_stats_fops = {
|
||||||
|
.release = bpf_stats_release,
|
||||||
|
};
|
||||||
|
|
||||||
|
static int bpf_enable_runtime_stats(void)
|
||||||
|
{
|
||||||
|
int fd;
|
||||||
|
|
||||||
|
mutex_lock(&bpf_stats_enabled_mutex);
|
||||||
|
|
||||||
|
/* Set a very high limit to avoid overflow */
|
||||||
|
if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) {
|
||||||
|
mutex_unlock(&bpf_stats_enabled_mutex);
|
||||||
|
return -EBUSY;
|
||||||
|
}
|
||||||
|
|
||||||
|
fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC);
|
||||||
|
if (fd >= 0)
|
||||||
|
static_key_slow_inc(&bpf_stats_enabled_key.key);
|
||||||
|
|
||||||
|
mutex_unlock(&bpf_stats_enabled_mutex);
|
||||||
|
return fd;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type
|
||||||
|
|
||||||
|
static int bpf_enable_stats(union bpf_attr *attr)
|
||||||
|
{
|
||||||
|
|
||||||
|
if (CHECK_ATTR(BPF_ENABLE_STATS))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
if (!capable(CAP_SYS_ADMIN))
|
||||||
|
return -EPERM;
|
||||||
|
|
||||||
|
switch (attr->enable_stats.type) {
|
||||||
|
case BPF_STATS_RUN_TIME:
|
||||||
|
return bpf_enable_runtime_stats();
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
|
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
|
||||||
{
|
{
|
||||||
union bpf_attr attr;
|
union bpf_attr attr;
|
||||||
|
@ -3996,6 +4050,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
|
||||||
err = bpf_obj_get_next_id(&attr, uattr,
|
err = bpf_obj_get_next_id(&attr, uattr,
|
||||||
&link_idr, &link_idr_lock);
|
&link_idr, &link_idr_lock);
|
||||||
break;
|
break;
|
||||||
|
case BPF_ENABLE_STATS:
|
||||||
|
err = bpf_enable_stats(&attr);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
err = -EINVAL;
|
err = -EINVAL;
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -201,6 +201,40 @@ static int max_extfrag_threshold = 1000;
|
||||||
|
|
||||||
#endif /* CONFIG_SYSCTL */
|
#endif /* CONFIG_SYSCTL */
|
||||||
|
|
||||||
|
#ifdef CONFIG_BPF_SYSCALL
|
||||||
|
static int bpf_stats_handler(struct ctl_table *table, int write,
|
||||||
|
void __user *buffer, size_t *lenp,
|
||||||
|
loff_t *ppos)
|
||||||
|
{
|
||||||
|
struct static_key *key = (struct static_key *)table->data;
|
||||||
|
static int saved_val;
|
||||||
|
int val, ret;
|
||||||
|
struct ctl_table tmp = {
|
||||||
|
.data = &val,
|
||||||
|
.maxlen = sizeof(val),
|
||||||
|
.mode = table->mode,
|
||||||
|
.extra1 = SYSCTL_ZERO,
|
||||||
|
.extra2 = SYSCTL_ONE,
|
||||||
|
};
|
||||||
|
|
||||||
|
if (write && !capable(CAP_SYS_ADMIN))
|
||||||
|
return -EPERM;
|
||||||
|
|
||||||
|
mutex_lock(&bpf_stats_enabled_mutex);
|
||||||
|
val = saved_val;
|
||||||
|
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
|
||||||
|
if (write && !ret && val != saved_val) {
|
||||||
|
if (val)
|
||||||
|
static_key_slow_inc(key);
|
||||||
|
else
|
||||||
|
static_key_slow_dec(key);
|
||||||
|
saved_val = val;
|
||||||
|
}
|
||||||
|
mutex_unlock(&bpf_stats_enabled_mutex);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* /proc/sys support
|
* /proc/sys support
|
||||||
*/
|
*/
|
||||||
|
@ -2549,7 +2583,7 @@ static struct ctl_table kern_table[] = {
|
||||||
.data = &bpf_stats_enabled_key.key,
|
.data = &bpf_stats_enabled_key.key,
|
||||||
.maxlen = sizeof(bpf_stats_enabled_key),
|
.maxlen = sizeof(bpf_stats_enabled_key),
|
||||||
.mode = 0644,
|
.mode = 0644,
|
||||||
.proc_handler = proc_do_static_key,
|
.proc_handler = bpf_stats_handler,
|
||||||
},
|
},
|
||||||
#endif
|
#endif
|
||||||
#if defined(CONFIG_TREE_RCU)
|
#if defined(CONFIG_TREE_RCU)
|
||||||
|
|
|
@ -115,6 +115,7 @@ enum bpf_cmd {
|
||||||
BPF_LINK_UPDATE,
|
BPF_LINK_UPDATE,
|
||||||
BPF_LINK_GET_FD_BY_ID,
|
BPF_LINK_GET_FD_BY_ID,
|
||||||
BPF_LINK_GET_NEXT_ID,
|
BPF_LINK_GET_NEXT_ID,
|
||||||
|
BPF_ENABLE_STATS,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum bpf_map_type {
|
enum bpf_map_type {
|
||||||
|
@ -390,6 +391,12 @@ enum {
|
||||||
*/
|
*/
|
||||||
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
|
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
|
||||||
|
|
||||||
|
/* type for BPF_ENABLE_STATS */
|
||||||
|
enum bpf_stats_type {
|
||||||
|
/* enabled run_time_ns and run_cnt */
|
||||||
|
BPF_STATS_RUN_TIME = 0,
|
||||||
|
};
|
||||||
|
|
||||||
enum bpf_stack_build_id_status {
|
enum bpf_stack_build_id_status {
|
||||||
/* user space need an empty entry to identify end of a trace */
|
/* user space need an empty entry to identify end of a trace */
|
||||||
BPF_STACK_BUILD_ID_EMPTY = 0,
|
BPF_STACK_BUILD_ID_EMPTY = 0,
|
||||||
|
@ -601,6 +608,10 @@ union bpf_attr {
|
||||||
__u32 old_prog_fd;
|
__u32 old_prog_fd;
|
||||||
} link_update;
|
} link_update;
|
||||||
|
|
||||||
|
struct { /* struct used by BPF_ENABLE_STATS command */
|
||||||
|
__u32 type;
|
||||||
|
} enable_stats;
|
||||||
|
|
||||||
} __attribute__((aligned(8)));
|
} __attribute__((aligned(8)));
|
||||||
|
|
||||||
/* The description below is an attempt at providing documentation to eBPF
|
/* The description below is an attempt at providing documentation to eBPF
|
||||||
|
|
|
@ -841,3 +841,13 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
|
||||||
|
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int bpf_enable_stats(enum bpf_stats_type type)
|
||||||
|
{
|
||||||
|
union bpf_attr attr;
|
||||||
|
|
||||||
|
memset(&attr, 0, sizeof(attr));
|
||||||
|
attr.enable_stats.type = type;
|
||||||
|
|
||||||
|
return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
|
||||||
|
}
|
||||||
|
|
|
@ -231,6 +231,7 @@ LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf,
|
||||||
LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
|
LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
|
||||||
__u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
|
__u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
|
||||||
__u64 *probe_offset, __u64 *probe_addr);
|
__u64 *probe_offset, __u64 *probe_addr);
|
||||||
|
LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} /* extern "C" */
|
} /* extern "C" */
|
||||||
|
|
|
@ -257,6 +257,7 @@ LIBBPF_0.0.8 {
|
||||||
|
|
||||||
LIBBPF_0.0.9 {
|
LIBBPF_0.0.9 {
|
||||||
global:
|
global:
|
||||||
|
bpf_enable_stats;
|
||||||
bpf_link_get_fd_by_id;
|
bpf_link_get_fd_by_id;
|
||||||
bpf_link_get_next_id;
|
bpf_link_get_next_id;
|
||||||
} LIBBPF_0.0.8;
|
} LIBBPF_0.0.8;
|
||||||
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
#include <test_progs.h>
|
||||||
|
#include "test_enable_stats.skel.h"
|
||||||
|
|
||||||
|
void test_enable_stats(void)
|
||||||
|
{
|
||||||
|
struct test_enable_stats *skel;
|
||||||
|
int stats_fd, err, prog_fd;
|
||||||
|
struct bpf_prog_info info;
|
||||||
|
__u32 info_len = sizeof(info);
|
||||||
|
int duration = 0;
|
||||||
|
|
||||||
|
skel = test_enable_stats__open_and_load();
|
||||||
|
if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
|
||||||
|
return;
|
||||||
|
|
||||||
|
stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
|
||||||
|
if (CHECK(stats_fd < 0, "get_stats_fd", "failed %d\n", errno)) {
|
||||||
|
test_enable_stats__destroy(skel);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = test_enable_stats__attach(skel);
|
||||||
|
if (CHECK(err, "attach_raw_tp", "err %d\n", err))
|
||||||
|
goto cleanup;
|
||||||
|
|
||||||
|
test_enable_stats__detach(skel);
|
||||||
|
|
||||||
|
prog_fd = bpf_program__fd(skel->progs.test_enable_stats);
|
||||||
|
memset(&info, 0, info_len);
|
||||||
|
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
|
||||||
|
if (CHECK(err, "get_prog_info",
|
||||||
|
"failed to get bpf_prog_info for fd %d\n", prog_fd))
|
||||||
|
goto cleanup;
|
||||||
|
if (CHECK(info.run_time_ns == 0, "check_stats_enabled",
|
||||||
|
"failed to enable run_time_ns stats\n"))
|
||||||
|
goto cleanup;
|
||||||
|
|
||||||
|
CHECK(info.run_cnt != skel->bss->count, "check_run_cnt_valid",
|
||||||
|
"invalid run_cnt stats\n");
|
||||||
|
|
||||||
|
cleanup:
|
||||||
|
test_enable_stats__destroy(skel);
|
||||||
|
close(stats_fd);
|
||||||
|
}
|
|
@ -0,0 +1,18 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
// Copyright (c) 2020 Facebook
|
||||||
|
|
||||||
|
#include <linux/bpf.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <bpf/bpf_helpers.h>
|
||||||
|
|
||||||
|
char _license[] SEC("license") = "GPL";
|
||||||
|
|
||||||
|
__u64 count = 0;
|
||||||
|
|
||||||
|
SEC("raw_tracepoint/sys_enter")
|
||||||
|
int test_enable_stats(void *ctx)
|
||||||
|
{
|
||||||
|
count += 1;
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in New Issue