Merge branch 'trampoline-fixes'
Jiri Olsa says:

====================
hi,
sending 2 fixes to fix kernel support for loading
trampoline programs in bcc/bpftrace and allow to
unwind through trampoline/dispatcher.

Original rfc post [1].

Speedup output of perf bench while running klockstat.py
on kprobes vs trampolines:

    Without:
            $ perf bench sched messaging -l 50000
            ...
                 Total time: 18.571 [sec]

    With current kprobe tracing:
            $ perf bench sched messaging -l 50000
            ...
                 Total time: 183.395 [sec]

    With kfunc tracing:
            $ perf bench sched messaging -l 50000
            ...
                 Total time: 39.773 [sec]

v4 changes:
  - rebased on latest bpf-next/master
  - removed image tree mutex and use trampoline_mutex instead
  - checking directly for string pointer in patch 1 [Alexei]
  - skipped helpers patches, as they are no longer needed [Alexei]

v3 changes:
  - added ack from John Fastabend for patch 1
  - move out is_bpf_image_address from is_bpf_text_address call [David]

v2 changes:
  - make the unwind work for dispatcher as well
  - added test for allowed trampolines count
  - used raw tp pt_regs nest-arrays for trampoline helpers

thanks,
jirka

[1] https://lore.kernel.org/netdev/20191229143740.29143-1-jolsa@kernel.org/
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
commit
e9f02a8027
|
@ -525,7 +525,6 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
|
|||
int bpf_trampoline_link_prog(struct bpf_prog *prog);
|
||||
int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
|
||||
void bpf_trampoline_put(struct bpf_trampoline *tr);
|
||||
void *bpf_jit_alloc_exec_page(void);
|
||||
#define BPF_DISPATCHER_INIT(name) { \
|
||||
.mutex = __MUTEX_INITIALIZER(name.mutex), \
|
||||
.func = &name##func, \
|
||||
|
@ -557,6 +556,13 @@ void *bpf_jit_alloc_exec_page(void);
|
|||
#define BPF_DISPATCHER_PTR(name) (&name)
|
||||
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
|
||||
struct bpf_prog *to);
|
||||
/*
 * Header kept in front of the bytes of an executable BPF image.  The
 * allocator (__bpf_image_alloc()) returns a pointer to @data, and
 * bpf_trampoline_put() gets back to this header with container_of().
 */
struct bpf_image {
|
||||
/* node used to insert/erase/find the image in the image latch tree */
struct latch_tree_node tnode;
|
||||
/* flexible array member: first byte of the image handed to callers */
unsigned char data[];
|
||||
};
|
||||
#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))
|
||||
bool is_bpf_image_address(unsigned long address);
|
||||
void *bpf_image_alloc(void);
|
||||
#else
|
||||
static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
|
||||
{
|
||||
|
@ -578,6 +584,10 @@ static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
|
|||
static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
|
||||
struct bpf_prog *from,
|
||||
struct bpf_prog *to) {}
|
||||
/*
 * Stub on the #else side of the surrounding conditional: no address is
 * ever reported as belonging to a BPF image.
 */
static inline bool is_bpf_image_address(unsigned long address)
{
	(void)address;	/* intentionally unused */

	return false;
}
|
||||
#endif
|
||||
|
||||
struct bpf_func_info_aux {
|
||||
|
|
|
@ -3669,6 +3669,19 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
|
|||
}
|
||||
}
|
||||
|
||||
static bool is_string_ptr(struct btf *btf, const struct btf_type *t)
|
||||
{
|
||||
/* t comes in already as a pointer */
|
||||
t = btf_type_by_id(btf, t->type);
|
||||
|
||||
/* allow const */
|
||||
if (BTF_INFO_KIND(t->info) == BTF_KIND_CONST)
|
||||
t = btf_type_by_id(btf, t->type);
|
||||
|
||||
/* char, signed char, unsigned char */
|
||||
return btf_type_is_int(t) && t->size == 1;
|
||||
}
|
||||
|
||||
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
|
||||
const struct bpf_prog *prog,
|
||||
struct bpf_insn_access_aux *info)
|
||||
|
@ -3735,6 +3748,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
|
|||
*/
|
||||
return true;
|
||||
|
||||
if (is_string_ptr(btf, t))
|
||||
return true;
|
||||
|
||||
/* this is a pointer to another type */
|
||||
info->reg_type = PTR_TO_BTF_ID;
|
||||
|
||||
|
|
|
@ -113,7 +113,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
|
|||
noff = 0;
|
||||
} else {
|
||||
old = d->image + d->image_off;
|
||||
noff = d->image_off ^ (PAGE_SIZE / 2);
|
||||
noff = d->image_off ^ (BPF_IMAGE_SIZE / 2);
|
||||
}
|
||||
|
||||
new = d->num_progs ? d->image + noff : NULL;
|
||||
|
@ -140,7 +140,7 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
|
|||
|
||||
mutex_lock(&d->mutex);
|
||||
if (!d->image) {
|
||||
d->image = bpf_jit_alloc_exec_page();
|
||||
d->image = bpf_image_alloc();
|
||||
if (!d->image)
|
||||
goto out;
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include <linux/bpf.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/rbtree_latch.h>
|
||||
|
||||
/* dummy _ops. The verifier will operate on target program's ops. */
|
||||
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
|
||||
|
@ -16,11 +17,12 @@ const struct bpf_prog_ops bpf_extension_prog_ops = {
|
|||
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
|
||||
|
||||
static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
|
||||
static struct latch_tree_root image_tree __cacheline_aligned;
|
||||
|
||||
/* serializes access to trampoline_table */
|
||||
/* serializes access to trampoline_table and image_tree */
|
||||
static DEFINE_MUTEX(trampoline_mutex);
|
||||
|
||||
void *bpf_jit_alloc_exec_page(void)
|
||||
static void *bpf_jit_alloc_exec_page(void)
|
||||
{
|
||||
void *image;
|
||||
|
||||
|
@ -36,6 +38,64 @@ void *bpf_jit_alloc_exec_page(void)
|
|||
return image;
|
||||
}
|
||||
|
||||
/*
 * Ordering callback for the image latch tree: images are sorted by the
 * address of their struct bpf_image header.
 */
static __always_inline bool image_tree_less(struct latch_tree_node *a,
					    struct latch_tree_node *b)
{
	return container_of(a, struct bpf_image, tnode) <
	       container_of(b, struct bpf_image, tnode);
}
|
||||
|
||||
static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n)
|
||||
{
|
||||
void *image = container_of(n, struct bpf_image, tnode);
|
||||
|
||||
if (addr < image)
|
||||
return -1;
|
||||
if (addr >= image + PAGE_SIZE)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct latch_tree_ops image_tree_ops = {
|
||||
.less = image_tree_less,
|
||||
.comp = image_tree_comp,
|
||||
};
|
||||
|
||||
/*
 * Allocate an executable image and register it in image_tree.
 *
 * @lock: when true, trampoline_mutex is taken around the tree insert;
 *        when false the insert is done without taking it here
 *        (NOTE(review): presumably the caller already holds
 *        trampoline_mutex - confirm against bpf_trampoline_lookup()).
 *
 * Returns a pointer to the image's data area (just past the
 * struct bpf_image header), or NULL if the allocation failed.
 */
static void *__bpf_image_alloc(bool lock)
{
	struct bpf_image *img = bpf_jit_alloc_exec_page();

	if (!img)
		return NULL;

	if (lock) {
		mutex_lock(&trampoline_mutex);
		latch_tree_insert(&img->tnode, &image_tree, &image_tree_ops);
		mutex_unlock(&trampoline_mutex);
	} else {
		latch_tree_insert(&img->tnode, &image_tree, &image_tree_ops);
	}

	return img->data;
}
|
||||
|
||||
void *bpf_image_alloc(void)
|
||||
{
|
||||
return __bpf_image_alloc(true);
|
||||
}
|
||||
|
||||
bool is_bpf_image_address(unsigned long addr)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
rcu_read_lock();
|
||||
ret = latch_tree_find((void *) addr, &image_tree, &image_tree_ops) != NULL;
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
|
||||
{
|
||||
struct bpf_trampoline *tr;
|
||||
|
@ -56,7 +116,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
|
|||
goto out;
|
||||
|
||||
/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
|
||||
image = bpf_jit_alloc_exec_page();
|
||||
image = __bpf_image_alloc(false);
|
||||
if (!image) {
|
||||
kfree(tr);
|
||||
tr = NULL;
|
||||
|
@ -131,14 +191,14 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
|
|||
}
|
||||
|
||||
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
|
||||
* bytes on x86. Pick a number to fit into PAGE_SIZE / 2
|
||||
* bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
|
||||
*/
|
||||
#define BPF_MAX_TRAMP_PROGS 40
|
||||
|
||||
static int bpf_trampoline_update(struct bpf_trampoline *tr)
|
||||
{
|
||||
void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
|
||||
void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
|
||||
void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2;
|
||||
void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2;
|
||||
struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
|
||||
int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
|
||||
int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
|
||||
|
@ -174,7 +234,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
|
|||
*/
|
||||
synchronize_rcu_tasks();
|
||||
|
||||
err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
|
||||
err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2,
|
||||
&tr->func.model, flags,
|
||||
fentry, fentry_cnt,
|
||||
fexit, fexit_cnt,
|
||||
|
@ -284,6 +344,8 @@ out:
|
|||
|
||||
void bpf_trampoline_put(struct bpf_trampoline *tr)
|
||||
{
|
||||
struct bpf_image *image;
|
||||
|
||||
if (!tr)
|
||||
return;
|
||||
mutex_lock(&trampoline_mutex);
|
||||
|
@ -294,9 +356,11 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
|
|||
goto out;
|
||||
if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
|
||||
goto out;
|
||||
image = container_of(tr->image, struct bpf_image, data);
|
||||
latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops);
|
||||
/* wait for tasks to get out of trampoline before freeing it */
|
||||
synchronize_rcu_tasks();
|
||||
bpf_jit_free_exec(tr->image);
|
||||
bpf_jit_free_exec(image);
|
||||
hlist_del(&tr->hlist);
|
||||
kfree(tr);
|
||||
out:
|
||||
|
|
|
@ -131,8 +131,9 @@ int kernel_text_address(unsigned long addr)
|
|||
* triggers a stack trace, or a WARN() that happens during
|
||||
* coming back from idle, or cpu on or offlining.
|
||||
*
|
||||
* is_module_text_address() as well as the kprobe slots
|
||||
* and is_bpf_text_address() require RCU to be watching.
|
||||
* is_module_text_address() as well as the kprobe slots,
|
||||
* is_bpf_text_address() and is_bpf_image_address require
|
||||
* RCU to be watching.
|
||||
*/
|
||||
no_rcu = !rcu_is_watching();
|
||||
|
||||
|
@ -148,6 +149,8 @@ int kernel_text_address(unsigned long addr)
|
|||
goto out;
|
||||
if (is_bpf_text_address(addr))
|
||||
goto out;
|
||||
if (is_bpf_image_address(addr))
|
||||
goto out;
|
||||
ret = 0;
|
||||
out:
|
||||
if (no_rcu)
|
||||
|
|
|
@ -0,0 +1,112 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
#define _GNU_SOURCE
|
||||
#include <sched.h>
|
||||
#include <sys/prctl.h>
|
||||
#include <test_progs.h>
|
||||
|
||||
#define MAX_TRAMP_PROGS 40
|
||||
|
||||
/*
 * Per-attachment bookkeeping for test_trampoline_count(): one loaded
 * object plus the link created for it.  Each instance attaches either
 * the fentry or the fexit program, so at most one link is set.
 */
struct inst {
|
||||
/* object opened from test_trampoline_count.o */
struct bpf_object *obj;
|
||||
/* link of the attached fentry program, if that variant was chosen */
struct bpf_link *link_fentry;
|
||||
/* link of the attached fexit program, if that variant was chosen */
struct bpf_link *link_fexit;
|
||||
};
|
||||
|
||||
/*
 * Rename the current task by writing a fixed name to /proc/self/comm.
 *
 * Returns 0 on success, -1 if the file cannot be opened or written
 * (the failure is also reported through CHECK()).
 */
static int test_task_rename(void)
{
	char buf[] = "test_overhead";
	int duration = 0;
	int fd, err;
	int ret = 0;

	fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
	if (CHECK(fd < 0, "open /proc", "err %d", errno))
		return -1;

	/* sizeof(buf) also writes the terminating NUL */
	err = write(fd, buf, sizeof(buf));
	if (err < 0) {
		/* report before close() gets a chance to touch errno */
		CHECK(err < 0, "task rename", "err %d", errno);
		ret = -1;
	}

	close(fd);
	return ret;
}
|
||||
|
||||
/*
 * Find the program with section title @name in @obj and attach it as a
 * tracing program.
 *
 * Returns whatever bpf_program__attach_trace() returns, or
 * ERR_PTR(-EINVAL) when no program with that title exists.
 */
static struct bpf_link *load(struct bpf_object *obj, const char *name)
{
	int duration = 0;
	struct bpf_program *prog = bpf_object__find_program_by_title(obj, name);

	if (!CHECK(!prog, "find_probe", "prog '%s' not found\n", name))
		return bpf_program__attach_trace(prog);

	return ERR_PTR(-EINVAL);
}
|
||||
|
||||
void test_trampoline_count(void)
|
||||
{
|
||||
const char *fentry_name = "fentry/__set_task_comm";
|
||||
const char *fexit_name = "fexit/__set_task_comm";
|
||||
const char *object = "test_trampoline_count.o";
|
||||
struct inst inst[MAX_TRAMP_PROGS] = { 0 };
|
||||
int err, i = 0, duration = 0;
|
||||
struct bpf_object *obj;
|
||||
struct bpf_link *link;
|
||||
char comm[16] = {};
|
||||
|
||||
/* attach 'allowed' 40 trampoline programs */
|
||||
for (i = 0; i < MAX_TRAMP_PROGS; i++) {
|
||||
obj = bpf_object__open_file(object, NULL);
|
||||
if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
|
||||
goto cleanup;
|
||||
|
||||
err = bpf_object__load(obj);
|
||||
if (CHECK(err, "obj_load", "err %d\n", err))
|
||||
goto cleanup;
|
||||
inst[i].obj = obj;
|
||||
|
||||
if (rand() % 2) {
|
||||
link = load(obj, fentry_name);
|
||||
if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
|
||||
goto cleanup;
|
||||
inst[i].link_fentry = link;
|
||||
} else {
|
||||
link = load(obj, fexit_name);
|
||||
if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
|
||||
goto cleanup;
|
||||
inst[i].link_fexit = link;
|
||||
}
|
||||
}
|
||||
|
||||
/* and try 1 extra.. */
|
||||
obj = bpf_object__open_file(object, NULL);
|
||||
if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
|
||||
goto cleanup;
|
||||
|
||||
err = bpf_object__load(obj);
|
||||
if (CHECK(err, "obj_load", "err %d\n", err))
|
||||
goto cleanup_extra;
|
||||
|
||||
/* ..that needs to fail */
|
||||
link = load(obj, fentry_name);
|
||||
if (CHECK(!IS_ERR(link), "cannot attach over the limit", "err %ld\n", PTR_ERR(link))) {
|
||||
bpf_link__destroy(link);
|
||||
goto cleanup_extra;
|
||||
}
|
||||
|
||||
/* with E2BIG error */
|
||||
CHECK(PTR_ERR(link) != -E2BIG, "proper error check", "err %ld\n", PTR_ERR(link));
|
||||
|
||||
/* and finaly execute the probe */
|
||||
if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
|
||||
goto cleanup_extra;
|
||||
CHECK_FAIL(test_task_rename());
|
||||
CHECK_FAIL(prctl(PR_SET_NAME, comm, 0L, 0L, 0L));
|
||||
|
||||
cleanup_extra:
|
||||
bpf_object__close(obj);
|
||||
cleanup:
|
||||
while (--i) {
|
||||
bpf_link__destroy(inst[i].link_fentry);
|
||||
bpf_link__destroy(inst[i].link_fexit);
|
||||
bpf_object__close(inst[i].obj);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <linux/bpf.h>
|
||||
#include "bpf_trace_helpers.h"
|
||||
|
||||
struct task_struct;
|
||||
|
||||
/* No-op program placed in the fentry/__set_task_comm section; always returns 0. */
SEC("fentry/__set_task_comm")
int BPF_PROG(prog1, struct task_struct *tsk, const char *buf, bool exec)
{
	return 0;
}
|
||||
|
||||
/* No-op program placed in the fexit/__set_task_comm section; always returns 0. */
SEC("fexit/__set_task_comm")
int BPF_PROG(prog2, struct task_struct *tsk, const char *buf, bool exec)
{
	return 0;
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
Loading…
Reference in New Issue