Merge branch 'trampoline-fixes'

Jiri Olsa says:

====================
hi,
sending 2 fixes for kernel support of loading
trampoline programs in bcc/bpftrace and to allow
unwinding through trampoline/dispatcher.

Original rfc post [1].

Speedup output of perf bench while running klockstat.py
on kprobes vs trampolines:

    Without:
            $ perf bench sched messaging -l 50000
            ...
                 Total time: 18.571 [sec]

    With current kprobe tracing:
            $ perf bench sched messaging -l 50000
            ...
                 Total time: 183.395 [sec]

    With kfunc tracing:
            $ perf bench sched messaging -l 50000
            ...
                 Total time: 39.773 [sec]

v4 changes:
  - rebased on latest bpf-next/master
  - removed image tree mutex and use trampoline_mutex instead
  - checking directly for string pointer in patch 1 [Alexei]
  - skipped helpers patches, as they are no longer needed [Alexei]

v3 changes:
  - added ack from John Fastabend for patch 1
  - move out is_bpf_image_address from is_bpf_text_address call [David]

v2 changes:
  - make the unwind work for dispatcher as well
  - added test for allowed trampolines count
  - used raw tp pt_regs nest-arrays for trampoline helpers

thanks,
jirka

[1] https://lore.kernel.org/netdev/20191229143740.29143-1-jolsa@kernel.org/
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov 2020-01-25 07:12:41 -08:00
commit e9f02a8027
7 changed files with 239 additions and 13 deletions

View File

@ -525,7 +525,6 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
int bpf_trampoline_link_prog(struct bpf_prog *prog);
int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
void bpf_trampoline_put(struct bpf_trampoline *tr);
void *bpf_jit_alloc_exec_page(void);
#define BPF_DISPATCHER_INIT(name) { \
.mutex = __MUTEX_INITIALIZER(name.mutex), \
.func = &name##func, \
@ -557,6 +556,13 @@ void *bpf_jit_alloc_exec_page(void);
#define BPF_DISPATCHER_PTR(name) (&name)
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
struct bpf_prog *to);
/*
 * Header placed in front of an executable image so the image can be
 * linked into a latch tree; the usable bytes follow in data[].
 */
struct bpf_image {
/* latch tree linkage for this image */
struct latch_tree_node tnode;
/* flexible array member: payload that follows the header */
unsigned char data[];
};
#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))
bool is_bpf_image_address(unsigned long address);
void *bpf_image_alloc(void);
#else
static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
@ -578,6 +584,10 @@ static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
struct bpf_prog *from,
struct bpf_prog *to) {}
/*
 * Stub used in the #else configuration branch: no address is ever
 * reported as belonging to a BPF image.
 */
static inline bool is_bpf_image_address(unsigned long address)
{
return false;
}
#endif
struct bpf_func_info_aux {

View File

@ -3669,6 +3669,19 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
}
}
/*
 * Tell whether pointer type @t refers to a one-byte integer type,
 * looking through at most one const qualifier on the pointee.
 *
 * @btf: BTF object used to resolve type ids
 * @t:   type that the caller has already established to be a pointer
 *
 * Returns true for pointers to char, signed char or unsigned char
 * (optionally const), false otherwise.
 */
static bool is_string_ptr(struct btf *btf, const struct btf_type *t)
{
	const struct btf_type *pointee;

	/* step from the pointer to the type it points at */
	pointee = btf_type_by_id(btf, t->type);

	/* a const qualifier in front of the pointee is accepted */
	if (BTF_INFO_KIND(pointee->info) == BTF_KIND_CONST)
		pointee = btf_type_by_id(btf, pointee->type);

	/* only byte-sized integers qualify */
	if (!btf_type_is_int(pointee))
		return false;

	return pointee->size == 1;
}
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
@ -3735,6 +3748,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
*/
return true;
if (is_string_ptr(btf, t))
return true;
/* this is a pointer to another type */
info->reg_type = PTR_TO_BTF_ID;

View File

@ -113,7 +113,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
noff = 0;
} else {
old = d->image + d->image_off;
noff = d->image_off ^ (PAGE_SIZE / 2);
noff = d->image_off ^ (BPF_IMAGE_SIZE / 2);
}
new = d->num_progs ? d->image + noff : NULL;
@ -140,7 +140,7 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
mutex_lock(&d->mutex);
if (!d->image) {
d->image = bpf_jit_alloc_exec_page();
d->image = bpf_image_alloc();
if (!d->image)
goto out;
}

View File

@ -4,6 +4,7 @@
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/rbtree_latch.h>
/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@ -16,11 +17,12 @@ const struct bpf_prog_ops bpf_extension_prog_ops = {
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
static struct latch_tree_root image_tree __cacheline_aligned;
/* serializes access to trampoline_table */
/* serializes access to trampoline_table and image_tree */
static DEFINE_MUTEX(trampoline_mutex);
void *bpf_jit_alloc_exec_page(void)
static void *bpf_jit_alloc_exec_page(void)
{
void *image;
@ -36,6 +38,64 @@ void *bpf_jit_alloc_exec_page(void)
return image;
}
/*
 * Ordering callback for the image latch tree: node @a sorts before
 * node @b when its containing bpf_image lives at a lower address.
 */
static __always_inline bool image_tree_less(struct latch_tree_node *a,
					    struct latch_tree_node *b)
{
	struct bpf_image *lhs, *rhs;

	lhs = container_of(a, struct bpf_image, tnode);
	rhs = container_of(b, struct bpf_image, tnode);

	return lhs < rhs;
}
/*
 * Lookup callback for the image latch tree.
 *
 * Compares @addr against the PAGE_SIZE-byte range that starts at the
 * bpf_image containing node @n.
 *
 * Returns 0 when @addr falls inside that range, -1 when it lies below
 * it and 1 when it lies at or beyond its end.
 */
static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n)
{
	void *start = container_of(n, struct bpf_image, tnode);
	void *end = start + PAGE_SIZE;

	if (addr >= start && addr < end)
		return 0;

	return addr < start ? -1 : 1;
}
/*
 * Callbacks handed to the latch tree helpers for image_tree:
 * .less orders nodes on insert, .comp matches an address on lookup.
 */
static const struct latch_tree_ops image_tree_ops = {
.less = image_tree_less,
.comp = image_tree_comp,
};
/*
 * Allocate an executable page, register it in image_tree and hand back
 * the payload area that follows the bpf_image header.
 *
 * @lock: when true, trampoline_mutex is taken around the tree insert;
 *        when false the insert is done without taking it
 *        (NOTE(review): callers passing false presumably already hold
 *        trampoline_mutex - confirm against the call sites).
 *
 * Returns a pointer to image->data, or NULL if the page allocation
 * failed.
 */
static void *__bpf_image_alloc(bool lock)
{
	struct bpf_image *img = bpf_jit_alloc_exec_page();

	if (!img)
		return NULL;

	if (lock) {
		mutex_lock(&trampoline_mutex);
		latch_tree_insert(&img->tnode, &image_tree, &image_tree_ops);
		mutex_unlock(&trampoline_mutex);
	} else {
		latch_tree_insert(&img->tnode, &image_tree, &image_tree_ops);
	}

	return img->data;
}
/*
 * Allocate and register a BPF image, taking trampoline_mutex around
 * the registration (calls __bpf_image_alloc() with lock == true).
 *
 * Returns the image payload pointer, or NULL on allocation failure.
 */
void *bpf_image_alloc(void)
{
return __bpf_image_alloc(true);
}
/*
 * Check whether @addr lies inside any image registered in image_tree.
 *
 * The tree lookup is performed inside an RCU read-side critical
 * section.
 *
 * Returns true when a matching image is found, false otherwise.
 */
bool is_bpf_image_address(unsigned long addr)
{
	struct latch_tree_node *node;

	rcu_read_lock();
	node = latch_tree_find((void *) addr, &image_tree, &image_tree_ops);
	rcu_read_unlock();

	return node != NULL;
}
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
struct bpf_trampoline *tr;
@ -56,7 +116,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
goto out;
/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
image = bpf_jit_alloc_exec_page();
image = __bpf_image_alloc(false);
if (!image) {
kfree(tr);
tr = NULL;
@ -131,14 +191,14 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
}
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
* bytes on x86. Pick a number to fit into PAGE_SIZE / 2
* bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
*/
#define BPF_MAX_TRAMP_PROGS 40
static int bpf_trampoline_update(struct bpf_trampoline *tr)
{
void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2;
void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2;
struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
@ -174,7 +234,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
*/
synchronize_rcu_tasks();
err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2,
&tr->func.model, flags,
fentry, fentry_cnt,
fexit, fexit_cnt,
@ -284,6 +344,8 @@ out:
void bpf_trampoline_put(struct bpf_trampoline *tr)
{
struct bpf_image *image;
if (!tr)
return;
mutex_lock(&trampoline_mutex);
@ -294,9 +356,11 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
goto out;
if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
goto out;
image = container_of(tr->image, struct bpf_image, data);
latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops);
/* wait for tasks to get out of trampoline before freeing it */
synchronize_rcu_tasks();
bpf_jit_free_exec(tr->image);
bpf_jit_free_exec(image);
hlist_del(&tr->hlist);
kfree(tr);
out:

View File

@ -131,8 +131,9 @@ int kernel_text_address(unsigned long addr)
* triggers a stack trace, or a WARN() that happens during
* coming back from idle, or cpu on or offlining.
*
* is_module_text_address() as well as the kprobe slots
* and is_bpf_text_address() require RCU to be watching.
* is_module_text_address() as well as the kprobe slots,
* is_bpf_text_address() and is_bpf_image_address() require
* RCU to be watching.
*/
no_rcu = !rcu_is_watching();
@ -148,6 +149,8 @@ int kernel_text_address(unsigned long addr)
goto out;
if (is_bpf_text_address(addr))
goto out;
if (is_bpf_image_address(addr))
goto out;
ret = 0;
out:
if (no_rcu)

View File

@ -0,0 +1,112 @@
// SPDX-License-Identifier: GPL-2.0-only
#define _GNU_SOURCE
#include <sched.h>
#include <sys/prctl.h>
#include <test_progs.h>
#define MAX_TRAMP_PROGS 40
/*
 * Per-iteration bookkeeping for test_trampoline_count(): the loaded
 * object plus whichever link was attached from it. Each instance
 * attaches either fentry or fexit, so one of the two links stays NULL.
 */
struct inst {
struct bpf_object *obj;
struct bpf_link *link_fentry;
struct bpf_link *link_fexit;
};
/*
 * Rename the current task by writing "test_overhead" (including its
 * terminating NUL) to /proc/self/comm.
 *
 * Returns 0 on success, -1 if the file could not be opened or the
 * write failed; failures are reported through CHECK().
 */
static int test_task_rename(void)
{
	char buf[] = "test_overhead";
	int duration = 0;	/* referenced by the CHECK() macro */
	int ret = 0;
	int fd, err;

	fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
	if (CHECK(fd < 0, "open /proc", "err %d", errno))
		return -1;

	err = write(fd, buf, sizeof(buf));
	if (err < 0) {
		/* report before close() so errno still belongs to write() */
		CHECK(err < 0, "task rename", "err %d", errno);
		ret = -1;
	}

	close(fd);
	return ret;
}
/*
 * Look up the program with section title @name in @obj and attach it
 * as a tracing program.
 *
 * Returns the result of bpf_program__attach_trace(), or
 * ERR_PTR(-EINVAL) when no program with that title exists.
 */
static struct bpf_link *load(struct bpf_object *obj, const char *name)
{
	int duration = 0;	/* referenced by the CHECK() macro */
	struct bpf_program *prog = bpf_object__find_program_by_title(obj, name);

	if (!CHECK(!prog, "find_probe", "prog '%s' not found\n", name))
		return bpf_program__attach_trace(prog);

	return ERR_PTR(-EINVAL);
}
/*
 * Verify the per-function trampoline program limit.
 *
 * Attaches MAX_TRAMP_PROGS fentry/fexit programs (picked at random) to
 * __set_task_comm, checks that one more attachment is rejected with
 * -E2BIG, and finally triggers the probed function by renaming the
 * current task and restoring the original name.
 *
 * All objects and links recorded in inst[] are released on every exit
 * path, including slot 0 and a slot that was only partially set up.
 */
void test_trampoline_count(void)
{
	const char *fentry_name = "fentry/__set_task_comm";
	const char *fexit_name = "fexit/__set_task_comm";
	const char *object = "test_trampoline_count.o";
	struct inst inst[MAX_TRAMP_PROGS] = { 0 };
	int err, i = 0, duration = 0;
	struct bpf_object *obj;
	struct bpf_link *link;
	char comm[16] = {};

	/* attach 'allowed' 40 trampoline programs */
	for (i = 0; i < MAX_TRAMP_PROGS; i++) {
		obj = bpf_object__open_file(object, NULL);
		if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
			goto cleanup;

		/*
		 * Record the object before loading it so that the cleanup
		 * path closes it even when the load below fails.
		 */
		inst[i].obj = obj;

		err = bpf_object__load(obj);
		if (CHECK(err, "obj_load", "err %d\n", err))
			goto cleanup;

		if (rand() % 2) {
			link = load(obj, fentry_name);
			if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
				goto cleanup;
			inst[i].link_fentry = link;
		} else {
			link = load(obj, fexit_name);
			if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
				goto cleanup;
			inst[i].link_fexit = link;
		}
	}

	/* and try 1 extra.. */
	obj = bpf_object__open_file(object, NULL);
	if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
		goto cleanup;

	err = bpf_object__load(obj);
	if (CHECK(err, "obj_load", "err %d\n", err))
		goto cleanup_extra;

	/* ..that needs to fail */
	link = load(obj, fentry_name);
	if (CHECK(!IS_ERR(link), "cannot attach over the limit", "err %ld\n", PTR_ERR(link))) {
		bpf_link__destroy(link);
		goto cleanup_extra;
	}

	/* with E2BIG error */
	CHECK(PTR_ERR(link) != -E2BIG, "proper error check", "err %ld\n", PTR_ERR(link));

	/* and finally execute the probe */
	if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
		goto cleanup_extra;
	CHECK_FAIL(test_task_rename());
	CHECK_FAIL(prctl(PR_SET_NAME, comm, 0L, 0L, 0L));

cleanup_extra:
	bpf_object__close(obj);
cleanup:
	/*
	 * i is MAX_TRAMP_PROGS when the attach loop completed, otherwise
	 * the index of the slot that was being filled when it failed.
	 * Walk from the last slot that may hold anything down to slot 0;
	 * unused members are still NULL from the zero-initialization.
	 */
	if (i >= MAX_TRAMP_PROGS)
		i = MAX_TRAMP_PROGS - 1;
	for (; i >= 0; i--) {
		bpf_link__destroy(inst[i].link_fentry);
		bpf_link__destroy(inst[i].link_fexit);
		bpf_object__close(inst[i].obj);
	}
}

View File

@ -0,0 +1,21 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdbool.h>
#include <stddef.h>
#include <linux/bpf.h>
#include "bpf_trace_helpers.h"
struct task_struct;
/*
 * No-op program placed in the "fentry/__set_task_comm" section; it
 * exists only so that something can be attached there. Always
 * returns 0.
 */
SEC("fentry/__set_task_comm")
int BPF_PROG(prog1, struct task_struct *tsk, const char *buf, bool exec)
{
return 0;
}
/*
 * No-op program placed in the "fexit/__set_task_comm" section; it
 * exists only so that something can be attached there. Always
 * returns 0.
 */
SEC("fexit/__set_task_comm")
int BPF_PROG(prog2, struct task_struct *tsk, const char *buf, bool exec)
{
return 0;
}
/* License string emitted into the object's "license" section. */
char _license[] SEC("license") = "GPL";