bpf: Add kernel module with user mode driver that populates bpffs.
Add kernel module with user mode driver that populates bpffs
with BPF iterators.

$ mount bpffs /my/bpffs/ -t bpf
$ ls -la /my/bpffs/
total 4
drwxrwxrwt  2 root root    0 Jul  2 00:27 .
drwxr-xr-x 19 root root 4096 Jul  2 00:09 ..
-rw-------  1 root root    0 Jul  2 00:27 maps.debug
-rw-------  1 root root    0 Jul  2 00:27 progs.debug

The user mode driver will load BPF Type Formats, create BPF maps, populate
BPF maps, load two BPF programs, attach them to BPF iterators, and finally
send two bpf_link IDs back to the kernel.
The kernel will pin the two bpf_links into the newly mounted bpffs instance
under the names "progs.debug" and "maps.debug". These two files become human
readable.

$ cat /my/bpffs/progs.debug
  id name                attached
  11 dump_bpf_map        bpf_iter_bpf_map
  12 dump_bpf_prog       bpf_iter_bpf_prog
  27 test_pkt_access
  32 test_main           test_pkt_access test_pkt_access
  33 test_subprog1       test_pkt_access_subprog1 test_pkt_access
  34 test_subprog2       test_pkt_access_subprog2 test_pkt_access
  35 test_subprog3       test_pkt_access_subprog3 test_pkt_access
  36 new_get_skb_len     get_skb_len test_pkt_access
  37 new_get_skb_ifindex get_skb_ifindex test_pkt_access
  38 new_get_constant    get_constant test_pkt_access

The BPF program dump_bpf_prog() in iterators.bpf.c prints this data about all
BPF programs currently loaded in the system. This information is unstable and
will change from kernel to kernel, as the ".debug" suffix conveys.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200819042759.51280-4-alexei.starovoitov@gmail.com
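Editor's note: iterators.bpf.c belongs to an earlier patch in this series and is not among the hunks below. For orientation only, a bpf_iter program that emits this kind of per-program line looks roughly like the sketch here. The section name, context type and BPF_SEQ_PRINTF usage follow standard bpf_iter conventions; the exact fields, includes and output format of the real iterators.bpf.c differ (it also resolves attached function names via BTF), so treat this as an illustration, not the patch's source.

/* Illustrative sketch only, not the literal iterators.bpf.c source.
 * A bpf_iter program attached to the bpf_prog iterator walks every loaded
 * BPF program and emits one line per program into the iterator's seq_file.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

SEC("iter/bpf_prog")
int dump_bpf_prog(struct bpf_iter__bpf_prog *ctx)
{
        struct seq_file *seq = ctx->meta->seq;
        struct bpf_prog *prog = ctx->prog;

        if (!prog)
                return 0;

        /* print the header once, then "id name" for each program */
        if (ctx->meta->seq_num == 0)
                BPF_SEQ_PRINTF(seq, "  id name\n");
        BPF_SEQ_PRINTF(seq, "%4u %s\n", prog->aux->id, prog->aux->name);
        return 0;
}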
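Once the module has populated a bpffs instance, the pinned links behave like ordinary read-only files: each open() starts a fresh iterator session and read() streams the seq_file output produced by the BPF program, which is what makes the plain cat above work. A minimal user-space sketch of the same thing follows; the /sys/fs/bpf path is the conventional mount point and is an assumption, not something this patch mandates.

/* Minimal sketch: dump a pinned BPF iterator by reading it like a file.
 * Assumes bpffs is mounted at the conventional /sys/fs/bpf location.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        ssize_t n;
        int fd = open("/sys/fs/bpf/progs.debug", O_RDONLY);

        if (fd < 0) {
                perror("open /sys/fs/bpf/progs.debug");
                return 1;
        }
        while ((n = read(fd, buf, sizeof(buf))) > 0)
                fwrite(buf, 1, (size_t)n, stdout);
        close(fd);
        return 0;
}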
@@ -1710,6 +1710,8 @@ config BPF_JIT_DEFAULT_ON
         def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON
         depends on HAVE_EBPF_JIT && BPF_JIT

+source "kernel/bpf/preload/Kconfig"
+
 config USERFAULTFD
         bool "Enable userfaultfd() system call"
         depends on MMU
@@ -12,7 +12,7 @@ obj-y = fork.o exec_domain.o panic.o \
             notifier.o ksysfs.o cred.o reboot.o \
             async.o range.o smpboot.o ucount.o regset.o

-obj-$(CONFIG_BPFILTER) += usermode_driver.o
+obj-$(CONFIG_USERMODE_DRIVER) += usermode_driver.o
 obj-$(CONFIG_MODULES) += kmod.o
 obj-$(CONFIG_MULTIUSER) += groups.o

@@ -29,3 +29,4 @@ ifeq ($(CONFIG_BPF_JIT),y)
 obj-$(CONFIG_BPF_SYSCALL) += bpf_struct_ops.o
 obj-${CONFIG_BPF_LSM} += bpf_lsm.o
 endif
+obj-$(CONFIG_BPF_PRELOAD) += preload/
@@ -20,6 +20,7 @@
 #include <linux/filter.h>
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
+#include "preload/bpf_preload.h"

 enum bpf_type {
         BPF_TYPE_UNSPEC = 0,
@@ -369,9 +370,10 @@ static struct dentry *
 bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
 {
         /* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future
-         * extensions.
+         * extensions. That allows populate_bpffs() to create special files.
          */
-        if (strchr(dentry->d_name.name, '.'))
+        if ((dir->i_mode & S_IALLUGO) &&
+            strchr(dentry->d_name.name, '.'))
                 return ERR_PTR(-EPERM);

         return simple_lookup(dir, dentry, flags);
@@ -409,6 +411,27 @@ static const struct inode_operations bpf_dir_iops = {
         .unlink         = simple_unlink,
 };

+/* pin iterator link into bpffs */
+static int bpf_iter_link_pin_kernel(struct dentry *parent,
+                                    const char *name, struct bpf_link *link)
+{
+        umode_t mode = S_IFREG | S_IRUSR;
+        struct dentry *dentry;
+        int ret;
+
+        inode_lock(parent->d_inode);
+        dentry = lookup_one_len(name, parent, strlen(name));
+        if (IS_ERR(dentry)) {
+                inode_unlock(parent->d_inode);
+                return PTR_ERR(dentry);
+        }
+        ret = bpf_mkobj_ops(dentry, mode, link, &bpf_link_iops,
+                            &bpf_iter_fops);
+        dput(dentry);
+        inode_unlock(parent->d_inode);
+        return ret;
+}
+
 static int bpf_obj_do_pin(const char __user *pathname, void *raw,
                           enum bpf_type type)
 {
@@ -638,6 +661,91 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
         return 0;
 }

+struct bpf_preload_ops *bpf_preload_ops;
+EXPORT_SYMBOL_GPL(bpf_preload_ops);
+
+static bool bpf_preload_mod_get(void)
+{
+        /* If bpf_preload.ko wasn't loaded earlier then load it now.
+         * When bpf_preload is built into vmlinux the module's __init
+         * function will populate it.
+         */
+        if (!bpf_preload_ops) {
+                request_module("bpf_preload");
+                if (!bpf_preload_ops)
+                        return false;
+        }
+        /* And grab the reference, so the module doesn't disappear while the
+         * kernel is interacting with the kernel module and its UMD.
+         */
+        if (!try_module_get(bpf_preload_ops->owner)) {
+                pr_err("bpf_preload module get failed.\n");
+                return false;
+        }
+        return true;
+}
+
+static void bpf_preload_mod_put(void)
+{
+        if (bpf_preload_ops)
+                /* now user can "rmmod bpf_preload" if necessary */
+                module_put(bpf_preload_ops->owner);
+}
+
+static DEFINE_MUTEX(bpf_preload_lock);
+
+static int populate_bpffs(struct dentry *parent)
+{
+        struct bpf_preload_info objs[BPF_PRELOAD_LINKS] = {};
+        struct bpf_link *links[BPF_PRELOAD_LINKS] = {};
+        int err = 0, i;
+
+        /* grab the mutex to make sure the kernel interactions with bpf_preload
+         * UMD are serialized
+         */
+        mutex_lock(&bpf_preload_lock);
+
+        /* if bpf_preload.ko wasn't built into vmlinux then load it */
+        if (!bpf_preload_mod_get())
+                goto out;
+
+        if (!bpf_preload_ops->info.tgid) {
+                /* preload() will start UMD that will load BPF iterator programs */
+                err = bpf_preload_ops->preload(objs);
+                if (err)
+                        goto out_put;
+                for (i = 0; i < BPF_PRELOAD_LINKS; i++) {
+                        links[i] = bpf_link_by_id(objs[i].link_id);
+                        if (IS_ERR(links[i])) {
+                                err = PTR_ERR(links[i]);
+                                goto out_put;
+                        }
+                }
+                for (i = 0; i < BPF_PRELOAD_LINKS; i++) {
+                        err = bpf_iter_link_pin_kernel(parent,
+                                                       objs[i].link_name, links[i]);
+                        if (err)
+                                goto out_put;
+                        /* do not unlink successfully pinned links even
+                         * if later link fails to pin
+                         */
+                        links[i] = NULL;
+                }
+                /* finish() will tell UMD process to exit */
+                err = bpf_preload_ops->finish();
+                if (err)
+                        goto out_put;
+        }
+out_put:
+        bpf_preload_mod_put();
+out:
+        mutex_unlock(&bpf_preload_lock);
+        for (i = 0; i < BPF_PRELOAD_LINKS && err; i++)
+                if (!IS_ERR_OR_NULL(links[i]))
+                        bpf_link_put(links[i]);
+        return err;
+}
+
 static int bpf_fill_super(struct super_block *sb, struct fs_context *fc)
 {
         static const struct tree_descr bpf_rfiles[] = { { "" } };
@@ -654,8 +762,8 @@ static int bpf_fill_super(struct super_block *sb, struct fs_context *fc)
         inode = sb->s_root->d_inode;
         inode->i_op = &bpf_dir_iops;
         inode->i_mode &= ~S_IALLUGO;
+        populate_bpffs(sb->s_root);
         inode->i_mode |= S_ISVTX | opts->mode;

         return 0;
 }
@@ -705,6 +813,8 @@ static int __init bpf_init(void)
 {
         int ret;

+        mutex_init(&bpf_preload_lock);
+
         ret = sysfs_create_mount_point(fs_kobj, "bpf");
         if (ret)
                 return ret;
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config USERMODE_DRIVER
+        bool
+        default n
+
+menuconfig BPF_PRELOAD
+        bool "Preload BPF file system with kernel specific program and map iterators"
+        depends on BPF
+        select USERMODE_DRIVER
+        help
+          This builds kernel module with several embedded BPF programs that are
+          pinned into BPF FS mount point as human readable files that are
+          useful in debugging and introspection of BPF programs and maps.
+
+if BPF_PRELOAD
+config BPF_PRELOAD_UMD
+        tristate "bpf_preload kernel module with user mode driver"
+        depends on CC_CAN_LINK
+        depends on m || CC_CAN_LINK_STATIC
+        default m
+        help
+          This builds bpf_preload kernel module with embedded user mode driver.
+endif
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
+
+LIBBPF_SRCS = $(srctree)/tools/lib/bpf/
+LIBBPF_A = $(obj)/libbpf.a
+LIBBPF_OUT = $(abspath $(obj))
+
+$(LIBBPF_A):
+        $(Q)$(MAKE) -C $(LIBBPF_SRCS) OUTPUT=$(LIBBPF_OUT)/ $(LIBBPF_OUT)/libbpf.a
+
+userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \
+        -I $(srctree)/tools/lib/ -Wno-unused-result
+
+userprogs := bpf_preload_umd
+
+bpf_preload_umd-objs := iterators/iterators.o
+bpf_preload_umd-userldlibs := $(LIBBPF_A) -lelf -lz
+
+$(obj)/bpf_preload_umd: $(LIBBPF_A)
+
+$(obj)/bpf_preload_umd_blob.o: $(obj)/bpf_preload_umd
+
+obj-$(CONFIG_BPF_PRELOAD_UMD) += bpf_preload.o
+bpf_preload-objs += bpf_preload_kern.o bpf_preload_umd_blob.o
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BPF_PRELOAD_H
+#define _BPF_PRELOAD_H
+
+#include <linux/usermode_driver.h>
+#include "iterators/bpf_preload_common.h"
+
+struct bpf_preload_ops {
+        struct umd_info info;
+        int (*preload)(struct bpf_preload_info *);
+        int (*finish)(void);
+        struct module *owner;
+};
+extern struct bpf_preload_ops *bpf_preload_ops;
+#define BPF_PRELOAD_LINKS 2
+#endif
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pid.h>
+#include <linux/fs.h>
+#include <linux/sched/signal.h>
+#include "bpf_preload.h"
+
+extern char bpf_preload_umd_start;
+extern char bpf_preload_umd_end;
+
+static int preload(struct bpf_preload_info *obj);
+static int finish(void);
+
+static struct bpf_preload_ops umd_ops = {
+        .info.driver_name = "bpf_preload",
+        .preload = preload,
+        .finish = finish,
+        .owner = THIS_MODULE,
+};
+
+static int preload(struct bpf_preload_info *obj)
+{
+        int magic = BPF_PRELOAD_START;
+        loff_t pos = 0;
+        int i, err;
+        ssize_t n;
+
+        err = fork_usermode_driver(&umd_ops.info);
+        if (err)
+                return err;
+
+        /* send the start magic to let UMD proceed with loading BPF progs */
+        n = kernel_write(umd_ops.info.pipe_to_umh,
+                         &magic, sizeof(magic), &pos);
+        if (n != sizeof(magic))
+                return -EPIPE;
+
+        /* receive bpf_link IDs and names from UMD */
+        pos = 0;
+        for (i = 0; i < BPF_PRELOAD_LINKS; i++) {
+                n = kernel_read(umd_ops.info.pipe_from_umh,
+                                &obj[i], sizeof(*obj), &pos);
+                if (n != sizeof(*obj))
+                        return -EPIPE;
+        }
+        return 0;
+}
+
+static int finish(void)
+{
+        int magic = BPF_PRELOAD_END;
+        struct pid *tgid;
+        loff_t pos = 0;
+        ssize_t n;
+
+        /* send the last magic to UMD. It will do a normal exit. */
+        n = kernel_write(umd_ops.info.pipe_to_umh,
+                         &magic, sizeof(magic), &pos);
+        if (n != sizeof(magic))
+                return -EPIPE;
+        tgid = umd_ops.info.tgid;
+        wait_event(tgid->wait_pidfd, thread_group_exited(tgid));
+        umd_ops.info.tgid = NULL;
+        return 0;
+}
+
+static int __init load_umd(void)
+{
+        int err;
+
+        err = umd_load_blob(&umd_ops.info, &bpf_preload_umd_start,
+                            &bpf_preload_umd_end - &bpf_preload_umd_start);
+        if (err)
+                return err;
+        bpf_preload_ops = &umd_ops;
+        return err;
+}
+
+static void __exit fini_umd(void)
+{
+        bpf_preload_ops = NULL;
+        /* kill UMD in case it's still there due to earlier error */
+        kill_pid(umd_ops.info.tgid, SIGKILL, 1);
+        umd_ops.info.tgid = NULL;
+        umd_unload_blob(&umd_ops.info);
+}
+late_initcall(load_umd);
+module_exit(fini_umd);
+MODULE_LICENSE("GPL");
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+        .section .init.rodata, "a"
+        .global bpf_preload_umd_start
+bpf_preload_umd_start:
+        .incbin "kernel/bpf/preload/bpf_preload_umd"
+        .global bpf_preload_umd_end
+bpf_preload_umd_end:
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BPF_PRELOAD_COMMON_H
+#define _BPF_PRELOAD_COMMON_H
+
+#define BPF_PRELOAD_START 0x5555
+#define BPF_PRELOAD_END 0xAAAA
+
+struct bpf_preload_info {
+        char link_name[16];
+        int link_id;
+};
+
+#endif
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <argp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/resource.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include <sys/mount.h>
+#include "iterators.skel.h"
+#include "bpf_preload_common.h"
+
+int to_kernel = -1;
+int from_kernel = 0;
+
+static int send_link_to_kernel(struct bpf_link *link, const char *link_name)
+{
+        struct bpf_preload_info obj = {};
+        struct bpf_link_info info = {};
+        __u32 info_len = sizeof(info);
+        int err;
+
+        err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &info, &info_len);
+        if (err)
+                return err;
+        obj.link_id = info.id;
+        if (strlen(link_name) >= sizeof(obj.link_name))
+                return -E2BIG;
+        strcpy(obj.link_name, link_name);
+        if (write(to_kernel, &obj, sizeof(obj)) != sizeof(obj))
+                return -EPIPE;
+        return 0;
+}
+
+int main(int argc, char **argv)
+{
+        struct rlimit rlim = { RLIM_INFINITY, RLIM_INFINITY };
+        struct iterators_bpf *skel;
+        int err, magic;
+        int debug_fd;
+
+        debug_fd = open("/dev/console", O_WRONLY | O_NOCTTY | O_CLOEXEC);
+        if (debug_fd < 0)
+                return 1;
+        to_kernel = dup(1);
+        close(1);
+        dup(debug_fd);
+        /* now stdin and stderr point to /dev/console */
+
+        read(from_kernel, &magic, sizeof(magic));
+        if (magic != BPF_PRELOAD_START) {
+                printf("bad start magic %d\n", magic);
+                return 1;
+        }
+        setrlimit(RLIMIT_MEMLOCK, &rlim);
+        /* libbpf opens BPF object and loads it into the kernel */
+        skel = iterators_bpf__open_and_load();
+        if (!skel) {
+                /* iterators.skel.h is little endian.
+                 * libbpf doesn't support automatic little->big conversion
+                 * of BPF bytecode yet.
+                 * The program load will fail in such case.
+                 */
+                printf("Failed load could be due to wrong endianness\n");
+                return 1;
+        }
+        err = iterators_bpf__attach(skel);
+        if (err)
+                goto cleanup;
+
+        /* send two bpf_link IDs with names to the kernel */
+        err = send_link_to_kernel(skel->links.dump_bpf_map, "maps.debug");
+        if (err)
+                goto cleanup;
+        err = send_link_to_kernel(skel->links.dump_bpf_prog, "progs.debug");
+        if (err)
+                goto cleanup;
+
+        /* The kernel will proceed with pinning the links in bpffs.
+         * UMD will wait on read from pipe.
+         */
+        read(from_kernel, &magic, sizeof(magic));
+        if (magic != BPF_PRELOAD_END) {
+                printf("bad final magic %d\n", magic);
+                err = -EINVAL;
+        }
+cleanup:
+        iterators_bpf__destroy(skel);
+
+        return err != 0;
+}
@@ -2,6 +2,7 @@
 menuconfig BPFILTER
         bool "BPF based packet filtering framework (BPFILTER)"
         depends on NET && BPF && INET
+        select USERMODE_DRIVER
         help
           This builds experimental bpfilter framework that is aiming to
           provide netfilter compatible functionality via BPF
@@ -1,6 +1,9 @@
 # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 # Most of this file is copied from tools/lib/traceevent/Makefile

+RM ?= rm
+srctree = $(abs_srctree)
+
 LIBBPF_VERSION := $(shell \
         grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | \
         sort -rV | head -n1 | cut -d'_' -f2)
@@ -188,7 +191,7 @@ $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED)
         @ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)

 $(OUTPUT)libbpf.a: $(BPF_IN_STATIC)
-        $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
+        $(QUIET_LINK)$(RM) -f $@; $(AR) rcs $@ $^

 $(OUTPUT)libbpf.pc:
         $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \
@@ -291,7 +294,7 @@ cscope:
         cscope -b -q -I $(srctree)/include -f cscope.out

 tags:
-        rm -f TAGS tags
+        $(RM) -f TAGS tags
         ls *.c *.h | xargs $(TAGS_PROG) -a

 # Declare the contents of the .PHONY variable as phony. We keep that