2018-10-06 07:40:00 +08:00
|
|
|
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
|
2018-01-31 04:55:03 +08:00
|
|
|
|
2015-07-01 10:14:03 +08:00
|
|
|
/*
|
|
|
|
* common eBPF ELF operations.
|
|
|
|
*
|
|
|
|
* Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
|
|
|
|
* Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
|
|
|
|
* Copyright (C) 2015 Huawei Inc.
|
2016-07-04 19:02:42 +08:00
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation;
|
|
|
|
* version 2.1 of the License (not later!)
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with this program; if not, see <http://www.gnu.org/licenses>
|
2015-07-01 10:14:03 +08:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
2019-02-14 02:25:53 +08:00
|
|
|
#include <string.h>
|
2015-07-01 10:14:03 +08:00
|
|
|
#include <memory.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <asm/unistd.h>
|
2019-06-18 03:26:50 +08:00
|
|
|
#include <errno.h>
|
2015-07-01 10:14:03 +08:00
|
|
|
#include <linux/bpf.h>
|
|
|
|
#include "bpf.h"
|
2018-01-31 04:55:01 +08:00
|
|
|
#include "libbpf.h"
|
2019-06-18 03:26:50 +08:00
|
|
|
#include "libbpf_internal.h"
|
2015-07-01 10:14:03 +08:00
|
|
|
|
libbpf: Poison kernel-only integer types
It's been a recurring issue with types like u32 slipping into libbpf source
code accidentally. This is not detected during builds inside kernel source
tree, but becomes a compilation error in libbpf's Github repo. Libbpf is
supposed to use only __{s,u}{8,16,32,64} typedefs, so poison {s,u}{8,16,32,64}
explicitly in every .c file. Doing that in a bit more centralized way, e.g.,
inside libbpf_internal.h breaks selftests, which are both using kernel u32 and
libbpf_internal.h.
This patch also fixes a new u32 occurrence in libbpf.c, added recently.
Fixes: 590a00888250 ("bpf: libbpf: Add STRUCT_OPS support")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200110181916.271446-1-andriin@fb.com
2020-01-11 02:19:16 +08:00
|
|
|
/* make sure libbpf doesn't use kernel-only integer typedefs */
|
|
|
|
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
|
|
|
|
|
2015-07-01 10:14:03 +08:00
|
|
|
/*
 * When building perf, unistd.h is overridden, so __NR_bpf is
 * required to be defined explicitly.
 *
 * The fallback below only kicks in when the headers did not already
 * provide __NR_bpf; it picks the bpf syscall number for the target
 * architecture and fails the build on any architecture not listed.
 */
#ifndef __NR_bpf
# if defined(__i386__)
# define __NR_bpf 357
# elif defined(__x86_64__)
# define __NR_bpf 321
# elif defined(__aarch64__)
# define __NR_bpf 280
# elif defined(__sparc__)
# define __NR_bpf 349
# elif defined(__s390__)
# define __NR_bpf 351
# elif defined(__arc__)
# define __NR_bpf 280
# else
# error __NR_bpf not defined. libbpf does not support your arch.
# endif
#endif
|
|
|
|
|
2017-02-12 03:37:08 +08:00
|
|
|
/*
 * Convert a pointer into the 64-bit integer representation stored in
 * union bpf_attr pointer fields. The value goes through unsigned long
 * first so the conversion is done at native pointer width.
 */
static inline __u64 ptr_to_u64(const void *ptr)
{
	unsigned long addr = (unsigned long) ptr;

	return (__u64) addr;
}
|
|
|
|
|
2017-02-12 03:37:08 +08:00
|
|
|
/*
 * Issue the raw bpf system call.
 *
 * @cmd:  bpf command to execute
 * @attr: command attributes, passed through untouched
 * @size: number of bytes of @attr handed to the kernel
 *
 * Returns whatever syscall() returns for __NR_bpf.
 */
static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
			  unsigned int size)
{
	int ret = syscall(__NR_bpf, cmd, attr, size);

	return ret;
}
|
|
|
|
|
2019-01-08 21:58:00 +08:00
|
|
|
/*
 * Run BPF_PROG_LOAD, repeating the call for as long as it fails with
 * errno set to EAGAIN. Any other outcome (success or a different error)
 * is returned to the caller as-is.
 */
static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
{
	for (;;) {
		int fd = sys_bpf(BPF_PROG_LOAD, attr, size);

		if (fd >= 0 || errno != EAGAIN)
			return fd;
	}
}
|
|
|
|
|
2018-04-19 06:56:05 +08:00
|
|
|
int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
|
2015-07-01 10:14:03 +08:00
|
|
|
{
|
|
|
|
union bpf_attr attr;
|
|
|
|
|
|
|
|
memset(&attr, '\0', sizeof(attr));
|
|
|
|
|
2018-04-19 06:56:05 +08:00
|
|
|
attr.map_type = create_attr->map_type;
|
|
|
|
attr.key_size = create_attr->key_size;
|
|
|
|
attr.value_size = create_attr->value_size;
|
|
|
|
attr.max_entries = create_attr->max_entries;
|
|
|
|
attr.map_flags = create_attr->map_flags;
|
bpf, bpftool: fix a few ubsan warnings
The issue is reported at https://github.com/libbpf/libbpf/issues/28.
Basically, per C standard, for
void *memcpy(void *dest, const void *src, size_t n)
if "dest" or "src" is NULL, regardless of whether "n" is 0 or not,
the result of memcpy is undefined. clang ubsan reported three such
instances in bpf.c with the following pattern:
memcpy(dest, 0, 0).
Although in practice, no known compiler will cause issues when
copy size is 0. Let us still fix the issue to silence ubsan
warnings.
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-04-10 08:37:41 +08:00
|
|
|
if (create_attr->name)
|
|
|
|
memcpy(attr.map_name, create_attr->name,
|
|
|
|
min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1));
|
2018-04-19 06:56:05 +08:00
|
|
|
attr.numa_node = create_attr->numa_node;
|
|
|
|
attr.btf_fd = create_attr->btf_fd;
|
2018-05-23 06:04:24 +08:00
|
|
|
attr.btf_key_type_id = create_attr->btf_key_type_id;
|
|
|
|
attr.btf_value_type_id = create_attr->btf_value_type_id;
|
2018-05-17 05:02:49 +08:00
|
|
|
attr.map_ifindex = create_attr->map_ifindex;
|
bpf: libbpf: Add STRUCT_OPS support
This patch adds BPF STRUCT_OPS support to libbpf.
The only sec_name convention is SEC(".struct_ops") to identify the
struct_ops implemented in BPF,
e.g. To implement a tcp_congestion_ops:
SEC(".struct_ops")
struct tcp_congestion_ops dctcp = {
.init = (void *)dctcp_init, /* <-- a bpf_prog */
/* ... some more func prts ... */
.name = "bpf_dctcp",
};
Each struct_ops is defined as a global variable under SEC(".struct_ops")
as above. libbpf creates a map for each variable and the variable name
is the map's name. Multiple struct_ops is supported under
SEC(".struct_ops").
In the bpf_object__open phase, libbpf will look for the SEC(".struct_ops")
section and find out what is the btf-type the struct_ops is
implementing. Note that the btf-type here is referring to
a type in the bpf_prog.o's btf. A "struct bpf_map" is added
by bpf_object__add_map() as other maps do. It will then
collect (through SHT_REL) where are the bpf progs that the
func ptrs are referring to. No btf_vmlinux is needed in
the open phase.
In the bpf_object__load phase, the map-fields, which depend
on the btf_vmlinux, are initialized (in bpf_map__init_kern_struct_ops()).
It will also set the prog->type, prog->attach_btf_id, and
prog->expected_attach_type. Thus, the prog's properties do
not rely on its section name.
[ Currently, the bpf_prog's btf-type ==> btf_vmlinux's btf-type matching
process is as simple as: member-name match + btf-kind match + size match.
If these matching conditions fail, libbpf will reject.
The current targeting support is "struct tcp_congestion_ops" which
most of its members are function pointers.
The member ordering of the bpf_prog's btf-type can be different from
the btf_vmlinux's btf-type. ]
Then, all obj->maps are created as usual (in bpf_object__create_maps()).
Once the maps are created and prog's properties are all set,
the libbpf will proceed to load all the progs.
bpf_map__attach_struct_ops() is added to register a struct_ops
map to a kernel subsystem.
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200109003514.3856730-1-kafai@fb.com
2020-01-09 08:35:14 +08:00
|
|
|
if (attr.map_type == BPF_MAP_TYPE_STRUCT_OPS)
|
|
|
|
attr.btf_vmlinux_value_type_id =
|
|
|
|
create_attr->btf_vmlinux_value_type_id;
|
|
|
|
else
|
|
|
|
attr.inner_map_fd = create_attr->inner_map_fd;
|
2018-04-19 06:56:05 +08:00
|
|
|
|
2018-11-21 09:11:20 +08:00
|
|
|
return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
|
2018-04-19 06:56:05 +08:00
|
|
|
}
|
2017-09-28 05:37:54 +08:00
|
|
|
|
2018-04-19 06:56:05 +08:00
|
|
|
int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
|
|
|
|
int key_size, int value_size, int max_entries,
|
|
|
|
__u32 map_flags, int node)
|
|
|
|
{
|
|
|
|
struct bpf_create_map_attr map_attr = {};
|
|
|
|
|
|
|
|
map_attr.name = name;
|
|
|
|
map_attr.map_type = map_type;
|
|
|
|
map_attr.map_flags = map_flags;
|
|
|
|
map_attr.key_size = key_size;
|
|
|
|
map_attr.value_size = value_size;
|
|
|
|
map_attr.max_entries = max_entries;
|
2017-08-19 02:28:01 +08:00
|
|
|
if (node >= 0) {
|
2018-04-19 06:56:05 +08:00
|
|
|
map_attr.numa_node = node;
|
|
|
|
map_attr.map_flags |= BPF_F_NUMA_NODE;
|
2017-08-19 02:28:01 +08:00
|
|
|
}
|
2015-07-01 10:14:03 +08:00
|
|
|
|
2018-04-19 06:56:05 +08:00
|
|
|
return bpf_create_map_xattr(&map_attr);
|
2015-07-01 10:14:03 +08:00
|
|
|
}
|
2015-07-01 10:14:06 +08:00
|
|
|
|
2017-08-19 02:28:01 +08:00
|
|
|
int bpf_create_map(enum bpf_map_type map_type, int key_size,
|
|
|
|
int value_size, int max_entries, __u32 map_flags)
|
|
|
|
{
|
2018-04-19 06:56:05 +08:00
|
|
|
struct bpf_create_map_attr map_attr = {};
|
|
|
|
|
|
|
|
map_attr.map_type = map_type;
|
|
|
|
map_attr.map_flags = map_flags;
|
|
|
|
map_attr.key_size = key_size;
|
|
|
|
map_attr.value_size = value_size;
|
|
|
|
map_attr.max_entries = max_entries;
|
|
|
|
|
|
|
|
return bpf_create_map_xattr(&map_attr);
|
2017-08-19 02:28:01 +08:00
|
|
|
}
|
|
|
|
|
2017-09-28 05:37:54 +08:00
|
|
|
int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
|
|
|
|
int key_size, int value_size, int max_entries,
|
|
|
|
__u32 map_flags)
|
|
|
|
{
|
2018-04-19 06:56:05 +08:00
|
|
|
struct bpf_create_map_attr map_attr = {};
|
|
|
|
|
|
|
|
map_attr.name = name;
|
|
|
|
map_attr.map_type = map_type;
|
|
|
|
map_attr.map_flags = map_flags;
|
|
|
|
map_attr.key_size = key_size;
|
|
|
|
map_attr.value_size = value_size;
|
|
|
|
map_attr.max_entries = max_entries;
|
|
|
|
|
|
|
|
return bpf_create_map_xattr(&map_attr);
|
2017-09-28 05:37:54 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Create a map whose creation request carries @inner_map_fd, optionally
 * bound to a NUMA node.
 *
 * The value size is always set to 4 (presumably the size of the inner
 * map fd stored per entry -- confirm against the kernel ABI). The
 * optional @name is truncated to BPF_OBJ_NAME_LEN - 1 bytes. A
 * non-negative @node sets numa_node and adds BPF_F_NUMA_NODE to the
 * flags.
 *
 * Returns the result of the BPF_MAP_CREATE syscall.
 */
int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
			       int key_size, int inner_map_fd, int max_entries,
			       __u32 map_flags, int node)
{
	union bpf_attr attr;

	/* the whole union must be zero, not just the members set below */
	memset(&attr, '\0', sizeof(attr));

	attr.map_type = map_type;
	attr.map_flags = map_flags;
	attr.key_size = key_size;
	attr.value_size = 4;
	attr.max_entries = max_entries;
	attr.inner_map_fd = inner_map_fd;

	/* memcpy() with a NULL source is undefined, so skip unnamed maps */
	if (name) {
		memcpy(attr.map_name, name,
		       min(strlen(name), BPF_OBJ_NAME_LEN - 1));
	}

	if (node >= 0) {
		attr.numa_node = node;
		attr.map_flags |= BPF_F_NUMA_NODE;
	}

	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}
|
|
|
|
|
2017-09-28 05:37:54 +08:00
|
|
|
/*
 * Same as bpf_create_map_in_map_node() without a NUMA node preference:
 * the node argument is passed as -1, which that function treats as
 * "do not set numa_node".
 */
int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
			  int key_size, int inner_map_fd, int max_entries,
			  __u32 map_flags)
{
	const int no_node = -1;

	return bpf_create_map_in_map_node(map_type, name, key_size,
					  inner_map_fd, max_entries,
					  map_flags, no_node);
}
|
|
|
|
|
2018-12-08 08:42:31 +08:00
|
|
|
/*
 * Duplicate an array of @cnt records, zeroing the tail of every record.
 *
 * @orecord:           source array, @cnt records of @actual_rec_size bytes
 * @cnt:               number of records
 * @actual_rec_size:   size of one record in the source (and in the copy)
 * @expected_rec_size: number of leading bytes of each record to keep;
 *                     bytes from here up to @actual_rec_size are zeroed
 *
 * Returns a malloc()ed buffer of @cnt * @actual_rec_size bytes that the
 * caller must free(), or NULL if the allocation fails, if the total size
 * does not fit in size_t, or if @expected_rec_size exceeds
 * @actual_rec_size.
 */
static void *
alloc_zero_tailing_info(const void *orecord, __u32 cnt,
			__u32 actual_rec_size, __u32 expected_rec_size)
{
	__u64 info_len = (__u64)actual_rec_size * cnt;
	const unsigned char *src = orecord;
	unsigned char *info, *dst;
	__u32 tail_len, i;

	/* a larger "kept" prefix than the record itself would underflow */
	if (expected_rec_size > actual_rec_size)
		return NULL;

	/* refuse sizes malloc() would silently truncate on 32-bit targets */
	if (info_len != (size_t)info_len)
		return NULL;

	info = malloc((size_t)info_len);
	if (!info)
		return NULL;

	/* zero out bytes kernel does not understand */
	tail_len = actual_rec_size - expected_rec_size;
	dst = info;
	for (i = 0; i < cnt; i++) {
		memcpy(dst, src, expected_rec_size);
		memset(dst + expected_rec_size, 0, tail_len);
		src += actual_rec_size;
		dst += actual_rec_size;
	}

	return info;
}
|
|
|
|
|
2018-03-31 06:08:01 +08:00
|
|
|
int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
|
|
|
|
char *log_buf, size_t log_buf_sz)
|
2015-07-01 10:14:06 +08:00
|
|
|
{
|
2018-12-08 08:42:31 +08:00
|
|
|
void *finfo = NULL, *linfo = NULL;
|
2015-07-01 10:14:06 +08:00
|
|
|
union bpf_attr attr;
|
tools/bpf: add log_level to bpf_load_program_attr
The kernel verifier has three levels of logs:
0: no logs
1: logs mostly useful
> 1: verbose
Current libbpf API functions bpf_load_program_xattr() and
bpf_load_program() cannot specify log_level.
The bcc, however, provides an interface for user to
specify log_level 2 for verbose output.
This patch added log_level into structure
bpf_load_program_attr, so users, including bcc, can use
bpf_load_program_xattr() to change log_level. The
supported log_level is 0, 1, and 2.
The bpf selftest test_sock.c is modified to enable log_level = 2.
If the "verbose" in test_sock.c is changed to true,
the test will output logs like below:
$ ./test_sock
func#0 @0
0: R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
0: (bf) r6 = r1
1: R1=ctx(id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
1: (61) r7 = *(u32 *)(r6 +28)
invalid bpf_context access off=28 size=4
Test case: bind4 load with invalid access: src_ip6 .. [PASS]
...
Test case: bind6 allow all .. [PASS]
Summary: 16 PASSED, 0 FAILED
Some test_sock tests are negative tests and verbose verifier
log will be printed out as shown in the above.
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2019-02-08 01:34:51 +08:00
|
|
|
__u32 log_level;
|
2018-03-31 06:08:01 +08:00
|
|
|
int fd;
|
|
|
|
|
tools/bpf: add log_level to bpf_load_program_attr
The kernel verifier has three levels of logs:
0: no logs
1: logs mostly useful
> 1: verbose
Current libbpf API functions bpf_load_program_xattr() and
bpf_load_program() cannot specify log_level.
The bcc, however, provides an interface for user to
specify log_level 2 for verbose output.
This patch added log_level into structure
bpf_load_program_attr, so users, including bcc, can use
bpf_load_program_xattr() to change log_level. The
supported log_level is 0, 1, and 2.
The bpf selftest test_sock.c is modified to enable log_level = 2.
If the "verbose" in test_sock.c is changed to true,
the test will output logs like below:
$ ./test_sock
func#0 @0
0: R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
0: (bf) r6 = r1
1: R1=ctx(id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
1: (61) r7 = *(u32 *)(r6 +28)
invalid bpf_context access off=28 size=4
Test case: bind4 load with invalid access: src_ip6 .. [PASS]
...
Test case: bind6 allow all .. [PASS]
Summary: 16 PASSED, 0 FAILED
Some test_sock tests are negative tests and verbose verifier
log will be printed out as shown in the above.
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2019-02-08 01:34:51 +08:00
|
|
|
if (!load_attr || !log_buf != !log_buf_sz)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
log_level = load_attr->log_level;
|
2019-04-02 12:27:47 +08:00
|
|
|
if (log_level > (4 | 2 | 1) || (log_level && !log_buf))
|
2018-03-31 06:08:01 +08:00
|
|
|
return -EINVAL;
|
|
|
|
|
2019-02-14 02:25:53 +08:00
|
|
|
memset(&attr, 0, sizeof(attr));
|
2018-03-31 06:08:01 +08:00
|
|
|
attr.prog_type = load_attr->prog_type;
|
|
|
|
attr.expected_attach_type = load_attr->expected_attach_type;
|
2020-03-29 08:43:54 +08:00
|
|
|
if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
|
|
|
|
attr.prog_type == BPF_PROG_TYPE_LSM) {
|
bpf: libbpf: Add STRUCT_OPS support
This patch adds BPF STRUCT_OPS support to libbpf.
The only sec_name convention is SEC(".struct_ops") to identify the
struct_ops implemented in BPF,
e.g. To implement a tcp_congestion_ops:
SEC(".struct_ops")
struct tcp_congestion_ops dctcp = {
.init = (void *)dctcp_init, /* <-- a bpf_prog */
/* ... some more func prts ... */
.name = "bpf_dctcp",
};
Each struct_ops is defined as a global variable under SEC(".struct_ops")
as above. libbpf creates a map for each variable and the variable name
is the map's name. Multiple struct_ops is supported under
SEC(".struct_ops").
In the bpf_object__open phase, libbpf will look for the SEC(".struct_ops")
section and find out what is the btf-type the struct_ops is
implementing. Note that the btf-type here is referring to
a type in the bpf_prog.o's btf. A "struct bpf_map" is added
by bpf_object__add_map() as other maps do. It will then
collect (through SHT_REL) where are the bpf progs that the
func ptrs are referring to. No btf_vmlinux is needed in
the open phase.
In the bpf_object__load phase, the map-fields, which depend
on the btf_vmlinux, are initialized (in bpf_map__init_kern_struct_ops()).
It will also set the prog->type, prog->attach_btf_id, and
prog->expected_attach_type. Thus, the prog's properties do
not rely on its section name.
[ Currently, the bpf_prog's btf-type ==> btf_vmlinux's btf-type matching
process is as simple as: member-name match + btf-kind match + size match.
If these matching conditions fail, libbpf will reject.
The current targeting support is "struct tcp_congestion_ops" which
most of its members are function pointers.
The member ordering of the bpf_prog's btf-type can be different from
the btf_vmlinux's btf-type. ]
Then, all obj->maps are created as usual (in bpf_object__create_maps()).
Once the maps are created and prog's properties are all set,
the libbpf will proceed to load all the progs.
bpf_map__attach_struct_ops() is added to register a struct_ops
map to a kernel subsystem.
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200109003514.3856730-1-kafai@fb.com
2020-01-09 08:35:14 +08:00
|
|
|
attr.attach_btf_id = load_attr->attach_btf_id;
|
2020-01-21 08:53:47 +08:00
|
|
|
} else if (attr.prog_type == BPF_PROG_TYPE_TRACING ||
|
|
|
|
attr.prog_type == BPF_PROG_TYPE_EXT) {
|
2019-10-31 06:32:12 +08:00
|
|
|
attr.attach_btf_id = load_attr->attach_btf_id;
|
2019-11-15 02:57:18 +08:00
|
|
|
attr.attach_prog_fd = load_attr->attach_prog_fd;
|
|
|
|
} else {
|
2019-10-31 06:32:12 +08:00
|
|
|
attr.prog_ifindex = load_attr->prog_ifindex;
|
2019-11-15 02:57:18 +08:00
|
|
|
attr.kern_version = load_attr->kern_version;
|
|
|
|
}
|
2018-03-31 06:08:01 +08:00
|
|
|
attr.insn_cnt = (__u32)load_attr->insns_cnt;
|
|
|
|
attr.insns = ptr_to_u64(load_attr->insns);
|
|
|
|
attr.license = ptr_to_u64(load_attr->license);
|
tools/bpf: add log_level to bpf_load_program_attr
The kernel verifier has three levels of logs:
0: no logs
1: logs mostly useful
> 1: verbose
Current libbpf API functions bpf_load_program_xattr() and
bpf_load_program() cannot specify log_level.
The bcc, however, provides an interface for user to
specify log_level 2 for verbose output.
This patch added log_level into structure
bpf_load_program_attr, so users, including bcc, can use
bpf_load_program_xattr() to change log_level. The
supported log_level is 0, 1, and 2.
The bpf selftest test_sock.c is modified to enable log_level = 2.
If the "verbose" in test_sock.c is changed to true,
the test will output logs like below:
$ ./test_sock
func#0 @0
0: R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
0: (bf) r6 = r1
1: R1=ctx(id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
1: (61) r7 = *(u32 *)(r6 +28)
invalid bpf_context access off=28 size=4
Test case: bind4 load with invalid access: src_ip6 .. [PASS]
...
Test case: bind6 allow all .. [PASS]
Summary: 16 PASSED, 0 FAILED
Some test_sock tests are negative tests and verbose verifier
log will be printed out as shown in the above.
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2019-02-08 01:34:51 +08:00
|
|
|
|
|
|
|
attr.log_level = log_level;
|
|
|
|
if (log_level) {
|
|
|
|
attr.log_buf = ptr_to_u64(log_buf);
|
|
|
|
attr.log_size = log_buf_sz;
|
|
|
|
} else {
|
|
|
|
attr.log_buf = ptr_to_u64(NULL);
|
|
|
|
attr.log_size = 0;
|
|
|
|
}
|
|
|
|
|
2018-11-20 07:29:14 +08:00
|
|
|
attr.prog_btf_fd = load_attr->prog_btf_fd;
|
|
|
|
attr.func_info_rec_size = load_attr->func_info_rec_size;
|
|
|
|
attr.func_info_cnt = load_attr->func_info_cnt;
|
|
|
|
attr.func_info = ptr_to_u64(load_attr->func_info);
|
2018-12-08 08:42:31 +08:00
|
|
|
attr.line_info_rec_size = load_attr->line_info_rec_size;
|
|
|
|
attr.line_info_cnt = load_attr->line_info_cnt;
|
|
|
|
attr.line_info = ptr_to_u64(load_attr->line_info);
|
bpf, bpftool: fix a few ubsan warnings
The issue is reported at https://github.com/libbpf/libbpf/issues/28.
Basically, per C standard, for
void *memcpy(void *dest, const void *src, size_t n)
if "dest" or "src" is NULL, regardless of whether "n" is 0 or not,
the result of memcpy is undefined. clang ubsan reported three such
instances in bpf.c with the following pattern:
memcpy(dest, 0, 0).
Although in practice, no known compiler will cause issues when
copy size is 0. Let us still fix the issue to silence ubsan
warnings.
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-04-10 08:37:41 +08:00
|
|
|
if (load_attr->name)
|
|
|
|
memcpy(attr.prog_name, load_attr->name,
|
|
|
|
min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
|
2019-05-25 06:25:19 +08:00
|
|
|
attr.prog_flags = load_attr->prog_flags;
|
2015-07-01 10:14:06 +08:00
|
|
|
|
2019-01-08 21:58:00 +08:00
|
|
|
fd = sys_bpf_prog_load(&attr, sizeof(attr));
|
2018-12-08 08:42:29 +08:00
|
|
|
if (fd >= 0)
|
2015-07-01 10:14:06 +08:00
|
|
|
return fd;
|
|
|
|
|
2018-11-20 07:29:16 +08:00
|
|
|
/* After bpf_prog_load, the kernel may modify certain attributes
|
|
|
|
* to give user space a hint how to deal with loading failure.
|
|
|
|
* Check to see whether we can make some changes and load again.
|
|
|
|
*/
|
2018-12-08 08:42:31 +08:00
|
|
|
while (errno == E2BIG && (!finfo || !linfo)) {
|
|
|
|
if (!finfo && attr.func_info_cnt &&
|
|
|
|
attr.func_info_rec_size < load_attr->func_info_rec_size) {
|
|
|
|
/* try with corrected func info records */
|
|
|
|
finfo = alloc_zero_tailing_info(load_attr->func_info,
|
|
|
|
load_attr->func_info_cnt,
|
|
|
|
load_attr->func_info_rec_size,
|
|
|
|
attr.func_info_rec_size);
|
|
|
|
if (!finfo)
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
attr.func_info = ptr_to_u64(finfo);
|
|
|
|
attr.func_info_rec_size = load_attr->func_info_rec_size;
|
|
|
|
} else if (!linfo && attr.line_info_cnt &&
|
|
|
|
attr.line_info_rec_size <
|
|
|
|
load_attr->line_info_rec_size) {
|
|
|
|
linfo = alloc_zero_tailing_info(load_attr->line_info,
|
|
|
|
load_attr->line_info_cnt,
|
|
|
|
load_attr->line_info_rec_size,
|
|
|
|
attr.line_info_rec_size);
|
|
|
|
if (!linfo)
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
attr.line_info = ptr_to_u64(linfo);
|
|
|
|
attr.line_info_rec_size = load_attr->line_info_rec_size;
|
|
|
|
} else {
|
|
|
|
break;
|
2018-11-20 07:29:16 +08:00
|
|
|
}
|
|
|
|
|
2019-01-08 21:58:00 +08:00
|
|
|
fd = sys_bpf_prog_load(&attr, sizeof(attr));
|
2018-11-20 07:29:16 +08:00
|
|
|
|
2018-12-08 08:42:29 +08:00
|
|
|
if (fd >= 0)
|
2018-11-20 07:29:16 +08:00
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
tools/bpf: add log_level to bpf_load_program_attr
The kernel verifier has three levels of logs:
0: no logs
1: logs mostly useful
> 1: verbose
Current libbpf API functions bpf_load_program_xattr() and
bpf_load_program() cannot specify log_level.
The bcc, however, provides an interface for user to
specify log_level 2 for verbose output.
This patch added log_level into structure
bpf_load_program_attr, so users, including bcc, can use
bpf_load_program_xattr() to change log_level. The
supported log_level is 0, 1, and 2.
The bpf selftest test_sock.c is modified to enable log_level = 2.
If the "verbose" in test_sock.c is changed to true,
the test will output logs like below:
$ ./test_sock
func#0 @0
0: R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
0: (bf) r6 = r1
1: R1=ctx(id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
1: (61) r7 = *(u32 *)(r6 +28)
invalid bpf_context access off=28 size=4
Test case: bind4 load with invalid access: src_ip6 .. [PASS]
...
Test case: bind6 allow all .. [PASS]
Summary: 16 PASSED, 0 FAILED
Some test_sock tests are negative tests and verbose verifier
log will be printed out as shown in the above.
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2019-02-08 01:34:51 +08:00
|
|
|
if (log_level || !log_buf)
|
2018-12-08 08:42:29 +08:00
|
|
|
goto done;
|
|
|
|
|
2015-07-01 10:14:06 +08:00
|
|
|
/* Try again with log */
|
|
|
|
attr.log_buf = ptr_to_u64(log_buf);
|
|
|
|
attr.log_size = log_buf_sz;
|
|
|
|
attr.log_level = 1;
|
|
|
|
log_buf[0] = 0;
|
2019-01-08 21:58:00 +08:00
|
|
|
fd = sys_bpf_prog_load(&attr, sizeof(attr));
|
2018-11-20 07:29:16 +08:00
|
|
|
done:
|
|
|
|
free(finfo);
|
2018-12-08 08:42:31 +08:00
|
|
|
free(linfo);
|
2018-11-20 07:29:16 +08:00
|
|
|
return fd;
|
2015-07-01 10:14:06 +08:00
|
|
|
}
|
2015-11-24 21:36:08 +08:00
|
|
|
|
2017-09-28 05:37:54 +08:00
|
|
|
int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
|
|
|
|
size_t insns_cnt, const char *license,
|
|
|
|
__u32 kern_version, char *log_buf,
|
|
|
|
size_t log_buf_sz)
|
|
|
|
{
|
2018-03-31 06:08:01 +08:00
|
|
|
struct bpf_load_program_attr load_attr;
|
|
|
|
|
|
|
|
memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
|
|
|
|
load_attr.prog_type = type;
|
|
|
|
load_attr.expected_attach_type = 0;
|
|
|
|
load_attr.name = NULL;
|
|
|
|
load_attr.insns = insns;
|
|
|
|
load_attr.insns_cnt = insns_cnt;
|
|
|
|
load_attr.license = license;
|
|
|
|
load_attr.kern_version = kern_version;
|
|
|
|
|
|
|
|
return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
|
2017-09-28 05:37:54 +08:00
|
|
|
}
|
|
|
|
|
2017-05-11 02:42:48 +08:00
|
|
|
/*
 * Load a program with an explicit verifier log level and program flags.
 *
 * @type:         program type
 * @insns:        instruction array
 * @insns_cnt:    number of instructions (stored as a 32-bit count)
 * @prog_flags:   flags stored in attr.prog_flags
 * @license:      license string
 * @kern_version: kernel version stored in attr.kern_version
 * @log_buf:      buffer for the verifier log
 * @log_buf_sz:   size of @log_buf
 * @log_level:    verifier log level stored in attr.log_level
 *
 * The log buffer is reset to an empty string before the load so stale
 * content is never mistaken for verifier output. The reset is skipped
 * when no buffer (or a zero-sized one) is supplied, instead of writing
 * through a NULL pointer or past the end of the buffer; the arguments
 * are still passed on unchanged and any resulting error comes from the
 * syscall.
 *
 * Returns the result of the BPF_PROG_LOAD syscall.
 */
int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
		       size_t insns_cnt, __u32 prog_flags, const char *license,
		       __u32 kern_version, char *log_buf, size_t log_buf_sz,
		       int log_level)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.prog_type = type;
	attr.insn_cnt = (__u32)insns_cnt;
	attr.insns = ptr_to_u64(insns);
	attr.license = ptr_to_u64(license);
	attr.log_buf = ptr_to_u64(log_buf);
	attr.log_size = log_buf_sz;
	attr.log_level = log_level;
	/* only touch the buffer if there is at least one writable byte */
	if (log_buf && log_buf_sz)
		log_buf[0] = 0;
	attr.kern_version = kern_version;
	attr.prog_flags = prog_flags;

	return sys_bpf_prog_load(&attr, sizeof(attr));
}
|
|
|
|
|
2017-02-10 07:21:39 +08:00
|
|
|
/*
 * Issue BPF_MAP_UPDATE_ELEM on map @fd with the given @key, @value and
 * @flags. Returns the result of the bpf syscall.
 */
int bpf_map_update_elem(int fd, const void *key, const void *value,
			__u64 flags)
{
	union bpf_attr attr;
	int ret;

	/* the whole union must be zero, not just the members set below */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.flags = flags;
	attr.key = ptr_to_u64(key);
	attr.value = ptr_to_u64(value);

	ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
	return ret;
}
|
2016-11-26 15:03:25 +08:00
|
|
|
|
2017-02-10 07:21:40 +08:00
|
|
|
/*
 * Issue BPF_MAP_LOOKUP_ELEM on map @fd for @key, passing @value as the
 * output buffer. Returns the result of the bpf syscall.
 */
int bpf_map_lookup_elem(int fd, const void *key, void *value)
{
	union bpf_attr attr;
	int ret;

	/* the whole union must be zero, not just the members set below */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.value = ptr_to_u64(value);
	attr.key = ptr_to_u64(key);

	ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
	return ret;
}
|
|
|
|
|
2019-02-01 07:40:11 +08:00
|
|
|
/*
 * Same as bpf_map_lookup_elem(), additionally passing @flags to the
 * BPF_MAP_LOOKUP_ELEM command. Returns the result of the bpf syscall.
 */
int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
{
	union bpf_attr attr;
	int ret;

	/* the whole union must be zero, not just the members set below */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.flags = flags;
	attr.value = ptr_to_u64(value);
	attr.key = ptr_to_u64(key);

	ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
	return ret;
}
|
|
|
|
|
2018-10-18 21:16:41 +08:00
|
|
|
/*
 * Issue BPF_MAP_LOOKUP_AND_DELETE_ELEM on map @fd for @key, passing
 * @value as the output buffer. Returns the result of the bpf syscall.
 */
int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
{
	union bpf_attr attr;
	int ret;

	/* the whole union must be zero, not just the members set below */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.value = ptr_to_u64(value);
	attr.key = ptr_to_u64(key);

	ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
	return ret;
}
|
|
|
|
|
2017-02-10 07:21:41 +08:00
|
|
|
/*
 * Issue BPF_MAP_DELETE_ELEM on map @fd for @key. Returns the result of
 * the bpf syscall.
 */
int bpf_map_delete_elem(int fd, const void *key)
{
	union bpf_attr attr;
	int ret;

	/* the whole union must be zero, not just the members set below */
	memset(&attr, 0, sizeof(attr));
	attr.key = ptr_to_u64(key);
	attr.map_fd = fd;

	ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
	return ret;
}
|
|
|
|
|
2017-02-10 07:21:42 +08:00
|
|
|
/*
 * Issue BPF_MAP_GET_NEXT_KEY on map @fd: @key is the current key and
 * @next_key the buffer passed for the following one. Returns the result
 * of the bpf syscall.
 */
int bpf_map_get_next_key(int fd, const void *key, void *next_key)
{
	union bpf_attr attr;
	int ret;

	/* the whole union must be zero, not just the members set below */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.next_key = ptr_to_u64(next_key);
	attr.key = ptr_to_u64(key);

	ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
	return ret;
}
|
|
|
|
|
bpf, libbpf: support global data/bss/rodata sections
This work adds BPF loader support for global data sections
to libbpf. This allows to write BPF programs in more natural
C-like way by being able to define global variables and const
data.
Back at LPC 2018 [0] we presented a first prototype which
implemented support for global data sections by extending BPF
syscall where union bpf_attr would get additional memory/size
pair for each section passed during prog load in order to later
add this base address into the ldimm64 instruction along with
the user provided offset when accessing a variable. Consensus
from LPC was that for proper upstream support, it would be
more desirable to use maps instead of bpf_attr extension as
this would allow for introspection of these sections as well
as potential live updates of their content. This work follows
this path by taking the following steps from loader side:
1) In bpf_object__elf_collect() step we pick up ".data",
".rodata", and ".bss" section information.
2) If present, in bpf_object__init_internal_map() we add
maps to the obj's map array that corresponds to each
of the present sections. Given section size and access
properties can differ, a single entry array map is
created with value size that is corresponding to the
ELF section size of .data, .bss or .rodata. These
internal maps are integrated into the normal map
handling of libbpf such that when user traverses all
obj maps, they can be differentiated from user-created
ones via bpf_map__is_internal(). In later steps when
we actually create these maps in the kernel via
bpf_object__create_maps(), then for .data and .rodata
sections their content is copied into the map through
bpf_map_update_elem(). For .bss this is not necessary
since array map is already zero-initialized by default.
Additionally, for .rodata the map is frozen as read-only
after setup, such that neither from program nor syscall
side writes would be possible.
3) In bpf_program__collect_reloc() step, we record the
corresponding map, insn index, and relocation type for
the global data.
4) And last but not least in the actual relocation step in
bpf_program__relocate(), we mark the ldimm64 instruction
with src_reg = BPF_PSEUDO_MAP_VALUE where in the first
imm field the map's file descriptor is stored, similarly to what is
done for BPF_PSEUDO_MAP_FD, and in the second imm field
(as ldimm64 is 2-insn wide) we store the access offset
into the section. Given these maps have only a single element,
ldimm64's off remains zero in both parts.
5) On kernel side, this special marked BPF_PSEUDO_MAP_VALUE
load will then store the actual target address in order
to have a 'map-lookup'-free access. That is, the actual
map value base address + offset. The destination register
in the verifier will then be marked as PTR_TO_MAP_VALUE,
containing the fixed offset as reg->off and backing BPF
map as reg->map_ptr. Meaning, it's treated as any other
normal map value from verification side, only with
efficient, direct value access instead of actual call to
map lookup helper as in the typical case.
Currently, only support for static global variables has been
added, and libbpf rejects non-static global variables from
loading. This can be lifted once we have proper semantics
for how BPF will treat multi-object BPF loads. From BTF side,
libbpf will set the value type id of the types corresponding
to the ".bss", ".data" and ".rodata" names which LLVM will
emit without the object name prefix. The key type will be
left as zero, thus making use of the key-less BTF option in
array maps.
Simple example dump of a program using global vars in each
section:
# bpftool prog
[...]
6784: sched_cls name load_static_dat tag a7e1291567277844 gpl
loaded_at 2019-03-11T15:39:34+0000 uid 0
xlated 1776B jited 993B memlock 4096B map_ids 2238,2237,2235,2236,2239,2240
# bpftool map show id 2237
2237: array name test_glo.bss flags 0x0
key 4B value 64B max_entries 1 memlock 4096B
# bpftool map show id 2235
2235: array name test_glo.data flags 0x0
key 4B value 64B max_entries 1 memlock 4096B
# bpftool map show id 2236
2236: array name test_glo.rodata flags 0x80
key 4B value 96B max_entries 1 memlock 4096B
# bpftool prog dump xlated id 6784
int load_static_data(struct __sk_buff * skb):
; int load_static_data(struct __sk_buff *skb)
0: (b7) r6 = 0
; test_reloc(number, 0, &num0);
1: (63) *(u32 *)(r10 -4) = r6
2: (bf) r2 = r10
; int load_static_data(struct __sk_buff *skb)
3: (07) r2 += -4
; test_reloc(number, 0, &num0);
4: (18) r1 = map[id:2238]
6: (18) r3 = map[id:2237][0]+0 <-- direct addr in .bss area
8: (b7) r4 = 0
9: (85) call array_map_update_elem#100464
10: (b7) r1 = 1
; test_reloc(number, 1, &num1);
[...]
; test_reloc(string, 2, str2);
120: (18) r8 = map[id:2237][0]+16 <-- same here at offset +16
122: (18) r1 = map[id:2239]
124: (18) r3 = map[id:2237][0]+16
126: (b7) r4 = 0
127: (85) call array_map_update_elem#100464
128: (b7) r1 = 120
; str1[5] = 'x';
129: (73) *(u8 *)(r9 +5) = r1
; test_reloc(string, 3, str1);
130: (b7) r1 = 3
131: (63) *(u32 *)(r10 -4) = r1
132: (b7) r9 = 3
133: (bf) r2 = r10
; int load_static_data(struct __sk_buff *skb)
134: (07) r2 += -4
; test_reloc(string, 3, str1);
135: (18) r1 = map[id:2239]
137: (18) r3 = map[id:2235][0]+16 <-- direct addr in .data area
139: (b7) r4 = 0
140: (85) call array_map_update_elem#100464
141: (b7) r1 = 111
; __builtin_memcpy(&str2[2], "hello", sizeof("hello"));
142: (73) *(u8 *)(r8 +6) = r1 <-- further access based on .bss data
143: (b7) r1 = 108
144: (73) *(u8 *)(r8 +5) = r1
[...]
For Cilium use-case in particular, this enables migrating configuration
constants from Cilium daemon's generated header defines into global
data sections such that expensive runtime recompilations with LLVM can
be avoided altogether. Instead, the ELF file becomes effectively a
"template", meaning, it is compiled only once (!) and the Cilium daemon
will then rewrite relevant configuration data from the ELF's .data or
.rodata sections directly instead of recompiling the program. The
updated ELF is then loaded into the kernel and atomically replaces
the existing program in the networking datapath. More info in [0].
Based upon recent fix in LLVM, commit c0db6b6bd444 ("[BPF] Don't fail
for static variables").
[0] LPC 2018, BPF track, "ELF relocation for static data in BPF",
http://vger.kernel.org/lpc-bpf2018.html#session-3
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2019-04-10 05:20:13 +08:00
|
|
|
int bpf_map_freeze(int fd)
|
|
|
|
{
|
|
|
|
union bpf_attr attr;
|
|
|
|
|
|
|
|
memset(&attr, 0, sizeof(attr));
|
|
|
|
attr.map_fd = fd;
|
|
|
|
|
|
|
|
return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
|
|
|
|
}
|
|
|
|
|
2020-01-16 02:43:06 +08:00
|
|
|
static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
|
|
|
|
void *out_batch, void *keys, void *values,
|
|
|
|
__u32 *count,
|
|
|
|
const struct bpf_map_batch_opts *opts)
|
|
|
|
{
|
2020-01-16 12:59:18 +08:00
|
|
|
union bpf_attr attr;
|
2020-01-16 02:43:06 +08:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!OPTS_VALID(opts, bpf_map_batch_opts))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
memset(&attr, 0, sizeof(attr));
|
|
|
|
attr.batch.map_fd = fd;
|
|
|
|
attr.batch.in_batch = ptr_to_u64(in_batch);
|
|
|
|
attr.batch.out_batch = ptr_to_u64(out_batch);
|
|
|
|
attr.batch.keys = ptr_to_u64(keys);
|
|
|
|
attr.batch.values = ptr_to_u64(values);
|
|
|
|
attr.batch.count = *count;
|
|
|
|
attr.batch.elem_flags = OPTS_GET(opts, elem_flags, 0);
|
|
|
|
attr.batch.flags = OPTS_GET(opts, flags, 0);
|
|
|
|
|
|
|
|
ret = sys_bpf(cmd, &attr, sizeof(attr));
|
|
|
|
*count = attr.batch.count;
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Batch delete: forwards to bpf_map_batch_common() with
 * BPF_MAP_DELETE_BATCH. Only @keys is passed; the in/out batch and the
 * values pointer are NULL.
 */
int bpf_map_delete_batch(int fd, void *keys, __u32 *count,
			 const struct bpf_map_batch_opts *opts)
{
	void *const unused = NULL;

	return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, unused, unused,
				    keys, unused, count, opts);
}
|
|
|
|
|
|
|
|
/*
 * Batch lookup: forwards every argument to bpf_map_batch_common() with
 * BPF_MAP_LOOKUP_BATCH and returns its result.
 */
int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys,
			 void *values, __u32 *count,
			 const struct bpf_map_batch_opts *opts)
{
	int ret;

	ret = bpf_map_batch_common(BPF_MAP_LOOKUP_BATCH, fd, in_batch,
				   out_batch, keys, values, count, opts);
	return ret;
}
|
|
|
|
|
|
|
|
/*
 * Batch lookup-and-delete: forwards every argument to
 * bpf_map_batch_common() with BPF_MAP_LOOKUP_AND_DELETE_BATCH and returns
 * its result.
 */
int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch,
				    void *keys, void *values, __u32 *count,
				    const struct bpf_map_batch_opts *opts)
{
	int ret;

	ret = bpf_map_batch_common(BPF_MAP_LOOKUP_AND_DELETE_BATCH, fd,
				   in_batch, out_batch, keys, values, count,
				   opts);
	return ret;
}
|
|
|
|
|
|
|
|
/*
 * Batch update: forwards to bpf_map_batch_common() with
 * BPF_MAP_UPDATE_BATCH. The in/out batch pointers are NULL; @keys and
 * @values are passed through.
 */
int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count,
			 const struct bpf_map_batch_opts *opts)
{
	void *const unused = NULL;

	return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, unused, unused,
				    keys, values, count, opts);
}
|
|
|
|
|
2016-11-26 15:03:25 +08:00
|
|
|
int bpf_obj_pin(int fd, const char *pathname)
|
|
|
|
{
|
|
|
|
union bpf_attr attr;
|
|
|
|
|
2019-02-14 02:25:53 +08:00
|
|
|
memset(&attr, 0, sizeof(attr));
|
2016-11-26 15:03:25 +08:00
|
|
|
attr.pathname = ptr_to_u64((void *)pathname);
|
|
|
|
attr.bpf_fd = fd;
|
|
|
|
|
|
|
|
return sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
|
|
|
|
}
|
|
|
|
|
|
|
|
int bpf_obj_get(const char *pathname)
|
|
|
|
{
|
|
|
|
union bpf_attr attr;
|
|
|
|
|
2019-02-14 02:25:53 +08:00
|
|
|
memset(&attr, 0, sizeof(attr));
|
2016-11-26 15:03:25 +08:00
|
|
|
attr.pathname = ptr_to_u64((void *)pathname);
|
|
|
|
|
|
|
|
return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
|
|
|
|
}
|
2016-12-15 06:05:26 +08:00
|
|
|
|
2017-08-28 22:10:04 +08:00
|
|
|
int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
|
|
|
|
unsigned int flags)
|
2019-12-19 15:44:36 +08:00
|
|
|
{
|
|
|
|
DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, opts,
|
|
|
|
.flags = flags,
|
|
|
|
);
|
|
|
|
|
|
|
|
return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts);
|
|
|
|
}
|
|
|
|
|
|
|
|
int bpf_prog_attach_xattr(int prog_fd, int target_fd,
|
|
|
|
enum bpf_attach_type type,
|
|
|
|
const struct bpf_prog_attach_opts *opts)
|
2016-12-15 06:05:26 +08:00
|
|
|
{
|
|
|
|
union bpf_attr attr;
|
|
|
|
|
2019-12-19 15:44:36 +08:00
|
|
|
if (!OPTS_VALID(opts, bpf_prog_attach_opts))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2019-02-14 02:25:53 +08:00
|
|
|
memset(&attr, 0, sizeof(attr));
|
2016-12-15 06:05:26 +08:00
|
|
|
attr.target_fd = target_fd;
|
2017-08-28 22:10:04 +08:00
|
|
|
attr.attach_bpf_fd = prog_fd;
|
2016-12-15 06:05:26 +08:00
|
|
|
attr.attach_type = type;
|
2019-12-19 15:44:36 +08:00
|
|
|
attr.attach_flags = OPTS_GET(opts, flags, 0);
|
|
|
|
attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0);
|
2016-12-15 06:05:26 +08:00
|
|
|
|
|
|
|
return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
|
|
|
|
}
|
|
|
|
|
|
|
|
int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
|
|
|
|
{
|
|
|
|
union bpf_attr attr;
|
|
|
|
|
2019-02-14 02:25:53 +08:00
|
|
|
memset(&attr, 0, sizeof(attr));
|
2016-12-15 06:05:26 +08:00
|
|
|
attr.target_fd = target_fd;
|
|
|
|
attr.attach_type = type;
|
|
|
|
|
|
|
|
return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
|
|
|
|
}
|
2017-03-31 12:45:39 +08:00
|
|
|
|
2017-10-03 13:50:24 +08:00
|
|
|
int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
|
|
|
|
{
|
|
|
|
union bpf_attr attr;
|
|
|
|
|
2019-02-14 02:25:53 +08:00
|
|
|
memset(&attr, 0, sizeof(attr));
|
2017-10-03 13:50:24 +08:00
|
|
|
attr.target_fd = target_fd;
|
|
|
|
attr.attach_bpf_fd = prog_fd;
|
|
|
|
attr.attach_type = type;
|
|
|
|
|
|
|
|
return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
|
|
|
|
}
|
|
|
|
|
2020-03-30 11:00:00 +08:00
|
|
|
int bpf_link_create(int prog_fd, int target_fd,
|
|
|
|
enum bpf_attach_type attach_type,
|
|
|
|
const struct bpf_link_create_opts *opts)
|
|
|
|
{
|
|
|
|
union bpf_attr attr;
|
|
|
|
|
|
|
|
if (!OPTS_VALID(opts, bpf_link_create_opts))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
memset(&attr, 0, sizeof(attr));
|
|
|
|
attr.link_create.prog_fd = prog_fd;
|
|
|
|
attr.link_create.target_fd = target_fd;
|
|
|
|
attr.link_create.attach_type = attach_type;
|
2020-07-24 02:41:17 +08:00
|
|
|
attr.link_create.flags = OPTS_GET(opts, flags, 0);
|
2020-03-30 11:00:00 +08:00
|
|
|
|
|
|
|
return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
|
|
|
|
}
|
|
|
|
|
2020-08-01 02:28:27 +08:00
|
|
|
int bpf_link_detach(int link_fd)
|
|
|
|
{
|
|
|
|
union bpf_attr attr;
|
|
|
|
|
|
|
|
memset(&attr, 0, sizeof(attr));
|
|
|
|
attr.link_detach.link_fd = link_fd;
|
|
|
|
|
|
|
|
return sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
|
|
|
|
}
|
|
|
|
|
2020-03-30 11:00:00 +08:00
|
|
|
int bpf_link_update(int link_fd, int new_prog_fd,
|
|
|
|
const struct bpf_link_update_opts *opts)
|
|
|
|
{
|
|
|
|
union bpf_attr attr;
|
|
|
|
|
|
|
|
if (!OPTS_VALID(opts, bpf_link_update_opts))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
memset(&attr, 0, sizeof(attr));
|
|
|
|
attr.link_update.link_fd = link_fd;
|
|
|
|
attr.link_update.new_prog_fd = new_prog_fd;
|
|
|
|
attr.link_update.flags = OPTS_GET(opts, flags, 0);
|
|
|
|
attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
|
|
|
|
|
|
|
|
return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
|
|
|
|
}
|
|
|
|
|
2020-05-10 01:59:17 +08:00
|
|
|
int bpf_iter_create(int link_fd)
|
|
|
|
{
|
|
|
|
union bpf_attr attr;
|
|
|
|
|
|
|
|
memset(&attr, 0, sizeof(attr));
|
|
|
|
attr.iter_create.link_fd = link_fd;
|
|
|
|
|
|
|
|
return sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
|
|
|
|
}
|
|
|
|
|
2017-10-03 13:50:27 +08:00
|
|
|
/*
 * Issue BPF_PROG_QUERY for @target_fd and attach type @type.
 *
 * @prog_cnt is in/out: its value is sent with the request and it is
 * overwritten with the request's prog_cnt after the call. @attach_flags is
 * optional (may be NULL) and, when given, receives the request's
 * attach_flags. Both are written regardless of the call's result.
 *
 * Returns the result of sys_bpf() unchanged.
 */
int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
		   __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
{
	union bpf_attr req;
	int err;

	memset(&req, 0, sizeof(req));
	req.query.target_fd = target_fd;
	req.query.attach_type = type;
	req.query.query_flags = query_flags;
	req.query.prog_cnt = *prog_cnt;
	req.query.prog_ids = ptr_to_u64(prog_ids);

	err = sys_bpf(BPF_PROG_QUERY, &req, sizeof(req));

	if (attach_flags != NULL)
		*attach_flags = req.query.attach_flags;
	*prog_cnt = req.query.prog_cnt;

	return err;
}
|
|
|
|
|
2017-03-31 12:45:39 +08:00
|
|
|
/*
 * Issue BPF_PROG_TEST_RUN for @prog_fd with input buffer @data of @size
 * bytes, output buffer @data_out and repeat count @repeat.
 *
 * @size_out, @retval and @duration are optional (may be NULL); each one
 * that is given receives the matching field of the request after the
 * call, regardless of the call's result.
 *
 * Returns the result of sys_bpf() unchanged.
 */
int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
		      void *data_out, __u32 *size_out, __u32 *retval,
		      __u32 *duration)
{
	union bpf_attr req;
	int err;

	memset(&req, 0, sizeof(req));
	req.test.prog_fd = prog_fd;
	req.test.data_in = ptr_to_u64(data);
	req.test.data_out = ptr_to_u64(data_out);
	req.test.data_size_in = size;
	req.test.repeat = repeat;

	err = sys_bpf(BPF_PROG_TEST_RUN, &req, sizeof(req));

	/* copy out only the results the caller asked for */
	if (size_out != NULL)
		*size_out = req.test.data_size_out;
	if (retval != NULL)
		*retval = req.test.retval;
	if (duration != NULL)
		*duration = req.test.duration;

	return err;
}
|
2017-06-06 03:15:53 +08:00
|
|
|
|
2018-12-03 19:31:25 +08:00
|
|
|
int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
|
|
|
|
{
|
|
|
|
union bpf_attr attr;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!test_attr->data_out && test_attr->data_size_out > 0)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2019-02-14 02:25:53 +08:00
|
|
|
memset(&attr, 0, sizeof(attr));
|
2018-12-03 19:31:25 +08:00
|
|
|
attr.test.prog_fd = test_attr->prog_fd;
|
|
|
|
attr.test.data_in = ptr_to_u64(test_attr->data_in);
|
|
|
|
attr.test.data_out = ptr_to_u64(test_attr->data_out);
|
|
|
|
attr.test.data_size_in = test_attr->data_size_in;
|
|
|
|
attr.test.data_size_out = test_attr->data_size_out;
|
2019-04-10 02:49:10 +08:00
|
|
|
attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in);
|
|
|
|
attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out);
|
|
|
|
attr.test.ctx_size_in = test_attr->ctx_size_in;
|
|
|
|
attr.test.ctx_size_out = test_attr->ctx_size_out;
|
2018-12-03 19:31:25 +08:00
|
|
|
attr.test.repeat = test_attr->repeat;
|
|
|
|
|
|
|
|
ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
|
|
|
|
test_attr->data_size_out = attr.test.data_size_out;
|
2019-04-10 02:49:10 +08:00
|
|
|
test_attr->ctx_size_out = attr.test.ctx_size_out;
|
2018-12-03 19:31:25 +08:00
|
|
|
test_attr->retval = attr.test.retval;
|
|
|
|
test_attr->duration = attr.test.duration;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2019-08-20 17:31:52 +08:00
|
|
|
/*
 * Shared implementation behind the bpf_*_get_next_id() wrappers: issues
 * @cmd with start_id set to @start_id.
 *
 * *@next_id is written from the request only when sys_bpf() returns 0.
 * Returns the result of sys_bpf().
 */
static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
{
	union bpf_attr req;
	int ret;

	memset(&req, 0, sizeof(req));
	req.start_id = start_id;

	ret = sys_bpf(cmd, &req, sizeof(req));
	if (ret)
		return ret;

	*next_id = req.next_id;
	return 0;
}
|
|
|
|
|
2019-08-20 17:31:52 +08:00
|
|
|
/* Forwards to bpf_obj_get_next_id() with BPF_PROG_GET_NEXT_ID. */
int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
{
	const int cmd = BPF_PROG_GET_NEXT_ID;

	return bpf_obj_get_next_id(start_id, next_id, cmd);
}
|
2017-06-06 03:15:53 +08:00
|
|
|
|
2019-08-20 17:31:52 +08:00
|
|
|
/* Forwards to bpf_obj_get_next_id() with BPF_MAP_GET_NEXT_ID. */
int bpf_map_get_next_id(__u32 start_id, __u32 *next_id)
{
	const int cmd = BPF_MAP_GET_NEXT_ID;

	return bpf_obj_get_next_id(start_id, next_id, cmd);
}
|
|
|
|
|
2019-08-20 17:31:53 +08:00
|
|
|
/* Forwards to bpf_obj_get_next_id() with BPF_BTF_GET_NEXT_ID. */
int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id)
{
	const int cmd = BPF_BTF_GET_NEXT_ID;

	return bpf_obj_get_next_id(start_id, next_id, cmd);
}
|
|
|
|
|
2020-04-29 08:16:09 +08:00
|
|
|
/* Forwards to bpf_obj_get_next_id() with BPF_LINK_GET_NEXT_ID. */
int bpf_link_get_next_id(__u32 start_id, __u32 *next_id)
{
	const int cmd = BPF_LINK_GET_NEXT_ID;

	return bpf_obj_get_next_id(start_id, next_id, cmd);
}
|
|
|
|
|
2017-06-06 03:15:53 +08:00
|
|
|
/*
 * Issue BPF_PROG_GET_FD_BY_ID with prog_id set to @id.
 *
 * Returns the result of sys_bpf() unchanged.
 */
int bpf_prog_get_fd_by_id(__u32 id)
{
	union bpf_attr req;
	int ret;

	memset(&req, 0, sizeof(req));
	req.prog_id = id;

	ret = sys_bpf(BPF_PROG_GET_FD_BY_ID, &req, sizeof(req));
	return ret;
}
|
|
|
|
|
|
|
|
/*
 * Issue BPF_MAP_GET_FD_BY_ID with map_id set to @id.
 *
 * Returns the result of sys_bpf() unchanged.
 */
int bpf_map_get_fd_by_id(__u32 id)
{
	union bpf_attr req;
	int ret;

	memset(&req, 0, sizeof(req));
	req.map_id = id;

	ret = sys_bpf(BPF_MAP_GET_FD_BY_ID, &req, sizeof(req));
	return ret;
}
|
|
|
|
|
2018-05-05 05:49:55 +08:00
|
|
|
/*
 * Issue BPF_BTF_GET_FD_BY_ID with btf_id set to @id.
 *
 * Returns the result of sys_bpf() unchanged.
 */
int bpf_btf_get_fd_by_id(__u32 id)
{
	union bpf_attr req;
	int ret;

	memset(&req, 0, sizeof(req));
	req.btf_id = id;

	ret = sys_bpf(BPF_BTF_GET_FD_BY_ID, &req, sizeof(req));
	return ret;
}
|
|
|
|
|
2020-04-29 08:16:09 +08:00
|
|
|
/*
 * Issue BPF_LINK_GET_FD_BY_ID with link_id set to @id.
 *
 * Returns the result of sys_bpf() unchanged.
 */
int bpf_link_get_fd_by_id(__u32 id)
{
	union bpf_attr req;
	int ret;

	memset(&req, 0, sizeof(req));
	req.link_id = id;

	ret = sys_bpf(BPF_LINK_GET_FD_BY_ID, &req, sizeof(req));
	return ret;
}
|
|
|
|
|
|
|
|
/*
 * Issue BPF_OBJ_GET_INFO_BY_FD for @bpf_fd with @info as the info buffer.
 *
 * @info_len is in/out: its value is sent with the request, and it is
 * overwritten with the request's info_len only when sys_bpf() returns 0.
 *
 * Returns the result of sys_bpf().
 */
int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
{
	union bpf_attr req;
	int ret;

	memset(&req, 0, sizeof(req));
	req.info.bpf_fd = bpf_fd;
	req.info.info_len = *info_len;
	req.info.info = ptr_to_u64(info);

	ret = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &req, sizeof(req));
	if (ret)
		return ret;

	*info_len = req.info.info_len;
	return 0;
}
|
2018-01-31 04:55:01 +08:00
|
|
|
|
2018-03-29 03:05:38 +08:00
|
|
|
int bpf_raw_tracepoint_open(const char *name, int prog_fd)
|
|
|
|
{
|
|
|
|
union bpf_attr attr;
|
|
|
|
|
2019-02-14 02:25:53 +08:00
|
|
|
memset(&attr, 0, sizeof(attr));
|
2018-03-29 03:05:38 +08:00
|
|
|
attr.raw_tracepoint.name = ptr_to_u64(name);
|
|
|
|
attr.raw_tracepoint.prog_fd = prog_fd;
|
|
|
|
|
|
|
|
return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
|
|
|
|
}
|
|
|
|
|
2018-04-19 06:56:05 +08:00
|
|
|
/*
 * Issue BPF_BTF_LOAD for the @btf_size bytes at @btf.
 *
 * Logging is requested (btf_log_level 1, with @log_buf/@log_buf_size) only
 * when @do_log is set and a non-empty log buffer was supplied. If the
 * first attempt was made without logging and sys_bpf() returns -1, the
 * load is retried once with logging enabled, provided a non-empty log
 * buffer was supplied.
 *
 * Returns the result of the last sys_bpf() call.
 */
int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
		 bool do_log)
{
	union bpf_attr attr;
	int fd;

	/*
	 * memset() rather than "= {}": the other wrappers in this file zero
	 * the request this way, and an initializer is only guaranteed to
	 * set the union's first member, not every byte of it.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.btf = ptr_to_u64(btf);
	attr.btf_size = btf_size;

retry:
	if (do_log && log_buf && log_buf_size) {
		attr.btf_log_level = 1;
		attr.btf_log_size = log_buf_size;
		attr.btf_log_buf = ptr_to_u64(log_buf);
	}

	fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
	if (fd == -1 && !do_log && log_buf && log_buf_size) {
		/* do_log is now set, so this branch cannot be taken again */
		do_log = true;
		goto retry;
	}

	return fd;
}
|
2018-05-25 02:21:10 +08:00
|
|
|
|
|
|
|
/*
 * Issue BPF_TASK_FD_QUERY for file descriptor @fd of task @pid.
 *
 * @buf_len is in/out: its value is sent with the request. After the call,
 * *@buf_len, *@prog_id, *@fd_type, *@probe_offset and *@probe_addr are all
 * overwritten from the request, regardless of the call's result; none of
 * these pointers is checked for NULL.
 *
 * Returns the result of sys_bpf() unchanged.
 */
int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
		      __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
		      __u64 *probe_addr)
{
	union bpf_attr attr;
	int err;

	/*
	 * memset() rather than "= {}": the other wrappers in this file zero
	 * the request this way, and an initializer is only guaranteed to
	 * set the union's first member, not every byte of it.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.task_fd_query.pid = pid;
	attr.task_fd_query.fd = fd;
	attr.task_fd_query.flags = flags;
	attr.task_fd_query.buf = ptr_to_u64(buf);
	attr.task_fd_query.buf_len = *buf_len;

	err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));

	*buf_len = attr.task_fd_query.buf_len;
	*prog_id = attr.task_fd_query.prog_id;
	*fd_type = attr.task_fd_query.fd_type;
	*probe_offset = attr.task_fd_query.probe_offset;
	*probe_addr = attr.task_fd_query.probe_addr;

	return err;
}
|
2020-04-30 15:15:05 +08:00
|
|
|
|
|
|
|
int bpf_enable_stats(enum bpf_stats_type type)
|
|
|
|
{
|
|
|
|
union bpf_attr attr;
|
|
|
|
|
|
|
|
memset(&attr, 0, sizeof(attr));
|
|
|
|
attr.enable_stats.type = type;
|
|
|
|
|
|
|
|
return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
|
|
|
|
}
|