Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Daniel Borkmann says: ==================== pull-request: bpf 2021-05-26 The following pull-request contains BPF updates for your *net* tree. We've added 14 non-merge commits during the last 14 day(s) which contain a total of 17 files changed, 513 insertions(+), 231 deletions(-). The main changes are: 1) Fix bpf_skb_change_head() helper to reset mac_len, from Jussi Maki. 2) Fix masking direction swap upon off-reg sign change, from Daniel Borkmann. 3) Fix BPF offloads in verifier by reordering driver callback, from Yinjun Zhang. 4) BPF selftest for ringbuf mmap ro/rw restrictions, from Andrii Nakryiko. 5) Follow-up fixes to nested bprintf per-cpu buffers, from Florent Revest. 6) Fix bpftool sock_release attach point help info, from Liu Jian. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
f5d287126f
|
@ -1,6 +1,5 @@
|
|||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
obj-y += kernel/ mm/
|
||||
obj-$(CONFIG_NET) += net/
|
||||
obj-y += kernel/ mm/ net/
|
||||
obj-$(CONFIG_KVM) += kvm/
|
||||
obj-$(CONFIG_XEN) += xen/
|
||||
obj-$(CONFIG_CRYPTO) += crypto/
|
||||
|
|
|
@ -37,6 +37,7 @@ config BPF_SYSCALL
|
|||
|
||||
config BPF_JIT
|
||||
bool "Enable BPF Just In Time compiler"
|
||||
depends on BPF
|
||||
depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
|
||||
depends on MODULES
|
||||
help
|
||||
|
|
|
@ -107,10 +107,12 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
|||
return &bpf_inode_storage_get_proto;
|
||||
case BPF_FUNC_inode_storage_delete:
|
||||
return &bpf_inode_storage_delete_proto;
|
||||
#ifdef CONFIG_NET
|
||||
case BPF_FUNC_sk_storage_get:
|
||||
return &bpf_sk_storage_get_proto;
|
||||
case BPF_FUNC_sk_storage_delete:
|
||||
return &bpf_sk_storage_delete_proto;
|
||||
#endif /* CONFIG_NET */
|
||||
case BPF_FUNC_spin_lock:
|
||||
return &bpf_spin_lock_proto;
|
||||
case BPF_FUNC_spin_unlock:
|
||||
|
|
|
@ -692,13 +692,15 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Per-cpu temp buffers which can be used by printf-like helpers for %s or %p
|
||||
/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary
|
||||
* arguments representation.
|
||||
*/
|
||||
#define MAX_PRINTF_BUF_LEN 512
|
||||
#define MAX_BPRINTF_BUF_LEN 512
|
||||
|
||||
/* Support executing three nested bprintf helper calls on a given CPU */
|
||||
#define MAX_BPRINTF_NEST_LEVEL 3
|
||||
struct bpf_bprintf_buffers {
|
||||
char tmp_bufs[3][MAX_PRINTF_BUF_LEN];
|
||||
char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN];
|
||||
};
|
||||
static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
|
||||
static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
|
||||
|
@ -710,7 +712,7 @@ static int try_get_fmt_tmp_buf(char **tmp_buf)
|
|||
|
||||
preempt_disable();
|
||||
nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
|
||||
if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bufs->tmp_bufs))) {
|
||||
if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
|
||||
this_cpu_dec(bpf_bprintf_nest_level);
|
||||
preempt_enable();
|
||||
return -EBUSY;
|
||||
|
@ -761,7 +763,7 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
|
|||
if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
|
||||
return -EBUSY;
|
||||
|
||||
tmp_buf_end = tmp_buf + MAX_PRINTF_BUF_LEN;
|
||||
tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN;
|
||||
*bin_args = (u32 *)tmp_buf;
|
||||
}
|
||||
|
||||
|
|
|
@ -6409,18 +6409,10 @@ enum {
|
|||
};
|
||||
|
||||
static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
|
||||
const struct bpf_reg_state *off_reg,
|
||||
u32 *alu_limit, u8 opcode)
|
||||
u32 *alu_limit, bool mask_to_left)
|
||||
{
|
||||
bool off_is_neg = off_reg->smin_value < 0;
|
||||
bool mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
|
||||
(opcode == BPF_SUB && !off_is_neg);
|
||||
u32 max = 0, ptr_limit = 0;
|
||||
|
||||
if (!tnum_is_const(off_reg->var_off) &&
|
||||
(off_reg->smin_value < 0) != (off_reg->smax_value < 0))
|
||||
return REASON_BOUNDS;
|
||||
|
||||
switch (ptr_reg->type) {
|
||||
case PTR_TO_STACK:
|
||||
/* Offset 0 is out-of-bounds, but acceptable start for the
|
||||
|
@ -6486,15 +6478,20 @@ static bool sanitize_needed(u8 opcode)
|
|||
return opcode == BPF_ADD || opcode == BPF_SUB;
|
||||
}
|
||||
|
||||
/* State carried between the two sanitize_ptr_alu() calls made for one
 * pointer ALU instruction (first with commit_window == false, then with
 * commit_window == true).
 */
struct bpf_sanitize_info {
|
||||
/* Aux data used in place of cur_aux(env) while commit_window is false;
 * its alu_state and alu_limit are read back in the commit pass.
 */
struct bpf_insn_aux_data aux;
|
||||
/* Masking direction, derived from the opcode and the sign of off_reg
 * in the !commit_window pass and handed to retrieve_ptr_limit().
 */
bool mask_to_left;
|
||||
};
|
||||
|
||||
static int sanitize_ptr_alu(struct bpf_verifier_env *env,
|
||||
struct bpf_insn *insn,
|
||||
const struct bpf_reg_state *ptr_reg,
|
||||
const struct bpf_reg_state *off_reg,
|
||||
struct bpf_reg_state *dst_reg,
|
||||
struct bpf_insn_aux_data *tmp_aux,
|
||||
struct bpf_sanitize_info *info,
|
||||
const bool commit_window)
|
||||
{
|
||||
struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : tmp_aux;
|
||||
struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
|
||||
struct bpf_verifier_state *vstate = env->cur_state;
|
||||
bool off_is_imm = tnum_is_const(off_reg->var_off);
|
||||
bool off_is_neg = off_reg->smin_value < 0;
|
||||
|
@ -6515,7 +6512,16 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
|
|||
if (vstate->speculative)
|
||||
goto do_sim;
|
||||
|
||||
err = retrieve_ptr_limit(ptr_reg, off_reg, &alu_limit, opcode);
|
||||
if (!commit_window) {
|
||||
if (!tnum_is_const(off_reg->var_off) &&
|
||||
(off_reg->smin_value < 0) != (off_reg->smax_value < 0))
|
||||
return REASON_BOUNDS;
|
||||
|
||||
info->mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
|
||||
(opcode == BPF_SUB && !off_is_neg);
|
||||
}
|
||||
|
||||
err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
|
@ -6523,8 +6529,8 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
|
|||
/* In commit phase we narrow the masking window based on
|
||||
* the observed pointer move after the simulated operation.
|
||||
*/
|
||||
alu_state = tmp_aux->alu_state;
|
||||
alu_limit = abs(tmp_aux->alu_limit - alu_limit);
|
||||
alu_state = info->aux.alu_state;
|
||||
alu_limit = abs(info->aux.alu_limit - alu_limit);
|
||||
} else {
|
||||
alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
|
||||
alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
|
||||
|
@ -6539,8 +6545,12 @@ do_sim:
|
|||
/* If we're in commit phase, we're done here given we already
|
||||
* pushed the truncated dst_reg into the speculative verification
|
||||
* stack.
|
||||
*
|
||||
* Also, when register is a known constant, we rewrite register-based
|
||||
* operation to immediate-based, and thus do not need masking (and as
|
||||
* a consequence, do not need to simulate the zero-truncation either).
|
||||
*/
|
||||
if (commit_window)
|
||||
if (commit_window || off_is_imm)
|
||||
return 0;
|
||||
|
||||
/* Simulate and find potential out-of-bounds access under
|
||||
|
@ -6685,7 +6695,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
|
|||
smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
|
||||
u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
|
||||
umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
|
||||
struct bpf_insn_aux_data tmp_aux = {};
|
||||
struct bpf_sanitize_info info = {};
|
||||
u8 opcode = BPF_OP(insn->code);
|
||||
u32 dst = insn->dst_reg;
|
||||
int ret;
|
||||
|
@ -6754,7 +6764,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
|
|||
|
||||
if (sanitize_needed(opcode)) {
|
||||
ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
|
||||
&tmp_aux, false);
|
||||
&info, false);
|
||||
if (ret < 0)
|
||||
return sanitize_err(env, insn, ret, off_reg, dst_reg);
|
||||
}
|
||||
|
@ -6895,7 +6905,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
|
|||
return -EACCES;
|
||||
if (sanitize_needed(opcode)) {
|
||||
ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
|
||||
&tmp_aux, true);
|
||||
&info, true);
|
||||
if (ret < 0)
|
||||
return sanitize_err(env, insn, ret, off_reg, dst_reg);
|
||||
}
|
||||
|
@ -13368,12 +13378,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
|
|||
if (is_priv)
|
||||
env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
|
||||
|
||||
if (bpf_prog_is_dev_bound(env->prog->aux)) {
|
||||
ret = bpf_prog_offload_verifier_prep(env->prog);
|
||||
if (ret)
|
||||
goto skip_full_check;
|
||||
}
|
||||
|
||||
env->explored_states = kvcalloc(state_htab_size(env),
|
||||
sizeof(struct bpf_verifier_state_list *),
|
||||
GFP_USER);
|
||||
|
@ -13401,6 +13405,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
|
|||
if (ret < 0)
|
||||
goto skip_full_check;
|
||||
|
||||
if (bpf_prog_is_dev_bound(env->prog->aux)) {
|
||||
ret = bpf_prog_offload_verifier_prep(env->prog);
|
||||
if (ret)
|
||||
goto skip_full_check;
|
||||
}
|
||||
|
||||
ret = check_cfg(env);
|
||||
if (ret < 0)
|
||||
goto skip_full_check;
|
||||
|
|
|
@ -3784,6 +3784,7 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
|
|||
__skb_push(skb, head_room);
|
||||
memset(skb->data, 0, head_room);
|
||||
skb_reset_mac_header(skb);
|
||||
skb_reset_mac_len(skb);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
|
|
@ -30,7 +30,8 @@ CGROUP COMMANDS
|
|||
| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
|
||||
| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
|
||||
| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** |
|
||||
| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** }
|
||||
| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** |
|
||||
| **sock_release** }
|
||||
| *ATTACH_FLAGS* := { **multi** | **override** }
|
||||
|
||||
DESCRIPTION
|
||||
|
@ -106,6 +107,7 @@ DESCRIPTION
|
|||
**getpeername6** call to getpeername(2) for an inet6 socket (since 5.8);
|
||||
**getsockname4** call to getsockname(2) for an inet4 socket (since 5.8);
|
||||
**getsockname6** call to getsockname(2) for an inet6 socket (since 5.8);
|
||||
**sock_release** closing a userspace inet socket (since 5.9).
|
||||
|
||||
**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
|
||||
Detach *PROG* from the cgroup *CGROUP* and attach type
|
||||
|
|
|
@ -44,7 +44,7 @@ PROG COMMANDS
|
|||
| **cgroup/connect4** | **cgroup/connect6** | **cgroup/getpeername4** | **cgroup/getpeername6** |
|
||||
| **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
|
||||
| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
|
||||
| **cgroup/getsockopt** | **cgroup/setsockopt** |
|
||||
| **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** |
|
||||
| **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
|
||||
| }
|
||||
| *ATTACH_TYPE* := {
|
||||
|
|
|
@ -478,7 +478,7 @@ _bpftool()
|
|||
cgroup/recvmsg4 cgroup/recvmsg6 \
|
||||
cgroup/post_bind4 cgroup/post_bind6 \
|
||||
cgroup/sysctl cgroup/getsockopt \
|
||||
cgroup/setsockopt struct_ops \
|
||||
cgroup/setsockopt cgroup/sock_release struct_ops \
|
||||
fentry fexit freplace sk_lookup" -- \
|
||||
"$cur" ) )
|
||||
return 0
|
||||
|
@ -1021,7 +1021,7 @@ _bpftool()
|
|||
device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
|
||||
getpeername4 getpeername6 getsockname4 getsockname6 \
|
||||
sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \
|
||||
setsockopt'
|
||||
setsockopt sock_release'
|
||||
local ATTACH_FLAGS='multi override'
|
||||
local PROG_TYPE='id pinned tag name'
|
||||
case $prev in
|
||||
|
@ -1032,7 +1032,7 @@ _bpftool()
|
|||
ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
|
||||
post_bind4|post_bind6|connect4|connect6|getpeername4|\
|
||||
getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\
|
||||
recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt)
|
||||
recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt|sock_release)
|
||||
COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
|
||||
"$cur" ) )
|
||||
return 0
|
||||
|
|
|
@ -28,7 +28,8 @@
|
|||
" connect6 | getpeername4 | getpeername6 |\n" \
|
||||
" getsockname4 | getsockname6 | sendmsg4 |\n" \
|
||||
" sendmsg6 | recvmsg4 | recvmsg6 |\n" \
|
||||
" sysctl | getsockopt | setsockopt }"
|
||||
" sysctl | getsockopt | setsockopt |\n" \
|
||||
" sock_release }"
|
||||
|
||||
static unsigned int query_flags;
|
||||
|
||||
|
|
|
@ -2138,7 +2138,7 @@ static int do_help(int argc, char **argv)
|
|||
" cgroup/getpeername4 | cgroup/getpeername6 |\n"
|
||||
" cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n"
|
||||
" cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
|
||||
" cgroup/getsockopt | cgroup/setsockopt |\n"
|
||||
" cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n"
|
||||
" struct_ops | fentry | fexit | freplace | sk_lookup }\n"
|
||||
" ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
|
||||
" flow_dissector }\n"
|
||||
|
|
|
@ -86,8 +86,9 @@ void test_ringbuf(void)
|
|||
const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample);
|
||||
pthread_t thread;
|
||||
long bg_ret = -1;
|
||||
int err, cnt;
|
||||
int err, cnt, rb_fd;
|
||||
int page_size = getpagesize();
|
||||
void *mmap_ptr, *tmp_ptr;
|
||||
|
||||
skel = test_ringbuf__open();
|
||||
if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
|
||||
|
@ -101,6 +102,52 @@ void test_ringbuf(void)
|
|||
if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
|
||||
goto cleanup;
|
||||
|
||||
rb_fd = bpf_map__fd(skel->maps.ringbuf);
|
||||
/* good read/write cons_pos */
|
||||
mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
|
||||
ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos");
|
||||
tmp_ptr = mremap(mmap_ptr, page_size, 2 * page_size, MREMAP_MAYMOVE);
|
||||
if (!ASSERT_ERR_PTR(tmp_ptr, "rw_extend"))
|
||||
goto cleanup;
|
||||
ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_cons_pos_protect");
|
||||
ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw");
|
||||
|
||||
/* bad writeable prod_pos */
|
||||
mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, page_size);
|
||||
err = -errno;
|
||||
ASSERT_ERR_PTR(mmap_ptr, "wr_prod_pos");
|
||||
ASSERT_EQ(err, -EPERM, "wr_prod_pos_err");
|
||||
|
||||
/* bad writeable data pages */
|
||||
mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
|
||||
err = -errno;
|
||||
ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_one");
|
||||
ASSERT_EQ(err, -EPERM, "wr_data_page_one_err");
|
||||
mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 3 * page_size);
|
||||
ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_two");
|
||||
mmap_ptr = mmap(NULL, 2 * page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
|
||||
ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_all");
|
||||
|
||||
/* good read-only pages */
|
||||
mmap_ptr = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED, rb_fd, 0);
|
||||
if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos"))
|
||||
goto cleanup;
|
||||
|
||||
ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_WRITE), "write_protect");
|
||||
ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_EXEC), "exec_protect");
|
||||
ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 4 * page_size, MREMAP_MAYMOVE), "ro_remap");
|
||||
ASSERT_OK(munmap(mmap_ptr, 4 * page_size), "unmap_ro");
|
||||
|
||||
/* good read-only pages with initial offset */
|
||||
mmap_ptr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rb_fd, page_size);
|
||||
if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos"))
|
||||
goto cleanup;
|
||||
|
||||
ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_WRITE), "write_protect");
|
||||
ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_protect");
|
||||
ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 3 * page_size, MREMAP_MAYMOVE), "ro_remap");
|
||||
ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_ro");
|
||||
|
||||
/* only trigger BPF program for current process */
|
||||
skel->bss->pid = getpid();
|
||||
|
||||
|
|
|
@ -11,14 +11,17 @@
|
|||
*/
|
||||
|
||||
#define _GNU_SOURCE
|
||||
#include <fcntl.h>
|
||||
|
||||
#include <arpa/inet.h>
|
||||
#include <linux/limits.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/if_tun.h>
|
||||
#include <linux/if.h>
|
||||
#include <sched.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/mount.h>
|
||||
|
||||
#include "test_progs.h"
|
||||
#include "network_helpers.h"
|
||||
|
@ -32,18 +35,25 @@
|
|||
|
||||
#define IP4_SRC "172.16.1.100"
|
||||
#define IP4_DST "172.16.2.100"
|
||||
#define IP4_TUN_SRC "172.17.1.100"
|
||||
#define IP4_TUN_FWD "172.17.1.200"
|
||||
#define IP4_PORT 9004
|
||||
|
||||
#define IP6_SRC "::1:dead:beef:cafe"
|
||||
#define IP6_DST "::2:dead:beef:cafe"
|
||||
#define IP6_SRC "0::1:dead:beef:cafe"
|
||||
#define IP6_DST "0::2:dead:beef:cafe"
|
||||
#define IP6_TUN_SRC "1::1:dead:beef:cafe"
|
||||
#define IP6_TUN_FWD "1::2:dead:beef:cafe"
|
||||
#define IP6_PORT 9006
|
||||
|
||||
#define IP4_SLL "169.254.0.1"
|
||||
#define IP4_DLL "169.254.0.2"
|
||||
#define IP4_NET "169.254.0.0"
|
||||
|
||||
#define MAC_DST_FWD "00:11:22:33:44:55"
|
||||
#define MAC_DST "00:22:33:44:55:66"
|
||||
|
||||
#define IFADDR_STR_LEN 18
|
||||
#define PING_ARGS "-c 3 -w 10 -q"
|
||||
#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
|
||||
|
||||
#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
|
||||
#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
|
||||
|
@ -51,120 +61,104 @@
|
|||
|
||||
#define TIMEOUT_MILLIS 10000
|
||||
|
||||
#define MAX_PROC_MODS 128
|
||||
#define MAX_PROC_VALUE_LEN 16
|
||||
|
||||
#define log_err(MSG, ...) \
|
||||
fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
|
||||
__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
|
||||
|
||||
struct proc_mod {
|
||||
char path[PATH_MAX];
|
||||
char oldval[MAX_PROC_VALUE_LEN];
|
||||
int oldlen;
|
||||
};
|
||||
|
||||
static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
|
||||
static int root_netns_fd = -1;
|
||||
static int num_proc_mods;
|
||||
static struct proc_mod proc_mods[MAX_PROC_MODS];
|
||||
|
||||
/**
|
||||
* modify_proc() - Modify entry in /proc
|
||||
*
|
||||
* Modifies an entry in /proc and saves the original value for later
|
||||
* restoration with restore_proc().
|
||||
*/
|
||||
static int modify_proc(const char *path, const char *newval)
|
||||
static int write_file(const char *path, const char *newval)
|
||||
{
|
||||
struct proc_mod *mod;
|
||||
FILE *f;
|
||||
|
||||
if (num_proc_mods + 1 > MAX_PROC_MODS)
|
||||
return -1;
|
||||
|
||||
f = fopen(path, "r+");
|
||||
if (!f)
|
||||
return -1;
|
||||
|
||||
mod = &proc_mods[num_proc_mods];
|
||||
num_proc_mods++;
|
||||
|
||||
strncpy(mod->path, path, PATH_MAX);
|
||||
|
||||
if (!fread(mod->oldval, 1, MAX_PROC_VALUE_LEN, f)) {
|
||||
log_err("reading from %s failed", path);
|
||||
goto fail;
|
||||
}
|
||||
rewind(f);
|
||||
if (fwrite(newval, strlen(newval), 1, f) != 1) {
|
||||
log_err("writing to %s failed", path);
|
||||
goto fail;
|
||||
fclose(f);
|
||||
return -1;
|
||||
}
|
||||
|
||||
fclose(f);
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
fclose(f);
|
||||
num_proc_mods--;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* restore_proc() - Restore all /proc modifications
|
||||
*/
|
||||
static void restore_proc(void)
|
||||
/* Handle returned by open_netns() and consumed by close_netns(). */
struct nstoken {
|
||||
/* fd for /proc/self/ns/net, opened before switching namespaces, so the
 * previous network namespace can be re-entered later.
 */
int orig_netns_fd;
|
||||
};
|
||||
|
||||
static int setns_by_fd(int nsfd)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < num_proc_mods; i++) {
|
||||
struct proc_mod *mod = &proc_mods[i];
|
||||
FILE *f;
|
||||
|
||||
f = fopen(mod->path, "w");
|
||||
if (!f) {
|
||||
log_err("fopen of %s failed", mod->path);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (fwrite(mod->oldval, mod->oldlen, 1, f) != 1)
|
||||
log_err("fwrite to %s failed", mod->path);
|
||||
|
||||
fclose(f);
|
||||
}
|
||||
num_proc_mods = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* setns_by_name() - Set networks namespace by name
|
||||
*/
|
||||
static int setns_by_name(const char *name)
|
||||
{
|
||||
int nsfd;
|
||||
char nspath[PATH_MAX];
|
||||
int err;
|
||||
|
||||
snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
|
||||
nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
|
||||
if (nsfd < 0)
|
||||
return nsfd;
|
||||
|
||||
err = setns(nsfd, CLONE_NEWNET);
|
||||
close(nsfd);
|
||||
|
||||
return err;
|
||||
if (!ASSERT_OK(err, "setns"))
|
||||
return err;
|
||||
|
||||
/* Switch /sys to the new namespace so that e.g. /sys/class/net
|
||||
* reflects the devices in the new namespace.
|
||||
*/
|
||||
err = unshare(CLONE_NEWNS);
|
||||
if (!ASSERT_OK(err, "unshare"))
|
||||
return err;
|
||||
|
||||
err = umount2("/sys", MNT_DETACH);
|
||||
if (!ASSERT_OK(err, "umount2 /sys"))
|
||||
return err;
|
||||
|
||||
err = mount("sysfs", "/sys", "sysfs", 0, NULL);
|
||||
if (!ASSERT_OK(err, "mount /sys"))
|
||||
return err;
|
||||
|
||||
err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL);
|
||||
if (!ASSERT_OK(err, "mount /sys/fs/bpf"))
|
||||
return err;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* setns_root() - Set network namespace to original (root) namespace
|
||||
* open_netns() - Switch to specified network namespace by name.
|
||||
*
|
||||
* Not expected to ever fail, so error not returned, but failure logged
|
||||
* and test marked as failed.
|
||||
* Returns token with which to restore the original namespace
|
||||
* using close_netns().
|
||||
*/
|
||||
static void setns_root(void)
|
||||
static struct nstoken *open_netns(const char *name)
|
||||
{
|
||||
ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "setns root");
|
||||
int nsfd;
|
||||
char nspath[PATH_MAX];
|
||||
int err;
|
||||
struct nstoken *token;
|
||||
|
||||
token = malloc(sizeof(struct nstoken));
|
||||
if (!ASSERT_OK_PTR(token, "malloc token"))
|
||||
return NULL;
|
||||
|
||||
token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
|
||||
if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net"))
|
||||
goto fail;
|
||||
|
||||
snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
|
||||
nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
|
||||
if (!ASSERT_GE(nsfd, 0, "open netns fd"))
|
||||
goto fail;
|
||||
|
||||
err = setns_by_fd(nsfd);
|
||||
if (!ASSERT_OK(err, "setns_by_fd"))
|
||||
goto fail;
|
||||
|
||||
return token;
|
||||
fail:
|
||||
free(token);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void close_netns(struct nstoken *token)
|
||||
{
|
||||
ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd");
|
||||
free(token);
|
||||
}
|
||||
|
||||
static int netns_setup_namespaces(const char *verb)
|
||||
|
@ -237,15 +231,17 @@ static int get_ifindex(const char *name)
|
|||
|
||||
static int netns_setup_links_and_routes(struct netns_setup_result *result)
|
||||
{
|
||||
struct nstoken *nstoken = NULL;
|
||||
char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
|
||||
char veth_dst_fwd_addr[IFADDR_STR_LEN+1] = {};
|
||||
|
||||
SYS("ip link add veth_src type veth peer name veth_src_fwd");
|
||||
SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
|
||||
|
||||
SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
|
||||
SYS("ip link set veth_dst address " MAC_DST);
|
||||
|
||||
if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
|
||||
goto fail;
|
||||
if (get_ifaddr("veth_dst_fwd", veth_dst_fwd_addr))
|
||||
goto fail;
|
||||
|
||||
result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
|
||||
if (result->ifindex_veth_src_fwd < 0)
|
||||
|
@ -260,7 +256,8 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
|
|||
SYS("ip link set veth_dst netns " NS_DST);
|
||||
|
||||
/** setup in 'src' namespace */
|
||||
if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src"))
|
||||
nstoken = open_netns(NS_SRC);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns src"))
|
||||
goto fail;
|
||||
|
||||
SYS("ip addr add " IP4_SRC "/32 dev veth_src");
|
||||
|
@ -276,8 +273,11 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
|
|||
SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
|
||||
veth_src_fwd_addr);
|
||||
|
||||
close_netns(nstoken);
|
||||
|
||||
/** setup in 'fwd' namespace */
|
||||
if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
|
||||
nstoken = open_netns(NS_FWD);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
|
||||
goto fail;
|
||||
|
||||
/* The fwd netns automatically gets a v6 LL address / routes, but also
|
||||
|
@ -294,8 +294,11 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
|
|||
SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
|
||||
SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
|
||||
|
||||
close_netns(nstoken);
|
||||
|
||||
/** setup in 'dst' namespace */
|
||||
if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst"))
|
||||
nstoken = open_netns(NS_DST);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns dst"))
|
||||
goto fail;
|
||||
|
||||
SYS("ip addr add " IP4_DST "/32 dev veth_dst");
|
||||
|
@ -306,23 +309,20 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
|
|||
SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
|
||||
SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
|
||||
|
||||
SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr %s",
|
||||
veth_dst_fwd_addr);
|
||||
SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr %s",
|
||||
veth_dst_fwd_addr);
|
||||
SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
|
||||
SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
|
||||
|
||||
close_netns(nstoken);
|
||||
|
||||
setns_root();
|
||||
return 0;
|
||||
fail:
|
||||
setns_root();
|
||||
if (nstoken)
|
||||
close_netns(nstoken);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int netns_load_bpf(void)
|
||||
{
|
||||
if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
|
||||
return -1;
|
||||
|
||||
SYS("tc qdisc add dev veth_src_fwd clsact");
|
||||
SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned "
|
||||
SRC_PROG_PIN_FILE);
|
||||
|
@ -335,42 +335,29 @@ static int netns_load_bpf(void)
|
|||
SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
|
||||
CHK_PROG_PIN_FILE);
|
||||
|
||||
setns_root();
|
||||
return -1;
|
||||
fail:
|
||||
setns_root();
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int netns_unload_bpf(void)
|
||||
{
|
||||
if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
|
||||
goto fail;
|
||||
SYS("tc qdisc delete dev veth_src_fwd clsact");
|
||||
SYS("tc qdisc delete dev veth_dst_fwd clsact");
|
||||
|
||||
setns_root();
|
||||
return 0;
|
||||
fail:
|
||||
setns_root();
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
static void test_tcp(int family, const char *addr, __u16 port)
|
||||
{
|
||||
int listen_fd = -1, accept_fd = -1, client_fd = -1;
|
||||
char buf[] = "testing testing";
|
||||
int n;
|
||||
struct nstoken *nstoken;
|
||||
|
||||
if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst"))
|
||||
nstoken = open_netns(NS_DST);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns dst"))
|
||||
return;
|
||||
|
||||
listen_fd = start_server(family, SOCK_STREAM, addr, port, 0);
|
||||
if (!ASSERT_GE(listen_fd, 0, "listen"))
|
||||
goto done;
|
||||
|
||||
if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src"))
|
||||
close_netns(nstoken);
|
||||
nstoken = open_netns(NS_SRC);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns src"))
|
||||
goto done;
|
||||
|
||||
client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
|
||||
|
@ -392,7 +379,8 @@ static void test_tcp(int family, const char *addr, __u16 port)
|
|||
ASSERT_EQ(n, sizeof(buf), "recv from server");
|
||||
|
||||
done:
|
||||
setns_root();
|
||||
if (nstoken)
|
||||
close_netns(nstoken);
|
||||
if (listen_fd >= 0)
|
||||
close(listen_fd);
|
||||
if (accept_fd >= 0)
|
||||
|
@ -405,7 +393,7 @@ static int test_ping(int family, const char *addr)
|
|||
{
|
||||
const char *ping = family == AF_INET6 ? "ping6" : "ping";
|
||||
|
||||
SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s", ping, addr);
|
||||
SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr);
|
||||
return 0;
|
||||
fail:
|
||||
return -1;
|
||||
|
@ -419,19 +407,37 @@ static void test_connectivity(void)
|
|||
test_ping(AF_INET6, IP6_DST);
|
||||
}
|
||||
|
||||
/**
 * set_forwarding() - Turn IPv4 and IPv6 forwarding on or off
 * @enable: true writes "1", false writes "0" to both sysctl files
 *
 * Writes /proc/sys/net/ipv4/ip_forward and
 * /proc/sys/net/ipv6/conf/all/forwarding through write_file().
 *
 * Return: 0 on success, otherwise the non-zero result of the failing
 * write_file() call (the failure is also reported through ASSERT_OK()).
 */
static int set_forwarding(bool enable)
{
	const char *val = enable ? "1" : "0";
	int err;

	/* Assertion tags are value-neutral: this helper both enables and
	 * disables forwarding, so a hard-coded "=0" would be misleading.
	 */
	err = write_file("/proc/sys/net/ipv4/ip_forward", val);
	if (!ASSERT_OK(err, "set ipv4.ip_forward"))
		return err;

	err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", val);
	if (!ASSERT_OK(err, "set ipv6.forwarding"))
		return err;

	return 0;
}
|
||||
|
||||
static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
|
||||
{
|
||||
struct test_tc_neigh_fib *skel;
|
||||
struct nstoken *nstoken = NULL;
|
||||
struct test_tc_neigh_fib *skel = NULL;
|
||||
int err;
|
||||
|
||||
nstoken = open_netns(NS_FWD);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
|
||||
return;
|
||||
|
||||
skel = test_tc_neigh_fib__open();
|
||||
if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
|
||||
return;
|
||||
goto done;
|
||||
|
||||
if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) {
|
||||
test_tc_neigh_fib__destroy(skel);
|
||||
return;
|
||||
}
|
||||
if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
|
||||
goto done;
|
||||
|
||||
err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
|
||||
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
|
||||
|
@ -449,46 +455,37 @@ static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
|
|||
goto done;
|
||||
|
||||
/* bpf_fib_lookup() checks if forwarding is enabled */
|
||||
if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
|
||||
if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
|
||||
goto done;
|
||||
|
||||
err = modify_proc("/proc/sys/net/ipv4/ip_forward", "1");
|
||||
if (!ASSERT_OK(err, "set ipv4.ip_forward"))
|
||||
goto done;
|
||||
|
||||
err = modify_proc("/proc/sys/net/ipv6/conf/all/forwarding", "1");
|
||||
if (!ASSERT_OK(err, "set ipv6.forwarding"))
|
||||
goto done;
|
||||
setns_root();
|
||||
|
||||
test_connectivity();
|
||||
|
||||
done:
|
||||
bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
|
||||
bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
|
||||
bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
|
||||
test_tc_neigh_fib__destroy(skel);
|
||||
netns_unload_bpf();
|
||||
setns_root();
|
||||
restore_proc();
|
||||
if (skel)
|
||||
test_tc_neigh_fib__destroy(skel);
|
||||
close_netns(nstoken);
|
||||
}
|
||||
|
||||
static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
|
||||
{
|
||||
struct test_tc_neigh *skel;
|
||||
struct nstoken *nstoken = NULL;
|
||||
struct test_tc_neigh *skel = NULL;
|
||||
int err;
|
||||
|
||||
nstoken = open_netns(NS_FWD);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
|
||||
return;
|
||||
|
||||
skel = test_tc_neigh__open();
|
||||
if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
|
||||
return;
|
||||
goto done;
|
||||
|
||||
skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
|
||||
skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
|
||||
|
||||
err = test_tc_neigh__load(skel);
|
||||
if (!ASSERT_OK(err, "test_tc_neigh__load")) {
|
||||
test_tc_neigh__destroy(skel);
|
||||
return;
|
||||
}
|
||||
if (!ASSERT_OK(err, "test_tc_neigh__load"))
|
||||
goto done;
|
||||
|
||||
err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
|
||||
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
|
||||
|
@ -505,34 +502,37 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
|
|||
if (netns_load_bpf())
|
||||
goto done;
|
||||
|
||||
if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
|
||||
goto done;
|
||||
|
||||
test_connectivity();
|
||||
|
||||
done:
|
||||
bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
|
||||
bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
|
||||
bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
|
||||
test_tc_neigh__destroy(skel);
|
||||
netns_unload_bpf();
|
||||
setns_root();
|
||||
if (skel)
|
||||
test_tc_neigh__destroy(skel);
|
||||
close_netns(nstoken);
|
||||
}
|
||||
|
||||
static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
|
||||
{
|
||||
struct nstoken *nstoken;
|
||||
struct test_tc_peer *skel;
|
||||
int err;
|
||||
|
||||
nstoken = open_netns(NS_FWD);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
|
||||
return;
|
||||
|
||||
skel = test_tc_peer__open();
|
||||
if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
|
||||
return;
|
||||
goto done;
|
||||
|
||||
skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
|
||||
skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
|
||||
|
||||
err = test_tc_peer__load(skel);
|
||||
if (!ASSERT_OK(err, "test_tc_peer__load")) {
|
||||
test_tc_peer__destroy(skel);
|
||||
return;
|
||||
}
|
||||
if (!ASSERT_OK(err, "test_tc_peer__load"))
|
||||
goto done;
|
||||
|
||||
err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
|
||||
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
|
||||
|
@ -549,41 +549,237 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
|
|||
if (netns_load_bpf())
|
||||
goto done;
|
||||
|
||||
if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
|
||||
goto done;
|
||||
|
||||
test_connectivity();
|
||||
|
||||
done:
|
||||
bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
|
||||
bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
|
||||
bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
|
||||
test_tc_peer__destroy(skel);
|
||||
netns_unload_bpf();
|
||||
setns_root();
|
||||
if (skel)
|
||||
test_tc_peer__destroy(skel);
|
||||
close_netns(nstoken);
|
||||
}
|
||||
|
||||
static int tun_open(char *name)
|
||||
{
|
||||
struct ifreq ifr;
|
||||
int fd, err;
|
||||
|
||||
fd = open("/dev/net/tun", O_RDWR);
|
||||
if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
|
||||
return -1;
|
||||
|
||||
memset(&ifr, 0, sizeof(ifr));
|
||||
|
||||
ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
|
||||
if (*name)
|
||||
strncpy(ifr.ifr_name, name, IFNAMSIZ);
|
||||
|
||||
err = ioctl(fd, TUNSETIFF, &ifr);
|
||||
if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
|
||||
goto fail;
|
||||
|
||||
SYS("ip link set dev %s up", name);
|
||||
|
||||
return fd;
|
||||
fail:
|
||||
close(fd);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Larger of two values; both arguments are evaluated more than once. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Relay direction picked for one iteration of tun_relay_loop(). */
enum {
	SRC_TO_TARGET = 0,
	TARGET_TO_SRC = 1,
};

/*
 * Copy data between @src_fd and @target_fd, one read()/write() pair per
 * iteration, in whichever direction select() reports as readable. When
 * both descriptors are readable, @src_fd is serviced first.
 *
 * The loop has no success exit: it returns 1 after logging when select()
 * or read() fails, or when write() does not write exactly the number of
 * bytes that were read.
 */
static int tun_relay_loop(int src_fd, int target_fd)
{
	fd_set rfds;

	for (;;) {
		char buf[1500];
		int direction, nread, nwrite;

		/* select() modifies the set in place, so rebuild it each time */
		FD_ZERO(&rfds);
		FD_SET(src_fd, &rfds);
		FD_SET(target_fd, &rfds);

		if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
			log_err("select failed");
			return 1;
		}

		direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;

		nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
		if (nread < 0) {
			log_err("read failed");
			return 1;
		}

		nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
		if (nwrite != nread) {
			log_err("write failed");
			return 1;
		}
	}
}
|
||||
|
||||
static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
|
||||
{
|
||||
struct test_tc_peer *skel = NULL;
|
||||
struct nstoken *nstoken = NULL;
|
||||
int err;
|
||||
int tunnel_pid = -1;
|
||||
int src_fd, target_fd;
|
||||
int ifindex;
|
||||
|
||||
/* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
|
||||
* This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
|
||||
* expose the L2 headers encapsulating the IP packet to BPF and hence
|
||||
* don't have skb in suitable state for this test. Alternative to TUN/TAP
|
||||
* would be e.g. Wireguard which would appear as a pure L3 device to BPF,
|
||||
* but that requires much more complicated setup.
|
||||
*/
|
||||
nstoken = open_netns(NS_SRC);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
|
||||
return;
|
||||
|
||||
src_fd = tun_open("tun_src");
|
||||
if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
|
||||
goto fail;
|
||||
|
||||
close_netns(nstoken);
|
||||
|
||||
nstoken = open_netns(NS_FWD);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
|
||||
goto fail;
|
||||
|
||||
target_fd = tun_open("tun_fwd");
|
||||
if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
|
||||
goto fail;
|
||||
|
||||
tunnel_pid = fork();
|
||||
if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
|
||||
goto fail;
|
||||
|
||||
if (tunnel_pid == 0)
|
||||
exit(tun_relay_loop(src_fd, target_fd));
|
||||
|
||||
skel = test_tc_peer__open();
|
||||
if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
|
||||
goto fail;
|
||||
|
||||
ifindex = get_ifindex("tun_fwd");
|
||||
if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd"))
|
||||
goto fail;
|
||||
|
||||
skel->rodata->IFINDEX_SRC = ifindex;
|
||||
skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
|
||||
|
||||
err = test_tc_peer__load(skel);
|
||||
if (!ASSERT_OK(err, "test_tc_peer__load"))
|
||||
goto fail;
|
||||
|
||||
err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
|
||||
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
|
||||
goto fail;
|
||||
|
||||
err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE);
|
||||
if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
|
||||
goto fail;
|
||||
|
||||
err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
|
||||
if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
|
||||
goto fail;
|
||||
|
||||
/* Load "tc_src_l3" to the tun_fwd interface to redirect packets
|
||||
* towards dst, and "tc_dst" to redirect packets
|
||||
* and "tc_chk" on veth_dst_fwd to drop non-redirected packets.
|
||||
*/
|
||||
SYS("tc qdisc add dev tun_fwd clsact");
|
||||
SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
|
||||
SRC_PROG_PIN_FILE);
|
||||
|
||||
SYS("tc qdisc add dev veth_dst_fwd clsact");
|
||||
SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
|
||||
DST_PROG_PIN_FILE);
|
||||
SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
|
||||
CHK_PROG_PIN_FILE);
|
||||
|
||||
/* Setup route and neigh tables */
|
||||
SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
|
||||
SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
|
||||
|
||||
SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
|
||||
SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
|
||||
|
||||
SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
|
||||
SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
|
||||
" dev tun_src scope global");
|
||||
SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
|
||||
SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
|
||||
SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
|
||||
" dev tun_src scope global");
|
||||
SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
|
||||
|
||||
SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
|
||||
SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
|
||||
|
||||
if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
|
||||
goto fail;
|
||||
|
||||
test_connectivity();
|
||||
|
||||
fail:
|
||||
if (tunnel_pid > 0) {
|
||||
kill(tunnel_pid, SIGTERM);
|
||||
waitpid(tunnel_pid, NULL, 0);
|
||||
}
|
||||
if (src_fd >= 0)
|
||||
close(src_fd);
|
||||
if (target_fd >= 0)
|
||||
close(target_fd);
|
||||
if (skel)
|
||||
test_tc_peer__destroy(skel);
|
||||
if (nstoken)
|
||||
close_netns(nstoken);
|
||||
}
|
||||
|
||||
#define RUN_TEST(name) \
|
||||
({ \
|
||||
struct netns_setup_result setup_result; \
|
||||
if (test__start_subtest(#name)) \
|
||||
if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
|
||||
if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
|
||||
"setup links and routes")) \
|
||||
test_ ## name(&setup_result); \
|
||||
netns_setup_namespaces("delete"); \
|
||||
} \
|
||||
})
|
||||
|
||||
static void *test_tc_redirect_run_tests(void *arg)
|
||||
{
|
||||
RUN_TEST(tc_redirect_peer);
|
||||
RUN_TEST(tc_redirect_peer_l3);
|
||||
RUN_TEST(tc_redirect_neigh);
|
||||
RUN_TEST(tc_redirect_neigh_fib);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void test_tc_redirect(void)
|
||||
{
|
||||
struct netns_setup_result setup_result;
|
||||
pthread_t test_thread;
|
||||
int err;
|
||||
|
||||
root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
|
||||
if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
|
||||
return;
|
||||
|
||||
if (netns_setup_namespaces("add"))
|
||||
goto done;
|
||||
|
||||
if (netns_setup_links_and_routes(&setup_result))
|
||||
goto done;
|
||||
|
||||
if (test__start_subtest("tc_redirect_peer"))
|
||||
test_tc_redirect_peer(&setup_result);
|
||||
|
||||
if (test__start_subtest("tc_redirect_neigh"))
|
||||
test_tc_redirect_neigh(&setup_result);
|
||||
|
||||
if (test__start_subtest("tc_redirect_neigh_fib"))
|
||||
test_tc_redirect_neigh_fib(&setup_result);
|
||||
|
||||
done:
|
||||
close(root_netns_fd);
|
||||
netns_setup_namespaces("delete");
|
||||
/* Run the tests in their own thread to isolate the namespace changes
|
||||
* so they do not affect the environment of other tests.
|
||||
* (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
|
||||
*/
|
||||
err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
|
||||
if (ASSERT_OK(err, "pthread_create"))
|
||||
ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
|
||||
}
|
||||
|
|
|
@ -33,8 +33,8 @@
|
|||
a.s6_addr32[3] == b.s6_addr32[3])
|
||||
#endif
|
||||
|
||||
static volatile const __u32 IFINDEX_SRC;
|
||||
static volatile const __u32 IFINDEX_DST;
|
||||
volatile const __u32 IFINDEX_SRC;
|
||||
volatile const __u32 IFINDEX_DST;
|
||||
|
||||
static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb,
|
||||
__be32 addr)
|
||||
|
|
|
@ -5,11 +5,16 @@
|
|||
#include <linux/bpf.h>
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/pkt_cls.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/ip.h>
|
||||
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
static volatile const __u32 IFINDEX_SRC;
|
||||
static volatile const __u32 IFINDEX_DST;
|
||||
volatile const __u32 IFINDEX_SRC;
|
||||
volatile const __u32 IFINDEX_DST;
|
||||
|
||||
static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
|
||||
static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66};
|
||||
|
||||
SEC("classifier/chk_egress")
|
||||
int tc_chk(struct __sk_buff *skb)
|
||||
|
@ -29,4 +34,30 @@ int tc_src(struct __sk_buff *skb)
|
|||
return bpf_redirect_peer(IFINDEX_DST, 0);
|
||||
}
|
||||
|
||||
/* Redirect every packet to the interface whose index is IFINDEX_SRC. */
SEC("classifier/dst_ingress_l3")
int tc_dst_l3(struct __sk_buff *skb)
{
	const __u32 out_ifindex = IFINDEX_SRC;

	return bpf_redirect(out_ifindex, 0);
}
|
||||
|
||||
/*
 * Grow the packet head by ETH_HLEN, fill the new bytes with src_mac,
 * dst_mac and the skb's protocol value, then redirect to the peer of
 * IFINDEX_DST. The packet is dropped (TC_ACT_SHOT) if any helper fails.
 */
SEC("classifier/src_ingress_l3")
int tc_src_l3(struct __sk_buff *skb)
{
	/* Read the protocol before the head of the packet is changed. */
	__u16 eth_proto = skb->protocol;

	if (bpf_skb_change_head(skb, ETH_HLEN, 0))
		return TC_ACT_SHOT;

	/* Written in order: bytes 0-5, bytes 6-11, then the 2-byte protocol.
	 * The || chain stops at the first helper that reports an error.
	 */
	if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) ||
	    bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) ||
	    bpf_skb_store_bytes(skb, 2 * ETH_ALEN, &eth_proto, sizeof(eth_proto), 0))
		return TC_ACT_SHOT;

	return bpf_redirect_peer(IFINDEX_DST, 0);
}
|
||||
|
||||
char __license[] SEC("license") = "GPL";
|
||||
|
|
|
@ -295,8 +295,6 @@
|
|||
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
},
|
||||
.result_unpriv = REJECT,
|
||||
.errstr_unpriv = "invalid write to stack R1 off=0 size=1",
|
||||
.result = ACCEPT,
|
||||
.retval = 42,
|
||||
},
|
||||
|
|
|
@ -300,8 +300,6 @@
|
|||
},
|
||||
.fixup_map_array_48b = { 3 },
|
||||
.result = ACCEPT,
|
||||
.result_unpriv = REJECT,
|
||||
.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
|
||||
.retval = 1,
|
||||
},
|
||||
{
|
||||
|
@ -371,8 +369,6 @@
|
|||
},
|
||||
.fixup_map_array_48b = { 3 },
|
||||
.result = ACCEPT,
|
||||
.result_unpriv = REJECT,
|
||||
.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
|
||||
.retval = 1,
|
||||
},
|
||||
{
|
||||
|
@ -472,8 +468,6 @@
|
|||
},
|
||||
.fixup_map_array_48b = { 3 },
|
||||
.result = ACCEPT,
|
||||
.result_unpriv = REJECT,
|
||||
.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
|
||||
.retval = 1,
|
||||
},
|
||||
{
|
||||
|
@ -766,8 +760,6 @@
|
|||
},
|
||||
.fixup_map_array_48b = { 3 },
|
||||
.result = ACCEPT,
|
||||
.result_unpriv = REJECT,
|
||||
.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
|
||||
.retval = 1,
|
||||
},
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue