From 01b8539655635288dcd46366806abfacbb9b1f6c Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Wed, 9 Aug 2023 22:05:56 +0800 Subject: [PATCH 01/17] bpf: Remove unused declaration bpf_link_new_file() Commit a3b80e107894 ("bpf: Allocate ID for bpf_link") removed the implementation but not the declaration. Signed-off-by: Yue Haibing Link: https://lore.kernel.org/r/20230809140556.45836-1-yuehaibing@huawei.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index db3fe5a61b05..cfabbcf47bdb 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2120,7 +2120,6 @@ void bpf_link_cleanup(struct bpf_link_primer *primer); void bpf_link_inc(struct bpf_link *link); void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); -struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd); struct bpf_link *bpf_link_get_from_fd(u32 ufd); struct bpf_link *bpf_link_get_curr_or_next(u32 *id); From 6da4fea89d258ff4816e53345fcbc593104f98ec Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Fri, 11 Aug 2023 21:16:03 +0900 Subject: [PATCH 02/17] bpftool: fix perf help message Currently, bpftool perf subcommand has typo with the help message. $ tools/bpf/bpftool/bpftool perf help Usage: bpftool perf { show | list } bpftool perf help } Since this bpftool perf subcommand help message has the extra bracket, this commit fix the typo by removing the extra bracket. Signed-off-by: Daniel T. Lee Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/r/20230811121603.17429-1-danieltimlee@gmail.com Signed-off-by: Martin KaFai Lau --- tools/bpf/bpftool/perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c index 91743445e4c7..80de2874dabe 100644 --- a/tools/bpf/bpftool/perf.c +++ b/tools/bpf/bpftool/perf.c @@ -236,7 +236,7 @@ static int do_help(int argc, char **argv) { fprintf(stderr, "Usage: %1$s %2$s { show | list }\n" - " %1$s %2$s help }\n" + " %1$s %2$s help\n" "\n" " " HELP_SPEC_OPTIONS " }\n" "", From 8e50750f122e59ea4cab4b4f696ef22b391bedc9 Mon Sep 17 00:00:00 2001 From: Marco Vedovati Date: Thu, 10 Aug 2023 14:43:53 -0700 Subject: [PATCH 03/17] libbpf: Set close-on-exec flag on gzopen Enable the close-on-exec flag when using gzopen. This is especially important for multithreaded programs making use of libbpf, where a fork + exec could race with libbpf library calls, potentially resulting in a file descriptor leaked to the new process. This got missed in 59842c5451fe ("libbpf: Ensure libbpf always opens files with O_CLOEXEC"). Fixes: 59842c5451fe ("libbpf: Ensure libbpf always opens files with O_CLOEXEC") Signed-off-by: Marco Vedovati Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230810214350.106301-1-martin.kelly@crowdstrike.com --- tools/lib/bpf/libbpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 17883f5a44b9..b14a4376a86e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1978,9 +1978,9 @@ static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data) return -ENAMETOOLONG; /* gzopen also accepts uncompressed files. */ - file = gzopen(buf, "r"); + file = gzopen(buf, "re"); if (!file) - file = gzopen("/proc/config.gz", "r"); + file = gzopen("/proc/config.gz", "re"); if (!file) { pr_warn("failed to open system Kconfig\n"); From 811915db674f8daf19bb4fcb67da9017235ce26d Mon Sep 17 00:00:00 2001 From: Yipeng Zou Date: Mon, 14 Aug 2023 11:14:34 +0800 Subject: [PATCH 04/17] selftests/bpf: Fix repeat option when kfunc_call verification fails There is no way where topts.repeat can be set to 1 when tc_test fails. Fix the typo where the break statement slipped by one line. Fixes: fb66223a244f ("selftests/bpf: add test for accessing ctx from syscall program type") Signed-off-by: Yipeng Zou Signed-off-by: Daniel Borkmann Reviewed-by: Li Zetao Link: https://lore.kernel.org/bpf/20230814031434.3077944-1-zouyipeng@huawei.com --- tools/testing/selftests/bpf/prog_tests/kfunc_call.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index a543742cd7bd..2eb71559713c 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -173,8 +173,8 @@ static void verify_fail(struct kfunc_test_params *param) case tc_test: topts.data_in = &pkt_v4; topts.data_size_in = sizeof(pkt_v4); - break; topts.repeat = 1; + break; } skel = kfunc_call_fail__open_opts(&opts); From 83a89c4b6ae93481d3f618aba6a29d89208d26ed Mon Sep 17 00:00:00 2001 From: Yipeng Zou Date: Mon, 14 Aug 2023 11:07:27 +0800 Subject: [PATCH 05/17] selftests/bpf: Clean up fmod_ret in bench_rename test script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Running the bench_rename test script, the following error occurs: # ./benchs/run_bench_rename.sh base : 0.819 ± 0.012M/s kprobe : 0.538 ± 0.009M/s kretprobe : 0.503 ± 0.004M/s rawtp : 0.779 ± 0.020M/s fentry : 0.726 ± 0.007M/s fexit : 0.691 ± 0.007M/s benchmark 'rename-fmodret' not found The bench_rename_fmodret has been removed in commit b000def2e052 ("selftests: Remove fmod_ret from test_overhead"), thus remove it from the runners in the test script. Fixes: b000def2e052 ("selftests: Remove fmod_ret from test_overhead") Signed-off-by: Yipeng Zou Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230814030727.3010390-1-zouyipeng@huawei.com --- tools/testing/selftests/bpf/benchs/run_bench_rename.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh index 16f774b1cdbe..7b281dbe4165 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh @@ -2,7 +2,7 @@ set -eufo pipefail -for i in base kprobe kretprobe rawtp fentry fexit fmodret +for i in base kprobe kretprobe rawtp fentry fexit do summary=$(sudo ./bench -w2 -d5 -a rename-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-) printf "%-10s: %s\n" $i "$summary" From ccd9a8be2e42a337ad4da5d8a051e8293200a4f3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 14 Aug 2023 18:14:10 +0200 Subject: [PATCH 06/17] selftests/bpf: Add various more tcx test cases Add several new tcx test cases to improve test coverage. This also includes a few new tests with ingress instead of clsact qdisc, to cover the fix from commit dc644b540a2d ("tcx: Fix splat in ingress_destroy upon tcx_entry_free"). # ./test_progs -t tc [...] #234 tc_links_after:OK #235 tc_links_append:OK #236 tc_links_basic:OK #237 tc_links_before:OK #238 tc_links_chain_classic:OK #239 tc_links_chain_mixed:OK #240 tc_links_dev_cleanup:OK #241 tc_links_dev_mixed:OK #242 tc_links_ingress:OK #243 tc_links_invalid:OK #244 tc_links_prepend:OK #245 tc_links_replace:OK #246 tc_links_revision:OK #247 tc_opts_after:OK #248 tc_opts_append:OK #249 tc_opts_basic:OK #250 tc_opts_before:OK #251 tc_opts_chain_classic:OK #252 tc_opts_chain_mixed:OK #253 tc_opts_delete_empty:OK #254 tc_opts_demixed:OK #255 tc_opts_detach:OK #256 tc_opts_detach_after:OK #257 tc_opts_detach_before:OK #258 tc_opts_dev_cleanup:OK #259 tc_opts_invalid:OK #260 tc_opts_mixed:OK #261 tc_opts_prepend:OK #262 tc_opts_replace:OK #263 tc_opts_revision:OK [...] Summary: 44/38 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/8699efc284b75ccdc51ddf7062fa2370330dc6c0.1692029283.git.daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/prog_tests/tc_links.c | 336 ++++++++++++++++++ .../selftests/bpf/prog_tests/tc_opts.c | 110 ++++++ .../selftests/bpf/progs/test_tc_link.c | 16 + 3 files changed, 462 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c index 81eea5f10742..74fc1fe9ee26 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_links.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2023 Isovalent */ #include +#include #include #include @@ -1581,3 +1582,338 @@ void serial_test_tc_links_dev_cleanup(void) test_tc_links_dev_cleanup_target(BPF_TCX_INGRESS); test_tc_links_dev_cleanup_target(BPF_TCX_EGRESS); } + +static void test_tc_chain_mixed(int target) +{ + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback); + LIBBPF_OPTS(bpf_tcx_opts, optl); + struct test_tc_link *skel; + struct bpf_link *link; + __u32 pid1, pid2, pid3; + int err; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target), + 0, "tc4_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc5, target), + 0, "tc5_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc6, target), + 0, "tc6_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc5)); + pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc6)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + ASSERT_NEQ(pid2, pid3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + tc_hook.attach_point = target == BPF_TCX_INGRESS ? + BPF_TC_INGRESS : BPF_TC_EGRESS; + err = bpf_tc_hook_create(&tc_hook); + err = err == -EEXIST ? 0 : err; + if (!ASSERT_OK(err, "bpf_tc_hook_create")) + goto cleanup; + + tc_opts.prog_fd = bpf_program__fd(skel->progs.tc5); + err = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(err, "bpf_tc_attach")) + goto cleanup; + + link = bpf_program__attach_tcx(skel->progs.tc6, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc6 = link; + + assert_mprog_count(target, 1); + + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); + ASSERT_EQ(skel->bss->seen_tc5, false, "seen_tc5"); + ASSERT_EQ(skel->bss->seen_tc6, true, "seen_tc6"); + + skel->bss->seen_tc4 = false; + skel->bss->seen_tc5 = false; + skel->bss->seen_tc6 = false; + + err = bpf_link__update_program(skel->links.tc6, skel->progs.tc4); + if (!ASSERT_OK(err, "link_update")) + goto cleanup; + + assert_mprog_count(target, 1); + + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4"); + ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5"); + ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6"); + + skel->bss->seen_tc4 = false; + skel->bss->seen_tc5 = false; + skel->bss->seen_tc6 = false; + + err = bpf_link__detach(skel->links.tc6); + if (!ASSERT_OK(err, "prog_detach")) + goto cleanup; + + __assert_mprog_count(target, 0, true, loopback); + + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); + ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5"); + ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6"); + +cleanup: + tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0; + err = bpf_tc_detach(&tc_hook, &tc_opts); + ASSERT_OK(err, "bpf_tc_detach"); + + tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS; + bpf_tc_hook_destroy(&tc_hook); + + test_tc_link__destroy(skel); +} + +void serial_test_tc_links_chain_mixed(void) +{ + test_tc_chain_mixed(BPF_TCX_INGRESS); + test_tc_chain_mixed(BPF_TCX_EGRESS); +} + +static void test_tc_links_ingress(int target, bool chain_tc_old, + bool tcx_teardown_first) +{ + LIBBPF_OPTS(bpf_tc_opts, tc_opts, + .handle = 1, + .priority = 1, + ); + LIBBPF_OPTS(bpf_tc_hook, tc_hook, + .ifindex = loopback, + .attach_point = BPF_TC_CUSTOM, + .parent = TC_H_INGRESS, + ); + bool hook_created = false, tc_attached = false; + LIBBPF_OPTS(bpf_tcx_opts, optl); + __u32 pid1, pid2, pid3; + struct test_tc_link *skel; + struct bpf_link *link; + int err; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target), + 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target), + 0, "tc2_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + ASSERT_NEQ(pid2, pid3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + if (chain_tc_old) { + ASSERT_OK(system("tc qdisc add dev lo ingress"), "add_ingress"); + hook_created = true; + + tc_opts.prog_fd = bpf_program__fd(skel->progs.tc3); + err = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(err, "bpf_tc_attach")) + goto cleanup; + tc_attached = true; + } + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc2 = link; + + assert_mprog_count(target, 2); + + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3"); + + skel->bss->seen_tc1 = false; + skel->bss->seen_tc2 = false; + skel->bss->seen_tc3 = false; + + err = bpf_link__detach(skel->links.tc2); + if (!ASSERT_OK(err, "prog_detach")) + goto cleanup; + + assert_mprog_count(target, 1); + + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3"); +cleanup: + if (tc_attached) { + tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0; + err = bpf_tc_detach(&tc_hook, &tc_opts); + ASSERT_OK(err, "bpf_tc_detach"); + } + ASSERT_OK(system(ping_cmd), ping_cmd); + assert_mprog_count(target, 1); + if (hook_created && tcx_teardown_first) + ASSERT_OK(system("tc qdisc del dev lo ingress"), "del_ingress"); + ASSERT_OK(system(ping_cmd), ping_cmd); + test_tc_link__destroy(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + if (hook_created && !tcx_teardown_first) + ASSERT_OK(system("tc qdisc del dev lo ingress"), "del_ingress"); + ASSERT_OK(system(ping_cmd), ping_cmd); + assert_mprog_count(target, 0); +} + +void serial_test_tc_links_ingress(void) +{ + test_tc_links_ingress(BPF_TCX_INGRESS, true, true); + test_tc_links_ingress(BPF_TCX_INGRESS, true, false); + test_tc_links_ingress(BPF_TCX_INGRESS, false, false); +} + +static void test_tc_links_dev_mixed(int target) +{ + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + LIBBPF_OPTS(bpf_tc_hook, tc_hook); + LIBBPF_OPTS(bpf_tcx_opts, optl); + __u32 pid1, pid2, pid3, pid4; + struct test_tc_link *skel; + struct bpf_link *link; + int err, ifindex; + + ASSERT_OK(system("ip link add dev tcx_opts1 type veth peer name tcx_opts2"), "add veth"); + ifindex = if_nametoindex("tcx_opts1"); + ASSERT_NEQ(ifindex, 0, "non_zero_ifindex"); + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target), + 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target), + 0, "tc2_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target), + 0, "tc3_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target), + 0, "tc4_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3)); + pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + ASSERT_NEQ(pid3, pid4, "prog_ids_3_4"); + ASSERT_NEQ(pid2, pid3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + link = bpf_program__attach_tcx(skel->progs.tc1, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + assert_mprog_count_ifindex(ifindex, target, 1); + + link = bpf_program__attach_tcx(skel->progs.tc2, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc2 = link; + + assert_mprog_count_ifindex(ifindex, target, 2); + + link = bpf_program__attach_tcx(skel->progs.tc3, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc3 = link; + + assert_mprog_count_ifindex(ifindex, target, 3); + + link = bpf_program__attach_tcx(skel->progs.tc4, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc4 = link; + + assert_mprog_count_ifindex(ifindex, target, 4); + + tc_hook.ifindex = ifindex; + tc_hook.attach_point = target == BPF_TCX_INGRESS ? + BPF_TC_INGRESS : BPF_TC_EGRESS; + + err = bpf_tc_hook_create(&tc_hook); + err = err == -EEXIST ? 0 : err; + if (!ASSERT_OK(err, "bpf_tc_hook_create")) + goto cleanup; + + tc_opts.prog_fd = bpf_program__fd(skel->progs.tc5); + err = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(err, "bpf_tc_attach")) + goto cleanup; + + ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth"); + ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed"); + ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); + + ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc1)), 0, "tc1_ifindex"); + ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc2)), 0, "tc2_ifindex"); + ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc3)), 0, "tc3_ifindex"); + ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc4)), 0, "tc4_ifindex"); + + test_tc_link__destroy(skel); + return; +cleanup: + test_tc_link__destroy(skel); + + ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth"); + ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed"); + ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); +} + +void serial_test_tc_links_dev_mixed(void) +{ + test_tc_links_dev_mixed(BPF_TCX_INGRESS); + test_tc_links_dev_mixed(BPF_TCX_EGRESS); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c index 39bd253e41aa..7a2ecd4eca5d 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c @@ -2268,3 +2268,113 @@ void serial_test_tc_opts_delete_empty(void) test_tc_opts_delete_empty(BPF_TCX_INGRESS, true); test_tc_opts_delete_empty(BPF_TCX_EGRESS, true); } + +static void test_tc_chain_mixed(int target) +{ + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback); + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + __u32 fd1, fd2, fd3, id1, id2, id3; + struct test_tc_link *skel; + int err, detach_fd; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc4); + fd2 = bpf_program__fd(skel->progs.tc5); + fd3 = bpf_program__fd(skel->progs.tc6); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + + ASSERT_NEQ(id1, id2, "prog_ids_1_2"); + ASSERT_NEQ(id2, id3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + tc_hook.attach_point = target == BPF_TCX_INGRESS ? + BPF_TC_INGRESS : BPF_TC_EGRESS; + err = bpf_tc_hook_create(&tc_hook); + err = err == -EEXIST ? 0 : err; + if (!ASSERT_OK(err, "bpf_tc_hook_create")) + goto cleanup; + + tc_opts.prog_fd = fd2; + err = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(err, "bpf_tc_attach")) + goto cleanup_hook; + + err = bpf_prog_attach_opts(fd3, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_filter; + + detach_fd = fd3; + + assert_mprog_count(target, 1); + + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); + ASSERT_EQ(skel->bss->seen_tc5, false, "seen_tc5"); + ASSERT_EQ(skel->bss->seen_tc6, true, "seen_tc6"); + + skel->bss->seen_tc4 = false; + skel->bss->seen_tc5 = false; + skel->bss->seen_tc6 = false; + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_REPLACE, + .replace_prog_fd = fd3, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_opts; + + detach_fd = fd1; + + assert_mprog_count(target, 1); + + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4"); + ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5"); + ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6"); + + skel->bss->seen_tc4 = false; + skel->bss->seen_tc5 = false; + skel->bss->seen_tc6 = false; + +cleanup_opts: + err = bpf_prog_detach_opts(detach_fd, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + __assert_mprog_count(target, 0, true, loopback); + + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); + ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5"); + ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6"); + +cleanup_filter: + tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0; + err = bpf_tc_detach(&tc_hook, &tc_opts); + ASSERT_OK(err, "bpf_tc_detach"); + +cleanup_hook: + tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS; + bpf_tc_hook_destroy(&tc_hook); + +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_chain_mixed(void) +{ + test_tc_chain_mixed(BPF_TCX_INGRESS); + test_tc_chain_mixed(BPF_TCX_EGRESS); +} diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c index ed1fd0e9cee9..30e7124c49a1 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_link.c +++ b/tools/testing/selftests/bpf/progs/test_tc_link.c @@ -10,6 +10,8 @@ bool seen_tc1; bool seen_tc2; bool seen_tc3; bool seen_tc4; +bool seen_tc5; +bool seen_tc6; SEC("tc/ingress") int tc1(struct __sk_buff *skb) @@ -38,3 +40,17 @@ int tc4(struct __sk_buff *skb) seen_tc4 = true; return TCX_NEXT; } + +SEC("tc/egress") +int tc5(struct __sk_buff *skb) +{ + seen_tc5 = true; + return TCX_PASS; +} + +SEC("tc/egress") +int tc6(struct __sk_buff *skb) +{ + seen_tc6 = true; + return TCX_PASS; +} From 8ba651ed7fa1641f7c4941b79f2e3dd4ddb58aec Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 14 Aug 2023 13:59:07 -0500 Subject: [PATCH 07/17] bpf: Support default .validate() and .update() behavior for struct_ops links Currently, if a struct_ops map is loaded with BPF_F_LINK, it must also define the .validate() and .update() callbacks in its corresponding struct bpf_struct_ops in the kernel. Enabling struct_ops link is useful in its own right to ensure that the map is unloaded if an application crashes. For example, with sched_ext, we want to automatically unload the host-wide scheduler if the application crashes. We would likely never support updating elements of a sched_ext struct_ops map, so we'd have to implement these callbacks showing that they _can't_ support element updates just to benefit from the basic lifetime management of struct_ops links. Let's enable struct_ops maps to work with BPF_F_LINK even if they haven't defined these callbacks, by assuming that a struct_ops map element cannot be updated by default. Acked-by: Kui-Feng Lee Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230814185908.700553-2-void@manifault.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/bpf_struct_ops.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index eaff04eefb31..fdc3e8705a3c 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -509,9 +509,12 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, } if (st_map->map.map_flags & BPF_F_LINK) { - err = st_ops->validate(kdata); - if (err) - goto reset_unlock; + err = 0; + if (st_ops->validate) { + err = st_ops->validate(kdata); + if (err) + goto reset_unlock; + } set_memory_rox((long)st_map->image, 1); /* Let bpf_link handle registration & unregistration. * @@ -663,9 +666,6 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) if (attr->value_size != vt->size) return ERR_PTR(-EINVAL); - if (attr->map_flags & BPF_F_LINK && (!st_ops->validate || !st_ops->update)) - return ERR_PTR(-EOPNOTSUPP); - t = st_ops->type; st_map_size = sizeof(*st_map) + @@ -823,6 +823,9 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map if (!bpf_struct_ops_valid_to_reg(new_map)) return -EINVAL; + if (!st_map->st_ops->update) + return -EOPNOTSUPP; + mutex_lock(&update_mutex); old_map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex)); From bb48cf1679d294d4fd3bfaa88289ed9004cbb025 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 14 Aug 2023 13:59:08 -0500 Subject: [PATCH 08/17] bpf: Document struct bpf_struct_ops fields Subsystems that want to implement a struct bpf_struct_ops structure to enable struct_ops maps must currently reverse engineer how the structure works. Given that this is meant to be a way for subsystem maintainers to extend their subsystems using BPF, let's document it to make it a bit easier on them. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230814185908.700553-3-void@manifault.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 47 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cfabbcf47bdb..eced6400f778 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1550,6 +1550,53 @@ struct bpf_struct_ops_value; struct btf_member; #define BPF_STRUCT_OPS_MAX_NR_MEMBERS 64 +/** + * struct bpf_struct_ops - A structure of callbacks allowing a subsystem to + * define a BPF_MAP_TYPE_STRUCT_OPS map type composed + * of BPF_PROG_TYPE_STRUCT_OPS progs. + * @verifier_ops: A structure of callbacks that are invoked by the verifier + * when determining whether the struct_ops progs in the + * struct_ops map are valid. + * @init: A callback that is invoked a single time, and before any other + * callback, to initialize the structure. A nonzero return value means + * the subsystem could not be initialized. + * @check_member: When defined, a callback invoked by the verifier to allow + * the subsystem to determine if an entry in the struct_ops map + * is valid. A nonzero return value means that the map is + * invalid and should be rejected by the verifier. + * @init_member: A callback that is invoked for each member of the struct_ops + * map to allow the subsystem to initialize the member. A nonzero + * value means the member could not be initialized. This callback + * is exclusive with the @type, @type_id, @value_type, and + * @value_id fields. + * @reg: A callback that is invoked when the struct_ops map has been + * initialized and is being attached to. Zero means the struct_ops map + * has been successfully registered and is live. A nonzero return value + * means the struct_ops map could not be registered. + * @unreg: A callback that is invoked when the struct_ops map should be + * unregistered. + * @update: A callback that is invoked when the live struct_ops map is being + * updated to contain new values. This callback is only invoked when + * the struct_ops map is loaded with BPF_F_LINK. If not defined, the + * it is assumed that the struct_ops map cannot be updated. + * @validate: A callback that is invoked after all of the members have been + * initialized. This callback should perform static checks on the + * map, meaning that it should either fail or succeed + * deterministically. A struct_ops map that has been validated may + * not necessarily succeed in being registered if the call to @reg + * fails. For example, a valid struct_ops map may be loaded, but + * then fail to be registered due to there being another active + * struct_ops map on the system in the subsystem already. For this + * reason, if this callback is not defined, the check is skipped as + * the struct_ops map will have final verification performed in + * @reg. + * @type: BTF type. + * @value_type: Value type. + * @name: The name of the struct bpf_struct_ops object. + * @func_models: Func models + * @type_id: BTF type id. + * @value_id: BTF value id. + */ struct bpf_struct_ops { const struct bpf_verifier_ops *verifier_ops; int (*init)(struct btf *btf); From 8897562f67b3e61ad736cd5c9f307447d33280e4 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Tue, 15 Aug 2023 09:53:41 +0100 Subject: [PATCH 09/17] net: Fix slab-out-of-bounds in inet[6]_steal_sock Kumar reported a KASAN splat in tcp_v6_rcv: bash-5.2# ./test_progs -t btf_skc_cls_ingress ... [ 51.810085] BUG: KASAN: slab-out-of-bounds in tcp_v6_rcv+0x2d7d/0x3440 [ 51.810458] Read of size 2 at addr ffff8881053f038c by task test_progs/226 The problem is that inet[6]_steal_sock accesses sk->sk_protocol without accounting for request or timewait sockets. To fix this we can't just check sock_common->skc_reuseport since that flag is present on timewait sockets. Instead, add a fullsock check to avoid the out of bands access of sk_protocol. Fixes: 9c02bec95954 ("bpf, net: Support SO_REUSEPORT sockets with bpf_sk_assign") Reported-by: Kumar Kartikeya Dwivedi Signed-off-by: Lorenz Bauer Link: https://lore.kernel.org/r/20230815-bpf-next-v2-1-95126eaa4c1b@isovalent.com Signed-off-by: Martin KaFai Lau --- include/net/inet6_hashtables.h | 2 +- include/net/inet_hashtables.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 284b5ce7205d..533a7337865a 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -116,7 +116,7 @@ struct sock *inet6_steal_sock(struct net *net, struct sk_buff *skb, int doff, if (!sk) return NULL; - if (!prefetched) + if (!prefetched || !sk_fullsock(sk)) return sk; if (sk->sk_protocol == IPPROTO_TCP) { diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 843557223414..3ecfeadbfa06 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -462,7 +462,7 @@ struct sock *inet_steal_sock(struct net *net, struct sk_buff *skb, int doff, if (!sk) return NULL; - if (!prefetched) + if (!prefetched || !sk_fullsock(sk)) return sk; if (sk->sk_protocol == IPPROTO_TCP) { From 0aa35162d2a1ed7ae5303b8d91f7290d3b8b9219 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 13 Aug 2023 14:18:59 +0000 Subject: [PATCH 10/17] bpf: Fix uninitialized symbol in bpf_perf_link_fill_kprobe() The commit 1b715e1b0ec5 ("bpf: Support ->fill_link_info for perf_event") leads to the following Smatch static checker warning: kernel/bpf/syscall.c:3416 bpf_perf_link_fill_kprobe() error: uninitialized symbol 'type'. That can happens when uname is NULL. So fix it by verifying the uname when we really need to fill it. Fixes: 1b715e1b0ec5 ("bpf: Support ->fill_link_info for perf_event") Reported-by: Dan Carpenter Signed-off-by: Yafang Shao Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Jiri Olsa Closes: https://lore.kernel.org/bpf/85697a7e-f897-4f74-8b43-82721bebc462@kili.mountain Link: https://lore.kernel.org/bpf/20230813141900.1268-2-laoar.shao@gmail.com --- kernel/bpf/syscall.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7f4e8c357a6a..cb658543bdb4 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3378,14 +3378,13 @@ static int bpf_perf_link_fill_common(const struct perf_event *event, if (!ulen ^ !uname) return -EINVAL; - if (!uname) - return 0; err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf, probe_offset, probe_addr); if (err) return err; - + if (!uname) + return 0; if (buf) { len = strlen(buf); err = bpf_copy_to_user(uname, buf, ulen, len); From 23cf7aa539dc26f45e42ef6303613f03f4ae2142 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 13 Aug 2023 14:19:00 +0000 Subject: [PATCH 11/17] selftests/bpf: Add selftest for fill_link_info Add selftest for the fill_link_info of uprobe, kprobe and tracepoint. The result: $ tools/testing/selftests/bpf/test_progs --name=fill_link_info #79/1 fill_link_info/kprobe_link_info:OK #79/2 fill_link_info/kretprobe_link_info:OK #79/3 fill_link_info/kprobe_invalid_ubuff:OK #79/4 fill_link_info/tracepoint_link_info:OK #79/5 fill_link_info/uprobe_link_info:OK #79/6 fill_link_info/uretprobe_link_info:OK #79/7 fill_link_info/kprobe_multi_link_info:OK #79/8 fill_link_info/kretprobe_multi_link_info:OK #79/9 fill_link_info/kprobe_multi_invalid_ubuff:OK #79 fill_link_info:OK Summary: 1/9 PASSED, 0 SKIPPED, 0 FAILED The test case for kprobe_multi won't be run on aarch64, as it is not supported. Signed-off-by: Yafang Shao Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20230813141900.1268-3-laoar.shao@gmail.com --- tools/testing/selftests/bpf/DENYLIST.aarch64 | 3 + .../selftests/bpf/prog_tests/fill_link_info.c | 342 ++++++++++++++++++ .../selftests/bpf/progs/test_fill_link_info.c | 42 +++ 3 files changed, 387 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/fill_link_info.c create mode 100644 tools/testing/selftests/bpf/progs/test_fill_link_info.c diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 3b61e8b35d62..7f768d335698 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -12,3 +12,6 @@ kprobe_multi_test/skel_api # libbpf: failed to load BPF sk module_attach # prog 'kprobe_multi': failed to auto-attach: -95 fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524 fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524 +fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95 diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c new file mode 100644 index 000000000000..9d768e083714 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -0,0 +1,342 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Yafang Shao */ + +#include +#include +#include +#include +#include "trace_helpers.h" +#include "test_fill_link_info.skel.h" + +#define TP_CAT "sched" +#define TP_NAME "sched_switch" + +static const char *kmulti_syms[] = { + "bpf_fentry_test2", + "bpf_fentry_test1", + "bpf_fentry_test3", +}; +#define KMULTI_CNT ARRAY_SIZE(kmulti_syms) +static __u64 kmulti_addrs[KMULTI_CNT]; + +#define KPROBE_FUNC "bpf_fentry_test1" +static __u64 kprobe_addr; + +#define UPROBE_FILE "/proc/self/exe" +static ssize_t uprobe_offset; +/* uprobe attach point */ +static noinline void uprobe_func(void) +{ + asm volatile (""); +} + +static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long addr, + ssize_t offset, ssize_t entry_offset) +{ + struct bpf_link_info info; + __u32 len = sizeof(info); + char buf[PATH_MAX]; + int err; + + memset(&info, 0, sizeof(info)); + buf[0] = '\0'; + +again: + err = bpf_link_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "get_link_info")) + return -1; + + if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_PERF_EVENT, "link_type")) + return -1; + if (!ASSERT_EQ(info.perf_event.type, type, "perf_type_match")) + return -1; + + switch (info.perf_event.type) { + case BPF_PERF_EVENT_KPROBE: + case BPF_PERF_EVENT_KRETPROBE: + ASSERT_EQ(info.perf_event.kprobe.offset, offset, "kprobe_offset"); + + /* In case kernel.kptr_restrict is not permitted or MAX_SYMS is reached */ + if (addr) + ASSERT_EQ(info.perf_event.kprobe.addr, addr + entry_offset, + "kprobe_addr"); + + if (!info.perf_event.kprobe.func_name) { + ASSERT_EQ(info.perf_event.kprobe.name_len, 0, "name_len"); + info.perf_event.kprobe.func_name = ptr_to_u64(&buf); + info.perf_event.kprobe.name_len = sizeof(buf); + goto again; + } + + err = strncmp(u64_to_ptr(info.perf_event.kprobe.func_name), KPROBE_FUNC, + strlen(KPROBE_FUNC)); + ASSERT_EQ(err, 0, "cmp_kprobe_func_name"); + break; + case BPF_PERF_EVENT_TRACEPOINT: + if (!info.perf_event.tracepoint.tp_name) { + ASSERT_EQ(info.perf_event.tracepoint.name_len, 0, "name_len"); + info.perf_event.tracepoint.tp_name = ptr_to_u64(&buf); + info.perf_event.tracepoint.name_len = sizeof(buf); + goto again; + } + + err = strncmp(u64_to_ptr(info.perf_event.tracepoint.tp_name), TP_NAME, + strlen(TP_NAME)); + ASSERT_EQ(err, 0, "cmp_tp_name"); + break; + case BPF_PERF_EVENT_UPROBE: + case BPF_PERF_EVENT_URETPROBE: + ASSERT_EQ(info.perf_event.uprobe.offset, offset, "uprobe_offset"); + + if (!info.perf_event.uprobe.file_name) { + ASSERT_EQ(info.perf_event.uprobe.name_len, 0, "name_len"); + info.perf_event.uprobe.file_name = ptr_to_u64(&buf); + info.perf_event.uprobe.name_len = sizeof(buf); + goto again; + } + + err = strncmp(u64_to_ptr(info.perf_event.uprobe.file_name), UPROBE_FILE, + strlen(UPROBE_FILE)); + ASSERT_EQ(err, 0, "cmp_file_name"); + break; + default: + err = -1; + break; + } + return err; +} + +static void kprobe_fill_invalid_user_buffer(int fd) +{ + struct bpf_link_info info; + __u32 len = sizeof(info); + int err; + + memset(&info, 0, sizeof(info)); + + info.perf_event.kprobe.func_name = 0x1; /* invalid address */ + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EINVAL, "invalid_buff_and_len"); + + info.perf_event.kprobe.name_len = 64; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EFAULT, "invalid_buff"); + + info.perf_event.kprobe.func_name = 0; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EINVAL, "invalid_len"); + + ASSERT_EQ(info.perf_event.kprobe.addr, 0, "func_addr"); + ASSERT_EQ(info.perf_event.kprobe.offset, 0, "func_offset"); + ASSERT_EQ(info.perf_event.type, 0, "type"); +} + +static void test_kprobe_fill_link_info(struct test_fill_link_info *skel, + enum bpf_perf_event_type type, + bool invalid) +{ + DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts, + .attach_mode = PROBE_ATTACH_MODE_LINK, + .retprobe = type == BPF_PERF_EVENT_KRETPROBE, + ); + ssize_t entry_offset = 0; + int link_fd, err; + + skel->links.kprobe_run = bpf_program__attach_kprobe_opts(skel->progs.kprobe_run, + KPROBE_FUNC, &opts); + if (!ASSERT_OK_PTR(skel->links.kprobe_run, "attach_kprobe")) + return; + + link_fd = bpf_link__fd(skel->links.kprobe_run); + if (!invalid) { + /* See also arch_adjust_kprobe_addr(). */ + if (skel->kconfig->CONFIG_X86_KERNEL_IBT) + entry_offset = 4; + err = verify_perf_link_info(link_fd, type, kprobe_addr, 0, entry_offset); + ASSERT_OK(err, "verify_perf_link_info"); + } else { + kprobe_fill_invalid_user_buffer(link_fd); + } + bpf_link__detach(skel->links.kprobe_run); +} + +static void test_tp_fill_link_info(struct test_fill_link_info *skel) +{ + int link_fd, err; + + skel->links.tp_run = bpf_program__attach_tracepoint(skel->progs.tp_run, TP_CAT, TP_NAME); + if (!ASSERT_OK_PTR(skel->links.tp_run, "attach_tp")) + return; + + link_fd = bpf_link__fd(skel->links.tp_run); + err = verify_perf_link_info(link_fd, BPF_PERF_EVENT_TRACEPOINT, 0, 0, 0); + ASSERT_OK(err, "verify_perf_link_info"); + bpf_link__detach(skel->links.tp_run); +} + +static void test_uprobe_fill_link_info(struct test_fill_link_info *skel, + enum bpf_perf_event_type type) +{ + int link_fd, err; + + skel->links.uprobe_run = bpf_program__attach_uprobe(skel->progs.uprobe_run, + type == BPF_PERF_EVENT_URETPROBE, + 0, /* self pid */ + UPROBE_FILE, uprobe_offset); + if (!ASSERT_OK_PTR(skel->links.uprobe_run, "attach_uprobe")) + return; + + link_fd = bpf_link__fd(skel->links.uprobe_run); + err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, 0); + ASSERT_OK(err, "verify_perf_link_info"); + bpf_link__detach(skel->links.uprobe_run); +} + +static int verify_kmulti_link_info(int fd, bool retprobe) +{ + struct bpf_link_info info; + __u32 len = sizeof(info); + __u64 addrs[KMULTI_CNT]; + int flags, i, err; + + memset(&info, 0, sizeof(info)); + +again: + err = bpf_link_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "get_link_info")) + return -1; + + if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_KPROBE_MULTI, "kmulti_type")) + return -1; + + ASSERT_EQ(info.kprobe_multi.count, KMULTI_CNT, "func_cnt"); + flags = info.kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN; + if (!retprobe) + ASSERT_EQ(flags, 0, "kmulti_flags"); + else + ASSERT_NEQ(flags, 0, "kretmulti_flags"); + + if (!info.kprobe_multi.addrs) { + info.kprobe_multi.addrs = ptr_to_u64(addrs); + goto again; + } + for (i = 0; i < KMULTI_CNT; i++) + ASSERT_EQ(addrs[i], kmulti_addrs[i], "kmulti_addrs"); + return 0; +} + +static void verify_kmulti_invalid_user_buffer(int fd) +{ + struct bpf_link_info info; + __u32 len = sizeof(info); + __u64 addrs[KMULTI_CNT]; + int err, i; + + memset(&info, 0, sizeof(info)); + + info.kprobe_multi.count = KMULTI_CNT; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EINVAL, "no_addr"); + + info.kprobe_multi.addrs = ptr_to_u64(addrs); + info.kprobe_multi.count = 0; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EINVAL, "no_cnt"); + + for (i = 0; i < KMULTI_CNT; i++) + addrs[i] = 0; + info.kprobe_multi.count = KMULTI_CNT - 1; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -ENOSPC, "smaller_cnt"); + for (i = 0; i < KMULTI_CNT - 1; i++) + ASSERT_EQ(addrs[i], kmulti_addrs[i], "kmulti_addrs"); + ASSERT_EQ(addrs[i], 0, "kmulti_addrs"); + + for (i = 0; i < KMULTI_CNT; i++) + addrs[i] = 0; + info.kprobe_multi.count = KMULTI_CNT + 1; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, 0, "bigger_cnt"); + for (i = 0; i < KMULTI_CNT; i++) + ASSERT_EQ(addrs[i], kmulti_addrs[i], "kmulti_addrs"); + + info.kprobe_multi.count = KMULTI_CNT; + info.kprobe_multi.addrs = 0x1; /* invalid addr */ + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EFAULT, "invalid_buff"); +} + +static int symbols_cmp_r(const void *a, const void *b) +{ + const char **str_a = (const char **) a; + const char **str_b = (const char **) b; + + return strcmp(*str_a, *str_b); +} + +static void test_kprobe_multi_fill_link_info(struct test_fill_link_info *skel, + bool retprobe, bool invalid) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + int link_fd, err; + + opts.syms = kmulti_syms; + opts.cnt = KMULTI_CNT; + opts.retprobe = retprobe; + skel->links.kmulti_run = bpf_program__attach_kprobe_multi_opts(skel->progs.kmulti_run, + NULL, &opts); + if (!ASSERT_OK_PTR(skel->links.kmulti_run, "attach_kprobe_multi")) + return; + + link_fd = bpf_link__fd(skel->links.kmulti_run); + if (!invalid) { + err = verify_kmulti_link_info(link_fd, retprobe); + ASSERT_OK(err, "verify_kmulti_link_info"); + } else { + verify_kmulti_invalid_user_buffer(link_fd); + } + bpf_link__detach(skel->links.kmulti_run); +} + +void test_fill_link_info(void) +{ + struct test_fill_link_info *skel; + int i; + + skel = test_fill_link_info__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + /* load kallsyms to compare the addr */ + if (!ASSERT_OK(load_kallsyms_refresh(), "load_kallsyms_refresh")) + goto cleanup; + + kprobe_addr = ksym_get_addr(KPROBE_FUNC); + if (test__start_subtest("kprobe_link_info")) + test_kprobe_fill_link_info(skel, BPF_PERF_EVENT_KPROBE, false); + if (test__start_subtest("kretprobe_link_info")) + test_kprobe_fill_link_info(skel, BPF_PERF_EVENT_KRETPROBE, false); + if (test__start_subtest("kprobe_invalid_ubuff")) + test_kprobe_fill_link_info(skel, BPF_PERF_EVENT_KPROBE, true); + if (test__start_subtest("tracepoint_link_info")) + test_tp_fill_link_info(skel); + + uprobe_offset = get_uprobe_offset(&uprobe_func); + if (test__start_subtest("uprobe_link_info")) + test_uprobe_fill_link_info(skel, BPF_PERF_EVENT_UPROBE); + if (test__start_subtest("uretprobe_link_info")) + test_uprobe_fill_link_info(skel, BPF_PERF_EVENT_URETPROBE); + + qsort(kmulti_syms, KMULTI_CNT, sizeof(kmulti_syms[0]), symbols_cmp_r); + for (i = 0; i < KMULTI_CNT; i++) + kmulti_addrs[i] = ksym_get_addr(kmulti_syms[i]); + if (test__start_subtest("kprobe_multi_link_info")) + test_kprobe_multi_fill_link_info(skel, false, false); + if (test__start_subtest("kretprobe_multi_link_info")) + test_kprobe_multi_fill_link_info(skel, true, false); + if (test__start_subtest("kprobe_multi_invalid_ubuff")) + test_kprobe_multi_fill_link_info(skel, true, true); + +cleanup: + test_fill_link_info__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_fill_link_info.c b/tools/testing/selftests/bpf/progs/test_fill_link_info.c new file mode 100644 index 000000000000..564f402d56fe --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_fill_link_info.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Yafang Shao */ + +#include "vmlinux.h" +#include +#include + +extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak; + +/* This function is here to have CONFIG_X86_KERNEL_IBT + * used and added to object BTF. + */ +int unused(void) +{ + return CONFIG_X86_KERNEL_IBT ? 0 : 1; +} + +SEC("kprobe") +int BPF_PROG(kprobe_run) +{ + return 0; +} + +SEC("uprobe") +int BPF_PROG(uprobe_run) +{ + return 0; +} + +SEC("tracepoint") +int BPF_PROG(tp_run) +{ + return 0; +} + +SEC("kprobe.multi") +int BPF_PROG(kmulti_run) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; From e16e6c6df475b10b1ed933a6827798312612358f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 16 Aug 2023 11:56:50 +0200 Subject: [PATCH 12/17] bpftool: Implement link show support for tcx Add support to dump tcx link information to bpftool. This adds a common helper show_link_ifindex_{plain,json}() which can be reused also for other link types. The plain text and json device output is the same format as in bpftool net dump. Below shows an example link dump output along with a cgroup link for comparison: # bpftool link [...] 10: cgroup prog 1977 cgroup_id 1 attach_type cgroup_inet6_post_bind [...] 13: tcx prog 2053 ifindex enp5s0(3) attach_type tcx_ingress 14: tcx prog 2080 ifindex enp5s0(3) attach_type tcx_egress [...] Equivalent json output: # bpftool link --json [...] { "id": 10, "type": "cgroup", "prog_id": 1977, "cgroup_id": 1, "attach_type": "cgroup_inet6_post_bind" }, [...] { "id": 13, "type": "tcx", "prog_id": 2053, "devname": "enp5s0", "ifindex": 3, "attach_type": "tcx_ingress" }, { "id": 14, "type": "tcx", "prog_id": 2080, "devname": "enp5s0", "ifindex": 3, "attach_type": "tcx_egress" } [...] Suggested-by: Yafang Shao Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Acked-by: Yafang Shao Link: https://lore.kernel.org/r/20230816095651.10014-1-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- tools/bpf/bpftool/link.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 65a168df63bc..a3774594f154 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -150,6 +150,18 @@ static void show_link_attach_type_json(__u32 attach_type, json_writer_t *wtr) jsonw_uint_field(wtr, "attach_type", attach_type); } +static void show_link_ifindex_json(__u32 ifindex, json_writer_t *wtr) +{ + char devname[IF_NAMESIZE] = "(unknown)"; + + if (ifindex) + if_indextoname(ifindex, devname); + else + snprintf(devname, sizeof(devname), "(detached)"); + jsonw_string_field(wtr, "devname", devname); + jsonw_uint_field(wtr, "ifindex", ifindex); +} + static bool is_iter_map_target(const char *target_name) { return strcmp(target_name, "bpf_map_elem") == 0 || @@ -433,6 +445,10 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) case BPF_LINK_TYPE_NETFILTER: netfilter_dump_json(info, json_wtr); break; + case BPF_LINK_TYPE_TCX: + show_link_ifindex_json(info->tcx.ifindex, json_wtr); + show_link_attach_type_json(info->tcx.attach_type, json_wtr); + break; case BPF_LINK_TYPE_STRUCT_OPS: jsonw_uint_field(json_wtr, "map_id", info->struct_ops.map_id); @@ -509,6 +525,22 @@ static void show_link_attach_type_plain(__u32 attach_type) printf("attach_type %u ", attach_type); } +static void show_link_ifindex_plain(__u32 ifindex) +{ + char devname[IF_NAMESIZE * 2] = "(unknown)"; + char tmpname[IF_NAMESIZE]; + char *ret = NULL; + + if (ifindex) + ret = if_indextoname(ifindex, tmpname); + else + snprintf(devname, sizeof(devname), "(detached)"); + if (ret) + snprintf(devname, sizeof(devname), "%s(%d)", + tmpname, ifindex); + printf("ifindex %s ", devname); +} + static void show_iter_plain(struct bpf_link_info *info) { const char *target_name = u64_to_ptr(info->iter.target_name); @@ -745,6 +777,11 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) case BPF_LINK_TYPE_NETFILTER: netfilter_dump_plain(info); break; + case BPF_LINK_TYPE_TCX: + printf("\n\t"); + show_link_ifindex_plain(info->tcx.ifindex); + show_link_attach_type_plain(info->tcx.attach_type); + break; case BPF_LINK_TYPE_KPROBE_MULTI: show_kprobe_multi_plain(info); break; From 053bbf9bff58864be880d7e9a5af586793dbb7de Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 16 Aug 2023 11:56:51 +0200 Subject: [PATCH 13/17] bpftool: Implement link show support for xdp Add support to dump XDP link information to bpftool. This reuses the recently added show_link_ifindex_{plain,json}(). The XDP link info only exposes the ifindex. Below shows an example link dump output, and a cgroup link is included for comparison, too: # bpftool link [...] 10: cgroup prog 2466 cgroup_id 1 attach_type cgroup_inet6_post_bind [...] 16: xdp prog 2477 ifindex enp5s0(3) [...] Equivalent json output: # bpftool link --json [...] { "id": 10, "type": "cgroup", "prog_id": 2466, "cgroup_id": 1, "attach_type": "cgroup_inet6_post_bind" }, [...] { "id": 16, "type": "xdp", "prog_id": 2477, "devname": "enp5s0", "ifindex": 3 } [...] Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/r/20230816095651.10014-2-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- tools/bpf/bpftool/link.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index a3774594f154..0b214f6ab5c8 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -449,6 +449,9 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) show_link_ifindex_json(info->tcx.ifindex, json_wtr); show_link_attach_type_json(info->tcx.attach_type, json_wtr); break; + case BPF_LINK_TYPE_XDP: + show_link_ifindex_json(info->xdp.ifindex, json_wtr); + break; case BPF_LINK_TYPE_STRUCT_OPS: jsonw_uint_field(json_wtr, "map_id", info->struct_ops.map_id); @@ -782,6 +785,10 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) show_link_ifindex_plain(info->tcx.ifindex); show_link_attach_type_plain(info->tcx.attach_type); break; + case BPF_LINK_TYPE_XDP: + printf("\n\t"); + show_link_ifindex_plain(info->xdp.ifindex); + break; case BPF_LINK_TYPE_KPROBE_MULTI: show_kprobe_multi_plain(info); break; From 0dd061a6a115f25132989cbd591a25afb2dee086 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 16 Aug 2023 09:11:56 +0800 Subject: [PATCH 14/17] bpf: Add update_socket_protocol hook Add a hook named update_socket_protocol in __sys_socket(), for bpf progs to attach to and update socket protocol. One user case is to force legacy TCP apps to create and use MPTCP sockets instead of TCP ones. Define a fmod_ret set named bpf_mptcp_fmodret_ids, add the hook update_socket_protocol into this set, and register it in bpf_mptcp_kfunc_init(). Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/79 Acked-by: Matthieu Baerts Acked-by: Yonghong Song Signed-off-by: Geliang Tang Link: https://lore.kernel.org/r/ac84be00f97072a46f8a72b4e2be46cbb7fa5053.1692147782.git.geliang.tang@suse.com Signed-off-by: Martin KaFai Lau --- net/mptcp/bpf.c | 15 +++++++++++++++ net/socket.c | 26 +++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index 5a0a84ad94af..8a16672b94e2 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -19,3 +19,18 @@ struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk) return NULL; } + +BTF_SET8_START(bpf_mptcp_fmodret_ids) +BTF_ID_FLAGS(func, update_socket_protocol) +BTF_SET8_END(bpf_mptcp_fmodret_ids) + +static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = { + .owner = THIS_MODULE, + .set = &bpf_mptcp_fmodret_ids, +}; + +static int __init bpf_mptcp_kfunc_init(void) +{ + return register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set); +} +late_initcall(bpf_mptcp_kfunc_init); diff --git a/net/socket.c b/net/socket.c index 5d4e37595e9a..fdb5233bf560 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1657,12 +1657,36 @@ struct file *__sys_socket_file(int family, int type, int protocol) return sock_alloc_file(sock, flags, NULL); } +/* A hook for bpf progs to attach to and update socket protocol. + * + * A static noinline declaration here could cause the compiler to + * optimize away the function. A global noinline declaration will + * keep the definition, but may optimize away the callsite. + * Therefore, __weak is needed to ensure that the call is still + * emitted, by telling the compiler that we don't know what the + * function might eventually be. + * + * __diag_* below are needed to dismiss the missing prototype warning. + */ + +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "A fmod_ret entry point for BPF programs"); + +__weak noinline int update_socket_protocol(int family, int type, int protocol) +{ + return protocol; +} + +__diag_pop(); + int __sys_socket(int family, int type, int protocol) { struct socket *sock; int flags; - sock = __sys_socket_create(family, type, protocol); + sock = __sys_socket_create(family, type, + update_socket_protocol(family, type, protocol)); if (IS_ERR(sock)) return PTR_ERR(sock); From 97c9c652089b7081d5ed03b1dd0076c04ab12a4a Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 16 Aug 2023 09:11:57 +0800 Subject: [PATCH 15/17] selftests/bpf: Add two mptcp netns helpers Add two netns helpers for mptcp tests: create_netns() and cleanup_netns(). Use them in test_base(). These new helpers will be re-used in the following commits introducing new tests. Acked-by: Yonghong Song Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Link: https://lore.kernel.org/r/7506371fb6c417b401cc9d7365fe455754f4ba3f.1692147782.git.geliang.tang@suse.com Signed-off-by: Martin KaFai Lau --- .../testing/selftests/bpf/prog_tests/mptcp.c | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index cd0c42fff7c0..76afb5191772 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -22,6 +22,24 @@ struct mptcp_storage { char ca_name[TCP_CA_NAME_MAX]; }; +static struct nstoken *create_netns(void) +{ + SYS(fail, "ip netns add %s", NS_TEST); + SYS(fail, "ip -net %s link set dev lo up", NS_TEST); + + return open_netns(NS_TEST); +fail: + return NULL; +} + +static void cleanup_netns(struct nstoken *nstoken) +{ + if (nstoken) + close_netns(nstoken); + + SYS_NOFAIL("ip netns del %s &> /dev/null", NS_TEST); +} + static int verify_tsk(int map_fd, int client_fd) { int err, cfd = client_fd; @@ -147,11 +165,8 @@ static void test_base(void) if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) return; - SYS(fail, "ip netns add %s", NS_TEST); - SYS(fail, "ip -net %s link set dev lo up", NS_TEST); - - nstoken = open_netns(NS_TEST); - if (!ASSERT_OK_PTR(nstoken, "open_netns")) + nstoken = create_netns(); + if (!ASSERT_OK_PTR(nstoken, "create_netns")) goto fail; /* without MPTCP */ @@ -174,11 +189,7 @@ with_mptcp: close(server_fd); fail: - if (nstoken) - close_netns(nstoken); - - SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null"); - + cleanup_netns(nstoken); close(cgroup_fd); } From 207746550262e81b1e360d003a67f7ef7bacfbae Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 16 Aug 2023 09:11:58 +0800 Subject: [PATCH 16/17] selftests/bpf: Fix error checks of mptcp open_and_load Return libbpf_get_error(), instead of -EIO, for the error from mptcp_sock__open_and_load(). Load success means prog_fd and map_fd are always valid. So drop these unneeded ASSERT_GE checks for them in mptcp run_test(). Acked-by: Yonghong Song Signed-off-by: Geliang Tang Link: https://lore.kernel.org/r/db5fcb93293df9ab173edcbaf8252465b80da6f2.1692147782.git.geliang.tang@suse.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/prog_tests/mptcp.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index 76afb5191772..3d3999067e27 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -118,24 +118,14 @@ static int run_test(int cgroup_fd, int server_fd, bool is_mptcp) sock_skel = mptcp_sock__open_and_load(); if (!ASSERT_OK_PTR(sock_skel, "skel_open_load")) - return -EIO; + return libbpf_get_error(sock_skel); err = mptcp_sock__attach(sock_skel); if (!ASSERT_OK(err, "skel_attach")) goto out; prog_fd = bpf_program__fd(sock_skel->progs._sockops); - if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd")) { - err = -EIO; - goto out; - } - map_fd = bpf_map__fd(sock_skel->maps.socket_storage_map); - if (!ASSERT_GE(map_fd, 0, "bpf_map__fd")) { - err = -EIO; - goto out; - } - err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0); if (!ASSERT_OK(err, "bpf_prog_attach")) goto out; From ddba122428a75261fc4d3547f4e7e4aa6b67caef Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 16 Aug 2023 09:11:59 +0800 Subject: [PATCH 17/17] selftests/bpf: Add mptcpify test Implement a new test program mptcpify: if the family is AF_INET or AF_INET6, the type is SOCK_STREAM, and the protocol ID is 0 or IPPROTO_TCP, set it to IPPROTO_MPTCP. It will be hooked in update_socket_protocol(). Extend the MPTCP test base, add a selftest test_mptcpify() for the mptcpify case. Open and load the mptcpify test prog to mptcpify the TCP sockets dynamically, then use start_server() and connect_to_fd() to create a TCP socket, but actually what's created is an MPTCP socket, which can be verified through 'getsockopt(SOL_PROTOCOL)' and 'getsockopt(MPTCP_INFO)'. Acked-by: Yonghong Song Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Link: https://lore.kernel.org/r/364e72f307e7bb38382ec7442c182d76298a9c41.1692147782.git.geliang.tang@suse.com Signed-off-by: Martin KaFai Lau --- .../testing/selftests/bpf/prog_tests/mptcp.c | 141 ++++++++++++++++++ tools/testing/selftests/bpf/progs/mptcpify.c | 20 +++ 2 files changed, 161 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/mptcpify.c diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index 3d3999067e27..7c0be7cf550b 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -2,17 +2,59 @@ /* Copyright (c) 2020, Tessares SA. */ /* Copyright (c) 2022, SUSE. */ +#include +#include #include #include "cgroup_helpers.h" #include "network_helpers.h" #include "mptcp_sock.skel.h" +#include "mptcpify.skel.h" #define NS_TEST "mptcp_ns" +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif + +#ifndef SOL_MPTCP +#define SOL_MPTCP 284 +#endif +#ifndef MPTCP_INFO +#define MPTCP_INFO 1 +#endif +#ifndef MPTCP_INFO_FLAG_FALLBACK +#define MPTCP_INFO_FLAG_FALLBACK _BITUL(0) +#endif +#ifndef MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED +#define MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED _BITUL(1) +#endif + #ifndef TCP_CA_NAME_MAX #define TCP_CA_NAME_MAX 16 #endif +struct __mptcp_info { + __u8 mptcpi_subflows; + __u8 mptcpi_add_addr_signal; + __u8 mptcpi_add_addr_accepted; + __u8 mptcpi_subflows_max; + __u8 mptcpi_add_addr_signal_max; + __u8 mptcpi_add_addr_accepted_max; + __u32 mptcpi_flags; + __u32 mptcpi_token; + __u64 mptcpi_write_seq; + __u64 mptcpi_snd_una; + __u64 mptcpi_rcv_nxt; + __u8 mptcpi_local_addr_used; + __u8 mptcpi_local_addr_max; + __u8 mptcpi_csum_enabled; + __u32 mptcpi_retransmits; + __u64 mptcpi_bytes_retrans; + __u64 mptcpi_bytes_sent; + __u64 mptcpi_bytes_received; + __u64 mptcpi_bytes_acked; +}; + struct mptcp_storage { __u32 invoked; __u32 is_mptcp; @@ -183,8 +225,107 @@ fail: close(cgroup_fd); } +static void send_byte(int fd) +{ + char b = 0x55; + + ASSERT_EQ(write(fd, &b, sizeof(b)), 1, "send single byte"); +} + +static int verify_mptcpify(int server_fd, int client_fd) +{ + struct __mptcp_info info; + socklen_t optlen; + int protocol; + int err = 0; + + optlen = sizeof(protocol); + if (!ASSERT_OK(getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen), + "getsockopt(SOL_PROTOCOL)")) + return -1; + + if (!ASSERT_EQ(protocol, IPPROTO_MPTCP, "protocol isn't MPTCP")) + err++; + + optlen = sizeof(info); + if (!ASSERT_OK(getsockopt(client_fd, SOL_MPTCP, MPTCP_INFO, &info, &optlen), + "getsockopt(MPTCP_INFO)")) + return -1; + + if (!ASSERT_GE(info.mptcpi_flags, 0, "unexpected mptcpi_flags")) + err++; + if (!ASSERT_FALSE(info.mptcpi_flags & MPTCP_INFO_FLAG_FALLBACK, + "MPTCP fallback")) + err++; + if (!ASSERT_TRUE(info.mptcpi_flags & MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED, + "no remote key received")) + err++; + + return err; +} + +static int run_mptcpify(int cgroup_fd) +{ + int server_fd, client_fd, err = 0; + struct mptcpify *mptcpify_skel; + + mptcpify_skel = mptcpify__open_and_load(); + if (!ASSERT_OK_PTR(mptcpify_skel, "skel_open_load")) + return libbpf_get_error(mptcpify_skel); + + err = mptcpify__attach(mptcpify_skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + /* without MPTCP */ + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_GE(server_fd, 0, "start_server")) { + err = -EIO; + goto out; + } + + client_fd = connect_to_fd(server_fd, 0); + if (!ASSERT_GE(client_fd, 0, "connect to fd")) { + err = -EIO; + goto close_server; + } + + send_byte(client_fd); + + err = verify_mptcpify(server_fd, client_fd); + + close(client_fd); +close_server: + close(server_fd); +out: + mptcpify__destroy(mptcpify_skel); + return err; +} + +static void test_mptcpify(void) +{ + struct nstoken *nstoken = NULL; + int cgroup_fd; + + cgroup_fd = test__join_cgroup("/mptcpify"); + if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) + return; + + nstoken = create_netns(); + if (!ASSERT_OK_PTR(nstoken, "create_netns")) + goto fail; + + ASSERT_OK(run_mptcpify(cgroup_fd), "run_mptcpify"); + +fail: + cleanup_netns(nstoken); + close(cgroup_fd); +} + void test_mptcp(void) { if (test__start_subtest("base")) test_base(); + if (test__start_subtest("mptcpify")) + test_mptcpify(); } diff --git a/tools/testing/selftests/bpf/progs/mptcpify.c b/tools/testing/selftests/bpf/progs/mptcpify.c new file mode 100644 index 000000000000..53301ae8a8f7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcpify.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023, SUSE. */ + +#include "vmlinux.h" +#include +#include "bpf_tracing_net.h" + +char _license[] SEC("license") = "GPL"; + +SEC("fmod_ret/update_socket_protocol") +int BPF_PROG(mptcpify, int family, int type, int protocol) +{ + if ((family == AF_INET || family == AF_INET6) && + type == SOCK_STREAM && + (!protocol || protocol == IPPROTO_TCP)) { + return IPPROTO_MPTCP; + } + + return protocol; +}