OpenCloudOS-Kernel/tools/lib/bpf/netlink.c

759 lines
18 KiB
C
Raw Normal View History

// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/* Copyright (c) 2018 Facebook */
#include <stdlib.h>
#include <memory.h>
#include <unistd.h>
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
#include <arpa/inet.h>
#include <linux/bpf.h>
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>
#include <linux/rtnetlink.h>
#include <sys/socket.h>
#include <errno.h>
#include <time.h>
#include "bpf.h"
#include "libbpf.h"
#include "libbpf_internal.h"
#include "nlattr.h"
#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif
typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t,
void *cookie);
struct xdp_id_md {
int ifindex;
__u32 flags;
struct xdp_link_info info;
};
static int libbpf_netlink_open(__u32 *nl_pid)
{
struct sockaddr_nl sa;
socklen_t addrlen;
int one = 1, ret;
int sock;
memset(&sa, 0, sizeof(sa));
sa.nl_family = AF_NETLINK;
sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
if (sock < 0)
return -errno;
if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,
&one, sizeof(one)) < 0) {
pr_warn("Netlink error reporting not supported\n");
}
if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
ret = -errno;
goto cleanup;
}
addrlen = sizeof(sa);
if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
ret = -errno;
goto cleanup;
}
if (addrlen != sizeof(sa)) {
ret = -LIBBPF_ERRNO__INTERNAL;
goto cleanup;
}
*nl_pid = sa.nl_pid;
return sock;
cleanup:
close(sock);
return ret;
}
libbpf: Add various netlink helpers This change introduces a few helpers to wrap open coded attribute preparation in netlink.c. It also adds a libbpf_netlink_send_recv() that is useful to wrap send + recv handling in a generic way. Subsequent patch will also use this function for sending and receiving a netlink response. The libbpf_nl_get_link() helper has been removed instead, moving socket creation into the newly named libbpf_netlink_send_recv(). Every nested attribute's closure must happen using the helper nlattr_end_nested(), which sets its length properly. NLA_F_NESTED is enforced using nlattr_begin_nested() helper. Other simple attributes can be added directly. The maxsz parameter corresponds to the size of the request structure which is being filled in, so for instance with req being: struct { struct nlmsghdr nh; struct tcmsg t; char buf[4096]; } req; Then, maxsz should be sizeof(req). This change also converts the open coded attribute preparation with these helpers. Note that the only failure the internal call to nlattr_add() could result in the nested helper would be -EMSGSIZE, hence that is what we return to our caller. The libbpf_netlink_send_recv() call takes care of opening the socket, sending the netlink message, receiving the response, potentially invoking callbacks, and return errors if any, and then finally close the socket. This allows users to avoid identical socket setup code in different places. The only user of libbpf_nl_get_link() has been converted to make use of it. __bpf_set_link_xdp_fd_replace() has also been refactored to use it. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> [ Daniel: major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-2-memxor@gmail.com
2021-05-12 18:34:49 +08:00
static void libbpf_netlink_close(int sock)
{
close(sock);
}
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
enum {
NL_CONT,
NL_NEXT,
NL_DONE,
};
libbpf: Add various netlink helpers This change introduces a few helpers to wrap open coded attribute preparation in netlink.c. It also adds a libbpf_netlink_send_recv() that is useful to wrap send + recv handling in a generic way. Subsequent patch will also use this function for sending and receiving a netlink response. The libbpf_nl_get_link() helper has been removed instead, moving socket creation into the newly named libbpf_netlink_send_recv(). Every nested attribute's closure must happen using the helper nlattr_end_nested(), which sets its length properly. NLA_F_NESTED is enforced using nlattr_begin_nested() helper. Other simple attributes can be added directly. The maxsz parameter corresponds to the size of the request structure which is being filled in, so for instance with req being: struct { struct nlmsghdr nh; struct tcmsg t; char buf[4096]; } req; Then, maxsz should be sizeof(req). This change also converts the open coded attribute preparation with these helpers. Note that the only failure the internal call to nlattr_add() could result in the nested helper would be -EMSGSIZE, hence that is what we return to our caller. The libbpf_netlink_send_recv() call takes care of opening the socket, sending the netlink message, receiving the response, potentially invoking callbacks, and return errors if any, and then finally close the socket. This allows users to avoid identical socket setup code in different places. The only user of libbpf_nl_get_link() has been converted to make use of it. __bpf_set_link_xdp_fd_replace() has also been refactored to use it. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> [ Daniel: major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-2-memxor@gmail.com
2021-05-12 18:34:49 +08:00
static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
__dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
void *cookie)
{
bool multipart = true;
struct nlmsgerr *err;
struct nlmsghdr *nh;
char buf[4096];
int len, ret;
while (multipart) {
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
start:
multipart = false;
len = recv(sock, buf, sizeof(buf), 0);
if (len < 0) {
ret = -errno;
goto done;
}
if (len == 0)
break;
for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
nh = NLMSG_NEXT(nh, len)) {
if (nh->nlmsg_pid != nl_pid) {
ret = -LIBBPF_ERRNO__WRNGPID;
goto done;
}
if (nh->nlmsg_seq != seq) {
ret = -LIBBPF_ERRNO__INVSEQ;
goto done;
}
if (nh->nlmsg_flags & NLM_F_MULTI)
multipart = true;
switch (nh->nlmsg_type) {
case NLMSG_ERROR:
err = (struct nlmsgerr *)NLMSG_DATA(nh);
if (!err->error)
continue;
ret = err->error;
libbpf_nla_dump_errormsg(nh);
goto done;
case NLMSG_DONE:
return 0;
default:
break;
}
if (_fn) {
ret = _fn(nh, fn, cookie);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
switch (ret) {
case NL_CONT:
break;
case NL_NEXT:
goto start;
case NL_DONE:
return 0;
default:
return ret;
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
}
}
}
}
ret = 0;
done:
return ret;
}
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
static int libbpf_netlink_send_recv(struct libbpf_nla_req *req,
libbpf: Add various netlink helpers This change introduces a few helpers to wrap open coded attribute preparation in netlink.c. It also adds a libbpf_netlink_send_recv() that is useful to wrap send + recv handling in a generic way. Subsequent patch will also use this function for sending and receiving a netlink response. The libbpf_nl_get_link() helper has been removed instead, moving socket creation into the newly named libbpf_netlink_send_recv(). Every nested attribute's closure must happen using the helper nlattr_end_nested(), which sets its length properly. NLA_F_NESTED is enforced using nlattr_begin_nested() helper. Other simple attributes can be added directly. The maxsz parameter corresponds to the size of the request structure which is being filled in, so for instance with req being: struct { struct nlmsghdr nh; struct tcmsg t; char buf[4096]; } req; Then, maxsz should be sizeof(req). This change also converts the open coded attribute preparation with these helpers. Note that the only failure the internal call to nlattr_add() could result in the nested helper would be -EMSGSIZE, hence that is what we return to our caller. The libbpf_netlink_send_recv() call takes care of opening the socket, sending the netlink message, receiving the response, potentially invoking callbacks, and return errors if any, and then finally close the socket. This allows users to avoid identical socket setup code in different places. The only user of libbpf_nl_get_link() has been converted to make use of it. __bpf_set_link_xdp_fd_replace() has also been refactored to use it. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> [ Daniel: major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-2-memxor@gmail.com
2021-05-12 18:34:49 +08:00
__dump_nlmsg_t parse_msg,
libbpf_dump_nlmsg_t parse_attr,
void *cookie)
{
__u32 nl_pid = 0;
int sock, ret;
sock = libbpf_netlink_open(&nl_pid);
if (sock < 0)
return sock;
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
req->nh.nlmsg_pid = 0;
req->nh.nlmsg_seq = time(NULL);
libbpf: Add various netlink helpers This change introduces a few helpers to wrap open coded attribute preparation in netlink.c. It also adds a libbpf_netlink_send_recv() that is useful to wrap send + recv handling in a generic way. Subsequent patch will also use this function for sending and receiving a netlink response. The libbpf_nl_get_link() helper has been removed instead, moving socket creation into the newly named libbpf_netlink_send_recv(). Every nested attribute's closure must happen using the helper nlattr_end_nested(), which sets its length properly. NLA_F_NESTED is enforced using nlattr_begin_nested() helper. Other simple attributes can be added directly. The maxsz parameter corresponds to the size of the request structure which is being filled in, so for instance with req being: struct { struct nlmsghdr nh; struct tcmsg t; char buf[4096]; } req; Then, maxsz should be sizeof(req). This change also converts the open coded attribute preparation with these helpers. Note that the only failure the internal call to nlattr_add() could result in the nested helper would be -EMSGSIZE, hence that is what we return to our caller. The libbpf_netlink_send_recv() call takes care of opening the socket, sending the netlink message, receiving the response, potentially invoking callbacks, and return errors if any, and then finally close the socket. This allows users to avoid identical socket setup code in different places. The only user of libbpf_nl_get_link() has been converted to make use of it. __bpf_set_link_xdp_fd_replace() has also been refactored to use it. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> [ Daniel: major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-2-memxor@gmail.com
2021-05-12 18:34:49 +08:00
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
if (send(sock, req, req->nh.nlmsg_len, 0) < 0) {
libbpf: Add various netlink helpers This change introduces a few helpers to wrap open coded attribute preparation in netlink.c. It also adds a libbpf_netlink_send_recv() that is useful to wrap send + recv handling in a generic way. Subsequent patch will also use this function for sending and receiving a netlink response. The libbpf_nl_get_link() helper has been removed instead, moving socket creation into the newly named libbpf_netlink_send_recv(). Every nested attribute's closure must happen using the helper nlattr_end_nested(), which sets its length properly. NLA_F_NESTED is enforced using nlattr_begin_nested() helper. Other simple attributes can be added directly. The maxsz parameter corresponds to the size of the request structure which is being filled in, so for instance with req being: struct { struct nlmsghdr nh; struct tcmsg t; char buf[4096]; } req; Then, maxsz should be sizeof(req). This change also converts the open coded attribute preparation with these helpers. Note that the only failure the internal call to nlattr_add() could result in the nested helper would be -EMSGSIZE, hence that is what we return to our caller. The libbpf_netlink_send_recv() call takes care of opening the socket, sending the netlink message, receiving the response, potentially invoking callbacks, and return errors if any, and then finally close the socket. This allows users to avoid identical socket setup code in different places. The only user of libbpf_nl_get_link() has been converted to make use of it. __bpf_set_link_xdp_fd_replace() has also been refactored to use it. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> [ Daniel: major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-2-memxor@gmail.com
2021-05-12 18:34:49 +08:00
ret = -errno;
goto out;
}
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
ret = libbpf_netlink_recv(sock, nl_pid, req->nh.nlmsg_seq,
libbpf: Add various netlink helpers This change introduces a few helpers to wrap open coded attribute preparation in netlink.c. It also adds a libbpf_netlink_send_recv() that is useful to wrap send + recv handling in a generic way. Subsequent patch will also use this function for sending and receiving a netlink response. The libbpf_nl_get_link() helper has been removed instead, moving socket creation into the newly named libbpf_netlink_send_recv(). Every nested attribute's closure must happen using the helper nlattr_end_nested(), which sets its length properly. NLA_F_NESTED is enforced using nlattr_begin_nested() helper. Other simple attributes can be added directly. The maxsz parameter corresponds to the size of the request structure which is being filled in, so for instance with req being: struct { struct nlmsghdr nh; struct tcmsg t; char buf[4096]; } req; Then, maxsz should be sizeof(req). This change also converts the open coded attribute preparation with these helpers. Note that the only failure the internal call to nlattr_add() could result in the nested helper would be -EMSGSIZE, hence that is what we return to our caller. The libbpf_netlink_send_recv() call takes care of opening the socket, sending the netlink message, receiving the response, potentially invoking callbacks, and return errors if any, and then finally close the socket. This allows users to avoid identical socket setup code in different places. The only user of libbpf_nl_get_link() has been converted to make use of it. __bpf_set_link_xdp_fd_replace() has also been refactored to use it. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> [ Daniel: major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-2-memxor@gmail.com
2021-05-12 18:34:49 +08:00
parse_msg, parse_attr, cookie);
out:
libbpf_netlink_close(sock);
return ret;
}
static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
__u32 flags)
{
libbpf: Add various netlink helpers This change introduces a few helpers to wrap open coded attribute preparation in netlink.c. It also adds a libbpf_netlink_send_recv() that is useful to wrap send + recv handling in a generic way. Subsequent patch will also use this function for sending and receiving a netlink response. The libbpf_nl_get_link() helper has been removed instead, moving socket creation into the newly named libbpf_netlink_send_recv(). Every nested attribute's closure must happen using the helper nlattr_end_nested(), which sets its length properly. NLA_F_NESTED is enforced using nlattr_begin_nested() helper. Other simple attributes can be added directly. The maxsz parameter corresponds to the size of the request structure which is being filled in, so for instance with req being: struct { struct nlmsghdr nh; struct tcmsg t; char buf[4096]; } req; Then, maxsz should be sizeof(req). This change also converts the open coded attribute preparation with these helpers. Note that the only failure the internal call to nlattr_add() could result in the nested helper would be -EMSGSIZE, hence that is what we return to our caller. The libbpf_netlink_send_recv() call takes care of opening the socket, sending the netlink message, receiving the response, potentially invoking callbacks, and return errors if any, and then finally close the socket. This allows users to avoid identical socket setup code in different places. The only user of libbpf_nl_get_link() has been converted to make use of it. __bpf_set_link_xdp_fd_replace() has also been refactored to use it. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> [ Daniel: major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-2-memxor@gmail.com
2021-05-12 18:34:49 +08:00
struct nlattr *nla;
int ret;
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
struct libbpf_nla_req req;
memset(&req, 0, sizeof(req));
libbpf: Add various netlink helpers This change introduces a few helpers to wrap open coded attribute preparation in netlink.c. It also adds a libbpf_netlink_send_recv() that is useful to wrap send + recv handling in a generic way. Subsequent patch will also use this function for sending and receiving a netlink response. The libbpf_nl_get_link() helper has been removed instead, moving socket creation into the newly named libbpf_netlink_send_recv(). Every nested attribute's closure must happen using the helper nlattr_end_nested(), which sets its length properly. NLA_F_NESTED is enforced using nlattr_begin_nested() helper. Other simple attributes can be added directly. The maxsz parameter corresponds to the size of the request structure which is being filled in, so for instance with req being: struct { struct nlmsghdr nh; struct tcmsg t; char buf[4096]; } req; Then, maxsz should be sizeof(req). This change also converts the open coded attribute preparation with these helpers. Note that the only failure the internal call to nlattr_add() could result in the nested helper would be -EMSGSIZE, hence that is what we return to our caller. The libbpf_netlink_send_recv() call takes care of opening the socket, sending the netlink message, receiving the response, potentially invoking callbacks, and return errors if any, and then finally close the socket. This allows users to avoid identical socket setup code in different places. The only user of libbpf_nl_get_link() has been converted to make use of it. __bpf_set_link_xdp_fd_replace() has also been refactored to use it. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> [ Daniel: major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-2-memxor@gmail.com
2021-05-12 18:34:49 +08:00
req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
req.nh.nlmsg_type = RTM_SETLINK;
req.ifinfo.ifi_family = AF_UNSPEC;
libbpf: Add various netlink helpers This change introduces a few helpers to wrap open coded attribute preparation in netlink.c. It also adds a libbpf_netlink_send_recv() that is useful to wrap send + recv handling in a generic way. Subsequent patch will also use this function for sending and receiving a netlink response. The libbpf_nl_get_link() helper has been removed instead, moving socket creation into the newly named libbpf_netlink_send_recv(). Every nested attribute's closure must happen using the helper nlattr_end_nested(), which sets its length properly. NLA_F_NESTED is enforced using nlattr_begin_nested() helper. Other simple attributes can be added directly. The maxsz parameter corresponds to the size of the request structure which is being filled in, so for instance with req being: struct { struct nlmsghdr nh; struct tcmsg t; char buf[4096]; } req; Then, maxsz should be sizeof(req). This change also converts the open coded attribute preparation with these helpers. Note that the only failure the internal call to nlattr_add() could result in the nested helper would be -EMSGSIZE, hence that is what we return to our caller. The libbpf_netlink_send_recv() call takes care of opening the socket, sending the netlink message, receiving the response, potentially invoking callbacks, and return errors if any, and then finally close the socket. This allows users to avoid identical socket setup code in different places. The only user of libbpf_nl_get_link() has been converted to make use of it. __bpf_set_link_xdp_fd_replace() has also been refactored to use it. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> [ Daniel: major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-2-memxor@gmail.com
2021-05-12 18:34:49 +08:00
req.ifinfo.ifi_index = ifindex;
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
nla = nlattr_begin_nested(&req, IFLA_XDP);
libbpf: Add various netlink helpers This change introduces a few helpers to wrap open coded attribute preparation in netlink.c. It also adds a libbpf_netlink_send_recv() that is useful to wrap send + recv handling in a generic way. Subsequent patch will also use this function for sending and receiving a netlink response. The libbpf_nl_get_link() helper has been removed instead, moving socket creation into the newly named libbpf_netlink_send_recv(). Every nested attribute's closure must happen using the helper nlattr_end_nested(), which sets its length properly. NLA_F_NESTED is enforced using nlattr_begin_nested() helper. Other simple attributes can be added directly. The maxsz parameter corresponds to the size of the request structure which is being filled in, so for instance with req being: struct { struct nlmsghdr nh; struct tcmsg t; char buf[4096]; } req; Then, maxsz should be sizeof(req). This change also converts the open coded attribute preparation with these helpers. Note that the only failure the internal call to nlattr_add() could result in the nested helper would be -EMSGSIZE, hence that is what we return to our caller. The libbpf_netlink_send_recv() call takes care of opening the socket, sending the netlink message, receiving the response, potentially invoking callbacks, and return errors if any, and then finally close the socket. This allows users to avoid identical socket setup code in different places. The only user of libbpf_nl_get_link() has been converted to make use of it. __bpf_set_link_xdp_fd_replace() has also been refactored to use it. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> [ Daniel: major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-2-memxor@gmail.com
2021-05-12 18:34:49 +08:00
if (!nla)
return -EMSGSIZE;
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
ret = nlattr_add(&req, IFLA_XDP_FD, &fd, sizeof(fd));
libbpf: Add various netlink helpers This change introduces a few helpers to wrap open coded attribute preparation in netlink.c. It also adds a libbpf_netlink_send_recv() that is useful to wrap send + recv handling in a generic way. Subsequent patch will also use this function for sending and receiving a netlink response. The libbpf_nl_get_link() helper has been removed instead, moving socket creation into the newly named libbpf_netlink_send_recv(). Every nested attribute's closure must happen using the helper nlattr_end_nested(), which sets its length properly. NLA_F_NESTED is enforced using nlattr_begin_nested() helper. Other simple attributes can be added directly. The maxsz parameter corresponds to the size of the request structure which is being filled in, so for instance with req being: struct { struct nlmsghdr nh; struct tcmsg t; char buf[4096]; } req; Then, maxsz should be sizeof(req). This change also converts the open coded attribute preparation with these helpers. Note that the only failure the internal call to nlattr_add() could result in the nested helper would be -EMSGSIZE, hence that is what we return to our caller. The libbpf_netlink_send_recv() call takes care of opening the socket, sending the netlink message, receiving the response, potentially invoking callbacks, and return errors if any, and then finally close the socket. This allows users to avoid identical socket setup code in different places. The only user of libbpf_nl_get_link() has been converted to make use of it. __bpf_set_link_xdp_fd_replace() has also been refactored to use it. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> [ Daniel: major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-2-memxor@gmail.com
2021-05-12 18:34:49 +08:00
if (ret < 0)
return ret;
if (flags) {
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
ret = nlattr_add(&req, IFLA_XDP_FLAGS, &flags, sizeof(flags));
libbpf: Add various netlink helpers This change introduces a few helpers to wrap open coded attribute preparation in netlink.c. It also adds a libbpf_netlink_send_recv() that is useful to wrap send + recv handling in a generic way. Subsequent patch will also use this function for sending and receiving a netlink response. The libbpf_nl_get_link() helper has been removed instead, moving socket creation into the newly named libbpf_netlink_send_recv(). Every nested attribute's closure must happen using the helper nlattr_end_nested(), which sets its length properly. NLA_F_NESTED is enforced using nlattr_begin_nested() helper. Other simple attributes can be added directly. The maxsz parameter corresponds to the size of the request structure which is being filled in, so for instance with req being: struct { struct nlmsghdr nh; struct tcmsg t; char buf[4096]; } req; Then, maxsz should be sizeof(req). This change also converts the open coded attribute preparation with these helpers. Note that the only failure the internal call to nlattr_add() could result in the nested helper would be -EMSGSIZE, hence that is what we return to our caller. The libbpf_netlink_send_recv() call takes care of opening the socket, sending the netlink message, receiving the response, potentially invoking callbacks, and return errors if any, and then finally close the socket. This allows users to avoid identical socket setup code in different places. The only user of libbpf_nl_get_link() has been converted to make use of it. __bpf_set_link_xdp_fd_replace() has also been refactored to use it. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> [ Daniel: major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-2-memxor@gmail.com
2021-05-12 18:34:49 +08:00
if (ret < 0)
return ret;
}
if (flags & XDP_FLAGS_REPLACE) {
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
ret = nlattr_add(&req, IFLA_XDP_EXPECTED_FD, &old_fd,
sizeof(old_fd));
libbpf: Add various netlink helpers This change introduces a few helpers to wrap open coded attribute preparation in netlink.c. It also adds a libbpf_netlink_send_recv() that is useful to wrap send + recv handling in a generic way. Subsequent patch will also use this function for sending and receiving a netlink response. The libbpf_nl_get_link() helper has been removed instead, moving socket creation into the newly named libbpf_netlink_send_recv(). Every nested attribute's closure must happen using the helper nlattr_end_nested(), which sets its length properly. NLA_F_NESTED is enforced using nlattr_begin_nested() helper. Other simple attributes can be added directly. The maxsz parameter corresponds to the size of the request structure which is being filled in, so for instance with req being: struct { struct nlmsghdr nh; struct tcmsg t; char buf[4096]; } req; Then, maxsz should be sizeof(req). This change also converts the open coded attribute preparation with these helpers. Note that the only failure the internal call to nlattr_add() could result in the nested helper would be -EMSGSIZE, hence that is what we return to our caller. The libbpf_netlink_send_recv() call takes care of opening the socket, sending the netlink message, receiving the response, potentially invoking callbacks, and return errors if any, and then finally close the socket. This allows users to avoid identical socket setup code in different places. The only user of libbpf_nl_get_link() has been converted to make use of it. __bpf_set_link_xdp_fd_replace() has also been refactored to use it. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> [ Daniel: major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-2-memxor@gmail.com
2021-05-12 18:34:49 +08:00
if (ret < 0)
return ret;
}
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
nlattr_end_nested(&req, nla);
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
}
int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
const struct bpf_xdp_set_link_opts *opts)
{
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
int old_fd = -1, ret;
if (!OPTS_VALID(opts, bpf_xdp_set_link_opts))
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(-EINVAL);
if (OPTS_HAS(opts, old_fd)) {
old_fd = OPTS_GET(opts, old_fd, -1);
flags |= XDP_FLAGS_REPLACE;
}
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, old_fd, flags);
return libbpf_err(ret);
}
int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
{
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
int ret;
ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
return libbpf_err(ret);
}
static int __dump_link_nlmsg(struct nlmsghdr *nlh,
libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
{
struct nlattr *tb[IFLA_MAX + 1], *attr;
struct ifinfomsg *ifi = NLMSG_DATA(nlh);
int len;
len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi)));
libbpf: Add various netlink helpers This change introduces a few helpers to wrap open coded attribute preparation in netlink.c. It also adds a libbpf_netlink_send_recv() that is useful to wrap send + recv handling in a generic way. Subsequent patch will also use this function for sending and receiving a netlink response. The libbpf_nl_get_link() helper has been removed instead, moving socket creation into the newly named libbpf_netlink_send_recv(). Every nested attribute's closure must happen using the helper nlattr_end_nested(), which sets its length properly. NLA_F_NESTED is enforced using nlattr_begin_nested() helper. Other simple attributes can be added directly. The maxsz parameter corresponds to the size of the request structure which is being filled in, so for instance with req being: struct { struct nlmsghdr nh; struct tcmsg t; char buf[4096]; } req; Then, maxsz should be sizeof(req). This change also converts the open coded attribute preparation with these helpers. Note that the only failure the internal call to nlattr_add() could result in the nested helper would be -EMSGSIZE, hence that is what we return to our caller. The libbpf_netlink_send_recv() call takes care of opening the socket, sending the netlink message, receiving the response, potentially invoking callbacks, and return errors if any, and then finally close the socket. This allows users to avoid identical socket setup code in different places. The only user of libbpf_nl_get_link() has been converted to make use of it. __bpf_set_link_xdp_fd_replace() has also been refactored to use it. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> [ Daniel: major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-2-memxor@gmail.com
2021-05-12 18:34:49 +08:00
if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0)
return -LIBBPF_ERRNO__NLPARSE;
return dump_link_nlmsg(cookie, ifi, tb);
}
static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
{
struct nlattr *xdp_tb[IFLA_XDP_MAX + 1];
struct xdp_id_md *xdp_id = cookie;
struct ifinfomsg *ifinfo = msg;
int ret;
if (xdp_id->ifindex && xdp_id->ifindex != ifinfo->ifi_index)
return 0;
if (!tb[IFLA_XDP])
return 0;
ret = libbpf_nla_parse_nested(xdp_tb, IFLA_XDP_MAX, tb[IFLA_XDP], NULL);
if (ret)
return ret;
if (!xdp_tb[IFLA_XDP_ATTACHED])
return 0;
xdp_id->info.attach_mode = libbpf_nla_getattr_u8(
xdp_tb[IFLA_XDP_ATTACHED]);
if (xdp_id->info.attach_mode == XDP_ATTACHED_NONE)
return 0;
if (xdp_tb[IFLA_XDP_PROG_ID])
xdp_id->info.prog_id = libbpf_nla_getattr_u32(
xdp_tb[IFLA_XDP_PROG_ID]);
if (xdp_tb[IFLA_XDP_SKB_PROG_ID])
xdp_id->info.skb_prog_id = libbpf_nla_getattr_u32(
xdp_tb[IFLA_XDP_SKB_PROG_ID]);
if (xdp_tb[IFLA_XDP_DRV_PROG_ID])
xdp_id->info.drv_prog_id = libbpf_nla_getattr_u32(
xdp_tb[IFLA_XDP_DRV_PROG_ID]);
if (xdp_tb[IFLA_XDP_HW_PROG_ID])
xdp_id->info.hw_prog_id = libbpf_nla_getattr_u32(
xdp_tb[IFLA_XDP_HW_PROG_ID]);
return 0;
}
int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
size_t info_size, __u32 flags)
{
struct xdp_id_md xdp_id = {};
__u32 mask;
libbpf: Add various netlink helpers This change introduces a few helpers to wrap open coded attribute preparation in netlink.c. It also adds a libbpf_netlink_send_recv() that is useful to wrap send + recv handling in a generic way. Subsequent patch will also use this function for sending and receiving a netlink response. The libbpf_nl_get_link() helper has been removed instead, moving socket creation into the newly named libbpf_netlink_send_recv(). Every nested attribute's closure must happen using the helper nlattr_end_nested(), which sets its length properly. NLA_F_NESTED is enforced using nlattr_begin_nested() helper. Other simple attributes can be added directly. The maxsz parameter corresponds to the size of the request structure which is being filled in, so for instance with req being: struct { struct nlmsghdr nh; struct tcmsg t; char buf[4096]; } req; Then, maxsz should be sizeof(req). This change also converts the open coded attribute preparation with these helpers. Note that the only failure the internal call to nlattr_add() could result in the nested helper would be -EMSGSIZE, hence that is what we return to our caller. The libbpf_netlink_send_recv() call takes care of opening the socket, sending the netlink message, receiving the response, potentially invoking callbacks, and return errors if any, and then finally close the socket. This allows users to avoid identical socket setup code in different places. The only user of libbpf_nl_get_link() has been converted to make use of it. __bpf_set_link_xdp_fd_replace() has also been refactored to use it. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> [ Daniel: major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-2-memxor@gmail.com
2021-05-12 18:34:49 +08:00
int ret;
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
struct libbpf_nla_req req = {
.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
.nh.nlmsg_type = RTM_GETLINK,
.nh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
.ifinfo.ifi_family = AF_PACKET,
libbpf: Add various netlink helpers This change introduces a few helpers to wrap open coded attribute preparation in netlink.c. It also adds a libbpf_netlink_send_recv() that is useful to wrap send + recv handling in a generic way. Subsequent patch will also use this function for sending and receiving a netlink response. The libbpf_nl_get_link() helper has been removed instead, moving socket creation into the newly named libbpf_netlink_send_recv(). Every nested attribute's closure must happen using the helper nlattr_end_nested(), which sets its length properly. NLA_F_NESTED is enforced using nlattr_begin_nested() helper. Other simple attributes can be added directly. The maxsz parameter corresponds to the size of the request structure which is being filled in, so for instance with req being: struct { struct nlmsghdr nh; struct tcmsg t; char buf[4096]; } req; Then, maxsz should be sizeof(req). This change also converts the open coded attribute preparation with these helpers. Note that the only failure the internal call to nlattr_add() could result in the nested helper would be -EMSGSIZE, hence that is what we return to our caller. The libbpf_netlink_send_recv() call takes care of opening the socket, sending the netlink message, receiving the response, potentially invoking callbacks, and return errors if any, and then finally close the socket. This allows users to avoid identical socket setup code in different places. The only user of libbpf_nl_get_link() has been converted to make use of it. __bpf_set_link_xdp_fd_replace() has also been refactored to use it. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> [ Daniel: major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-2-memxor@gmail.com
2021-05-12 18:34:49 +08:00
};
if (flags & ~XDP_FLAGS_MASK || !info_size)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(-EINVAL);
/* Check whether the single {HW,DRV,SKB} mode is set */
flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE);
mask = flags - 1;
if (flags && flags & mask)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(-EINVAL);
xdp_id.ifindex = ifindex;
xdp_id.flags = flags;
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
ret = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
libbpf: Add various netlink helpers This change introduces a few helpers to wrap open coded attribute preparation in netlink.c. It also adds a libbpf_netlink_send_recv() that is useful to wrap send + recv handling in a generic way. Subsequent patch will also use this function for sending and receiving a netlink response. The libbpf_nl_get_link() helper has been removed instead, moving socket creation into the newly named libbpf_netlink_send_recv(). Every nested attribute's closure must happen using the helper nlattr_end_nested(), which sets its length properly. NLA_F_NESTED is enforced using nlattr_begin_nested() helper. Other simple attributes can be added directly. The maxsz parameter corresponds to the size of the request structure which is being filled in, so for instance with req being: struct { struct nlmsghdr nh; struct tcmsg t; char buf[4096]; } req; Then, maxsz should be sizeof(req). This change also converts the open coded attribute preparation with these helpers. Note that the only failure the internal call to nlattr_add() could result in the nested helper would be -EMSGSIZE, hence that is what we return to our caller. The libbpf_netlink_send_recv() call takes care of opening the socket, sending the netlink message, receiving the response, potentially invoking callbacks, and return errors if any, and then finally close the socket. This allows users to avoid identical socket setup code in different places. The only user of libbpf_nl_get_link() has been converted to make use of it. __bpf_set_link_xdp_fd_replace() has also been refactored to use it. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> [ Daniel: major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-2-memxor@gmail.com
2021-05-12 18:34:49 +08:00
get_xdp_info, &xdp_id);
if (!ret) {
size_t sz = min(info_size, sizeof(xdp_id.info));
memcpy(info, &xdp_id.info, sz);
memset((void *) info + sz, 0, info_size - sz);
}
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(ret);
}
static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
{
flags &= XDP_FLAGS_MODES;
if (info->attach_mode != XDP_ATTACHED_MULTI && !flags)
return info->prog_id;
if (flags & XDP_FLAGS_DRV_MODE)
return info->drv_prog_id;
if (flags & XDP_FLAGS_HW_MODE)
return info->hw_prog_id;
if (flags & XDP_FLAGS_SKB_MODE)
return info->skb_prog_id;
return 0;
}
int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
{
struct xdp_link_info info;
int ret;
ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags);
if (!ret)
*prog_id = get_xdp_id(&info, flags);
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(ret);
}
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
static int clsact_config(struct libbpf_nla_req *req)
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
{
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
req->tc.tcm_parent = TC_H_CLSACT;
req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact"));
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
}
static int attach_point_to_config(struct bpf_tc_hook *hook,
qdisc_config_t *config)
{
switch (OPTS_GET(hook, attach_point, 0)) {
case BPF_TC_INGRESS:
case BPF_TC_EGRESS:
case BPF_TC_INGRESS | BPF_TC_EGRESS:
if (OPTS_GET(hook, parent, 0))
return -EINVAL;
*config = &clsact_config;
return 0;
case BPF_TC_CUSTOM:
return -EOPNOTSUPP;
default:
return -EINVAL;
}
}
static int tc_get_tcm_parent(enum bpf_tc_attach_point attach_point,
__u32 *parent)
{
switch (attach_point) {
case BPF_TC_INGRESS:
case BPF_TC_EGRESS:
if (*parent)
return -EINVAL;
*parent = TC_H_MAKE(TC_H_CLSACT,
attach_point == BPF_TC_INGRESS ?
TC_H_MIN_INGRESS : TC_H_MIN_EGRESS);
break;
case BPF_TC_CUSTOM:
if (!*parent)
return -EINVAL;
break;
default:
return -EINVAL;
}
return 0;
}
static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
{
qdisc_config_t config;
int ret;
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
struct libbpf_nla_req req;
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
ret = attach_point_to_config(hook, &config);
if (ret < 0)
return ret;
memset(&req, 0, sizeof(req));
req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
req.nh.nlmsg_type = cmd;
req.tc.tcm_family = AF_UNSPEC;
req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0);
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
ret = config(&req);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
if (ret < 0)
return ret;
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
}
static int tc_qdisc_create_excl(struct bpf_tc_hook *hook)
{
return tc_qdisc_modify(hook, RTM_NEWQDISC, NLM_F_CREATE | NLM_F_EXCL);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
}
static int tc_qdisc_delete(struct bpf_tc_hook *hook)
{
return tc_qdisc_modify(hook, RTM_DELQDISC, 0);
}
int bpf_tc_hook_create(struct bpf_tc_hook *hook)
{
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
int ret;
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
OPTS_GET(hook, ifindex, 0) <= 0)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(-EINVAL);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
ret = tc_qdisc_create_excl(hook);
return libbpf_err(ret);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
}
static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
const struct bpf_tc_opts *opts,
const bool flush);
int bpf_tc_hook_destroy(struct bpf_tc_hook *hook)
{
if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
OPTS_GET(hook, ifindex, 0) <= 0)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(-EINVAL);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
switch (OPTS_GET(hook, attach_point, 0)) {
case BPF_TC_INGRESS:
case BPF_TC_EGRESS:
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(__bpf_tc_detach(hook, NULL, true));
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
case BPF_TC_INGRESS | BPF_TC_EGRESS:
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(tc_qdisc_delete(hook));
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
case BPF_TC_CUSTOM:
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(-EOPNOTSUPP);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
default:
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(-EINVAL);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
}
}
struct bpf_cb_ctx {
struct bpf_tc_opts *opts;
bool processed;
};
static int __get_tc_info(void *cookie, struct tcmsg *tc, struct nlattr **tb,
bool unicast)
{
struct nlattr *tbb[TCA_BPF_MAX + 1];
struct bpf_cb_ctx *info = cookie;
if (!info || !info->opts)
return -EINVAL;
if (unicast && info->processed)
return -EINVAL;
if (!tb[TCA_OPTIONS])
return NL_CONT;
libbpf_nla_parse_nested(tbb, TCA_BPF_MAX, tb[TCA_OPTIONS], NULL);
if (!tbb[TCA_BPF_ID])
return -EINVAL;
OPTS_SET(info->opts, prog_id, libbpf_nla_getattr_u32(tbb[TCA_BPF_ID]));
OPTS_SET(info->opts, handle, tc->tcm_handle);
OPTS_SET(info->opts, priority, TC_H_MAJ(tc->tcm_info) >> 16);
info->processed = true;
return unicast ? NL_NEXT : NL_DONE;
}
static int get_tc_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
void *cookie)
{
struct tcmsg *tc = NLMSG_DATA(nh);
struct nlattr *tb[TCA_MAX + 1];
libbpf_nla_parse(tb, TCA_MAX,
(struct nlattr *)((void *)tc + NLMSG_ALIGN(sizeof(*tc))),
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
NLMSG_PAYLOAD(nh, sizeof(*tc)), NULL);
if (!tb[TCA_KIND])
return NL_CONT;
return __get_tc_info(cookie, tc, tb, nh->nlmsg_flags & NLM_F_ECHO);
}
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
static int tc_add_fd_and_name(struct libbpf_nla_req *req, int fd)
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
{
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
char name[256];
int len, ret;
ret = bpf_obj_get_info_by_fd(fd, &info, &info_len);
if (ret < 0)
return ret;
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
ret = nlattr_add(req, TCA_BPF_FD, &fd, sizeof(fd));
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
if (ret < 0)
return ret;
len = snprintf(name, sizeof(name), "%s:[%u]", info.name, info.id);
if (len < 0)
return -errno;
if (len >= sizeof(name))
return -ENAMETOOLONG;
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
return nlattr_add(req, TCA_BPF_NAME, name, len + 1);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
}
int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
{
__u32 protocol, bpf_flags, handle, priority, parent, prog_id, flags;
int ret, ifindex, attach_point, prog_fd;
struct bpf_cb_ctx info = {};
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
struct libbpf_nla_req req;
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
struct nlattr *nla;
if (!hook || !opts ||
!OPTS_VALID(hook, bpf_tc_hook) ||
!OPTS_VALID(opts, bpf_tc_opts))
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(-EINVAL);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
ifindex = OPTS_GET(hook, ifindex, 0);
parent = OPTS_GET(hook, parent, 0);
attach_point = OPTS_GET(hook, attach_point, 0);
handle = OPTS_GET(opts, handle, 0);
priority = OPTS_GET(opts, priority, 0);
prog_fd = OPTS_GET(opts, prog_fd, 0);
prog_id = OPTS_GET(opts, prog_id, 0);
flags = OPTS_GET(opts, flags, 0);
if (ifindex <= 0 || !prog_fd || prog_id)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(-EINVAL);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
if (priority > UINT16_MAX)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(-EINVAL);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
if (flags & ~BPF_TC_F_REPLACE)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(-EINVAL);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
flags = (flags & BPF_TC_F_REPLACE) ? NLM_F_REPLACE : NLM_F_EXCL;
protocol = ETH_P_ALL;
memset(&req, 0, sizeof(req));
req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE |
NLM_F_ECHO | flags;
req.nh.nlmsg_type = RTM_NEWTFILTER;
req.tc.tcm_family = AF_UNSPEC;
req.tc.tcm_ifindex = ifindex;
req.tc.tcm_handle = handle;
req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol));
ret = tc_get_tcm_parent(attach_point, &parent);
if (ret < 0)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(ret);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
req.tc.tcm_parent = parent;
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
if (ret < 0)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(ret);
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
nla = nlattr_begin_nested(&req, TCA_OPTIONS);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
if (!nla)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(-EMSGSIZE);
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
ret = tc_add_fd_and_name(&req, prog_fd);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
if (ret < 0)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(ret);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
bpf_flags = TCA_BPF_FLAG_ACT_DIRECT;
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
ret = nlattr_add(&req, TCA_BPF_FLAGS, &bpf_flags, sizeof(bpf_flags));
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
if (ret < 0)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(ret);
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
nlattr_end_nested(&req, nla);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
info.opts = opts;
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
ret = libbpf_netlink_send_recv(&req, get_tc_info, NULL, &info);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
if (ret < 0)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(ret);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
if (!info.processed)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(-ENOENT);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
return ret;
}
static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
const struct bpf_tc_opts *opts,
const bool flush)
{
__u32 protocol = 0, handle, priority, parent, prog_id, flags;
int ret, ifindex, attach_point, prog_fd;
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
struct libbpf_nla_req req;
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
if (!hook ||
!OPTS_VALID(hook, bpf_tc_hook) ||
!OPTS_VALID(opts, bpf_tc_opts))
return -EINVAL;
ifindex = OPTS_GET(hook, ifindex, 0);
parent = OPTS_GET(hook, parent, 0);
attach_point = OPTS_GET(hook, attach_point, 0);
handle = OPTS_GET(opts, handle, 0);
priority = OPTS_GET(opts, priority, 0);
prog_fd = OPTS_GET(opts, prog_fd, 0);
prog_id = OPTS_GET(opts, prog_id, 0);
flags = OPTS_GET(opts, flags, 0);
if (ifindex <= 0 || flags || prog_fd || prog_id)
return -EINVAL;
if (priority > UINT16_MAX)
return -EINVAL;
if (!flush) {
if (!handle || !priority)
return -EINVAL;
protocol = ETH_P_ALL;
} else {
if (handle || priority)
return -EINVAL;
}
memset(&req, 0, sizeof(req));
req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
req.nh.nlmsg_type = RTM_DELTFILTER;
req.tc.tcm_family = AF_UNSPEC;
req.tc.tcm_ifindex = ifindex;
if (!flush) {
req.tc.tcm_handle = handle;
req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol));
}
ret = tc_get_tcm_parent(attach_point, &parent);
if (ret < 0)
return ret;
req.tc.tcm_parent = parent;
if (!flush) {
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
if (ret < 0)
return ret;
}
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
}
int bpf_tc_detach(const struct bpf_tc_hook *hook,
const struct bpf_tc_opts *opts)
{
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
int ret;
if (!opts)
return libbpf_err(-EINVAL);
ret = __bpf_tc_detach(hook, opts, false);
return libbpf_err(ret);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
}
int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
{
__u32 protocol, handle, priority, parent, prog_id, flags;
int ret, ifindex, attach_point, prog_fd;
struct bpf_cb_ctx info = {};
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
struct libbpf_nla_req req;
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
if (!hook || !opts ||
!OPTS_VALID(hook, bpf_tc_hook) ||
!OPTS_VALID(opts, bpf_tc_opts))
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(-EINVAL);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
ifindex = OPTS_GET(hook, ifindex, 0);
parent = OPTS_GET(hook, parent, 0);
attach_point = OPTS_GET(hook, attach_point, 0);
handle = OPTS_GET(opts, handle, 0);
priority = OPTS_GET(opts, priority, 0);
prog_fd = OPTS_GET(opts, prog_fd, 0);
prog_id = OPTS_GET(opts, prog_id, 0);
flags = OPTS_GET(opts, flags, 0);
if (ifindex <= 0 || flags || prog_fd || prog_id ||
!handle || !priority)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(-EINVAL);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
if (priority > UINT16_MAX)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(-EINVAL);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
protocol = ETH_P_ALL;
memset(&req, 0, sizeof(req));
req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
req.nh.nlmsg_flags = NLM_F_REQUEST;
req.nh.nlmsg_type = RTM_GETTFILTER;
req.tc.tcm_family = AF_UNSPEC;
req.tc.tcm_ifindex = ifindex;
req.tc.tcm_handle = handle;
req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol));
ret = tc_get_tcm_parent(attach_point, &parent);
if (ret < 0)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(ret);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
req.tc.tcm_parent = parent;
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
if (ret < 0)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(ret);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
info.opts = opts;
libbpf: Add request buffer type for netlink messages Coverity complains about OOB writes to nlmsghdr. There is no OOB as we write to the trailing buffer, but static analyzers and compilers may rightfully be confused as the nlmsghdr pointer has subobject provenance (and hence subobject bounds). Fix this by using an explicit request structure containing the nlmsghdr, struct tcmsg/ifinfomsg, and attribute buffer. Also switch nh_tail (renamed to req_tail) to cast req * to char * so that it can be understood as arithmetic on pointer to the representation array (hence having same bound as request structure), which should further appease analyzers. As a bonus, callers don't have to pass sizeof(req) all the time now, as size is implicitly obtained using the pointer. While at it, also reduce the size of attribute buffer to 128 bytes (132 for ifinfomsg using functions due to the padding). Summary of problem: Even though C standard allows interconvertibility of pointer to first member and pointer to struct, for the purposes of alias analysis it would still consider the first as having pointer value "pointer to T" where T is type of first member hence having subobject bounds, allowing analyzers within reason to complain when object is accessed beyond the size of pointed to object. The only exception to this rule may be when a char * is formed to a member subobject. It is not possible for the compiler to be able to tell the intent of the programmer that it is a pointer to member object or the underlying representation array of the containing object, so such diagnosis is suppressed. Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API") Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
2021-06-19 12:14:53 +08:00
ret = libbpf_netlink_send_recv(&req, get_tc_info, NULL, &info);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
if (ret < 0)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(ret);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
if (!info.processed)
libbpf: Streamline error reporting for high-level APIs Implement changes to error reporting for high-level libbpf APIs to make them less surprising and less error-prone to users: - in all the cases when error happens, errno is set to an appropriate error value; - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and error code is communicated through errno; this applies both to APIs that already returned NULL before (so now they communicate more detailed error codes), as well as for many APIs that used ERR_PTR() macro and encoded error numbers as fake pointers. - in legacy (default) mode, those APIs that were returning ERR_PTR(err), continue doing so, but still set errno. With these changes, errno can be always used to extract actual error, regardless of legacy or libbpf 1.0 modes. This is utilized internally in libbpf in places where libbpf uses it's own high-level APIs. libbpf_get_error() is adapted to handle both cases completely transparently to end-users (and is used by libbpf consistently as well). More context, justification, and discussion can be found in "Libbpf: the road to v1.0" document ([0]). [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
2021-05-25 11:59:34 +08:00
return libbpf_err(-ENOENT);
libbpf: Add low level TC-BPF management API This adds functions that wrap the netlink API used for adding, manipulating, and removing traffic control filters. The API summary: A bpf_tc_hook represents a location where a TC-BPF filter can be attached. This means that creating a hook leads to creation of the backing qdisc, while destruction either removes all filters attached to a hook, or destroys qdisc if requested explicitly (as discussed below). The TC-BPF API functions operate on this bpf_tc_hook to attach, replace, query, and detach tc filters. All functions return 0 on success, and a negative error code on failure. bpf_tc_hook_create - Create a hook Parameters: @hook - Cannot be NULL, ifindex > 0, attach_point must be set to proper enum constant. Note that parent must be unset when attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a valid value for attach_point. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_hook_destroy - Destroy a hook Parameters: @hook - Cannot be NULL. The behaviour depends on value of attach_point. If BPF_TC_INGRESS, all filters attached to the ingress hook will be detached. If BPF_TC_EGRESS, all filters attached to the egress hook will be detached. If BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be deleted, also detaching all filters. As before, parent must be unset for these attach_points, and set for BPF_TC_CUSTOM. It is advised that if the qdisc is operated on by many programs, then the program at least check that there are no other existing filters before deleting the clsact qdisc. An example is shown below: DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* set opts as NULL, as we're not really interested in * getting any info for a particular filter, but just * detecting its presence. */ r = bpf_tc_query(&hook, NULL); if (r == -ENOENT) { /* no filters */ hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS; return bpf_tc_hook_destroy(&hook); } else { /* failed or r == 0, the latter means filters do exist */ return r; } Note that there is a small race between checking for no filters and deleting the qdisc. This is currently unavoidable. Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM. bpf_tc_attach - Attach a filter to a hook Parameters: @hook - Cannot be NULL. Represents the hook the filter will be attached to. Requirements for ifindex and attach_point are same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM is also supported. In that case, parent must be set to the handle where the filter will be attached (using BPF_TC_PARENT). E.g. to set parent to 1:16 like in tc command line, the equivalent would be BPF_TC_PARENT(1, 16). @opts - Cannot be NULL. The following opts are optional: * handle - The handle of the filter * priority - The priority of the filter Must be >= 0 and <= UINT16_MAX Note that when left unset, they will be auto-allocated by the kernel. The following opts must be set: * prog_fd - The fd of the loaded SCHED_CLS prog The following opts must be unset: * prog_id - The ID of the BPF prog The following opts are optional: * flags - Currently only BPF_TC_F_REPLACE is allowed. It allows replacing an existing filter instead of failing with -EEXIST. The following opts will be filled by bpf_tc_attach on a successful attach operation if they are unset: * handle - The handle of the attached filter * priority - The priority of the attached filter * prog_id - The ID of the attached SCHED_CLS prog This way, the user can know what the auto allocated values for optional opts like handle and priority are for the newly attached filter, if they were unset. Note that some other attributes are set to fixed default values listed below (this holds for all bpf_tc_* APIs): protocol as ETH_P_ALL, direct action mode, chain index of 0, and class ID of 0 (this can be set by writing to the skb->tc_classid field from the BPF program). bpf_tc_detach Parameters: @hook - Cannot be NULL. Represents the hook the filter will be detached from. Requirements are same as described above in bpf_tc_attach. @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags bpf_tc_query Parameters: @hook - Cannot be NULL. Represents the hook where the filter lookup will be performed. Requirements are same as described above in bpf_tc_attach(). @opts - Cannot be NULL. The following opts must be set: * handle, priority The following opts must be unset: * prog_fd, prog_id, flags The following fields will be filled by bpf_tc_query upon a successful lookup: * prog_id Some usage examples (using BPF skeleton infrastructure): BPF program (test_tc_bpf.c): #include <linux/bpf.h> #include <bpf/bpf_helpers.h> SEC("classifier") int cls(struct __sk_buff *skb) { return 0; } Userspace loader: struct test_tc_bpf *skel = NULL; int fd, r; skel = test_tc_bpf__open_and_load(); if (!skel) return -ENOMEM; fd = bpf_program__fd(skel->progs.cls); DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = if_nametoindex("lo"), .attach_point = BPF_TC_INGRESS); /* Create clsact qdisc */ r = bpf_tc_hook_create(&hook); if (r < 0) goto end; DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd); r = bpf_tc_attach(&hook, &opts); if (r < 0) goto end; /* Print the auto allocated handle and priority */ printf("Handle=%u", opts.handle); printf("Priority=%u", opts.priority); opts.prog_fd = opts.prog_id = 0; bpf_tc_detach(&hook, &opts); end: test_tc_bpf__destroy(skel); This is equivalent to doing the following using tc command line: # tc qdisc add dev lo clsact # tc filter add dev lo ingress bpf obj foo.o sec classifier da # tc filter del dev lo ingress handle <h> prio <p> bpf ... where the handle and priority can be found using: # tc filter show dev lo ingress Another example replacing a filter (extending prior example): /* We can also choose both (or one), let's try replacing an * existing filter. */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle = opts.handle, .priority = opts.priority, .prog_fd = fd); r = bpf_tc_attach(&hook, &replace_opts); if (r == -EEXIST) { /* Expected, now use BPF_TC_F_REPLACE to replace it */ replace_opts.flags = BPF_TC_F_REPLACE; return bpf_tc_attach(&hook, &replace_opts); } else if (r < 0) { return r; } /* There must be no existing filter with these * attributes, so cleanup and return an error. */ replace_opts.prog_fd = replace_opts.prog_id = 0; bpf_tc_detach(&hook, &replace_opts); return -1; To obtain info of a particular filter: /* Find info for filter with handle 1 and priority 50 */ DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1, .priority = 50); r = bpf_tc_query(&hook, &info_opts); if (r == -ENOENT) printf("Filter not found"); else if (r < 0) return r; printf("Prog ID: %u", info_opts.prog_id); return 0; Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design [ Daniel: also did major patch cleanup ] Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
2021-05-13 07:41:22 +08:00
return ret;
}