diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 52457ae3b259..7f591d71ab28 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -189,6 +189,11 @@ enum nfp_bpf_map_use { NFP_MAP_USE_ATOMIC_CNT, }; +struct nfp_bpf_map_word { + unsigned char type :4; + unsigned char non_zero_update :1; +}; + /** * struct nfp_bpf_map - private per-map data attached to BPF maps for offload * @offmap: pointer to the offloaded BPF map @@ -202,7 +207,7 @@ struct nfp_bpf_map { struct nfp_app_bpf *bpf; u32 tid; struct list_head l; - enum nfp_bpf_map_use use_map[]; + struct nfp_bpf_map_word use_map[]; }; struct nfp_bpf_neutral_map { @@ -436,6 +441,7 @@ struct nfp_bpf_subprog_info { * @prog: machine code * @prog_len: number of valid instructions in @prog array * @__prog_alloc_len: alloc size of @prog array + * @stack_size: total amount of stack used * @verifier_meta: temporary storage for verifier's insn meta * @type: BPF program type * @last_bpf_off: address of the last instruction translated from BPF @@ -460,6 +466,8 @@ struct nfp_prog { unsigned int prog_len; unsigned int __prog_alloc_len; + unsigned int stack_size; + struct nfp_insn_meta *verifier_meta; enum bpf_prog_type type; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 927e038d9f77..ba8ceedcf6a2 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -262,10 +262,25 @@ static void nfp_map_bpf_byte_swap(struct nfp_bpf_map *nfp_map, void *value) unsigned int i; for (i = 0; i < DIV_ROUND_UP(nfp_map->offmap->map.value_size, 4); i++) - if (nfp_map->use_map[i] == NFP_MAP_USE_ATOMIC_CNT) + if (nfp_map->use_map[i].type == NFP_MAP_USE_ATOMIC_CNT) word[i] = (__force u32)cpu_to_be32(word[i]); } +/* Mark value as unsafely initialized in case it becomes atomic later + * and we didn't byte swap something non-byte swap neutral. + */ +static void +nfp_map_bpf_byte_swap_record(struct nfp_bpf_map *nfp_map, void *value) +{ + u32 *word = value; + unsigned int i; + + for (i = 0; i < DIV_ROUND_UP(nfp_map->offmap->map.value_size, 4); i++) + if (nfp_map->use_map[i].type == NFP_MAP_UNUSED && + word[i] != (__force u32)cpu_to_be32(word[i])) + nfp_map->use_map[i].non_zero_update = 1; +} + static int nfp_bpf_map_lookup_entry(struct bpf_offloaded_map *offmap, void *key, void *value) @@ -285,6 +300,7 @@ nfp_bpf_map_update_entry(struct bpf_offloaded_map *offmap, void *key, void *value, u64 flags) { nfp_map_bpf_byte_swap(offmap->dev_priv, value); + nfp_map_bpf_byte_swap_record(offmap->dev_priv, value); return nfp_bpf_ctrl_update_entry(offmap, key, value, flags); } @@ -473,7 +489,7 @@ nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog, struct netlink_ext_ack *extack) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; - unsigned int max_mtu; + unsigned int max_mtu, max_stack, max_prog_len; dma_addr_t dma_addr; void *img; int err; @@ -484,6 +500,18 @@ nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog, return -EOPNOTSUPP; } + max_stack = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; + if (nfp_prog->stack_size > max_stack) { + NL_SET_ERR_MSG_MOD(extack, "stack too large"); + return -EOPNOTSUPP; + } + + max_prog_len = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); + if (nfp_prog->prog_len > max_prog_len) { + NL_SET_ERR_MSG_MOD(extack, "program too long"); + return -EOPNOTSUPP; + } + img = nfp_bpf_relo_for_vnic(nfp_prog, nn->app_priv); if (IS_ERR(img)) return PTR_ERR(img); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 193dd685b365..99f977bfd8cc 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -80,6 +80,46 @@ exit_set_location: nfp_prog->adjust_head_location = location; } +static bool nfp_bpf_map_update_value_ok(struct bpf_verifier_env *env) +{ + const struct bpf_reg_state *reg1 = cur_regs(env) + BPF_REG_1; + const struct bpf_reg_state *reg3 = cur_regs(env) + BPF_REG_3; + struct bpf_offloaded_map *offmap; + struct bpf_func_state *state; + struct nfp_bpf_map *nfp_map; + int off, i; + + state = env->cur_state->frame[reg3->frameno]; + + /* We need to record each time update happens with non-zero words, + * in case such word is used in atomic operations. + * Implicitly depend on nfp_bpf_stack_arg_ok(reg3) being run before. + */ + + offmap = map_to_offmap(reg1->map_ptr); + nfp_map = offmap->dev_priv; + off = reg3->off + reg3->var_off.value; + + for (i = 0; i < offmap->map.value_size; i++) { + struct bpf_stack_state *stack_entry; + unsigned int soff; + + soff = -(off + i) - 1; + stack_entry = &state->stack[soff / BPF_REG_SIZE]; + if (stack_entry->slot_type[soff % BPF_REG_SIZE] == STACK_ZERO) + continue; + + if (nfp_map->use_map[i / 4].type == NFP_MAP_USE_ATOMIC_CNT) { + pr_vlog(env, "value at offset %d/%d may be non-zero, bpf_map_update_elem() is required to initialize atomic counters to zero to avoid offload endian issues\n", + i, soff); + return false; + } + nfp_map->use_map[i / 4].non_zero_update = 1; + } + + return true; +} + static int nfp_bpf_stack_arg_ok(const char *fname, struct bpf_verifier_env *env, const struct bpf_reg_state *reg, @@ -171,7 +211,8 @@ nfp_bpf_check_helper_call(struct nfp_prog *nfp_prog, bpf->helpers.map_update, reg1) || !nfp_bpf_stack_arg_ok("map_update", env, reg2, meta->func_id ? &meta->arg2 : NULL) || - !nfp_bpf_stack_arg_ok("map_update", env, reg3, NULL)) + !nfp_bpf_stack_arg_ok("map_update", env, reg3, NULL) || + !nfp_bpf_map_update_value_ok(env)) return -EOPNOTSUPP; break; @@ -352,15 +393,22 @@ nfp_bpf_map_mark_used_one(struct bpf_verifier_env *env, struct nfp_bpf_map *nfp_map, unsigned int off, enum nfp_bpf_map_use use) { - if (nfp_map->use_map[off / 4] != NFP_MAP_UNUSED && - nfp_map->use_map[off / 4] != use) { + if (nfp_map->use_map[off / 4].type != NFP_MAP_UNUSED && + nfp_map->use_map[off / 4].type != use) { pr_vlog(env, "map value use type conflict %s vs %s off: %u\n", - nfp_bpf_map_use_name(nfp_map->use_map[off / 4]), + nfp_bpf_map_use_name(nfp_map->use_map[off / 4].type), nfp_bpf_map_use_name(use), off); return -EOPNOTSUPP; } - nfp_map->use_map[off / 4] = use; + if (nfp_map->use_map[off / 4].non_zero_update && + use == NFP_MAP_USE_ATOMIC_CNT) { + pr_vlog(env, "atomic counter in map value may already be initialized to non-zero value off: %u\n", + off); + return -EOPNOTSUPP; + } + + nfp_map->use_map[off / 4].type = use; return 0; } @@ -699,9 +747,9 @@ continue_subprog: static int nfp_bpf_finalize(struct bpf_verifier_env *env) { - unsigned int stack_size, stack_needed; struct bpf_subprog_info *info; struct nfp_prog *nfp_prog; + unsigned int max_stack; struct nfp_net *nn; int i; @@ -729,11 +777,12 @@ static int nfp_bpf_finalize(struct bpf_verifier_env *env) } nn = netdev_priv(env->prog->aux->offload->netdev); - stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; - stack_needed = nfp_bpf_get_stack_usage(nfp_prog, env->prog->len); - if (stack_needed > stack_size) { + max_stack = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; + nfp_prog->stack_size = nfp_bpf_get_stack_usage(nfp_prog, + env->prog->len); + if (nfp_prog->stack_size > max_stack) { pr_vlog(env, "stack too large: program %dB > FW stack %dB\n", - stack_needed, stack_size); + nfp_prog->stack_size, max_stack); return -EOPNOTSUPP; } diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e60fff48288b..33014ae73103 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -39,6 +39,9 @@ struct bpf_map_ops { void *(*map_lookup_elem)(struct bpf_map *map, void *key); int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); int (*map_delete_elem)(struct bpf_map *map, void *key); + int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); + int (*map_pop_elem)(struct bpf_map *map, void *value); + int (*map_peek_elem)(struct bpf_map *map, void *value); /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, @@ -138,6 +141,7 @@ enum bpf_arg_type { ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */ ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ + ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ /* the following constraints used to prototype bpf_memcmp() and other * functions that access data on eBPF program stack @@ -810,6 +814,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; extern const struct bpf_func_proto bpf_map_delete_elem_proto; +extern const struct bpf_func_proto bpf_map_push_elem_proto; +extern const struct bpf_func_proto bpf_map_pop_elem_proto; +extern const struct bpf_func_proto bpf_map_peek_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index fa48343a5ea1..44d9ab4809bd 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -51,7 +51,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops) #ifdef CONFIG_PERF_EVENTS -BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) @@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops) #endif #endif +BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) diff --git a/include/linux/filter.h b/include/linux/filter.h index 5771874bc01e..91b4c934f02e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -548,6 +548,27 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb) cb->data_end = skb->data + skb_headlen(skb); } +/* Similar to bpf_compute_data_pointers(), except that save orginal + * data in cb->data and cb->meta_data for restore. + */ +static inline void bpf_compute_and_save_data_end( + struct sk_buff *skb, void **saved_data_end) +{ + struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; + + *saved_data_end = cb->data_end; + cb->data_end = skb->data + skb_headlen(skb); +} + +/* Restore data saved by bpf_compute_data_pointers(). */ +static inline void bpf_restore_data_end( + struct sk_buff *skb, void *saved_data_end) +{ + struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; + + cb->data_end = saved_data_end; +} + static inline u8 *bpf_skb_cb(struct sk_buff *skb) { /* eBPF programs may read/write skb->cb[] area to transfer meta diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 0b919f0bc6d6..2a11e9d91dfa 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -176,6 +176,7 @@ static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src, { dst->sg.data[which] = src->sg.data[which]; dst->sg.data[which].length = size; + dst->sg.size += size; src->sg.data[which].length -= size; src->sg.data[which].offset += size; } @@ -186,23 +187,31 @@ static inline void sk_msg_xfer_full(struct sk_msg *dst, struct sk_msg *src) sk_msg_init(src); } -static inline u32 sk_msg_elem_used(const struct sk_msg *msg) -{ - return msg->sg.end >= msg->sg.start ? - msg->sg.end - msg->sg.start : - msg->sg.end + (MAX_MSG_FRAGS - msg->sg.start); -} - static inline bool sk_msg_full(const struct sk_msg *msg) { return (msg->sg.end == msg->sg.start) && msg->sg.size; } +static inline u32 sk_msg_elem_used(const struct sk_msg *msg) +{ + if (sk_msg_full(msg)) + return MAX_MSG_FRAGS; + + return msg->sg.end >= msg->sg.start ? + msg->sg.end - msg->sg.start : + msg->sg.end + (MAX_MSG_FRAGS - msg->sg.start); +} + static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which) { return &msg->sg.data[which]; } +static inline struct scatterlist sk_msg_elem_cpy(struct sk_msg *msg, int which) +{ + return msg->sg.data[which]; +} + static inline struct page *sk_msg_page(struct sk_msg *msg, int which) { return sg_page(sk_msg_elem(msg, which)); @@ -266,11 +275,6 @@ static inline struct sk_psock *sk_psock(const struct sock *sk) return rcu_dereference_sk_user_data(sk); } -static inline bool sk_has_psock(struct sock *sk) -{ - return sk_psock(sk) != NULL && sk->sk_prot->recvmsg == tcp_bpf_recvmsg; -} - static inline void sk_psock_queue_msg(struct sk_psock *psock, struct sk_msg *msg) { @@ -370,6 +374,26 @@ static inline bool sk_psock_test_state(const struct sk_psock *psock, return test_bit(bit, &psock->state); } +static inline struct sk_psock *sk_psock_get_checked(struct sock *sk) +{ + struct sk_psock *psock; + + rcu_read_lock(); + psock = sk_psock(sk); + if (psock) { + if (sk->sk_prot->recvmsg != tcp_bpf_recvmsg) { + psock = ERR_PTR(-EBUSY); + goto out; + } + + if (!refcount_inc_not_zero(&psock->refcnt)) + psock = ERR_PTR(-EBUSY); + } +out: + rcu_read_unlock(); + return psock; +} + static inline struct sk_psock *sk_psock_get(struct sock *sk) { struct sk_psock *psock; diff --git a/include/net/tcp.h b/include/net/tcp.h index 3600ae0f25c3..8a61c3e8c15d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2051,11 +2051,6 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer); #define TCP_ULP_MAX 128 #define TCP_ULP_BUF_MAX (TCP_ULP_NAME_MAX*TCP_ULP_MAX) -enum { - TCP_ULP_TLS, - TCP_ULP_BPF, -}; - struct tcp_ulp_ops { struct list_head list; @@ -2064,9 +2059,7 @@ struct tcp_ulp_ops { /* cleanup ulp */ void (*release)(struct sock *sk); - int uid; char name[TCP_ULP_NAME_MAX]; - bool user_visible; struct module *owner; }; int tcp_register_ulp(struct tcp_ulp_ops *type); @@ -2089,7 +2082,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes, int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, - struct msghdr *msg, int len); + struct msghdr *msg, int len, int flags); /* Call BPF_SOCK_OPS program that returns an int. If the return value * is < 0, then the BPF op failed (for example if the loaded BPF diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f9187b41dff6..852dc17ab47a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -103,6 +103,7 @@ enum bpf_cmd { BPF_BTF_LOAD, BPF_BTF_GET_FD_BY_ID, BPF_TASK_FD_QUERY, + BPF_MAP_LOOKUP_AND_DELETE_ELEM, }; enum bpf_map_type { @@ -128,6 +129,8 @@ enum bpf_map_type { BPF_MAP_TYPE_CGROUP_STORAGE, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + BPF_MAP_TYPE_QUEUE, + BPF_MAP_TYPE_STACK, }; enum bpf_prog_type { @@ -462,6 +465,28 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * Description + * Push an element *value* in *map*. *flags* is one of: + * + * **BPF_EXIST** + * If the queue/stack is full, the oldest element is removed to + * make room for this. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * Description + * Pop an element from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * Description + * Get an element from *map* without removing it. + * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -1433,7 +1458,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags) + * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) * Description * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. @@ -2215,6 +2240,23 @@ union bpf_attr { * pointer that was returned from bpf_sk_lookup_xxx\ (). * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) + * Description + * For socket policies, insert *len* bytes into msg at offset + * *start*. + * + * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a + * *msg* it may want to insert metadata or options into the msg. + * This can later be read and used by any of the lower layer BPF + * hooks. + * + * This helper may fail if under memory pressure (a malloc + * fails) in these cases BPF programs will get an appropriate + * error and BPF programs will need to handle them. + * + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2303,7 +2345,11 @@ union bpf_attr { FN(skb_ancestor_cgroup_id), \ FN(sk_lookup_tcp), \ FN(sk_lookup_udp), \ - FN(sk_release), + FN(sk_release), \ + FN(map_push_elem), \ + FN(map_pop_elem), \ + FN(map_peek_elem), \ + FN(msg_push_data), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index ff8262626b8f..4c2fa3ac56f6 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -3,7 +3,7 @@ obj-y := core.o obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o -obj-$(CONFIG_BPF_SYSCALL) += local_storage.o +obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o obj-$(CONFIG_BPF_SYSCALL) += btf.o ifeq ($(CONFIG_NET),y) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 00f6ed2e4f9a..9425c2fb872f 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -553,6 +553,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, { unsigned int offset = skb->data - skb_network_header(skb); struct sock *save_sk; + void *saved_data_end; struct cgroup *cgrp; int ret; @@ -566,8 +567,13 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, save_sk = skb->sk; skb->sk = sk; __skb_push(skb, offset); + + /* compute pointers for the bpf prog */ + bpf_compute_and_save_data_end(skb, &saved_data_end); + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, bpf_prog_run_save_cb); + bpf_restore_data_end(skb, saved_data_end); __skb_pull(skb, offset); skb->sk = save_sk; return ret == 1 ? 0 : -EPERM; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index defcf4df6d91..7c7eeea8cffc 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1783,6 +1783,9 @@ BPF_CALL_0(bpf_user_rnd_u32) const struct bpf_func_proto bpf_map_lookup_elem_proto __weak; const struct bpf_func_proto bpf_map_update_elem_proto __weak; const struct bpf_func_proto bpf_map_delete_elem_proto __weak; +const struct bpf_func_proto bpf_map_push_elem_proto __weak; +const struct bpf_func_proto bpf_map_pop_elem_proto __weak; +const struct bpf_func_proto bpf_map_peek_elem_proto __weak; const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6502115e8f55..ab0d5e3f9892 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -76,6 +76,49 @@ const struct bpf_func_proto bpf_map_delete_elem_proto = { .arg2_type = ARG_PTR_TO_MAP_KEY, }; +BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags) +{ + return map->ops->map_push_elem(map, value, flags); +} + +const struct bpf_func_proto bpf_map_push_elem_proto = { + .func = bpf_map_push_elem, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_MAP_VALUE, + .arg3_type = ARG_ANYTHING, +}; + +BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value) +{ + return map->ops->map_pop_elem(map, value); +} + +const struct bpf_func_proto bpf_map_pop_elem_proto = { + .func = bpf_map_pop_elem, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, +}; + +BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) +{ + return map->ops->map_peek_elem(map, value); +} + +const struct bpf_func_proto bpf_map_peek_elem_proto = { + .func = bpf_map_pop_elem, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, +}; + const struct bpf_func_proto bpf_get_prandom_u32_proto = { .func = bpf_user_rnd_u32, .gpl_only = false, diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c new file mode 100644 index 000000000000..12a93fb37449 --- /dev/null +++ b/kernel/bpf/queue_stack_maps.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * queue_stack_maps.c: BPF queue and stack maps + * + * Copyright (c) 2018 Politecnico di Torino + */ +#include +#include +#include +#include "percpu_freelist.h" + +#define QUEUE_STACK_CREATE_FLAG_MASK \ + (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) + + +struct bpf_queue_stack { + struct bpf_map map; + raw_spinlock_t lock; + u32 head, tail; + u32 size; /* max_entries + 1 */ + + char elements[0] __aligned(8); +}; + +static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map) +{ + return container_of(map, struct bpf_queue_stack, map); +} + +static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs) +{ + return qs->head == qs->tail; +} + +static bool queue_stack_map_is_full(struct bpf_queue_stack *qs) +{ + u32 head = qs->head + 1; + + if (unlikely(head >= qs->size)) + head = 0; + + return head == qs->tail; +} + +/* Called from syscall */ +static int queue_stack_map_alloc_check(union bpf_attr *attr) +{ + /* check sanity of attributes */ + if (attr->max_entries == 0 || attr->key_size != 0 || + attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK) + return -EINVAL; + + if (attr->value_size > KMALLOC_MAX_SIZE) + /* if value_size is bigger, the user space won't be able to + * access the elements. + */ + return -E2BIG; + + return 0; +} + +static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr) +{ + int ret, numa_node = bpf_map_attr_numa_node(attr); + struct bpf_queue_stack *qs; + u32 size, value_size; + u64 queue_size, cost; + + size = attr->max_entries + 1; + value_size = attr->value_size; + + queue_size = sizeof(*qs) + (u64) value_size * size; + + cost = queue_size; + if (cost >= U32_MAX - PAGE_SIZE) + return ERR_PTR(-E2BIG); + + cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + + ret = bpf_map_precharge_memlock(cost); + if (ret < 0) + return ERR_PTR(ret); + + qs = bpf_map_area_alloc(queue_size, numa_node); + if (!qs) + return ERR_PTR(-ENOMEM); + + memset(qs, 0, sizeof(*qs)); + + bpf_map_init_from_attr(&qs->map, attr); + + qs->map.pages = cost; + qs->size = size; + + raw_spin_lock_init(&qs->lock); + + return &qs->map; +} + +/* Called when map->refcnt goes to zero, either from workqueue or from syscall */ +static void queue_stack_map_free(struct bpf_map *map) +{ + struct bpf_queue_stack *qs = bpf_queue_stack(map); + + /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, + * so the programs (can be more than one that used this map) were + * disconnected from events. Wait for outstanding critical sections in + * these programs to complete + */ + synchronize_rcu(); + + bpf_map_area_free(qs); +} + +static int __queue_map_get(struct bpf_map *map, void *value, bool delete) +{ + struct bpf_queue_stack *qs = bpf_queue_stack(map); + unsigned long flags; + int err = 0; + void *ptr; + + raw_spin_lock_irqsave(&qs->lock, flags); + + if (queue_stack_map_is_empty(qs)) { + err = -ENOENT; + goto out; + } + + ptr = &qs->elements[qs->tail * qs->map.value_size]; + memcpy(value, ptr, qs->map.value_size); + + if (delete) { + if (unlikely(++qs->tail >= qs->size)) + qs->tail = 0; + } + +out: + raw_spin_unlock_irqrestore(&qs->lock, flags); + return err; +} + + +static int __stack_map_get(struct bpf_map *map, void *value, bool delete) +{ + struct bpf_queue_stack *qs = bpf_queue_stack(map); + unsigned long flags; + int err = 0; + void *ptr; + u32 index; + + raw_spin_lock_irqsave(&qs->lock, flags); + + if (queue_stack_map_is_empty(qs)) { + err = -ENOENT; + goto out; + } + + index = qs->head - 1; + if (unlikely(index >= qs->size)) + index = qs->size - 1; + + ptr = &qs->elements[index * qs->map.value_size]; + memcpy(value, ptr, qs->map.value_size); + + if (delete) + qs->head = index; + +out: + raw_spin_unlock_irqrestore(&qs->lock, flags); + return err; +} + +/* Called from syscall or from eBPF program */ +static int queue_map_peek_elem(struct bpf_map *map, void *value) +{ + return __queue_map_get(map, value, false); +} + +/* Called from syscall or from eBPF program */ +static int stack_map_peek_elem(struct bpf_map *map, void *value) +{ + return __stack_map_get(map, value, false); +} + +/* Called from syscall or from eBPF program */ +static int queue_map_pop_elem(struct bpf_map *map, void *value) +{ + return __queue_map_get(map, value, true); +} + +/* Called from syscall or from eBPF program */ +static int stack_map_pop_elem(struct bpf_map *map, void *value) +{ + return __stack_map_get(map, value, true); +} + +/* Called from syscall or from eBPF program */ +static int queue_stack_map_push_elem(struct bpf_map *map, void *value, + u64 flags) +{ + struct bpf_queue_stack *qs = bpf_queue_stack(map); + unsigned long irq_flags; + int err = 0; + void *dst; + + /* BPF_EXIST is used to force making room for a new element in case the + * map is full + */ + bool replace = (flags & BPF_EXIST); + + /* Check supported flags for queue and stack maps */ + if (flags & BPF_NOEXIST || flags > BPF_EXIST) + return -EINVAL; + + raw_spin_lock_irqsave(&qs->lock, irq_flags); + + if (queue_stack_map_is_full(qs)) { + if (!replace) { + err = -E2BIG; + goto out; + } + /* advance tail pointer to overwrite oldest element */ + if (unlikely(++qs->tail >= qs->size)) + qs->tail = 0; + } + + dst = &qs->elements[qs->head * qs->map.value_size]; + memcpy(dst, value, qs->map.value_size); + + if (unlikely(++qs->head >= qs->size)) + qs->head = 0; + +out: + raw_spin_unlock_irqrestore(&qs->lock, irq_flags); + return err; +} + +/* Called from syscall or from eBPF program */ +static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key) +{ + return NULL; +} + +/* Called from syscall or from eBPF program */ +static int queue_stack_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) +{ + return -EINVAL; +} + +/* Called from syscall or from eBPF program */ +static int queue_stack_map_delete_elem(struct bpf_map *map, void *key) +{ + return -EINVAL; +} + +/* Called from syscall */ +static int queue_stack_map_get_next_key(struct bpf_map *map, void *key, + void *next_key) +{ + return -EINVAL; +} + +const struct bpf_map_ops queue_map_ops = { + .map_alloc_check = queue_stack_map_alloc_check, + .map_alloc = queue_stack_map_alloc, + .map_free = queue_stack_map_free, + .map_lookup_elem = queue_stack_map_lookup_elem, + .map_update_elem = queue_stack_map_update_elem, + .map_delete_elem = queue_stack_map_delete_elem, + .map_push_elem = queue_stack_map_push_elem, + .map_pop_elem = queue_map_pop_elem, + .map_peek_elem = queue_map_peek_elem, + .map_get_next_key = queue_stack_map_get_next_key, +}; + +const struct bpf_map_ops stack_map_ops = { + .map_alloc_check = queue_stack_map_alloc_check, + .map_alloc = queue_stack_map_alloc, + .map_free = queue_stack_map_free, + .map_lookup_elem = queue_stack_map_lookup_elem, + .map_update_elem = queue_stack_map_update_elem, + .map_delete_elem = queue_stack_map_delete_elem, + .map_push_elem = queue_stack_map_push_elem, + .map_pop_elem = stack_map_pop_elem, + .map_peek_elem = stack_map_peek_elem, + .map_get_next_key = queue_stack_map_get_next_key, +}; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index b2ade10f7ec3..90daf285de03 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -600,7 +600,7 @@ static void stack_map_free(struct bpf_map *map) put_callchain_buffers(); } -const struct bpf_map_ops stack_map_ops = { +const struct bpf_map_ops stack_trace_map_ops = { .map_alloc = stack_map_alloc, .map_free = stack_map_free, .map_get_next_key = stack_map_get_next_key, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index f4ecd6ed2252..ccb93277aae2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -651,6 +651,17 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) return -ENOTSUPP; } +static void *__bpf_copy_key(void __user *ukey, u64 key_size) +{ + if (key_size) + return memdup_user(ukey, key_size); + + if (ukey) + return ERR_PTR(-EINVAL); + + return NULL; +} + /* last field in 'union bpf_attr' used by this command */ #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value @@ -678,7 +689,7 @@ static int map_lookup_elem(union bpf_attr *attr) goto err_put; } - key = memdup_user(ukey, map->key_size); + key = __bpf_copy_key(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); goto err_put; @@ -716,6 +727,9 @@ static int map_lookup_elem(union bpf_attr *attr) err = bpf_fd_htab_map_lookup_elem(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { err = bpf_fd_reuseport_array_lookup_elem(map, key, value); + } else if (map->map_type == BPF_MAP_TYPE_QUEUE || + map->map_type == BPF_MAP_TYPE_STACK) { + err = map->ops->map_peek_elem(map, value); } else { rcu_read_lock(); ptr = map->ops->map_lookup_elem(map, key); @@ -785,7 +799,7 @@ static int map_update_elem(union bpf_attr *attr) goto err_put; } - key = memdup_user(ukey, map->key_size); + key = __bpf_copy_key(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); goto err_put; @@ -846,6 +860,9 @@ static int map_update_elem(union bpf_attr *attr) /* rcu_read_lock() is not needed */ err = bpf_fd_reuseport_array_update_elem(map, key, value, attr->flags); + } else if (map->map_type == BPF_MAP_TYPE_QUEUE || + map->map_type == BPF_MAP_TYPE_STACK) { + err = map->ops->map_push_elem(map, value, attr->flags); } else { rcu_read_lock(); err = map->ops->map_update_elem(map, key, value, attr->flags); @@ -888,7 +905,7 @@ static int map_delete_elem(union bpf_attr *attr) goto err_put; } - key = memdup_user(ukey, map->key_size); + key = __bpf_copy_key(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); goto err_put; @@ -941,7 +958,7 @@ static int map_get_next_key(union bpf_attr *attr) } if (ukey) { - key = memdup_user(ukey, map->key_size); + key = __bpf_copy_key(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); goto err_put; @@ -982,6 +999,69 @@ err_put: return err; } +#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value + +static int map_lookup_and_delete_elem(union bpf_attr *attr) +{ + void __user *ukey = u64_to_user_ptr(attr->key); + void __user *uvalue = u64_to_user_ptr(attr->value); + int ufd = attr->map_fd; + struct bpf_map *map; + void *key, *value; + u32 value_size; + struct fd f; + int err; + + if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) + return -EINVAL; + + f = fdget(ufd); + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); + + if (!(f.file->f_mode & FMODE_CAN_WRITE)) { + err = -EPERM; + goto err_put; + } + + key = __bpf_copy_key(ukey, map->key_size); + if (IS_ERR(key)) { + err = PTR_ERR(key); + goto err_put; + } + + value_size = map->value_size; + + err = -ENOMEM; + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); + if (!value) + goto free_key; + + if (map->map_type == BPF_MAP_TYPE_QUEUE || + map->map_type == BPF_MAP_TYPE_STACK) { + err = map->ops->map_pop_elem(map, value); + } else { + err = -ENOTSUPP; + } + + if (err) + goto free_value; + + if (copy_to_user(uvalue, value, value_size) != 0) + goto free_value; + + err = 0; + +free_value: + kfree(value); +free_key: + kfree(key); +err_put: + fdput(f); + return err; +} + static const struct bpf_prog_ops * const bpf_prog_types[] = { #define BPF_PROG_TYPE(_id, _name) \ [_id] = & _name ## _prog_ops, @@ -2455,6 +2535,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_TASK_FD_QUERY: err = bpf_task_fd_query(&attr, uattr); break; + case BPF_MAP_LOOKUP_AND_DELETE_ELEM: + err = map_lookup_and_delete_elem(&attr); + break; default: err = -EINVAL; break; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3f93a548a642..98fa0be35370 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1528,14 +1528,19 @@ static bool __is_pointer_value(bool allow_ptr_leaks, return reg->type != SCALAR_VALUE; } +static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) +{ + return cur_regs(env) + regno; +} + static bool is_pointer_value(struct bpf_verifier_env *env, int regno) { - return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno); + return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno)); } static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) { - const struct bpf_reg_state *reg = cur_regs(env) + regno; + const struct bpf_reg_state *reg = reg_state(env, regno); return reg->type == PTR_TO_CTX || reg->type == PTR_TO_SOCKET; @@ -1543,11 +1548,19 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) { - const struct bpf_reg_state *reg = cur_regs(env) + regno; + const struct bpf_reg_state *reg = reg_state(env, regno); return type_is_pkt_pointer(reg->type); } +static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno) +{ + const struct bpf_reg_state *reg = reg_state(env, regno); + + /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */ + return reg->type == PTR_TO_FLOW_KEYS; +} + static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int off, int size, bool strict) @@ -1956,9 +1969,11 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins } if (is_ctx_reg(env, insn->dst_reg) || - is_pkt_reg(env, insn->dst_reg)) { + is_pkt_reg(env, insn->dst_reg) || + is_flow_key_reg(env, insn->dst_reg)) { verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", - insn->dst_reg, reg_type_str[insn->dst_reg]); + insn->dst_reg, + reg_type_str[reg_state(env, insn->dst_reg)->type]); return -EACCES; } @@ -1983,7 +1998,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, int access_size, bool zero_size_allowed, struct bpf_call_arg_meta *meta) { - struct bpf_reg_state *reg = cur_regs(env) + regno; + struct bpf_reg_state *reg = reg_state(env, regno); struct bpf_func_state *state = func(env, reg); int off, i, slot, spi; @@ -2062,8 +2077,6 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, case PTR_TO_PACKET_META: return check_packet_access(env, regno, reg->off, access_size, zero_size_allowed); - case PTR_TO_FLOW_KEYS: - return check_flow_keys_access(env, reg->off, access_size); case PTR_TO_MAP_VALUE: return check_map_access(env, regno, reg->off, access_size, zero_size_allowed); @@ -2117,7 +2130,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, } if (arg_type == ARG_PTR_TO_MAP_KEY || - arg_type == ARG_PTR_TO_MAP_VALUE) { + arg_type == ARG_PTR_TO_MAP_VALUE || + arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { expected_type = PTR_TO_STACK; if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE && type != expected_type) @@ -2187,7 +2201,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, err = check_helper_mem_access(env, regno, meta->map_ptr->key_size, false, NULL); - } else if (arg_type == ARG_PTR_TO_MAP_VALUE) { + } else if (arg_type == ARG_PTR_TO_MAP_VALUE || + arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { /* bpf_map_xxx(..., map_ptr, ..., value) call: * check [value, value + map->value_size) validity */ @@ -2196,9 +2211,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, verbose(env, "invalid map_ptr to access map->value\n"); return -EACCES; } + meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE); err = check_helper_mem_access(env, regno, meta->map_ptr->value_size, false, - NULL); + meta); } else if (arg_type_is_mem_size(arg_type)) { bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); @@ -2321,6 +2337,13 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (func_id != BPF_FUNC_sk_select_reuseport) goto error; break; + case BPF_MAP_TYPE_QUEUE: + case BPF_MAP_TYPE_STACK: + if (func_id != BPF_FUNC_map_peek_elem && + func_id != BPF_FUNC_map_pop_elem && + func_id != BPF_FUNC_map_push_elem) + goto error; + break; default: break; } @@ -2377,6 +2400,13 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) goto error; break; + case BPF_FUNC_map_peek_elem: + case BPF_FUNC_map_pop_elem: + case BPF_FUNC_map_push_elem: + if (map->map_type != BPF_MAP_TYPE_QUEUE && + map->map_type != BPF_MAP_TYPE_STACK) + goto error; + break; default: break; } @@ -2672,7 +2702,10 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, if (func_id != BPF_FUNC_tail_call && func_id != BPF_FUNC_map_lookup_elem && func_id != BPF_FUNC_map_update_elem && - func_id != BPF_FUNC_map_delete_elem) + func_id != BPF_FUNC_map_delete_elem && + func_id != BPF_FUNC_map_push_elem && + func_id != BPF_FUNC_map_pop_elem && + func_id != BPF_FUNC_map_peek_elem) return 0; if (meta->map_ptr == NULL) { @@ -5244,7 +5277,8 @@ static int do_check(struct bpf_verifier_env *env) if (is_ctx_reg(env, insn->dst_reg)) { verbose(env, "BPF_ST stores into R%d %s is not allowed\n", - insn->dst_reg, reg_type_str[insn->dst_reg]); + insn->dst_reg, + reg_type_str[reg_state(env, insn->dst_reg)->type]); return -EACCES; } @@ -6144,7 +6178,10 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) if (prog->jit_requested && BITS_PER_LONG == 64 && (insn->imm == BPF_FUNC_map_lookup_elem || insn->imm == BPF_FUNC_map_update_elem || - insn->imm == BPF_FUNC_map_delete_elem)) { + insn->imm == BPF_FUNC_map_delete_elem || + insn->imm == BPF_FUNC_map_push_elem || + insn->imm == BPF_FUNC_map_pop_elem || + insn->imm == BPF_FUNC_map_peek_elem)) { aux = &env->insn_aux_data[i + delta]; if (bpf_map_ptr_poisoned(aux)) goto patch_call_imm; @@ -6177,6 +6214,14 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) BUILD_BUG_ON(!__same_type(ops->map_update_elem, (int (*)(struct bpf_map *map, void *key, void *value, u64 flags))NULL)); + BUILD_BUG_ON(!__same_type(ops->map_push_elem, + (int (*)(struct bpf_map *map, void *value, + u64 flags))NULL)); + BUILD_BUG_ON(!__same_type(ops->map_pop_elem, + (int (*)(struct bpf_map *map, void *value))NULL)); + BUILD_BUG_ON(!__same_type(ops->map_peek_elem, + (int (*)(struct bpf_map *map, void *value))NULL)); + switch (insn->imm) { case BPF_FUNC_map_lookup_elem: insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) - @@ -6190,6 +6235,18 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) insn->imm = BPF_CAST_CALL(ops->map_delete_elem) - __bpf_call_base; continue; + case BPF_FUNC_map_push_elem: + insn->imm = BPF_CAST_CALL(ops->map_push_elem) - + __bpf_call_base; + continue; + case BPF_FUNC_map_pop_elem: + insn->imm = BPF_CAST_CALL(ops->map_pop_elem) - + __bpf_call_base; + continue; + case BPF_FUNC_map_peek_elem: + insn->imm = BPF_CAST_CALL(ops->map_peek_elem) - + __bpf_call_base; + continue; } goto patch_call_imm; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 0c423b8cd75c..c89c22c49015 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx, struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) @@ -115,6 +117,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, u32 retval, duration; int hh_len = ETH_HLEN; struct sk_buff *skb; + struct sock *sk; void *data; int ret; @@ -137,11 +140,21 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, break; } - skb = build_skb(data, 0); - if (!skb) { + sk = kzalloc(sizeof(struct sock), GFP_USER); + if (!sk) { kfree(data); return -ENOMEM; } + sock_net_set(sk, current->nsproxy->net_ns); + sock_init_data(NULL, sk); + + skb = build_skb(data, 0); + if (!skb) { + kfree(data); + kfree(sk); + return -ENOMEM; + } + skb->sk = sk; skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); __skb_put(skb, size); @@ -159,6 +172,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, if (pskb_expand_head(skb, nhead, 0, GFP_USER)) { kfree_skb(skb); + kfree(sk); return -ENOMEM; } } @@ -171,6 +185,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, size = skb_headlen(skb); ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration); kfree_skb(skb); + kfree(sk); return ret; } diff --git a/net/core/filter.c b/net/core/filter.c index 1a3ac6c46873..35c6933c2622 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2297,6 +2297,137 @@ static const struct bpf_func_proto bpf_msg_pull_data_proto = { .arg4_type = ARG_ANYTHING, }; +BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, + u32, len, u64, flags) +{ + struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge; + u32 new, i = 0, l, space, copy = 0, offset = 0; + u8 *raw, *to, *from; + struct page *page; + + if (unlikely(flags)) + return -EINVAL; + + /* First find the starting scatterlist element */ + i = msg->sg.start; + do { + l = sk_msg_elem(msg, i)->length; + + if (start < offset + l) + break; + offset += l; + sk_msg_iter_var_next(i); + } while (i != msg->sg.end); + + if (start >= offset + l) + return -EINVAL; + + space = MAX_MSG_FRAGS - sk_msg_elem_used(msg); + + /* If no space available will fallback to copy, we need at + * least one scatterlist elem available to push data into + * when start aligns to the beginning of an element or two + * when it falls inside an element. We handle the start equals + * offset case because its the common case for inserting a + * header. + */ + if (!space || (space == 1 && start != offset)) + copy = msg->sg.data[i].length; + + page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP, + get_order(copy + len)); + if (unlikely(!page)) + return -ENOMEM; + + if (copy) { + int front, back; + + raw = page_address(page); + + psge = sk_msg_elem(msg, i); + front = start - offset; + back = psge->length - front; + from = sg_virt(psge); + + if (front) + memcpy(raw, from, front); + + if (back) { + from += front; + to = raw + front + len; + + memcpy(to, from, back); + } + + put_page(sg_page(psge)); + } else if (start - offset) { + psge = sk_msg_elem(msg, i); + rsge = sk_msg_elem_cpy(msg, i); + + psge->length = start - offset; + rsge.length -= psge->length; + rsge.offset += start; + + sk_msg_iter_var_next(i); + sg_unmark_end(psge); + sk_msg_iter_next(msg, end); + } + + /* Slot(s) to place newly allocated data */ + new = i; + + /* Shift one or two slots as needed */ + if (!copy) { + sge = sk_msg_elem_cpy(msg, i); + + sk_msg_iter_var_next(i); + sg_unmark_end(&sge); + sk_msg_iter_next(msg, end); + + nsge = sk_msg_elem_cpy(msg, i); + if (rsge.length) { + sk_msg_iter_var_next(i); + nnsge = sk_msg_elem_cpy(msg, i); + } + + while (i != msg->sg.end) { + msg->sg.data[i] = sge; + sge = nsge; + sk_msg_iter_var_next(i); + if (rsge.length) { + nsge = nnsge; + nnsge = sk_msg_elem_cpy(msg, i); + } else { + nsge = sk_msg_elem_cpy(msg, i); + } + } + } + + /* Place newly allocated data buffer */ + sk_mem_charge(msg->sk, len); + msg->sg.size += len; + msg->sg.copy[new] = false; + sg_set_page(&msg->sg.data[new], page, len + copy, 0); + if (rsge.length) { + get_page(sg_page(&rsge)); + sk_msg_iter_var_next(new); + msg->sg.data[new] = rsge; + } + + sk_msg_compute_data_pointers(msg); + return 0; +} + +static const struct bpf_func_proto bpf_msg_push_data_proto = { + .func = bpf_msg_push_data, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, +}; + BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) { return task_get_classid(skb); @@ -4854,6 +4985,7 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_xdp_adjust_head || func == bpf_xdp_adjust_meta || func == bpf_msg_pull_data || + func == bpf_msg_push_data || func == bpf_xdp_adjust_tail || #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) func == bpf_lwt_seg6_store_bytes || @@ -4876,6 +5008,12 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_map_update_elem_proto; case BPF_FUNC_map_delete_elem: return &bpf_map_delete_elem_proto; + case BPF_FUNC_map_push_elem: + return &bpf_map_push_elem_proto; + case BPF_FUNC_map_pop_elem: + return &bpf_map_pop_elem_proto; + case BPF_FUNC_map_peek_elem: + return &bpf_map_peek_elem_proto; case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; case BPF_FUNC_get_smp_processor_id: @@ -5124,6 +5262,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_msg_cork_bytes_proto; case BPF_FUNC_msg_pull_data: return &bpf_msg_pull_data_proto; + case BPF_FUNC_msg_push_data: + return &bpf_msg_push_data_proto; case BPF_FUNC_get_local_storage: return &bpf_get_local_storage_proto; default: @@ -5346,6 +5486,40 @@ static bool sk_filter_is_valid_access(int off, int size, return bpf_skb_is_valid_access(off, size, type, prog, info); } +static bool cg_skb_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + switch (off) { + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, data_meta): + case bpf_ctx_range(struct __sk_buff, flow_keys): + return false; + } + if (type == BPF_WRITE) { + switch (off) { + case bpf_ctx_range(struct __sk_buff, mark): + case bpf_ctx_range(struct __sk_buff, priority): + case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): + break; + default: + return false; + } + } + + switch (off) { + case bpf_ctx_range(struct __sk_buff, data): + info->reg_type = PTR_TO_PACKET; + break; + case bpf_ctx_range(struct __sk_buff, data_end): + info->reg_type = PTR_TO_PACKET_END; + break; + } + + return bpf_skb_is_valid_access(off, size, type, prog, info); +} + static bool lwt_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, @@ -7038,7 +7212,7 @@ const struct bpf_prog_ops xdp_prog_ops = { const struct bpf_verifier_ops cg_skb_verifier_ops = { .get_func_proto = cg_skb_func_proto, - .is_valid_access = sk_filter_is_valid_access, + .is_valid_access = cg_skb_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, }; diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 3c0e44cb811a..be6092ac69f8 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -175,12 +175,13 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, } } - psock = sk_psock_get(sk); + psock = sk_psock_get_checked(sk); + if (IS_ERR(psock)) { + ret = PTR_ERR(psock); + goto out_progs; + } + if (psock) { - if (!sk_has_psock(sk)) { - ret = -EBUSY; - goto out_progs; - } if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) || (skb_progs && READ_ONCE(psock->progs.skb_parser))) { sk_psock_put(sk, psock); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 80debb0daf37..b7918d4caa30 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -39,17 +39,19 @@ static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock, } int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, - struct msghdr *msg, int len) + struct msghdr *msg, int len, int flags) { struct iov_iter *iter = &msg->msg_iter; + int peek = flags & MSG_PEEK; int i, ret, copied = 0; + struct sk_msg *msg_rx; + + msg_rx = list_first_entry_or_null(&psock->ingress_msg, + struct sk_msg, list); while (copied != len) { struct scatterlist *sge; - struct sk_msg *msg_rx; - msg_rx = list_first_entry_or_null(&psock->ingress_msg, - struct sk_msg, list); if (unlikely(!msg_rx)) break; @@ -70,21 +72,30 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, } copied += copy; - sge->offset += copy; - sge->length -= copy; - sk_mem_uncharge(sk, copy); - if (!sge->length) { - i++; - if (i == MAX_SKB_FRAGS) - i = 0; - if (!msg_rx->skb) - put_page(page); + if (likely(!peek)) { + sge->offset += copy; + sge->length -= copy; + sk_mem_uncharge(sk, copy); + msg_rx->sg.size -= copy; + + if (!sge->length) { + sk_msg_iter_var_next(i); + if (!msg_rx->skb) + put_page(page); + } + } else { + sk_msg_iter_var_next(i); } if (copied == len) break; } while (i != msg_rx->sg.end); + if (unlikely(peek)) { + msg_rx = list_next_entry(msg_rx, list); + continue; + } + msg_rx->sg.start = i; if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) { list_del(&msg_rx->list); @@ -92,6 +103,8 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, consume_skb(msg_rx->skb); kfree(msg_rx); } + msg_rx = list_first_entry_or_null(&psock->ingress_msg, + struct sk_msg, list); } return copied; @@ -114,7 +127,7 @@ int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); lock_sock(sk); msg_bytes_ready: - copied = __tcp_bpf_recvmsg(sk, psock, msg, len); + copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags); if (!copied) { int data, err = 0; long timeo; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index e90b6d537077..311cec8e533d 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -715,8 +715,6 @@ EXPORT_SYMBOL(tls_unregister_device); static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { .name = "tls", - .uid = TCP_ULP_TLS, - .user_visible = true, .owner = THIS_MODULE, .init = tls_init, }; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index a525fc4c2a4b..5cd88ba8acd1 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1478,7 +1478,8 @@ int tls_sw_recvmsg(struct sock *sk, skb = tls_wait_data(sk, psock, flags, timeo, &err); if (!skb) { if (psock) { - int ret = __tcp_bpf_recvmsg(sk, psock, msg, len); + int ret = __tcp_bpf_recvmsg(sk, psock, + msg, len, flags); if (ret > 0) { copied += ret; diff --git a/tools/arch/arm64/include/asm/barrier.h b/tools/arch/arm64/include/asm/barrier.h index 40bde6b23501..12835ea0e417 100644 --- a/tools/arch/arm64/include/asm/barrier.h +++ b/tools/arch/arm64/include/asm/barrier.h @@ -14,4 +14,74 @@ #define wmb() asm volatile("dmb ishst" ::: "memory") #define rmb() asm volatile("dmb ishld" ::: "memory") +#define smp_store_release(p, v) \ +do { \ + union { typeof(*p) __val; char __c[1]; } __u = \ + { .__val = (__force typeof(*p)) (v) }; \ + \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile ("stlrb %w1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u8 *)__u.__c) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile ("stlrh %w1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u16 *)__u.__c) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile ("stlr %w1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u32 *)__u.__c) \ + : "memory"); \ + break; \ + case 8: \ + asm volatile ("stlr %1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u64 *)__u.__c) \ + : "memory"); \ + break; \ + default: \ + /* Only to shut up gcc ... */ \ + mb(); \ + break; \ + } \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + union { typeof(*p) __val; char __c[1]; } __u; \ + \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile ("ldarb %w0, %1" \ + : "=r" (*(__u8 *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + case 2: \ + asm volatile ("ldarh %w0, %1" \ + : "=r" (*(__u16 *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + case 4: \ + asm volatile ("ldar %w0, %1" \ + : "=r" (*(__u32 *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + case 8: \ + asm volatile ("ldar %0, %1" \ + : "=r" (*(__u64 *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + default: \ + /* Only to shut up gcc ... */ \ + mb(); \ + break; \ + } \ + __u.__val; \ +}) + #endif /* _TOOLS_LINUX_ASM_AARCH64_BARRIER_H */ diff --git a/tools/arch/ia64/include/asm/barrier.h b/tools/arch/ia64/include/asm/barrier.h index d808ee0e77b5..4d471d9511a5 100644 --- a/tools/arch/ia64/include/asm/barrier.h +++ b/tools/arch/ia64/include/asm/barrier.h @@ -46,4 +46,17 @@ #define rmb() mb() #define wmb() mb() +#define smp_store_release(p, v) \ +do { \ + barrier(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + barrier(); \ + ___p1; \ +}) + #endif /* _TOOLS_LINUX_ASM_IA64_BARRIER_H */ diff --git a/tools/arch/powerpc/include/asm/barrier.h b/tools/arch/powerpc/include/asm/barrier.h index a634da05bc97..905a2c66d96d 100644 --- a/tools/arch/powerpc/include/asm/barrier.h +++ b/tools/arch/powerpc/include/asm/barrier.h @@ -27,4 +27,20 @@ #define rmb() __asm__ __volatile__ ("sync" : : : "memory") #define wmb() __asm__ __volatile__ ("sync" : : : "memory") +#if defined(__powerpc64__) +#define smp_lwsync() __asm__ __volatile__ ("lwsync" : : : "memory") + +#define smp_store_release(p, v) \ +do { \ + smp_lwsync(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + smp_lwsync(); \ + ___p1; \ +}) +#endif /* defined(__powerpc64__) */ #endif /* _TOOLS_LINUX_ASM_POWERPC_BARRIER_H */ diff --git a/tools/arch/s390/include/asm/barrier.h b/tools/arch/s390/include/asm/barrier.h index 5030c99f47d2..de362fa664d4 100644 --- a/tools/arch/s390/include/asm/barrier.h +++ b/tools/arch/s390/include/asm/barrier.h @@ -28,4 +28,17 @@ #define rmb() mb() #define wmb() mb() +#define smp_store_release(p, v) \ +do { \ + barrier(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + barrier(); \ + ___p1; \ +}) + #endif /* __TOOLS_LIB_ASM_BARRIER_H */ diff --git a/tools/arch/sparc/include/asm/barrier_64.h b/tools/arch/sparc/include/asm/barrier_64.h index ba61344287d5..cfb0fdc8ccf0 100644 --- a/tools/arch/sparc/include/asm/barrier_64.h +++ b/tools/arch/sparc/include/asm/barrier_64.h @@ -40,4 +40,17 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ #define rmb() __asm__ __volatile__("":::"memory") #define wmb() __asm__ __volatile__("":::"memory") +#define smp_store_release(p, v) \ +do { \ + barrier(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + barrier(); \ + ___p1; \ +}) + #endif /* !(__TOOLS_LINUX_SPARC64_BARRIER_H) */ diff --git a/tools/arch/x86/include/asm/barrier.h b/tools/arch/x86/include/asm/barrier.h index 8774dee27471..58919868473c 100644 --- a/tools/arch/x86/include/asm/barrier.h +++ b/tools/arch/x86/include/asm/barrier.h @@ -26,4 +26,18 @@ #define wmb() asm volatile("sfence" ::: "memory") #endif +#if defined(__x86_64__) +#define smp_store_release(p, v) \ +do { \ + barrier(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + barrier(); \ + ___p1; \ +}) +#endif /* defined(__x86_64__) */ #endif /* _TOOLS_LINUX_ASM_X86_BARRIER_H */ diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 3497f2d80328..f55a2daed59b 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -86,7 +86,9 @@ DESCRIPTION **bpftool map pin** *MAP* *FILE* Pin map *MAP* as *FILE*. - Note: *FILE* must be located in *bpffs* mount. + Note: *FILE* must be located in *bpffs* mount. It must not + contain a dot character ('.'), which is reserved for future + extensions of *bpffs*. **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map. diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 12c803003ab2..ac4e904b10fb 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -75,7 +75,9 @@ DESCRIPTION **bpftool prog pin** *PROG* *FILE* Pin program *PROG* as *FILE*. - Note: *FILE* must be located in *bpffs* mount. + Note: *FILE* must be located in *bpffs* mount. It must not + contain a dot character ('.'), which is reserved for future + extensions of *bpffs*. **bpftool prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] Load bpf program from binary *OBJ* and pin as *FILE*. @@ -91,7 +93,9 @@ DESCRIPTION If **dev** *NAME* is specified program will be loaded onto given networking device (offload). - Note: *FILE* must be located in *bpffs* mount. + Note: *FILE* must be located in *bpffs* mount. It must not + contain a dot character ('.'), which is reserved for future + extensions of *bpffs*. **bpftool prog attach** *PROG* *ATTACH_TYPE* *MAP* Attach bpf program *PROG* (with type specified by *ATTACH_TYPE*) diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index c56545e87b0d..3f78e6404589 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -143,7 +143,7 @@ _bpftool_map_update_map_type() local type type=$(bpftool -jp map show $keyword $ref | \ command sed -n 's/.*"type": "\(.*\)",$/\1/p') - printf $type + [[ -n $type ]] && printf $type } _bpftool_map_update_get_id() diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 3318da8060bd..25af85304ebe 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -554,7 +554,9 @@ static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name) return read_sysfs_hex_int(full_path); } -const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino) +const char * +ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, + const char **opt) { char devname[IF_NAMESIZE]; int vendor_id; @@ -579,6 +581,7 @@ const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino) device_id != 0x6000 && device_id != 0x6003) p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch"); + *opt = "ctx4"; return "NFP-6xxx"; default: p_err("Can't get bfd arch name for device vendor id 0x%04x", diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index 87439320ef70..c75ffd9ce2bb 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -77,7 +77,7 @@ static int fprintf_json(void *out, const char *fmt, ...) } void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch) + const char *arch, const char *disassembler_options) { disassembler_ftype disassemble; struct disassemble_info info; @@ -116,6 +116,8 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, info.arch = bfd_get_arch(bfdf); info.mach = bfd_get_mach(bfdf); + if (disassembler_options) + info.disassembler_options = disassembler_options; info.buffer = image; info.buffer_length = len; diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 828dde30e9ec..75a3296dc0bc 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -321,7 +321,8 @@ static int do_batch(int argc, char **argv) p_err("reading batch file failed: %s", strerror(errno)); err = -1; } else { - p_info("processed %d commands", lines); + if (!json_output) + printf("processed %d commands\n", lines); err = 0; } err_close: diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 28ee769bd11b..28322ace2856 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -145,13 +145,15 @@ int map_parse_fd(int *argc, char ***argv); int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch); + const char *arch, const char *disassembler_options); void print_data_json(uint8_t *data, size_t len); void print_hex_data_json(uint8_t *data, size_t len); unsigned int get_page_size(void); unsigned int get_possible_cpus(void); -const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino); +const char * +ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, + const char **opt); struct btf_dumper { const struct btf *btf; diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index 6d41323be291..bdaf4062e26e 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -50,15 +50,17 @@ static void int_exit(int signo) stop = true; } -static enum bpf_perf_event_ret print_bpf_output(void *event, void *priv) +static enum bpf_perf_event_ret +print_bpf_output(struct perf_event_header *event, void *private_data) { - struct event_ring_info *ring = priv; - struct perf_event_sample *e = event; + struct perf_event_sample *e = container_of(event, struct perf_event_sample, + header); + struct event_ring_info *ring = private_data; struct { struct perf_event_header header; __u64 id; __u64 lost; - } *lost = event; + } *lost = (typeof(lost))event; if (json_output) { jsonw_start_object(json_wtr); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 335028968dfb..5302ee282409 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -449,6 +449,7 @@ static int do_dump(int argc, char **argv) unsigned long *func_ksyms = NULL; struct bpf_prog_info info = {}; unsigned int *func_lens = NULL; + const char *disasm_opt = NULL; unsigned int nr_func_ksyms; unsigned int nr_func_lens; struct dump_data dd = {}; @@ -607,9 +608,10 @@ static int do_dump(int argc, char **argv) const char *name = NULL; if (info.ifindex) { - name = ifindex_to_bfd_name_ns(info.ifindex, - info.netns_dev, - info.netns_ino); + name = ifindex_to_bfd_params(info.ifindex, + info.netns_dev, + info.netns_ino, + &disasm_opt); if (!name) goto err_free; } @@ -651,7 +653,8 @@ static int do_dump(int argc, char **argv) printf("%s:\n", sym_name); } - disasm_print_insn(img, lens[i], opcodes, name); + disasm_print_insn(img, lens[i], opcodes, name, + disasm_opt); img += lens[i]; if (json_output) @@ -663,7 +666,8 @@ static int do_dump(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); } else { - disasm_print_insn(buf, *member_len, opcodes, name); + disasm_print_insn(buf, *member_len, opcodes, name, + disasm_opt); } } else if (visual) { if (json_output) diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h index 391d942536e5..8d378c57cb01 100644 --- a/tools/include/asm/barrier.h +++ b/tools/include/asm/barrier.h @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include #if defined(__i386__) || defined(__x86_64__) #include "../../arch/x86/include/asm/barrier.h" #elif defined(__arm__) @@ -26,3 +27,37 @@ #else #include #endif + +/* + * Generic fallback smp_*() definitions for archs that haven't + * been updated yet. + */ + +#ifndef smp_rmb +# define smp_rmb() rmb() +#endif + +#ifndef smp_wmb +# define smp_wmb() wmb() +#endif + +#ifndef smp_mb +# define smp_mb() mb() +#endif + +#ifndef smp_store_release +# define smp_store_release(p, v) \ +do { \ + smp_mb(); \ + WRITE_ONCE(*p, v); \ +} while (0) +#endif + +#ifndef smp_load_acquire +# define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + smp_mb(); \ + ___p1; \ +}) +#endif diff --git a/tools/include/linux/ring_buffer.h b/tools/include/linux/ring_buffer.h new file mode 100644 index 000000000000..9a083ae60473 --- /dev/null +++ b/tools/include/linux/ring_buffer.h @@ -0,0 +1,73 @@ +#ifndef _TOOLS_LINUX_RING_BUFFER_H_ +#define _TOOLS_LINUX_RING_BUFFER_H_ + +#include + +/* + * Contract with kernel for walking the perf ring buffer from + * user space requires the following barrier pairing (quote + * from kernel/events/ring_buffer.c): + * + * Since the mmap() consumer (userspace) can run on a + * different CPU: + * + * kernel user + * + * if (LOAD ->data_tail) { LOAD ->data_head + * (A) smp_rmb() (C) + * STORE $data LOAD $data + * smp_wmb() (B) smp_mb() (D) + * STORE ->data_head STORE ->data_tail + * } + * + * Where A pairs with D, and B pairs with C. + * + * In our case A is a control dependency that separates the + * load of the ->data_tail and the stores of $data. In case + * ->data_tail indicates there is no room in the buffer to + * store $data we do not. + * + * D needs to be a full barrier since it separates the data + * READ from the tail WRITE. + * + * For B a WMB is sufficient since it separates two WRITEs, + * and for C an RMB is sufficient since it separates two READs. + * + * Note, instead of B, C, D we could also use smp_store_release() + * in B and D as well as smp_load_acquire() in C. + * + * However, this optimization does not make sense for all kernel + * supported architectures since for a fair number it would + * resolve into READ_ONCE() + smp_mb() pair for smp_load_acquire(), + * and smp_mb() + WRITE_ONCE() pair for smp_store_release(). + * + * Thus for those smp_wmb() in B and smp_rmb() in C would still + * be less expensive. For the case of D this has either the same + * cost or is less expensive, for example, due to TSO x86 can + * avoid the CPU barrier entirely. + */ + +static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base) +{ +/* + * Architectures where smp_load_acquire() does not fallback to + * READ_ONCE() + smp_mb() pair. + */ +#if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \ + defined(__ia64__) || defined(__sparc__) && defined(__arch64__) + return smp_load_acquire(&base->data_head); +#else + u64 head = READ_ONCE(base->data_head); + + smp_rmb(); + return head; +#endif +} + +static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base, + u64 tail) +{ + smp_store_release(&base->data_tail, tail); +} + +#endif /* _TOOLS_LINUX_RING_BUFFER_H_ */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f9187b41dff6..852dc17ab47a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -103,6 +103,7 @@ enum bpf_cmd { BPF_BTF_LOAD, BPF_BTF_GET_FD_BY_ID, BPF_TASK_FD_QUERY, + BPF_MAP_LOOKUP_AND_DELETE_ELEM, }; enum bpf_map_type { @@ -128,6 +129,8 @@ enum bpf_map_type { BPF_MAP_TYPE_CGROUP_STORAGE, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + BPF_MAP_TYPE_QUEUE, + BPF_MAP_TYPE_STACK, }; enum bpf_prog_type { @@ -462,6 +465,28 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * Description + * Push an element *value* in *map*. *flags* is one of: + * + * **BPF_EXIST** + * If the queue/stack is full, the oldest element is removed to + * make room for this. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * Description + * Pop an element from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * Description + * Get an element from *map* without removing it. + * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -1433,7 +1458,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags) + * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) * Description * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. @@ -2215,6 +2240,23 @@ union bpf_attr { * pointer that was returned from bpf_sk_lookup_xxx\ (). * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) + * Description + * For socket policies, insert *len* bytes into msg at offset + * *start*. + * + * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a + * *msg* it may want to insert metadata or options into the msg. + * This can later be read and used by any of the lower layer BPF + * hooks. + * + * This helper may fail if under memory pressure (a malloc + * fails) in these cases BPF programs will get an appropriate + * error and BPF programs will need to handle them. + * + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2303,7 +2345,11 @@ union bpf_attr { FN(skb_ancestor_cgroup_id), \ FN(sk_lookup_tcp), \ FN(sk_lookup_udp), \ - FN(sk_release), + FN(sk_release), \ + FN(map_push_elem), \ + FN(map_pop_elem), \ + FN(map_peek_elem), \ + FN(msg_push_data), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/include/uapi/linux/tls.h b/tools/include/uapi/linux/tls.h new file mode 100644 index 000000000000..ff02287495ac --- /dev/null +++ b/tools/include/uapi/linux/tls.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ +/* + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _UAPI_LINUX_TLS_H +#define _UAPI_LINUX_TLS_H + +#include + +/* TLS socket options */ +#define TLS_TX 1 /* Set transmit parameters */ +#define TLS_RX 2 /* Set receive parameters */ + +/* Supported versions */ +#define TLS_VERSION_MINOR(ver) ((ver) & 0xFF) +#define TLS_VERSION_MAJOR(ver) (((ver) >> 8) & 0xFF) + +#define TLS_VERSION_NUMBER(id) ((((id##_VERSION_MAJOR) & 0xFF) << 8) | \ + ((id##_VERSION_MINOR) & 0xFF)) + +#define TLS_1_2_VERSION_MAJOR 0x3 +#define TLS_1_2_VERSION_MINOR 0x3 +#define TLS_1_2_VERSION TLS_VERSION_NUMBER(TLS_1_2) + +/* Supported ciphers */ +#define TLS_CIPHER_AES_GCM_128 51 +#define TLS_CIPHER_AES_GCM_128_IV_SIZE 8 +#define TLS_CIPHER_AES_GCM_128_KEY_SIZE 16 +#define TLS_CIPHER_AES_GCM_128_SALT_SIZE 4 +#define TLS_CIPHER_AES_GCM_128_TAG_SIZE 16 +#define TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE 8 + +#define TLS_SET_RECORD_TYPE 1 +#define TLS_GET_RECORD_TYPE 2 + +struct tls_crypto_info { + __u16 version; + __u16 cipher_type; +}; + +struct tls12_crypto_info_aes_gcm_128 { + struct tls_crypto_info info; + unsigned char iv[TLS_CIPHER_AES_GCM_128_IV_SIZE]; + unsigned char key[TLS_CIPHER_AES_GCM_128_KEY_SIZE]; + unsigned char salt[TLS_CIPHER_AES_GCM_128_SALT_SIZE]; + unsigned char rec_seq[TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE]; +}; + +#endif /* _UAPI_LINUX_TLS_H */ diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 79d84413ddf2..425b480bda75 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -125,6 +125,7 @@ override CFLAGS += $(EXTRA_WARNINGS) override CFLAGS += -Werror -Wall override CFLAGS += -fPIC override CFLAGS += $(INCLUDES) +override CFLAGS += -fvisibility=hidden ifeq ($(VERBOSE),1) Q = diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index d70a255cb05e..03f9bcc4ef50 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -278,6 +278,18 @@ int bpf_map_lookup_elem(int fd, const void *key, void *value) return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); } +int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + attr.value = ptr_to_u64(value); + + return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr)); +} + int bpf_map_delete_elem(int fd, const void *key) { union bpf_attr attr; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 69a4d40c4227..26a51538213c 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -27,6 +27,10 @@ #include #include +#ifndef LIBBPF_API +#define LIBBPF_API __attribute__((visibility("default"))) +#endif + struct bpf_create_map_attr { const char *name; enum bpf_map_type map_type; @@ -42,21 +46,24 @@ struct bpf_create_map_attr { __u32 inner_map_fd; }; -int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); -int bpf_create_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, int max_entries, - __u32 map_flags, int node); -int bpf_create_map_name(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, int max_entries, - __u32 map_flags); -int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, - int max_entries, __u32 map_flags); -int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int inner_map_fd, int max_entries, - __u32 map_flags, int node); -int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, - int key_size, int inner_map_fd, int max_entries, - __u32 map_flags); +LIBBPF_API int +bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); +LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, + int max_entries, __u32 map_flags, int node); +LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, + int max_entries, __u32 map_flags); +LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size, + int value_size, int max_entries, __u32 map_flags); +LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type, + const char *name, int key_size, + int inner_map_fd, int max_entries, + __u32 map_flags, int node); +LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type, + const char *name, int key_size, + int inner_map_fd, int max_entries, + __u32 map_flags); struct bpf_load_program_attr { enum bpf_prog_type prog_type; @@ -74,44 +81,51 @@ struct bpf_load_program_attr { /* Recommend log buffer size */ #define BPF_LOG_BUF_SIZE (256 * 1024) -int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, - char *log_buf, size_t log_buf_sz); -int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, const char *license, - __u32 kern_version, char *log_buf, - size_t log_buf_sz); -int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, int strict_alignment, - const char *license, __u32 kern_version, - char *log_buf, size_t log_buf_sz, int log_level); +LIBBPF_API int +bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, + char *log_buf, size_t log_buf_sz); +LIBBPF_API int bpf_load_program(enum bpf_prog_type type, + const struct bpf_insn *insns, size_t insns_cnt, + const char *license, __u32 kern_version, + char *log_buf, size_t log_buf_sz); +LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, + const struct bpf_insn *insns, + size_t insns_cnt, int strict_alignment, + const char *license, __u32 kern_version, + char *log_buf, size_t log_buf_sz, + int log_level); -int bpf_map_update_elem(int fd, const void *key, const void *value, - __u64 flags); +LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value, + __u64 flags); -int bpf_map_lookup_elem(int fd, const void *key, void *value); -int bpf_map_delete_elem(int fd, const void *key); -int bpf_map_get_next_key(int fd, const void *key, void *next_key); -int bpf_obj_pin(int fd, const char *pathname); -int bpf_obj_get(const char *pathname); -int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, - unsigned int flags); -int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); -int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); -int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, - void *data_out, __u32 *size_out, __u32 *retval, - __u32 *duration); -int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); -int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); -int bpf_prog_get_fd_by_id(__u32 id); -int bpf_map_get_fd_by_id(__u32 id); -int bpf_btf_get_fd_by_id(__u32 id); -int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); -int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, - __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); -int bpf_raw_tracepoint_open(const char *name, int prog_fd); -int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, - bool do_log); -int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, - __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, - __u64 *probe_addr); +LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value); +LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, + void *value); +LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); +LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); +LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); +LIBBPF_API int bpf_obj_get(const char *pathname); +LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, + enum bpf_attach_type type, unsigned int flags); +LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); +LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, + enum bpf_attach_type type); +LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data, + __u32 size, void *data_out, __u32 *size_out, + __u32 *retval, __u32 *duration); +LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); +LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); +LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_map_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); +LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type, + __u32 query_flags, __u32 *attach_flags, + __u32 *prog_ids, __u32 *prog_cnt); +LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd); +LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, + __u32 log_buf_size, bool do_log); +LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, + __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, + __u64 *probe_offset, __u64 *probe_addr); #endif /* __LIBBPF_BPF_H */ diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 6db5462bb2ef..b77e7080f7e7 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -6,6 +6,10 @@ #include +#ifndef LIBBPF_API +#define LIBBPF_API __attribute__((visibility("default"))) +#endif + #define BTF_ELF_SEC ".BTF" struct btf; @@ -14,13 +18,15 @@ struct btf_type; typedef int (*btf_print_fn_t)(const char *, ...) __attribute__((format(printf, 1, 2))); -void btf__free(struct btf *btf); -struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log); -__s32 btf__find_by_name(const struct btf *btf, const char *type_name); -const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); -__s64 btf__resolve_size(const struct btf *btf, __u32 type_id); -int btf__resolve_type(const struct btf *btf, __u32 type_id); -int btf__fd(const struct btf *btf); -const char *btf__name_by_offset(const struct btf *btf, __u32 offset); +LIBBPF_API void btf__free(struct btf *btf); +LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log); +LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, + const char *type_name); +LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, + __u32 id); +LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); +LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); +LIBBPF_API int btf__fd(const struct btf *btf); +LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); #endif /* __LIBBPF_BTF_H */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index bd71efcc53be..b607be7236d3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -2414,61 +2415,49 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, } enum bpf_perf_event_ret -bpf_perf_event_read_simple(void *mem, unsigned long size, - unsigned long page_size, void **buf, size_t *buf_len, - bpf_perf_event_print_t fn, void *priv) +bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, + void **copy_mem, size_t *copy_size, + bpf_perf_event_print_t fn, void *private_data) { - volatile struct perf_event_mmap_page *header = mem; + struct perf_event_mmap_page *header = mmap_mem; + __u64 data_head = ring_buffer_read_head(header); __u64 data_tail = header->data_tail; - __u64 data_head = header->data_head; - int ret = LIBBPF_PERF_EVENT_ERROR; - void *base, *begin, *end; + void *base = ((__u8 *)header) + page_size; + int ret = LIBBPF_PERF_EVENT_CONT; + struct perf_event_header *ehdr; + size_t ehdr_size; - asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ - if (data_head == data_tail) - return LIBBPF_PERF_EVENT_CONT; + while (data_head != data_tail) { + ehdr = base + (data_tail & (mmap_size - 1)); + ehdr_size = ehdr->size; - base = ((char *)header) + page_size; + if (((void *)ehdr) + ehdr_size > base + mmap_size) { + void *copy_start = ehdr; + size_t len_first = base + mmap_size - copy_start; + size_t len_secnd = ehdr_size - len_first; - begin = base + data_tail % size; - end = base + data_head % size; - - while (begin != end) { - struct perf_event_header *ehdr; - - ehdr = begin; - if (begin + ehdr->size > base + size) { - long len = base + size - begin; - - if (*buf_len < ehdr->size) { - free(*buf); - *buf = malloc(ehdr->size); - if (!*buf) { + if (*copy_size < ehdr_size) { + free(*copy_mem); + *copy_mem = malloc(ehdr_size); + if (!*copy_mem) { + *copy_size = 0; ret = LIBBPF_PERF_EVENT_ERROR; break; } - *buf_len = ehdr->size; + *copy_size = ehdr_size; } - memcpy(*buf, begin, len); - memcpy(*buf + len, base, ehdr->size - len); - ehdr = (void *)*buf; - begin = base + ehdr->size - len; - } else if (begin + ehdr->size == base + size) { - begin = base; - } else { - begin += ehdr->size; + memcpy(*copy_mem, copy_start, len_first); + memcpy(*copy_mem + len_first, base, len_secnd); + ehdr = *copy_mem; } - ret = fn(ehdr, priv); + ret = fn(ehdr, private_data); + data_tail += ehdr_size; if (ret != LIBBPF_PERF_EVENT_CONT) break; - - data_tail += ehdr->size; } - __sync_synchronize(); /* smp_mb() */ - header->data_tail = data_tail; - + ring_buffer_write_tail(header, data_tail); return ret; } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 7e9c801a9fdd..1f3468dad8b2 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -16,6 +16,10 @@ #include // for size_t #include +#ifndef LIBBPF_API +#define LIBBPF_API __attribute__((visibility("default"))) +#endif + enum libbpf_errno { __LIBBPF_ERRNO__START = 4000, @@ -37,7 +41,7 @@ enum libbpf_errno { __LIBBPF_ERRNO__END, }; -int libbpf_strerror(int err, char *buf, size_t size); +LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size); /* * __printf is defined in include/linux/compiler-gcc.h. However, @@ -47,9 +51,9 @@ int libbpf_strerror(int err, char *buf, size_t size); typedef int (*libbpf_print_fn_t)(const char *, ...) __attribute__((format(printf, 1, 2))); -void libbpf_set_print(libbpf_print_fn_t warn, - libbpf_print_fn_t info, - libbpf_print_fn_t debug); +LIBBPF_API void libbpf_set_print(libbpf_print_fn_t warn, + libbpf_print_fn_t info, + libbpf_print_fn_t debug); /* Hide internal to user */ struct bpf_object; @@ -59,27 +63,28 @@ struct bpf_object_open_attr { enum bpf_prog_type prog_type; }; -struct bpf_object *bpf_object__open(const char *path); -struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr); +LIBBPF_API struct bpf_object *bpf_object__open(const char *path); +LIBBPF_API struct bpf_object * +bpf_object__open_xattr(struct bpf_object_open_attr *attr); struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags); -struct bpf_object *bpf_object__open_buffer(void *obj_buf, - size_t obj_buf_sz, - const char *name); -int bpf_object__pin(struct bpf_object *object, const char *path); -void bpf_object__close(struct bpf_object *object); +LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf, + size_t obj_buf_sz, + const char *name); +LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path); +LIBBPF_API void bpf_object__close(struct bpf_object *object); /* Load/unload object into/from kernel */ -int bpf_object__load(struct bpf_object *obj); -int bpf_object__unload(struct bpf_object *obj); -const char *bpf_object__name(struct bpf_object *obj); -unsigned int bpf_object__kversion(struct bpf_object *obj); -int bpf_object__btf_fd(const struct bpf_object *obj); +LIBBPF_API int bpf_object__load(struct bpf_object *obj); +LIBBPF_API int bpf_object__unload(struct bpf_object *obj); +LIBBPF_API const char *bpf_object__name(struct bpf_object *obj); +LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj); +LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); -struct bpf_program * +LIBBPF_API struct bpf_program * bpf_object__find_program_by_title(struct bpf_object *obj, const char *title); -struct bpf_object *bpf_object__next(struct bpf_object *prev); +LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev); #define bpf_object__for_each_safe(pos, tmp) \ for ((pos) = bpf_object__next(NULL), \ (tmp) = bpf_object__next(pos); \ @@ -87,19 +92,20 @@ struct bpf_object *bpf_object__next(struct bpf_object *prev); (pos) = (tmp), (tmp) = bpf_object__next(tmp)) typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *); -int bpf_object__set_priv(struct bpf_object *obj, void *priv, - bpf_object_clear_priv_t clear_priv); -void *bpf_object__priv(struct bpf_object *prog); +LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv, + bpf_object_clear_priv_t clear_priv); +LIBBPF_API void *bpf_object__priv(struct bpf_object *prog); -int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, - enum bpf_attach_type *expected_attach_type); -int libbpf_attach_type_by_name(const char *name, - enum bpf_attach_type *attach_type); +LIBBPF_API int +libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, + enum bpf_attach_type *expected_attach_type); +LIBBPF_API int libbpf_attach_type_by_name(const char *name, + enum bpf_attach_type *attach_type); /* Accessors of bpf_program */ struct bpf_program; -struct bpf_program *bpf_program__next(struct bpf_program *prog, - struct bpf_object *obj); +LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog, + struct bpf_object *obj); #define bpf_object__for_each_program(pos, obj) \ for ((pos) = bpf_program__next(NULL, (obj)); \ @@ -109,21 +115,24 @@ struct bpf_program *bpf_program__next(struct bpf_program *prog, typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *); -int bpf_program__set_priv(struct bpf_program *prog, void *priv, - bpf_program_clear_priv_t clear_priv); +LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv, + bpf_program_clear_priv_t clear_priv); -void *bpf_program__priv(struct bpf_program *prog); -void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex); +LIBBPF_API void *bpf_program__priv(struct bpf_program *prog); +LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog, + __u32 ifindex); -const char *bpf_program__title(struct bpf_program *prog, bool needs_copy); +LIBBPF_API const char *bpf_program__title(struct bpf_program *prog, + bool needs_copy); -int bpf_program__load(struct bpf_program *prog, char *license, - __u32 kern_version); -int bpf_program__fd(struct bpf_program *prog); -int bpf_program__pin_instance(struct bpf_program *prog, const char *path, - int instance); -int bpf_program__pin(struct bpf_program *prog, const char *path); -void bpf_program__unload(struct bpf_program *prog); +LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license, + __u32 kern_version); +LIBBPF_API int bpf_program__fd(struct bpf_program *prog); +LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, + const char *path, + int instance); +LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path); +LIBBPF_API void bpf_program__unload(struct bpf_program *prog); struct bpf_insn; @@ -184,34 +193,36 @@ typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n, struct bpf_insn *insns, int insns_cnt, struct bpf_prog_prep_result *res); -int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, - bpf_program_prep_t prep); +LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, + bpf_program_prep_t prep); -int bpf_program__nth_fd(struct bpf_program *prog, int n); +LIBBPF_API int bpf_program__nth_fd(struct bpf_program *prog, int n); /* * Adjust type of BPF program. Default is kprobe. */ -int bpf_program__set_socket_filter(struct bpf_program *prog); -int bpf_program__set_tracepoint(struct bpf_program *prog); -int bpf_program__set_raw_tracepoint(struct bpf_program *prog); -int bpf_program__set_kprobe(struct bpf_program *prog); -int bpf_program__set_sched_cls(struct bpf_program *prog); -int bpf_program__set_sched_act(struct bpf_program *prog); -int bpf_program__set_xdp(struct bpf_program *prog); -int bpf_program__set_perf_event(struct bpf_program *prog); -void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type); -void bpf_program__set_expected_attach_type(struct bpf_program *prog, - enum bpf_attach_type type); +LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog); +LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, + enum bpf_prog_type type); +LIBBPF_API void +bpf_program__set_expected_attach_type(struct bpf_program *prog, + enum bpf_attach_type type); -bool bpf_program__is_socket_filter(struct bpf_program *prog); -bool bpf_program__is_tracepoint(struct bpf_program *prog); -bool bpf_program__is_raw_tracepoint(struct bpf_program *prog); -bool bpf_program__is_kprobe(struct bpf_program *prog); -bool bpf_program__is_sched_cls(struct bpf_program *prog); -bool bpf_program__is_sched_act(struct bpf_program *prog); -bool bpf_program__is_xdp(struct bpf_program *prog); -bool bpf_program__is_perf_event(struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_socket_filter(struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_tracepoint(struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_raw_tracepoint(struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_kprobe(struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_sched_cls(struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_sched_act(struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_xdp(struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_perf_event(struct bpf_program *prog); /* * No need for __attribute__((packed)), all members of 'bpf_map_def' @@ -232,39 +243,39 @@ struct bpf_map_def { * so no need to worry about a name clash. */ struct bpf_map; -struct bpf_map * +LIBBPF_API struct bpf_map * bpf_object__find_map_by_name(struct bpf_object *obj, const char *name); /* * Get bpf_map through the offset of corresponding struct bpf_map_def * in the BPF object file. */ -struct bpf_map * +LIBBPF_API struct bpf_map * bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); -struct bpf_map * +LIBBPF_API struct bpf_map * bpf_map__next(struct bpf_map *map, struct bpf_object *obj); #define bpf_map__for_each(pos, obj) \ for ((pos) = bpf_map__next(NULL, (obj)); \ (pos) != NULL; \ (pos) = bpf_map__next((pos), (obj))) -int bpf_map__fd(struct bpf_map *map); -const struct bpf_map_def *bpf_map__def(struct bpf_map *map); -const char *bpf_map__name(struct bpf_map *map); -__u32 bpf_map__btf_key_type_id(const struct bpf_map *map); -__u32 bpf_map__btf_value_type_id(const struct bpf_map *map); +LIBBPF_API int bpf_map__fd(struct bpf_map *map); +LIBBPF_API const struct bpf_map_def *bpf_map__def(struct bpf_map *map); +LIBBPF_API const char *bpf_map__name(struct bpf_map *map); +LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map); +LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map); typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); -int bpf_map__set_priv(struct bpf_map *map, void *priv, - bpf_map_clear_priv_t clear_priv); -void *bpf_map__priv(struct bpf_map *map); -int bpf_map__reuse_fd(struct bpf_map *map, int fd); -bool bpf_map__is_offload_neutral(struct bpf_map *map); -void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); -int bpf_map__pin(struct bpf_map *map, const char *path); +LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, + bpf_map_clear_priv_t clear_priv); +LIBBPF_API void *bpf_map__priv(struct bpf_map *map); +LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); +LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map); +LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); +LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); -long libbpf_get_error(const void *ptr); +LIBBPF_API long libbpf_get_error(const void *ptr); struct bpf_prog_load_attr { const char *file; @@ -273,12 +284,12 @@ struct bpf_prog_load_attr { int ifindex; }; -int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, - struct bpf_object **pobj, int *prog_fd); -int bpf_prog_load(const char *file, enum bpf_prog_type type, - struct bpf_object **pobj, int *prog_fd); +LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, + struct bpf_object **pobj, int *prog_fd); +LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd); -int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); +LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); enum bpf_perf_event_ret { LIBBPF_PERF_EVENT_DONE = 0, @@ -286,12 +297,14 @@ enum bpf_perf_event_ret { LIBBPF_PERF_EVENT_CONT = -2, }; -typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(void *event, - void *priv); -int bpf_perf_event_read_simple(void *mem, unsigned long size, - unsigned long page_size, - void **buf, size_t *buf_len, - bpf_perf_event_print_t fn, void *priv); +struct perf_event_header; +typedef enum bpf_perf_event_ret + (*bpf_perf_event_print_t)(struct perf_event_header *hdr, + void *private_data); +LIBBPF_API enum bpf_perf_event_ret +bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, + void **copy_mem, size_t *copy_size, + bpf_perf_event_print_t fn, void *private_data); struct nlattr; typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 05a6d47c7956..8f6531fd4dad 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include "auxtrace.h" #include "event.h" @@ -71,21 +71,12 @@ void perf_mmap__consume(struct perf_mmap *map); static inline u64 perf_mmap__read_head(struct perf_mmap *mm) { - struct perf_event_mmap_page *pc = mm->base; - u64 head = READ_ONCE(pc->data_head); - rmb(); - return head; + return ring_buffer_read_head(mm->base); } static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) { - struct perf_event_mmap_page *pc = md->base; - - /* - * ensure all reads are done before we write the tail out. - */ - mb(); - pc->data_tail = tail; + ring_buffer_write_tail(md->base, tail); } union perf_event *perf_mmap__read_forward(struct perf_mmap *map); diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 8a60c9b9892d..1b799e30c06d 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -25,3 +25,5 @@ test_cgroup_storage test_select_reuseport test_flow_dissector flow_dissector_load +test_netcnt +test_section_names diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index d99dd6fc3fbe..e39dfb4e7970 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -37,7 +37,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \ - test_sk_lookup_kern.o test_xdp_vlan.o + test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ @@ -118,6 +118,9 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \ $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline +$(OUTPUT)/test_queue_map.o: test_queue_stack_map.h +$(OUTPUT)/test_stack_map.o: test_queue_stack_map.h + BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index fda8c162d0df..686e57ce40f4 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -16,6 +16,13 @@ static int (*bpf_map_update_elem)(void *map, void *key, void *value, (void *) BPF_FUNC_map_update_elem; static int (*bpf_map_delete_elem)(void *map, void *key) = (void *) BPF_FUNC_map_delete_elem; +static int (*bpf_map_push_elem)(void *map, void *value, + unsigned long long flags) = + (void *) BPF_FUNC_map_push_elem; +static int (*bpf_map_pop_elem)(void *map, void *value) = + (void *) BPF_FUNC_map_pop_elem; +static int (*bpf_map_peek_elem)(void *map, void *value) = + (void *) BPF_FUNC_map_peek_elem; static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) = (void *) BPF_FUNC_probe_read; static unsigned long long (*bpf_ktime_get_ns)(void) = @@ -104,6 +111,8 @@ static int (*bpf_msg_cork_bytes)(void *ctx, int len) = (void *) BPF_FUNC_msg_cork_bytes; static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = (void *) BPF_FUNC_msg_pull_data; +static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) = + (void *) BPF_FUNC_msg_push_data; static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = (void *) BPF_FUNC_bind; static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh index d97dc914cd49..156d89f1edcc 100755 --- a/tools/testing/selftests/bpf/test_libbpf.sh +++ b/tools/testing/selftests/bpf/test_libbpf.sh @@ -6,7 +6,7 @@ export TESTNAME=test_libbpf # Determine selftest success via shell exit code exit_handler() { - if (( $? == 0 )); then + if [ $? -eq 0 ]; then echo "selftests: $TESTNAME [PASS]"; else echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2 diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 9b552c0fc47d..4db2116e52be 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -471,6 +472,122 @@ static void test_devmap(int task, void *data) close(fd); } +static void test_queuemap(int task, void *data) +{ + const int MAP_SIZE = 32; + __u32 vals[MAP_SIZE + MAP_SIZE/2], val; + int fd, i; + + /* Fill test values to be used */ + for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++) + vals[i] = rand(); + + /* Invalid key size */ + fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 4, sizeof(val), MAP_SIZE, + map_flags); + assert(fd < 0 && errno == EINVAL); + + fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(val), MAP_SIZE, + map_flags); + /* Queue map does not support BPF_F_NO_PREALLOC */ + if (map_flags & BPF_F_NO_PREALLOC) { + assert(fd < 0 && errno == EINVAL); + return; + } + if (fd < 0) { + printf("Failed to create queuemap '%s'!\n", strerror(errno)); + exit(1); + } + + /* Push MAP_SIZE elements */ + for (i = 0; i < MAP_SIZE; i++) + assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0); + + /* Check that element cannot be pushed due to max_entries limit */ + assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 && + errno == E2BIG); + + /* Peek element */ + assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[0]); + + /* Replace half elements */ + for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++) + assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0); + + /* Pop all elements */ + for (i = MAP_SIZE/2; i < MAP_SIZE + MAP_SIZE/2; i++) + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 && + val == vals[i]); + + /* Check that there are not elements left */ + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 && + errno == ENOENT); + + /* Check that non supported functions set errno to EINVAL */ + assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL); + assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL); + + close(fd); +} + +static void test_stackmap(int task, void *data) +{ + const int MAP_SIZE = 32; + __u32 vals[MAP_SIZE + MAP_SIZE/2], val; + int fd, i; + + /* Fill test values to be used */ + for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++) + vals[i] = rand(); + + /* Invalid key size */ + fd = bpf_create_map(BPF_MAP_TYPE_STACK, 4, sizeof(val), MAP_SIZE, + map_flags); + assert(fd < 0 && errno == EINVAL); + + fd = bpf_create_map(BPF_MAP_TYPE_STACK, 0, sizeof(val), MAP_SIZE, + map_flags); + /* Stack map does not support BPF_F_NO_PREALLOC */ + if (map_flags & BPF_F_NO_PREALLOC) { + assert(fd < 0 && errno == EINVAL); + return; + } + if (fd < 0) { + printf("Failed to create stackmap '%s'!\n", strerror(errno)); + exit(1); + } + + /* Push MAP_SIZE elements */ + for (i = 0; i < MAP_SIZE; i++) + assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0); + + /* Check that element cannot be pushed due to max_entries limit */ + assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 && + errno == E2BIG); + + /* Peek element */ + assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[i - 1]); + + /* Replace half elements */ + for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++) + assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0); + + /* Pop all elements */ + for (i = MAP_SIZE + MAP_SIZE/2 - 1; i >= MAP_SIZE/2; i--) + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 && + val == vals[i]); + + /* Check that there are not elements left */ + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 && + errno == ENOENT); + + /* Check that non supported functions set errno to EINVAL */ + assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL); + assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL); + + close(fd); +} + #include #include #include @@ -1434,10 +1551,15 @@ static void run_all_tests(void) test_map_wronly(); test_reuseport_array(); + + test_queuemap(0, NULL); + test_stackmap(0, NULL); } int main(void) { + srand(time(NULL)); + map_flags = 0; run_all_tests(); diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index e8becca9c521..2d3c04f45530 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1735,8 +1735,105 @@ static void test_reference_tracking() bpf_object__close(obj); } +enum { + QUEUE, + STACK, +}; + +static void test_queue_stack_map(int type) +{ + const int MAP_SIZE = 32; + __u32 vals[MAP_SIZE], duration, retval, size, val; + int i, err, prog_fd, map_in_fd, map_out_fd; + char file[32], buf[128]; + struct bpf_object *obj; + struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); + + /* Fill test values to be used */ + for (i = 0; i < MAP_SIZE; i++) + vals[i] = rand(); + + if (type == QUEUE) + strncpy(file, "./test_queue_map.o", sizeof(file)); + else if (type == STACK) + strncpy(file, "./test_stack_map.o", sizeof(file)); + else + return; + + err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + if (err) { + error_cnt++; + return; + } + + map_in_fd = bpf_find_map(__func__, obj, "map_in"); + if (map_in_fd < 0) + goto out; + + map_out_fd = bpf_find_map(__func__, obj, "map_out"); + if (map_out_fd < 0) + goto out; + + /* Push 32 elements to the input map */ + for (i = 0; i < MAP_SIZE; i++) { + err = bpf_map_update_elem(map_in_fd, NULL, &vals[i], 0); + if (err) { + error_cnt++; + goto out; + } + } + + /* The eBPF program pushes iph.saddr in the output map, + * pops the input map and saves this value in iph.daddr + */ + for (i = 0; i < MAP_SIZE; i++) { + if (type == QUEUE) { + val = vals[i]; + pkt_v4.iph.saddr = vals[i] * 5; + } else if (type == STACK) { + val = vals[MAP_SIZE - 1 - i]; + pkt_v4.iph.saddr = vals[MAP_SIZE - 1 - i] * 5; + } + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + if (err || retval || size != sizeof(pkt_v4) || + iph->daddr != val) + break; + } + + CHECK(err || retval || size != sizeof(pkt_v4) || iph->daddr != val, + "bpf_map_pop_elem", + "err %d errno %d retval %d size %d iph->daddr %u\n", + err, errno, retval, size, iph->daddr); + + /* Queue is empty, program should return TC_ACT_SHOT */ + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + CHECK(err || retval != 2 /* TC_ACT_SHOT */|| size != sizeof(pkt_v4), + "check-queue-stack-map-empty", + "err %d errno %d retval %d size %d\n", + err, errno, retval, size); + + /* Check that the program pushed elements correctly */ + for (i = 0; i < MAP_SIZE; i++) { + err = bpf_map_lookup_and_delete_elem(map_out_fd, NULL, &val); + if (err || val != vals[i] * 5) + break; + } + + CHECK(i != MAP_SIZE && (err || val != vals[i] * 5), + "bpf_map_push_elem", "err %d value %u\n", err, val); + +out: + pkt_v4.iph.saddr = 0; + bpf_object__close(obj); +} + int main(void) { + srand(time(NULL)); + jit_enabled = is_jit_enabled(); test_pkt_access(); @@ -1757,6 +1854,8 @@ int main(void) test_task_fd_query_rawtp(); test_task_fd_query_tp(); test_reference_tracking(); + test_queue_stack_map(QUEUE); + test_queue_stack_map(STACK); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_queue_map.c b/tools/testing/selftests/bpf/test_queue_map.c new file mode 100644 index 000000000000..87db1f9da33d --- /dev/null +++ b/tools/testing/selftests/bpf/test_queue_map.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Politecnico di Torino +#define MAP_TYPE BPF_MAP_TYPE_QUEUE +#include "test_queue_stack_map.h" diff --git a/tools/testing/selftests/bpf/test_queue_stack_map.h b/tools/testing/selftests/bpf/test_queue_stack_map.h new file mode 100644 index 000000000000..295b9b3bc5c7 --- /dev/null +++ b/tools/testing/selftests/bpf/test_queue_stack_map.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (c) 2018 Politecnico di Torino +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +struct bpf_map_def __attribute__ ((section("maps"), used)) map_in = { + .type = MAP_TYPE, + .key_size = 0, + .value_size = sizeof(__u32), + .max_entries = 32, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) map_out = { + .type = MAP_TYPE, + .key_size = 0, + .value_size = sizeof(__u32), + .max_entries = 32, + .map_flags = 0, +}; + +SEC("test") +int _test(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct ethhdr *eth = (struct ethhdr *)(data); + __u32 value; + int err; + + if (eth + 1 > data_end) + return TC_ACT_SHOT; + + struct iphdr *iph = (struct iphdr *)(eth + 1); + + if (iph + 1 > data_end) + return TC_ACT_SHOT; + + err = bpf_map_pop_elem(&map_in, &value); + if (err) + return TC_ACT_SHOT; + + iph->daddr = value; + + err = bpf_map_push_elem(&map_out, &iph->saddr, 0); + if (err) + return TC_ACT_SHOT; + + return TC_ACT_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 10a5fa83c75a..622ade0a0957 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -43,6 +44,13 @@ int running; static void running_handler(int a); +#ifndef TCP_ULP +# define TCP_ULP 31 +#endif +#ifndef SOL_TLS +# define SOL_TLS 282 +#endif + /* randomly selected ports for testing on lo */ #define S1_PORT 10000 #define S2_PORT 10001 @@ -69,9 +77,12 @@ int txmsg_apply; int txmsg_cork; int txmsg_start; int txmsg_end; +int txmsg_start_push; +int txmsg_end_push; int txmsg_ingress; int txmsg_skb; int ktls; +int peek_flag; static const struct option long_options[] = { {"help", no_argument, NULL, 'h' }, @@ -91,9 +102,12 @@ static const struct option long_options[] = { {"txmsg_cork", required_argument, NULL, 'k'}, {"txmsg_start", required_argument, NULL, 's'}, {"txmsg_end", required_argument, NULL, 'e'}, + {"txmsg_start_push", required_argument, NULL, 'p'}, + {"txmsg_end_push", required_argument, NULL, 'q'}, {"txmsg_ingress", no_argument, &txmsg_ingress, 1 }, {"txmsg_skb", no_argument, &txmsg_skb, 1 }, {"ktls", no_argument, &ktls, 1 }, + {"peek", no_argument, &peek_flag, 1 }, {0, 0, NULL, 0 } }; @@ -114,11 +128,6 @@ static void usage(char *argv[]) printf("\n"); } -#define TCP_ULP 31 -#define TLS_TX 1 -#define TLS_RX 2 -#include - char *sock_to_string(int s) { if (s == c1) @@ -349,33 +358,40 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, return 0; } -static int msg_loop(int fd, int iov_count, int iov_length, int cnt, - struct msg_stats *s, bool tx, - struct sockmap_options *opt) +static void msg_free_iov(struct msghdr *msg) { - struct msghdr msg = {0}; - int err, i, flags = MSG_NOSIGNAL; + int i; + + for (i = 0; i < msg->msg_iovlen; i++) + free(msg->msg_iov[i].iov_base); + free(msg->msg_iov); + msg->msg_iov = NULL; + msg->msg_iovlen = 0; +} + +static int msg_alloc_iov(struct msghdr *msg, + int iov_count, int iov_length, + bool data, bool xmit) +{ + unsigned char k = 0; struct iovec *iov; - unsigned char k; - bool data_test = opt->data_test; - bool drop = opt->drop_expected; + int i; iov = calloc(iov_count, sizeof(struct iovec)); if (!iov) return errno; - k = 0; for (i = 0; i < iov_count; i++) { unsigned char *d = calloc(iov_length, sizeof(char)); if (!d) { fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count); - goto out_errno; + goto unwind_iov; } iov[i].iov_base = d; iov[i].iov_len = iov_length; - if (data_test && tx) { + if (data && xmit) { int j; for (j = 0; j < iov_length; j++) @@ -383,9 +399,60 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, } } - msg.msg_iov = iov; - msg.msg_iovlen = iov_count; - k = 0; + msg->msg_iov = iov; + msg->msg_iovlen = iov_count; + + return 0; +unwind_iov: + for (i--; i >= 0 ; i--) + free(msg->msg_iov[i].iov_base); + return -ENOMEM; +} + +static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz) +{ + int i, j, bytes_cnt = 0; + unsigned char k = 0; + + for (i = 0; i < msg->msg_iovlen; i++) { + unsigned char *d = msg->msg_iov[i].iov_base; + + for (j = 0; + j < msg->msg_iov[i].iov_len && size; j++) { + if (d[j] != k++) { + fprintf(stderr, + "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n", + i, j, d[j], k - 1, d[j+1], k); + return -EIO; + } + bytes_cnt++; + if (bytes_cnt == chunk_sz) { + k = 0; + bytes_cnt = 0; + } + size--; + } + } + return 0; +} + +static int msg_loop(int fd, int iov_count, int iov_length, int cnt, + struct msg_stats *s, bool tx, + struct sockmap_options *opt) +{ + struct msghdr msg = {0}, msg_peek = {0}; + int err, i, flags = MSG_NOSIGNAL; + bool drop = opt->drop_expected; + bool data = opt->data_test; + + err = msg_alloc_iov(&msg, iov_count, iov_length, data, tx); + if (err) + goto out_errno; + if (peek_flag) { + err = msg_alloc_iov(&msg_peek, iov_count, iov_length, data, tx); + if (err) + goto out_errno; + } if (tx) { clock_gettime(CLOCK_MONOTONIC, &s->start); @@ -405,19 +472,12 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, } clock_gettime(CLOCK_MONOTONIC, &s->end); } else { - int slct, recv, max_fd = fd; + int slct, recvp = 0, recv, max_fd = fd; int fd_flags = O_NONBLOCK; struct timeval timeout; float total_bytes; - int bytes_cnt = 0; - int chunk_sz; fd_set w; - if (opt->sendpage) - chunk_sz = iov_length * cnt; - else - chunk_sz = iov_length * iov_count; - fcntl(fd, fd_flags); total_bytes = (float)iov_count * (float)iov_length * (float)cnt; err = clock_gettime(CLOCK_MONOTONIC, &s->start); @@ -449,6 +509,19 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, goto out_errno; } + errno = 0; + if (peek_flag) { + flags |= MSG_PEEK; + recvp = recvmsg(fd, &msg_peek, flags); + if (recvp < 0) { + if (errno != EWOULDBLOCK) { + clock_gettime(CLOCK_MONOTONIC, &s->end); + goto out_errno; + } + } + flags = 0; + } + recv = recvmsg(fd, &msg, flags); if (recv < 0) { if (errno != EWOULDBLOCK) { @@ -460,27 +533,23 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, s->bytes_recvd += recv; - if (data_test) { - int j; + if (data) { + int chunk_sz = opt->sendpage ? + iov_length * cnt : + iov_length * iov_count; - for (i = 0; i < msg.msg_iovlen; i++) { - unsigned char *d = iov[i].iov_base; - - for (j = 0; - j < iov[i].iov_len && recv; j++) { - if (d[j] != k++) { - errno = -EIO; - fprintf(stderr, - "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n", - i, j, d[j], k - 1, d[j+1], k); - goto out_errno; - } - bytes_cnt++; - if (bytes_cnt == chunk_sz) { - k = 0; - bytes_cnt = 0; - } - recv--; + errno = msg_verify_data(&msg, recv, chunk_sz); + if (errno) { + perror("data verify msg failed\n"); + goto out_errno; + } + if (recvp) { + errno = msg_verify_data(&msg_peek, + recvp, + chunk_sz); + if (errno) { + perror("data verify msg_peek failed\n"); + goto out_errno; } } } @@ -488,14 +557,12 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, clock_gettime(CLOCK_MONOTONIC, &s->end); } - for (i = 0; i < iov_count; i++) - free(iov[i].iov_base); - free(iov); - return 0; + msg_free_iov(&msg); + msg_free_iov(&msg_peek); + return err; out_errno: - for (i = 0; i < iov_count; i++) - free(iov[i].iov_base); - free(iov); + msg_free_iov(&msg); + msg_free_iov(&msg_peek); return errno; } @@ -562,9 +629,10 @@ static int sendmsg_test(struct sockmap_options *opt) } if (opt->verbose) fprintf(stdout, - "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n", + "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s %s\n", s.bytes_sent, sent_Bps, sent_Bps/giga, - s.bytes_recvd, recvd_Bps, recvd_Bps/giga); + s.bytes_recvd, recvd_Bps, recvd_Bps/giga, + peek_flag ? "(peek_msg)" : ""); if (err && txmsg_cork) err = 0; exit(err ? 1 : 0); @@ -839,6 +907,30 @@ run: } } + if (txmsg_start_push) { + i = 2; + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_start_push, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (txmsg_start_push): %d (%s)\n", + err, strerror(errno)); + goto out; + } + } + + if (txmsg_end_push) { + i = 3; + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_end_push, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem %i@%i (txmsg_end_push): %d (%s)\n", + txmsg_end_push, i, err, strerror(errno)); + goto out; + } + } + if (txmsg_ingress) { int in = BPF_F_INGRESS; @@ -996,6 +1088,8 @@ static void test_options(char *options) strncat(options, "skb,", OPTSTRING); if (ktls) strncat(options, "ktls,", OPTSTRING); + if (peek_flag) + strncat(options, "peek,", OPTSTRING); } static int __test_exec(int cgrp, int test, struct sockmap_options *opt) @@ -1169,6 +1263,8 @@ static int test_mixed(int cgrp) txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0; txmsg_apply = txmsg_cork = 0; txmsg_start = txmsg_end = 0; + txmsg_start_push = txmsg_end_push = 0; + /* Test small and large iov_count values with pass/redir/apply/cork */ txmsg_pass = 1; txmsg_redir = 0; @@ -1285,6 +1381,8 @@ static int test_start_end(int cgrp) /* Test basic start/end with lots of iov_count and iov_lengths */ txmsg_start = 1; txmsg_end = 2; + txmsg_start_push = 1; + txmsg_end_push = 2; err = test_txmsg(cgrp); if (err) goto out; @@ -1298,6 +1396,8 @@ static int test_start_end(int cgrp) for (i = 99; i <= 1600; i += 500) { txmsg_start = 0; txmsg_end = i; + txmsg_start_push = 0; + txmsg_end_push = i; err = test_exec(cgrp, &opt); if (err) goto out; @@ -1307,6 +1407,8 @@ static int test_start_end(int cgrp) for (i = 199; i <= 1600; i += 500) { txmsg_start = 100; txmsg_end = i; + txmsg_start_push = 100; + txmsg_end_push = i; err = test_exec(cgrp, &opt); if (err) goto out; @@ -1315,6 +1417,8 @@ static int test_start_end(int cgrp) /* Test start/end with cork pulling last sg entry */ txmsg_start = 1500; txmsg_end = 1600; + txmsg_start_push = 1500; + txmsg_end_push = 1600; err = test_exec(cgrp, &opt); if (err) goto out; @@ -1322,6 +1426,8 @@ static int test_start_end(int cgrp) /* Test start/end pull of single byte in last page */ txmsg_start = 1111; txmsg_end = 1112; + txmsg_start_push = 1111; + txmsg_end_push = 1112; err = test_exec(cgrp, &opt); if (err) goto out; @@ -1329,6 +1435,8 @@ static int test_start_end(int cgrp) /* Test start/end with end < start */ txmsg_start = 1111; txmsg_end = 0; + txmsg_start_push = 1111; + txmsg_end_push = 0; err = test_exec(cgrp, &opt); if (err) goto out; @@ -1336,6 +1444,8 @@ static int test_start_end(int cgrp) /* Test start/end with end > data */ txmsg_start = 0; txmsg_end = 1601; + txmsg_start_push = 0; + txmsg_end_push = 1601; err = test_exec(cgrp, &opt); if (err) goto out; @@ -1343,6 +1453,8 @@ static int test_start_end(int cgrp) /* Test start/end with start > data */ txmsg_start = 1601; txmsg_end = 1600; + txmsg_start_push = 1601; + txmsg_end_push = 1600; err = test_exec(cgrp, &opt); out: @@ -1358,7 +1470,7 @@ char *map_names[] = { "sock_map_redir", "sock_apply_bytes", "sock_cork_bytes", - "sock_pull_bytes", + "sock_bytes", "sock_redir_flags", "sock_skb_opts", }; @@ -1465,7 +1577,7 @@ static int __test_suite(int cg_fd, char *bpf_file) } /* Tests basic commands and APIs with range of iov values */ - txmsg_start = txmsg_end = 0; + txmsg_start = txmsg_end = txmsg_start_push = txmsg_end_push = 0; err = test_txmsg(cg_fd); if (err) goto out; @@ -1514,7 +1626,7 @@ int main(int argc, char **argv) if (argc < 2) return test_suite(-1); - while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:", + while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:", long_options, &longindex)) != -1) { switch (opt) { case 's': @@ -1523,6 +1635,12 @@ int main(int argc, char **argv) case 'e': txmsg_end = atoi(optarg); break; + case 'p': + txmsg_start_push = atoi(optarg); + break; + case 'q': + txmsg_end_push = atoi(optarg); + break; case 'a': txmsg_apply = atoi(optarg); break; diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h index 8e8e41780bb9..14b8bbac004f 100644 --- a/tools/testing/selftests/bpf/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/test_sockmap_kern.h @@ -70,11 +70,11 @@ struct bpf_map_def SEC("maps") sock_cork_bytes = { .max_entries = 1 }; -struct bpf_map_def SEC("maps") sock_pull_bytes = { +struct bpf_map_def SEC("maps") sock_bytes = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), - .max_entries = 2 + .max_entries = 4 }; struct bpf_map_def SEC("maps") sock_redir_flags = { @@ -181,8 +181,8 @@ int bpf_sockmap(struct bpf_sock_ops *skops) SEC("sk_msg1") int bpf_prog4(struct sk_msg_md *msg) { - int *bytes, zero = 0, one = 1; - int *start, *end; + int *bytes, zero = 0, one = 1, two = 2, three = 3; + int *start, *end, *start_push, *end_push; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -190,18 +190,24 @@ int bpf_prog4(struct sk_msg_md *msg) bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); if (bytes) bpf_msg_cork_bytes(msg, *bytes); - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + start = bpf_map_lookup_elem(&sock_bytes, &zero); + end = bpf_map_lookup_elem(&sock_bytes, &one); if (start && end) bpf_msg_pull_data(msg, *start, *end, 0); + start_push = bpf_map_lookup_elem(&sock_bytes, &two); + end_push = bpf_map_lookup_elem(&sock_bytes, &three); + if (start_push && end_push) + bpf_msg_push_data(msg, *start_push, *end_push, 0); return SK_PASS; } SEC("sk_msg2") int bpf_prog5(struct sk_msg_md *msg) { - int err1 = -1, err2 = -1, zero = 0, one = 1; - int *bytes, *start, *end, len1, len2; + int zero = 0, one = 1, two = 2, three = 3; + int *start, *end, *start_push, *end_push; + int *bytes, len1, len2 = 0, len3; + int err1 = -1, err2 = -1; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -210,8 +216,8 @@ int bpf_prog5(struct sk_msg_md *msg) if (bytes) err2 = bpf_msg_cork_bytes(msg, *bytes); len1 = (__u64)msg->data_end - (__u64)msg->data; - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + start = bpf_map_lookup_elem(&sock_bytes, &zero); + end = bpf_map_lookup_elem(&sock_bytes, &one); if (start && end) { int err; @@ -225,6 +231,23 @@ int bpf_prog5(struct sk_msg_md *msg) bpf_printk("sk_msg2: length update %i->%i\n", len1, len2); } + + start_push = bpf_map_lookup_elem(&sock_bytes, &two); + end_push = bpf_map_lookup_elem(&sock_bytes, &three); + if (start_push && end_push) { + int err; + + bpf_printk("sk_msg2: push(%i:%i)\n", + start_push ? *start_push : 0, + end_push ? *end_push : 0); + err = bpf_msg_push_data(msg, *start_push, *end_push, 0); + if (err) + bpf_printk("sk_msg2: push_data err %i\n", err); + len3 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg2: length push_update %i->%i\n", + len2 ? len2 : len1, len3); + } + bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n", len1, err1, err2); return SK_PASS; @@ -233,8 +256,8 @@ int bpf_prog5(struct sk_msg_md *msg) SEC("sk_msg3") int bpf_prog6(struct sk_msg_md *msg) { - int *bytes, zero = 0, one = 1, key = 0; - int *start, *end, *f; + int *bytes, *start, *end, *start_push, *end_push, *f; + int zero = 0, one = 1, two = 2, three = 3, key = 0; __u64 flags = 0; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); @@ -243,10 +266,17 @@ int bpf_prog6(struct sk_msg_md *msg) bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); if (bytes) bpf_msg_cork_bytes(msg, *bytes); - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + + start = bpf_map_lookup_elem(&sock_bytes, &zero); + end = bpf_map_lookup_elem(&sock_bytes, &one); if (start && end) bpf_msg_pull_data(msg, *start, *end, 0); + + start_push = bpf_map_lookup_elem(&sock_bytes, &two); + end_push = bpf_map_lookup_elem(&sock_bytes, &three); + if (start_push && end_push) + bpf_msg_push_data(msg, *start_push, *end_push, 0); + f = bpf_map_lookup_elem(&sock_redir_flags, &zero); if (f && *f) { key = 2; @@ -262,8 +292,9 @@ int bpf_prog6(struct sk_msg_md *msg) SEC("sk_msg4") int bpf_prog7(struct sk_msg_md *msg) { - int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0; - int *f, *bytes, *start, *end, len1, len2; + int zero = 0, one = 1, two = 2, three = 3, len1, len2 = 0, len3; + int *bytes, *start, *end, *start_push, *end_push, *f; + int err1 = 0, err2 = 0, key = 0; __u64 flags = 0; int err; @@ -274,10 +305,10 @@ int bpf_prog7(struct sk_msg_md *msg) if (bytes) err2 = bpf_msg_cork_bytes(msg, *bytes); len1 = (__u64)msg->data_end - (__u64)msg->data; - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); - if (start && end) { + start = bpf_map_lookup_elem(&sock_bytes, &zero); + end = bpf_map_lookup_elem(&sock_bytes, &one); + if (start && end) { bpf_printk("sk_msg2: pull(%i:%i)\n", start ? *start : 0, end ? *end : 0); err = bpf_msg_pull_data(msg, *start, *end, 0); @@ -288,6 +319,22 @@ int bpf_prog7(struct sk_msg_md *msg) bpf_printk("sk_msg2: length update %i->%i\n", len1, len2); } + + start_push = bpf_map_lookup_elem(&sock_bytes, &two); + end_push = bpf_map_lookup_elem(&sock_bytes, &three); + if (start_push && end_push) { + bpf_printk("sk_msg4: push(%i:%i)\n", + start_push ? *start_push : 0, + end_push ? *end_push : 0); + err = bpf_msg_push_data(msg, *start_push, *end_push, 0); + if (err) + bpf_printk("sk_msg4: push_data err %i\n", + err); + len3 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg4: length push_update %i->%i\n", + len2 ? len2 : len1, len3); + } + f = bpf_map_lookup_elem(&sock_redir_flags, &zero); if (f && *f) { key = 2; @@ -342,8 +389,8 @@ int bpf_prog9(struct sk_msg_md *msg) SEC("sk_msg7") int bpf_prog10(struct sk_msg_md *msg) { - int *bytes, zero = 0, one = 1; - int *start, *end; + int *bytes, *start, *end, *start_push, *end_push; + int zero = 0, one = 1, two = 2, three = 3; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -351,10 +398,14 @@ int bpf_prog10(struct sk_msg_md *msg) bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); if (bytes) bpf_msg_cork_bytes(msg, *bytes); - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + start = bpf_map_lookup_elem(&sock_bytes, &zero); + end = bpf_map_lookup_elem(&sock_bytes, &one); if (start && end) bpf_msg_pull_data(msg, *start, *end, 0); + start_push = bpf_map_lookup_elem(&sock_bytes, &two); + end_push = bpf_map_lookup_elem(&sock_bytes, &three); + if (start_push && end_push) + bpf_msg_push_data(msg, *start_push, *end_push, 0); return SK_DROP; } diff --git a/tools/testing/selftests/bpf/test_stack_map.c b/tools/testing/selftests/bpf/test_stack_map.c new file mode 100644 index 000000000000..31c3880e6da0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_stack_map.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Politecnico di Torino +#define MAP_TYPE BPF_MAP_TYPE_STACK +#include "test_queue_stack_map.h" diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index cf4cd32b6772..769d68a48f30 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -3430,7 +3430,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0), BPF_EXIT_INSN(), }, - .errstr = "BPF_ST stores into R1 inv is not allowed", + .errstr = "BPF_ST stores into R1 ctx is not allowed", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -3442,7 +3442,7 @@ static struct bpf_test tests[] = { BPF_REG_0, offsetof(struct __sk_buff, mark), 0), BPF_EXIT_INSN(), }, - .errstr = "BPF_XADD stores into R1 inv is not allowed", + .errstr = "BPF_XADD stores into R1 ctx is not allowed", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -4862,6 +4862,177 @@ static struct bpf_test tests[] = { .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, + { + "direct packet read test#1 for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, pkt_type)), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, queue_mapping)), + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, protocol)), + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, + offsetof(struct __sk_buff, vlan_present)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "direct packet read test#2 for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct __sk_buff, vlan_tci)), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, vlan_proto)), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, priority)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, priority)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, + ingress_ifindex)), + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, tc_index)), + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, + offsetof(struct __sk_buff, hash)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "direct packet read test#3 for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, cb[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, cb[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, cb[3])), + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, cb[4])), + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, + offsetof(struct __sk_buff, napi_id)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_4, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_5, + offsetof(struct __sk_buff, cb[1])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, cb[2])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7, + offsetof(struct __sk_buff, cb[3])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_8, + offsetof(struct __sk_buff, cb[4])), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "direct packet read test#4 for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, family)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip4)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct __sk_buff, local_ip4)), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[3])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[3])), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, remote_port)), + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, local_port)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid access of tc_classid for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_classid)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid access of data_meta for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data_meta)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid access of flow_keys for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, flow_keys)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid write access to napi_id for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, + offsetof(struct __sk_buff, napi_id)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_9, + offsetof(struct __sk_buff, napi_id)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, { "valid cgroup storage access", .insns = { @@ -5499,7 +5670,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R2 leaks addr into mem", .result_unpriv = REJECT, .result = REJECT, - .errstr = "BPF_XADD stores into R1 inv is not allowed", + .errstr = "BPF_XADD stores into R1 ctx is not allowed", }, { "leak pointer into ctx 2", @@ -5514,7 +5685,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R10 leaks addr into mem", .result_unpriv = REJECT, .result = REJECT, - .errstr = "BPF_XADD stores into R1 inv is not allowed", + .errstr = "BPF_XADD stores into R1 ctx is not allowed", }, { "leak pointer into ctx 3", @@ -12463,7 +12634,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "BPF_XADD stores into R2 ctx", + .errstr = "BPF_XADD stores into R2 pkt is not allowed", .prog_type = BPF_PROG_TYPE_XDP, }, { diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index cabe2a3a3b30..4cdb63bf0521 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -41,6 +41,7 @@ int load_kallsyms(void) syms[i].name = strdup(func); i++; } + fclose(f); sym_cnt = i; qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp); return 0; @@ -124,10 +125,11 @@ struct perf_event_sample { char data[]; }; -static enum bpf_perf_event_ret bpf_perf_event_print(void *event, void *priv) +static enum bpf_perf_event_ret +bpf_perf_event_print(struct perf_event_header *hdr, void *private_data) { - struct perf_event_sample *e = event; - perf_event_print_fn fn = priv; + struct perf_event_sample *e = (struct perf_event_sample *)hdr; + perf_event_print_fn fn = private_data; int ret; if (e->header.type == PERF_RECORD_SAMPLE) {