Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-10-12

The main changes are:

1) The BPF verifier improvements to track register allocation pattern, from Alexei and Yonghong.

2) libbpf relocation support for different size load/store, from Andrii.

3) bpf_redirect_peer() helper and support for inner map array with different max_entries, from Daniel.

4) BPF support for per-cpu variables, from Hao.

5) sockmap improvements, from John.
====================

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
commit ccdf7fae3a
Documentation/bpf/bpf_devel_QA.rst:

@@ -60,13 +60,13 @@ Q: Where can I find patches currently under discussion for BPF subsystem?
 A: All patches that are Cc'ed to netdev are queued for review under netdev
 patchwork project:
 
-  http://patchwork.ozlabs.org/project/netdev/list/
+  https://patchwork.kernel.org/project/netdevbpf/list/
 
 Those patches which target BPF, are assigned to a 'bpf' delegate for
 further processing from BPF maintainers. The current queue with
 patches under review can be found at:
 
-  https://patchwork.ozlabs.org/project/netdev/list/?delegate=77147
+  https://patchwork.kernel.org/project/netdevbpf/list/?delegate=121173
 
 Once the patches have been reviewed by the BPF community as a whole
 and approved by the BPF maintainers, their status in patchwork will be
MAINTAINERS:

@@ -3263,7 +3263,7 @@ M: Daniel Borkmann <daniel@iogearbox.net>
 R:	Martin KaFai Lau <kafai@fb.com>
 R:	Song Liu <songliubraving@fb.com>
 R:	Yonghong Song <yhs@fb.com>
-R:	Andrii Nakryiko <andriin@fb.com>
+R:	Andrii Nakryiko <andrii@kernel.org>
 R:	John Fastabend <john.fastabend@gmail.com>
 R:	KP Singh <kpsingh@chromium.org>
 L:	netdev@vger.kernel.org
drivers/net/veth.c:

@@ -420,6 +420,14 @@ static int veth_select_rxq(struct net_device *dev)
 	return smp_processor_id() % dev->real_num_rx_queues;
 }
 
+static struct net_device *veth_peer_dev(struct net_device *dev)
+{
+	struct veth_priv *priv = netdev_priv(dev);
+
+	/* Callers must be under RCU read side. */
+	return rcu_dereference(priv->peer);
+}
+
 static int veth_xdp_xmit(struct net_device *dev, int n,
 			 struct xdp_frame **frames,
 			 u32 flags, bool ndo_xmit)
@@ -1224,6 +1232,7 @@ static const struct net_device_ops veth_netdev_ops = {
 	.ndo_set_rx_headroom	= veth_set_rx_headroom,
 	.ndo_bpf		= veth_xdp,
 	.ndo_xdp_xmit		= veth_ndo_xdp_xmit,
+	.ndo_get_peer_dev	= veth_peer_dev,
 };
 
 #define VETH_FEATURES	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
include/linux/bpf.h:

@@ -82,7 +82,7 @@ struct bpf_map_ops {
 	void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
 				int fd);
 	void (*map_fd_put_ptr)(void *ptr);
-	u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
+	int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
 	u32 (*map_fd_sys_lookup_elem)(void *ptr);
 	void (*map_seq_show_elem)(struct bpf_map *map, void *key,
 				  struct seq_file *m);
@@ -293,6 +293,7 @@ enum bpf_arg_type {
 	ARG_PTR_TO_ALLOC_MEM_OR_NULL,	/* pointer to dynamically allocated memory or NULL */
 	ARG_CONST_ALLOC_SIZE_OR_ZERO,	/* number of allocated bytes requested */
 	ARG_PTR_TO_BTF_ID_SOCK_COMMON,	/* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
+	ARG_PTR_TO_PERCPU_BTF_ID,	/* pointer to in-kernel percpu type */
 	__BPF_ARG_TYPE_MAX,
 };
 
@@ -307,6 +308,8 @@ enum bpf_return_type {
 	RET_PTR_TO_SOCK_COMMON_OR_NULL,	/* returns a pointer to a sock_common or NULL */
 	RET_PTR_TO_ALLOC_MEM_OR_NULL,	/* returns a pointer to dynamically allocated memory or NULL */
 	RET_PTR_TO_BTF_ID_OR_NULL,	/* returns a pointer to a btf_id or NULL */
+	RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
+	RET_PTR_TO_MEM_OR_BTF_ID,	/* returns a pointer to a valid memory or a btf_id */
 };
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -405,6 +408,7 @@ enum bpf_reg_type {
 	PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
 	PTR_TO_RDWR_BUF,	 /* reg points to a read/write buffer */
 	PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
+	PTR_TO_PERCPU_BTF_ID,	 /* reg points to a percpu kernel variable */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -1828,6 +1832,8 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
 extern const struct bpf_func_proto bpf_copy_from_user_proto;
 extern const struct bpf_func_proto bpf_snprintf_btf_proto;
+extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
+extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
include/linux/bpf_verifier.h:

@@ -308,6 +308,13 @@ struct bpf_insn_aux_data {
 			u32 map_index;	/* index into used_maps[] */
 			u32 map_off;	/* offset from value base address */
 		};
+		struct {
+			enum bpf_reg_type reg_type;	/* type of pseudo_btf_id */
+			union {
+				u32 btf_id;	/* btf_id for struct typed var */
+				u32 mem_size;	/* mem_size for non-struct typed var */
+			};
+		} btf_var;
 	};
 	u64 map_key_state; /* constant (32 bit) key tracking for maps */
 	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
include/linux/btf.h:

@@ -110,6 +110,11 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type,
 	     i < btf_type_vlen(struct_type);			\
 	     i++, member++)
 
+#define for_each_vsi(i, datasec_type, member)			\
+	for (i = 0, member = btf_type_var_secinfo(datasec_type);	\
+	     i < btf_type_vlen(datasec_type);			\
+	     i++, member++)
+
 static inline bool btf_type_is_ptr(const struct btf_type *t)
 {
 	return BTF_INFO_KIND(t->info) == BTF_KIND_PTR;
@@ -145,6 +150,21 @@ static inline bool btf_type_is_func_proto(const struct btf_type *t)
 	return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO;
 }
 
+static inline bool btf_type_is_var(const struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
+}
+
+/* union is only a special case of struct:
+ * all its offsetof(member) == 0
+ */
+static inline bool btf_type_is_struct(const struct btf_type *t)
+{
+	u8 kind = BTF_INFO_KIND(t->info);
+
+	return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
+}
+
 static inline u16 btf_type_vlen(const struct btf_type *t)
 {
 	return BTF_INFO_VLEN(t->info);
@@ -179,6 +199,12 @@ static inline const struct btf_member *btf_type_member(const struct btf_type *t)
 	return (const struct btf_member *)(t + 1);
 }
 
+static inline const struct btf_var_secinfo *btf_type_var_secinfo(
+		const struct btf_type *t)
+{
+	return (const struct btf_var_secinfo *)(t + 1);
+}
+
 #ifdef CONFIG_BPF_SYSCALL
 const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
 const char *btf_name_by_offset(const struct btf *btf, u32 offset);
include/linux/netdevice.h:

@@ -1276,6 +1276,9 @@ struct netdev_net_notifier {
 * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p,
 *			 int cmd);
 *	Add, change, delete or get information on an IPv4 tunnel.
+* struct net_device *(*ndo_get_peer_dev)(struct net_device *dev);
+*	If a device is paired with a peer device, return the peer instance.
+*	The caller must be under RCU read context.
 */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1483,6 +1486,7 @@ struct net_device_ops {
 	struct devlink_port *	(*ndo_get_devlink_port)(struct net_device *dev);
 	int			(*ndo_tunnel_ctl)(struct net_device *dev,
 						  struct ip_tunnel_parm *p, int cmd);
+	struct net_device *	(*ndo_get_peer_dev)(struct net_device *dev);
 };
 
 /**
include/linux/skmsg.h:

@@ -308,6 +308,8 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node);
 int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
 void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
 void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
+void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock);
+void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);
 
 int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
 			 struct sk_msg *msg);
include/net/tcp.h:

@@ -2228,34 +2228,6 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 #endif /* CONFIG_NET_SOCK_MSG */
 
 #ifdef CONFIG_CGROUP_BPF
-/* Copy the listen sk's HDR_OPT_CB flags to its child.
- *
- * During 3-Way-HandShake, the synack is usually sent from
- * the listen sk with the HDR_OPT_CB flags set so that
- * bpf-prog will be called to write the BPF hdr option.
- *
- * In fastopen, the child sk is used to send synack instead
- * of the listen sk. Thus, inheriting the HDR_OPT_CB flags
- * from the listen sk gives the bpf-prog a chance to write
- * BPF hdr option in the synack pkt during fastopen.
- *
- * Both fastopen and non-fastopen child will inherit the
- * HDR_OPT_CB flags to keep the bpf-prog having a consistent
- * behavior when deciding to clear this cb flags (or not)
- * during the PASSIVE_ESTABLISHED_CB.
- *
- * In the future, other cb flags could be inherited here also.
- */
-static inline void bpf_skops_init_child(const struct sock *sk,
-					struct sock *child)
-{
-	tcp_sk(child)->bpf_sock_ops_cb_flags =
-		tcp_sk(sk)->bpf_sock_ops_cb_flags &
-		(BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG |
-		 BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
-		 BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
-}
-
 static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
 				      struct sk_buff *skb,
 				      unsigned int end_offset)
@@ -2264,11 +2236,6 @@ static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
 	skops->skb_data_end = skb->data + end_offset;
 }
 #else
-static inline void bpf_skops_init_child(const struct sock *sk,
-					struct sock *child)
-{
-}
-
 static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
 				      struct sk_buff *skb,
 				      unsigned int end_offset)
include/uapi/linux/bpf.h:

@@ -356,18 +356,36 @@ enum bpf_link_type {
 #define BPF_F_SLEEPABLE		(1U << 4)
 
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
- * two extensions:
+ * the following extensions:
  *
- * insn[0].src_reg:  BPF_PSEUDO_MAP_FD   BPF_PSEUDO_MAP_VALUE
- * insn[0].imm:      map fd              map fd
- * insn[1].imm:      0                   offset into value
- * insn[0].off:      0                   0
- * insn[1].off:      0                   0
- * ldimm64 rewrite:  address of map      address of map[0]+offset
- * verifier type:    CONST_PTR_TO_MAP    PTR_TO_MAP_VALUE
+ * insn[0].src_reg:  BPF_PSEUDO_MAP_FD
+ * insn[0].imm:      map fd
+ * insn[1].imm:      0
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of map
+ * verifier type:    CONST_PTR_TO_MAP
  */
 #define BPF_PSEUDO_MAP_FD	1
+/* insn[0].src_reg:  BPF_PSEUDO_MAP_VALUE
+ * insn[0].imm:      map fd
+ * insn[1].imm:      offset into value
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of map[0]+offset
+ * verifier type:    PTR_TO_MAP_VALUE
+ */
 #define BPF_PSEUDO_MAP_VALUE	2
+/* insn[0].src_reg:  BPF_PSEUDO_BTF_ID
+ * insn[0].imm:      kernel btd id of VAR
+ * insn[1].imm:      0
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of the kernel variable
+ * verifier type:    PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var
+ *                   is struct/union.
+ */
+#define BPF_PSEUDO_BTF_ID	3
 
 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
  * offset to another bpf function
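For context, the BPF_PSEUDO_BTF_ID form above is what libbpf emits when a program references a kernel variable declared as a __ksym extern: libbpf stores the variable's BTF id in insn[0].imm and the verifier later rewrites the ldimm64 with the variable's kallsyms address. A minimal sketch of a tracing program reading a per-cpu kernel variable this way (assuming a kernel built with CONFIG_DEBUG_INFO_BTF=y and that bpf_prog_active is still a per-cpu int, as in the selftests for this feature):

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>

  /* Resolved at load time into a BPF_PSEUDO_BTF_ID ldimm64 insn. */
  extern int bpf_prog_active __ksym;

  SEC("raw_tp/sys_enter")
  int read_percpu_ksym(const void *ctx)
  {
  	int *active;

  	/* bpf_per_cpu_ptr() may return NULL, e.g. for an invalid cpu. */
  	active = bpf_per_cpu_ptr(&bpf_prog_active, 0);
  	if (active)
  		bpf_printk("cpu0 bpf_prog_active = %d", *active);

  	/* bpf_this_cpu_ptr() never returns NULL. */
  	active = bpf_this_cpu_ptr(&bpf_prog_active);
  	bpf_printk("this-cpu bpf_prog_active = %d", *active);
  	return 0;
  }

  char LICENSE[] SEC("license") = "GPL";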
@@ -417,6 +435,9 @@ enum {
 
 	/* Share perf_event among processes */
 	BPF_F_PRESERVE_ELEMS	= (1U << 11),
+
+	/* Create a map that is suitable to be an inner map with dynamic max entries */
+	BPF_F_INNER_MAP		= (1U << 12),
 };
 
 /* Flags for BPF_PROG_QUERY. */
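To illustrate the new flag, an inner array map intended for a map-in-map whose slots may hold arrays of differing sizes could be declared roughly like this in libbpf's BTF-defined map syntax (a sketch, not taken from this series' selftests; names are illustrative):

  /* Template inner map; BPF_F_INNER_MAP relaxes the usual requirement that
   * every inner array share this exact max_entries.
   */
  struct inner_map {
  	__uint(type, BPF_MAP_TYPE_ARRAY);
  	__uint(map_flags, BPF_F_INNER_MAP);
  	__uint(max_entries, 4);
  	__type(key, __u32);
  	__type(value, __u64);
  } inner_a SEC(".maps");

  struct {
  	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
  	__uint(max_entries, 8);
  	__type(key, __u32);
  	__array(values, struct inner_map);
  } outer SEC(".maps") = {
  	.values = { &inner_a },
  };

Userspace can then update outer slots with other BPF_F_INNER_MAP arrays whose max_entries differ from inner_a; the trade-off, visible in the arraymap.c hunks further down, is that such arrays opt out of the inlined array_map_gen_lookup() fast path.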
@@ -1680,7 +1701,7 @@ union bpf_attr {
 *		  **TCP_CONGESTION**, **TCP_BPF_IW**,
 *		  **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
 *		  **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
-*		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
+*		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
 *		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
 *		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
 *	Return
@@ -2235,7 +2256,7 @@ union bpf_attr {
 *	Description
 *		This helper is used in programs implementing policies at the
 *		skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
-*		if the verdeict eBPF program returns **SK_PASS**), redirect it
+*		if the verdict eBPF program returns **SK_PASS**), redirect it
 *		to the socket referenced by *map* (of type
 *		**BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
 *		egress interfaces can be used for redirection. The
@@ -3661,10 +3682,59 @@ union bpf_attr {
 *		Redirect the packet to another net device of index *ifindex*
 *		and fill in L2 addresses from neighboring subsystem. This helper
 *		is somewhat similar to **bpf_redirect**\ (), except that it
-*		fills in e.g. MAC addresses based on the L3 information from
-*		the packet. This helper is supported for IPv4 and IPv6 protocols.
+*		populates L2 addresses as well, meaning, internally, the helper
+*		performs a FIB lookup based on the skb's networking header to
+*		get the address of the next hop and then relies on the neighbor
+*		lookup for the L2 address of the nexthop.
 *
 *		The *flags* argument is reserved and must be 0. The helper is
-*		currently only supported for tc BPF program types.
+*		currently only supported for tc BPF program types, and enabled
+*		for IPv4 and IPv6 protocols.
 *	Return
 *		The helper returns **TC_ACT_REDIRECT** on success or
 *		**TC_ACT_SHOT** on error.
+*
+* void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu)
+*	Description
+*		Take a pointer to a percpu ksym, *percpu_ptr*, and return a
+*		pointer to the percpu kernel variable on *cpu*. A ksym is an
+*		extern variable decorated with '__ksym'. For ksym, there is a
+*		global var (either static or global) defined of the same name
+*		in the kernel. The ksym is percpu if the global var is percpu.
+*		The returned pointer points to the global percpu var on *cpu*.
+*
+*		bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the
+*		kernel, except that bpf_per_cpu_ptr() may return NULL. This
+*		happens if *cpu* is larger than nr_cpu_ids. The caller of
+*		bpf_per_cpu_ptr() must check the returned value.
+*	Return
+*		A pointer pointing to the kernel percpu variable on *cpu*, or
+*		NULL, if *cpu* is invalid.
+*
+* void *bpf_this_cpu_ptr(const void *percpu_ptr)
+*	Description
+*		Take a pointer to a percpu ksym, *percpu_ptr*, and return a
+*		pointer to the percpu kernel variable on this cpu. See the
+*		description of 'ksym' in **bpf_per_cpu_ptr**\ ().
+*
+*		bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in
+*		the kernel. Different from **bpf_per_cpu_ptr**\ (), it would
+*		never return NULL.
+*	Return
+*		A pointer pointing to the kernel percpu variable on this cpu.
+*
+* long bpf_redirect_peer(u32 ifindex, u64 flags)
+*	Description
+*		Redirect the packet to another net device of index *ifindex*.
+*		This helper is somewhat similar to **bpf_redirect**\ (), except
+*		that the redirection happens to the *ifindex*' peer device and
+*		the netns switch takes place from ingress to ingress without
+*		going through the CPU's backlog queue.
+*
+*		The *flags* argument is reserved and must be 0. The helper is
+*		currently only supported for tc BPF program types at the ingress
+*		hook and for veth device types. The peer device must reside in a
+*		different network namespace.
+*	Return
+*		The helper returns **TC_ACT_REDIRECT** on success or
+*		**TC_ACT_SHOT** on error.
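A minimal tc classifier using the new peer-redirect helper might look as follows (a sketch; the hard-coded ifindex is illustrative and stands for the local veth device whose namespace peer should receive the packet, which is roughly the pattern used by the selftest added with this series):

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  /* Illustrative only: ifindex of the local veth whose peer sits in the
   * target netns.
   */
  #define HOST_VETH_IFINDEX 5

  SEC("classifier")
  int tc_ingress_redirect(struct __sk_buff *skb)
  {
  	/* Jumps straight from this device's ingress to the peer's ingress in
  	 * the other namespace, bypassing the per-CPU backlog queue; flags
  	 * must be 0. Returns TC_ACT_REDIRECT on success, TC_ACT_SHOT on error.
  	 */
  	return bpf_redirect_peer(HOST_VETH_IFINDEX, 0);
  }

  char LICENSE[] SEC("license") = "GPL";

Such an object would be attached at the tc ingress hook of the host-facing device, e.g. with `tc filter add dev eth0 ingress bpf da obj prog.o sec classifier` (device name illustrative).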
@@ -3823,6 +3893,9 @@ union bpf_attr {
 	FN(seq_printf_btf),		\
 	FN(skb_cgroup_classid),		\
 	FN(redirect_neigh),		\
+	FN(bpf_per_cpu_ptr),		\
+	FN(bpf_this_cpu_ptr),		\
+	FN(redirect_peer),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
kernel/bpf/arraymap.c:

@@ -16,7 +16,7 @@
 
 #define ARRAY_CREATE_FLAG_MASK \
 	(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
-	 BPF_F_PRESERVE_ELEMS)
+	 BPF_F_PRESERVE_ELEMS | BPF_F_INNER_MAP)
 
 static void bpf_array_free_percpu(struct bpf_array *array)
 {
@@ -62,7 +62,7 @@ int array_map_alloc_check(union bpf_attr *attr)
 		return -EINVAL;
 
 	if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
-	    attr->map_flags & BPF_F_MMAPABLE)
+	    attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP))
 		return -EINVAL;
 
 	if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
@@ -214,7 +214,7 @@ static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
 }
 
 /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
-static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	struct bpf_insn *insn = insn_buf;
@@ -223,6 +223,9 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 	const int map_ptr = BPF_REG_1;
 	const int index = BPF_REG_2;
 
+	if (map->map_flags & BPF_F_INNER_MAP)
+		return -EOPNOTSUPP;
+
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
 	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
 	if (!map->bypass_spec_v1) {
@@ -496,8 +499,10 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
 static bool array_map_meta_equal(const struct bpf_map *meta0,
 				 const struct bpf_map *meta1)
 {
-	return meta0->max_entries == meta1->max_entries &&
-	       bpf_map_meta_equal(meta0, meta1);
+	if (!bpf_map_meta_equal(meta0, meta1))
+		return false;
+	return meta0->map_flags & BPF_F_INNER_MAP ? true :
+	       meta0->max_entries == meta1->max_entries;
 }
 
 struct bpf_iter_seq_array_map_info {
@@ -1251,7 +1256,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
 	return READ_ONCE(*inner_map);
 }
 
-static u32 array_of_map_gen_lookup(struct bpf_map *map,
+static int array_of_map_gen_lookup(struct bpf_map *map,
 				   struct bpf_insn *insn_buf)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
@ -188,11 +188,6 @@
|
|||
i < btf_type_vlen(struct_type); \
|
||||
i++, member++)
|
||||
|
||||
#define for_each_vsi(i, struct_type, member) \
|
||||
for (i = 0, member = btf_type_var_secinfo(struct_type); \
|
||||
i < btf_type_vlen(struct_type); \
|
||||
i++, member++)
|
||||
|
||||
#define for_each_vsi_from(i, from, struct_type, member) \
|
||||
for (i = from, member = btf_type_var_secinfo(struct_type) + from; \
|
||||
i < btf_type_vlen(struct_type); \
|
||||
|
@ -440,16 +435,6 @@ static bool btf_type_nosize_or_null(const struct btf_type *t)
|
|||
return !t || btf_type_nosize(t);
|
||||
}
|
||||
|
||||
/* union is only a special case of struct:
|
||||
* all its offsetof(member) == 0
|
||||
*/
|
||||
static bool btf_type_is_struct(const struct btf_type *t)
|
||||
{
|
||||
u8 kind = BTF_INFO_KIND(t->info);
|
||||
|
||||
return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
|
||||
}
|
||||
|
||||
static bool __btf_type_is_struct(const struct btf_type *t)
|
||||
{
|
||||
return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT;
|
||||
|
@ -460,11 +445,6 @@ static bool btf_type_is_array(const struct btf_type *t)
|
|||
return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
|
||||
}
|
||||
|
||||
static bool btf_type_is_var(const struct btf_type *t)
|
||||
{
|
||||
return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
|
||||
}
|
||||
|
||||
static bool btf_type_is_datasec(const struct btf_type *t)
|
||||
{
|
||||
return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
|
||||
|
@ -613,11 +593,6 @@ static const struct btf_var *btf_type_var(const struct btf_type *t)
|
|||
return (const struct btf_var *)(t + 1);
|
||||
}
|
||||
|
||||
static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t)
|
||||
{
|
||||
return (const struct btf_var_secinfo *)(t + 1);
|
||||
}
|
||||
|
||||
static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
|
||||
{
|
||||
return kind_ops[BTF_INFO_KIND(t->info)];
|
||||
|
|
|
kernel/bpf/hashtab.c:

@@ -612,7 +612,7 @@ static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
 * bpf_prog
 *   __htab_map_lookup_elem
 */
-static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
 	struct bpf_insn *insn = insn_buf;
 	const int ret = BPF_REG_0;
@@ -651,7 +651,7 @@ static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key)
 	return __htab_lru_map_lookup_elem(map, key, false);
 }
 
-static u32 htab_lru_map_gen_lookup(struct bpf_map *map,
+static int htab_lru_map_gen_lookup(struct bpf_map *map,
 				   struct bpf_insn *insn_buf)
 {
 	struct bpf_insn *insn = insn_buf;
@@ -2070,7 +2070,7 @@ static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
 	return READ_ONCE(*inner_map);
 }
 
-static u32 htab_of_map_gen_lookup(struct bpf_map *map,
+static int htab_of_map_gen_lookup(struct bpf_map *map,
 				  struct bpf_insn *insn_buf)
 {
 	struct bpf_insn *insn = insn_buf;
kernel/bpf/helpers.c:

@@ -623,6 +623,34 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
+{
+	if (cpu >= nr_cpu_ids)
+		return (unsigned long)NULL;
+
+	return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
+}
+
+const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
+	.func		= bpf_per_cpu_ptr,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
+	.arg2_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
+{
+	return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
+}
+
+const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
+	.func		= bpf_this_cpu_ptr,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID,
+	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
+};
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
@@ -689,6 +717,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_snprintf_btf_proto;
 	case BPF_FUNC_jiffies64:
 		return &bpf_jiffies64_proto;
+	case BPF_FUNC_bpf_per_cpu_ptr:
+		return &bpf_per_cpu_ptr_proto;
+	case BPF_FUNC_bpf_this_cpu_ptr:
+		return &bpf_this_cpu_ptr_proto;
 	default:
 		break;
 	}
@ -17,6 +17,8 @@ int pcpu_freelist_init(struct pcpu_freelist *s)
|
|||
raw_spin_lock_init(&head->lock);
|
||||
head->first = NULL;
|
||||
}
|
||||
raw_spin_lock_init(&s->extralist.lock);
|
||||
s->extralist.first = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -40,12 +42,50 @@ static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head,
|
|||
raw_spin_unlock(&head->lock);
|
||||
}
|
||||
|
||||
static inline bool pcpu_freelist_try_push_extra(struct pcpu_freelist *s,
|
||||
struct pcpu_freelist_node *node)
|
||||
{
|
||||
if (!raw_spin_trylock(&s->extralist.lock))
|
||||
return false;
|
||||
|
||||
pcpu_freelist_push_node(&s->extralist, node);
|
||||
raw_spin_unlock(&s->extralist.lock);
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void ___pcpu_freelist_push_nmi(struct pcpu_freelist *s,
|
||||
struct pcpu_freelist_node *node)
|
||||
{
|
||||
int cpu, orig_cpu;
|
||||
|
||||
orig_cpu = cpu = raw_smp_processor_id();
|
||||
while (1) {
|
||||
struct pcpu_freelist_head *head;
|
||||
|
||||
head = per_cpu_ptr(s->freelist, cpu);
|
||||
if (raw_spin_trylock(&head->lock)) {
|
||||
pcpu_freelist_push_node(head, node);
|
||||
raw_spin_unlock(&head->lock);
|
||||
return;
|
||||
}
|
||||
cpu = cpumask_next(cpu, cpu_possible_mask);
|
||||
if (cpu >= nr_cpu_ids)
|
||||
cpu = 0;
|
||||
|
||||
/* cannot lock any per cpu lock, try extralist */
|
||||
if (cpu == orig_cpu &&
|
||||
pcpu_freelist_try_push_extra(s, node))
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void __pcpu_freelist_push(struct pcpu_freelist *s,
|
||||
struct pcpu_freelist_node *node)
|
||||
{
|
||||
struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist);
|
||||
|
||||
___pcpu_freelist_push(head, node);
|
||||
if (in_nmi())
|
||||
___pcpu_freelist_push_nmi(s, node);
|
||||
else
|
||||
___pcpu_freelist_push(this_cpu_ptr(s->freelist), node);
|
||||
}
|
||||
|
||||
void pcpu_freelist_push(struct pcpu_freelist *s,
|
||||
|
@ -81,7 +121,7 @@ again:
|
|||
}
|
||||
}
|
||||
|
||||
struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
|
||||
static struct pcpu_freelist_node *___pcpu_freelist_pop(struct pcpu_freelist *s)
|
||||
{
|
||||
struct pcpu_freelist_head *head;
|
||||
struct pcpu_freelist_node *node;
|
||||
|
@ -102,8 +142,59 @@ struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
|
|||
if (cpu >= nr_cpu_ids)
|
||||
cpu = 0;
|
||||
if (cpu == orig_cpu)
|
||||
return NULL;
|
||||
break;
|
||||
}
|
||||
|
||||
/* per cpu lists are all empty, try extralist */
|
||||
raw_spin_lock(&s->extralist.lock);
|
||||
node = s->extralist.first;
|
||||
if (node)
|
||||
s->extralist.first = node->next;
|
||||
raw_spin_unlock(&s->extralist.lock);
|
||||
return node;
|
||||
}
|
||||
|
||||
static struct pcpu_freelist_node *
|
||||
___pcpu_freelist_pop_nmi(struct pcpu_freelist *s)
|
||||
{
|
||||
struct pcpu_freelist_head *head;
|
||||
struct pcpu_freelist_node *node;
|
||||
int orig_cpu, cpu;
|
||||
|
||||
orig_cpu = cpu = raw_smp_processor_id();
|
||||
while (1) {
|
||||
head = per_cpu_ptr(s->freelist, cpu);
|
||||
if (raw_spin_trylock(&head->lock)) {
|
||||
node = head->first;
|
||||
if (node) {
|
||||
head->first = node->next;
|
||||
raw_spin_unlock(&head->lock);
|
||||
return node;
|
||||
}
|
||||
raw_spin_unlock(&head->lock);
|
||||
}
|
||||
cpu = cpumask_next(cpu, cpu_possible_mask);
|
||||
if (cpu >= nr_cpu_ids)
|
||||
cpu = 0;
|
||||
if (cpu == orig_cpu)
|
||||
break;
|
||||
}
|
||||
|
||||
/* cannot pop from per cpu lists, try extralist */
|
||||
if (!raw_spin_trylock(&s->extralist.lock))
|
||||
return NULL;
|
||||
node = s->extralist.first;
|
||||
if (node)
|
||||
s->extralist.first = node->next;
|
||||
raw_spin_unlock(&s->extralist.lock);
|
||||
return node;
|
||||
}
|
||||
|
||||
struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
|
||||
{
|
||||
if (in_nmi())
|
||||
return ___pcpu_freelist_pop_nmi(s);
|
||||
return ___pcpu_freelist_pop(s);
|
||||
}
|
||||
|
||||
struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
|
||||
|
|
|
kernel/bpf/percpu_freelist.h:

@@ -13,6 +13,7 @@ struct pcpu_freelist_head {
 
 struct pcpu_freelist {
 	struct pcpu_freelist_head __percpu *freelist;
+	struct pcpu_freelist_head extralist;
 };
 
 struct pcpu_freelist_node {
kernel/bpf/syscall.c:

@@ -4323,8 +4323,10 @@ static int bpf_prog_bind_map(union bpf_attr *attr)
 	used_maps_old = prog->aux->used_maps;
 
 	for (i = 0; i < prog->aux->used_map_cnt; i++)
-		if (used_maps_old[i] == map)
+		if (used_maps_old[i] == map) {
+			bpf_map_put(map);
 			goto out_unlock;
+		}
 
 	used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1,
 				      sizeof(used_maps_new[0]),
@ -238,6 +238,8 @@ struct bpf_call_arg_meta {
|
|||
u64 msize_max_value;
|
||||
int ref_obj_id;
|
||||
int func_id;
|
||||
u32 btf_id;
|
||||
u32 ret_btf_id;
|
||||
};
|
||||
|
||||
struct btf *btf_vmlinux;
|
||||
|
@ -517,6 +519,7 @@ static const char * const reg_type_str[] = {
|
|||
[PTR_TO_XDP_SOCK] = "xdp_sock",
|
||||
[PTR_TO_BTF_ID] = "ptr_",
|
||||
[PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_",
|
||||
[PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_",
|
||||
[PTR_TO_MEM] = "mem",
|
||||
[PTR_TO_MEM_OR_NULL] = "mem_or_null",
|
||||
[PTR_TO_RDONLY_BUF] = "rdonly_buf",
|
||||
|
@ -583,7 +586,9 @@ static void print_verifier_state(struct bpf_verifier_env *env,
|
|||
/* reg->off should be 0 for SCALAR_VALUE */
|
||||
verbose(env, "%lld", reg->var_off.value + reg->off);
|
||||
} else {
|
||||
if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL)
|
||||
if (t == PTR_TO_BTF_ID ||
|
||||
t == PTR_TO_BTF_ID_OR_NULL ||
|
||||
t == PTR_TO_PERCPU_BTF_ID)
|
||||
verbose(env, "%s", kernel_type_name(reg->btf_id));
|
||||
verbose(env, "(id=%d", reg->id);
|
||||
if (reg_type_may_be_refcounted_or_null(t))
|
||||
|
@ -2204,6 +2209,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
|
|||
case PTR_TO_RDONLY_BUF_OR_NULL:
|
||||
case PTR_TO_RDWR_BUF:
|
||||
case PTR_TO_RDWR_BUF_OR_NULL:
|
||||
case PTR_TO_PERCPU_BTF_ID:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
@ -2221,6 +2227,20 @@ static bool register_is_const(struct bpf_reg_state *reg)
|
|||
return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
|
||||
}
|
||||
|
||||
static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
|
||||
{
|
||||
return tnum_is_unknown(reg->var_off) &&
|
||||
reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
|
||||
reg->umin_value == 0 && reg->umax_value == U64_MAX &&
|
||||
reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
|
||||
reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
|
||||
}
|
||||
|
||||
static bool register_is_bounded(struct bpf_reg_state *reg)
|
||||
{
|
||||
return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
|
||||
}
|
||||
|
||||
static bool __is_pointer_value(bool allow_ptr_leaks,
|
||||
const struct bpf_reg_state *reg)
|
||||
{
|
||||
|
@ -2272,7 +2292,7 @@ static int check_stack_write(struct bpf_verifier_env *env,
|
|||
if (value_regno >= 0)
|
||||
reg = &cur->regs[value_regno];
|
||||
|
||||
if (reg && size == BPF_REG_SIZE && register_is_const(reg) &&
|
||||
if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) &&
|
||||
!register_is_null(reg) && env->bpf_capable) {
|
||||
if (dst_reg != BPF_REG_FP) {
|
||||
/* The backtracking logic can only recognize explicit
|
||||
|
@ -2667,7 +2687,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
|
|||
case BPF_PROG_TYPE_CGROUP_SKB:
|
||||
if (t == BPF_WRITE)
|
||||
return false;
|
||||
/* fallthrough */
|
||||
fallthrough;
|
||||
|
||||
/* Program types with direct read + write access go here! */
|
||||
case BPF_PROG_TYPE_SCHED_CLS:
|
||||
|
@ -3978,6 +3998,7 @@ static const struct bpf_reg_types sock_types = {
|
|||
},
|
||||
};
|
||||
|
||||
#ifdef CONFIG_NET
|
||||
static const struct bpf_reg_types btf_id_sock_common_types = {
|
||||
.types = {
|
||||
PTR_TO_SOCK_COMMON,
|
||||
|
@ -3988,6 +4009,7 @@ static const struct bpf_reg_types btf_id_sock_common_types = {
|
|||
},
|
||||
.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
|
||||
};
|
||||
#endif
|
||||
|
||||
static const struct bpf_reg_types mem_types = {
|
||||
.types = {
|
||||
|
@ -4017,6 +4039,7 @@ static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
|
|||
static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
|
||||
static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
|
||||
static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
|
||||
static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
|
||||
|
||||
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
|
||||
[ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
|
||||
|
@ -4030,7 +4053,9 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
|
|||
[ARG_PTR_TO_CTX] = &context_types,
|
||||
[ARG_PTR_TO_CTX_OR_NULL] = &context_types,
|
||||
[ARG_PTR_TO_SOCK_COMMON] = &sock_types,
|
||||
#ifdef CONFIG_NET
|
||||
[ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
|
||||
#endif
|
||||
[ARG_PTR_TO_SOCKET] = &fullsock_types,
|
||||
[ARG_PTR_TO_SOCKET_OR_NULL] = &fullsock_types,
|
||||
[ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
|
||||
|
@ -4042,6 +4067,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
|
|||
[ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types,
|
||||
[ARG_PTR_TO_INT] = &int_ptr_types,
|
||||
[ARG_PTR_TO_LONG] = &int_ptr_types,
|
||||
[ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
|
||||
};
|
||||
|
||||
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
|
||||
|
@ -4205,6 +4231,12 @@ skip_type_check:
|
|||
err = check_helper_mem_access(env, regno,
|
||||
meta->map_ptr->value_size, false,
|
||||
meta);
|
||||
} else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
|
||||
if (!reg->btf_id) {
|
||||
verbose(env, "Helper has invalid btf_id in R%d\n", regno);
|
||||
return -EACCES;
|
||||
}
|
||||
meta->ret_btf_id = reg->btf_id;
|
||||
} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
|
||||
if (meta->func_id == BPF_FUNC_spin_lock) {
|
||||
if (process_spin_lock(env, regno, true))
|
||||
|
@ -5114,6 +5146,35 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
|
|||
regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
|
||||
regs[BPF_REG_0].id = ++env->id_gen;
|
||||
regs[BPF_REG_0].mem_size = meta.mem_size;
|
||||
} else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL ||
|
||||
fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) {
|
||||
const struct btf_type *t;
|
||||
|
||||
mark_reg_known_zero(env, regs, BPF_REG_0);
|
||||
t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
|
||||
if (!btf_type_is_struct(t)) {
|
||||
u32 tsize;
|
||||
const struct btf_type *ret;
|
||||
const char *tname;
|
||||
|
||||
/* resolve the type size of ksym. */
|
||||
ret = btf_resolve_size(btf_vmlinux, t, &tsize);
|
||||
if (IS_ERR(ret)) {
|
||||
tname = btf_name_by_offset(btf_vmlinux, t->name_off);
|
||||
verbose(env, "unable to resolve the size of type '%s': %ld\n",
|
||||
tname, PTR_ERR(ret));
|
||||
return -EINVAL;
|
||||
}
|
||||
regs[BPF_REG_0].type =
|
||||
fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
|
||||
PTR_TO_MEM : PTR_TO_MEM_OR_NULL;
|
||||
regs[BPF_REG_0].mem_size = tsize;
|
||||
} else {
|
||||
regs[BPF_REG_0].type =
|
||||
fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
|
||||
PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
|
||||
regs[BPF_REG_0].btf_id = meta.ret_btf_id;
|
||||
}
|
||||
} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
|
||||
int ret_btf_id;
|
||||
|
||||
|
@ -5432,7 +5493,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
|
|||
/* smin_val represents the known value */
|
||||
if (known && smin_val == 0 && opcode == BPF_ADD)
|
||||
break;
|
||||
/* fall-through */
|
||||
fallthrough;
|
||||
case PTR_TO_PACKET_END:
|
||||
case PTR_TO_SOCKET:
|
||||
case PTR_TO_SOCKET_OR_NULL:
|
||||
|
@ -6389,6 +6450,11 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
|
|||
src_reg = NULL;
|
||||
if (dst_reg->type != SCALAR_VALUE)
|
||||
ptr_reg = dst_reg;
|
||||
else
|
||||
/* Make sure ID is cleared otherwise dst_reg min/max could be
|
||||
* incorrectly propagated into other registers by find_equal_scalars()
|
||||
*/
|
||||
dst_reg->id = 0;
|
||||
if (BPF_SRC(insn->code) == BPF_X) {
|
||||
src_reg = ®s[insn->src_reg];
|
||||
if (src_reg->type != SCALAR_VALUE) {
|
||||
|
@ -6522,6 +6588,12 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
|
|||
/* case: R1 = R2
|
||||
* copy register state to dest reg
|
||||
*/
|
||||
if (src_reg->type == SCALAR_VALUE && !src_reg->id)
|
||||
/* Assign src and dst registers the same ID
|
||||
* that will be used by find_equal_scalars()
|
||||
* to propagate min/max range.
|
||||
*/
|
||||
src_reg->id = ++env->id_gen;
|
||||
*dst_reg = *src_reg;
|
||||
dst_reg->live |= REG_LIVE_WRITTEN;
|
||||
dst_reg->subreg_def = DEF_NOT_SUBREG;
|
||||
|
@ -6534,6 +6606,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
|
|||
return -EACCES;
|
||||
} else if (src_reg->type == SCALAR_VALUE) {
|
||||
*dst_reg = *src_reg;
|
||||
/* Make sure ID is cleared otherwise
|
||||
* dst_reg min/max could be incorrectly
|
||||
* propagated into src_reg by find_equal_scalars()
|
||||
*/
|
||||
dst_reg->id = 0;
|
||||
dst_reg->live |= REG_LIVE_WRITTEN;
|
||||
dst_reg->subreg_def = env->insn_idx + 1;
|
||||
} else {
|
||||
|
@ -7322,6 +7399,30 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn,
|
|||
return true;
|
||||
}
|
||||
|
||||
static void find_equal_scalars(struct bpf_verifier_state *vstate,
|
||||
struct bpf_reg_state *known_reg)
|
||||
{
|
||||
struct bpf_func_state *state;
|
||||
struct bpf_reg_state *reg;
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i <= vstate->curframe; i++) {
|
||||
state = vstate->frame[i];
|
||||
for (j = 0; j < MAX_BPF_REG; j++) {
|
||||
reg = &state->regs[j];
|
||||
if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
|
||||
*reg = *known_reg;
|
||||
}
|
||||
|
||||
bpf_for_each_spilled_reg(j, state, reg) {
|
||||
if (!reg)
|
||||
continue;
|
||||
if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
|
||||
*reg = *known_reg;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int check_cond_jmp_op(struct bpf_verifier_env *env,
|
||||
struct bpf_insn *insn, int *insn_idx)
|
||||
{
|
||||
|
@ -7450,6 +7551,11 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
|
|||
reg_combine_min_max(&other_branch_regs[insn->src_reg],
|
||||
&other_branch_regs[insn->dst_reg],
|
||||
src_reg, dst_reg, opcode);
|
||||
if (src_reg->id) {
|
||||
find_equal_scalars(this_branch, src_reg);
|
||||
find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
|
||||
}
|
||||
|
||||
}
|
||||
} else if (dst_reg->type == SCALAR_VALUE) {
|
||||
reg_set_min_max(&other_branch_regs[insn->dst_reg],
|
||||
|
@ -7457,6 +7563,11 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
|
|||
opcode, is_jmp32);
|
||||
}
|
||||
|
||||
if (dst_reg->type == SCALAR_VALUE && dst_reg->id) {
|
||||
find_equal_scalars(this_branch, dst_reg);
|
||||
find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
|
||||
}
|
||||
|
||||
/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
|
||||
* NOTE: these optimizations below are related with pointer comparison
|
||||
* which will never be JMP32.
|
||||
|
@ -7488,6 +7599,7 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
|
|||
{
|
||||
struct bpf_insn_aux_data *aux = cur_aux(env);
|
||||
struct bpf_reg_state *regs = cur_regs(env);
|
||||
struct bpf_reg_state *dst_reg;
|
||||
struct bpf_map *map;
|
||||
int err;
|
||||
|
||||
|
@ -7504,25 +7616,45 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
|
|||
if (err)
|
||||
return err;
|
||||
|
||||
dst_reg = ®s[insn->dst_reg];
|
||||
if (insn->src_reg == 0) {
|
||||
u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
|
||||
|
||||
regs[insn->dst_reg].type = SCALAR_VALUE;
|
||||
dst_reg->type = SCALAR_VALUE;
|
||||
__mark_reg_known(®s[insn->dst_reg], imm);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
|
||||
mark_reg_known_zero(env, regs, insn->dst_reg);
|
||||
|
||||
dst_reg->type = aux->btf_var.reg_type;
|
||||
switch (dst_reg->type) {
|
||||
case PTR_TO_MEM:
|
||||
dst_reg->mem_size = aux->btf_var.mem_size;
|
||||
break;
|
||||
case PTR_TO_BTF_ID:
|
||||
case PTR_TO_PERCPU_BTF_ID:
|
||||
dst_reg->btf_id = aux->btf_var.btf_id;
|
||||
break;
|
||||
default:
|
||||
verbose(env, "bpf verifier is misconfigured\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
map = env->used_maps[aux->map_index];
|
||||
mark_reg_known_zero(env, regs, insn->dst_reg);
|
||||
regs[insn->dst_reg].map_ptr = map;
|
||||
dst_reg->map_ptr = map;
|
||||
|
||||
if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
|
||||
regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
|
||||
regs[insn->dst_reg].off = aux->map_off;
|
||||
dst_reg->type = PTR_TO_MAP_VALUE;
|
||||
dst_reg->off = aux->map_off;
|
||||
if (map_value_has_spin_lock(map))
|
||||
regs[insn->dst_reg].id = ++env->id_gen;
|
||||
dst_reg->id = ++env->id_gen;
|
||||
} else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
|
||||
regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
|
||||
dst_reg->type = CONST_PTR_TO_MAP;
|
||||
} else {
|
||||
verbose(env, "bpf verifier is misconfigured\n");
|
||||
return -EINVAL;
|
||||
|
@ -9424,6 +9556,92 @@ process_bpf_exit:
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* replace pseudo btf_id with kernel symbol address */
|
||||
static int check_pseudo_btf_id(struct bpf_verifier_env *env,
|
||||
struct bpf_insn *insn,
|
||||
struct bpf_insn_aux_data *aux)
|
||||
{
|
||||
u32 datasec_id, type, id = insn->imm;
|
||||
const struct btf_var_secinfo *vsi;
|
||||
const struct btf_type *datasec;
|
||||
const struct btf_type *t;
|
||||
const char *sym_name;
|
||||
bool percpu = false;
|
||||
u64 addr;
|
||||
int i;
|
||||
|
||||
if (!btf_vmlinux) {
|
||||
verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (insn[1].imm != 0) {
|
||||
verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
t = btf_type_by_id(btf_vmlinux, id);
|
||||
if (!t) {
|
||||
verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
if (!btf_type_is_var(t)) {
|
||||
verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n",
|
||||
id);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
|
||||
addr = kallsyms_lookup_name(sym_name);
|
||||
if (!addr) {
|
||||
verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
|
||||
sym_name);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu",
|
||||
BTF_KIND_DATASEC);
|
||||
if (datasec_id > 0) {
|
||||
datasec = btf_type_by_id(btf_vmlinux, datasec_id);
|
||||
for_each_vsi(i, datasec, vsi) {
|
||||
if (vsi->type == id) {
|
||||
percpu = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
insn[0].imm = (u32)addr;
|
||||
insn[1].imm = addr >> 32;
|
||||
|
||||
type = t->type;
|
||||
t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
|
||||
if (percpu) {
|
||||
aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
|
||||
aux->btf_var.btf_id = type;
|
||||
} else if (!btf_type_is_struct(t)) {
|
||||
const struct btf_type *ret;
|
||||
const char *tname;
|
||||
u32 tsize;
|
||||
|
||||
/* resolve the type size of ksym. */
|
||||
ret = btf_resolve_size(btf_vmlinux, t, &tsize);
|
||||
if (IS_ERR(ret)) {
|
||||
tname = btf_name_by_offset(btf_vmlinux, t->name_off);
|
||||
verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
|
||||
tname, PTR_ERR(ret));
|
||||
return -EINVAL;
|
||||
}
|
||||
aux->btf_var.reg_type = PTR_TO_MEM;
|
||||
aux->btf_var.mem_size = tsize;
|
||||
} else {
|
||||
aux->btf_var.reg_type = PTR_TO_BTF_ID;
|
||||
aux->btf_var.btf_id = type;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int check_map_prealloc(struct bpf_map *map)
|
||||
{
|
||||
return (map->map_type != BPF_MAP_TYPE_HASH &&
|
||||
|
@ -9534,10 +9752,14 @@ static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
|
|||
map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
|
||||
}
|
||||
|
||||
/* look for pseudo eBPF instructions that access map FDs and
|
||||
* replace them with actual map pointers
|
||||
/* find and rewrite pseudo imm in ld_imm64 instructions:
|
||||
*
|
||||
* 1. if it accesses map FD, replace it with actual map pointer.
|
||||
* 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
|
||||
*
|
||||
* NOTE: btf_vmlinux is required for converting pseudo btf_id.
|
||||
*/
|
||||
static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
|
||||
static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
|
||||
{
|
||||
struct bpf_insn *insn = env->prog->insnsi;
|
||||
int insn_cnt = env->prog->len;
|
||||
|
@ -9578,6 +9800,14 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
|
|||
/* valid generic load 64-bit imm */
|
||||
goto next_insn;
|
||||
|
||||
if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
|
||||
aux = &env->insn_aux_data[i];
|
||||
err = check_pseudo_btf_id(env, insn, aux);
|
||||
if (err)
|
||||
return err;
|
||||
goto next_insn;
|
||||
}
|
||||
|
||||
/* In final convert_pseudo_ld_imm64() step, this is
|
||||
* converted into regular 64-bit imm load insn.
|
||||
*/
|
||||
|
@ -10819,7 +11049,9 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
|
|||
if (insn->imm == BPF_FUNC_map_lookup_elem &&
|
||||
ops->map_gen_lookup) {
|
||||
cnt = ops->map_gen_lookup(map_ptr, insn_buf);
|
||||
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
|
||||
if (cnt == -EOPNOTSUPP)
|
||||
goto patch_map_ops_generic;
|
||||
if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
|
||||
verbose(env, "bpf verifier is misconfigured\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -10849,7 +11081,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
|
|||
(int (*)(struct bpf_map *map, void *value))NULL));
|
||||
BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
|
||||
(int (*)(struct bpf_map *map, void *value))NULL));
|
||||
|
||||
patch_map_ops_generic:
|
||||
switch (insn->imm) {
|
||||
case BPF_FUNC_map_lookup_elem:
|
||||
insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
|
||||
|
@ -11633,10 +11865,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
|
|||
if (is_priv)
|
||||
env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
|
||||
|
||||
ret = replace_map_fd_with_map_ptr(env);
|
||||
if (ret < 0)
|
||||
goto skip_full_check;
|
||||
|
||||
if (bpf_prog_is_dev_bound(env->prog->aux)) {
|
||||
ret = bpf_prog_offload_verifier_prep(env->prog);
|
||||
if (ret)
|
||||
|
@ -11662,6 +11890,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
|
|||
if (ret)
|
||||
goto skip_full_check;
|
||||
|
||||
ret = resolve_pseudo_ldimm64(env);
|
||||
if (ret < 0)
|
||||
goto skip_full_check;
|
||||
|
||||
ret = check_cfg(env);
|
||||
if (ret < 0)
|
||||
goto skip_full_check;
|
||||
|
|
|
kernel/trace/bpf_trace.c:

@@ -1327,6 +1327,10 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
 	case BPF_FUNC_snprintf_btf:
 		return &bpf_snprintf_btf_proto;
+	case BPF_FUNC_bpf_per_cpu_ptr:
+		return &bpf_per_cpu_ptr_proto;
+	case BPF_FUNC_bpf_this_cpu_ptr:
+		return &bpf_this_cpu_ptr_proto;
 	default:
 		return NULL;
 	}
@@ -1776,7 +1780,9 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
 };
 
 const struct bpf_prog_ops raw_tracepoint_prog_ops = {
+#ifdef CONFIG_NET
 	.test_run = bpf_prog_test_run_raw_tp,
+#endif
 };
 
 const struct bpf_verifier_ops tracing_verifier_ops = {
net/core/dev.c:

@@ -4930,7 +4930,7 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 
 static inline struct sk_buff *
 sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
-		   struct net_device *orig_dev)
+		   struct net_device *orig_dev, bool *another)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
@@ -4974,7 +4974,11 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
 		 * redirecting to another netdev
 		 */
 		__skb_push(skb, skb->mac_len);
-		skb_do_redirect(skb);
+		if (skb_do_redirect(skb) == -EAGAIN) {
+			__skb_pull(skb, skb->mac_len);
+			*another = true;
+			break;
+		}
 		return NULL;
 	case TC_ACT_CONSUMED:
 		return NULL;
@@ -5163,7 +5167,12 @@ another_round:
 skip_taps:
 #ifdef CONFIG_NET_INGRESS
 	if (static_branch_unlikely(&ingress_needed_key)) {
-		skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
+		bool another = false;
+
+		skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev,
+					 &another);
+		if (another)
+			goto another_round;
 		if (!skb)
 			goto out;
 
@ -76,6 +76,7 @@
|
|||
#include <net/bpf_sk_storage.h>
|
||||
#include <net/transp_v6.h>
|
||||
#include <linux/btf_ids.h>
|
||||
#include <net/tls.h>
|
||||
|
||||
static const struct bpf_func_proto *
|
||||
bpf_sk_base_func_proto(enum bpf_func_id func_id);
|
||||
|
@ -2379,8 +2380,9 @@ out:
|
|||
|
||||
/* Internal, non-exposed redirect flags. */
|
||||
enum {
|
||||
BPF_F_NEIGH = (1ULL << 1),
|
||||
#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH)
|
||||
BPF_F_NEIGH = (1ULL << 1),
|
||||
BPF_F_PEER = (1ULL << 2),
|
||||
#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER)
|
||||
};
|
||||
|
||||
BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
|
||||
|
@ -2429,19 +2431,35 @@ EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
|
|||
int skb_do_redirect(struct sk_buff *skb)
|
||||
{
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
struct net *net = dev_net(skb->dev);
|
||||
struct net_device *dev;
|
||||
u32 flags = ri->flags;
|
||||
|
||||
dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->tgt_index);
|
||||
dev = dev_get_by_index_rcu(net, ri->tgt_index);
|
||||
ri->tgt_index = 0;
|
||||
if (unlikely(!dev)) {
|
||||
kfree_skb(skb);
|
||||
return -EINVAL;
|
||||
}
|
||||
ri->flags = 0;
|
||||
if (unlikely(!dev))
|
||||
goto out_drop;
|
||||
if (flags & BPF_F_PEER) {
|
||||
const struct net_device_ops *ops = dev->netdev_ops;
|
||||
|
||||
if (unlikely(!ops->ndo_get_peer_dev ||
|
||||
!skb_at_tc_ingress(skb)))
|
||||
goto out_drop;
|
||||
dev = ops->ndo_get_peer_dev(dev);
|
||||
if (unlikely(!dev ||
|
||||
!is_skb_forwardable(dev, skb) ||
|
||||
net_eq(net, dev_net(dev))))
|
||||
goto out_drop;
|
||||
skb->dev = dev;
|
||||
return -EAGAIN;
|
||||
}
|
||||
return flags & BPF_F_NEIGH ?
|
||||
__bpf_redirect_neigh(skb, dev) :
|
||||
__bpf_redirect(skb, dev, flags);
|
||||
out_drop:
|
||||
kfree_skb(skb);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
|
||||
|
@ -2465,6 +2483,27 @@ static const struct bpf_func_proto bpf_redirect_proto = {
|
|||
.arg2_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_2(bpf_redirect_peer, u32, ifindex, u64, flags)
|
||||
{
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
|
||||
if (unlikely(flags))
|
||||
return TC_ACT_SHOT;
|
||||
|
||||
ri->flags = BPF_F_PEER;
|
||||
ri->tgt_index = ifindex;
|
||||
|
||||
return TC_ACT_REDIRECT;
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_redirect_peer_proto = {
|
||||
.func = bpf_redirect_peer,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_ANYTHING,
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags)
|
||||
{
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
|
@ -3479,6 +3518,48 @@ static u32 __bpf_skb_max_len(const struct sk_buff *skb)
|
|||
SKB_MAX_ALLOC;
|
||||
}
|
||||
|
||||
BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
|
||||
u32, mode, u64, flags)
|
||||
{
|
||||
u32 len_diff_abs = abs(len_diff);
|
||||
bool shrink = len_diff < 0;
|
||||
int ret = 0;
|
||||
|
||||
if (unlikely(flags || mode))
|
||||
return -EINVAL;
|
||||
if (unlikely(len_diff_abs > 0xfffU))
|
||||
return -EFAULT;
|
||||
|
||||
if (!shrink) {
|
||||
ret = skb_cow(skb, len_diff);
|
||||
if (unlikely(ret < 0))
|
||||
return ret;
|
||||
__skb_push(skb, len_diff_abs);
|
||||
memset(skb->data, 0, len_diff_abs);
|
||||
} else {
|
||||
if (unlikely(!pskb_may_pull(skb, len_diff_abs)))
|
||||
return -ENOMEM;
|
||||
__skb_pull(skb, len_diff_abs);
|
||||
}
|
||||
bpf_compute_data_end_sk_skb(skb);
|
||||
if (tls_sw_has_ctx_rx(skb->sk)) {
|
||||
struct strp_msg *rxm = strp_msg(skb);
|
||||
|
||||
rxm->full_len += len_diff;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto sk_skb_adjust_room_proto = {
|
||||
.func = sk_skb_adjust_room,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
.arg4_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
|
||||
u32, mode, u64, flags)
|
||||
{
|
||||
|
@ -4784,6 +4865,10 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
|
|||
else
|
||||
icsk->icsk_user_timeout = val;
|
||||
break;
|
||||
case TCP_NOTSENT_LOWAT:
|
||||
tp->notsent_lowat = val;
|
||||
sk->sk_write_space(sk);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
@ -5149,7 +5234,6 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
|
|||
memcpy(params->smac, dev->dev_addr, ETH_ALEN);
|
||||
params->h_vlan_TCI = 0;
|
||||
params->h_vlan_proto = 0;
|
||||
params->ifindex = dev->ifindex;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -5246,6 +5330,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
|
|||
dev = nhc->nhc_dev;
|
||||
|
||||
params->rt_metric = res.fi->fib_priority;
|
||||
params->ifindex = dev->ifindex;
|
||||
|
||||
/* xdp and cls_bpf programs are run in RCU-bh so
|
||||
* rcu_read_lock_bh is not needed here
|
||||
|
@ -5371,6 +5456,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
|
|||
|
||||
dev = res.nh->fib_nh_dev;
|
||||
params->rt_metric = res.f6i->fib6_metric;
|
||||
params->ifindex = dev->ifindex;
|
||||
|
||||
/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
|
||||
* not needed here.
|
||||
|
@ -6745,6 +6831,7 @@ bool bpf_helper_changes_pkt_data(void *func)
|
|||
func == bpf_skb_change_tail ||
|
||||
func == sk_skb_change_tail ||
|
||||
func == bpf_skb_adjust_room ||
|
||||
func == sk_skb_adjust_room ||
|
||||
func == bpf_skb_pull_data ||
|
||||
func == sk_skb_pull_data ||
|
||||
func == bpf_clone_redirect ||
|
||||
|
@ -7005,6 +7092,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
|||
return &bpf_redirect_proto;
|
||||
case BPF_FUNC_redirect_neigh:
|
||||
return &bpf_redirect_neigh_proto;
|
||||
case BPF_FUNC_redirect_peer:
|
||||
return &bpf_redirect_peer_proto;
|
||||
case BPF_FUNC_get_route_realm:
|
||||
return &bpf_get_route_realm_proto;
|
||||
case BPF_FUNC_get_hash_recalc:
|
||||
|
@@ -7218,6 +7307,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
        return &sk_skb_change_tail_proto;
    case BPF_FUNC_skb_change_head:
        return &sk_skb_change_head_proto;
    case BPF_FUNC_skb_adjust_room:
        return &sk_skb_adjust_room_proto;
    case BPF_FUNC_get_socket_cookie:
        return &bpf_get_socket_cookie_proto;
    case BPF_FUNC_get_socket_uid:
net/core/skmsg.c (161 changed lines)
@@ -433,10 +433,12 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
                               u32 off, u32 len, bool ingress)
{
    if (ingress)
        return sk_psock_skb_ingress(psock, skb);
    else
    if (!ingress) {
        if (!sock_writeable(psock->sk))
            return -EAGAIN;
        return skb_send_sock_locked(psock->sk, skb, off, len);
    }
    return sk_psock_skb_ingress(psock, skb);
}

static void sk_psock_backlog(struct work_struct *work)
@ -625,6 +627,8 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
|
|||
rcu_assign_sk_user_data(sk, NULL);
|
||||
if (psock->progs.skb_parser)
|
||||
sk_psock_stop_strp(sk, psock);
|
||||
else if (psock->progs.skb_verdict)
|
||||
sk_psock_stop_verdict(sk, psock);
|
||||
write_unlock_bh(&sk->sk_callback_lock);
|
||||
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
|
||||
|
||||
|
@@ -682,19 +686,8 @@ EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog,
                            struct sk_buff *skb)
{
    int ret;

    skb->sk = psock->sk;
    bpf_compute_data_end_sk_skb(skb);
    ret = bpf_prog_run_pin_on_cpu(prog, skb);
    /* strparser clones the skb before handing it to a upper layer,
     * meaning skb_orphan has been called. We NULL sk on the way out
     * to ensure we don't trigger a BUG_ON() in skb/sk operations
     * later and because we are not charging the memory of this skb
     * to any socket yet.
     */
    skb->sk = NULL;
    return ret;
    return bpf_prog_run_pin_on_cpu(prog, skb);
}

static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
@ -709,38 +702,35 @@ static void sk_psock_skb_redirect(struct sk_buff *skb)
|
|||
{
|
||||
struct sk_psock *psock_other;
|
||||
struct sock *sk_other;
|
||||
bool ingress;
|
||||
|
||||
sk_other = tcp_skb_bpf_redirect_fetch(skb);
|
||||
/* This error is a buggy BPF program, it returned a redirect
|
||||
* return code, but then didn't set a redirect interface.
|
||||
*/
|
||||
if (unlikely(!sk_other)) {
|
||||
kfree_skb(skb);
|
||||
return;
|
||||
}
|
||||
psock_other = sk_psock(sk_other);
|
||||
/* This error indicates the socket is being torn down or had another
|
||||
* error that caused the pipe to break. We can't send a packet on
|
||||
* a socket that is in this state so we drop the skb.
|
||||
*/
|
||||
if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
|
||||
!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
|
||||
kfree_skb(skb);
|
||||
return;
|
||||
}
|
||||
|
||||
ingress = tcp_skb_bpf_ingress(skb);
|
||||
if ((!ingress && sock_writeable(sk_other)) ||
|
||||
(ingress &&
|
||||
atomic_read(&sk_other->sk_rmem_alloc) <=
|
||||
sk_other->sk_rcvbuf)) {
|
||||
if (!ingress)
|
||||
skb_set_owner_w(skb, sk_other);
|
||||
skb_queue_tail(&psock_other->ingress_skb, skb);
|
||||
schedule_work(&psock_other->work);
|
||||
} else {
|
||||
kfree_skb(skb);
|
||||
}
|
||||
skb_queue_tail(&psock_other->ingress_skb, skb);
|
||||
schedule_work(&psock_other->work);
|
||||
}
|
||||
|
||||
static void sk_psock_tls_verdict_apply(struct sk_buff *skb, int verdict)
|
||||
static void sk_psock_tls_verdict_apply(struct sk_buff *skb, struct sock *sk, int verdict)
|
||||
{
|
||||
switch (verdict) {
|
||||
case __SK_REDIRECT:
|
||||
skb_set_owner_r(skb, sk);
|
||||
sk_psock_skb_redirect(skb);
|
||||
break;
|
||||
case __SK_PASS:
|
||||
|
@ -758,11 +748,17 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
|
|||
rcu_read_lock();
|
||||
prog = READ_ONCE(psock->progs.skb_verdict);
|
||||
if (likely(prog)) {
|
||||
/* We skip full set_owner_r here because if we do a SK_PASS
|
||||
* or SK_DROP we can skip skb memory accounting and use the
|
||||
* TLS context.
|
||||
*/
|
||||
skb->sk = psock->sk;
|
||||
tcp_skb_bpf_redirect_clear(skb);
|
||||
ret = sk_psock_bpf_run(psock, prog, skb);
|
||||
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
|
||||
skb->sk = NULL;
|
||||
}
|
||||
sk_psock_tls_verdict_apply(skb, ret);
|
||||
sk_psock_tls_verdict_apply(skb, psock->sk, ret);
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
@ -771,7 +767,9 @@ EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
|
|||
static void sk_psock_verdict_apply(struct sk_psock *psock,
|
||||
struct sk_buff *skb, int verdict)
|
||||
{
|
||||
struct tcp_skb_cb *tcp;
|
||||
struct sock *sk_other;
|
||||
int err = -EIO;
|
||||
|
||||
switch (verdict) {
|
||||
case __SK_PASS:
|
||||
|
@ -780,16 +778,24 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
|
|||
!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
|
||||
goto out_free;
|
||||
}
|
||||
if (atomic_read(&sk_other->sk_rmem_alloc) <=
|
||||
sk_other->sk_rcvbuf) {
|
||||
struct tcp_skb_cb *tcp = TCP_SKB_CB(skb);
|
||||
|
||||
tcp->bpf.flags |= BPF_F_INGRESS;
|
||||
tcp = TCP_SKB_CB(skb);
|
||||
tcp->bpf.flags |= BPF_F_INGRESS;
|
||||
|
||||
/* If the queue is empty then we can submit directly
|
||||
* into the msg queue. If its not empty we have to
|
||||
* queue work otherwise we may get OOO data. Otherwise,
|
||||
* if sk_psock_skb_ingress errors will be handled by
|
||||
* retrying later from workqueue.
|
||||
*/
|
||||
if (skb_queue_empty(&psock->ingress_skb)) {
|
||||
err = sk_psock_skb_ingress(psock, skb);
|
||||
}
|
||||
if (err < 0) {
|
||||
skb_queue_tail(&psock->ingress_skb, skb);
|
||||
schedule_work(&psock->work);
|
||||
break;
|
||||
}
|
||||
goto out_free;
|
||||
break;
|
||||
case __SK_REDIRECT:
|
||||
sk_psock_skb_redirect(skb);
|
||||
break;
|
||||
|
@ -814,9 +820,9 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
|
|||
kfree_skb(skb);
|
||||
goto out;
|
||||
}
|
||||
skb_set_owner_r(skb, sk);
|
||||
prog = READ_ONCE(psock->progs.skb_verdict);
|
||||
if (likely(prog)) {
|
||||
skb_orphan(skb);
|
||||
tcp_skb_bpf_redirect_clear(skb);
|
||||
ret = sk_psock_bpf_run(psock, prog, skb);
|
||||
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
|
||||
|
@ -839,8 +845,11 @@ static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
|
|||
|
||||
rcu_read_lock();
|
||||
prog = READ_ONCE(psock->progs.skb_parser);
|
||||
if (likely(prog))
|
||||
if (likely(prog)) {
|
||||
skb->sk = psock->sk;
|
||||
ret = sk_psock_bpf_run(psock, prog, skb);
|
||||
skb->sk = NULL;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
@ -864,6 +873,57 @@ static void sk_psock_strp_data_ready(struct sock *sk)
|
|||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
|
||||
unsigned int offset, size_t orig_len)
|
||||
{
|
||||
struct sock *sk = (struct sock *)desc->arg.data;
|
||||
struct sk_psock *psock;
|
||||
struct bpf_prog *prog;
|
||||
int ret = __SK_DROP;
|
||||
int len = skb->len;
|
||||
|
||||
/* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */
|
||||
skb = skb_clone(skb, GFP_ATOMIC);
|
||||
if (!skb) {
|
||||
desc->error = -ENOMEM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
psock = sk_psock(sk);
|
||||
if (unlikely(!psock)) {
|
||||
len = 0;
|
||||
kfree_skb(skb);
|
||||
goto out;
|
||||
}
|
||||
skb_set_owner_r(skb, sk);
|
||||
prog = READ_ONCE(psock->progs.skb_verdict);
|
||||
if (likely(prog)) {
|
||||
tcp_skb_bpf_redirect_clear(skb);
|
||||
ret = sk_psock_bpf_run(psock, prog, skb);
|
||||
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
|
||||
}
|
||||
sk_psock_verdict_apply(psock, skb, ret);
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
return len;
|
||||
}
|
||||
|
||||
static void sk_psock_verdict_data_ready(struct sock *sk)
|
||||
{
|
||||
struct socket *sock = sk->sk_socket;
|
||||
read_descriptor_t desc;
|
||||
|
||||
if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
|
||||
return;
|
||||
|
||||
desc.arg.data = sk;
|
||||
desc.error = 0;
|
||||
desc.count = 1;
|
||||
|
||||
sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
|
||||
}
|
||||
|
||||
static void sk_psock_write_space(struct sock *sk)
|
||||
{
|
||||
struct sk_psock *psock;
|
||||
|
@ -893,6 +953,19 @@ int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
|
|||
return strp_init(&psock->parser.strp, sk, &cb);
|
||||
}
|
||||
|
||||
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
struct sk_psock_parser *parser = &psock->parser;
|
||||
|
||||
if (parser->enabled)
|
||||
return;
|
||||
|
||||
parser->saved_data_ready = sk->sk_data_ready;
|
||||
sk->sk_data_ready = sk_psock_verdict_data_ready;
|
||||
sk->sk_write_space = sk_psock_write_space;
|
||||
parser->enabled = true;
|
||||
}
|
||||
|
||||
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
struct sk_psock_parser *parser = &psock->parser;
|
||||
|
@ -918,3 +991,15 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
|
|||
strp_stop(&parser->strp);
|
||||
parser->enabled = false;
|
||||
}
|
||||
|
||||
void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
struct sk_psock_parser *parser = &psock->parser;
|
||||
|
||||
if (!parser->enabled)
|
||||
return;
|
||||
|
||||
sk->sk_data_ready = parser->saved_data_ready;
|
||||
parser->saved_data_ready = NULL;
|
||||
parser->enabled = false;
|
||||
}
|
||||
|
|
|
@ -148,8 +148,8 @@ static void sock_map_add_link(struct sk_psock *psock,
|
|||
static void sock_map_del_link(struct sock *sk,
|
||||
struct sk_psock *psock, void *link_raw)
|
||||
{
|
||||
bool strp_stop = false, verdict_stop = false;
|
||||
struct sk_psock_link *link, *tmp;
|
||||
bool strp_stop = false;
|
||||
|
||||
spin_lock_bh(&psock->link_lock);
|
||||
list_for_each_entry_safe(link, tmp, &psock->link, list) {
|
||||
|
@ -159,14 +159,19 @@ static void sock_map_del_link(struct sock *sk,
|
|||
map);
|
||||
if (psock->parser.enabled && stab->progs.skb_parser)
|
||||
strp_stop = true;
|
||||
if (psock->parser.enabled && stab->progs.skb_verdict)
|
||||
verdict_stop = true;
|
||||
list_del(&link->list);
|
||||
sk_psock_free_link(link);
|
||||
}
|
||||
}
|
||||
spin_unlock_bh(&psock->link_lock);
|
||||
if (strp_stop) {
|
||||
if (strp_stop || verdict_stop) {
|
||||
write_lock_bh(&sk->sk_callback_lock);
|
||||
sk_psock_stop_strp(sk, psock);
|
||||
if (strp_stop)
|
||||
sk_psock_stop_strp(sk, psock);
|
||||
else
|
||||
sk_psock_stop_verdict(sk, psock);
|
||||
write_unlock_bh(&sk->sk_callback_lock);
|
||||
}
|
||||
}
|
||||
|
@ -230,16 +235,16 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
|
|||
{
|
||||
struct bpf_prog *msg_parser, *skb_parser, *skb_verdict;
|
||||
struct sk_psock *psock;
|
||||
bool skb_progs;
|
||||
int ret;
|
||||
|
||||
skb_verdict = READ_ONCE(progs->skb_verdict);
|
||||
skb_parser = READ_ONCE(progs->skb_parser);
|
||||
skb_progs = skb_parser && skb_verdict;
|
||||
if (skb_progs) {
|
||||
if (skb_verdict) {
|
||||
skb_verdict = bpf_prog_inc_not_zero(skb_verdict);
|
||||
if (IS_ERR(skb_verdict))
|
||||
return PTR_ERR(skb_verdict);
|
||||
}
|
||||
if (skb_parser) {
|
||||
skb_parser = bpf_prog_inc_not_zero(skb_parser);
|
||||
if (IS_ERR(skb_parser)) {
|
||||
bpf_prog_put(skb_verdict);
|
||||
|
@ -264,7 +269,8 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
|
|||
|
||||
if (psock) {
|
||||
if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) ||
|
||||
(skb_progs && READ_ONCE(psock->progs.skb_parser))) {
|
||||
(skb_parser && READ_ONCE(psock->progs.skb_parser)) ||
|
||||
(skb_verdict && READ_ONCE(psock->progs.skb_verdict))) {
|
||||
sk_psock_put(sk, psock);
|
||||
ret = -EBUSY;
|
||||
goto out_progs;
|
||||
|
@ -285,28 +291,31 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
|
|||
goto out_drop;
|
||||
|
||||
write_lock_bh(&sk->sk_callback_lock);
|
||||
if (skb_progs && !psock->parser.enabled) {
|
||||
if (skb_parser && skb_verdict && !psock->parser.enabled) {
|
||||
ret = sk_psock_init_strp(sk, psock);
|
||||
if (ret) {
|
||||
write_unlock_bh(&sk->sk_callback_lock);
|
||||
goto out_drop;
|
||||
}
|
||||
if (ret)
|
||||
goto out_unlock_drop;
|
||||
psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
|
||||
psock_set_prog(&psock->progs.skb_parser, skb_parser);
|
||||
sk_psock_start_strp(sk, psock);
|
||||
} else if (!skb_parser && skb_verdict && !psock->parser.enabled) {
|
||||
psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
|
||||
sk_psock_start_verdict(sk,psock);
|
||||
}
|
||||
write_unlock_bh(&sk->sk_callback_lock);
|
||||
return 0;
|
||||
out_unlock_drop:
|
||||
write_unlock_bh(&sk->sk_callback_lock);
|
||||
out_drop:
|
||||
sk_psock_put(sk, psock);
|
||||
out_progs:
|
||||
if (msg_parser)
|
||||
bpf_prog_put(msg_parser);
|
||||
out:
|
||||
if (skb_progs) {
|
||||
if (skb_verdict)
|
||||
bpf_prog_put(skb_verdict);
|
||||
if (skb_parser)
|
||||
bpf_prog_put(skb_parser);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
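With the sock_map_link() changes above, a sockmap can now run with only a verdict program and no strparser attached. A hedged userspace sketch of that attach sequence follows; map_fd, verdict_fd and sock_fd are assumed to exist already, and the helper name is illustrative:

/* Attach only a stream verdict program, relying on the parser-less path. */
#include <bpf/bpf.h>

static int attach_verdict_only(int map_fd, int verdict_fd, int sock_fd)
{
    int key = 0, err;

    /* Intentionally no BPF_SK_SKB_STREAM_PARSER program here. */
    err = bpf_prog_attach(verdict_fd, map_fd,
                          BPF_SK_SKB_STREAM_VERDICT, 0);
    if (err)
        return err;

    /* Adding the socket triggers sock_map_link() as shown above. */
    return bpf_map_update_elem(map_fd, &key, &sock_fd, BPF_ANY);
}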
||||
|
|
|
@ -548,7 +548,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
|
|||
newtp->fastopen_req = NULL;
|
||||
RCU_INIT_POINTER(newtp->fastopen_rsk, NULL);
|
||||
|
||||
bpf_skops_init_child(sk, newsk);
|
||||
tcp_bpf_clone(sk, newsk);
|
||||
|
||||
__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
|
||||
|
|
|
@ -3,9 +3,6 @@
|
|||
#include <net/xsk_buff_pool.h>
|
||||
#include <net/xdp_sock.h>
|
||||
#include <net/xdp_sock_drv.h>
|
||||
#include <linux/dma-direct.h>
|
||||
#include <linux/dma-noncoherent.h>
|
||||
#include <linux/swiotlb.h>
|
||||
|
||||
#include "xsk_queue.h"
|
||||
#include "xdp_umem.h"
|
||||
|
|
|
@ -15,6 +15,10 @@
|
|||
|
||||
struct xdp_ring {
|
||||
u32 producer ____cacheline_aligned_in_smp;
|
||||
/* Hinder the adjacent cache prefetcher to prefetch the consumer
|
||||
* pointer if the producer pointer is touched and vice versa.
|
||||
*/
|
||||
u32 pad ____cacheline_aligned_in_smp;
|
||||
u32 consumer ____cacheline_aligned_in_smp;
|
||||
u32 flags;
|
||||
};
|
||||
|
|
|
@ -132,7 +132,7 @@ static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
|
||||
static int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
|
||||
{
|
||||
const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2;
|
||||
struct bpf_insn *insn = insn_buf;
|
||||
|
|
|
@ -98,8 +98,8 @@ test_map_in_map-objs := test_map_in_map_user.o
|
|||
per_socket_stats_example-objs := cookie_uid_helper_example.o
|
||||
xdp_redirect-objs := xdp_redirect_user.o
|
||||
xdp_redirect_map-objs := xdp_redirect_map_user.o
|
||||
xdp_redirect_cpu-objs := bpf_load.o xdp_redirect_cpu_user.o
|
||||
xdp_monitor-objs := bpf_load.o xdp_monitor_user.o
|
||||
xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o
|
||||
xdp_monitor-objs := xdp_monitor_user.o
|
||||
xdp_rxq_info-objs := xdp_rxq_info_user.o
|
||||
syscall_tp-objs := syscall_tp_user.o
|
||||
cpustat-objs := cpustat_user.o
|
||||
|
@ -211,6 +211,8 @@ TPROGLDLIBS_xsk_fwd += -pthread
|
|||
# make M=samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
|
||||
LLC ?= llc
|
||||
CLANG ?= clang
|
||||
OPT ?= opt
|
||||
LLVM_DIS ?= llvm-dis
|
||||
LLVM_OBJCOPY ?= llvm-objcopy
|
||||
BTF_PAHOLE ?= pahole
|
||||
|
||||
|
@@ -303,6 +305,11 @@ $(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
# But, there is no easy way to fix it, so just exclude it since it is
# useless for BPF samples.
# below we use long chain of commands, clang | opt | llvm-dis | llc,
# to generate final object file. 'clang' compiles the source into IR
# with native target, e.g., x64, arm64, etc. 'opt' does bpf CORE IR builtin
# processing (llvm12) and IR optimizations. 'llvm-dis' converts
# 'opt' output to IR, and finally 'llc' generates bpf byte code.
$(obj)/%.o: $(src)/%.c
        @echo " CLANG-bpf " $@
        $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \

@@ -314,7 +321,9 @@ $(obj)/%.o: $(src)/%.c
                -Wno-address-of-packed-member -Wno-tautological-compare \
                -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
                -I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \
                -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
                -O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \
                        $(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \
                        $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
ifeq ($(DWARF2BTF),y)
        $(BTF_PAHOLE) -J $@
endif
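For reference, the new rule roughly expands to the standalone pipeline below for a single sample; the file names and the trimmed include paths are placeholders, not the exact Makefile expansion:

# clang emits native-target IR, opt runs the BPF CO-RE builtin processing
# and IR optimizations, llvm-dis converts back to textual IR, llc emits
# the final BPF object (prog_kern.c / prog_kern.o are hypothetical names):
clang -O2 -emit-llvm -Xclang -disable-llvm-passes -c prog_kern.c -o - | \
        opt -O2 -mtriple=bpf-pc-linux | llvm-dis | \
        llc -march=bpf -filetype=obj -o prog_kern.o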
|
|
@ -40,6 +40,7 @@
|
|||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <linux/unistd.h>
|
||||
#include <linux/compiler.h>
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf.h>
|
||||
|
@ -483,7 +484,7 @@ int main(int argc, char **argv)
|
|||
"Option -%c requires an argument.\n\n",
|
||||
optopt);
|
||||
case 'h':
|
||||
fallthrough;
|
||||
__fallthrough;
|
||||
default:
|
||||
Usage();
|
||||
return 0;
|
||||
|
|
|
@@ -6,21 +6,21 @@
#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct bpf_map_def SEC("maps") redirect_err_cnt = {
    .type = BPF_MAP_TYPE_PERCPU_ARRAY,
    .key_size = sizeof(u32),
    .value_size = sizeof(u64),
    .max_entries = 2,
struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    __type(key, u32);
    __type(value, u64);
    __uint(max_entries, 2);
    /* TODO: have entries for all possible errno's */
};
} redirect_err_cnt SEC(".maps");

#define XDP_UNKNOWN XDP_REDIRECT + 1
struct bpf_map_def SEC("maps") exception_cnt = {
    .type = BPF_MAP_TYPE_PERCPU_ARRAY,
    .key_size = sizeof(u32),
    .value_size = sizeof(u64),
    .max_entries = XDP_UNKNOWN + 1,
};
struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    __type(key, u32);
    __type(value, u64);
    __uint(max_entries, XDP_UNKNOWN + 1);
} exception_cnt SEC(".maps");

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
 * Code in: kernel/include/trace/events/xdp.h
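The conversion to BTF-defined maps leaves the lookup side untouched; inside the same object a counter bump still looks roughly like the sketch below (the helper function name is illustrative, the map name is the one declared above):

static __always_inline void count_redirect_err(u32 key)
{
    u64 *cnt = bpf_map_lookup_elem(&redirect_err_cnt, &key);

    if (cnt)
        *cnt += 1;    /* per-CPU slot, so no atomic is needed */
}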
|
@ -129,19 +129,19 @@ struct datarec {
|
|||
};
|
||||
#define MAX_CPUS 64
|
||||
|
||||
struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = {
|
||||
.type = BPF_MAP_TYPE_PERCPU_ARRAY,
|
||||
.key_size = sizeof(u32),
|
||||
.value_size = sizeof(struct datarec),
|
||||
.max_entries = MAX_CPUS,
|
||||
};
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
||||
__type(key, u32);
|
||||
__type(value, struct datarec);
|
||||
__uint(max_entries, MAX_CPUS);
|
||||
} cpumap_enqueue_cnt SEC(".maps");
|
||||
|
||||
struct bpf_map_def SEC("maps") cpumap_kthread_cnt = {
|
||||
.type = BPF_MAP_TYPE_PERCPU_ARRAY,
|
||||
.key_size = sizeof(u32),
|
||||
.value_size = sizeof(struct datarec),
|
||||
.max_entries = 1,
|
||||
};
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
||||
__type(key, u32);
|
||||
__type(value, struct datarec);
|
||||
__uint(max_entries, 1);
|
||||
} cpumap_kthread_cnt SEC(".maps");
|
||||
|
||||
/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
|
||||
* Code in: kernel/include/trace/events/xdp.h
|
||||
|
@ -210,12 +210,12 @@ int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
|
|||
return 0;
|
||||
}
|
||||
|
||||
struct bpf_map_def SEC("maps") devmap_xmit_cnt = {
|
||||
.type = BPF_MAP_TYPE_PERCPU_ARRAY,
|
||||
.key_size = sizeof(u32),
|
||||
.value_size = sizeof(struct datarec),
|
||||
.max_entries = 1,
|
||||
};
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
||||
__type(key, u32);
|
||||
__type(value, struct datarec);
|
||||
__uint(max_entries, 1);
|
||||
} devmap_xmit_cnt SEC(".maps");
|
||||
|
||||
/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format
|
||||
* Code in: kernel/include/trace/events/xdp.h
|
||||
|
|
|
@ -26,12 +26,37 @@ static const char *__doc_err_only__=
|
|||
#include <net/if.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <signal.h>
|
||||
#include <bpf/bpf.h>
|
||||
#include "bpf_load.h"
|
||||
#include <bpf/libbpf.h>
|
||||
#include "bpf_util.h"
|
||||
|
||||
enum map_type {
|
||||
REDIRECT_ERR_CNT,
|
||||
EXCEPTION_CNT,
|
||||
CPUMAP_ENQUEUE_CNT,
|
||||
CPUMAP_KTHREAD_CNT,
|
||||
DEVMAP_XMIT_CNT,
|
||||
};
|
||||
|
||||
static const char *const map_type_strings[] = {
|
||||
[REDIRECT_ERR_CNT] = "redirect_err_cnt",
|
||||
[EXCEPTION_CNT] = "exception_cnt",
|
||||
[CPUMAP_ENQUEUE_CNT] = "cpumap_enqueue_cnt",
|
||||
[CPUMAP_KTHREAD_CNT] = "cpumap_kthread_cnt",
|
||||
[DEVMAP_XMIT_CNT] = "devmap_xmit_cnt",
|
||||
};
|
||||
|
||||
#define NUM_MAP 5
|
||||
#define NUM_TP 8
|
||||
|
||||
static int tp_cnt;
|
||||
static int map_cnt;
|
||||
static int verbose = 1;
|
||||
static bool debug = false;
|
||||
struct bpf_map *map_data[NUM_MAP] = {};
|
||||
struct bpf_link *tp_links[NUM_TP] = {};
|
||||
struct bpf_object *obj;
|
||||
|
||||
static const struct option long_options[] = {
|
||||
{"help", no_argument, NULL, 'h' },
|
||||
|
@ -41,6 +66,16 @@ static const struct option long_options[] = {
|
|||
{0, 0, NULL, 0 }
|
||||
};
|
||||
|
||||
static void int_exit(int sig)
|
||||
{
|
||||
/* Detach tracepoints */
|
||||
while (tp_cnt)
|
||||
bpf_link__destroy(tp_links[--tp_cnt]);
|
||||
|
||||
bpf_object__close(obj);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */
|
||||
#define EXIT_FAIL_MEM 5
|
||||
|
||||
|
@@ -483,23 +518,23 @@ static bool stats_collect(struct stats_record *rec)
     * this can happen by someone running perf-record -e
     */

    fd = map_data[0].fd; /* map0: redirect_err_cnt */
    fd = bpf_map__fd(map_data[REDIRECT_ERR_CNT]);
    for (i = 0; i < REDIR_RES_MAX; i++)
        map_collect_record_u64(fd, i, &rec->xdp_redirect[i]);

    fd = map_data[1].fd; /* map1: exception_cnt */
    fd = bpf_map__fd(map_data[EXCEPTION_CNT]);
    for (i = 0; i < XDP_ACTION_MAX; i++) {
        map_collect_record_u64(fd, i, &rec->xdp_exception[i]);
    }

    fd = map_data[2].fd; /* map2: cpumap_enqueue_cnt */
    fd = bpf_map__fd(map_data[CPUMAP_ENQUEUE_CNT]);
    for (i = 0; i < MAX_CPUS; i++)
        map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]);

    fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
    fd = bpf_map__fd(map_data[CPUMAP_KTHREAD_CNT]);
    map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);

    fd = map_data[4].fd; /* map4: devmap_xmit_cnt */
    fd = bpf_map__fd(map_data[DEVMAP_XMIT_CNT]);
    map_collect_record(fd, 0, &rec->xdp_devmap_xmit);

    return true;
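The map_collect_record*() helpers themselves sit outside this hunk; reading one key of such a per-CPU array from userspace boils down to something like the sketch below, where libbpf returns one value slot per possible CPU (the function name is illustrative):

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static __u64 sum_percpu_u64(int map_fd, __u32 key)
{
    unsigned int nr_cpus = libbpf_num_possible_cpus();
    __u64 values[nr_cpus], sum = 0;
    unsigned int i;

    if (bpf_map_lookup_elem(map_fd, &key, values))
        return 0;

    for (i = 0; i < nr_cpus; i++)
        sum += values[i];
    return sum;
}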
@ -598,8 +633,8 @@ static void stats_poll(int interval, bool err_only)
|
|||
|
||||
/* TODO Need more advanced stats on error types */
|
||||
if (verbose) {
|
||||
printf(" - Stats map0: %s\n", map_data[0].name);
|
||||
printf(" - Stats map1: %s\n", map_data[1].name);
|
||||
printf(" - Stats map0: %s\n", bpf_map__name(map_data[0]));
|
||||
printf(" - Stats map1: %s\n", bpf_map__name(map_data[1]));
|
||||
printf("\n");
|
||||
}
|
||||
fflush(stdout);
|
||||
|
@ -618,44 +653,51 @@ static void stats_poll(int interval, bool err_only)
|
|||
|
||||
static void print_bpf_prog_info(void)
|
||||
{
|
||||
int i;
|
||||
struct bpf_program *prog;
|
||||
struct bpf_map *map;
|
||||
int i = 0;
|
||||
|
||||
/* Prog info */
|
||||
printf("Loaded BPF prog have %d bpf program(s)\n", prog_cnt);
|
||||
for (i = 0; i < prog_cnt; i++) {
|
||||
printf(" - prog_fd[%d] = fd(%d)\n", i, prog_fd[i]);
|
||||
printf("Loaded BPF prog have %d bpf program(s)\n", tp_cnt);
|
||||
bpf_object__for_each_program(prog, obj) {
|
||||
printf(" - prog_fd[%d] = fd(%d)\n", i, bpf_program__fd(prog));
|
||||
i++;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
/* Maps info */
|
||||
printf("Loaded BPF prog have %d map(s)\n", map_data_count);
|
||||
for (i = 0; i < map_data_count; i++) {
|
||||
char *name = map_data[i].name;
|
||||
int fd = map_data[i].fd;
|
||||
printf("Loaded BPF prog have %d map(s)\n", map_cnt);
|
||||
bpf_object__for_each_map(map, obj) {
|
||||
const char *name = bpf_map__name(map);
|
||||
int fd = bpf_map__fd(map);
|
||||
|
||||
printf(" - map_data[%d] = fd(%d) name:%s\n", i, fd, name);
|
||||
i++;
|
||||
}
|
||||
|
||||
/* Event info */
|
||||
printf("Searching for (max:%d) event file descriptor(s)\n", prog_cnt);
|
||||
for (i = 0; i < prog_cnt; i++) {
|
||||
if (event_fd[i] != -1)
|
||||
printf(" - event_fd[%d] = fd(%d)\n", i, event_fd[i]);
|
||||
printf("Searching for (max:%d) event file descriptor(s)\n", tp_cnt);
|
||||
for (i = 0; i < tp_cnt; i++) {
|
||||
int fd = bpf_link__fd(tp_links[i]);
|
||||
|
||||
if (fd != -1)
|
||||
printf(" - event_fd[%d] = fd(%d)\n", i, fd);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
|
||||
struct bpf_program *prog;
|
||||
int longindex = 0, opt;
|
||||
int ret = EXIT_SUCCESS;
|
||||
char bpf_obj_file[256];
|
||||
int ret = EXIT_FAILURE;
|
||||
enum map_type type;
|
||||
char filename[256];
|
||||
|
||||
/* Default settings: */
|
||||
bool errors_only = true;
|
||||
int interval = 2;
|
||||
|
||||
snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o", argv[0]);
|
||||
|
||||
/* Parse commands line args */
|
||||
while ((opt = getopt_long(argc, argv, "hDSs:",
|
||||
long_options, &longindex)) != -1) {
|
||||
|
@ -672,40 +714,79 @@ int main(int argc, char **argv)
|
|||
case 'h':
|
||||
default:
|
||||
usage(argv);
|
||||
return EXIT_FAILURE;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
|
||||
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
|
||||
perror("setrlimit(RLIMIT_MEMLOCK)");
|
||||
return EXIT_FAILURE;
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (load_bpf_file(bpf_obj_file)) {
|
||||
printf("ERROR - bpf_log_buf: %s", bpf_log_buf);
|
||||
return EXIT_FAILURE;
|
||||
/* Remove tracepoint program when program is interrupted or killed */
|
||||
signal(SIGINT, int_exit);
|
||||
signal(SIGTERM, int_exit);
|
||||
|
||||
obj = bpf_object__open_file(filename, NULL);
|
||||
if (libbpf_get_error(obj)) {
|
||||
printf("ERROR: opening BPF object file failed\n");
|
||||
obj = NULL;
|
||||
goto cleanup;
|
||||
}
|
||||
if (!prog_fd[0]) {
|
||||
printf("ERROR - load_bpf_file: %s\n", strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
|
||||
/* load BPF program */
|
||||
if (bpf_object__load(obj)) {
|
||||
printf("ERROR: loading BPF object file failed\n");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
for (type = 0; type < NUM_MAP; type++) {
|
||||
map_data[type] =
|
||||
bpf_object__find_map_by_name(obj, map_type_strings[type]);
|
||||
|
||||
if (libbpf_get_error(map_data[type])) {
|
||||
printf("ERROR: finding a map in obj file failed\n");
|
||||
goto cleanup;
|
||||
}
|
||||
map_cnt++;
|
||||
}
|
||||
|
||||
bpf_object__for_each_program(prog, obj) {
|
||||
tp_links[tp_cnt] = bpf_program__attach(prog);
|
||||
if (libbpf_get_error(tp_links[tp_cnt])) {
|
||||
printf("ERROR: bpf_program__attach failed\n");
|
||||
tp_links[tp_cnt] = NULL;
|
||||
goto cleanup;
|
||||
}
|
||||
tp_cnt++;
|
||||
}
|
||||
|
||||
if (debug) {
|
||||
print_bpf_prog_info();
|
||||
}
|
||||
|
||||
/* Unload/stop tracepoint event by closing fd's */
|
||||
/* Unload/stop tracepoint event by closing bpf_link's */
|
||||
if (errors_only) {
|
||||
/* The prog_fd[i] and event_fd[i] depend on the
|
||||
* order the functions was defined in _kern.c
|
||||
/* The bpf_link[i] depend on the order of
|
||||
* the functions was defined in _kern.c
|
||||
*/
|
||||
close(event_fd[2]); /* tracepoint/xdp/xdp_redirect */
|
||||
close(prog_fd[2]); /* func: trace_xdp_redirect */
|
||||
close(event_fd[3]); /* tracepoint/xdp/xdp_redirect_map */
|
||||
close(prog_fd[3]); /* func: trace_xdp_redirect_map */
|
||||
bpf_link__destroy(tp_links[2]); /* tracepoint/xdp/xdp_redirect */
|
||||
tp_links[2] = NULL;
|
||||
|
||||
bpf_link__destroy(tp_links[3]); /* tracepoint/xdp/xdp_redirect_map */
|
||||
tp_links[3] = NULL;
|
||||
}
|
||||
|
||||
stats_poll(interval, errors_only);
|
||||
|
||||
ret = EXIT_SUCCESS;
|
||||
|
||||
cleanup:
|
||||
/* Detach tracepoints */
|
||||
while (tp_cnt)
|
||||
bpf_link__destroy(tp_links[--tp_cnt]);
|
||||
|
||||
bpf_object__close(obj);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -37,18 +37,35 @@ static __u32 prog_id;
|
|||
|
||||
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
|
||||
static int n_cpus;
|
||||
static int cpu_map_fd;
|
||||
static int rx_cnt_map_fd;
|
||||
static int redirect_err_cnt_map_fd;
|
||||
static int cpumap_enqueue_cnt_map_fd;
|
||||
static int cpumap_kthread_cnt_map_fd;
|
||||
static int cpus_available_map_fd;
|
||||
static int cpus_count_map_fd;
|
||||
static int cpus_iterator_map_fd;
|
||||
static int exception_cnt_map_fd;
|
||||
|
||||
enum map_type {
|
||||
CPU_MAP,
|
||||
RX_CNT,
|
||||
REDIRECT_ERR_CNT,
|
||||
CPUMAP_ENQUEUE_CNT,
|
||||
CPUMAP_KTHREAD_CNT,
|
||||
CPUS_AVAILABLE,
|
||||
CPUS_COUNT,
|
||||
CPUS_ITERATOR,
|
||||
EXCEPTION_CNT,
|
||||
};
|
||||
|
||||
static const char *const map_type_strings[] = {
|
||||
[CPU_MAP] = "cpu_map",
|
||||
[RX_CNT] = "rx_cnt",
|
||||
[REDIRECT_ERR_CNT] = "redirect_err_cnt",
|
||||
[CPUMAP_ENQUEUE_CNT] = "cpumap_enqueue_cnt",
|
||||
[CPUMAP_KTHREAD_CNT] = "cpumap_kthread_cnt",
|
||||
[CPUS_AVAILABLE] = "cpus_available",
|
||||
[CPUS_COUNT] = "cpus_count",
|
||||
[CPUS_ITERATOR] = "cpus_iterator",
|
||||
[EXCEPTION_CNT] = "exception_cnt",
|
||||
};
|
||||
|
||||
#define NUM_TP 5
|
||||
struct bpf_link *tp_links[NUM_TP] = { 0 };
|
||||
#define NUM_MAP 9
|
||||
struct bpf_link *tp_links[NUM_TP] = {};
|
||||
static int map_fds[NUM_MAP];
|
||||
static int tp_cnt = 0;
|
||||
|
||||
/* Exit return codes */
|
||||
|
@ -527,20 +544,20 @@ static void stats_collect(struct stats_record *rec)
|
|||
{
|
||||
int fd, i;
|
||||
|
||||
fd = rx_cnt_map_fd;
|
||||
fd = map_fds[RX_CNT];
|
||||
map_collect_percpu(fd, 0, &rec->rx_cnt);
|
||||
|
||||
fd = redirect_err_cnt_map_fd;
|
||||
fd = map_fds[REDIRECT_ERR_CNT];
|
||||
map_collect_percpu(fd, 1, &rec->redir_err);
|
||||
|
||||
fd = cpumap_enqueue_cnt_map_fd;
|
||||
fd = map_fds[CPUMAP_ENQUEUE_CNT];
|
||||
for (i = 0; i < n_cpus; i++)
|
||||
map_collect_percpu(fd, i, &rec->enq[i]);
|
||||
|
||||
fd = cpumap_kthread_cnt_map_fd;
|
||||
fd = map_fds[CPUMAP_KTHREAD_CNT];
|
||||
map_collect_percpu(fd, 0, &rec->kthread);
|
||||
|
||||
fd = exception_cnt_map_fd;
|
||||
fd = map_fds[EXCEPTION_CNT];
|
||||
map_collect_percpu(fd, 0, &rec->exception);
|
||||
}
|
||||
|
||||
|
@ -565,7 +582,7 @@ static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
|
|||
/* Add a CPU entry to cpumap, as this allocate a cpu entry in
|
||||
* the kernel for the cpu.
|
||||
*/
|
||||
ret = bpf_map_update_elem(cpu_map_fd, &cpu, value, 0);
|
||||
ret = bpf_map_update_elem(map_fds[CPU_MAP], &cpu, value, 0);
|
||||
if (ret) {
|
||||
fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret);
|
||||
exit(EXIT_FAIL_BPF);
|
||||
|
@ -574,21 +591,21 @@ static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
|
|||
/* Inform bpf_prog's that a new CPU is available to select
|
||||
* from via some control maps.
|
||||
*/
|
||||
ret = bpf_map_update_elem(cpus_available_map_fd, &avail_idx, &cpu, 0);
|
||||
ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &avail_idx, &cpu, 0);
|
||||
if (ret) {
|
||||
fprintf(stderr, "Add to avail CPUs failed\n");
|
||||
exit(EXIT_FAIL_BPF);
|
||||
}
|
||||
|
||||
/* When not replacing/updating existing entry, bump the count */
|
||||
ret = bpf_map_lookup_elem(cpus_count_map_fd, &key, &curr_cpus_count);
|
||||
ret = bpf_map_lookup_elem(map_fds[CPUS_COUNT], &key, &curr_cpus_count);
|
||||
if (ret) {
|
||||
fprintf(stderr, "Failed reading curr cpus_count\n");
|
||||
exit(EXIT_FAIL_BPF);
|
||||
}
|
||||
if (new) {
|
||||
curr_cpus_count++;
|
||||
ret = bpf_map_update_elem(cpus_count_map_fd, &key,
|
||||
ret = bpf_map_update_elem(map_fds[CPUS_COUNT], &key,
|
||||
&curr_cpus_count, 0);
|
||||
if (ret) {
|
||||
fprintf(stderr, "Failed write curr cpus_count\n");
|
||||
|
@ -612,7 +629,7 @@ static void mark_cpus_unavailable(void)
|
|||
int ret, i;
|
||||
|
||||
for (i = 0; i < n_cpus; i++) {
|
||||
ret = bpf_map_update_elem(cpus_available_map_fd, &i,
|
||||
ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &i,
|
||||
&invalid_cpu, 0);
|
||||
if (ret) {
|
||||
fprintf(stderr, "Failed marking CPU unavailable\n");
|
||||
|
@ -665,68 +682,37 @@ static void stats_poll(int interval, bool use_separators, char *prog_name,
|
|||
free_stats_record(prev);
|
||||
}
|
||||
|
||||
static struct bpf_link * attach_tp(struct bpf_object *obj,
|
||||
const char *tp_category,
|
||||
const char* tp_name)
|
||||
static int init_tracepoints(struct bpf_object *obj)
|
||||
{
|
||||
struct bpf_program *prog;
|
||||
struct bpf_link *link;
|
||||
char sec_name[PATH_MAX];
|
||||
int len;
|
||||
|
||||
len = snprintf(sec_name, PATH_MAX, "tracepoint/%s/%s",
|
||||
tp_category, tp_name);
|
||||
if (len < 0)
|
||||
exit(EXIT_FAIL);
|
||||
bpf_object__for_each_program(prog, obj) {
|
||||
if (bpf_program__is_tracepoint(prog) != true)
|
||||
continue;
|
||||
|
||||
prog = bpf_object__find_program_by_title(obj, sec_name);
|
||||
if (!prog) {
|
||||
fprintf(stderr, "ERR: finding progsec: %s\n", sec_name);
|
||||
exit(EXIT_FAIL_BPF);
|
||||
tp_links[tp_cnt] = bpf_program__attach(prog);
|
||||
if (libbpf_get_error(tp_links[tp_cnt])) {
|
||||
tp_links[tp_cnt] = NULL;
|
||||
return -EINVAL;
|
||||
}
|
||||
tp_cnt++;
|
||||
}
|
||||
|
||||
link = bpf_program__attach_tracepoint(prog, tp_category, tp_name);
|
||||
if (libbpf_get_error(link))
|
||||
exit(EXIT_FAIL_BPF);
|
||||
|
||||
return link;
|
||||
}
|
||||
|
||||
static void init_tracepoints(struct bpf_object *obj) {
|
||||
tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_err");
|
||||
tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_map_err");
|
||||
tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_exception");
|
||||
tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_enqueue");
|
||||
tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_kthread");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int init_map_fds(struct bpf_object *obj)
|
||||
{
|
||||
/* Maps updated by tracepoints */
|
||||
redirect_err_cnt_map_fd =
|
||||
bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt");
|
||||
exception_cnt_map_fd =
|
||||
bpf_object__find_map_fd_by_name(obj, "exception_cnt");
|
||||
cpumap_enqueue_cnt_map_fd =
|
||||
bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt");
|
||||
cpumap_kthread_cnt_map_fd =
|
||||
bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt");
|
||||
enum map_type type;
|
||||
|
||||
/* Maps used by XDP */
|
||||
rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt");
|
||||
cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map");
|
||||
cpus_available_map_fd =
|
||||
bpf_object__find_map_fd_by_name(obj, "cpus_available");
|
||||
cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count");
|
||||
cpus_iterator_map_fd =
|
||||
bpf_object__find_map_fd_by_name(obj, "cpus_iterator");
|
||||
for (type = 0; type < NUM_MAP; type++) {
|
||||
map_fds[type] =
|
||||
bpf_object__find_map_fd_by_name(obj,
|
||||
map_type_strings[type]);
|
||||
|
||||
if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 ||
|
||||
redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 ||
|
||||
cpumap_kthread_cnt_map_fd < 0 || cpus_available_map_fd < 0 ||
|
||||
cpus_count_map_fd < 0 || cpus_iterator_map_fd < 0 ||
|
||||
exception_cnt_map_fd < 0)
|
||||
return -ENOENT;
|
||||
if (map_fds[type] < 0)
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -795,13 +781,13 @@ int main(int argc, char **argv)
|
|||
bool stress_mode = false;
|
||||
struct bpf_program *prog;
|
||||
struct bpf_object *obj;
|
||||
int err = EXIT_FAIL;
|
||||
char filename[256];
|
||||
int added_cpus = 0;
|
||||
int longindex = 0;
|
||||
int interval = 2;
|
||||
int add_cpu = -1;
|
||||
int opt, err;
|
||||
int prog_fd;
|
||||
int opt, prog_fd;
|
||||
int *cpu, i;
|
||||
__u32 qsize;
|
||||
|
||||
|
@ -824,24 +810,29 @@ int main(int argc, char **argv)
|
|||
}
|
||||
|
||||
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
|
||||
return EXIT_FAIL;
|
||||
return err;
|
||||
|
||||
if (prog_fd < 0) {
|
||||
fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
|
||||
strerror(errno));
|
||||
return EXIT_FAIL;
|
||||
return err;
|
||||
}
|
||||
init_tracepoints(obj);
|
||||
|
||||
if (init_tracepoints(obj) < 0) {
|
||||
fprintf(stderr, "ERR: bpf_program__attach failed\n");
|
||||
return err;
|
||||
}
|
||||
|
||||
if (init_map_fds(obj) < 0) {
|
||||
fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n");
|
||||
return EXIT_FAIL;
|
||||
return err;
|
||||
}
|
||||
mark_cpus_unavailable();
|
||||
|
||||
cpu = malloc(n_cpus * sizeof(int));
|
||||
if (!cpu) {
|
||||
fprintf(stderr, "failed to allocate cpu array\n");
|
||||
return EXIT_FAIL;
|
||||
return err;
|
||||
}
|
||||
memset(cpu, 0, n_cpus * sizeof(int));
|
||||
|
||||
|
@ -960,14 +951,12 @@ int main(int argc, char **argv)
|
|||
prog = bpf_object__find_program_by_title(obj, prog_name);
|
||||
if (!prog) {
|
||||
fprintf(stderr, "bpf_object__find_program_by_title failed\n");
|
||||
err = EXIT_FAIL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
prog_fd = bpf_program__fd(prog);
|
||||
if (prog_fd < 0) {
|
||||
fprintf(stderr, "bpf_program__fd failed\n");
|
||||
err = EXIT_FAIL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -986,6 +975,8 @@ int main(int argc, char **argv)
|
|||
|
||||
stats_poll(interval, use_separators, prog_name, mprog_name,
|
||||
&value, stress_mode);
|
||||
|
||||
err = EXIT_OK;
|
||||
out:
|
||||
free(cpu);
|
||||
return err;
|
||||
|
|
|
@ -5,14 +5,12 @@
|
|||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
#define SAMPLE_SIZE 64ul
|
||||
#define MAX_CPUS 128
|
||||
|
||||
struct bpf_map_def SEC("maps") my_map = {
|
||||
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(u32),
|
||||
.max_entries = MAX_CPUS,
|
||||
};
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} my_map SEC(".maps");
|
||||
|
||||
SEC("xdp_sample")
|
||||
int xdp_sample_prog(struct xdp_md *ctx)
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
|
||||
#include "perf-sys.h"
|
||||
|
||||
#define MAX_CPUS 128
|
||||
static int if_idx;
|
||||
static char *if_name;
|
||||
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include <linux/if_xdp.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/limits.h>
|
||||
#include <linux/udp.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <locale.h>
|
||||
|
@ -79,6 +80,10 @@ static u16 opt_pkt_size = MIN_PKT_SIZE;
|
|||
static u32 opt_pkt_fill_pattern = 0x12345678;
|
||||
static bool opt_extra_stats;
|
||||
static bool opt_quiet;
|
||||
static bool opt_app_stats;
|
||||
static const char *opt_irq_str = "";
|
||||
static u32 irq_no;
|
||||
static int irqs_at_init = -1;
|
||||
static int opt_poll;
|
||||
static int opt_interval = 1;
|
||||
static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
|
||||
|
@ -91,18 +96,7 @@ static bool opt_need_wakeup = true;
|
|||
static u32 opt_num_xsks = 1;
|
||||
static u32 prog_id;
|
||||
|
||||
struct xsk_umem_info {
|
||||
struct xsk_ring_prod fq;
|
||||
struct xsk_ring_cons cq;
|
||||
struct xsk_umem *umem;
|
||||
void *buffer;
|
||||
};
|
||||
|
||||
struct xsk_socket_info {
|
||||
struct xsk_ring_cons rx;
|
||||
struct xsk_ring_prod tx;
|
||||
struct xsk_umem_info *umem;
|
||||
struct xsk_socket *xsk;
|
||||
struct xsk_ring_stats {
|
||||
unsigned long rx_npkts;
|
||||
unsigned long tx_npkts;
|
||||
unsigned long rx_dropped_npkts;
|
||||
|
@ -119,6 +113,41 @@ struct xsk_socket_info {
|
|||
unsigned long prev_rx_full_npkts;
|
||||
unsigned long prev_rx_fill_empty_npkts;
|
||||
unsigned long prev_tx_empty_npkts;
|
||||
};
|
||||
|
||||
struct xsk_driver_stats {
|
||||
unsigned long intrs;
|
||||
unsigned long prev_intrs;
|
||||
};
|
||||
|
||||
struct xsk_app_stats {
|
||||
unsigned long rx_empty_polls;
|
||||
unsigned long fill_fail_polls;
|
||||
unsigned long copy_tx_sendtos;
|
||||
unsigned long tx_wakeup_sendtos;
|
||||
unsigned long opt_polls;
|
||||
unsigned long prev_rx_empty_polls;
|
||||
unsigned long prev_fill_fail_polls;
|
||||
unsigned long prev_copy_tx_sendtos;
|
||||
unsigned long prev_tx_wakeup_sendtos;
|
||||
unsigned long prev_opt_polls;
|
||||
};
|
||||
|
||||
struct xsk_umem_info {
|
||||
struct xsk_ring_prod fq;
|
||||
struct xsk_ring_cons cq;
|
||||
struct xsk_umem *umem;
|
||||
void *buffer;
|
||||
};
|
||||
|
||||
struct xsk_socket_info {
|
||||
struct xsk_ring_cons rx;
|
||||
struct xsk_ring_prod tx;
|
||||
struct xsk_umem_info *umem;
|
||||
struct xsk_socket *xsk;
|
||||
struct xsk_ring_stats ring_stats;
|
||||
struct xsk_app_stats app_stats;
|
||||
struct xsk_driver_stats drv_stats;
|
||||
u32 outstanding_tx;
|
||||
};
|
||||
|
||||
|
@ -173,18 +202,151 @@ static int xsk_get_xdp_stats(int fd, struct xsk_socket_info *xsk)
|
|||
return err;
|
||||
|
||||
if (optlen == sizeof(struct xdp_statistics)) {
|
||||
xsk->rx_dropped_npkts = stats.rx_dropped;
|
||||
xsk->rx_invalid_npkts = stats.rx_invalid_descs;
|
||||
xsk->tx_invalid_npkts = stats.tx_invalid_descs;
|
||||
xsk->rx_full_npkts = stats.rx_ring_full;
|
||||
xsk->rx_fill_empty_npkts = stats.rx_fill_ring_empty_descs;
|
||||
xsk->tx_empty_npkts = stats.tx_ring_empty_descs;
|
||||
xsk->ring_stats.rx_dropped_npkts = stats.rx_dropped;
|
||||
xsk->ring_stats.rx_invalid_npkts = stats.rx_invalid_descs;
|
||||
xsk->ring_stats.tx_invalid_npkts = stats.tx_invalid_descs;
|
||||
xsk->ring_stats.rx_full_npkts = stats.rx_ring_full;
|
||||
xsk->ring_stats.rx_fill_empty_npkts = stats.rx_fill_ring_empty_descs;
|
||||
xsk->ring_stats.tx_empty_npkts = stats.tx_ring_empty_descs;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static void dump_app_stats(long dt)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < num_socks && xsks[i]; i++) {
|
||||
char *fmt = "%-18s %'-14.0f %'-14lu\n";
|
||||
double rx_empty_polls_ps, fill_fail_polls_ps, copy_tx_sendtos_ps,
|
||||
tx_wakeup_sendtos_ps, opt_polls_ps;
|
||||
|
||||
rx_empty_polls_ps = (xsks[i]->app_stats.rx_empty_polls -
|
||||
xsks[i]->app_stats.prev_rx_empty_polls) * 1000000000. / dt;
|
||||
fill_fail_polls_ps = (xsks[i]->app_stats.fill_fail_polls -
|
||||
xsks[i]->app_stats.prev_fill_fail_polls) * 1000000000. / dt;
|
||||
copy_tx_sendtos_ps = (xsks[i]->app_stats.copy_tx_sendtos -
|
||||
xsks[i]->app_stats.prev_copy_tx_sendtos) * 1000000000. / dt;
|
||||
tx_wakeup_sendtos_ps = (xsks[i]->app_stats.tx_wakeup_sendtos -
|
||||
xsks[i]->app_stats.prev_tx_wakeup_sendtos)
|
||||
* 1000000000. / dt;
|
||||
opt_polls_ps = (xsks[i]->app_stats.opt_polls -
|
||||
xsks[i]->app_stats.prev_opt_polls) * 1000000000. / dt;
|
||||
|
||||
printf("\n%-18s %-14s %-14s\n", "", "calls/s", "count");
|
||||
printf(fmt, "rx empty polls", rx_empty_polls_ps, xsks[i]->app_stats.rx_empty_polls);
|
||||
printf(fmt, "fill fail polls", fill_fail_polls_ps,
|
||||
xsks[i]->app_stats.fill_fail_polls);
|
||||
printf(fmt, "copy tx sendtos", copy_tx_sendtos_ps,
|
||||
xsks[i]->app_stats.copy_tx_sendtos);
|
||||
printf(fmt, "tx wakeup sendtos", tx_wakeup_sendtos_ps,
|
||||
xsks[i]->app_stats.tx_wakeup_sendtos);
|
||||
printf(fmt, "opt polls", opt_polls_ps, xsks[i]->app_stats.opt_polls);
|
||||
|
||||
xsks[i]->app_stats.prev_rx_empty_polls = xsks[i]->app_stats.rx_empty_polls;
|
||||
xsks[i]->app_stats.prev_fill_fail_polls = xsks[i]->app_stats.fill_fail_polls;
|
||||
xsks[i]->app_stats.prev_copy_tx_sendtos = xsks[i]->app_stats.copy_tx_sendtos;
|
||||
xsks[i]->app_stats.prev_tx_wakeup_sendtos = xsks[i]->app_stats.tx_wakeup_sendtos;
|
||||
xsks[i]->app_stats.prev_opt_polls = xsks[i]->app_stats.opt_polls;
|
||||
}
|
||||
}
|
||||
|
||||
static bool get_interrupt_number(void)
|
||||
{
|
||||
FILE *f_int_proc;
|
||||
char line[4096];
|
||||
bool found = false;
|
||||
|
||||
f_int_proc = fopen("/proc/interrupts", "r");
|
||||
if (f_int_proc == NULL) {
|
||||
printf("Failed to open /proc/interrupts.\n");
|
||||
return found;
|
||||
}
|
||||
|
||||
while (!feof(f_int_proc) && !found) {
|
||||
/* Make sure to read a full line at a time */
|
||||
if (fgets(line, sizeof(line), f_int_proc) == NULL ||
|
||||
line[strlen(line) - 1] != '\n') {
|
||||
printf("Error reading from interrupts file\n");
|
||||
break;
|
||||
}
|
||||
|
||||
/* Extract interrupt number from line */
|
||||
if (strstr(line, opt_irq_str) != NULL) {
|
||||
irq_no = atoi(line);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(f_int_proc);
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
static int get_irqs(void)
|
||||
{
|
||||
char count_path[PATH_MAX];
|
||||
int total_intrs = -1;
|
||||
FILE *f_count_proc;
|
||||
char line[4096];
|
||||
|
||||
snprintf(count_path, sizeof(count_path),
|
||||
"/sys/kernel/irq/%i/per_cpu_count", irq_no);
|
||||
f_count_proc = fopen(count_path, "r");
|
||||
if (f_count_proc == NULL) {
|
||||
printf("Failed to open %s\n", count_path);
|
||||
return total_intrs;
|
||||
}
|
||||
|
||||
if (fgets(line, sizeof(line), f_count_proc) == NULL ||
|
||||
line[strlen(line) - 1] != '\n') {
|
||||
printf("Error reading from %s\n", count_path);
|
||||
} else {
|
||||
static const char com[2] = ",";
|
||||
char *token;
|
||||
|
||||
total_intrs = 0;
|
||||
token = strtok(line, com);
|
||||
while (token != NULL) {
|
||||
/* sum up interrupts across all cores */
|
||||
total_intrs += atoi(token);
|
||||
token = strtok(NULL, com);
|
||||
}
|
||||
}
|
||||
|
||||
fclose(f_count_proc);
|
||||
|
||||
return total_intrs;
|
||||
}
|
||||
|
||||
static void dump_driver_stats(long dt)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < num_socks && xsks[i]; i++) {
|
||||
char *fmt = "%-18s %'-14.0f %'-14lu\n";
|
||||
double intrs_ps;
|
||||
int n_ints = get_irqs();
|
||||
|
||||
if (n_ints < 0) {
|
||||
printf("error getting intr info for intr %i\n", irq_no);
|
||||
return;
|
||||
}
|
||||
xsks[i]->drv_stats.intrs = n_ints - irqs_at_init;
|
||||
|
||||
intrs_ps = (xsks[i]->drv_stats.intrs - xsks[i]->drv_stats.prev_intrs) *
|
||||
1000000000. / dt;
|
||||
|
||||
printf("\n%-18s %-14s %-14s\n", "", "intrs/s", "count");
|
||||
printf(fmt, "irqs", intrs_ps, xsks[i]->drv_stats.intrs);
|
||||
|
||||
xsks[i]->drv_stats.prev_intrs = xsks[i]->drv_stats.intrs;
|
||||
}
|
||||
}
|
||||
|
||||
static void dump_stats(void)
|
||||
{
|
||||
unsigned long now = get_nsecs();
|
||||
|
@ -194,67 +356,83 @@ static void dump_stats(void)
|
|||
prev_time = now;
|
||||
|
||||
for (i = 0; i < num_socks && xsks[i]; i++) {
|
||||
char *fmt = "%-15s %'-11.0f %'-11lu\n";
|
||||
char *fmt = "%-18s %'-14.0f %'-14lu\n";
|
||||
double rx_pps, tx_pps, dropped_pps, rx_invalid_pps, full_pps, fill_empty_pps,
|
||||
tx_invalid_pps, tx_empty_pps;
|
||||
|
||||
rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) *
|
||||
rx_pps = (xsks[i]->ring_stats.rx_npkts - xsks[i]->ring_stats.prev_rx_npkts) *
|
||||
1000000000. / dt;
|
||||
tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) *
|
||||
tx_pps = (xsks[i]->ring_stats.tx_npkts - xsks[i]->ring_stats.prev_tx_npkts) *
|
||||
1000000000. / dt;
|
||||
|
||||
printf("\n sock%d@", i);
|
||||
print_benchmark(false);
|
||||
printf("\n");
|
||||
|
||||
printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
|
||||
printf("%-18s %-14s %-14s %-14.2f\n", "", "pps", "pkts",
|
||||
dt / 1000000000.);
|
||||
printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
|
||||
printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);
|
||||
printf(fmt, "rx", rx_pps, xsks[i]->ring_stats.rx_npkts);
|
||||
printf(fmt, "tx", tx_pps, xsks[i]->ring_stats.tx_npkts);
|
||||
|
||||
xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts;
|
||||
xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts;
|
||||
xsks[i]->ring_stats.prev_rx_npkts = xsks[i]->ring_stats.rx_npkts;
|
||||
xsks[i]->ring_stats.prev_tx_npkts = xsks[i]->ring_stats.tx_npkts;
|
||||
|
||||
if (opt_extra_stats) {
|
||||
if (!xsk_get_xdp_stats(xsk_socket__fd(xsks[i]->xsk), xsks[i])) {
|
||||
dropped_pps = (xsks[i]->rx_dropped_npkts -
|
||||
xsks[i]->prev_rx_dropped_npkts) * 1000000000. / dt;
|
||||
rx_invalid_pps = (xsks[i]->rx_invalid_npkts -
|
||||
xsks[i]->prev_rx_invalid_npkts) * 1000000000. / dt;
|
||||
tx_invalid_pps = (xsks[i]->tx_invalid_npkts -
|
||||
xsks[i]->prev_tx_invalid_npkts) * 1000000000. / dt;
|
||||
full_pps = (xsks[i]->rx_full_npkts -
|
||||
xsks[i]->prev_rx_full_npkts) * 1000000000. / dt;
|
||||
fill_empty_pps = (xsks[i]->rx_fill_empty_npkts -
|
||||
xsks[i]->prev_rx_fill_empty_npkts)
|
||||
* 1000000000. / dt;
|
||||
tx_empty_pps = (xsks[i]->tx_empty_npkts -
|
||||
xsks[i]->prev_tx_empty_npkts) * 1000000000. / dt;
|
||||
dropped_pps = (xsks[i]->ring_stats.rx_dropped_npkts -
|
||||
xsks[i]->ring_stats.prev_rx_dropped_npkts) *
|
||||
1000000000. / dt;
|
||||
rx_invalid_pps = (xsks[i]->ring_stats.rx_invalid_npkts -
|
||||
xsks[i]->ring_stats.prev_rx_invalid_npkts) *
|
||||
1000000000. / dt;
|
||||
tx_invalid_pps = (xsks[i]->ring_stats.tx_invalid_npkts -
|
||||
xsks[i]->ring_stats.prev_tx_invalid_npkts) *
|
||||
1000000000. / dt;
|
||||
full_pps = (xsks[i]->ring_stats.rx_full_npkts -
|
||||
xsks[i]->ring_stats.prev_rx_full_npkts) *
|
||||
1000000000. / dt;
|
||||
fill_empty_pps = (xsks[i]->ring_stats.rx_fill_empty_npkts -
|
||||
xsks[i]->ring_stats.prev_rx_fill_empty_npkts) *
|
||||
1000000000. / dt;
|
||||
tx_empty_pps = (xsks[i]->ring_stats.tx_empty_npkts -
|
||||
xsks[i]->ring_stats.prev_tx_empty_npkts) *
|
||||
1000000000. / dt;
|
||||
|
||||
printf(fmt, "rx dropped", dropped_pps,
|
||||
xsks[i]->rx_dropped_npkts);
|
||||
xsks[i]->ring_stats.rx_dropped_npkts);
|
||||
printf(fmt, "rx invalid", rx_invalid_pps,
|
||||
xsks[i]->rx_invalid_npkts);
|
||||
xsks[i]->ring_stats.rx_invalid_npkts);
|
||||
printf(fmt, "tx invalid", tx_invalid_pps,
|
||||
xsks[i]->tx_invalid_npkts);
|
||||
xsks[i]->ring_stats.tx_invalid_npkts);
|
||||
printf(fmt, "rx queue full", full_pps,
|
||||
xsks[i]->rx_full_npkts);
|
||||
xsks[i]->ring_stats.rx_full_npkts);
|
||||
printf(fmt, "fill ring empty", fill_empty_pps,
|
||||
xsks[i]->rx_fill_empty_npkts);
|
||||
xsks[i]->ring_stats.rx_fill_empty_npkts);
|
||||
printf(fmt, "tx ring empty", tx_empty_pps,
|
||||
xsks[i]->tx_empty_npkts);
|
||||
xsks[i]->ring_stats.tx_empty_npkts);
|
||||
|
||||
xsks[i]->prev_rx_dropped_npkts = xsks[i]->rx_dropped_npkts;
|
||||
xsks[i]->prev_rx_invalid_npkts = xsks[i]->rx_invalid_npkts;
|
||||
xsks[i]->prev_tx_invalid_npkts = xsks[i]->tx_invalid_npkts;
|
||||
xsks[i]->prev_rx_full_npkts = xsks[i]->rx_full_npkts;
|
||||
xsks[i]->prev_rx_fill_empty_npkts = xsks[i]->rx_fill_empty_npkts;
|
||||
xsks[i]->prev_tx_empty_npkts = xsks[i]->tx_empty_npkts;
|
||||
xsks[i]->ring_stats.prev_rx_dropped_npkts =
|
||||
xsks[i]->ring_stats.rx_dropped_npkts;
|
||||
xsks[i]->ring_stats.prev_rx_invalid_npkts =
|
||||
xsks[i]->ring_stats.rx_invalid_npkts;
|
||||
xsks[i]->ring_stats.prev_tx_invalid_npkts =
|
||||
xsks[i]->ring_stats.tx_invalid_npkts;
|
||||
xsks[i]->ring_stats.prev_rx_full_npkts =
|
||||
xsks[i]->ring_stats.rx_full_npkts;
|
||||
xsks[i]->ring_stats.prev_rx_fill_empty_npkts =
|
||||
xsks[i]->ring_stats.rx_fill_empty_npkts;
|
||||
xsks[i]->ring_stats.prev_tx_empty_npkts =
|
||||
xsks[i]->ring_stats.tx_empty_npkts;
|
||||
} else {
|
||||
printf("%-15s\n", "Error retrieving extra stats");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (opt_app_stats)
|
||||
dump_app_stats(dt);
|
||||
if (irq_no)
|
||||
dump_driver_stats(dt);
|
||||
}
|
||||
|
||||
static bool is_benchmark_done(void)
|
||||
|
@ -693,6 +871,17 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
|
|||
if (ret)
|
||||
exit_with_error(-ret);
|
||||
|
||||
xsk->app_stats.rx_empty_polls = 0;
|
||||
xsk->app_stats.fill_fail_polls = 0;
|
||||
xsk->app_stats.copy_tx_sendtos = 0;
|
||||
xsk->app_stats.tx_wakeup_sendtos = 0;
|
||||
xsk->app_stats.opt_polls = 0;
|
||||
xsk->app_stats.prev_rx_empty_polls = 0;
|
||||
xsk->app_stats.prev_fill_fail_polls = 0;
|
||||
xsk->app_stats.prev_copy_tx_sendtos = 0;
|
||||
xsk->app_stats.prev_tx_wakeup_sendtos = 0;
|
||||
xsk->app_stats.prev_opt_polls = 0;
|
||||
|
||||
return xsk;
|
||||
}
|
||||
|
||||
|
@ -720,6 +909,8 @@ static struct option long_options[] = {
|
|||
{"tx-pkt-pattern", required_argument, 0, 'P'},
|
||||
{"extra-stats", no_argument, 0, 'x'},
|
||||
{"quiet", no_argument, 0, 'Q'},
|
||||
{"app-stats", no_argument, 0, 'a'},
|
||||
{"irq-string", no_argument, 0, 'I'},
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
|
||||
|
@@ -756,6 +947,8 @@ static void usage(const char *prog)
        " -P, --tx-pkt-pattern=n  Packet fill pattern. Default: 0x%x\n"
        " -x, --extra-stats Display extra statistics.\n"
        " -Q, --quiet Do not display any stats.\n"
        " -a, --app-stats Display application (syscall) statistics.\n"
        " -I, --irq-string Display driver interrupt statistics for interface associated with irq-string.\n"
        "\n";
    fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE,
            opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE,
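Put together, a hypothetical invocation exercising the new statistics options might look like the line below; the interface, queue number and IRQ name are made up for illustration:

# rxdrop benchmark on queue 0 of eth0, with application stats (-a) and
# driver interrupt stats for a made-up IRQ name (-I):
sudo ./xdpsock -i eth0 -q 0 -r -a -I eth0-rx-0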
@ -771,7 +964,7 @@ static void parse_command_line(int argc, char **argv)
|
|||
opterr = 0;
|
||||
|
||||
for (;;) {
|
||||
c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQ",
|
||||
c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:",
|
||||
long_options, &option_index);
|
||||
if (c == -1)
|
||||
break;
|
||||
|
@ -857,6 +1050,19 @@ static void parse_command_line(int argc, char **argv)
|
|||
break;
|
||||
case 'Q':
|
||||
opt_quiet = 1;
|
||||
break;
|
||||
case 'a':
|
||||
opt_app_stats = 1;
|
||||
break;
|
||||
case 'I':
|
||||
opt_irq_str = optarg;
|
||||
if (get_interrupt_number())
|
||||
irqs_at_init = get_irqs();
|
||||
if (irqs_at_init < 0) {
|
||||
fprintf(stderr, "ERROR: Failed to get irqs for %s\n", opt_irq_str);
|
||||
usage(basename(argv[0]));
|
||||
}
|
||||
|
||||
break;
|
||||
default:
|
||||
usage(basename(argv[0]));
|
||||
|
@ -908,8 +1114,10 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
|
|||
* is driven by the NAPI loop. So as an optimization, we do not have to call
|
||||
* sendto() all the time in zero-copy mode for l2fwd.
|
||||
*/
|
||||
if (opt_xdp_bind_flags & XDP_COPY)
|
||||
if (opt_xdp_bind_flags & XDP_COPY) {
|
||||
xsk->app_stats.copy_tx_sendtos++;
|
||||
kick_tx(xsk);
|
||||
}
|
||||
|
||||
ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size :
|
||||
xsk->outstanding_tx;
|
||||
|
@ -924,8 +1132,10 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
|
|||
while (ret != rcvd) {
|
||||
if (ret < 0)
|
||||
exit_with_error(-ret);
|
||||
if (xsk_ring_prod__needs_wakeup(&umem->fq))
|
||||
if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
|
||||
xsk->app_stats.fill_fail_polls++;
|
||||
ret = poll(fds, num_socks, opt_timeout);
|
||||
}
|
||||
ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
|
||||
}
|
||||
|
||||
|
@ -936,7 +1146,7 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
|
|||
xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
|
||||
xsk_ring_cons__release(&xsk->umem->cq, rcvd);
|
||||
xsk->outstanding_tx -= rcvd;
|
||||
xsk->tx_npkts += rcvd;
|
||||
xsk->ring_stats.tx_npkts += rcvd;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -949,14 +1159,16 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk,
|
|||
if (!xsk->outstanding_tx)
|
||||
return;
|
||||
|
||||
if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
|
||||
if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) {
|
||||
xsk->app_stats.tx_wakeup_sendtos++;
|
||||
kick_tx(xsk);
|
||||
}
|
||||
|
||||
rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
|
||||
if (rcvd > 0) {
|
||||
xsk_ring_cons__release(&xsk->umem->cq, rcvd);
|
||||
xsk->outstanding_tx -= rcvd;
|
||||
xsk->tx_npkts += rcvd;
|
||||
xsk->ring_stats.tx_npkts += rcvd;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -968,8 +1180,10 @@ static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds)
|
|||
|
||||
rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
|
||||
if (!rcvd) {
|
||||
if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
|
||||
if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
|
||||
xsk->app_stats.rx_empty_polls++;
|
||||
ret = poll(fds, num_socks, opt_timeout);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -977,8 +1191,10 @@ static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds)
|
|||
while (ret != rcvd) {
|
||||
if (ret < 0)
|
||||
exit_with_error(-ret);
|
||||
if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
|
||||
if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
|
||||
xsk->app_stats.fill_fail_polls++;
|
||||
ret = poll(fds, num_socks, opt_timeout);
|
||||
}
|
||||
ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
|
||||
}
|
||||
|
||||
|
@ -996,7 +1212,7 @@ static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds)
|
|||
|
||||
xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
|
||||
xsk_ring_cons__release(&xsk->rx, rcvd);
|
||||
xsk->rx_npkts += rcvd;
|
||||
xsk->ring_stats.rx_npkts += rcvd;
|
||||
}
|
||||
|
||||
static void rx_drop_all(void)
|
||||
|
@ -1011,6 +1227,8 @@ static void rx_drop_all(void)
|
|||
|
||||
for (;;) {
|
||||
if (opt_poll) {
|
||||
for (i = 0; i < num_socks; i++)
|
||||
xsks[i]->app_stats.opt_polls++;
|
||||
ret = poll(fds, num_socks, opt_timeout);
|
||||
if (ret <= 0)
|
||||
continue;
|
||||
|
@ -1091,6 +1309,8 @@ static void tx_only_all(void)
|
|||
int batch_size = get_batch_size(pkt_cnt);
|
||||
|
||||
if (opt_poll) {
|
||||
for (i = 0; i < num_socks; i++)
|
||||
xsks[i]->app_stats.opt_polls++;
|
||||
ret = poll(fds, num_socks, opt_timeout);
|
||||
if (ret <= 0)
|
||||
continue;
|
||||
|
@ -1122,8 +1342,10 @@ static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
|
|||
|
||||
rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
|
||||
if (!rcvd) {
|
||||
if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
|
||||
if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
|
||||
xsk->app_stats.rx_empty_polls++;
|
||||
ret = poll(fds, num_socks, opt_timeout);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1132,8 +1354,10 @@ static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
|
|||
if (ret < 0)
|
||||
exit_with_error(-ret);
|
||||
complete_tx_l2fwd(xsk, fds);
|
||||
if (xsk_ring_prod__needs_wakeup(&xsk->tx))
|
||||
if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
|
||||
xsk->app_stats.tx_wakeup_sendtos++;
|
||||
kick_tx(xsk);
|
||||
}
|
||||
ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
|
||||
}
|
||||
|
||||
|
@ -1155,7 +1379,7 @@ static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
|
|||
xsk_ring_prod__submit(&xsk->tx, rcvd);
|
||||
xsk_ring_cons__release(&xsk->rx, rcvd);
|
||||
|
||||
xsk->rx_npkts += rcvd;
|
||||
xsk->ring_stats.rx_npkts += rcvd;
|
||||
xsk->outstanding_tx += rcvd;
|
||||
}
|
||||
|
||||
|
@ -1171,6 +1395,8 @@ static void l2fwd_all(void)
|
|||
|
||||
for (;;) {
|
||||
if (opt_poll) {
|
||||
for (i = 0; i < num_socks; i++)
|
||||
xsks[i]->app_stats.opt_polls++;
|
||||
ret = poll(fds, num_socks, opt_timeout);
|
||||
if (ret <= 0)
|
||||
continue;
|
||||
|
|
|
@ -356,18 +356,36 @@ enum bpf_link_type {
#define BPF_F_SLEEPABLE		(1U << 4)

/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
 * two extensions:
 * the following extensions:
 *
 * insn[0].src_reg:  BPF_PSEUDO_MAP_FD   BPF_PSEUDO_MAP_VALUE
 * insn[0].imm:      map fd              map fd
 * insn[1].imm:      0                   offset into value
 * insn[0].off:      0                   0
 * insn[1].off:      0                   0
 * ldimm64 rewrite:  address of map      address of map[0]+offset
 * verifier type:    CONST_PTR_TO_MAP    PTR_TO_MAP_VALUE
 * insn[0].src_reg:  BPF_PSEUDO_MAP_FD
 * insn[0].imm:      map fd
 * insn[1].imm:      0
 * insn[0].off:      0
 * insn[1].off:      0
 * ldimm64 rewrite:  address of map
 * verifier type:    CONST_PTR_TO_MAP
 */
#define BPF_PSEUDO_MAP_FD	1
/* insn[0].src_reg:  BPF_PSEUDO_MAP_VALUE
 * insn[0].imm:      map fd
 * insn[1].imm:      offset into value
 * insn[0].off:      0
 * insn[1].off:      0
 * ldimm64 rewrite:  address of map[0]+offset
 * verifier type:    PTR_TO_MAP_VALUE
 */
#define BPF_PSEUDO_MAP_VALUE	2
/* insn[0].src_reg:  BPF_PSEUDO_BTF_ID
 * insn[0].imm:      kernel btf id of VAR
 * insn[1].imm:      0
 * insn[0].off:      0
 * insn[1].off:      0
 * ldimm64 rewrite:  address of the kernel variable
 * verifier type:    PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var
 *                   is struct/union.
 */
#define BPF_PSEUDO_BTF_ID	3

/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
 * offset to another bpf function

@ -417,6 +435,9 @@ enum {

	/* Share perf_event among processes */
	BPF_F_PRESERVE_ELEMS = (1U << 11),

	/* Create a map that is suitable to be an inner map with dynamic max entries */
	BPF_F_INNER_MAP = (1U << 12),
};

/* Flags for BPF_PROG_QUERY. */
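A minimal sketch (not part of this patch) of how the new BPF_F_INNER_MAP flag is meant to be used from a libbpf BTF-defined map-in-map; the names are illustrative, and the selftest changes further down exercise the same idea with inner arrays of differing max_entries:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Inner maps carrying BPF_F_INNER_MAP only need matching key/value sizes;
 * their max_entries may differ from the outer map's value template.
 */
struct inner_map {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(map_flags, BPF_F_INNER_MAP);
	__uint(max_entries, 1);		/* actual inner maps may be larger */
	__type(key, int);
	__type(value, int);
} inner_map_tmpl SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
	__uint(max_entries, 3);
	__type(key, int);
	__array(values, struct inner_map);
} outer_arr_dyn SEC(".maps");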
|
@ -1680,7 +1701,7 @@ union bpf_attr {
 *		  **TCP_CONGESTION**, **TCP_BPF_IW**,
 *		  **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
 *		  **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
 *		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
 *		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
 *		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
 *		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
 *	Return

@ -2235,7 +2256,7 @@ union bpf_attr {
 *	Description
 *		This helper is used in programs implementing policies at the
 *		skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
 *		if the verdeict eBPF program returns **SK_PASS**), redirect it
 *		if the verdict eBPF program returns **SK_PASS**), redirect it
 *		to the socket referenced by *map* (of type
 *		**BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
 *		egress interfaces can be used for redirection. The

@ -3661,10 +3682,59 @@ union bpf_attr {
 *		Redirect the packet to another net device of index *ifindex*
 *		and fill in L2 addresses from neighboring subsystem. This helper
 *		is somewhat similar to **bpf_redirect**\ (), except that it
 *		fills in e.g. MAC addresses based on the L3 information from
 *		the packet. This helper is supported for IPv4 and IPv6 protocols.
 *		populates L2 addresses as well, meaning, internally, the helper
 *		performs a FIB lookup based on the skb's networking header to
 *		get the address of the next hop and then relies on the neighbor
 *		lookup for the L2 address of the nexthop.
 *
 *		The *flags* argument is reserved and must be 0. The helper is
 *		currently only supported for tc BPF program types.
 *		currently only supported for tc BPF program types, and enabled
 *		for IPv4 and IPv6 protocols.
 *	Return
 *		The helper returns **TC_ACT_REDIRECT** on success or
 *		**TC_ACT_SHOT** on error.
 *
 * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu)
 *	Description
 *		Take a pointer to a percpu ksym, *percpu_ptr*, and return a
 *		pointer to the percpu kernel variable on *cpu*. A ksym is an
 *		extern variable decorated with '__ksym'. For ksym, there is a
 *		global var (either static or global) defined of the same name
 *		in the kernel. The ksym is percpu if the global var is percpu.
 *		The returned pointer points to the global percpu var on *cpu*.
 *
 *		bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the
 *		kernel, except that bpf_per_cpu_ptr() may return NULL. This
 *		happens if *cpu* is larger than nr_cpu_ids. The caller of
 *		bpf_per_cpu_ptr() must check the returned value.
 *	Return
 *		A pointer pointing to the kernel percpu variable on *cpu*, or
 *		NULL, if *cpu* is invalid.
 *
 * void *bpf_this_cpu_ptr(const void *percpu_ptr)
 *	Description
 *		Take a pointer to a percpu ksym, *percpu_ptr*, and return a
 *		pointer to the percpu kernel variable on this cpu. See the
 *		description of 'ksym' in **bpf_per_cpu_ptr**\ ().
 *
 *		bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in
 *		the kernel. Different from **bpf_per_cpu_ptr**\ (), it would
 *		never return NULL.
 *	Return
 *		A pointer pointing to the kernel percpu variable on this cpu.
 *
 * long bpf_redirect_peer(u32 ifindex, u64 flags)
 *	Description
 *		Redirect the packet to another net device of index *ifindex*.
 *		This helper is somewhat similar to **bpf_redirect**\ (), except
 *		that the redirection happens to the *ifindex*' peer device and
 *		the netns switch takes place from ingress to ingress without
 *		going through the CPU's backlog queue.
 *
 *		The *flags* argument is reserved and must be 0. The helper is
 *		currently only supported for tc BPF program types at the ingress
 *		hook and for veth device types. The peer device must reside in a
 *		different network namespace.
 *	Return
 *		The helper returns **TC_ACT_REDIRECT** on success or
 *		**TC_ACT_SHOT** on error.
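As a usage sketch (not part of this patch), a tc ingress program could use the new helper to hand a packet straight to a peer device in another netns; the ifindex value and section name are placeholders:

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

/* Hypothetical host-side ifindex of the destination veth; the packet is
 * delivered to that device's peer in the other network namespace.
 */
#define DST_VETH_IFINDEX 5

SEC("classifier")
int tc_redirect_to_peer(struct __sk_buff *skb)
{
	/* flags must be 0; on success the skb shows up on the peer's ingress
	 * hook without passing through the CPU backlog queue
	 */
	return bpf_redirect_peer(DST_VETH_IFINDEX, 0);
}

char LICENSE[] SEC("license") = "GPL";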

@ -3823,6 +3893,9 @@ union bpf_attr {
	FN(seq_printf_btf),		\
	FN(skb_cgroup_classid),		\
	FN(redirect_neigh),		\
	FN(bpf_per_cpu_ptr),		\
	FN(bpf_this_cpu_ptr),		\
	FN(redirect_peer),		\
	/* */

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
||||
|
|
|
@ -390,6 +390,12 @@ struct extern_desc {
|
|||
} kcfg;
|
||||
struct {
|
||||
unsigned long long addr;
|
||||
|
||||
/* target btf_id of the corresponding kernel var. */
|
||||
int vmlinux_btf_id;
|
||||
|
||||
/* local btf_id of the ksym extern's type. */
|
||||
__u32 type_id;
|
||||
} ksym;
|
||||
};
|
||||
};
|
||||
|
@ -2522,12 +2528,23 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
|
|||
{
|
||||
bool need_vmlinux_btf = false;
|
||||
struct bpf_program *prog;
|
||||
int err;
|
||||
int i, err;
|
||||
|
||||
/* CO-RE relocations need kernel BTF */
|
||||
if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
|
||||
need_vmlinux_btf = true;
|
||||
|
||||
/* Support for typed ksyms needs kernel BTF */
|
||||
for (i = 0; i < obj->nr_extern; i++) {
|
||||
const struct extern_desc *ext;
|
||||
|
||||
ext = &obj->externs[i];
|
||||
if (ext->type == EXT_KSYM && ext->ksym.type_id) {
|
||||
need_vmlinux_btf = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bpf_object__for_each_program(prog, obj) {
|
||||
if (!prog->load)
|
||||
continue;
|
||||
|
@ -3156,16 +3173,10 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
|
|||
return -ENOTSUP;
|
||||
}
|
||||
} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
|
||||
const struct btf_type *vt;
|
||||
|
||||
ksym_sec = sec;
|
||||
ext->type = EXT_KSYM;
|
||||
|
||||
vt = skip_mods_and_typedefs(obj->btf, t->type, NULL);
|
||||
if (!btf_is_void(vt)) {
|
||||
pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name);
|
||||
return -ENOTSUP;
|
||||
}
|
||||
skip_mods_and_typedefs(obj->btf, t->type,
|
||||
&ext->ksym.type_id);
|
||||
} else {
|
||||
pr_warn("unrecognized extern section '%s'\n", sec_name);
|
||||
return -ENOTSUP;
|
||||
|
@ -4192,6 +4203,36 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int init_map_slots(struct bpf_map *map)
|
||||
{
|
||||
const struct bpf_map *targ_map;
|
||||
unsigned int i;
|
||||
int fd, err;
|
||||
|
||||
for (i = 0; i < map->init_slots_sz; i++) {
|
||||
if (!map->init_slots[i])
|
||||
continue;
|
||||
|
||||
targ_map = map->init_slots[i];
|
||||
fd = bpf_map__fd(targ_map);
|
||||
err = bpf_map_update_elem(map->fd, &i, &fd, 0);
|
||||
if (err) {
|
||||
err = -errno;
|
||||
pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
|
||||
map->name, i, targ_map->name,
|
||||
fd, err);
|
||||
return err;
|
||||
}
|
||||
pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
|
||||
map->name, i, targ_map->name, fd);
|
||||
}
|
||||
|
||||
zfree(&map->init_slots);
|
||||
map->init_slots_sz = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
bpf_object__create_maps(struct bpf_object *obj)
|
||||
{
|
||||
|
@ -4215,47 +4256,29 @@ bpf_object__create_maps(struct bpf_object *obj)
|
|||
if (map->fd >= 0) {
|
||||
pr_debug("map '%s': skipping creation (preset fd=%d)\n",
|
||||
map->name, map->fd);
|
||||
continue;
|
||||
}
|
||||
|
||||
err = bpf_object__create_map(obj, map);
|
||||
if (err)
|
||||
goto err_out;
|
||||
|
||||
pr_debug("map '%s': created successfully, fd=%d\n", map->name,
|
||||
map->fd);
|
||||
|
||||
if (bpf_map__is_internal(map)) {
|
||||
err = bpf_object__populate_internal_map(obj, map);
|
||||
if (err < 0) {
|
||||
zclose(map->fd);
|
||||
} else {
|
||||
err = bpf_object__create_map(obj, map);
|
||||
if (err)
|
||||
goto err_out;
|
||||
}
|
||||
}
|
||||
|
||||
if (map->init_slots_sz) {
|
||||
for (j = 0; j < map->init_slots_sz; j++) {
|
||||
const struct bpf_map *targ_map;
|
||||
int fd;
|
||||
pr_debug("map '%s': created successfully, fd=%d\n",
|
||||
map->name, map->fd);
|
||||
|
||||
if (!map->init_slots[j])
|
||||
continue;
|
||||
|
||||
targ_map = map->init_slots[j];
|
||||
fd = bpf_map__fd(targ_map);
|
||||
err = bpf_map_update_elem(map->fd, &j, &fd, 0);
|
||||
if (err) {
|
||||
err = -errno;
|
||||
pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
|
||||
map->name, j, targ_map->name,
|
||||
fd, err);
|
||||
if (bpf_map__is_internal(map)) {
|
||||
err = bpf_object__populate_internal_map(obj, map);
|
||||
if (err < 0) {
|
||||
zclose(map->fd);
|
||||
goto err_out;
|
||||
}
|
||||
}
|
||||
|
||||
if (map->init_slots_sz) {
|
||||
err = init_map_slots(map);
|
||||
if (err < 0) {
|
||||
zclose(map->fd);
|
||||
goto err_out;
|
||||
}
|
||||
pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
|
||||
map->name, j, targ_map->name, fd);
|
||||
}
|
||||
zfree(&map->init_slots);
|
||||
map->init_slots_sz = 0;
|
||||
}
|
||||
|
||||
if (map->pin_path && !map->pinned) {
|
||||
|
@ -5017,16 +5040,19 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
|
|||
static int bpf_core_calc_field_relo(const struct bpf_program *prog,
|
||||
const struct bpf_core_relo *relo,
|
||||
const struct bpf_core_spec *spec,
|
||||
__u32 *val, bool *validate)
|
||||
__u32 *val, __u32 *field_sz, __u32 *type_id,
|
||||
bool *validate)
|
||||
{
|
||||
const struct bpf_core_accessor *acc;
|
||||
const struct btf_type *t;
|
||||
__u32 byte_off, byte_sz, bit_off, bit_sz;
|
||||
__u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
|
||||
const struct btf_member *m;
|
||||
const struct btf_type *mt;
|
||||
bool bitfield;
|
||||
__s64 sz;
|
||||
|
||||
*field_sz = 0;
|
||||
|
||||
if (relo->kind == BPF_FIELD_EXISTS) {
|
||||
*val = spec ? 1 : 0;
|
||||
return 0;
|
||||
|
@ -5042,6 +5068,12 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
|
|||
if (!acc->name) {
|
||||
if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
|
||||
*val = spec->bit_offset / 8;
|
||||
/* remember field size for load/store mem size */
|
||||
sz = btf__resolve_size(spec->btf, acc->type_id);
|
||||
if (sz < 0)
|
||||
return -EINVAL;
|
||||
*field_sz = sz;
|
||||
*type_id = acc->type_id;
|
||||
} else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
|
||||
sz = btf__resolve_size(spec->btf, acc->type_id);
|
||||
if (sz < 0)
|
||||
|
@ -5058,7 +5090,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
|
|||
}
|
||||
|
||||
m = btf_members(t) + acc->idx;
|
||||
mt = skip_mods_and_typedefs(spec->btf, m->type, NULL);
|
||||
mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
|
||||
bit_off = spec->bit_offset;
|
||||
bit_sz = btf_member_bitfield_size(t, acc->idx);
|
||||
|
||||
|
@ -5078,7 +5110,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
|
|||
byte_off = bit_off / 8 / byte_sz * byte_sz;
|
||||
}
|
||||
} else {
|
||||
sz = btf__resolve_size(spec->btf, m->type);
|
||||
sz = btf__resolve_size(spec->btf, field_type_id);
|
||||
if (sz < 0)
|
||||
return -EINVAL;
|
||||
byte_sz = sz;
|
||||
|
@ -5096,6 +5128,10 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
|
|||
switch (relo->kind) {
|
||||
case BPF_FIELD_BYTE_OFFSET:
|
||||
*val = byte_off;
|
||||
if (!bitfield) {
|
||||
*field_sz = byte_sz;
|
||||
*type_id = field_type_id;
|
||||
}
|
||||
break;
|
||||
case BPF_FIELD_BYTE_SIZE:
|
||||
*val = byte_sz;
|
||||
|
@ -5196,6 +5232,19 @@ struct bpf_core_relo_res
|
|||
bool poison;
|
||||
/* some relocations can't be validated against orig_val */
|
||||
bool validate;
|
||||
/* for field byte offset relocations or the forms:
|
||||
* *(T *)(rX + <off>) = rY
|
||||
* rX = *(T *)(rY + <off>),
|
||||
* we remember original and resolved field size to adjust direct
|
||||
* memory loads of pointers and integers; this is necessary for 32-bit
|
||||
* host kernel architectures, but also allows to automatically
|
||||
* relocate fields that were resized from, e.g., u32 to u64, etc.
|
||||
*/
|
||||
bool fail_memsz_adjust;
|
||||
__u32 orig_sz;
|
||||
__u32 orig_type_id;
|
||||
__u32 new_sz;
|
||||
__u32 new_type_id;
|
||||
};
|
||||
|
||||
/* Calculate original and target relocation values, given local and target
|
||||
|
@ -5217,10 +5266,56 @@ static int bpf_core_calc_relo(const struct bpf_program *prog,
|
|||
res->new_val = 0;
|
||||
res->poison = false;
|
||||
res->validate = true;
|
||||
res->fail_memsz_adjust = false;
|
||||
res->orig_sz = res->new_sz = 0;
|
||||
res->orig_type_id = res->new_type_id = 0;
|
||||
|
||||
if (core_relo_is_field_based(relo->kind)) {
|
||||
err = bpf_core_calc_field_relo(prog, relo, local_spec, &res->orig_val, &res->validate);
|
||||
err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, &res->new_val, NULL);
|
||||
err = bpf_core_calc_field_relo(prog, relo, local_spec,
|
||||
&res->orig_val, &res->orig_sz,
|
||||
&res->orig_type_id, &res->validate);
|
||||
err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec,
|
||||
&res->new_val, &res->new_sz,
|
||||
&res->new_type_id, NULL);
|
||||
if (err)
|
||||
goto done;
|
||||
/* Validate if it's safe to adjust load/store memory size.
|
||||
* Adjustments are performed only if original and new memory
|
||||
* sizes differ.
|
||||
*/
|
||||
res->fail_memsz_adjust = false;
|
||||
if (res->orig_sz != res->new_sz) {
|
||||
const struct btf_type *orig_t, *new_t;
|
||||
|
||||
orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
|
||||
new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
|
||||
|
||||
/* There are two use cases in which it's safe to
|
||||
* adjust load/store's mem size:
|
||||
* - reading a 32-bit kernel pointer, while on BPF
|
||||
* size pointers are always 64-bit; in this case
|
||||
* it's safe to "downsize" instruction size due to
|
||||
* pointer being treated as unsigned integer with
|
||||
* zero-extended upper 32-bits;
|
||||
* - reading unsigned integers, again due to
|
||||
* zero-extension is preserving the value correctly.
|
||||
*
|
||||
* In all other cases it's incorrect to attempt to
|
||||
* load/store field because read value will be
|
||||
* incorrect, so we poison relocated instruction.
|
||||
*/
|
||||
if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
|
||||
goto done;
|
||||
if (btf_is_int(orig_t) && btf_is_int(new_t) &&
|
||||
btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
|
||||
btf_int_encoding(new_t) != BTF_INT_SIGNED)
|
||||
goto done;
|
||||
|
||||
/* mark as invalid mem size adjustment, but this will
|
||||
* only be checked for LDX/STX/ST insns
|
||||
*/
|
||||
res->fail_memsz_adjust = true;
|
||||
}
|
||||
} else if (core_relo_is_type_based(relo->kind)) {
|
||||
err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
|
||||
err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
|
||||
|
@ -5229,6 +5324,7 @@ static int bpf_core_calc_relo(const struct bpf_program *prog,
|
|||
err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
|
||||
}
|
||||
|
||||
done:
|
||||
if (err == -EUCLEAN) {
|
||||
/* EUCLEAN is used to signal instruction poisoning request */
|
||||
res->poison = true;
|
||||
|
@ -5268,6 +5364,28 @@ static bool is_ldimm64(struct bpf_insn *insn)
|
|||
return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
|
||||
}
|
||||
|
||||
static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
|
||||
{
|
||||
switch (BPF_SIZE(insn->code)) {
|
||||
case BPF_DW: return 8;
|
||||
case BPF_W: return 4;
|
||||
case BPF_H: return 2;
|
||||
case BPF_B: return 1;
|
||||
default: return -1;
|
||||
}
|
||||
}
|
||||
|
||||
static int insn_bytes_to_bpf_size(__u32 sz)
|
||||
{
|
||||
switch (sz) {
|
||||
case 8: return BPF_DW;
|
||||
case 4: return BPF_W;
|
||||
case 2: return BPF_H;
|
||||
case 1: return BPF_B;
|
||||
default: return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Patch relocatable BPF instruction.
|
||||
*
|
||||
|
@ -5277,10 +5395,13 @@ static bool is_ldimm64(struct bpf_insn *insn)
|
|||
* spec, and is checked before patching instruction. If actual insn->imm value
|
||||
* is wrong, bail out with error.
|
||||
*
|
||||
* Currently three kinds of BPF instructions are supported:
|
||||
* Currently supported classes of BPF instruction are:
|
||||
* 1. rX = <imm> (assignment with immediate operand);
|
||||
* 2. rX += <imm> (arithmetic operations with immediate operand);
|
||||
* 3. rX = <imm64> (load with 64-bit immediate value).
|
||||
* 3. rX = <imm64> (load with 64-bit immediate value);
|
||||
* 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
|
||||
* 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
|
||||
* 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
|
||||
*/
|
||||
static int bpf_core_patch_insn(struct bpf_program *prog,
|
||||
const struct bpf_core_relo *relo,
|
||||
|
@ -5304,6 +5425,7 @@ static int bpf_core_patch_insn(struct bpf_program *prog,
|
|||
class = BPF_CLASS(insn->code);
|
||||
|
||||
if (res->poison) {
|
||||
poison:
|
||||
/* poison second part of ldimm64 to avoid confusing error from
|
||||
* verifier about "unknown opcode 00"
|
||||
*/
|
||||
|
@ -5346,10 +5468,39 @@ static int bpf_core_patch_insn(struct bpf_program *prog,
|
|||
prog->name, relo_idx, insn_idx, new_val);
|
||||
return -ERANGE;
|
||||
}
|
||||
if (res->fail_memsz_adjust) {
|
||||
pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
|
||||
"Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
|
||||
prog->name, relo_idx, insn_idx);
|
||||
goto poison;
|
||||
}
|
||||
|
||||
orig_val = insn->off;
|
||||
insn->off = new_val;
|
||||
pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
|
||||
prog->name, relo_idx, insn_idx, orig_val, new_val);
|
||||
|
||||
if (res->new_sz != res->orig_sz) {
|
||||
int insn_bytes_sz, insn_bpf_sz;
|
||||
|
||||
insn_bytes_sz = insn_bpf_size_to_bytes(insn);
|
||||
if (insn_bytes_sz != res->orig_sz) {
|
||||
pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
|
||||
prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
|
||||
if (insn_bpf_sz < 0) {
|
||||
pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
|
||||
prog->name, relo_idx, insn_idx, res->new_sz);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
|
||||
pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
|
||||
prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
|
||||
}
|
||||
break;
|
||||
case BPF_LD: {
|
||||
__u64 imm;
|
||||
|
@ -5691,7 +5842,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
|
|||
return 0;
|
||||
|
||||
if (targ_btf_path)
|
||||
targ_btf = btf__parse_elf(targ_btf_path, NULL);
|
||||
targ_btf = btf__parse(targ_btf_path, NULL);
|
||||
else
|
||||
targ_btf = obj->btf_vmlinux;
|
||||
if (IS_ERR_OR_NULL(targ_btf)) {
|
||||
|
@ -5742,6 +5893,11 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
|
|||
err = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
/* no need to apply CO-RE relocation if the program is
|
||||
* not going to be loaded
|
||||
*/
|
||||
if (!prog->load)
|
||||
continue;
|
||||
|
||||
err = bpf_core_apply_relo(prog, rec, i, obj->btf,
|
||||
targ_btf, cand_cache);
|
||||
|
@ -5800,8 +5956,13 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
|
|||
insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
|
||||
insn[1].imm = ext->kcfg.data_off;
|
||||
} else /* EXT_KSYM */ {
|
||||
insn[0].imm = (__u32)ext->ksym.addr;
|
||||
insn[1].imm = ext->ksym.addr >> 32;
|
||||
if (ext->ksym.type_id) { /* typed ksyms */
|
||||
insn[0].src_reg = BPF_PSEUDO_BTF_ID;
|
||||
insn[0].imm = ext->ksym.vmlinux_btf_id;
|
||||
} else { /* typeless ksyms */
|
||||
insn[0].imm = (__u32)ext->ksym.addr;
|
||||
insn[1].imm = ext->ksym.addr >> 32;
|
||||
}
|
||||
}
|
||||
relo->processed = true;
|
||||
break;
|
||||
|
@ -6933,10 +7094,72 @@ out:
|
|||
return err;
|
||||
}
|
||||
|
||||
static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
|
||||
{
|
||||
struct extern_desc *ext;
|
||||
int i, id;
|
||||
|
||||
for (i = 0; i < obj->nr_extern; i++) {
|
||||
const struct btf_type *targ_var, *targ_type;
|
||||
__u32 targ_type_id, local_type_id;
|
||||
const char *targ_var_name;
|
||||
int ret;
|
||||
|
||||
ext = &obj->externs[i];
|
||||
if (ext->type != EXT_KSYM || !ext->ksym.type_id)
|
||||
continue;
|
||||
|
||||
id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name,
|
||||
BTF_KIND_VAR);
|
||||
if (id <= 0) {
|
||||
pr_warn("extern (ksym) '%s': failed to find BTF ID in vmlinux BTF.\n",
|
||||
ext->name);
|
||||
return -ESRCH;
|
||||
}
|
||||
|
||||
/* find local type_id */
|
||||
local_type_id = ext->ksym.type_id;
|
||||
|
||||
/* find target type_id */
|
||||
targ_var = btf__type_by_id(obj->btf_vmlinux, id);
|
||||
targ_var_name = btf__name_by_offset(obj->btf_vmlinux,
|
||||
targ_var->name_off);
|
||||
targ_type = skip_mods_and_typedefs(obj->btf_vmlinux,
|
||||
targ_var->type,
|
||||
&targ_type_id);
|
||||
|
||||
ret = bpf_core_types_are_compat(obj->btf, local_type_id,
|
||||
obj->btf_vmlinux, targ_type_id);
|
||||
if (ret <= 0) {
|
||||
const struct btf_type *local_type;
|
||||
const char *targ_name, *local_name;
|
||||
|
||||
local_type = btf__type_by_id(obj->btf, local_type_id);
|
||||
local_name = btf__name_by_offset(obj->btf,
|
||||
local_type->name_off);
|
||||
targ_name = btf__name_by_offset(obj->btf_vmlinux,
|
||||
targ_type->name_off);
|
||||
|
||||
pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
|
||||
ext->name, local_type_id,
|
||||
btf_kind_str(local_type), local_name, targ_type_id,
|
||||
btf_kind_str(targ_type), targ_name);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ext->is_set = true;
|
||||
ext->ksym.vmlinux_btf_id = id;
|
||||
pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n",
|
||||
ext->name, id, btf_kind_str(targ_var), targ_var_name);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bpf_object__resolve_externs(struct bpf_object *obj,
|
||||
const char *extra_kconfig)
|
||||
{
|
||||
bool need_config = false, need_kallsyms = false;
|
||||
bool need_vmlinux_btf = false;
|
||||
struct extern_desc *ext;
|
||||
void *kcfg_data = NULL;
|
||||
int err, i;
|
||||
|
@ -6967,7 +7190,10 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
|
|||
strncmp(ext->name, "CONFIG_", 7) == 0) {
|
||||
need_config = true;
|
||||
} else if (ext->type == EXT_KSYM) {
|
||||
need_kallsyms = true;
|
||||
if (ext->ksym.type_id)
|
||||
need_vmlinux_btf = true;
|
||||
else
|
||||
need_kallsyms = true;
|
||||
} else {
|
||||
pr_warn("unrecognized extern '%s'\n", ext->name);
|
||||
return -EINVAL;
|
||||
|
@ -6996,6 +7222,11 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
|
|||
if (err)
|
||||
return -EINVAL;
|
||||
}
|
||||
if (need_vmlinux_btf) {
|
||||
err = bpf_object__resolve_ksyms_btf_id(obj);
|
||||
if (err)
|
||||
return -EINVAL;
|
||||
}
|
||||
for (i = 0; i < obj->nr_extern; i++) {
|
||||
ext = &obj->externs[i];
|
||||
|
||||
|
@ -7028,10 +7259,10 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
|
|||
}
|
||||
|
||||
err = bpf_object__probe_loading(obj);
|
||||
err = err ? : bpf_object__load_vmlinux_btf(obj);
|
||||
err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
|
||||
err = err ? : bpf_object__sanitize_and_load_btf(obj);
|
||||
err = err ? : bpf_object__sanitize_maps(obj);
|
||||
err = err ? : bpf_object__load_vmlinux_btf(obj);
|
||||
err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
|
||||
err = err ? : bpf_object__create_maps(obj);
|
||||
err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
|
||||
|
@ -10353,9 +10584,8 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
|
|||
btf_id = libbpf_find_prog_btf_id(attach_func_name,
|
||||
attach_prog_fd);
|
||||
else
|
||||
btf_id = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
|
||||
attach_func_name,
|
||||
prog->expected_attach_type);
|
||||
btf_id = libbpf_find_vmlinux_btf_id(attach_func_name,
|
||||
prog->expected_attach_type);
|
||||
|
||||
if (btf_id < 0)
|
||||
return btf_id;
|
||||
|
|
|
@ -705,7 +705,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
|
|||
struct xsk_ctx *ctx;
|
||||
int err, ifindex;
|
||||
|
||||
if (!umem || !xsk_ptr || !(rx || tx) || !fill || !comp)
|
||||
if (!umem || !xsk_ptr || !(rx || tx))
|
||||
return -EFAULT;
|
||||
|
||||
xsk = calloc(1, sizeof(*xsk));
|
||||
|
@ -735,6 +735,11 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
|
|||
|
||||
ctx = xsk_get_ctx(umem, ifindex, queue_id);
|
||||
if (!ctx) {
|
||||
if (!fill || !comp) {
|
||||
err = -EFAULT;
|
||||
goto out_socket;
|
||||
}
|
||||
|
||||
ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id,
|
||||
fill, comp);
|
||||
if (!ctx) {
|
||||
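The effect of this change from the caller's side, as a sketch (assuming the xsk_socket__create_shared() prototype from tools/lib/bpf/xsk.h; umem/ring setup and error handling are elided): a socket that attaches to an already existing ctx may now omit dedicated fill/completion rings, while a socket creating a new ctx must still supply them.

#include <bpf/xsk.h>

static int bind_two_sockets_same_queue(struct xsk_umem *umem,
				       struct xsk_ring_prod *fill,
				       struct xsk_ring_cons *comp,
				       const struct xsk_socket_config *cfg)
{
	struct xsk_socket *xsk0 = NULL, *xsk1 = NULL;
	struct xsk_ring_cons rx0, rx1;
	struct xsk_ring_prod tx0, tx1;
	int err;

	/* first socket on (eth0, queue 0) creates the ctx, so fill/comp
	 * must be supplied here
	 */
	err = xsk_socket__create_shared(&xsk0, "eth0", 0, umem,
					&rx0, &tx0, fill, comp, cfg);
	if (err)
		return err;

	/* second socket shares the existing ctx; with this change it no
	 * longer has to pass fill/comp rings of its own
	 */
	return xsk_socket__create_shared(&xsk1, "eth0", 0, umem,
					 &rx1, &tx1, NULL, NULL, cfg);
}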
|
|
|
@ -7,6 +7,44 @@ General instructions on running selftests can be found in
Additional information about selftest failures is
documented here.

profiler[23] test failures with clang/llvm <12.0.0
==================================================

With clang/llvm <12.0.0, the profiler[23] test may fail.
The symptom looks like

.. code-block:: c

  // r9 is a pointer to map_value
  // r7 is a scalar
  17:       bf 96 00 00 00 00 00 00 r6 = r9
  18:       0f 76 00 00 00 00 00 00 r6 += r7
  math between map_value pointer and register with unbounded min value is not allowed

  // the instructions below will not be seen in the verifier log
  19:       a5 07 01 00 01 01 00 00 if r7 < 257 goto +1
  20:       bf 96 00 00 00 00 00 00 r6 = r9
  // r6 is used here

The verifier will reject such code with the above error.
At insn 18, r7 is indeed unbounded. The later insn 19 checks the bounds and
the insn 20 undoes the map_value addition. It is currently impossible for the
verifier to understand such speculative pointer arithmetic.
Hence
https://reviews.llvm.org/D85570
addresses it on the compiler side. It was committed in llvm 12.

The corresponding C code

.. code-block:: c

  for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
          filepart_length = bpf_probe_read_str(payload, ...);
          if (filepart_length <= MAX_PATH) {
                  barrier_var(filepart_length); // workaround
                  payload += filepart_length;
          }
  }
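
For reference (not part of this patch), barrier_var() used in the workaround
above is a compiler barrier macro along the lines of

.. code-block:: c

  #define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))

which makes the compiler treat the variable as coming from an opaque source,
so the bounds check on filepart_length is not optimized into the rejected
instruction pattern.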

bpf_iter test failures with clang/llvm 10.0.0
=============================================

@ -195,13 +195,13 @@ static struct bpf_align_test tests[] = {
|
|||
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
|
||||
.matches = {
|
||||
{7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
|
||||
{8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
|
||||
{8, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
|
||||
{9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
|
||||
{10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
|
||||
{10, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
|
||||
{11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
|
||||
{12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
|
||||
{12, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
|
||||
{13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
|
||||
{14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
|
||||
{14, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
|
||||
{15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
|
||||
{16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
|
||||
},
|
||||
|
@ -518,7 +518,7 @@ static struct bpf_align_test tests[] = {
|
|||
* the total offset is 4-byte aligned and meets the
|
||||
* load's requirements.
|
||||
*/
|
||||
{20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"},
|
||||
{20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"},
|
||||
|
||||
},
|
||||
},
|
||||
|
@ -561,18 +561,18 @@ static struct bpf_align_test tests[] = {
|
|||
/* Adding 14 makes R6 be (4n+2) */
|
||||
{11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
|
||||
/* Subtracting from packet pointer overflows ubounds */
|
||||
{13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
|
||||
{13, "R5_w=pkt(id=2,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
|
||||
/* New unknown value in R7 is (4n), >= 76 */
|
||||
{15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
|
||||
/* Adding it to packet pointer gives nice bounds again */
|
||||
{16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
|
||||
{16, "R5_w=pkt(id=3,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
|
||||
/* At the time the word size load is performed from R5,
|
||||
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
|
||||
* which is 2. Then the variable offset is (4n+2), so
|
||||
* the total offset is 4-byte aligned and meets the
|
||||
* load's requirements.
|
||||
*/
|
||||
{20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
|
||||
{20, "R5=pkt(id=3,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
|
|
@ -55,10 +55,10 @@ static int kern_sync_rcu(void)
|
|||
|
||||
static void test_lookup_update(void)
|
||||
{
|
||||
int err, key = 0, val, i;
|
||||
int map1_fd, map2_fd, map3_fd, map4_fd, map5_fd, map1_id, map2_id;
|
||||
int outer_arr_fd, outer_hash_fd, outer_arr_dyn_fd;
|
||||
struct test_btf_map_in_map *skel;
|
||||
int outer_arr_fd, outer_hash_fd;
|
||||
int fd, map1_fd, map2_fd, map1_id, map2_id;
|
||||
int err, key = 0, val, i, fd;
|
||||
|
||||
skel = test_btf_map_in_map__open_and_load();
|
||||
if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n"))
|
||||
|
@ -70,32 +70,45 @@ static void test_lookup_update(void)
|
|||
|
||||
map1_fd = bpf_map__fd(skel->maps.inner_map1);
|
||||
map2_fd = bpf_map__fd(skel->maps.inner_map2);
|
||||
map3_fd = bpf_map__fd(skel->maps.inner_map3);
|
||||
map4_fd = bpf_map__fd(skel->maps.inner_map4);
|
||||
map5_fd = bpf_map__fd(skel->maps.inner_map5);
|
||||
outer_arr_dyn_fd = bpf_map__fd(skel->maps.outer_arr_dyn);
|
||||
outer_arr_fd = bpf_map__fd(skel->maps.outer_arr);
|
||||
outer_hash_fd = bpf_map__fd(skel->maps.outer_hash);
|
||||
|
||||
/* inner1 = input, inner2 = input + 1 */
|
||||
map1_fd = bpf_map__fd(skel->maps.inner_map1);
|
||||
/* inner1 = input, inner2 = input + 1, inner3 = input + 2 */
|
||||
bpf_map_update_elem(outer_arr_fd, &key, &map1_fd, 0);
|
||||
map2_fd = bpf_map__fd(skel->maps.inner_map2);
|
||||
bpf_map_update_elem(outer_hash_fd, &key, &map2_fd, 0);
|
||||
bpf_map_update_elem(outer_arr_dyn_fd, &key, &map3_fd, 0);
|
||||
skel->bss->input = 1;
|
||||
usleep(1);
|
||||
|
||||
bpf_map_lookup_elem(map1_fd, &key, &val);
|
||||
CHECK(val != 1, "inner1", "got %d != exp %d\n", val, 1);
|
||||
bpf_map_lookup_elem(map2_fd, &key, &val);
|
||||
CHECK(val != 2, "inner2", "got %d != exp %d\n", val, 2);
|
||||
bpf_map_lookup_elem(map3_fd, &key, &val);
|
||||
CHECK(val != 3, "inner3", "got %d != exp %d\n", val, 3);
|
||||
|
||||
/* inner1 = input + 1, inner2 = input */
|
||||
/* inner2 = input, inner1 = input + 1, inner4 = input + 2 */
|
||||
bpf_map_update_elem(outer_arr_fd, &key, &map2_fd, 0);
|
||||
bpf_map_update_elem(outer_hash_fd, &key, &map1_fd, 0);
|
||||
bpf_map_update_elem(outer_arr_dyn_fd, &key, &map4_fd, 0);
|
||||
skel->bss->input = 3;
|
||||
usleep(1);
|
||||
|
||||
bpf_map_lookup_elem(map1_fd, &key, &val);
|
||||
CHECK(val != 4, "inner1", "got %d != exp %d\n", val, 4);
|
||||
bpf_map_lookup_elem(map2_fd, &key, &val);
|
||||
CHECK(val != 3, "inner2", "got %d != exp %d\n", val, 3);
|
||||
bpf_map_lookup_elem(map4_fd, &key, &val);
|
||||
CHECK(val != 5, "inner4", "got %d != exp %d\n", val, 5);
|
||||
|
||||
/* inner5 = input + 2 */
|
||||
bpf_map_update_elem(outer_arr_dyn_fd, &key, &map5_fd, 0);
|
||||
skel->bss->input = 5;
|
||||
usleep(1);
|
||||
bpf_map_lookup_elem(map5_fd, &key, &val);
|
||||
CHECK(val != 7, "inner5", "got %d != exp %d\n", val, 7);
|
||||
|
||||
for (i = 0; i < 5; i++) {
|
||||
val = i % 2 ? map1_fd : map2_fd;
|
||||
|
@ -106,7 +119,13 @@ static void test_lookup_update(void)
|
|||
}
|
||||
err = bpf_map_update_elem(outer_arr_fd, &key, &val, 0);
|
||||
if (CHECK_FAIL(err)) {
|
||||
printf("failed to update hash_of_maps on iter #%d\n", i);
|
||||
printf("failed to update array_of_maps on iter #%d\n", i);
|
||||
goto cleanup;
|
||||
}
|
||||
val = i % 2 ? map4_fd : map5_fd;
|
||||
err = bpf_map_update_elem(outer_arr_dyn_fd, &key, &val, 0);
|
||||
if (CHECK_FAIL(err)) {
|
||||
printf("failed to update array_of_maps (dyn) on iter #%d\n", i);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,225 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2020 Facebook */
|
||||
|
||||
#include <test_progs.h>
|
||||
#include <bpf/btf.h>
|
||||
|
||||
/* real layout and sizes according to test's (32-bit) BTF
 * need to be defined before the skeleton is included */
|
||||
struct test_struct___real {
|
||||
unsigned int ptr; /* can't use `void *`, it is always 8 byte in BPF target */
|
||||
unsigned int val2;
|
||||
unsigned long long val1;
|
||||
unsigned short val3;
|
||||
unsigned char val4;
|
||||
unsigned char _pad;
|
||||
};
|
||||
|
||||
#include "test_core_autosize.skel.h"
|
||||
|
||||
static int duration = 0;
|
||||
|
||||
static struct {
|
||||
unsigned long long ptr_samesized;
|
||||
unsigned long long val1_samesized;
|
||||
unsigned long long val2_samesized;
|
||||
unsigned long long val3_samesized;
|
||||
unsigned long long val4_samesized;
|
||||
struct test_struct___real output_samesized;
|
||||
|
||||
unsigned long long ptr_downsized;
|
||||
unsigned long long val1_downsized;
|
||||
unsigned long long val2_downsized;
|
||||
unsigned long long val3_downsized;
|
||||
unsigned long long val4_downsized;
|
||||
struct test_struct___real output_downsized;
|
||||
|
||||
unsigned long long ptr_probed;
|
||||
unsigned long long val1_probed;
|
||||
unsigned long long val2_probed;
|
||||
unsigned long long val3_probed;
|
||||
unsigned long long val4_probed;
|
||||
|
||||
unsigned long long ptr_signed;
|
||||
unsigned long long val1_signed;
|
||||
unsigned long long val2_signed;
|
||||
unsigned long long val3_signed;
|
||||
unsigned long long val4_signed;
|
||||
struct test_struct___real output_signed;
|
||||
} out;
|
||||
|
||||
void test_core_autosize(void)
|
||||
{
|
||||
char btf_file[] = "/tmp/core_autosize.btf.XXXXXX";
|
||||
int err, fd = -1, zero = 0;
|
||||
int char_id, short_id, int_id, long_long_id, void_ptr_id, id;
|
||||
struct test_core_autosize* skel = NULL;
|
||||
struct bpf_object_load_attr load_attr = {};
|
||||
struct bpf_program *prog;
|
||||
struct bpf_map *bss_map;
|
||||
struct btf *btf = NULL;
|
||||
size_t written;
|
||||
const void *raw_data;
|
||||
__u32 raw_sz;
|
||||
FILE *f = NULL;
|
||||
|
||||
btf = btf__new_empty();
|
||||
if (!ASSERT_OK_PTR(btf, "empty_btf"))
|
||||
return;
|
||||
/* Emit the following struct with 32-bit pointer size:
|
||||
*
|
||||
* struct test_struct {
|
||||
* void *ptr;
|
||||
* unsigned long val2;
|
||||
* unsigned long long val1;
|
||||
* unsigned short val3;
|
||||
* unsigned char val4;
|
||||
* char: 8;
|
||||
* };
|
||||
*
|
||||
* This struct is going to be used as the "kernel BTF" for this test.
|
||||
* It's equivalent memory-layout-wise to test_struct__real above.
|
||||
*/
|
||||
|
||||
/* force 32-bit pointer size */
|
||||
btf__set_pointer_size(btf, 4);
|
||||
|
||||
char_id = btf__add_int(btf, "unsigned char", 1, 0);
|
||||
ASSERT_EQ(char_id, 1, "char_id");
|
||||
short_id = btf__add_int(btf, "unsigned short", 2, 0);
|
||||
ASSERT_EQ(short_id, 2, "short_id");
|
||||
/* "long unsigned int" of 4 byte size tells BTF that sizeof(void *) == 4 */
|
||||
int_id = btf__add_int(btf, "long unsigned int", 4, 0);
|
||||
ASSERT_EQ(int_id, 3, "int_id");
|
||||
long_long_id = btf__add_int(btf, "unsigned long long", 8, 0);
|
||||
ASSERT_EQ(long_long_id, 4, "long_long_id");
|
||||
void_ptr_id = btf__add_ptr(btf, 0);
|
||||
ASSERT_EQ(void_ptr_id, 5, "void_ptr_id");
|
||||
|
||||
id = btf__add_struct(btf, "test_struct", 20 /* bytes */);
|
||||
ASSERT_EQ(id, 6, "struct_id");
|
||||
err = btf__add_field(btf, "ptr", void_ptr_id, 0, 0);
|
||||
err = err ?: btf__add_field(btf, "val2", int_id, 32, 0);
|
||||
err = err ?: btf__add_field(btf, "val1", long_long_id, 64, 0);
|
||||
err = err ?: btf__add_field(btf, "val3", short_id, 128, 0);
|
||||
err = err ?: btf__add_field(btf, "val4", char_id, 144, 0);
|
||||
ASSERT_OK(err, "struct_fields");
|
||||
|
||||
fd = mkstemp(btf_file);
|
||||
if (CHECK(fd < 0, "btf_tmp", "failed to create file: %d\n", fd))
|
||||
goto cleanup;
|
||||
f = fdopen(fd, "w");
|
||||
if (!ASSERT_OK_PTR(f, "btf_fdopen"))
|
||||
goto cleanup;
|
||||
|
||||
raw_data = btf__get_raw_data(btf, &raw_sz);
|
||||
if (!ASSERT_OK_PTR(raw_data, "raw_data"))
|
||||
goto cleanup;
|
||||
written = fwrite(raw_data, 1, raw_sz, f);
|
||||
if (CHECK(written != raw_sz, "btf_write", "written: %zu, errno: %d\n", written, errno))
|
||||
goto cleanup;
|
||||
fflush(f);
|
||||
fclose(f);
|
||||
f = NULL;
|
||||
close(fd);
|
||||
fd = -1;
|
||||
|
||||
/* open and load BPF program with custom BTF as the kernel BTF */
|
||||
skel = test_core_autosize__open();
|
||||
if (!ASSERT_OK_PTR(skel, "skel_open"))
|
||||
return;
|
||||
|
||||
/* disable handle_signed() for now */
|
||||
prog = bpf_object__find_program_by_name(skel->obj, "handle_signed");
|
||||
if (!ASSERT_OK_PTR(prog, "prog_find"))
|
||||
goto cleanup;
|
||||
bpf_program__set_autoload(prog, false);
|
||||
|
||||
load_attr.obj = skel->obj;
|
||||
load_attr.target_btf_path = btf_file;
|
||||
err = bpf_object__load_xattr(&load_attr);
|
||||
if (!ASSERT_OK(err, "prog_load"))
|
||||
goto cleanup;
|
||||
|
||||
prog = bpf_object__find_program_by_name(skel->obj, "handle_samesize");
|
||||
if (!ASSERT_OK_PTR(prog, "prog_find"))
|
||||
goto cleanup;
|
||||
skel->links.handle_samesize = bpf_program__attach(prog);
|
||||
if (!ASSERT_OK_PTR(skel->links.handle_samesize, "prog_attach"))
|
||||
goto cleanup;
|
||||
|
||||
prog = bpf_object__find_program_by_name(skel->obj, "handle_downsize");
|
||||
if (!ASSERT_OK_PTR(prog, "prog_find"))
|
||||
goto cleanup;
|
||||
skel->links.handle_downsize = bpf_program__attach(prog);
|
||||
if (!ASSERT_OK_PTR(skel->links.handle_downsize, "prog_attach"))
|
||||
goto cleanup;
|
||||
|
||||
prog = bpf_object__find_program_by_name(skel->obj, "handle_probed");
|
||||
if (!ASSERT_OK_PTR(prog, "prog_find"))
|
||||
goto cleanup;
|
||||
skel->links.handle_probed = bpf_program__attach(prog);
|
||||
if (!ASSERT_OK_PTR(skel->links.handle_probed, "prog_attach"))
|
||||
goto cleanup;
|
||||
|
||||
usleep(1);
|
||||
|
||||
bss_map = bpf_object__find_map_by_name(skel->obj, "test_cor.bss");
|
||||
if (!ASSERT_OK_PTR(bss_map, "bss_map_find"))
|
||||
goto cleanup;
|
||||
|
||||
err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &zero, (void *)&out);
|
||||
if (!ASSERT_OK(err, "bss_lookup"))
|
||||
goto cleanup;
|
||||
|
||||
ASSERT_EQ(out.ptr_samesized, 0x01020304, "ptr_samesized");
|
||||
ASSERT_EQ(out.val1_samesized, 0x1020304050607080, "val1_samesized");
|
||||
ASSERT_EQ(out.val2_samesized, 0x0a0b0c0d, "val2_samesized");
|
||||
ASSERT_EQ(out.val3_samesized, 0xfeed, "val3_samesized");
|
||||
ASSERT_EQ(out.val4_samesized, 0xb9, "val4_samesized");
|
||||
ASSERT_EQ(out.output_samesized.ptr, 0x01020304, "ptr_samesized");
|
||||
ASSERT_EQ(out.output_samesized.val1, 0x1020304050607080, "val1_samesized");
|
||||
ASSERT_EQ(out.output_samesized.val2, 0x0a0b0c0d, "val2_samesized");
|
||||
ASSERT_EQ(out.output_samesized.val3, 0xfeed, "val3_samesized");
|
||||
ASSERT_EQ(out.output_samesized.val4, 0xb9, "val4_samesized");
|
||||
|
||||
ASSERT_EQ(out.ptr_downsized, 0x01020304, "ptr_downsized");
|
||||
ASSERT_EQ(out.val1_downsized, 0x1020304050607080, "val1_downsized");
|
||||
ASSERT_EQ(out.val2_downsized, 0x0a0b0c0d, "val2_downsized");
|
||||
ASSERT_EQ(out.val3_downsized, 0xfeed, "val3_downsized");
|
||||
ASSERT_EQ(out.val4_downsized, 0xb9, "val4_downsized");
|
||||
ASSERT_EQ(out.output_downsized.ptr, 0x01020304, "ptr_downsized");
|
||||
ASSERT_EQ(out.output_downsized.val1, 0x1020304050607080, "val1_downsized");
|
||||
ASSERT_EQ(out.output_downsized.val2, 0x0a0b0c0d, "val2_downsized");
|
||||
ASSERT_EQ(out.output_downsized.val3, 0xfeed, "val3_downsized");
|
||||
ASSERT_EQ(out.output_downsized.val4, 0xb9, "val4_downsized");
|
||||
|
||||
ASSERT_EQ(out.ptr_probed, 0x01020304, "ptr_probed");
|
||||
ASSERT_EQ(out.val1_probed, 0x1020304050607080, "val1_probed");
|
||||
ASSERT_EQ(out.val2_probed, 0x0a0b0c0d, "val2_probed");
|
||||
ASSERT_EQ(out.val3_probed, 0xfeed, "val3_probed");
|
||||
ASSERT_EQ(out.val4_probed, 0xb9, "val4_probed");
|
||||
|
||||
test_core_autosize__destroy(skel);
|
||||
skel = NULL;
|
||||
|
||||
/* now re-load with handle_signed() enabled, it should fail loading */
|
||||
skel = test_core_autosize__open();
|
||||
if (!ASSERT_OK_PTR(skel, "skel_open"))
|
||||
return;
|
||||
|
||||
load_attr.obj = skel->obj;
|
||||
load_attr.target_btf_path = btf_file;
|
||||
err = bpf_object__load_xattr(&load_attr);
|
||||
if (!ASSERT_ERR(err, "bad_prog_load"))
|
||||
goto cleanup;
|
||||
|
||||
cleanup:
|
||||
if (f)
|
||||
fclose(f);
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
remove(btf_file);
|
||||
btf__free(btf);
|
||||
test_core_autosize__destroy(skel);
|
||||
}
|
|
@ -7,40 +7,28 @@
|
|||
|
||||
static int duration;
|
||||
|
||||
static __u64 kallsyms_find(const char *sym)
|
||||
{
|
||||
char type, name[500];
|
||||
__u64 addr, res = 0;
|
||||
FILE *f;
|
||||
|
||||
f = fopen("/proc/kallsyms", "r");
|
||||
if (CHECK(!f, "kallsyms_fopen", "failed to open: %d\n", errno))
|
||||
return 0;
|
||||
|
||||
while (fscanf(f, "%llx %c %499s%*[^\n]\n", &addr, &type, name) > 0) {
|
||||
if (strcmp(name, sym) == 0) {
|
||||
res = addr;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
CHECK(false, "not_found", "symbol %s not found\n", sym);
|
||||
out:
|
||||
fclose(f);
|
||||
return res;
|
||||
}
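The open-coded kallsyms_find() above is dropped in favor of a shared helper added to trace_helpers in this series; that helper is not part of this hunk, but judging from the call sites below it behaves roughly like this sketch (0 on success, -EINVAL when /proc/kallsyms cannot be opened, -ENOENT when the symbol is missing):

int kallsyms_find(const char *sym, unsigned long long *addr)
{
	char type, name[500];
	unsigned long long value;
	int err = -ENOENT;
	FILE *f;

	f = fopen("/proc/kallsyms", "r");
	if (!f)
		return -EINVAL;

	while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) {
		if (strcmp(name, sym) == 0) {
			*addr = value;
			err = 0;
			break;
		}
	}

	fclose(f);
	return err;
}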
|
||||
|
||||
void test_ksyms(void)
|
||||
{
|
||||
__u64 per_cpu_start_addr = kallsyms_find("__per_cpu_start");
|
||||
__u64 link_fops_addr = kallsyms_find("bpf_link_fops");
|
||||
const char *btf_path = "/sys/kernel/btf/vmlinux";
|
||||
struct test_ksyms *skel;
|
||||
struct test_ksyms__data *data;
|
||||
__u64 link_fops_addr, per_cpu_start_addr;
|
||||
struct stat st;
|
||||
__u64 btf_size;
|
||||
int err;
|
||||
|
||||
err = kallsyms_find("bpf_link_fops", &link_fops_addr);
|
||||
if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
|
||||
return;
|
||||
if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_link_fops' not found\n"))
|
||||
return;
|
||||
|
||||
err = kallsyms_find("__per_cpu_start", &per_cpu_start_addr);
|
||||
if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
|
||||
return;
|
||||
if (CHECK(err == -ENOENT, "ksym_find", "symbol 'per_cpu_start' not found\n"))
|
||||
return;
|
||||
|
||||
if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno))
|
||||
return;
|
||||
btf_size = st.st_size;
|
||||
|
|
|
@ -0,0 +1,88 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2020 Google */
|
||||
|
||||
#include <test_progs.h>
|
||||
#include <bpf/libbpf.h>
|
||||
#include <bpf/btf.h>
|
||||
#include "test_ksyms_btf.skel.h"
|
||||
|
||||
static int duration;
|
||||
|
||||
void test_ksyms_btf(void)
|
||||
{
|
||||
__u64 runqueues_addr, bpf_prog_active_addr;
|
||||
__u32 this_rq_cpu;
|
||||
int this_bpf_prog_active;
|
||||
struct test_ksyms_btf *skel = NULL;
|
||||
struct test_ksyms_btf__data *data;
|
||||
struct btf *btf;
|
||||
int percpu_datasec;
|
||||
int err;
|
||||
|
||||
err = kallsyms_find("runqueues", &runqueues_addr);
|
||||
if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
|
||||
return;
|
||||
if (CHECK(err == -ENOENT, "ksym_find", "symbol 'runqueues' not found\n"))
|
||||
return;
|
||||
|
||||
err = kallsyms_find("bpf_prog_active", &bpf_prog_active_addr);
|
||||
if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
|
||||
return;
|
||||
if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_prog_active' not found\n"))
|
||||
return;
|
||||
|
||||
btf = libbpf_find_kernel_btf();
|
||||
if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n",
|
||||
PTR_ERR(btf)))
|
||||
return;
|
||||
|
||||
percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
|
||||
BTF_KIND_DATASEC);
|
||||
if (percpu_datasec < 0) {
|
||||
printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n",
|
||||
__func__);
|
||||
test__skip();
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
skel = test_ksyms_btf__open_and_load();
|
||||
if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n"))
|
||||
goto cleanup;
|
||||
|
||||
err = test_ksyms_btf__attach(skel);
|
||||
if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
|
||||
goto cleanup;
|
||||
|
||||
/* trigger tracepoint */
|
||||
usleep(1);
|
||||
|
||||
data = skel->data;
|
||||
CHECK(data->out__runqueues_addr != runqueues_addr, "runqueues_addr",
|
||||
"got %llu, exp %llu\n",
|
||||
(unsigned long long)data->out__runqueues_addr,
|
||||
(unsigned long long)runqueues_addr);
|
||||
CHECK(data->out__bpf_prog_active_addr != bpf_prog_active_addr, "bpf_prog_active_addr",
|
||||
"got %llu, exp %llu\n",
|
||||
(unsigned long long)data->out__bpf_prog_active_addr,
|
||||
(unsigned long long)bpf_prog_active_addr);
|
||||
|
||||
CHECK(data->out__rq_cpu == -1, "rq_cpu",
|
||||
"got %u, exp != -1\n", data->out__rq_cpu);
|
||||
CHECK(data->out__bpf_prog_active < 0, "bpf_prog_active",
|
||||
"got %d, exp >= 0\n", data->out__bpf_prog_active);
|
||||
CHECK(data->out__cpu_0_rq_cpu != 0, "cpu_rq(0)->cpu",
|
||||
"got %u, exp 0\n", data->out__cpu_0_rq_cpu);
|
||||
|
||||
this_rq_cpu = data->out__this_rq_cpu;
|
||||
CHECK(this_rq_cpu != data->out__rq_cpu, "this_rq_cpu",
|
||||
"got %u, exp %u\n", this_rq_cpu, data->out__rq_cpu);
|
||||
|
||||
this_bpf_prog_active = data->out__this_bpf_prog_active;
|
||||
CHECK(this_bpf_prog_active != data->out__bpf_prog_active, "this_bpf_prog_active",
|
||||
"got %d, exp %d\n", this_bpf_prog_active,
|
||||
data->out__bpf_prog_active);
|
||||
|
||||
cleanup:
|
||||
btf__free(btf);
|
||||
test_ksyms_btf__destroy(skel);
|
||||
}
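The BPF object loaded by this test is not part of this hunk; based on the fields checked above, its use of typed percpu ksyms and the new helpers looks roughly like the sketch below (the attach point and exact statements are illustrative):

// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* typed percpu ksyms, resolved against kernel BTF at load time */
extern const struct rq runqueues __ksym;
extern const int bpf_prog_active __ksym;

__u64 out__runqueues_addr = -1;
__u64 out__bpf_prog_active_addr = -1;
__u32 out__rq_cpu = -1;
int out__bpf_prog_active = -1;

SEC("raw_tp/sys_enter")
int handler(const void *ctx)
{
	struct rq *rq;
	int *active;

	out__runqueues_addr = (__u64)&runqueues;
	out__bpf_prog_active_addr = (__u64)&bpf_prog_active;

	/* bpf_per_cpu_ptr() may return NULL for an out-of-range cpu */
	rq = bpf_per_cpu_ptr(&runqueues, 0);
	if (rq)
		out__rq_cpu = rq->cpu;

	/* bpf_this_cpu_ptr() never returns NULL */
	active = bpf_this_cpu_ptr(&bpf_prog_active);
	out__bpf_prog_active = *active;

	return 0;
}

char _license[] SEC("license") = "GPL";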
|
|
@ -37,7 +37,7 @@ void test_pinning(void)
|
|||
struct stat statbuf = {};
|
||||
struct bpf_object *obj;
|
||||
struct bpf_map *map;
|
||||
int err;
|
||||
int err, map_fd;
|
||||
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
|
||||
.pin_root_path = custpath,
|
||||
);
|
||||
|
@ -213,6 +213,53 @@ void test_pinning(void)
|
|||
if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
/* remove the custom pin path to re-test it with reuse fd below */
|
||||
err = unlink(custpinpath);
|
||||
if (CHECK(err, "unlink custpinpath", "err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
err = rmdir(custpath);
|
||||
if (CHECK(err, "rmdir custpindir", "err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
bpf_object__close(obj);
|
||||
|
||||
/* test pinning at custom path with reuse fd */
|
||||
obj = bpf_object__open_file(file, NULL);
|
||||
err = libbpf_get_error(obj);
|
||||
if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) {
|
||||
obj = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(__u32),
|
||||
sizeof(__u64), 1, 0);
|
||||
if (CHECK(map_fd < 0, "create pinmap manually", "fd %d\n", map_fd))
|
||||
goto out;
|
||||
|
||||
map = bpf_object__find_map_by_name(obj, "pinmap");
|
||||
if (CHECK(!map, "find map", "NULL map"))
|
||||
goto close_map_fd;
|
||||
|
||||
err = bpf_map__reuse_fd(map, map_fd);
|
||||
if (CHECK(err, "reuse pinmap fd", "err %d errno %d\n", err, errno))
|
||||
goto close_map_fd;
|
||||
|
||||
err = bpf_map__set_pin_path(map, custpinpath);
|
||||
if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
|
||||
goto close_map_fd;
|
||||
|
||||
err = bpf_object__load(obj);
|
||||
if (CHECK(err, "custom load", "err %d errno %d\n", err, errno))
|
||||
goto close_map_fd;
|
||||
|
||||
/* check that pinmap was pinned at the custom path */
|
||||
err = stat(custpinpath, &statbuf);
|
||||
if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
|
||||
goto close_map_fd;
|
||||
|
||||
close_map_fd:
|
||||
close(map_fd);
|
||||
out:
|
||||
unlink(pinpath);
|
||||
unlink(nopinpath);
|
||||
|
|
|
@ -198,7 +198,7 @@ static void test_sockmap_copy(enum bpf_map_type map_type)
|
|||
{
|
||||
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
|
||||
int err, len, src_fd, iter_fd, duration = 0;
|
||||
union bpf_iter_link_info linfo = {0};
|
||||
union bpf_iter_link_info linfo = {};
|
||||
__u32 i, num_sockets, num_elems;
|
||||
struct bpf_iter_sockmap *skel;
|
||||
__s64 *sock_fd = NULL;
|
||||
|
|
|
@ -264,9 +264,19 @@ static int check_error_linum(const struct sk_fds *sk_fds)
|
|||
|
||||
static void check_hdr_and_close_fds(struct sk_fds *sk_fds)
|
||||
{
|
||||
const __u32 expected_inherit_cb_flags =
|
||||
BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
|
||||
BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG |
|
||||
BPF_SOCK_OPS_STATE_CB_FLAG;
|
||||
|
||||
if (sk_fds_shutdown(sk_fds))
|
||||
goto check_linum;
|
||||
|
||||
if (CHECK(expected_inherit_cb_flags != skel->bss->inherit_cb_flags,
|
||||
"Unexpected inherit_cb_flags", "0x%x != 0x%x\n",
|
||||
skel->bss->inherit_cb_flags, expected_inherit_cb_flags))
|
||||
goto check_linum;
|
||||
|
||||
if (check_hdr_stg(&exp_passive_hdr_stg, sk_fds->passive_fd,
|
||||
"passive_hdr_stg"))
|
||||
goto check_linum;
|
||||
|
@ -321,6 +331,8 @@ static void reset_test(void)
|
|||
memset(&skel->bss->active_estab_in, 0, optsize);
|
||||
memset(&skel->bss->active_fin_in, 0, optsize);
|
||||
|
||||
skel->bss->inherit_cb_flags = 0;
|
||||
|
||||
skel->data->test_kind = TCPOPT_EXP;
|
||||
skel->data->test_magic = 0xeB9F;
|
||||
|
||||
|
|
|
@ -0,0 +1,72 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2020 Facebook */
|
||||
#include <test_progs.h>
|
||||
#include "progs/profiler.h"
|
||||
#include "profiler1.skel.h"
|
||||
#include "profiler2.skel.h"
|
||||
#include "profiler3.skel.h"
|
||||
|
||||
static int sanity_run(struct bpf_program *prog)
|
||||
{
|
||||
struct bpf_prog_test_run_attr test_attr = {};
|
||||
__u64 args[] = {1, 2, 3};
|
||||
__u32 duration = 0;
|
||||
int err, prog_fd;
|
||||
|
||||
prog_fd = bpf_program__fd(prog);
|
||||
test_attr.prog_fd = prog_fd;
|
||||
test_attr.ctx_in = args;
|
||||
test_attr.ctx_size_in = sizeof(args);
|
||||
err = bpf_prog_test_run_xattr(&test_attr);
|
||||
if (CHECK(err || test_attr.retval, "test_run",
|
||||
"err %d errno %d retval %d duration %d\n",
|
||||
err, errno, test_attr.retval, duration))
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void test_test_profiler(void)
|
||||
{
|
||||
struct profiler1 *profiler1_skel = NULL;
|
||||
struct profiler2 *profiler2_skel = NULL;
|
||||
struct profiler3 *profiler3_skel = NULL;
|
||||
__u32 duration = 0;
|
||||
int err;
|
||||
|
||||
profiler1_skel = profiler1__open_and_load();
|
||||
if (CHECK(!profiler1_skel, "profiler1_skel_load", "profiler1 skeleton failed\n"))
|
||||
goto cleanup;
|
||||
|
||||
err = profiler1__attach(profiler1_skel);
|
||||
if (CHECK(err, "profiler1_attach", "profiler1 attach failed: %d\n", err))
|
||||
goto cleanup;
|
||||
|
||||
if (sanity_run(profiler1_skel->progs.raw_tracepoint__sched_process_exec))
|
||||
goto cleanup;
|
||||
|
||||
profiler2_skel = profiler2__open_and_load();
|
||||
if (CHECK(!profiler2_skel, "profiler2_skel_load", "profiler2 skeleton failed\n"))
|
||||
goto cleanup;
|
||||
|
||||
err = profiler2__attach(profiler2_skel);
|
||||
if (CHECK(err, "profiler2_attach", "profiler2 attach failed: %d\n", err))
|
||||
goto cleanup;
|
||||
|
||||
if (sanity_run(profiler2_skel->progs.raw_tracepoint__sched_process_exec))
|
||||
goto cleanup;
|
||||
|
||||
profiler3_skel = profiler3__open_and_load();
|
||||
if (CHECK(!profiler3_skel, "profiler3_skel_load", "profiler3 skeleton failed\n"))
|
||||
goto cleanup;
|
||||
|
||||
err = profiler3__attach(profiler3_skel);
|
||||
if (CHECK(err, "profiler3_attach", "profiler3 attach failed: %d\n", err))
|
||||
goto cleanup;
|
||||
|
||||
if (sanity_run(profiler3_skel->progs.raw_tracepoint__sched_process_exec))
|
||||
goto cleanup;
|
||||
cleanup:
|
||||
profiler1__destroy(profiler1_skel);
|
||||
profiler2__destroy(profiler2_skel);
|
||||
profiler3__destroy(profiler3_skel);
|
||||
}
|
|
@@ -25,7 +25,7 @@ void test_xdp_noinline(void)
__u8 flags;
|
||||
} real_def = {.dst = MAGIC_VAL};
|
||||
__u32 ch_key = 11, real_num = 3;
|
||||
__u32 duration, retval, size;
|
||||
__u32 duration = 0, retval, size;
|
||||
int err, i;
|
||||
__u64 bytes = 0, pkts = 0;
|
||||
char buf[128];
|
||||
|
|
|
@@ -23,6 +23,10 @@
#define TCP_CA_NAME_MAX 16
|
||||
#endif
|
||||
|
||||
#ifndef TCP_NOTSENT_LOWAT
|
||||
#define TCP_NOTSENT_LOWAT 25
|
||||
#endif
|
||||
|
||||
#ifndef IFNAMSIZ
|
||||
#define IFNAMSIZ 16
|
||||
#endif
|
||||
|
@@ -128,6 +132,18 @@ static __inline int set_keepalive(struct bpf_sock_addr *ctx)
return 0;
|
||||
}
|
||||
|
||||
static __inline int set_notsent_lowat(struct bpf_sock_addr *ctx)
|
||||
{
|
||||
int lowat = 65535;
|
||||
|
||||
if (ctx->type == SOCK_STREAM) {
|
||||
if (bpf_setsockopt(ctx, SOL_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat)))
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("cgroup/connect4")
|
||||
int connect_v4_prog(struct bpf_sock_addr *ctx)
|
||||
{
|
||||
|
@@ -148,6 +164,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
if (set_keepalive(ctx))
|
||||
return 0;
|
||||
|
||||
if (set_notsent_lowat(ctx))
|
||||
return 0;
|
||||
|
||||
if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
|
||||
return 0;
|
||||
else if (ctx->type == SOCK_STREAM)
|
||||
|
|
|
@@ -0,0 +1,177 @@
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2020 Facebook */
|
||||
#pragma once
|
||||
|
||||
#define TASK_COMM_LEN 16
|
||||
#define MAX_ANCESTORS 4
|
||||
#define MAX_PATH 256
|
||||
#define KILL_TARGET_LEN 64
|
||||
#define CTL_MAXNAME 10
|
||||
#define MAX_ARGS_LEN 4096
|
||||
#define MAX_FILENAME_LEN 512
|
||||
#define MAX_ENVIRON_LEN 8192
|
||||
#define MAX_PATH_DEPTH 32
|
||||
#define MAX_FILEPATH_LENGTH (MAX_PATH_DEPTH * MAX_PATH)
|
||||
#define MAX_CGROUPS_PATH_DEPTH 8
|
||||
|
||||
#define MAX_METADATA_PAYLOAD_LEN TASK_COMM_LEN
|
||||
|
||||
#define MAX_CGROUP_PAYLOAD_LEN \
|
||||
(MAX_PATH * 2 + (MAX_PATH * MAX_CGROUPS_PATH_DEPTH))
|
||||
|
||||
#define MAX_CAP_PAYLOAD_LEN (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
|
||||
|
||||
#define MAX_SYSCTL_PAYLOAD_LEN \
|
||||
(MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + CTL_MAXNAME + MAX_PATH)
|
||||
|
||||
#define MAX_KILL_PAYLOAD_LEN \
|
||||
(MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + TASK_COMM_LEN + \
|
||||
KILL_TARGET_LEN)
|
||||
|
||||
#define MAX_EXEC_PAYLOAD_LEN \
|
||||
(MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILENAME_LEN + \
|
||||
MAX_ARGS_LEN + MAX_ENVIRON_LEN)
|
||||
|
||||
#define MAX_FILEMOD_PAYLOAD_LEN \
|
||||
(MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILEPATH_LENGTH + \
|
||||
MAX_FILEPATH_LENGTH)
|
||||
|
||||
enum data_type {
|
||||
INVALID_EVENT,
|
||||
EXEC_EVENT,
|
||||
FORK_EVENT,
|
||||
KILL_EVENT,
|
||||
SYSCTL_EVENT,
|
||||
FILEMOD_EVENT,
|
||||
MAX_DATA_TYPE_EVENT
|
||||
};
|
||||
|
||||
enum filemod_type {
|
||||
FMOD_OPEN,
|
||||
FMOD_LINK,
|
||||
FMOD_SYMLINK,
|
||||
};
|
||||
|
||||
struct ancestors_data_t {
|
||||
pid_t ancestor_pids[MAX_ANCESTORS];
|
||||
uint32_t ancestor_exec_ids[MAX_ANCESTORS];
|
||||
uint64_t ancestor_start_times[MAX_ANCESTORS];
|
||||
uint32_t num_ancestors;
|
||||
};
|
||||
|
||||
struct var_metadata_t {
|
||||
enum data_type type;
|
||||
pid_t pid;
|
||||
uint32_t exec_id;
|
||||
uid_t uid;
|
||||
gid_t gid;
|
||||
uint64_t start_time;
|
||||
uint32_t cpu_id;
|
||||
uint64_t bpf_stats_num_perf_events;
|
||||
uint64_t bpf_stats_start_ktime_ns;
|
||||
uint8_t comm_length;
|
||||
};
|
||||
|
||||
struct cgroup_data_t {
|
||||
ino_t cgroup_root_inode;
|
||||
ino_t cgroup_proc_inode;
|
||||
uint64_t cgroup_root_mtime;
|
||||
uint64_t cgroup_proc_mtime;
|
||||
uint16_t cgroup_root_length;
|
||||
uint16_t cgroup_proc_length;
|
||||
uint16_t cgroup_full_length;
|
||||
int cgroup_full_path_root_pos;
|
||||
};
|
||||
|
||||
struct var_sysctl_data_t {
|
||||
struct var_metadata_t meta;
|
||||
struct cgroup_data_t cgroup_data;
|
||||
struct ancestors_data_t ancestors_info;
|
||||
uint8_t sysctl_val_length;
|
||||
uint16_t sysctl_path_length;
|
||||
char payload[MAX_SYSCTL_PAYLOAD_LEN];
|
||||
};
|
||||
|
||||
struct var_kill_data_t {
|
||||
struct var_metadata_t meta;
|
||||
struct cgroup_data_t cgroup_data;
|
||||
struct ancestors_data_t ancestors_info;
|
||||
pid_t kill_target_pid;
|
||||
int kill_sig;
|
||||
uint32_t kill_count;
|
||||
uint64_t last_kill_time;
|
||||
uint8_t kill_target_name_length;
|
||||
uint8_t kill_target_cgroup_proc_length;
|
||||
char payload[MAX_KILL_PAYLOAD_LEN];
|
||||
size_t payload_length;
|
||||
};
|
||||
|
||||
struct var_exec_data_t {
|
||||
struct var_metadata_t meta;
|
||||
struct cgroup_data_t cgroup_data;
|
||||
pid_t parent_pid;
|
||||
uint32_t parent_exec_id;
|
||||
uid_t parent_uid;
|
||||
uint64_t parent_start_time;
|
||||
uint16_t bin_path_length;
|
||||
uint16_t cmdline_length;
|
||||
uint16_t environment_length;
|
||||
char payload[MAX_EXEC_PAYLOAD_LEN];
|
||||
};
|
||||
|
||||
struct var_fork_data_t {
|
||||
struct var_metadata_t meta;
|
||||
pid_t parent_pid;
|
||||
uint32_t parent_exec_id;
|
||||
uint64_t parent_start_time;
|
||||
char payload[MAX_METADATA_PAYLOAD_LEN];
|
||||
};
|
||||
|
||||
struct var_filemod_data_t {
|
||||
struct var_metadata_t meta;
|
||||
struct cgroup_data_t cgroup_data;
|
||||
enum filemod_type fmod_type;
|
||||
unsigned int dst_flags;
|
||||
uint32_t src_device_id;
|
||||
uint32_t dst_device_id;
|
||||
ino_t src_inode;
|
||||
ino_t dst_inode;
|
||||
uint16_t src_filepath_length;
|
||||
uint16_t dst_filepath_length;
|
||||
char payload[MAX_FILEMOD_PAYLOAD_LEN];
|
||||
};
|
||||
|
||||
struct profiler_config_struct {
|
||||
bool fetch_cgroups_from_bpf;
|
||||
ino_t cgroup_fs_inode;
|
||||
ino_t cgroup_login_session_inode;
|
||||
uint64_t kill_signals_mask;
|
||||
ino_t inode_filter;
|
||||
uint32_t stale_info_secs;
|
||||
bool use_variable_buffers;
|
||||
bool read_environ_from_exec;
|
||||
bool enable_cgroup_v1_resolver;
|
||||
};
|
||||
|
||||
struct bpf_func_stats_data {
|
||||
uint64_t time_elapsed_ns;
|
||||
uint64_t num_executions;
|
||||
uint64_t num_perf_events;
|
||||
};
|
||||
|
||||
struct bpf_func_stats_ctx {
|
||||
uint64_t start_time_ns;
|
||||
struct bpf_func_stats_data* bpf_func_stats_data_val;
|
||||
};
|
||||
|
||||
enum bpf_function_id {
|
||||
profiler_bpf_proc_sys_write,
|
||||
profiler_bpf_sched_process_exec,
|
||||
profiler_bpf_sched_process_exit,
|
||||
profiler_bpf_sys_enter_kill,
|
||||
profiler_bpf_do_filp_open_ret,
|
||||
profiler_bpf_sched_process_fork,
|
||||
profiler_bpf_vfs_link,
|
||||
profiler_bpf_vfs_symlink,
|
||||
profiler_bpf_max_function_id
|
||||
};
|
|
@@ -0,0 +1,969 @@
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2020 Facebook */
|
||||
#include <vmlinux.h>
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
#include "profiler.h"
|
||||
|
||||
#ifndef NULL
|
||||
#define NULL 0
|
||||
#endif
|
||||
|
||||
#define O_WRONLY 00000001
|
||||
#define O_RDWR 00000002
|
||||
#define O_DIRECTORY 00200000
|
||||
#define __O_TMPFILE 020000000
|
||||
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
|
||||
#define MAX_ERRNO 4095
|
||||
#define S_IFMT 00170000
|
||||
#define S_IFSOCK 0140000
|
||||
#define S_IFLNK 0120000
|
||||
#define S_IFREG 0100000
|
||||
#define S_IFBLK 0060000
|
||||
#define S_IFDIR 0040000
|
||||
#define S_IFCHR 0020000
|
||||
#define S_IFIFO 0010000
|
||||
#define S_ISUID 0004000
|
||||
#define S_ISGID 0002000
|
||||
#define S_ISVTX 0001000
|
||||
#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK)
|
||||
#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
|
||||
#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR)
|
||||
#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
|
||||
#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
|
||||
#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)
|
||||
#define IS_ERR_VALUE(x) (unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO
|
||||
|
||||
#define KILL_DATA_ARRAY_SIZE 8
|
||||
|
||||
struct var_kill_data_arr_t {
|
||||
struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
|
||||
};
|
||||
|
||||
union any_profiler_data_t {
|
||||
struct var_exec_data_t var_exec;
|
||||
struct var_kill_data_t var_kill;
|
||||
struct var_sysctl_data_t var_sysctl;
|
||||
struct var_filemod_data_t var_filemod;
|
||||
struct var_fork_data_t var_fork;
|
||||
struct var_kill_data_arr_t var_kill_data_arr;
|
||||
};
|
||||
|
||||
volatile struct profiler_config_struct bpf_config = {};
|
||||
|
||||
#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
|
||||
#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
|
||||
#define CGROUP_LOGIN_SESSION_INODE \
|
||||
(bpf_config.cgroup_login_session_inode)
|
||||
#define KILL_SIGNALS (bpf_config.kill_signals_mask)
|
||||
#define STALE_INFO (bpf_config.stale_info_secs)
|
||||
#define INODE_FILTER (bpf_config.inode_filter)
|
||||
#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
|
||||
#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)
|
||||
|
||||
struct kernfs_iattrs___52 {
|
||||
struct iattr ia_iattr;
|
||||
};
|
||||
|
||||
struct kernfs_node___52 {
|
||||
union /* kernfs_node_id */ {
|
||||
struct {
|
||||
u32 ino;
|
||||
u32 generation;
|
||||
};
|
||||
u64 id;
|
||||
} id;
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
||||
__uint(max_entries, 1);
|
||||
__type(key, u32);
|
||||
__type(value, union any_profiler_data_t);
|
||||
} data_heap SEC(".maps");
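/* Single-entry per-CPU array used as scratch space for the large event
 * structs in union any_profiler_data_t; they far exceed the 512-byte BPF
 * stack limit, so they cannot live on the stack.
 */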
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(int));
|
||||
} events SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__uint(max_entries, KILL_DATA_ARRAY_SIZE);
|
||||
__type(key, u32);
|
||||
__type(value, struct var_kill_data_arr_t);
|
||||
} var_tpid_to_data SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
||||
__uint(max_entries, profiler_bpf_max_function_id);
|
||||
__type(key, u32);
|
||||
__type(value, struct bpf_func_stats_data);
|
||||
} bpf_func_stats SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__type(key, u32);
|
||||
__type(value, bool);
|
||||
__uint(max_entries, 16);
|
||||
} allowed_devices SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__type(key, u64);
|
||||
__type(value, bool);
|
||||
__uint(max_entries, 1024);
|
||||
} allowed_file_inodes SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__type(key, u64);
|
||||
__type(value, bool);
|
||||
__uint(max_entries, 1024);
|
||||
} allowed_directory_inodes SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__type(key, u32);
|
||||
__type(value, bool);
|
||||
__uint(max_entries, 16);
|
||||
} disallowed_exec_inodes SEC(".maps");
|
||||
|
||||
#ifndef ARRAY_SIZE
|
||||
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
|
||||
#endif
|
||||
|
||||
static INLINE bool IS_ERR(const void* ptr)
|
||||
{
|
||||
return IS_ERR_VALUE((unsigned long)ptr);
|
||||
}
|
||||
|
||||
static INLINE u32 get_userspace_pid()
|
||||
{
|
||||
return bpf_get_current_pid_tgid() >> 32;
|
||||
}
|
||||
|
||||
static INLINE bool is_init_process(u32 tgid)
|
||||
{
|
||||
return tgid == 1 || tgid == 0;
|
||||
}
|
||||
|
||||
static INLINE unsigned long
|
||||
probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
|
||||
{
|
||||
len = len < max ? len : max;
|
||||
if (len > 1) {
|
||||
if (bpf_probe_read(dst, len, src))
|
||||
return 0;
|
||||
} else if (len == 1) {
|
||||
if (bpf_probe_read(dst, 1, src))
|
||||
return 0;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
|
||||
int spid)
|
||||
{
|
||||
#ifdef UNROLL
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
|
||||
if (arr_struct->array[i].meta.pid == spid)
|
||||
return i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
static INLINE void populate_ancestors(struct task_struct* task,
|
||||
struct ancestors_data_t* ancestors_data)
|
||||
{
|
||||
struct task_struct* parent = task;
|
||||
u32 num_ancestors, ppid;
|
||||
|
||||
ancestors_data->num_ancestors = 0;
|
||||
#ifdef UNROLL
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
|
||||
parent = BPF_CORE_READ(parent, real_parent);
|
||||
if (parent == NULL)
|
||||
break;
|
||||
ppid = BPF_CORE_READ(parent, tgid);
|
||||
if (is_init_process(ppid))
|
||||
break;
|
||||
ancestors_data->ancestor_pids[num_ancestors] = ppid;
|
||||
ancestors_data->ancestor_exec_ids[num_ancestors] =
|
||||
BPF_CORE_READ(parent, self_exec_id);
|
||||
ancestors_data->ancestor_start_times[num_ancestors] =
|
||||
BPF_CORE_READ(parent, start_time);
|
||||
ancestors_data->num_ancestors = num_ancestors;
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
|
||||
struct kernfs_node* cgroup_root_node,
|
||||
void* payload,
|
||||
int* root_pos)
|
||||
{
|
||||
void* payload_start = payload;
|
||||
size_t filepart_length;
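/* Walk from the task's cgroup kernfs node up toward cgroup_root_node,
 * appending each component name to the payload; root_pos records the
 * offset at which the root node's name was written.
 */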
|
||||
|
||||
#ifdef UNROLL
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
|
||||
filepart_length =
|
||||
bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(cgroup_node, name));
|
||||
if (!cgroup_node)
|
||||
return payload;
|
||||
if (cgroup_node == cgroup_root_node)
|
||||
*root_pos = payload - payload_start;
|
||||
if (filepart_length <= MAX_PATH) {
|
||||
barrier_var(filepart_length);
|
||||
payload += filepart_length;
|
||||
}
|
||||
cgroup_node = BPF_CORE_READ(cgroup_node, parent);
|
||||
}
|
||||
return payload;
|
||||
}
|
||||
|
||||
static ino_t get_inode_from_kernfs(struct kernfs_node* node)
|
||||
{
|
||||
struct kernfs_node___52* node52 = (void*)node;
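/* Older kernels store the inode number as id.ino inside a union (the ___52
 * layout above); newer ones use a plain u64 id. bpf_core_field_exists()
 * lets CO-RE pick the matching access at load time.
 */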
|
||||
|
||||
if (bpf_core_field_exists(node52->id.ino)) {
|
||||
barrier_var(node52);
|
||||
return BPF_CORE_READ(node52, id.ino);
|
||||
} else {
|
||||
barrier_var(node);
|
||||
return (u64)BPF_CORE_READ(node, id);
|
||||
}
|
||||
}
|
||||
|
||||
int pids_cgrp_id = 1;
|
||||
|
||||
static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
|
||||
struct task_struct* task,
|
||||
void* payload)
|
||||
{
|
||||
struct kernfs_node* root_kernfs =
|
||||
BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
|
||||
struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
|
||||
|
||||
if (ENABLE_CGROUP_V1_RESOLVER) {
|
||||
#ifdef UNROLL
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
|
||||
struct cgroup_subsys_state* subsys =
|
||||
BPF_CORE_READ(task, cgroups, subsys[i]);
|
||||
if (subsys != NULL) {
|
||||
int subsys_id = BPF_CORE_READ(subsys, ss, id);
|
||||
if (subsys_id == pids_cgrp_id) {
|
||||
proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
|
||||
root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
|
||||
cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);
|
||||
|
||||
if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
|
||||
cgroup_data->cgroup_root_mtime =
|
||||
BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
|
||||
cgroup_data->cgroup_proc_mtime =
|
||||
BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
|
||||
} else {
|
||||
struct kernfs_iattrs___52* root_iattr =
|
||||
(struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
|
||||
cgroup_data->cgroup_root_mtime =
|
||||
BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);
|
||||
|
||||
struct kernfs_iattrs___52* proc_iattr =
|
||||
(struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
|
||||
cgroup_data->cgroup_proc_mtime =
|
||||
BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
|
||||
}
|
||||
|
||||
cgroup_data->cgroup_root_length = 0;
|
||||
cgroup_data->cgroup_proc_length = 0;
|
||||
cgroup_data->cgroup_full_length = 0;
|
||||
|
||||
size_t cgroup_root_length =
|
||||
bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name));
|
||||
barrier_var(cgroup_root_length);
|
||||
if (cgroup_root_length <= MAX_PATH) {
|
||||
barrier_var(cgroup_root_length);
|
||||
cgroup_data->cgroup_root_length = cgroup_root_length;
|
||||
payload += cgroup_root_length;
|
||||
}
|
||||
|
||||
size_t cgroup_proc_length =
|
||||
bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name));
|
||||
barrier_var(cgroup_proc_length);
|
||||
if (cgroup_proc_length <= MAX_PATH) {
|
||||
barrier_var(cgroup_proc_length);
|
||||
cgroup_data->cgroup_proc_length = cgroup_proc_length;
|
||||
payload += cgroup_proc_length;
|
||||
}
|
||||
|
||||
if (FETCH_CGROUPS_FROM_BPF) {
|
||||
cgroup_data->cgroup_full_path_root_pos = -1;
|
||||
void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
|
||||
&cgroup_data->cgroup_full_path_root_pos);
|
||||
cgroup_data->cgroup_full_length = payload_end_pos - payload;
|
||||
payload = payload_end_pos;
|
||||
}
|
||||
|
||||
return (void*)payload;
|
||||
}
|
||||
|
||||
static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
|
||||
struct task_struct* task,
|
||||
u32 pid, void* payload)
|
||||
{
|
||||
u64 uid_gid = bpf_get_current_uid_gid();
|
||||
|
||||
metadata->uid = (u32)uid_gid;
|
||||
metadata->gid = uid_gid >> 32;
|
||||
metadata->pid = pid;
|
||||
metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
|
||||
metadata->start_time = BPF_CORE_READ(task, start_time);
|
||||
metadata->comm_length = 0;
|
||||
|
||||
size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
|
||||
barrier_var(comm_length);
|
||||
if (comm_length <= TASK_COMM_LEN) {
|
||||
barrier_var(comm_length);
|
||||
metadata->comm_length = comm_length;
|
||||
payload += comm_length;
|
||||
}
|
||||
|
||||
return (void*)payload;
|
||||
}
|
||||
|
||||
static INLINE struct var_kill_data_t*
|
||||
get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
|
||||
{
|
||||
int zero = 0;
|
||||
struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
|
||||
|
||||
if (kill_data == NULL)
|
||||
return NULL;
|
||||
struct task_struct* task = (struct task_struct*)bpf_get_current_task();
|
||||
|
||||
void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
|
||||
payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
|
||||
size_t payload_length = payload - (void*)kill_data->payload;
|
||||
kill_data->payload_length = payload_length;
|
||||
populate_ancestors(task, &kill_data->ancestors_info);
|
||||
kill_data->meta.type = KILL_EVENT;
|
||||
kill_data->kill_target_pid = tpid;
|
||||
kill_data->kill_sig = sig;
|
||||
kill_data->kill_count = 1;
|
||||
kill_data->last_kill_time = bpf_ktime_get_ns();
|
||||
return kill_data;
|
||||
}
|
||||
|
||||
static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
|
||||
{
|
||||
if ((KILL_SIGNALS & (1ULL << sig)) == 0)
|
||||
return 0;
|
||||
|
||||
u32 spid = get_userspace_pid();
|
||||
struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
|
||||
|
||||
if (arr_struct == NULL) {
|
||||
struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
|
||||
int zero = 0;
|
||||
|
||||
if (kill_data == NULL)
|
||||
return 0;
|
||||
arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
|
||||
if (arr_struct == NULL)
|
||||
return 0;
|
||||
bpf_probe_read(&arr_struct->array[0], sizeof(arr_struct->array[0]), kill_data);
|
||||
} else {
|
||||
int index = get_var_spid_index(arr_struct, spid);
|
||||
|
||||
if (index == -1) {
|
||||
struct var_kill_data_t* kill_data =
|
||||
get_var_kill_data(ctx, spid, tpid, sig);
|
||||
if (kill_data == NULL)
|
||||
return 0;
|
||||
#ifdef UNROLL
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
|
||||
if (arr_struct->array[i].meta.pid == 0) {
|
||||
bpf_probe_read(&arr_struct->array[i],
|
||||
sizeof(arr_struct->array[i]), kill_data);
|
||||
bpf_map_update_elem(&var_tpid_to_data, &tpid,
|
||||
arr_struct, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct var_kill_data_t* kill_data = &arr_struct->array[index];
|
||||
|
||||
u64 delta_sec =
|
||||
(bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;
|
||||
|
||||
if (delta_sec < STALE_INFO) {
|
||||
kill_data->kill_count++;
|
||||
kill_data->last_kill_time = bpf_ktime_get_ns();
|
||||
bpf_probe_read(&arr_struct->array[index],
|
||||
sizeof(arr_struct->array[index]),
|
||||
kill_data);
|
||||
} else {
|
||||
struct var_kill_data_t* kill_data =
|
||||
get_var_kill_data(ctx, spid, tpid, sig);
|
||||
if (kill_data == NULL)
|
||||
return 0;
|
||||
bpf_probe_read(&arr_struct->array[index],
|
||||
sizeof(arr_struct->array[index]),
|
||||
kill_data);
|
||||
}
|
||||
}
|
||||
bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
|
||||
enum bpf_function_id func_id)
|
||||
{
|
||||
int func_id_key = func_id;
|
||||
|
||||
bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
|
||||
bpf_stat_ctx->bpf_func_stats_data_val =
|
||||
bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
|
||||
if (bpf_stat_ctx->bpf_func_stats_data_val)
|
||||
bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
|
||||
}
|
||||
|
||||
static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
|
||||
{
|
||||
if (bpf_stat_ctx->bpf_func_stats_data_val)
|
||||
bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
|
||||
bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
|
||||
}
|
||||
|
||||
static INLINE void
|
||||
bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
|
||||
struct var_metadata_t* meta)
|
||||
{
|
||||
if (bpf_stat_ctx->bpf_func_stats_data_val) {
|
||||
bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
|
||||
meta->bpf_stats_num_perf_events =
|
||||
bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
|
||||
}
|
||||
meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
|
||||
meta->cpu_id = bpf_get_smp_processor_id();
|
||||
}
|
||||
|
||||
static INLINE size_t
|
||||
read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
|
||||
{
|
||||
size_t length = 0;
|
||||
size_t filepart_length;
|
||||
struct dentry* parent_dentry;
|
||||
|
||||
#ifdef UNROLL
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 0; i < MAX_PATH_DEPTH; i++) {
|
||||
filepart_length = bpf_probe_read_str(payload, MAX_PATH,
|
||||
BPF_CORE_READ(filp_dentry, d_name.name));
|
||||
barrier_var(filepart_length);
|
||||
if (filepart_length > MAX_PATH)
|
||||
break;
|
||||
barrier_var(filepart_length);
|
||||
payload += filepart_length;
|
||||
length += filepart_length;
|
||||
|
||||
parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
|
||||
if (filp_dentry == parent_dentry)
|
||||
break;
|
||||
filp_dentry = parent_dentry;
|
||||
}
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
static INLINE bool
|
||||
is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
|
||||
{
|
||||
struct dentry* parent_dentry;
|
||||
#ifdef UNROLL
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 0; i < MAX_PATH_DEPTH; i++) {
|
||||
u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
|
||||
bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);
|
||||
|
||||
if (allowed_dir != NULL)
|
||||
return true;
|
||||
parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
|
||||
if (filp_dentry == parent_dentry)
|
||||
break;
|
||||
filp_dentry = parent_dentry;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
|
||||
u32* device_id,
|
||||
u64* file_ino)
|
||||
{
|
||||
u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
|
||||
*device_id = dev_id;
|
||||
bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);
|
||||
|
||||
if (allowed_device == NULL)
|
||||
return false;
|
||||
|
||||
u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
|
||||
*file_ino = ino;
|
||||
bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);
|
||||
|
||||
if (allowed_file == NULL)
|
||||
if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
SEC("kprobe/proc_sys_write")
|
||||
ssize_t BPF_KPROBE(kprobe__proc_sys_write,
|
||||
struct file* filp, const char* buf,
|
||||
size_t count, loff_t* ppos)
|
||||
{
|
||||
struct bpf_func_stats_ctx stats_ctx;
|
||||
bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);
|
||||
|
||||
u32 pid = get_userspace_pid();
|
||||
int zero = 0;
|
||||
struct var_sysctl_data_t* sysctl_data =
|
||||
bpf_map_lookup_elem(&data_heap, &zero);
|
||||
if (!sysctl_data)
|
||||
goto out;
|
||||
|
||||
struct task_struct* task = (struct task_struct*)bpf_get_current_task();
|
||||
sysctl_data->meta.type = SYSCTL_EVENT;
|
||||
void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
|
||||
payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);
|
||||
|
||||
populate_ancestors(task, &sysctl_data->ancestors_info);
|
||||
|
||||
sysctl_data->sysctl_val_length = 0;
|
||||
sysctl_data->sysctl_path_length = 0;
|
||||
|
||||
size_t sysctl_val_length = bpf_probe_read_str(payload, CTL_MAXNAME, buf);
|
||||
barrier_var(sysctl_val_length);
|
||||
if (sysctl_val_length <= CTL_MAXNAME) {
|
||||
barrier_var(sysctl_val_length);
|
||||
sysctl_data->sysctl_val_length = sysctl_val_length;
|
||||
payload += sysctl_val_length;
|
||||
}
|
||||
|
||||
size_t sysctl_path_length = bpf_probe_read_str(payload, MAX_PATH,
|
||||
BPF_CORE_READ(filp, f_path.dentry, d_name.name));
|
||||
barrier_var(sysctl_path_length);
|
||||
if (sysctl_path_length <= MAX_PATH) {
|
||||
barrier_var(sysctl_path_length);
|
||||
sysctl_data->sysctl_path_length = sysctl_path_length;
|
||||
payload += sysctl_path_length;
|
||||
}
|
||||
|
||||
bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
|
||||
unsigned long data_len = payload - (void*)sysctl_data;
|
||||
data_len = data_len > sizeof(struct var_sysctl_data_t)
|
||||
? sizeof(struct var_sysctl_data_t)
|
||||
: data_len;
|
||||
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
|
||||
out:
|
||||
bpf_stats_exit(&stats_ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tracepoint/syscalls/sys_enter_kill")
|
||||
int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
|
||||
{
|
||||
struct bpf_func_stats_ctx stats_ctx;
|
||||
|
||||
bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
|
||||
int pid = ctx->args[0];
|
||||
int sig = ctx->args[1];
|
||||
int ret = trace_var_sys_kill(ctx, pid, sig);
|
||||
bpf_stats_exit(&stats_ctx);
|
||||
return ret;
|
||||
};
|
||||
|
||||
SEC("raw_tracepoint/sched_process_exit")
|
||||
int raw_tracepoint__sched_process_exit(void* ctx)
|
||||
{
|
||||
int zero = 0;
|
||||
struct bpf_func_stats_ctx stats_ctx;
|
||||
bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);
|
||||
|
||||
u32 tpid = get_userspace_pid();
|
||||
|
||||
struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
|
||||
struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
|
||||
|
||||
if (arr_struct == NULL || kill_data == NULL)
|
||||
goto out;
|
||||
|
||||
struct task_struct* task = (struct task_struct*)bpf_get_current_task();
|
||||
struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
|
||||
|
||||
#ifdef UNROLL
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
|
||||
struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
|
||||
|
||||
if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) {
|
||||
bpf_probe_read(kill_data, sizeof(*past_kill_data), past_kill_data);
|
||||
void* payload = kill_data->payload;
|
||||
size_t offset = kill_data->payload_length;
|
||||
if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
|
||||
return 0;
|
||||
payload += offset;
|
||||
|
||||
kill_data->kill_target_name_length = 0;
|
||||
kill_data->kill_target_cgroup_proc_length = 0;
|
||||
|
||||
size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
|
||||
barrier_var(comm_length);
|
||||
if (comm_length <= TASK_COMM_LEN) {
|
||||
barrier_var(comm_length);
|
||||
kill_data->kill_target_name_length = comm_length;
|
||||
payload += comm_length;
|
||||
}
|
||||
|
||||
size_t cgroup_proc_length = bpf_probe_read_str(payload, KILL_TARGET_LEN,
|
||||
BPF_CORE_READ(proc_kernfs, name));
|
||||
barrier_var(cgroup_proc_length);
|
||||
if (cgroup_proc_length <= KILL_TARGET_LEN) {
|
||||
barrier_var(cgroup_proc_length);
|
||||
kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
|
||||
payload += cgroup_proc_length;
|
||||
}
|
||||
|
||||
bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
|
||||
unsigned long data_len = (void*)payload - (void*)kill_data;
|
||||
data_len = data_len > sizeof(struct var_kill_data_t)
|
||||
? sizeof(struct var_kill_data_t)
|
||||
: data_len;
|
||||
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
|
||||
}
|
||||
}
|
||||
bpf_map_delete_elem(&var_tpid_to_data, &tpid);
|
||||
out:
|
||||
bpf_stats_exit(&stats_ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("raw_tracepoint/sched_process_exec")
|
||||
int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
|
||||
{
|
||||
struct bpf_func_stats_ctx stats_ctx;
|
||||
bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);
|
||||
|
||||
struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
|
||||
u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);
|
||||
|
||||
bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
|
||||
if (should_filter_binprm != NULL)
|
||||
goto out;
|
||||
|
||||
int zero = 0;
|
||||
struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
|
||||
if (!proc_exec_data)
|
||||
goto out;
|
||||
|
||||
if (INODE_FILTER && inode != INODE_FILTER)
|
||||
return 0;
|
||||
|
||||
u32 pid = get_userspace_pid();
|
||||
struct task_struct* task = (struct task_struct*)bpf_get_current_task();
|
||||
|
||||
proc_exec_data->meta.type = EXEC_EVENT;
|
||||
proc_exec_data->bin_path_length = 0;
|
||||
proc_exec_data->cmdline_length = 0;
|
||||
proc_exec_data->environment_length = 0;
|
||||
void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
|
||||
proc_exec_data->payload);
|
||||
payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);
|
||||
|
||||
struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
|
||||
proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
|
||||
proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
|
||||
proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
|
||||
proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);
|
||||
|
||||
const char* filename = BPF_CORE_READ(bprm, filename);
|
||||
size_t bin_path_length = bpf_probe_read_str(payload, MAX_FILENAME_LEN, filename);
|
||||
barrier_var(bin_path_length);
|
||||
if (bin_path_length <= MAX_FILENAME_LEN) {
|
||||
barrier_var(bin_path_length);
|
||||
proc_exec_data->bin_path_length = bin_path_length;
|
||||
payload += bin_path_length;
|
||||
}
|
||||
|
||||
void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
|
||||
void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
|
||||
unsigned int cmdline_length = probe_read_lim(payload, arg_start,
|
||||
arg_end - arg_start, MAX_ARGS_LEN);
|
||||
|
||||
if (cmdline_length <= MAX_ARGS_LEN) {
|
||||
barrier_var(cmdline_length);
|
||||
proc_exec_data->cmdline_length = cmdline_length;
|
||||
payload += cmdline_length;
|
||||
}
|
||||
|
||||
if (READ_ENVIRON_FROM_EXEC) {
|
||||
void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
|
||||
void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
|
||||
unsigned long env_len = probe_read_lim(payload, env_start,
|
||||
env_end - env_start, MAX_ENVIRON_LEN);
|
||||
if (env_len <= MAX_ENVIRON_LEN) {
|
||||
proc_exec_data->environment_length = env_len;
|
||||
payload += env_len;
|
||||
}
|
||||
}
|
||||
|
||||
bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
|
||||
unsigned long data_len = payload - (void*)proc_exec_data;
|
||||
data_len = data_len > sizeof(struct var_exec_data_t)
|
||||
? sizeof(struct var_exec_data_t)
|
||||
: data_len;
|
||||
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
|
||||
out:
|
||||
bpf_stats_exit(&stats_ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("kretprobe/do_filp_open")
|
||||
int kprobe_ret__do_filp_open(struct pt_regs* ctx)
|
||||
{
|
||||
struct bpf_func_stats_ctx stats_ctx;
|
||||
bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);
|
||||
|
||||
struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);
|
||||
|
||||
if (filp == NULL || IS_ERR(filp))
|
||||
goto out;
|
||||
unsigned int flags = BPF_CORE_READ(filp, f_flags);
|
||||
if ((flags & (O_RDWR | O_WRONLY)) == 0)
|
||||
goto out;
|
||||
if ((flags & O_TMPFILE) > 0)
|
||||
goto out;
|
||||
struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
|
||||
umode_t mode = BPF_CORE_READ(file_inode, i_mode);
|
||||
if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
|
||||
S_ISSOCK(mode))
|
||||
goto out;
|
||||
|
||||
struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
|
||||
u32 device_id = 0;
|
||||
u64 file_ino = 0;
|
||||
if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
|
||||
goto out;
|
||||
|
||||
int zero = 0;
|
||||
struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
|
||||
if (!filemod_data)
|
||||
goto out;
|
||||
|
||||
u32 pid = get_userspace_pid();
|
||||
struct task_struct* task = (struct task_struct*)bpf_get_current_task();
|
||||
|
||||
filemod_data->meta.type = FILEMOD_EVENT;
|
||||
filemod_data->fmod_type = FMOD_OPEN;
|
||||
filemod_data->dst_flags = flags;
|
||||
filemod_data->src_inode = 0;
|
||||
filemod_data->dst_inode = file_ino;
|
||||
filemod_data->src_device_id = 0;
|
||||
filemod_data->dst_device_id = device_id;
|
||||
filemod_data->src_filepath_length = 0;
|
||||
filemod_data->dst_filepath_length = 0;
|
||||
|
||||
void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
|
||||
filemod_data->payload);
|
||||
payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
|
||||
|
||||
size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
|
||||
barrier_var(len);
|
||||
if (len <= MAX_FILEPATH_LENGTH) {
|
||||
barrier_var(len);
|
||||
payload += len;
|
||||
filemod_data->dst_filepath_length = len;
|
||||
}
|
||||
bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
|
||||
unsigned long data_len = payload - (void*)filemod_data;
|
||||
data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
|
||||
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
|
||||
out:
|
||||
bpf_stats_exit(&stats_ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("kprobe/vfs_link")
|
||||
int BPF_KPROBE(kprobe__vfs_link,
|
||||
struct dentry* old_dentry, struct inode* dir,
|
||||
struct dentry* new_dentry, struct inode** delegated_inode)
|
||||
{
|
||||
struct bpf_func_stats_ctx stats_ctx;
|
||||
bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
|
||||
|
||||
u32 src_device_id = 0;
|
||||
u64 src_file_ino = 0;
|
||||
u32 dst_device_id = 0;
|
||||
u64 dst_file_ino = 0;
|
||||
if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
|
||||
!is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
|
||||
goto out;
|
||||
|
||||
int zero = 0;
|
||||
struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
|
||||
if (!filemod_data)
|
||||
goto out;
|
||||
|
||||
u32 pid = get_userspace_pid();
|
||||
struct task_struct* task = (struct task_struct*)bpf_get_current_task();
|
||||
|
||||
filemod_data->meta.type = FILEMOD_EVENT;
|
||||
filemod_data->fmod_type = FMOD_LINK;
|
||||
filemod_data->dst_flags = 0;
|
||||
filemod_data->src_inode = src_file_ino;
|
||||
filemod_data->dst_inode = dst_file_ino;
|
||||
filemod_data->src_device_id = src_device_id;
|
||||
filemod_data->dst_device_id = dst_device_id;
|
||||
filemod_data->src_filepath_length = 0;
|
||||
filemod_data->dst_filepath_length = 0;
|
||||
|
||||
void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
|
||||
filemod_data->payload);
|
||||
payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
|
||||
|
||||
size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
|
||||
barrier_var(len);
|
||||
if (len <= MAX_FILEPATH_LENGTH) {
|
||||
barrier_var(len);
|
||||
payload += len;
|
||||
filemod_data->src_filepath_length = len;
|
||||
}
|
||||
|
||||
len = read_absolute_file_path_from_dentry(new_dentry, payload);
|
||||
barrier_var(len);
|
||||
if (len <= MAX_FILEPATH_LENGTH) {
|
||||
barrier_var(len);
|
||||
payload += len;
|
||||
filemod_data->dst_filepath_length = len;
|
||||
}
|
||||
|
||||
bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
|
||||
unsigned long data_len = payload - (void*)filemod_data;
|
||||
data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
|
||||
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
|
||||
out:
|
||||
bpf_stats_exit(&stats_ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("kprobe/vfs_symlink")
|
||||
int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
|
||||
const char* oldname)
|
||||
{
|
||||
struct bpf_func_stats_ctx stats_ctx;
|
||||
bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);
|
||||
|
||||
u32 dst_device_id = 0;
|
||||
u64 dst_file_ino = 0;
|
||||
if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
|
||||
goto out;
|
||||
|
||||
int zero = 0;
|
||||
struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
|
||||
if (!filemod_data)
|
||||
goto out;
|
||||
|
||||
u32 pid = get_userspace_pid();
|
||||
struct task_struct* task = (struct task_struct*)bpf_get_current_task();
|
||||
|
||||
filemod_data->meta.type = FILEMOD_EVENT;
|
||||
filemod_data->fmod_type = FMOD_SYMLINK;
|
||||
filemod_data->dst_flags = 0;
|
||||
filemod_data->src_inode = 0;
|
||||
filemod_data->dst_inode = dst_file_ino;
|
||||
filemod_data->src_device_id = 0;
|
||||
filemod_data->dst_device_id = dst_device_id;
|
||||
filemod_data->src_filepath_length = 0;
|
||||
filemod_data->dst_filepath_length = 0;
|
||||
|
||||
void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
|
||||
filemod_data->payload);
|
||||
payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
|
||||
|
||||
size_t len = bpf_probe_read_str(payload, MAX_FILEPATH_LENGTH, oldname);
|
||||
barrier_var(len);
|
||||
if (len <= MAX_FILEPATH_LENGTH) {
|
||||
barrier_var(len);
|
||||
payload += len;
|
||||
filemod_data->src_filepath_length = len;
|
||||
}
|
||||
len = read_absolute_file_path_from_dentry(dentry, payload);
|
||||
barrier_var(len);
|
||||
if (len <= MAX_FILEPATH_LENGTH) {
|
||||
barrier_var(len);
|
||||
payload += len;
|
||||
filemod_data->dst_filepath_length = len;
|
||||
}
|
||||
bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
|
||||
unsigned long data_len = payload - (void*)filemod_data;
|
||||
data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
|
||||
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
|
||||
out:
|
||||
bpf_stats_exit(&stats_ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("raw_tracepoint/sched_process_fork")
|
||||
int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
|
||||
{
|
||||
struct bpf_func_stats_ctx stats_ctx;
|
||||
bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);
|
||||
|
||||
int zero = 0;
|
||||
struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
|
||||
if (!fork_data)
|
||||
goto out;
|
||||
|
||||
struct task_struct* parent = (struct task_struct*)ctx->args[0];
|
||||
struct task_struct* child = (struct task_struct*)ctx->args[1];
|
||||
fork_data->meta.type = FORK_EVENT;
|
||||
|
||||
void* payload = populate_var_metadata(&fork_data->meta, child,
|
||||
BPF_CORE_READ(child, pid), fork_data->payload);
|
||||
fork_data->parent_pid = BPF_CORE_READ(parent, pid);
|
||||
fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
|
||||
fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
|
||||
bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);
|
||||
|
||||
unsigned long data_len = payload - (void*)fork_data;
|
||||
data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
|
||||
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
|
||||
out:
|
||||
bpf_stats_exit(&stats_ctx);
|
||||
return 0;
|
||||
}
|
||||
char _license[] SEC("license") = "GPL";
|
|
@@ -0,0 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2020 Facebook */
|
||||
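/* Forcing the variable through a register keeps clang from optimizing away
 * or reordering the bounds checks that the BPF verifier needs to see.
 */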
#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
|
||||
#define UNROLL
|
||||
#define INLINE __always_inline
|
||||
#include "profiler.inc.h"
|
|
@@ -0,0 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2020 Facebook */
|
||||
#define barrier_var(var) /**/
|
||||
/* undef #define UNROLL */
|
||||
#define INLINE /**/
|
||||
#include "profiler.inc.h"
|
|
@@ -0,0 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2020 Facebook */
|
||||
#define barrier_var(var) /**/
|
||||
#define UNROLL
|
||||
#define INLINE __noinline
|
||||
#include "profiler.inc.h"
|
|
@@ -41,6 +41,43 @@ struct outer_arr {
.values = { (void *)&inner_map1, 0, (void *)&inner_map2 },
|
||||
};
|
||||
|
||||
struct inner_map_sz3 {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__uint(map_flags, BPF_F_INNER_MAP);
|
||||
__uint(max_entries, 3);
|
||||
__type(key, int);
|
||||
__type(value, int);
|
||||
} inner_map3 SEC(".maps"),
|
||||
inner_map4 SEC(".maps");
|
||||
|
||||
struct inner_map_sz4 {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__uint(map_flags, BPF_F_INNER_MAP);
|
||||
__uint(max_entries, 5);
|
||||
__type(key, int);
|
||||
__type(value, int);
|
||||
} inner_map5 SEC(".maps");
|
||||
|
||||
struct outer_arr_dyn {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
|
||||
__uint(max_entries, 3);
|
||||
__uint(key_size, sizeof(int));
|
||||
__uint(value_size, sizeof(int));
|
||||
__array(values, struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__uint(map_flags, BPF_F_INNER_MAP);
|
||||
__uint(max_entries, 1);
|
||||
__type(key, int);
|
||||
__type(value, int);
|
||||
});
|
||||
} outer_arr_dyn SEC(".maps") = {
|
||||
.values = {
|
||||
[0] = (void *)&inner_map3,
|
||||
[1] = (void *)&inner_map4,
|
||||
[2] = (void *)&inner_map5,
|
||||
},
|
||||
};
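/* With BPF_F_INNER_MAP set on the inner maps, the verifier no longer
 * requires them to match the declared prototype's max_entries, so maps
 * with 3, 3 and 5 entries can all be placed in this outer array.
 */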
|
||||
|
||||
struct outer_hash {
|
||||
__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
|
||||
__uint(max_entries, 5);
|
||||
|
@@ -101,6 +138,12 @@ int handle__sys_enter(void *ctx)
val = input + 1;
|
||||
bpf_map_update_elem(inner_map, &key, &val, 0);
|
||||
|
||||
inner_map = bpf_map_lookup_elem(&outer_arr_dyn, &key);
|
||||
if (!inner_map)
|
||||
return 1;
|
||||
val = input + 2;
|
||||
bpf_map_update_elem(inner_map, &key, &val, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@@ -0,0 +1,172 @@
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2020 Facebook */
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include <stdint.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_core_read.h>
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
/* fields of exactly the same size */
|
||||
struct test_struct___samesize {
|
||||
void *ptr;
|
||||
unsigned long long val1;
|
||||
unsigned int val2;
|
||||
unsigned short val3;
|
||||
unsigned char val4;
|
||||
} __attribute((preserve_access_index));
|
||||
|
||||
/* unsigned fields that have to be downsized by libbpf */
|
||||
struct test_struct___downsize {
|
||||
void *ptr;
|
||||
unsigned long val1;
|
||||
unsigned long val2;
|
||||
unsigned long val3;
|
||||
unsigned long val4;
|
||||
/* total sz: 40 */
|
||||
} __attribute__((preserve_access_index));
|
||||
|
||||
/* fields with signed integers of wrong size, should be rejected */
|
||||
struct test_struct___signed {
|
||||
void *ptr;
|
||||
long val1;
|
||||
long val2;
|
||||
long val3;
|
||||
long val4;
|
||||
} __attribute((preserve_access_index));
|
||||
|
||||
/* real layout and sizes according to test's (32-bit) BTF */
|
||||
struct test_struct___real {
|
||||
unsigned int ptr; /* can't use `void *`, it is always 8 byte in BPF target */
|
||||
unsigned int val2;
|
||||
unsigned long long val1;
|
||||
unsigned short val3;
|
||||
unsigned char val4;
|
||||
unsigned char _pad;
|
||||
/* total sz: 20 */
|
||||
};
|
||||
|
||||
struct test_struct___real input = {
|
||||
.ptr = 0x01020304,
|
||||
.val1 = 0x1020304050607080,
|
||||
.val2 = 0x0a0b0c0d,
|
||||
.val3 = 0xfeed,
|
||||
.val4 = 0xb9,
|
||||
._pad = 0xff, /* make sure no accidental zeros are present */
|
||||
};
|
||||
|
||||
unsigned long long ptr_samesized = 0;
|
||||
unsigned long long val1_samesized = 0;
|
||||
unsigned long long val2_samesized = 0;
|
||||
unsigned long long val3_samesized = 0;
|
||||
unsigned long long val4_samesized = 0;
|
||||
struct test_struct___real output_samesized = {};
|
||||
|
||||
unsigned long long ptr_downsized = 0;
|
||||
unsigned long long val1_downsized = 0;
|
||||
unsigned long long val2_downsized = 0;
|
||||
unsigned long long val3_downsized = 0;
|
||||
unsigned long long val4_downsized = 0;
|
||||
struct test_struct___real output_downsized = {};
|
||||
|
||||
unsigned long long ptr_probed = 0;
|
||||
unsigned long long val1_probed = 0;
|
||||
unsigned long long val2_probed = 0;
|
||||
unsigned long long val3_probed = 0;
|
||||
unsigned long long val4_probed = 0;
|
||||
|
||||
unsigned long long ptr_signed = 0;
|
||||
unsigned long long val1_signed = 0;
|
||||
unsigned long long val2_signed = 0;
|
||||
unsigned long long val3_signed = 0;
|
||||
unsigned long long val4_signed = 0;
|
||||
struct test_struct___real output_signed = {};
|
||||
|
||||
SEC("raw_tp/sys_exit")
|
||||
int handle_samesize(void *ctx)
|
||||
{
|
||||
struct test_struct___samesize *in = (void *)&input;
|
||||
struct test_struct___samesize *out = (void *)&output_samesized;
|
||||
|
||||
ptr_samesized = (unsigned long long)in->ptr;
|
||||
val1_samesized = in->val1;
|
||||
val2_samesized = in->val2;
|
||||
val3_samesized = in->val3;
|
||||
val4_samesized = in->val4;
|
||||
|
||||
out->ptr = in->ptr;
|
||||
out->val1 = in->val1;
|
||||
out->val2 = in->val2;
|
||||
out->val3 = in->val3;
|
||||
out->val4 = in->val4;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("raw_tp/sys_exit")
|
||||
int handle_downsize(void *ctx)
|
||||
{
|
||||
struct test_struct___downsize *in = (void *)&input;
|
||||
struct test_struct___downsize *out = (void *)&output_downsized;
|
||||
|
||||
ptr_downsized = (unsigned long long)in->ptr;
|
||||
val1_downsized = in->val1;
|
||||
val2_downsized = in->val2;
|
||||
val3_downsized = in->val3;
|
||||
val4_downsized = in->val4;
|
||||
|
||||
out->ptr = in->ptr;
|
||||
out->val1 = in->val1;
|
||||
out->val2 = in->val2;
|
||||
out->val3 = in->val3;
|
||||
out->val4 = in->val4;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("raw_tp/sys_enter")
|
||||
int handle_probed(void *ctx)
|
||||
{
|
||||
struct test_struct___downsize *in = (void *)&input;
|
||||
__u64 tmp;
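/* bpf_core_read() paired with bpf_core_field_size() copies exactly as many
 * bytes as the field occupies in the target kernel's layout, so no
 * load-size adjustment by libbpf is needed.
 */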
|
||||
|
||||
tmp = 0;
|
||||
bpf_core_read(&tmp, bpf_core_field_size(in->ptr), &in->ptr);
|
||||
ptr_probed = tmp;
|
||||
|
||||
tmp = 0;
|
||||
bpf_core_read(&tmp, bpf_core_field_size(in->val1), &in->val1);
|
||||
val1_probed = tmp;
|
||||
|
||||
tmp = 0;
|
||||
bpf_core_read(&tmp, bpf_core_field_size(in->val2), &in->val2);
|
||||
val2_probed = tmp;
|
||||
|
||||
tmp = 0;
|
||||
bpf_core_read(&tmp, bpf_core_field_size(in->val3), &in->val3);
|
||||
val3_probed = tmp;
|
||||
|
||||
tmp = 0;
|
||||
bpf_core_read(&tmp, bpf_core_field_size(in->val4), &in->val4);
|
||||
val4_probed = tmp;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("raw_tp/sys_enter")
|
||||
int handle_signed(void *ctx)
|
||||
{
|
||||
struct test_struct___signed *in = (void *)&input;
|
||||
struct test_struct___signed *out = (void *)&output_signed;
|
||||
|
||||
val2_signed = in->val2;
|
||||
val3_signed = in->val3;
|
||||
val4_signed = in->val4;
|
||||
|
||||
out->val2 = in->val2;
out->val3 = in->val3;
out->val4 = in->val4;
|
||||
|
||||
return 0;
|
||||
}
|
|
@@ -0,0 +1,55 @@
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2020 Google */
|
||||
|
||||
#include "vmlinux.h"
|
||||
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
__u64 out__runqueues_addr = -1;
|
||||
__u64 out__bpf_prog_active_addr = -1;
|
||||
|
||||
__u32 out__rq_cpu = -1; /* percpu struct fields */
|
||||
int out__bpf_prog_active = -1; /* percpu int */
|
||||
|
||||
__u32 out__this_rq_cpu = -1;
|
||||
int out__this_bpf_prog_active = -1;
|
||||
|
||||
__u32 out__cpu_0_rq_cpu = -1; /* cpu_rq(0)->cpu */
|
||||
|
||||
extern const struct rq runqueues __ksym; /* struct type global var. */
|
||||
extern const int bpf_prog_active __ksym; /* int type global var. */
|
||||
|
||||
SEC("raw_tp/sys_enter")
|
||||
int handler(const void *ctx)
|
||||
{
|
||||
struct rq *rq;
|
||||
int *active;
|
||||
__u32 cpu;
|
||||
|
||||
out__runqueues_addr = (__u64)&runqueues;
|
||||
out__bpf_prog_active_addr = (__u64)&bpf_prog_active;
|
||||
|
||||
cpu = bpf_get_smp_processor_id();
|
||||
|
||||
/* test bpf_per_cpu_ptr() */
|
||||
rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, cpu);
|
||||
if (rq)
|
||||
out__rq_cpu = rq->cpu;
|
||||
active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
|
||||
if (active)
|
||||
out__bpf_prog_active = *active;
|
||||
|
||||
rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
|
||||
if (rq) /* should always be valid, but we can't spare the check. */
|
||||
out__cpu_0_rq_cpu = rq->cpu;
|
||||
|
||||
/* test bpf_this_cpu_ptr */
|
||||
rq = (struct rq *)bpf_this_cpu_ptr(&runqueues);
|
||||
out__this_rq_cpu = rq->cpu;
|
||||
active = (int *)bpf_this_cpu_ptr(&bpf_prog_active);
|
||||
out__this_bpf_prog_active = *active;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
|
@@ -304,10 +304,10 @@ int misc_estab(struct bpf_sock_ops *skops)
passive_lport_n = __bpf_htons(passive_lport_h);
|
||||
bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
|
||||
&true_val, sizeof(true_val));
|
||||
set_hdr_cb_flags(skops);
|
||||
set_hdr_cb_flags(skops, 0);
|
||||
break;
|
||||
case BPF_SOCK_OPS_TCP_CONNECT_CB:
|
||||
set_hdr_cb_flags(skops);
|
||||
set_hdr_cb_flags(skops, 0);
|
||||
break;
|
||||
case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
|
||||
return handle_parse_hdr(skops);
|
||||
|
|
|
@@ -131,39 +131,55 @@ int bpf_prog2(struct __sk_buff *skb)
|
||||
}
|
||||
|
||||
SEC("sk_skb3")
|
||||
int bpf_prog3(struct __sk_buff *skb)
|
||||
static inline void bpf_write_pass(struct __sk_buff *skb, int offset)
|
||||
{
|
||||
const int one = 1;
|
||||
int err, *f, ret = SK_PASS;
|
||||
int err = bpf_skb_pull_data(skb, 6 + offset);
|
||||
void *data_end;
|
||||
char *c;
|
||||
|
||||
err = bpf_skb_pull_data(skb, 19);
|
||||
if (err)
|
||||
goto tls_out;
|
||||
return;
|
||||
|
||||
c = (char *)(long)skb->data;
|
||||
data_end = (void *)(long)skb->data_end;
|
||||
|
||||
if (c + 18 < data_end)
|
||||
memcpy(&c[13], "PASS", 4);
|
||||
if (c + 5 + offset < data_end)
|
||||
memcpy(c + offset, "PASS", 4);
|
||||
}
|
||||
|
||||
SEC("sk_skb3")
|
||||
int bpf_prog3(struct __sk_buff *skb)
|
||||
{
|
||||
int err, *f, ret = SK_PASS;
|
||||
const int one = 1;
|
||||
|
||||
f = bpf_map_lookup_elem(&sock_skb_opts, &one);
|
||||
if (f && *f) {
|
||||
__u64 flags = 0;
|
||||
|
||||
ret = 0;
|
||||
flags = *f;
|
||||
|
||||
err = bpf_skb_adjust_room(skb, -13, 0, 0);
|
||||
if (err)
|
||||
return SK_DROP;
|
||||
err = bpf_skb_adjust_room(skb, 4, 0, 0);
|
||||
if (err)
|
||||
return SK_DROP;
|
||||
bpf_write_pass(skb, 0);
|
||||
#ifdef SOCKMAP
|
||||
return bpf_sk_redirect_map(skb, &tls_sock_map, ret, flags);
|
||||
#else
|
||||
return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags);
|
||||
#endif
|
||||
}
|
||||
|
||||
f = bpf_map_lookup_elem(&sock_skb_opts, &one);
|
||||
if (f && *f)
|
||||
ret = SK_DROP;
|
||||
err = bpf_skb_adjust_room(skb, 4, 0, 0);
|
||||
if (err)
|
||||
return SK_DROP;
|
||||
bpf_write_pass(skb, 13);
|
||||
tls_out:
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@@ -13,17 +13,10 @@
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_endian.h>
|
||||
|
||||
#ifndef barrier_data
|
||||
# define barrier_data(ptr) asm volatile("": :"r"(ptr) :"memory")
|
||||
#endif
|
||||
|
||||
#ifndef ctx_ptr
|
||||
# define ctx_ptr(field) (void *)(long)(field)
|
||||
#endif
|
||||
|
||||
#define dst_to_src_tmp 0xeeddddeeU
|
||||
#define src_to_dst_tmp 0xeeffffeeU
|
||||
|
||||
#define ip4_src 0xac100164 /* 172.16.1.100 */
|
||||
#define ip4_dst 0xac100264 /* 172.16.2.100 */
|
||||
|
||||
|
@@ -39,6 +32,18 @@
a.s6_addr32[3] == b.s6_addr32[3])
|
||||
#endif
|
||||
|
||||
enum {
|
||||
dev_src,
|
||||
dev_dst,
|
||||
};
|
||||
|
||||
struct bpf_map_def SEC("maps") ifindex_map = {
|
||||
.type = BPF_MAP_TYPE_ARRAY,
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.max_entries = 2,
|
||||
};
|
||||
|
||||
static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb,
|
||||
__be32 addr)
|
||||
{
|
||||
|
@@ -73,7 +78,14 @@ static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb,
return v6_equal(ip6h->daddr, addr);
|
||||
}
|
||||
|
||||
SEC("chk_neigh") int tc_chk(struct __sk_buff *skb)
|
||||
static __always_inline int get_dev_ifindex(int which)
|
||||
{
|
||||
int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);
|
||||
|
||||
return ifindex ? *ifindex : 0;
|
||||
}
|
||||
|
||||
SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
|
||||
{
|
||||
void *data_end = ctx_ptr(skb->data_end);
|
||||
void *data = ctx_ptr(skb->data);
|
||||
|
@@ -87,7 +99,6 @@ SEC("chk_neigh") int tc_chk(struct __sk_buff *skb)
|
||||
SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
|
||||
{
|
||||
int idx = dst_to_src_tmp;
|
||||
__u8 zero[ETH_ALEN * 2];
|
||||
bool redirect = false;
|
||||
|
||||
|
@@ -103,19 +114,15 @@ SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
|
|||
if (!redirect)
|
||||
return TC_ACT_OK;
|
||||
|
||||
barrier_data(&idx);
|
||||
idx = bpf_ntohl(idx);
|
||||
|
||||
__builtin_memset(&zero, 0, sizeof(zero));
|
||||
if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
|
||||
return TC_ACT_SHOT;
|
||||
|
||||
return bpf_redirect_neigh(idx, 0);
|
||||
return bpf_redirect_neigh(get_dev_ifindex(dev_src), 0);
|
||||
}
|
||||
|
||||
SEC("src_ingress") int tc_src(struct __sk_buff *skb)
|
||||
{
|
||||
int idx = src_to_dst_tmp;
|
||||
__u8 zero[ETH_ALEN * 2];
|
||||
bool redirect = false;
|
||||
|
||||
|
@@ -131,14 +138,11 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb)
|
|||
if (!redirect)
|
||||
return TC_ACT_OK;
|
||||
|
||||
barrier_data(&idx);
|
||||
idx = bpf_ntohl(idx);
|
||||
|
||||
__builtin_memset(&zero, 0, sizeof(zero));
|
||||
if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
|
||||
return TC_ACT_SHOT;
|
||||
|
||||
return bpf_redirect_neigh(idx, 0);
|
||||
return bpf_redirect_neigh(get_dev_ifindex(dev_dst), 0);
|
||||
}
|
||||
|
||||
char __license[] SEC("license") = "GPL";
|
||||
|
|
|
@@ -0,0 +1,45 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/pkt_cls.h>
|
||||
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
enum {
|
||||
dev_src,
|
||||
dev_dst,
|
||||
};
|
||||
|
||||
struct bpf_map_def SEC("maps") ifindex_map = {
|
||||
.type = BPF_MAP_TYPE_ARRAY,
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.max_entries = 2,
|
||||
};
|
||||
|
||||
static __always_inline int get_dev_ifindex(int which)
|
||||
{
|
||||
int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);
|
||||
|
||||
return ifindex ? *ifindex : 0;
|
||||
}
|
||||
|
||||
SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
|
||||
{
|
||||
return TC_ACT_SHOT;
|
||||
}
|
||||
|
||||
SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
|
||||
{
|
||||
return bpf_redirect_peer(get_dev_ifindex(dev_src), 0);
|
||||
}
|
||||
|
||||
SEC("src_ingress") int tc_src(struct __sk_buff *skb)
|
||||
{
|
||||
return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0);
|
||||
}
|
||||
|
||||
char __license[] SEC("license") = "GPL";
|
|
@@ -21,6 +21,7 @@
|
|||
|
||||
__u8 test_kind = TCPOPT_EXP;
|
||||
__u16 test_magic = 0xeB9F;
|
||||
__u32 inherit_cb_flags = 0;
|
||||
|
||||
struct bpf_test_option passive_synack_out = {};
|
||||
struct bpf_test_option passive_fin_out = {};
|
||||
|
@@ -467,6 +468,8 @@ static int handle_passive_estab(struct bpf_sock_ops *skops)
|
|||
struct tcphdr *th;
|
||||
int err;
|
||||
|
||||
inherit_cb_flags = skops->bpf_sock_ops_cb_flags;
|
||||
|
||||
err = load_option(skops, &passive_estab_in, true);
|
||||
if (err == -ENOENT) {
|
||||
/* saved_syn is not found. It was in syncookie mode.
|
||||
|
@@ -600,10 +603,10 @@ int estab(struct bpf_sock_ops *skops)
|
|||
case BPF_SOCK_OPS_TCP_LISTEN_CB:
|
||||
bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
|
||||
&true_val, sizeof(true_val));
|
||||
set_hdr_cb_flags(skops);
|
||||
set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
|
||||
break;
|
||||
case BPF_SOCK_OPS_TCP_CONNECT_CB:
|
||||
set_hdr_cb_flags(skops);
|
||||
set_hdr_cb_flags(skops, 0);
|
||||
break;
|
||||
case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
|
||||
return handle_parse_hdr(skops);
|
||||
|
|
|
@@ -86,6 +86,7 @@ int txmsg_ktls_skb_redir;
|
|||
int ktls;
|
||||
int peek_flag;
|
||||
int skb_use_parser;
|
||||
int txmsg_omit_skb_parser;
|
||||
|
||||
static const struct option long_options[] = {
|
||||
{"help", no_argument, NULL, 'h' },
|
||||
|
@@ -111,6 +112,7 @@ static const struct option long_options[] = {
|
|||
{"txmsg_redir_skb", no_argument, &txmsg_redir_skb, 1 },
|
||||
{"ktls", no_argument, &ktls, 1 },
|
||||
{"peek", no_argument, &peek_flag, 1 },
|
||||
{"txmsg_omit_skb_parser", no_argument, &txmsg_omit_skb_parser, 1},
|
||||
{"whitelist", required_argument, NULL, 'n' },
|
||||
{"blacklist", required_argument, NULL, 'b' },
|
||||
{0, 0, NULL, 0 }
|
||||
|
@@ -175,6 +177,7 @@ static void test_reset(void)
|
|||
txmsg_apply = txmsg_cork = 0;
|
||||
txmsg_ingress = txmsg_redir_skb = 0;
|
||||
txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0;
|
||||
txmsg_omit_skb_parser = 0;
|
||||
skb_use_parser = 0;
|
||||
}
|
||||
|
||||
|
@@ -518,28 +521,13 @@ static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
|
|||
if (i == 0 && txmsg_ktls_skb) {
|
||||
if (msg->msg_iov[i].iov_len < 4)
|
||||
return -EIO;
|
||||
if (txmsg_ktls_skb_redir) {
|
||||
if (memcmp(&d[13], "PASS", 4) != 0) {
|
||||
fprintf(stderr,
|
||||
"detected redirect ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[13], d[14], d[15], d[16]);
|
||||
return -EIO;
|
||||
}
|
||||
d[13] = 0;
|
||||
d[14] = 1;
|
||||
d[15] = 2;
|
||||
d[16] = 3;
|
||||
j = 13;
|
||||
} else if (txmsg_ktls_skb) {
|
||||
if (memcmp(d, "PASS", 4) != 0) {
|
||||
fprintf(stderr,
|
||||
"detected ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]);
|
||||
return -EIO;
|
||||
}
|
||||
d[0] = 0;
|
||||
d[1] = 1;
|
||||
d[2] = 2;
|
||||
d[3] = 3;
|
||||
if (memcmp(d, "PASS", 4) != 0) {
|
||||
fprintf(stderr,
|
||||
"detected skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n",
|
||||
i, 0, d[0], d[1], d[2], d[3]);
|
||||
return -EIO;
|
||||
}
|
||||
j = 4; /* advance index past PASS header */
|
||||
}
|
||||
|
||||
for (; j < msg->msg_iov[i].iov_len && size; j++) {
|
||||
|
@@ -927,13 +915,15 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test)
|
|||
goto run;
|
||||
|
||||
/* Attach programs to sockmap */
|
||||
err = bpf_prog_attach(prog_fd[0], map_fd[0],
|
||||
BPF_SK_SKB_STREAM_PARSER, 0);
|
||||
if (err) {
|
||||
fprintf(stderr,
|
||||
"ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
|
||||
prog_fd[0], map_fd[0], err, strerror(errno));
|
||||
return err;
|
||||
if (!txmsg_omit_skb_parser) {
|
||||
err = bpf_prog_attach(prog_fd[0], map_fd[0],
|
||||
BPF_SK_SKB_STREAM_PARSER, 0);
|
||||
if (err) {
|
||||
fprintf(stderr,
|
||||
"ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
|
||||
prog_fd[0], map_fd[0], err, strerror(errno));
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
err = bpf_prog_attach(prog_fd[1], map_fd[0],
|
||||
|
@@ -946,13 +936,15 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test)
|
|||
|
||||
/* Attach programs to TLS sockmap */
|
||||
if (txmsg_ktls_skb) {
|
||||
err = bpf_prog_attach(prog_fd[0], map_fd[8],
|
||||
BPF_SK_SKB_STREAM_PARSER, 0);
|
||||
if (err) {
|
||||
fprintf(stderr,
|
||||
"ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
|
||||
prog_fd[0], map_fd[8], err, strerror(errno));
|
||||
return err;
|
||||
if (!txmsg_omit_skb_parser) {
|
||||
err = bpf_prog_attach(prog_fd[0], map_fd[8],
|
||||
BPF_SK_SKB_STREAM_PARSER, 0);
|
||||
if (err) {
|
||||
fprintf(stderr,
|
||||
"ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
|
||||
prog_fd[0], map_fd[8], err, strerror(errno));
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
err = bpf_prog_attach(prog_fd[2], map_fd[8],
|
||||
|
@@ -1480,12 +1472,29 @@ static void test_txmsg_skb(int cgrp, struct sockmap_options *opt)
|
|||
txmsg_ktls_skb_drop = 0;
|
||||
txmsg_ktls_skb_redir = 1;
|
||||
test_exec(cgrp, opt);
|
||||
txmsg_ktls_skb_redir = 0;
|
||||
|
||||
/* Tests that omit skb_parser */
|
||||
txmsg_omit_skb_parser = 1;
|
||||
ktls = 0;
|
||||
txmsg_ktls_skb = 0;
|
||||
test_exec(cgrp, opt);
|
||||
|
||||
txmsg_ktls_skb_drop = 1;
|
||||
test_exec(cgrp, opt);
|
||||
txmsg_ktls_skb_drop = 0;
|
||||
|
||||
txmsg_ktls_skb_redir = 1;
|
||||
test_exec(cgrp, opt);
|
||||
|
||||
ktls = 1;
|
||||
test_exec(cgrp, opt);
|
||||
txmsg_omit_skb_parser = 0;
|
||||
|
||||
opt->data_test = data;
|
||||
ktls = k;
|
||||
}
|
||||
|
||||
|
||||
/* Test cork with hung data. This tests poor usage patterns where
|
||||
* cork can leave data on the ring if user program is buggy and
|
||||
* doesn't flush them somehow. They do take some time however
|
||||
|
|
|
@@ -1,168 +0,0 @@
|
|||
#!/bin/bash
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
|
||||
# between src and dst. The netns fwd has veth links to each src and dst. The
|
||||
# client is in src and server in dst. The test installs a TC BPF program to each
|
||||
# host facing veth in fwd which calls into bpf_redirect_neigh() to perform the
|
||||
# neigh addr population and redirect; it also installs a dropper prog on the
|
||||
# egress side to drop skbs if neigh addrs were not populated.
|
||||
|
||||
if [[ $EUID -ne 0 ]]; then
|
||||
echo "This script must be run as root"
|
||||
echo "FAIL"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# check that nc, dd, ping, ping6 and timeout are present
|
||||
command -v nc >/dev/null 2>&1 || \
|
||||
{ echo >&2 "nc is not available"; exit 1; }
|
||||
command -v dd >/dev/null 2>&1 || \
|
||||
{ echo >&2 "dd is not available"; exit 1; }
|
||||
command -v timeout >/dev/null 2>&1 || \
|
||||
{ echo >&2 "timeout is not available"; exit 1; }
|
||||
command -v ping >/dev/null 2>&1 || \
|
||||
{ echo >&2 "ping is not available"; exit 1; }
|
||||
command -v ping6 >/dev/null 2>&1 || \
|
||||
{ echo >&2 "ping6 is not available"; exit 1; }
|
||||
|
||||
readonly GREEN='\033[0;92m'
|
||||
readonly RED='\033[0;31m'
|
||||
readonly NC='\033[0m' # No Color
|
||||
|
||||
readonly PING_ARG="-c 3 -w 10 -q"
|
||||
|
||||
readonly TIMEOUT=10
|
||||
|
||||
readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
|
||||
readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)"
|
||||
readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
|
||||
|
||||
readonly IP4_SRC="172.16.1.100"
|
||||
readonly IP4_DST="172.16.2.100"
|
||||
|
||||
readonly IP6_SRC="::1:dead:beef:cafe"
|
||||
readonly IP6_DST="::2:dead:beef:cafe"
|
||||
|
||||
readonly IP4_SLL="169.254.0.1"
|
||||
readonly IP4_DLL="169.254.0.2"
|
||||
readonly IP4_NET="169.254.0.0"
|
||||
|
||||
cleanup()
|
||||
{
|
||||
ip netns del ${NS_SRC}
|
||||
ip netns del ${NS_FWD}
|
||||
ip netns del ${NS_DST}
|
||||
}
|
||||
|
||||
trap cleanup EXIT
|
||||
|
||||
set -e
|
||||
|
||||
ip netns add "${NS_SRC}"
|
||||
ip netns add "${NS_FWD}"
|
||||
ip netns add "${NS_DST}"
|
||||
|
||||
ip link add veth_src type veth peer name veth_src_fwd
|
||||
ip link add veth_dst type veth peer name veth_dst_fwd
|
||||
|
||||
ip link set veth_src netns ${NS_SRC}
|
||||
ip link set veth_src_fwd netns ${NS_FWD}
|
||||
|
||||
ip link set veth_dst netns ${NS_DST}
|
||||
ip link set veth_dst_fwd netns ${NS_FWD}
|
||||
|
||||
ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src
|
||||
ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst
|
||||
|
||||
# The fwd netns automatically gets a v6 LL address / routes, but also needs a v4
|
||||
# one in order to start ARP probing. IP4_NET route is added to the endpoints
|
||||
# so that the ARP processing will reply.
|
||||
|
||||
ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd
|
||||
ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd
|
||||
|
||||
ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad
|
||||
ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad
|
||||
|
||||
ip -netns ${NS_SRC} link set dev veth_src up
|
||||
ip -netns ${NS_FWD} link set dev veth_src_fwd up
|
||||
|
||||
ip -netns ${NS_DST} link set dev veth_dst up
|
||||
ip -netns ${NS_FWD} link set dev veth_dst_fwd up
|
||||
|
||||
ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global
|
||||
ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global
|
||||
ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global
|
||||
|
||||
ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global
|
||||
ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global
|
||||
|
||||
ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global
|
||||
ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global
|
||||
ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global
|
||||
|
||||
ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global
|
||||
ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global
|
||||
|
||||
fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address)
|
||||
fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address)
|
||||
|
||||
ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src
|
||||
ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst
|
||||
|
||||
ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src
|
||||
ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst
|
||||
|
||||
veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex | awk '{printf "%08x\n", $1}')
|
||||
veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex | awk '{printf "%08x\n", $1}')
|
||||
|
||||
xxd -p < test_tc_neigh.o | sed "s/eeddddee/$veth_src/g" | xxd -r -p > test_tc_neigh.x.o
|
||||
xxd -p < test_tc_neigh.x.o | sed "s/eeffffee/$veth_dst/g" | xxd -r -p > test_tc_neigh.y.o
|
||||
|
||||
ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact
|
||||
ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj test_tc_neigh.y.o sec src_ingress
|
||||
ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj test_tc_neigh.y.o sec chk_neigh
|
||||
|
||||
ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact
|
||||
ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj test_tc_neigh.y.o sec dst_ingress
|
||||
ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj test_tc_neigh.y.o sec chk_neigh
|
||||
|
||||
rm -f test_tc_neigh.x.o test_tc_neigh.y.o
|
||||
|
||||
ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &"
|
||||
ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &"
|
||||
|
||||
set +e
|
||||
|
||||
TEST="TCPv4 connectivity test"
|
||||
ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004"
|
||||
if [ $? -ne 0 ]; then
|
||||
echo -e "${TEST}: ${RED}FAIL${NC}"
|
||||
exit 1
|
||||
fi
|
||||
echo -e "${TEST}: ${GREEN}PASS${NC}"
|
||||
|
||||
TEST="TCPv6 connectivity test"
|
||||
ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006"
|
||||
if [ $? -ne 0 ]; then
|
||||
echo -e "${TEST}: ${RED}FAIL${NC}"
|
||||
exit 1
|
||||
fi
|
||||
echo -e "${TEST}: ${GREEN}PASS${NC}"
|
||||
|
||||
TEST="ICMPv4 connectivity test"
|
||||
ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST}
|
||||
if [ $? -ne 0 ]; then
|
||||
echo -e "${TEST}: ${RED}FAIL${NC}"
|
||||
exit 1
|
||||
fi
|
||||
echo -e "${TEST}: ${GREEN}PASS${NC}"
|
||||
|
||||
TEST="ICMPv6 connectivity test"
|
||||
ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST}
|
||||
if [ $? -ne 0 ]; then
|
||||
echo -e "${TEST}: ${RED}FAIL${NC}"
|
||||
exit 1
|
||||
fi
|
||||
echo -e "${TEST}: ${GREEN}PASS${NC}"
|
|
@@ -0,0 +1,204 @@
|
|||
#!/bin/bash
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
|
||||
# between src and dst. The netns fwd has veth links to each src and dst. The
|
||||
# client is in src and server in dst. The test installs a TC BPF program to each
|
||||
# host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
|
||||
# neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
|
||||
# switch from ingress side; it also installs a checker prog on the egress side
|
||||
# to drop unexpected traffic.
|
||||
|
||||
if [[ $EUID -ne 0 ]]; then
|
||||
echo "This script must be run as root"
|
||||
echo "FAIL"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# check that needed tools are present
|
||||
command -v nc >/dev/null 2>&1 || \
|
||||
{ echo >&2 "nc is not available"; exit 1; }
|
||||
command -v dd >/dev/null 2>&1 || \
|
||||
{ echo >&2 "dd is not available"; exit 1; }
|
||||
command -v timeout >/dev/null 2>&1 || \
|
||||
{ echo >&2 "timeout is not available"; exit 1; }
|
||||
command -v ping >/dev/null 2>&1 || \
|
||||
{ echo >&2 "ping is not available"; exit 1; }
|
||||
command -v ping6 >/dev/null 2>&1 || \
|
||||
{ echo >&2 "ping6 is not available"; exit 1; }
|
||||
command -v perl >/dev/null 2>&1 || \
|
||||
{ echo >&2 "perl is not available"; exit 1; }
|
||||
command -v jq >/dev/null 2>&1 || \
|
||||
{ echo >&2 "jq is not available"; exit 1; }
|
||||
command -v bpftool >/dev/null 2>&1 || \
|
||||
{ echo >&2 "bpftool is not available"; exit 1; }
|
||||
|
||||
readonly GREEN='\033[0;92m'
|
||||
readonly RED='\033[0;31m'
|
||||
readonly NC='\033[0m' # No Color
|
||||
|
||||
readonly PING_ARG="-c 3 -w 10 -q"
|
||||
|
||||
readonly TIMEOUT=10
|
||||
|
||||
readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
|
||||
readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)"
|
||||
readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
|
||||
|
||||
readonly IP4_SRC="172.16.1.100"
|
||||
readonly IP4_DST="172.16.2.100"
|
||||
|
||||
readonly IP6_SRC="::1:dead:beef:cafe"
|
||||
readonly IP6_DST="::2:dead:beef:cafe"
|
||||
|
||||
readonly IP4_SLL="169.254.0.1"
|
||||
readonly IP4_DLL="169.254.0.2"
|
||||
readonly IP4_NET="169.254.0.0"
|
||||
|
||||
netns_cleanup()
|
||||
{
|
||||
ip netns del ${NS_SRC}
|
||||
ip netns del ${NS_FWD}
|
||||
ip netns del ${NS_DST}
|
||||
}
|
||||
|
||||
netns_setup()
|
||||
{
|
||||
ip netns add "${NS_SRC}"
|
||||
ip netns add "${NS_FWD}"
|
||||
ip netns add "${NS_DST}"
|
||||
|
||||
ip link add veth_src type veth peer name veth_src_fwd
|
||||
ip link add veth_dst type veth peer name veth_dst_fwd
|
||||
|
||||
ip link set veth_src netns ${NS_SRC}
|
||||
ip link set veth_src_fwd netns ${NS_FWD}
|
||||
|
||||
ip link set veth_dst netns ${NS_DST}
|
||||
ip link set veth_dst_fwd netns ${NS_FWD}
|
||||
|
||||
ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src
|
||||
ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst
|
||||
|
||||
# The fwd netns automatically gets a v6 LL address / routes, but also
|
||||
# needs a v4 one in order to start ARP probing. IP4_NET route is added
|
||||
# to the endpoints so that the ARP processing will reply.
|
||||
|
||||
ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd
|
||||
ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd
|
||||
|
||||
ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad
|
||||
ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad
|
||||
|
||||
ip -netns ${NS_SRC} link set dev veth_src up
|
||||
ip -netns ${NS_FWD} link set dev veth_src_fwd up
|
||||
|
||||
ip -netns ${NS_DST} link set dev veth_dst up
|
||||
ip -netns ${NS_FWD} link set dev veth_dst_fwd up
|
||||
|
||||
ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global
|
||||
ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global
|
||||
ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global
|
||||
|
||||
ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global
|
||||
ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global
|
||||
|
||||
ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global
|
||||
ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global
|
||||
ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global
|
||||
|
||||
ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global
|
||||
ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global
|
||||
|
||||
fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address)
|
||||
fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address)
|
||||
|
||||
ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src
|
||||
ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst
|
||||
|
||||
ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src
|
||||
ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst
|
||||
}
|
||||
|
||||
netns_test_connectivity()
|
||||
{
|
||||
set +e
|
||||
|
||||
ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &"
|
||||
ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &"
|
||||
|
||||
TEST="TCPv4 connectivity test"
|
||||
ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004"
|
||||
if [ $? -ne 0 ]; then
|
||||
echo -e "${TEST}: ${RED}FAIL${NC}"
|
||||
exit 1
|
||||
fi
|
||||
echo -e "${TEST}: ${GREEN}PASS${NC}"
|
||||
|
||||
TEST="TCPv6 connectivity test"
|
||||
ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006"
|
||||
if [ $? -ne 0 ]; then
|
||||
echo -e "${TEST}: ${RED}FAIL${NC}"
|
||||
exit 1
|
||||
fi
|
||||
echo -e "${TEST}: ${GREEN}PASS${NC}"
|
||||
|
||||
TEST="ICMPv4 connectivity test"
|
||||
ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST}
|
||||
if [ $? -ne 0 ]; then
|
||||
echo -e "${TEST}: ${RED}FAIL${NC}"
|
||||
exit 1
|
||||
fi
|
||||
echo -e "${TEST}: ${GREEN}PASS${NC}"
|
||||
|
||||
TEST="ICMPv6 connectivity test"
|
||||
ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST}
|
||||
if [ $? -ne 0 ]; then
|
||||
echo -e "${TEST}: ${RED}FAIL${NC}"
|
||||
exit 1
|
||||
fi
|
||||
echo -e "${TEST}: ${GREEN}PASS${NC}"
|
||||
|
||||
set -e
|
||||
}
|
||||
|
||||
hex_mem_str()
|
||||
{
|
||||
perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1
|
||||
}
|
||||
|
||||
netns_setup_bpf()
|
||||
{
|
||||
local obj=$1
|
||||
|
||||
ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact
|
||||
ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress
|
||||
ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj $obj sec chk_egress
|
||||
|
||||
ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact
|
||||
ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress
|
||||
ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress
|
||||
|
||||
veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex)
|
||||
veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex)
|
||||
|
||||
progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]')
|
||||
for prog in $progs; do
|
||||
map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]')
|
||||
if [ ! -z "$map" ]; then
|
||||
bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src)
|
||||
bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst)
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
trap netns_cleanup EXIT
|
||||
set -e
|
||||
|
||||
netns_setup
|
||||
netns_setup_bpf test_tc_neigh.o
|
||||
netns_test_connectivity
|
||||
netns_cleanup
|
||||
netns_setup
|
||||
netns_setup_bpf test_tc_peer.o
|
||||
netns_test_connectivity
|
|
@@ -110,12 +110,13 @@ static inline void clear_hdr_cb_flags(struct bpf_sock_ops *skops)
|
|||
BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG));
|
||||
}
|
||||
|
||||
static inline void set_hdr_cb_flags(struct bpf_sock_ops *skops)
|
||||
static inline void set_hdr_cb_flags(struct bpf_sock_ops *skops, __u32 extra)
|
||||
{
|
||||
bpf_sock_ops_cb_flags_set(skops,
|
||||
skops->bpf_sock_ops_cb_flags |
|
||||
BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
|
||||
BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
|
||||
BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG |
|
||||
extra);
|
||||
}
|
||||
static inline void
|
||||
clear_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops)
|
||||
|
|
|
@@ -90,6 +90,33 @@ long ksym_get_addr(const char *name)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* open kallsyms and read symbol addresses on the fly. Without caching all symbols,
|
||||
* this is faster than load + find.
|
||||
*/
|
||||
int kallsyms_find(const char *sym, unsigned long long *addr)
|
||||
{
|
||||
char type, name[500];
|
||||
unsigned long long value;
|
||||
int err = 0;
|
||||
FILE *f;
|
||||
|
||||
f = fopen("/proc/kallsyms", "r");
|
||||
if (!f)
|
||||
return -EINVAL;
|
||||
|
||||
while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) {
|
||||
if (strcmp(name, sym) == 0) {
|
||||
*addr = value;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
err = -ENOENT;
|
||||
|
||||
out:
|
||||
fclose(f);
|
||||
return err;
|
||||
}
|
||||
|
||||
void read_trace_pipe(void)
|
||||
{
|
||||
int trace_fd;
|
||||
|
|
|
@@ -12,6 +12,10 @@ struct ksym {
|
|||
int load_kallsyms(void);
|
||||
struct ksym *ksym_search(long key);
|
||||
long ksym_get_addr(const char *name);
|
||||
|
||||
/* open kallsyms and find addresses on the fly, faster than load + search. */
|
||||
int kallsyms_find(const char *sym, unsigned long long *addr);
|
||||
|
||||
void read_trace_pipe(void);
|
||||
|
||||
#endif
|
||||
|
|
|
@@ -2,7 +2,7 @@
|
|||
"empty prog",
|
||||
.insns = {
|
||||
},
|
||||
.errstr = "unknown opcode 00",
|
||||
.errstr = "last insn is not an exit or jmp",
|
||||
.result = REJECT,
|
||||
},
|
||||
{
|
||||
|
|
|
@@ -529,7 +529,7 @@
|
|||
},
|
||||
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
|
||||
.result = REJECT,
|
||||
.errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)",
|
||||
.errstr = "invalid access to packet, off=0 size=8, R5(id=2,off=0,r=0)",
|
||||
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
|
||||
},
|
||||
{
|
||||
|
|
|
@@ -50,14 +50,6 @@
|
|||
.errstr = "invalid bpf_ld_imm64 insn",
|
||||
.result = REJECT,
|
||||
},
|
||||
{
|
||||
"test5 ld_imm64",
|
||||
.insns = {
|
||||
BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
|
||||
},
|
||||
.errstr = "invalid bpf_ld_imm64 insn",
|
||||
.result = REJECT,
|
||||
},
|
||||
{
|
||||
"test6 ld_imm64",
|
||||
.insns = {
|
||||
|
|
|
@@ -0,0 +1,243 @@
|
|||
{
|
||||
"regalloc basic",
|
||||
.insns = {
|
||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
||||
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
|
||||
BPF_LD_MAP_FD(BPF_REG_1, 0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
|
||||
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
|
||||
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
|
||||
BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 4),
|
||||
BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3),
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
|
||||
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
},
|
||||
.fixup_map_hash_48b = { 4 },
|
||||
.result = ACCEPT,
|
||||
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
|
||||
},
|
||||
{
|
||||
"regalloc negative",
|
||||
.insns = {
|
||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
||||
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
|
||||
BPF_LD_MAP_FD(BPF_REG_1, 0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
|
||||
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
|
||||
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
|
||||
BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 24, 4),
|
||||
BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3),
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
|
||||
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
},
|
||||
.fixup_map_hash_48b = { 4 },
|
||||
.result = REJECT,
|
||||
.errstr = "invalid access to map value, value_size=48 off=48 size=1",
|
||||
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
|
||||
},
|
||||
{
|
||||
"regalloc src_reg mark",
|
||||
.insns = {
|
||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
||||
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
|
||||
BPF_LD_MAP_FD(BPF_REG_1, 0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
|
||||
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
|
||||
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
|
||||
BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 5),
|
||||
BPF_MOV64_IMM(BPF_REG_3, 0),
|
||||
BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3),
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
|
||||
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
},
|
||||
.fixup_map_hash_48b = { 4 },
|
||||
.result = ACCEPT,
|
||||
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
|
||||
},
|
||||
{
|
||||
"regalloc src_reg negative",
|
||||
.insns = {
|
||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
||||
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
|
||||
BPF_LD_MAP_FD(BPF_REG_1, 0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
|
||||
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
|
||||
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
|
||||
BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 22, 5),
|
||||
BPF_MOV64_IMM(BPF_REG_3, 0),
|
||||
BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3),
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
|
||||
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
},
|
||||
.fixup_map_hash_48b = { 4 },
|
||||
.result = REJECT,
|
||||
.errstr = "invalid access to map value, value_size=48 off=44 size=8",
|
||||
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
|
||||
},
|
||||
{
|
||||
"regalloc and spill",
|
||||
.insns = {
|
||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
||||
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
|
||||
BPF_LD_MAP_FD(BPF_REG_1, 0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
|
||||
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
|
||||
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
|
||||
BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 7),
|
||||
/* r0 has upper bound that should propagate into r2 */
|
||||
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */
|
||||
BPF_MOV64_IMM(BPF_REG_0, 0),
|
||||
BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */
|
||||
BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */
|
||||
BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2),
|
||||
/* r3 has lower and upper bounds */
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3),
|
||||
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
},
|
||||
.fixup_map_hash_48b = { 4 },
|
||||
.result = ACCEPT,
|
||||
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
|
||||
},
|
||||
{
|
||||
"regalloc and spill negative",
|
||||
.insns = {
|
||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
||||
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
|
||||
BPF_LD_MAP_FD(BPF_REG_1, 0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
|
||||
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
|
||||
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
|
||||
BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 48, 7),
|
||||
/* r0 has upper bound that should propagate into r2 */
|
||||
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */
|
||||
BPF_MOV64_IMM(BPF_REG_0, 0),
|
||||
BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */
|
||||
BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */
|
||||
BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2),
|
||||
/* r3 has lower and upper bounds */
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3),
|
||||
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
},
|
||||
.fixup_map_hash_48b = { 4 },
|
||||
.result = REJECT,
|
||||
.errstr = "invalid access to map value, value_size=48 off=48 size=8",
|
||||
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
|
||||
},
|
||||
{
|
||||
"regalloc three regs",
|
||||
.insns = {
|
||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
||||
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
|
||||
BPF_LD_MAP_FD(BPF_REG_1, 0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
|
||||
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
|
||||
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
|
||||
BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
|
||||
BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 12, 5),
|
||||
BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4),
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_4),
|
||||
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
},
|
||||
.fixup_map_hash_48b = { 4 },
|
||||
.result = ACCEPT,
|
||||
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
|
||||
},
|
||||
{
|
||||
"regalloc after call",
|
||||
.insns = {
|
||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
||||
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
|
||||
BPF_LD_MAP_FD(BPF_REG_1, 0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
|
||||
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
|
||||
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
|
||||
BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
|
||||
BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
|
||||
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
|
||||
BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 20, 4),
|
||||
BPF_JMP_IMM(BPF_JSLT, BPF_REG_9, 0, 3),
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_8),
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_9),
|
||||
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
BPF_MOV64_IMM(BPF_REG_0, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
},
|
||||
.fixup_map_hash_48b = { 4 },
|
||||
.result = ACCEPT,
|
||||
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
|
||||
},
|
||||
{
|
||||
"regalloc in callee",
|
||||
.insns = {
|
||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
||||
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
|
||||
BPF_LD_MAP_FD(BPF_REG_1, 0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
|
||||
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
|
||||
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
|
||||
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
|
||||
BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
|
||||
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
|
||||
BPF_EXIT_INSN(),
|
||||
BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 20, 5),
|
||||
BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4),
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1),
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
|
||||
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
BPF_MOV64_IMM(BPF_REG_0, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
},
|
||||
.fixup_map_hash_48b = { 4 },
|
||||
.result = ACCEPT,
|
||||
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
|
||||
},
|