Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Alexei Starovoitov says:

====================
pull-request: bpf 2018-07-07

The following pull-request contains BPF updates for your *net* tree.

Plenty of fixes for different components:

1) A set of critical fixes for sockmap and sockhash, from John Fastabend.

2) fixes for several race conditions in af_xdp, from Magnus Karlsson.

3) hash map refcnt fix, from Mauricio Vasquez.

4) samples/bpf fixes, from Taeung Song.

5) ifup+mtu check for xdp_redirect, from Toshiaki Makita.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit 7f93d12951
@@ -765,8 +765,8 @@ static inline bool bpf_dump_raw_ok(void)
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
				       const struct bpf_insn *patch, u32 len);

-static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb,
-					   struct net_device *fwd)
+static inline int xdp_ok_fwd_dev(const struct net_device *fwd,
+				 unsigned int pktlen)
 {
	unsigned int len;

@@ -774,7 +774,7 @@ static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb,
		return -ENETDOWN;

	len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
-	if (skb->len > len)
+	if (pktlen > len)
		return -EMSGSIZE;

	return 0;
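
For context: the reworked helper takes the forwarding device plus an explicit packet length instead of an skb, so the same ifup+MTU check can serve both the native XDP path (which only has an xdp_buff) and the generic skb path. A standalone sketch of the resulting bound, assuming a typical Ethernet device (the concrete numbers are illustrative, not part of the patch):

    #include <assert.h>

    /* With MTU 1500, a 14-byte Ethernet header and a 4-byte VLAN tag,
     * the largest frame xdp_ok_fwd_dev() accepts is 1518 bytes; anything
     * longer fails with -EMSGSIZE, and a downed device fails with
     * -ENETDOWN before the length is even considered. */
    int main(void)
    {
        unsigned int mtu = 1500, hard_header_len = 14, vlan_hlen = 4;

        assert(mtu + hard_header_len + vlan_hlen == 1518);
        return 0;
    }
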
@@ -828,6 +828,10 @@ struct tcp_skb_cb {

 #define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))

+static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
+{
+	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
+}
+
 #if IS_ENABLED(CONFIG_IPV6)
 /* This is the variant of inet6_iif() that must be used by TCP,
@@ -60,6 +60,10 @@ struct xdp_sock {
	bool zc;
	/* Protects multiple processes in the control path */
	struct mutex mutex;
+	/* Mutual exclusion of NAPI TX thread and sendmsg error paths
+	 * in the SKB destructor callback.
+	 */
+	spinlock_t tx_completion_lock;
	u64 rx_dropped;
 };
@@ -334,10 +334,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 {
	struct net_device *dev = dst->dev;
	struct xdp_frame *xdpf;
+	int err;

	if (!dev->netdev_ops->ndo_xdp_xmit)
		return -EOPNOTSUPP;

+	err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
+	if (unlikely(err))
+		return err;
+
	xdpf = convert_to_xdp_frame(xdp);
	if (unlikely(!xdpf))
		return -EOVERFLOW;

@@ -350,7 +355,7 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 {
	int err;

-	err = __xdp_generic_ok_fwd_dev(skb, dst->dev);
+	err = xdp_ok_fwd_dev(dst->dev, skb->len);
	if (unlikely(err))
		return err;
	skb->dev = dst->dev;
@@ -747,13 +747,15 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
			 * old element will be freed immediately.
			 * Otherwise return an error
			 */
-			atomic_dec(&htab->count);
-			return ERR_PTR(-E2BIG);
+			l_new = ERR_PTR(-E2BIG);
+			goto dec_count;
		}
		l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
				     htab->map.numa_node);
-		if (!l_new)
-			return ERR_PTR(-ENOMEM);
+		if (!l_new) {
+			l_new = ERR_PTR(-ENOMEM);
+			goto dec_count;
+		}
	}

	memcpy(l_new->key, key, key_size);

@@ -766,7 +768,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
					    GFP_ATOMIC | __GFP_NOWARN);
		if (!pptr) {
			kfree(l_new);
-			return ERR_PTR(-ENOMEM);
+			l_new = ERR_PTR(-ENOMEM);
+			goto dec_count;
		}
	}

@@ -780,6 +783,9 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,

	l_new->hash = hash;
	return l_new;
+dec_count:
+	atomic_dec(&htab->count);
+	return l_new;
 }

 static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
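
The shape of this fix: alloc_htab_elem() increments htab->count up front, and previously two of the early returns leaked that increment. Routing every failure through a single dec_count label makes the undo unskippable. A minimal userspace sketch of the same pattern (names are illustrative, not kernel code):

    #include <stdlib.h>

    static int count;

    /* Reserve a slot first; undo the reservation at one label on every
     * failure path instead of remembering to undo it at each return. */
    static void *alloc_elem(size_t size, int max_entries)
    {
        void *e = NULL;

        if (++count > max_entries)
            goto dec_count;        /* table full */
        e = malloc(size);
        if (!e)
            goto dec_count;        /* allocation failed */
        return e;
    dec_count:
        count--;
        return NULL;
    }
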
@@ -312,10 +312,12 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
	struct smap_psock *psock;
	struct sock *osk;

+	lock_sock(sk);
	rcu_read_lock();
	psock = smap_psock_sk(sk);
	if (unlikely(!psock)) {
		rcu_read_unlock();
+		release_sock(sk);
		return sk->sk_prot->close(sk, timeout);
	}

@@ -371,6 +373,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
		e = psock_map_pop(sk, psock);
	}
	rcu_read_unlock();
+	release_sock(sk);
	close_fun(sk, timeout);
 }

@@ -568,7 +571,8 @@ static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
	while (sg[i].length) {
		free += sg[i].length;
		sk_mem_uncharge(sk, sg[i].length);
-		put_page(sg_page(&sg[i]));
+		if (!md->skb)
+			put_page(sg_page(&sg[i]));
		sg[i].length = 0;
		sg[i].page_link = 0;
		sg[i].offset = 0;

@@ -577,6 +581,8 @@ static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
		if (i == MAX_SKB_FRAGS)
			i = 0;
	}
+	if (md->skb)
+		consume_skb(md->skb);

	return free;
 }
@@ -1230,7 +1236,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
	 */
	TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
	skb->sk = psock->sock;
-	bpf_compute_data_pointers(skb);
+	bpf_compute_data_end_sk_skb(skb);
	preempt_disable();
	rc = (*prog->bpf_func)(skb, prog->insnsi);
	preempt_enable();

@@ -1485,7 +1491,7 @@ static int smap_parse_func_strparser(struct strparser *strp,
	 * any socket yet.
	 */
	skb->sk = psock->sock;
-	bpf_compute_data_pointers(skb);
+	bpf_compute_data_end_sk_skb(skb);
	rc = (*prog->bpf_func)(skb, prog->insnsi);
	skb->sk = NULL;
	rcu_read_unlock();
@@ -1896,7 +1902,7 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
		e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
		if (!e) {
			err = -ENOMEM;
-			goto out_progs;
+			goto out_free;
		}
	}

@@ -2069,7 +2075,13 @@ static int sock_map_update_elem(struct bpf_map *map,
		return -EOPNOTSUPP;
	}

+	lock_sock(skops.sk);
+	preempt_disable();
+	rcu_read_lock();
	err = sock_map_ctx_update_elem(&skops, map, key, flags);
+	rcu_read_unlock();
+	preempt_enable();
+	release_sock(skops.sk);
	fput(socket->file);
	return err;
 }
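
One ordering detail worth spelling out: lock_sock() may sleep, so it has to be taken before preempt_disable() and rcu_read_lock() make the task atomic, and the exit path unwinds in strict reverse order, with release_sock() last since it may process the socket backlog. In outline (a sketch of the bracketing discipline, not additional kernel code):

    lock_sock(sk);          /* can sleep: take first */
      preempt_disable();    /* now atomic */
        rcu_read_lock();
        /* ... perform the map update ... */
        rcu_read_unlock();
      preempt_enable();
    release_sock(sk);       /* may run the backlog: drop last */
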
@@ -2342,7 +2354,10 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
	if (err)
		goto err;

-	/* bpf_map_update_elem() can be called in_irq() */
+	/* psock is valid here because otherwise above *ctx_update_elem would
+	 * have thrown an error. It is safe to skip error check.
+	 */
+	psock = smap_psock_sk(sock);
	raw_spin_lock_bh(&b->lock);
	l_old = lookup_elem_raw(head, hash, key, key_size);
	if (l_old && map_flags == BPF_NOEXIST) {

@@ -2360,12 +2375,6 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
		goto bucket_err;
	}

-	psock = smap_psock_sk(sock);
-	if (unlikely(!psock)) {
-		err = -EINVAL;
-		goto bucket_err;
-	}
-
	rcu_assign_pointer(e->hash_link, l_new);
	rcu_assign_pointer(e->htab,
			   container_of(map, struct bpf_htab, map));

@@ -2388,12 +2397,10 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
	raw_spin_unlock_bh(&b->lock);
	return 0;
 bucket_err:
+	smap_release_sock(psock, sock);
	raw_spin_unlock_bh(&b->lock);
 err:
	kfree(e);
-	psock = smap_psock_sk(sock);
-	if (psock)
-		smap_release_sock(psock, sock);
	return err;
 }

@@ -2415,7 +2422,13 @@ static int sock_hash_update_elem(struct bpf_map *map,
		return -EINVAL;
	}

+	lock_sock(skops.sk);
+	preempt_disable();
+	rcu_read_lock();
	err = sock_hash_ctx_update_elem(&skops, map, key, flags);
+	rcu_read_unlock();
+	preempt_enable();
+	release_sock(skops.sk);
	fput(socket->file);
	return err;
 }
@@ -2472,10 +2485,8 @@ struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
	b = __select_bucket(htab, hash);
	head = &b->head;

-	raw_spin_lock_bh(&b->lock);
	l = lookup_elem_raw(head, hash, key, key_size);
	sk = l ? l->sk : NULL;
-	raw_spin_unlock_bh(&b->lock);
	return sk;
 }

@@ -735,7 +735,9 @@ static int map_update_elem(union bpf_attr *attr)
	if (bpf_map_is_dev_bound(map)) {
		err = bpf_map_offload_update_elem(map, key, value, attr->flags);
		goto out;
-	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
+	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
+		   map->map_type == BPF_MAP_TYPE_SOCKHASH ||
+		   map->map_type == BPF_MAP_TYPE_SOCKMAP) {
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		goto out;
	}
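
With SOCKMAP and SOCKHASH added to this branch, a plain BPF_MAP_UPDATE_ELEM syscall from userspace now reaches these maps' own update handlers. A hedged userspace sketch using the raw syscall (map_fd and sock_fd are assumed to already exist):

    #include <linux/bpf.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int bpf_update(int map_fd, const void *key, const void *value,
                          __u64 flags)
    {
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.map_fd = map_fd;
        attr.key    = (__u64)(unsigned long)key;
        attr.value  = (__u64)(unsigned long)value;
        attr.flags  = flags;
        return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
    }

    /* usage: place a TCP socket fd into slot 0 of a sockmap:
     *     int key = 0;
     *     bpf_update(map_fd, &key, &sock_fd, BPF_ANY);
     */
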
@@ -1762,6 +1762,37 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
	.arg2_type	= ARG_ANYTHING,
 };

+static inline int sk_skb_try_make_writable(struct sk_buff *skb,
+					   unsigned int write_len)
+{
+	int err = __bpf_try_make_writable(skb, write_len);
+
+	bpf_compute_data_end_sk_skb(skb);
+	return err;
+}
+
+BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
+{
+	/* Idea is the following: should the needed direct read/write
+	 * test fail during runtime, we can pull in more data and redo
+	 * again, since implicitly, we invalidate previous checks here.
+	 *
+	 * Or, since we know how much we need to make read/writeable,
+	 * this can be done once at the program beginning for direct
+	 * access case. By this we overcome limitations of only current
+	 * headroom being accessible.
+	 */
+	return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb));
+}
+
+static const struct bpf_func_proto sk_skb_pull_data_proto = {
+	.func		= sk_skb_pull_data,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
	   u64, from, u64, to, u64, flags)
 {
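
The sk_skb_pull_data variant exists so that SK_SKB programs get data_end recomputed via bpf_compute_data_end_sk_skb() after a pull. A hedged sample of a verdict program that relies on this (the section name and helper header follow the selftests style and are illustrative, not part of the patch):

    #include <linux/bpf.h>
    #include "bpf_helpers.h"    /* assumed: the samples/selftests helper header */

    SEC("sk_skb_verdict")
    int verdict_prog(struct __sk_buff *skb)
    {
        void *data, *data_end;

        /* Pull the first 4 bytes into linear data; the helper also
         * refreshes skb->data/data_end for the checks below. */
        if (bpf_skb_pull_data(skb, 4))
            return SK_DROP;

        data = (void *)(long)skb->data;
        data_end = (void *)(long)skb->data_end;
        if (data + 4 > data_end)
            return SK_DROP;
        return SK_PASS;
    }
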
@@ -2779,7 +2810,8 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)

 static u32 __bpf_skb_max_len(const struct sk_buff *skb)
 {
-	return skb->dev->mtu + skb->dev->hard_header_len;
+	return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len :
+			  SKB_MAX_ALLOC;
 }

 static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
@@ -2863,8 +2895,8 @@ static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
	return __skb_trim_rcsum(skb, new_len);
 }

-BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
-	   u64, flags)
+static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
+					u64 flags)
 {
	u32 max_len = __bpf_skb_max_len(skb);
	u32 min_len = __bpf_skb_min_len(skb);
@@ -2900,6 +2932,13 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
		if (!ret && skb_is_gso(skb))
			skb_gso_reset(skb);
	}
+	return ret;
+}
+
+BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
+	   u64, flags)
+{
+	int ret = __bpf_skb_change_tail(skb, new_len, flags);

	bpf_compute_data_pointers(skb);
	return ret;
@@ -2914,8 +2953,26 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = {
	.arg3_type	= ARG_ANYTHING,
 };

-BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
+BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
	   u64, flags)
 {
+	int ret = __bpf_skb_change_tail(skb, new_len, flags);
+
+	bpf_compute_data_end_sk_skb(skb);
+	return ret;
+}
+
+static const struct bpf_func_proto sk_skb_change_tail_proto = {
+	.func		= sk_skb_change_tail,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
+					u64 flags)
+{
	u32 max_len = __bpf_skb_max_len(skb);
	u32 new_len = skb->len + head_room;

@@ -2941,8 +2998,16 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
		skb_reset_mac_header(skb);
	}

+	return ret;
+}
+
+BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
+	   u64, flags)
+{
+	int ret = __bpf_skb_change_head(skb, head_room, flags);
+
	bpf_compute_data_pointers(skb);
-	return 0;
+	return ret;
 }

 static const struct bpf_func_proto bpf_skb_change_head_proto = {
@@ -2954,6 +3019,23 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
	.arg3_type	= ARG_ANYTHING,
 };

+BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
+	   u64, flags)
+{
+	int ret = __bpf_skb_change_head(skb, head_room, flags);
+
+	bpf_compute_data_end_sk_skb(skb);
+	return ret;
+}
+
+static const struct bpf_func_proto sk_skb_change_head_proto = {
+	.func		= sk_skb_change_head,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
 static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
 {
	return xdp_data_meta_unsupported(xdp) ? 0 :
@@ -3046,12 +3128,16 @@ static int __bpf_tx_xdp(struct net_device *dev,
	       u32 index)
 {
	struct xdp_frame *xdpf;
-	int sent;
+	int err, sent;

	if (!dev->netdev_ops->ndo_xdp_xmit) {
		return -EOPNOTSUPP;
	}

+	err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
+	if (unlikely(err))
+		return err;
+
	xdpf = convert_to_xdp_frame(xdp);
	if (unlikely(!xdpf))
		return -EOVERFLOW;
@@ -3285,7 +3371,8 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
		goto err;
	}

-	if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+	err = xdp_ok_fwd_dev(fwd, skb->len);
+	if (unlikely(err))
		goto err;

	skb->dev = fwd;
@@ -4617,9 +4704,12 @@ bool bpf_helper_changes_pkt_data(void *func)
	       func == bpf_skb_store_bytes ||
	       func == bpf_skb_change_proto ||
	       func == bpf_skb_change_head ||
+	       func == sk_skb_change_head ||
	       func == bpf_skb_change_tail ||
+	       func == sk_skb_change_tail ||
	       func == bpf_skb_adjust_room ||
	       func == bpf_skb_pull_data ||
+	       func == sk_skb_pull_data ||
	       func == bpf_clone_redirect ||
	       func == bpf_l3_csum_replace ||
	       func == bpf_l4_csum_replace ||
@@ -4871,11 +4961,11 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
	case BPF_FUNC_skb_load_bytes:
		return &bpf_skb_load_bytes_proto;
	case BPF_FUNC_skb_pull_data:
-		return &bpf_skb_pull_data_proto;
+		return &sk_skb_pull_data_proto;
	case BPF_FUNC_skb_change_tail:
-		return &bpf_skb_change_tail_proto;
+		return &sk_skb_change_tail_proto;
	case BPF_FUNC_skb_change_head:
-		return &bpf_skb_change_head_proto;
+		return &sk_skb_change_head_proto;
	case BPF_FUNC_get_socket_cookie:
		return &bpf_get_socket_cookie_proto;
	case BPF_FUNC_get_socket_uid:
@@ -199,8 +199,11 @@ static void xsk_destruct_skb(struct sk_buff *skb)
 {
	u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
	struct xdp_sock *xs = xdp_sk(skb->sk);
+	unsigned long flags;

+	spin_lock_irqsave(&xs->tx_completion_lock, flags);
	WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));
+	spin_unlock_irqrestore(&xs->tx_completion_lock, flags);

	sock_wfree(skb);
 }
@@ -268,15 +271,15 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
		skb->destructor = xsk_destruct_skb;

		err = dev_direct_xmit(skb, xs->queue_id);
+		xskq_discard_desc(xs->tx);
		/* Ignore NET_XMIT_CN as packet might have been sent */
		if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) {
-			err = -EAGAIN;
-			/* SKB consumed by dev_direct_xmit() */
+			/* SKB completed but not sent */
+			err = -EBUSY;
			goto out;
		}

		sent_frame = true;
-		xskq_discard_desc(xs->tx);
	}

 out:
@@ -755,6 +758,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,

	xs = xdp_sk(sk);
	mutex_init(&xs->mutex);
+	spin_lock_init(&xs->tx_completion_lock);

	local_bh_disable();
	sock_prot_inuse_add(net, &xsk_proto, 1);
@@ -62,14 +62,9 @@ static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt)
	return (entries > dcnt) ? dcnt : entries;
 }

-static inline u32 xskq_nb_free_lazy(struct xsk_queue *q, u32 producer)
-{
-	return q->nentries - (producer - q->cons_tail);
-}
-
 static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
 {
-	u32 free_entries = xskq_nb_free_lazy(q, producer);
+	u32 free_entries = q->nentries - (producer - q->cons_tail);

	if (free_entries >= dcnt)
		return free_entries;
@@ -129,7 +124,7 @@ static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr)
 {
	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;

-	if (xskq_nb_free(q, q->prod_tail, LAZY_UPDATE_THRESHOLD) == 0)
+	if (xskq_nb_free(q, q->prod_tail, 1) == 0)
		return -ENOSPC;

	ring->desc[q->prod_tail++ & q->ring_mask] = addr;
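
The inlined computation in xskq_nb_free() leans on unsigned wraparound: producer - cons_tail yields the number of outstanding entries even after the 32-bit counters overflow. A standalone check of that arithmetic (values are illustrative):

    #include <assert.h>
    #include <stdint.h>

    static uint32_t nb_free(uint32_t nentries, uint32_t producer,
                            uint32_t cons_tail)
    {
        /* outstanding entries = producer - cons_tail, modulo 2^32 */
        return nentries - (producer - cons_tail);
    }

    int main(void)
    {
        /* 7 of 8 ring slots in flight, no overflow: one slot free. */
        assert(nb_free(8, 7, 0) == 1);
        /* same occupancy straddling the u32 wrap: still one slot free. */
        assert(nb_free(8, 3, UINT32_MAX - 3) == 1);
        return 0;
    }
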
@@ -0,0 +1,49 @@
+cpustat
+fds_example
+lathist
+load_sock_ops
+lwt_len_hist
+map_perf_test
+offwaketime
+per_socket_stats_example
+sampleip
+sock_example
+sockex1
+sockex2
+sockex3
+spintest
+syscall_nrs.h
+syscall_tp
+task_fd_query
+tc_l2_redirect
+test_cgrp2_array_pin
+test_cgrp2_attach
+test_cgrp2_attach2
+test_cgrp2_sock
+test_cgrp2_sock2
+test_current_task_under_cgroup
+test_lru_dist
+test_map_in_map
+test_overhead
+test_probe_write_user
+trace_event
+trace_output
+tracex1
+tracex2
+tracex3
+tracex4
+tracex5
+tracex6
+tracex7
+xdp1
+xdp2
+xdp_adjust_tail
+xdp_fwd
+xdp_monitor
+xdp_redirect
+xdp_redirect_cpu
+xdp_redirect_map
+xdp_router_ipv4
+xdp_rxq_info
+xdp_tx_iptunnel
+xdpsock
@@ -6,6 +6,7 @@
  */
 #define KBUILD_MODNAME "foo"
 #include <linux/if_ether.h>
+#include <linux/if_vlan.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/in.h>

@@ -108,11 +109,6 @@ static int parse_ipv6(void *data, uint64_t nh_off, void *data_end)
	return 0;
 }

-struct vlan_hdr {
-	uint16_t h_vlan_TCI;
-	uint16_t h_vlan_encapsulated_proto;
-};
-
 SEC("varlen")
 int handle_ingress(struct __sk_buff *skb)
 {
@@ -6,6 +6,7 @@
  */
 #define _GNU_SOURCE
 #include <sched.h>
+#include <errno.h>
 #include <stdio.h>
 #include <sys/types.h>
 #include <asm/unistd.h>

@@ -44,8 +45,13 @@ static void test_task_rename(int cpu)
		exit(1);
	}
	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
-		write(fd, buf, sizeof(buf));
+	for (i = 0; i < MAX_CNT; i++) {
+		if (write(fd, buf, sizeof(buf)) < 0) {
+			printf("task rename failed: %s\n", strerror(errno));
+			close(fd);
+			return;
+		}
+	}
	printf("task_rename:%d: %lld events per sec\n",
	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
	close(fd);
@@ -63,8 +69,13 @@ static void test_urandom_read(int cpu)
		exit(1);
	}
	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
-		read(fd, buf, sizeof(buf));
+	for (i = 0; i < MAX_CNT; i++) {
+		if (read(fd, buf, sizeof(buf)) < 0) {
+			printf("failed to read from /dev/urandom: %s\n", strerror(errno));
+			close(fd);
+			return;
+		}
+	}
	printf("urandom_read:%d: %lld events per sec\n",
	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
	close(fd);
@@ -122,6 +122,16 @@ static void print_stacks(void)
	}
 }

+static inline int generate_load(void)
+{
+	if (system("dd if=/dev/zero of=/dev/null count=5000k status=none") < 0) {
+		printf("failed to generate some load with dd: %s\n", strerror(errno));
+		return -1;
+	}
+
+	return 0;
+}
+
 static void test_perf_event_all_cpu(struct perf_event_attr *attr)
 {
	int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
@@ -142,7 +152,11 @@ static void test_perf_event_all_cpu(struct perf_event_attr *attr)
		assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
		assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE) == 0);
	}
-	system("dd if=/dev/zero of=/dev/null count=5000k status=none");
+
+	if (generate_load() < 0) {
+		error = 1;
+		goto all_cpu_err;
+	}
	print_stacks();
 all_cpu_err:
	for (i--; i >= 0; i--) {
@@ -156,7 +170,7 @@

 static void test_perf_event_task(struct perf_event_attr *attr)
 {
-	int pmu_fd;
+	int pmu_fd, error = 0;

	/* per task perf event, enable inherit so the "dd ..." command can be traced properly.
	 * Enabling inherit will cause bpf_perf_prog_read_time helper failure.
@@ -171,10 +185,17 @@ static void test_perf_event_task(struct perf_event_attr *attr)
	}
	assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
	assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE) == 0);
-	system("dd if=/dev/zero of=/dev/null count=5000k status=none");
+
+	if (generate_load() < 0) {
+		error = 1;
+		goto err;
+	}
	print_stacks();
+err:
	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
	close(pmu_fd);
+	if (error)
+		int_exit(0);
 }

 static void test_bpf_perf_event(void)
@@ -729,7 +729,7 @@ static void kick_tx(int fd)
	int ret;

	ret = sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
-	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN)
+	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY)
		return;
	lassert(0);
 }
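
Taken together with the xsk_generic_xmit() change above, a TX kick on an AF_XDP socket can now surface EBUSY (descriptor completed but frame not sent) in addition to ENOBUFS and EAGAIN; all three are transient, which is why kick_tx() must not assert on them. A sketch of the tolerant calling pattern:

    #include <errno.h>
    #include <sys/socket.h>

    /* Returns 0 when the kick succeeded or failed only transiently;
     * a negative errno is reserved for genuine failures. */
    static int kick_tx_tolerant(int fd)
    {
        if (sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0) >= 0)
            return 0;
        if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY)
            return 0;    /* reap completions, then kick again later */
        return -errno;
    }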