Merge branch 'faster-soreuseport'
Craig Gallek says:

====================
Faster SO_REUSEPORT

This series contains two optimizations for the SO_REUSEPORT feature:
Faster lookup when selecting a socket for an incoming packet and the
ability to select the socket from the group using a BPF program.

This series only includes the UDP path. I plan to submit a follow-up
including the TCP path if the implementation in this series is acceptable.

Changes in v4:
- pskb_may_pull is unnecessary with pskb_pull (per Alexei Starovoitov)

Changes in v3:
- skb_pull_inline -> pskb_pull (per Alexei Starovoitov)
- reuseport_attach* -> sk_reuseport_attach* and simple return statement
  syntax change (per Daniel Borkmann)

Changes in v2:
- Fix ARM build; remove unnecessary include.
- Handle case where protocol header is not in linear section
  (per Alexei Starovoitov).
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
6a5ef90c58
|
@ -92,4 +92,7 @@
|
|||
#define SO_ATTACH_BPF 50
|
||||
#define SO_DETACH_BPF SO_DETACH_FILTER
|
||||
|
||||
#define SO_ATTACH_REUSEPORT_CBPF 51
|
||||
#define SO_ATTACH_REUSEPORT_EBPF 52
|
||||
|
||||
#endif /* _UAPI_ASM_SOCKET_H */
|
||||
|
|
|
@ -85,4 +85,7 @@
|
|||
#define SO_ATTACH_BPF 50
|
||||
#define SO_DETACH_BPF SO_DETACH_FILTER
|
||||
|
||||
#define SO_ATTACH_REUSEPORT_CBPF 51
|
||||
#define SO_ATTACH_REUSEPORT_EBPF 52
|
||||
|
||||
#endif /* _UAPI__ASM_AVR32_SOCKET_H */
|
||||
|
|
|
@ -85,5 +85,8 @@
|
|||
#define SO_ATTACH_BPF 50
|
||||
#define SO_DETACH_BPF SO_DETACH_FILTER
|
||||
|
||||
#define SO_ATTACH_REUSEPORT_CBPF 51
|
||||
#define SO_ATTACH_REUSEPORT_EBPF 52
|
||||
|
||||
#endif /* _ASM_SOCKET_H */
|
||||
|
||||
|
|
|
@ -94,4 +94,7 @@
|
|||
#define SO_ATTACH_BPF 50
|
||||
#define SO_DETACH_BPF SO_DETACH_FILTER
|
||||
|
||||
#define SO_ATTACH_REUSEPORT_CBPF 51
|
||||
#define SO_ATTACH_REUSEPORT_EBPF 52
|
||||
|
||||
#endif /* _ASM_IA64_SOCKET_H */
|
||||
|
|
|
@ -85,4 +85,7 @@
|
|||
#define SO_ATTACH_BPF 50
|
||||
#define SO_DETACH_BPF SO_DETACH_FILTER
|
||||
|
||||
#define SO_ATTACH_REUSEPORT_CBPF 51
|
||||
#define SO_ATTACH_REUSEPORT_EBPF 52
|
||||
|
||||
#endif /* _ASM_M32R_SOCKET_H */
|
||||
|
|
|
@ -103,4 +103,7 @@
|
|||
#define SO_ATTACH_BPF 50
|
||||
#define SO_DETACH_BPF SO_DETACH_FILTER
|
||||
|
||||
#define SO_ATTACH_REUSEPORT_CBPF 51
|
||||
#define SO_ATTACH_REUSEPORT_EBPF 52
|
||||
|
||||
#endif /* _UAPI_ASM_SOCKET_H */
|
||||
|
|
|
@ -85,4 +85,7 @@
|
|||
#define SO_ATTACH_BPF 50
|
||||
#define SO_DETACH_BPF SO_DETACH_FILTER
|
||||
|
||||
#define SO_ATTACH_REUSEPORT_CBPF 51
|
||||
#define SO_ATTACH_REUSEPORT_EBPF 52
|
||||
|
||||
#endif /* _ASM_SOCKET_H */
|
||||
|
|
|
@ -84,4 +84,7 @@
|
|||
#define SO_ATTACH_BPF 0x402B
|
||||
#define SO_DETACH_BPF SO_DETACH_FILTER
|
||||
|
||||
#define SO_ATTACH_REUSEPORT_CBPF 0x402C
|
||||
#define SO_ATTACH_REUSEPORT_EBPF 0x402D
|
||||
|
||||
#endif /* _UAPI_ASM_SOCKET_H */
|
||||
|
|
|
@ -92,4 +92,7 @@
|
|||
#define SO_ATTACH_BPF 50
|
||||
#define SO_DETACH_BPF SO_DETACH_FILTER
|
||||
|
||||
#define SO_ATTACH_REUSEPORT_CBPF 51
|
||||
#define SO_ATTACH_REUSEPORT_EBPF 52
|
||||
|
||||
#endif /* _ASM_POWERPC_SOCKET_H */
|
||||
|
|
|
@ -91,4 +91,7 @@
|
|||
#define SO_ATTACH_BPF 50
|
||||
#define SO_DETACH_BPF SO_DETACH_FILTER
|
||||
|
||||
#define SO_ATTACH_REUSEPORT_CBPF 51
|
||||
#define SO_ATTACH_REUSEPORT_EBPF 52
|
||||
|
||||
#endif /* _ASM_SOCKET_H */
|
||||
|
|
|
@ -81,6 +81,9 @@
|
|||
#define SO_ATTACH_BPF 0x0034
|
||||
#define SO_DETACH_BPF SO_DETACH_FILTER
|
||||
|
||||
#define SO_ATTACH_REUSEPORT_CBPF 0x0035
|
||||
#define SO_ATTACH_REUSEPORT_EBPF 0x0036
|
||||
|
||||
/* Security levels - as per NRL IPv6 - don't actually do anything */
|
||||
#define SO_SECURITY_AUTHENTICATION 0x5001
|
||||
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
|
||||
|
|
|
@ -96,4 +96,7 @@
|
|||
#define SO_ATTACH_BPF 50
|
||||
#define SO_DETACH_BPF SO_DETACH_FILTER
|
||||
|
||||
#define SO_ATTACH_REUSEPORT_CBPF 51
|
||||
#define SO_ATTACH_REUSEPORT_EBPF 52
|
||||
|
||||
#endif /* _XTENSA_SOCKET_H */
|
||||
|
|
|
@ -447,6 +447,8 @@ void bpf_prog_destroy(struct bpf_prog *fp);
|
|||
|
||||
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
|
||||
int sk_attach_bpf(u32 ufd, struct sock *sk);
|
||||
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
|
||||
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
|
||||
int sk_detach_filter(struct sock *sk);
|
||||
int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
|
||||
unsigned int len);
|
||||
|
|
|
@ -87,7 +87,8 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
|
|||
u32 banned_flags);
|
||||
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
|
||||
u32 banned_flags);
|
||||
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2);
|
||||
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
|
||||
bool match_wildcard);
|
||||
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
|
||||
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);
|
||||
|
||||
|
|
|
@ -318,6 +318,7 @@ struct cg_proto;
|
|||
* @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE)
|
||||
* @sk_backlog_rcv: callback to process the backlog
|
||||
* @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0
|
||||
* @sk_reuseport_cb: reuseport group container
|
||||
*/
|
||||
struct sock {
|
||||
/*
|
||||
|
@ -453,6 +454,7 @@ struct sock {
|
|||
int (*sk_backlog_rcv)(struct sock *sk,
|
||||
struct sk_buff *skb);
|
||||
void (*sk_destruct)(struct sock *sk);
|
||||
struct sock_reuseport __rcu *sk_reuseport_cb;
|
||||
};
|
||||
|
||||
#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
#ifndef _SOCK_REUSEPORT_H
|
||||
#define _SOCK_REUSEPORT_H
|
||||
|
||||
#include <linux/filter.h>
|
||||
#include <linux/skbuff.h>
|
||||
#include <linux/types.h>
|
||||
#include <net/sock.h>
|
||||
|
||||
struct sock_reuseport {
|
||||
struct rcu_head rcu;
|
||||
|
||||
u16 max_socks; /* length of socks */
|
||||
u16 num_socks; /* elements in socks */
|
||||
struct bpf_prog __rcu *prog; /* optional BPF sock selector */
|
||||
struct sock *socks[0]; /* array of sock pointers */
|
||||
};
|
||||
|
||||
extern int reuseport_alloc(struct sock *sk);
|
||||
extern int reuseport_add_sock(struct sock *sk, const struct sock *sk2);
|
||||
extern void reuseport_detach_sock(struct sock *sk);
|
||||
extern struct sock *reuseport_select_sock(struct sock *sk,
|
||||
u32 hash,
|
||||
struct sk_buff *skb,
|
||||
int hdr_len);
|
||||
extern struct bpf_prog *reuseport_attach_prog(struct sock *sk,
|
||||
struct bpf_prog *prog);
|
||||
|
||||
#endif /* _SOCK_REUSEPORT_H */
|
|
@ -191,7 +191,7 @@ static inline void udp_lib_close(struct sock *sk, long timeout)
|
|||
}
|
||||
|
||||
int udp_lib_get_port(struct sock *sk, unsigned short snum,
|
||||
int (*)(const struct sock *, const struct sock *),
|
||||
int (*)(const struct sock *, const struct sock *, bool),
|
||||
unsigned int hash2_nulladdr);
|
||||
|
||||
u32 udp_flow_hashrnd(void);
|
||||
|
@ -258,7 +258,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
|
|||
__be32 daddr, __be16 dport, int dif);
|
||||
struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
|
||||
__be32 daddr, __be16 dport, int dif,
|
||||
struct udp_table *tbl);
|
||||
struct udp_table *tbl, struct sk_buff *skb);
|
||||
struct sock *udp6_lib_lookup(struct net *net,
|
||||
const struct in6_addr *saddr, __be16 sport,
|
||||
const struct in6_addr *daddr, __be16 dport,
|
||||
|
@ -266,7 +266,8 @@ struct sock *udp6_lib_lookup(struct net *net,
|
|||
struct sock *__udp6_lib_lookup(struct net *net,
|
||||
const struct in6_addr *saddr, __be16 sport,
|
||||
const struct in6_addr *daddr, __be16 dport,
|
||||
int dif, struct udp_table *tbl);
|
||||
int dif, struct udp_table *tbl,
|
||||
struct sk_buff *skb);
|
||||
|
||||
/*
|
||||
* SNMP statistics for UDP and UDP-Lite
|
||||
|
|
|
@ -87,4 +87,7 @@
|
|||
#define SO_ATTACH_BPF 50
|
||||
#define SO_DETACH_BPF SO_DETACH_FILTER
|
||||
|
||||
#define SO_ATTACH_REUSEPORT_CBPF 51
|
||||
#define SO_ATTACH_REUSEPORT_EBPF 52
|
||||
|
||||
#endif /* __ASM_GENERIC_SOCKET_H */
|
||||
|
|
|
@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
|
|||
|
||||
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
|
||||
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
|
||||
sock_diag.o dev_ioctl.o tso.o
|
||||
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o
|
||||
|
||||
obj-$(CONFIG_XFRM) += flow.o
|
||||
obj-y += net-sysfs.o
|
||||
|
|
|
@ -50,6 +50,7 @@
|
|||
#include <net/cls_cgroup.h>
|
||||
#include <net/dst_metadata.h>
|
||||
#include <net/dst.h>
|
||||
#include <net/sock_reuseport.h>
|
||||
|
||||
/**
|
||||
* sk_filter - run a packet through a socket filter
|
||||
|
@ -1167,6 +1168,68 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
|
||||
{
|
||||
struct bpf_prog *old_prog;
|
||||
int err;
|
||||
|
||||
if (bpf_prog_size(prog->len) > sysctl_optmem_max)
|
||||
return -ENOMEM;
|
||||
|
||||
if (sk_unhashed(sk)) {
|
||||
err = reuseport_alloc(sk);
|
||||
if (err)
|
||||
return err;
|
||||
} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
|
||||
/* The socket wasn't bound with SO_REUSEPORT */
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
old_prog = reuseport_attach_prog(sk, prog);
|
||||
if (old_prog)
|
||||
bpf_prog_destroy(old_prog);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static
|
||||
struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
|
||||
{
|
||||
unsigned int fsize = bpf_classic_proglen(fprog);
|
||||
unsigned int bpf_fsize = bpf_prog_size(fprog->len);
|
||||
struct bpf_prog *prog;
|
||||
int err;
|
||||
|
||||
if (sock_flag(sk, SOCK_FILTER_LOCKED))
|
||||
return ERR_PTR(-EPERM);
|
||||
|
||||
/* Make sure new filter is there and in the right amounts. */
|
||||
if (fprog->filter == NULL)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
prog = bpf_prog_alloc(bpf_fsize, 0);
|
||||
if (!prog)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
if (copy_from_user(prog->insns, fprog->filter, fsize)) {
|
||||
__bpf_prog_free(prog);
|
||||
return ERR_PTR(-EFAULT);
|
||||
}
|
||||
|
||||
prog->len = fprog->len;
|
||||
|
||||
err = bpf_prog_store_orig_filter(prog, fprog);
|
||||
if (err) {
|
||||
__bpf_prog_free(prog);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
/* bpf_prepare_filter() already takes care of freeing
|
||||
* memory in case something goes wrong.
|
||||
*/
|
||||
return bpf_prepare_filter(prog, NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
* sk_attach_filter - attach a socket filter
|
||||
* @fprog: the filter program
|
||||
|
@ -1179,39 +1242,9 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
|
|||
*/
|
||||
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
|
||||
{
|
||||
unsigned int fsize = bpf_classic_proglen(fprog);
|
||||
unsigned int bpf_fsize = bpf_prog_size(fprog->len);
|
||||
struct bpf_prog *prog;
|
||||
struct bpf_prog *prog = __get_filter(fprog, sk);
|
||||
int err;
|
||||
|
||||
if (sock_flag(sk, SOCK_FILTER_LOCKED))
|
||||
return -EPERM;
|
||||
|
||||
/* Make sure new filter is there and in the right amounts. */
|
||||
if (fprog->filter == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
prog = bpf_prog_alloc(bpf_fsize, 0);
|
||||
if (!prog)
|
||||
return -ENOMEM;
|
||||
|
||||
if (copy_from_user(prog->insns, fprog->filter, fsize)) {
|
||||
__bpf_prog_free(prog);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
prog->len = fprog->len;
|
||||
|
||||
err = bpf_prog_store_orig_filter(prog, fprog);
|
||||
if (err) {
|
||||
__bpf_prog_free(prog);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* bpf_prepare_filter() already takes care of freeing
|
||||
* memory in case something goes wrong.
|
||||
*/
|
||||
prog = bpf_prepare_filter(prog, NULL);
|
||||
if (IS_ERR(prog))
|
||||
return PTR_ERR(prog);
|
||||
|
||||
|
@ -1225,23 +1258,50 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(sk_attach_filter);
|
||||
|
||||
int sk_attach_bpf(u32 ufd, struct sock *sk)
|
||||
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
|
||||
{
|
||||
struct bpf_prog *prog;
|
||||
struct bpf_prog *prog = __get_filter(fprog, sk);
|
||||
int err;
|
||||
|
||||
if (sock_flag(sk, SOCK_FILTER_LOCKED))
|
||||
return -EPERM;
|
||||
|
||||
prog = bpf_prog_get(ufd);
|
||||
if (IS_ERR(prog))
|
||||
return PTR_ERR(prog);
|
||||
|
||||
if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) {
|
||||
bpf_prog_put(prog);
|
||||
return -EINVAL;
|
||||
err = __reuseport_attach_prog(prog, sk);
|
||||
if (err < 0) {
|
||||
__bpf_prog_release(prog);
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Look up the eBPF program behind descriptor @ufd for use on @sk.
 *
 * Returns the program, or an ERR_PTR(): -EPERM when SOCK_FILTER_LOCKED is
 * set on @sk, the error from bpf_prog_get(), or -EINVAL (after dropping the
 * reference) when the program is not of type BPF_PROG_TYPE_SOCKET_FILTER.
 */
static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
{
	struct bpf_prog *fp;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return ERR_PTR(-EPERM);

	fp = bpf_prog_get(ufd);
	if (!IS_ERR(fp) && fp->type != BPF_PROG_TYPE_SOCKET_FILTER) {
		bpf_prog_put(fp);
		return ERR_PTR(-EINVAL);
	}

	/* Either a valid socket-filter program or bpf_prog_get()'s error. */
	return fp;
}
|
||||
|
||||
int sk_attach_bpf(u32 ufd, struct sock *sk)
|
||||
{
|
||||
struct bpf_prog *prog = __get_bpf(ufd, sk);
|
||||
int err;
|
||||
|
||||
if (IS_ERR(prog))
|
||||
return PTR_ERR(prog);
|
||||
|
||||
err = __sk_attach_prog(prog, sk);
|
||||
if (err < 0) {
|
||||
bpf_prog_put(prog);
|
||||
|
@ -1251,6 +1311,23 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Attach the eBPF program referenced by descriptor @ufd to the reuseport
 * group of @sk. Returns 0 on success or a negative errno; on attach
 * failure the program reference is dropped again.
 */
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
{
	struct bpf_prog *fp = __get_bpf(ufd, sk);
	int ret;

	if (IS_ERR(fp))
		return PTR_ERR(fp);

	ret = __reuseport_attach_prog(fp, sk);
	if (ret >= 0)
		return 0;

	bpf_prog_put(fp);
	return ret;
}
|
||||
|
||||
#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1)
|
||||
#define BPF_LDST_LEN 16U
|
||||
|
||||
|
|
|
@ -134,6 +134,7 @@
|
|||
#include <linux/sock_diag.h>
|
||||
|
||||
#include <linux/filter.h>
|
||||
#include <net/sock_reuseport.h>
|
||||
|
||||
#include <trace/events/sock.h>
|
||||
|
||||
|
@ -932,6 +933,32 @@ set_rcvbuf:
|
|||
}
|
||||
break;
|
||||
|
||||
case SO_ATTACH_REUSEPORT_CBPF:
|
||||
ret = -EINVAL;
|
||||
if (optlen == sizeof(struct sock_fprog)) {
|
||||
struct sock_fprog fprog;
|
||||
|
||||
ret = -EFAULT;
|
||||
if (copy_from_user(&fprog, optval, sizeof(fprog)))
|
||||
break;
|
||||
|
||||
ret = sk_reuseport_attach_filter(&fprog, sk);
|
||||
}
|
||||
break;
|
||||
|
||||
case SO_ATTACH_REUSEPORT_EBPF:
|
||||
ret = -EINVAL;
|
||||
if (optlen == sizeof(u32)) {
|
||||
u32 ufd;
|
||||
|
||||
ret = -EFAULT;
|
||||
if (copy_from_user(&ufd, optval, sizeof(ufd)))
|
||||
break;
|
||||
|
||||
ret = sk_reuseport_attach_bpf(ufd, sk);
|
||||
}
|
||||
break;
|
||||
|
||||
case SO_DETACH_FILTER:
|
||||
ret = sk_detach_filter(sk);
|
||||
break;
|
||||
|
@ -1443,6 +1470,8 @@ void sk_destruct(struct sock *sk)
|
|||
sk_filter_uncharge(sk, filter);
|
||||
RCU_INIT_POINTER(sk->sk_filter, NULL);
|
||||
}
|
||||
if (rcu_access_pointer(sk->sk_reuseport_cb))
|
||||
reuseport_detach_sock(sk);
|
||||
|
||||
sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
|
||||
|
||||
|
|
|
@ -0,0 +1,251 @@
|
|||
/*
|
||||
* To speed up listener socket lookup, create an array to store all sockets
|
||||
* listening on the same port. This allows a decision to be made after finding
|
||||
* the first socket. An optional BPF program can also be configured for
|
||||
* selecting the socket index from the array of available sockets.
|
||||
*/
|
||||
|
||||
#include <net/sock_reuseport.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/rcupdate.h>
|
||||
|
||||
#define INIT_SOCKS 128
|
||||
|
||||
static DEFINE_SPINLOCK(reuseport_lock);
|
||||
|
||||
/* Allocate a zeroed group container with room for @max_socks sockets.
 * Uses GFP_ATOMIC; returns NULL when the allocation fails.
 */
static struct sock_reuseport *__reuseport_alloc(u16 max_socks)
{
	struct sock_reuseport *grp;

	grp = kzalloc(sizeof(struct sock_reuseport) +
		      sizeof(struct sock *) * max_socks, GFP_ATOMIC);
	if (grp) {
		grp->max_socks = max_socks;
		RCU_INIT_POINTER(grp->prog, NULL);
	}

	return grp;
}
|
||||
|
||||
int reuseport_alloc(struct sock *sk)
|
||||
{
|
||||
struct sock_reuseport *reuse;
|
||||
|
||||
/* bh lock used since this function call may precede hlist lock in
|
||||
* soft irq of receive path or setsockopt from process context
|
||||
*/
|
||||
spin_lock_bh(&reuseport_lock);
|
||||
WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
|
||||
lockdep_is_held(&reuseport_lock)),
|
||||
"multiple allocations for the same socket");
|
||||
reuse = __reuseport_alloc(INIT_SOCKS);
|
||||
if (!reuse) {
|
||||
spin_unlock_bh(&reuseport_lock);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
reuse->socks[0] = sk;
|
||||
reuse->num_socks = 1;
|
||||
rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
|
||||
|
||||
spin_unlock_bh(&reuseport_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(reuseport_alloc);
|
||||
|
||||
static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
|
||||
{
|
||||
struct sock_reuseport *more_reuse;
|
||||
u32 more_socks_size, i;
|
||||
|
||||
more_socks_size = reuse->max_socks * 2U;
|
||||
if (more_socks_size > U16_MAX)
|
||||
return NULL;
|
||||
|
||||
more_reuse = __reuseport_alloc(more_socks_size);
|
||||
if (!more_reuse)
|
||||
return NULL;
|
||||
|
||||
more_reuse->max_socks = more_socks_size;
|
||||
more_reuse->num_socks = reuse->num_socks;
|
||||
more_reuse->prog = reuse->prog;
|
||||
|
||||
memcpy(more_reuse->socks, reuse->socks,
|
||||
reuse->num_socks * sizeof(struct sock *));
|
||||
|
||||
for (i = 0; i < reuse->num_socks; ++i)
|
||||
rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
|
||||
more_reuse);
|
||||
|
||||
/* Note: we use kfree_rcu here instead of reuseport_free_rcu so
|
||||
* that reuse and more_reuse can temporarily share a reference
|
||||
* to prog.
|
||||
*/
|
||||
kfree_rcu(reuse, rcu);
|
||||
return more_reuse;
|
||||
}
|
||||
|
||||
/**
|
||||
* reuseport_add_sock - Add a socket to the reuseport group of another.
|
||||
* @sk: New socket to add to the group.
|
||||
* @sk2: Socket belonging to the existing reuseport group.
|
||||
* May return ENOMEM and not add socket to group under memory pressure.
|
||||
*/
|
||||
int reuseport_add_sock(struct sock *sk, const struct sock *sk2)
|
||||
{
|
||||
struct sock_reuseport *reuse;
|
||||
|
||||
spin_lock_bh(&reuseport_lock);
|
||||
reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
|
||||
lockdep_is_held(&reuseport_lock)),
|
||||
WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
|
||||
lockdep_is_held(&reuseport_lock)),
|
||||
"socket already in reuseport group");
|
||||
|
||||
if (reuse->num_socks == reuse->max_socks) {
|
||||
reuse = reuseport_grow(reuse);
|
||||
if (!reuse) {
|
||||
spin_unlock_bh(&reuseport_lock);
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
|
||||
reuse->socks[reuse->num_socks] = sk;
|
||||
/* paired with smp_rmb() in reuseport_select_sock() */
|
||||
smp_wmb();
|
||||
reuse->num_socks++;
|
||||
rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
|
||||
|
||||
spin_unlock_bh(&reuseport_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(reuseport_add_sock);
|
||||
|
||||
static void reuseport_free_rcu(struct rcu_head *head)
|
||||
{
|
||||
struct sock_reuseport *reuse;
|
||||
|
||||
reuse = container_of(head, struct sock_reuseport, rcu);
|
||||
if (reuse->prog)
|
||||
bpf_prog_destroy(reuse->prog);
|
||||
kfree(reuse);
|
||||
}
|
||||
|
||||
void reuseport_detach_sock(struct sock *sk)
|
||||
{
|
||||
struct sock_reuseport *reuse;
|
||||
int i;
|
||||
|
||||
spin_lock_bh(&reuseport_lock);
|
||||
reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
|
||||
lockdep_is_held(&reuseport_lock));
|
||||
rcu_assign_pointer(sk->sk_reuseport_cb, NULL);
|
||||
|
||||
for (i = 0; i < reuse->num_socks; i++) {
|
||||
if (reuse->socks[i] == sk) {
|
||||
reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
|
||||
reuse->num_socks--;
|
||||
if (reuse->num_socks == 0)
|
||||
call_rcu(&reuse->rcu, reuseport_free_rcu);
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock_bh(&reuseport_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(reuseport_detach_sock);
|
||||
|
||||
/* Run @prog on @skb and translate its result into a member of @reuse.
 *
 * A shared skb is cloned first and the clone is used instead. The data
 * pointer is advanced by @hdr_len for the duration of the program run and
 * pushed back afterwards. Returns NULL when the clone or the pull fails,
 * or when the program's result is not below @socks.
 */
static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks,
			    struct bpf_prog *prog, struct sk_buff *skb,
			    int hdr_len)
{
	struct sk_buff *clone = NULL;
	struct sock *chosen = NULL;
	u32 idx;

	if (skb_shared(skb)) {
		clone = skb_clone(skb, GFP_ATOMIC);
		if (!clone)
			return NULL;
		skb = clone;
	}

	/* temporarily advance data past protocol header */
	if (pskb_pull(skb, hdr_len)) {
		idx = bpf_prog_run_save_cb(prog, skb);
		__skb_push(skb, hdr_len);

		if (idx < socks)
			chosen = reuse->socks[idx];
	}

	consume_skb(clone);
	return chosen;
}
|
||||
|
||||
/**
|
||||
* reuseport_select_sock - Select a socket from an SO_REUSEPORT group.
|
||||
* @sk: First socket in the group.
|
||||
* @hash: When no BPF filter is available, use this hash to select.
|
||||
* @skb: skb to run through BPF filter.
|
||||
* @hdr_len: BPF filter expects skb data pointer at payload data. If
|
||||
* the skb does not yet point at the payload, this parameter represents
|
||||
* how far the pointer needs to advance to reach the payload.
|
||||
* Returns a socket that should receive the packet (or NULL on error).
|
||||
*/
|
||||
struct sock *reuseport_select_sock(struct sock *sk,
|
||||
u32 hash,
|
||||
struct sk_buff *skb,
|
||||
int hdr_len)
|
||||
{
|
||||
struct sock_reuseport *reuse;
|
||||
struct bpf_prog *prog;
|
||||
struct sock *sk2 = NULL;
|
||||
u16 socks;
|
||||
|
||||
rcu_read_lock();
|
||||
reuse = rcu_dereference(sk->sk_reuseport_cb);
|
||||
|
||||
/* if memory allocation failed or add call is not yet complete */
|
||||
if (!reuse)
|
||||
goto out;
|
||||
|
||||
prog = rcu_dereference(reuse->prog);
|
||||
socks = READ_ONCE(reuse->num_socks);
|
||||
if (likely(socks)) {
|
||||
/* paired with smp_wmb() in reuseport_add_sock() */
|
||||
smp_rmb();
|
||||
|
||||
if (prog && skb)
|
||||
sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
|
||||
else
|
||||
sk2 = reuse->socks[reciprocal_scale(hash, socks)];
|
||||
}
|
||||
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
return sk2;
|
||||
}
|
||||
EXPORT_SYMBOL(reuseport_select_sock);
|
||||
|
||||
struct bpf_prog *
|
||||
reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
|
||||
{
|
||||
struct sock_reuseport *reuse;
|
||||
struct bpf_prog *old_prog;
|
||||
|
||||
spin_lock_bh(&reuseport_lock);
|
||||
reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
|
||||
lockdep_is_held(&reuseport_lock));
|
||||
old_prog = rcu_dereference_protected(reuse->prog,
|
||||
lockdep_is_held(&reuseport_lock));
|
||||
rcu_assign_pointer(reuse->prog, prog);
|
||||
spin_unlock_bh(&reuseport_lock);
|
||||
|
||||
return old_prog;
|
||||
}
|
||||
EXPORT_SYMBOL(reuseport_attach_prog);
|
123
net/ipv4/udp.c
123
net/ipv4/udp.c
|
@ -113,6 +113,7 @@
|
|||
#include <trace/events/skb.h>
|
||||
#include <net/busy_poll.h>
|
||||
#include "udp_impl.h"
|
||||
#include <net/sock_reuseport.h>
|
||||
|
||||
struct udp_table udp_table __read_mostly;
|
||||
EXPORT_SYMBOL(udp_table);
|
||||
|
@ -137,7 +138,8 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
|
|||
unsigned long *bitmap,
|
||||
struct sock *sk,
|
||||
int (*saddr_comp)(const struct sock *sk1,
|
||||
const struct sock *sk2),
|
||||
const struct sock *sk2,
|
||||
bool match_wildcard),
|
||||
unsigned int log)
|
||||
{
|
||||
struct sock *sk2;
|
||||
|
@ -152,8 +154,9 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
|
|||
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
|
||||
sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
|
||||
(!sk2->sk_reuseport || !sk->sk_reuseport ||
|
||||
rcu_access_pointer(sk->sk_reuseport_cb) ||
|
||||
!uid_eq(uid, sock_i_uid(sk2))) &&
|
||||
saddr_comp(sk, sk2)) {
|
||||
saddr_comp(sk, sk2, true)) {
|
||||
if (!bitmap)
|
||||
return 1;
|
||||
__set_bit(udp_sk(sk2)->udp_port_hash >> log, bitmap);
|
||||
|
@ -170,7 +173,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
|
|||
struct udp_hslot *hslot2,
|
||||
struct sock *sk,
|
||||
int (*saddr_comp)(const struct sock *sk1,
|
||||
const struct sock *sk2))
|
||||
const struct sock *sk2,
|
||||
bool match_wildcard))
|
||||
{
|
||||
struct sock *sk2;
|
||||
struct hlist_nulls_node *node;
|
||||
|
@ -186,8 +190,9 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
|
|||
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
|
||||
sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
|
||||
(!sk2->sk_reuseport || !sk->sk_reuseport ||
|
||||
rcu_access_pointer(sk->sk_reuseport_cb) ||
|
||||
!uid_eq(uid, sock_i_uid(sk2))) &&
|
||||
saddr_comp(sk, sk2)) {
|
||||
saddr_comp(sk, sk2, true)) {
|
||||
res = 1;
|
||||
break;
|
||||
}
|
||||
|
@ -196,6 +201,35 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
|
|||
return res;
|
||||
}
|
||||
|
||||
static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
|
||||
int (*saddr_same)(const struct sock *sk1,
|
||||
const struct sock *sk2,
|
||||
bool match_wildcard))
|
||||
{
|
||||
struct net *net = sock_net(sk);
|
||||
struct hlist_nulls_node *node;
|
||||
kuid_t uid = sock_i_uid(sk);
|
||||
struct sock *sk2;
|
||||
|
||||
sk_nulls_for_each(sk2, node, &hslot->head) {
|
||||
if (net_eq(sock_net(sk2), net) &&
|
||||
sk2 != sk &&
|
||||
sk2->sk_family == sk->sk_family &&
|
||||
ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
|
||||
(udp_sk(sk2)->udp_port_hash == udp_sk(sk)->udp_port_hash) &&
|
||||
(sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
|
||||
sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
|
||||
(*saddr_same)(sk, sk2, false)) {
|
||||
return reuseport_add_sock(sk, sk2);
|
||||
}
|
||||
}
|
||||
|
||||
/* Initial allocation may have already happened via setsockopt */
|
||||
if (!rcu_access_pointer(sk->sk_reuseport_cb))
|
||||
return reuseport_alloc(sk);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
|
||||
*
|
||||
|
@ -207,7 +241,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
|
|||
*/
|
||||
int udp_lib_get_port(struct sock *sk, unsigned short snum,
|
||||
int (*saddr_comp)(const struct sock *sk1,
|
||||
const struct sock *sk2),
|
||||
const struct sock *sk2,
|
||||
bool match_wildcard),
|
||||
unsigned int hash2_nulladdr)
|
||||
{
|
||||
struct udp_hslot *hslot, *hslot2;
|
||||
|
@ -290,6 +325,14 @@ found:
|
|||
udp_sk(sk)->udp_port_hash = snum;
|
||||
udp_sk(sk)->udp_portaddr_hash ^= snum;
|
||||
if (sk_unhashed(sk)) {
|
||||
if (sk->sk_reuseport &&
|
||||
udp_reuseport_add_sock(sk, hslot, saddr_comp)) {
|
||||
inet_sk(sk)->inet_num = 0;
|
||||
udp_sk(sk)->udp_port_hash = 0;
|
||||
udp_sk(sk)->udp_portaddr_hash ^= snum;
|
||||
goto fail_unlock;
|
||||
}
|
||||
|
||||
sk_nulls_add_node_rcu(sk, &hslot->head);
|
||||
hslot->count++;
|
||||
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
|
||||
|
@ -309,13 +352,22 @@ fail:
|
|||
}
|
||||
EXPORT_SYMBOL(udp_lib_get_port);
|
||||
|
||||
static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
|
||||
/* match_wildcard == true:  0.0.0.0 matches any IPv4 address
|
||||
* match_wildcard == false: addresses must be exactly the same, i.e.
|
||||
* 0.0.0.0 only matches 0.0.0.0
|
||||
*/
|
||||
static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
|
||||
bool match_wildcard)
|
||||
{
|
||||
struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
|
||||
|
||||
return (!ipv6_only_sock(sk2) &&
|
||||
(!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr ||
|
||||
inet1->inet_rcv_saddr == inet2->inet_rcv_saddr));
|
||||
if (!ipv6_only_sock(sk2)) {
|
||||
if (inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)
|
||||
return 1;
|
||||
if (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr)
|
||||
return match_wildcard;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
|
||||
|
@ -459,8 +511,14 @@ begin:
|
|||
badness = score;
|
||||
reuseport = sk->sk_reuseport;
|
||||
if (reuseport) {
|
||||
struct sock *sk2;
|
||||
hash = udp_ehashfn(net, daddr, hnum,
|
||||
saddr, sport);
|
||||
sk2 = reuseport_select_sock(sk, hash, NULL, 0);
|
||||
if (sk2) {
|
||||
result = sk2;
|
||||
goto found;
|
||||
}
|
||||
matches = 1;
|
||||
}
|
||||
} else if (score == badness && reuseport) {
|
||||
|
@ -478,6 +536,7 @@ begin:
|
|||
if (get_nulls_value(node) != slot2)
|
||||
goto begin;
|
||||
if (result) {
|
||||
found:
|
||||
if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
|
||||
result = NULL;
|
||||
else if (unlikely(compute_score2(result, net, saddr, sport,
|
||||
|
@ -494,7 +553,7 @@ begin:
|
|||
*/
|
||||
struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
|
||||
__be16 sport, __be32 daddr, __be16 dport,
|
||||
int dif, struct udp_table *udptable)
|
||||
int dif, struct udp_table *udptable, struct sk_buff *skb)
|
||||
{
|
||||
struct sock *sk, *result;
|
||||
struct hlist_nulls_node *node;
|
||||
|
@ -540,8 +599,15 @@ begin:
|
|||
badness = score;
|
||||
reuseport = sk->sk_reuseport;
|
||||
if (reuseport) {
|
||||
struct sock *sk2;
|
||||
hash = udp_ehashfn(net, daddr, hnum,
|
||||
saddr, sport);
|
||||
sk2 = reuseport_select_sock(sk, hash, skb,
|
||||
sizeof(struct udphdr));
|
||||
if (sk2) {
|
||||
result = sk2;
|
||||
goto found;
|
||||
}
|
||||
matches = 1;
|
||||
}
|
||||
} else if (score == badness && reuseport) {
|
||||
|
@ -560,6 +626,7 @@ begin:
|
|||
goto begin;
|
||||
|
||||
if (result) {
|
||||
found:
|
||||
if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
|
||||
result = NULL;
|
||||
else if (unlikely(compute_score(result, net, saddr, hnum, sport,
|
||||
|
@ -581,13 +648,14 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
|
|||
|
||||
return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport,
|
||||
iph->daddr, dport, inet_iif(skb),
|
||||
udptable);
|
||||
udptable, skb);
|
||||
}
|
||||
|
||||
struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
|
||||
__be32 daddr, __be16 dport, int dif)
|
||||
{
|
||||
return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table);
|
||||
return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif,
|
||||
&udp_table, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(udp4_lib_lookup);
|
||||
|
||||
|
@ -635,7 +703,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
|
|||
struct net *net = dev_net(skb->dev);
|
||||
|
||||
sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
|
||||
iph->saddr, uh->source, skb->dev->ifindex, udptable);
|
||||
iph->saddr, uh->source, skb->dev->ifindex, udptable,
|
||||
NULL);
|
||||
if (!sk) {
|
||||
ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
|
||||
return; /* No socket for error */
|
||||
|
@ -1398,6 +1467,8 @@ void udp_lib_unhash(struct sock *sk)
|
|||
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
|
||||
|
||||
spin_lock_bh(&hslot->lock);
|
||||
if (rcu_access_pointer(sk->sk_reuseport_cb))
|
||||
reuseport_detach_sock(sk);
|
||||
if (sk_nulls_del_node_init_rcu(sk)) {
|
||||
hslot->count--;
|
||||
inet_sk(sk)->inet_num = 0;
|
||||
|
@ -1425,22 +1496,28 @@ void udp_lib_rehash(struct sock *sk, u16 newhash)
|
|||
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
|
||||
nhslot2 = udp_hashslot2(udptable, newhash);
|
||||
udp_sk(sk)->udp_portaddr_hash = newhash;
|
||||
if (hslot2 != nhslot2) {
|
||||
|
||||
if (hslot2 != nhslot2 ||
|
||||
rcu_access_pointer(sk->sk_reuseport_cb)) {
|
||||
hslot = udp_hashslot(udptable, sock_net(sk),
|
||||
udp_sk(sk)->udp_port_hash);
|
||||
/* we must lock primary chain too */
|
||||
spin_lock_bh(&hslot->lock);
|
||||
if (rcu_access_pointer(sk->sk_reuseport_cb))
|
||||
reuseport_detach_sock(sk);
|
||||
|
||||
spin_lock(&hslot2->lock);
|
||||
hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
|
||||
hslot2->count--;
|
||||
spin_unlock(&hslot2->lock);
|
||||
if (hslot2 != nhslot2) {
|
||||
spin_lock(&hslot2->lock);
|
||||
hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
|
||||
hslot2->count--;
|
||||
spin_unlock(&hslot2->lock);
|
||||
|
||||
spin_lock(&nhslot2->lock);
|
||||
hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
|
||||
&nhslot2->head);
|
||||
nhslot2->count++;
|
||||
spin_unlock(&nhslot2->lock);
|
||||
spin_lock(&nhslot2->lock);
|
||||
hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
|
||||
&nhslot2->head);
|
||||
nhslot2->count++;
|
||||
spin_unlock(&nhslot2->lock);
|
||||
}
|
||||
|
||||
spin_unlock_bh(&hslot->lock);
|
||||
}
|
||||
|
|
|
@ -44,7 +44,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
|
|||
sk = __udp4_lib_lookup(net,
|
||||
req->id.idiag_src[0], req->id.idiag_sport,
|
||||
req->id.idiag_dst[0], req->id.idiag_dport,
|
||||
req->id.idiag_if, tbl);
|
||||
req->id.idiag_if, tbl, NULL);
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
else if (req->sdiag_family == AF_INET6)
|
||||
sk = __udp6_lib_lookup(net,
|
||||
|
@ -52,7 +52,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
|
|||
req->id.idiag_sport,
|
||||
(struct in6_addr *)req->id.idiag_dst,
|
||||
req->id.idiag_dport,
|
||||
req->id.idiag_if, tbl);
|
||||
req->id.idiag_if, tbl, NULL);
|
||||
#endif
|
||||
else
|
||||
goto out_nosk;
|
||||
|
|
|
@ -51,12 +51,12 @@ int inet6_csk_bind_conflict(const struct sock *sk,
|
|||
(sk2->sk_state != TCP_TIME_WAIT &&
|
||||
!uid_eq(uid,
|
||||
sock_i_uid((struct sock *)sk2))))) {
|
||||
if (ipv6_rcv_saddr_equal(sk, sk2))
|
||||
if (ipv6_rcv_saddr_equal(sk, sk2, true))
|
||||
break;
|
||||
}
|
||||
if (!relax && reuse && sk2->sk_reuse &&
|
||||
sk2->sk_state != TCP_LISTEN &&
|
||||
ipv6_rcv_saddr_equal(sk, sk2))
|
||||
ipv6_rcv_saddr_equal(sk, sk2, true))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,6 +47,7 @@
|
|||
#include <net/xfrm.h>
|
||||
#include <net/inet6_hashtables.h>
|
||||
#include <net/busy_poll.h>
|
||||
#include <net/sock_reuseport.h>
|
||||
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/seq_file.h>
|
||||
|
@ -76,7 +77,14 @@ static u32 udp6_ehashfn(const struct net *net,
|
|||
udp_ipv6_hash_secret + net_hash_mix(net));
|
||||
}
|
||||
|
||||
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
|
||||
/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
|
||||
* only, and any IPv4 addresses if not IPv6 only
|
||||
* match_wildcard == false: addresses must be exactly the same, i.e.
|
||||
* IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
|
||||
* and 0.0.0.0 equals to 0.0.0.0 only
|
||||
*/
|
||||
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
|
||||
bool match_wildcard)
|
||||
{
|
||||
const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
|
||||
int sk2_ipv6only = inet_v6_ipv6only(sk2);
|
||||
|
@ -84,16 +92,24 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
|
|||
int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
|
||||
|
||||
/* if both are mapped, treat as IPv4 */
|
||||
if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
|
||||
return (!sk2_ipv6only &&
|
||||
(!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr ||
|
||||
sk->sk_rcv_saddr == sk2->sk_rcv_saddr));
|
||||
if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
|
||||
if (!sk2_ipv6only) {
|
||||
if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
|
||||
return 1;
|
||||
if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
|
||||
return match_wildcard;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (addr_type2 == IPV6_ADDR_ANY &&
|
||||
if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
|
||||
return 1;
|
||||
|
||||
if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
|
||||
!(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
|
||||
return 1;
|
||||
|
||||
if (addr_type == IPV6_ADDR_ANY &&
|
||||
if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
|
||||
!(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
|
||||
return 1;
|
||||
|
||||
|
@ -253,8 +269,14 @@ begin:
|
|||
badness = score;
|
||||
reuseport = sk->sk_reuseport;
|
||||
if (reuseport) {
|
||||
struct sock *sk2;
|
||||
hash = udp6_ehashfn(net, daddr, hnum,
|
||||
saddr, sport);
|
||||
sk2 = reuseport_select_sock(sk, hash, NULL, 0);
|
||||
if (sk2) {
|
||||
result = sk2;
|
||||
goto found;
|
||||
}
|
||||
matches = 1;
|
||||
}
|
||||
} else if (score == badness && reuseport) {
|
||||
|
@ -273,6 +295,7 @@ begin:
|
|||
goto begin;
|
||||
|
||||
if (result) {
|
||||
found:
|
||||
if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
|
||||
result = NULL;
|
||||
else if (unlikely(compute_score2(result, net, saddr, sport,
|
||||
|
@ -287,7 +310,8 @@ begin:
|
|||
struct sock *__udp6_lib_lookup(struct net *net,
|
||||
const struct in6_addr *saddr, __be16 sport,
|
||||
const struct in6_addr *daddr, __be16 dport,
|
||||
int dif, struct udp_table *udptable)
|
||||
int dif, struct udp_table *udptable,
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
struct sock *sk, *result;
|
||||
struct hlist_nulls_node *node;
|
||||
|
@ -332,8 +356,15 @@ begin:
|
|||
badness = score;
|
||||
reuseport = sk->sk_reuseport;
|
||||
if (reuseport) {
|
||||
struct sock *sk2;
|
||||
hash = udp6_ehashfn(net, daddr, hnum,
|
||||
saddr, sport);
|
||||
sk2 = reuseport_select_sock(sk, hash, skb,
|
||||
sizeof(struct udphdr));
|
||||
if (sk2) {
|
||||
result = sk2;
|
||||
goto found;
|
||||
}
|
||||
matches = 1;
|
||||
}
|
||||
} else if (score == badness && reuseport) {
|
||||
|
@ -352,6 +383,7 @@ begin:
|
|||
goto begin;
|
||||
|
||||
if (result) {
|
||||
found:
|
||||
if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
|
||||
result = NULL;
|
||||
else if (unlikely(compute_score(result, net, hnum, saddr, sport,
|
||||
|
@ -377,13 +409,13 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
|
|||
return sk;
|
||||
return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport,
|
||||
&iph->daddr, dport, inet6_iif(skb),
|
||||
udptable);
|
||||
udptable, skb);
|
||||
}
|
||||
|
||||
struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
|
||||
const struct in6_addr *daddr, __be16 dport, int dif)
|
||||
{
|
||||
return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table);
|
||||
return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(udp6_lib_lookup);
|
||||
|
||||
|
@ -549,8 +581,8 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
|
|||
int err;
|
||||
struct net *net = dev_net(skb->dev);
|
||||
|
||||
sk = __udp6_lib_lookup(net, daddr, uh->dest,
|
||||
saddr, uh->source, inet6_iif(skb), udptable);
|
||||
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
|
||||
inet6_iif(skb), udptable, skb);
|
||||
if (!sk) {
|
||||
ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
|
||||
ICMP6_MIB_INERRORS);
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
socket
|
||||
psock_fanout
|
||||
psock_tpacket
|
||||
reuseport_bpf
|
||||
|
|
|
@ -4,7 +4,7 @@ CFLAGS = -Wall -O2 -g
|
|||
|
||||
CFLAGS += -I../../../../usr/include/
|
||||
|
||||
NET_PROGS = socket psock_fanout psock_tpacket
|
||||
NET_PROGS = socket psock_fanout psock_tpacket reuseport_bpf
|
||||
|
||||
all: $(NET_PROGS)
|
||||
%: %.c
|
||||
|
|
|
@ -0,0 +1,467 @@
|
|||
/*
|
||||
* Test functionality of BPF filters for SO_REUSEPORT. The tests below will use
|
||||
* a BPF program (both classic and extended) to read the first word from an
|
||||
* incoming packet (expected to be in network byte-order), calculate a modulus
|
||||
* of that number, and then dispatch the packet to the Nth socket using the
|
||||
* result. These tests are run for each supported address family and protocol.
|
||||
* Additionally, a few edge cases in the implementation are tested.
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <error.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/unistd.h>
|
||||
#include <netinet/in.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/epoll.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#ifndef ARRAY_SIZE
|
||||
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
|
||||
#endif
|
||||
|
||||
/*
 * Parameters describing a single test scenario: which address families and
 * socket type to use, how many receive sockets to create and which ports
 * the receivers and senders use.
 */
struct test_params {
	int recv_family;	/* address family of the receive sockets */
	int send_family;	/* address family of the sending socket */
	int protocol;		/* socket type handed to socket() */
	size_t recv_socks;	/* number of receive sockets in the group */
	uint16_t recv_port;	/* port every receive socket binds to */
	uint16_t send_port_min;	/* source port of the first message sent */
};
|
||||
|
||||
/*
 * Address length handed to bind() and connect() throughout this test. The
 * value is the size of a struct sockaddr_storage and does not depend on the
 * address family.
 */
static size_t sockaddr_size(void)
{
	const size_t storage_len = sizeof(struct sockaddr_storage);

	return storage_len;
}
|
||||
|
||||
/*
 * Allocate a zero-filled struct sockaddr_storage and initialise it as the
 * wildcard ("any") address of @family with @port (given in host byte order).
 *
 * Only AF_INET and AF_INET6 are supported; any other family, or a failed
 * allocation, terminates the program through error(). The caller owns the
 * returned buffer and must release it with free().
 */
static struct sockaddr *new_any_sockaddr(int family, uint16_t port)
{
	struct sockaddr_storage *addr;
	struct sockaddr_in *addr4;
	struct sockaddr_in6 *addr6;

	/* calloc() zeroes the buffer; check it before anything writes to it. */
	addr = calloc(1, sizeof(*addr));
	if (!addr)
		error(1, errno, "failed to allocate sockaddr");

	switch (family) {
	case AF_INET:
		addr4 = (struct sockaddr_in *)addr;
		addr4->sin_family = AF_INET;
		addr4->sin_addr.s_addr = htonl(INADDR_ANY);
		addr4->sin_port = htons(port);
		break;
	case AF_INET6:
		addr6 = (struct sockaddr_in6 *)addr;
		addr6->sin6_family = AF_INET6;
		addr6->sin6_addr = in6addr_any;
		addr6->sin6_port = htons(port);
		break;
	default:
		error(1, 0, "Unsupported family %d", family);
	}
	return (struct sockaddr *)addr;
}
|
||||
|
||||
/*
 * Build an address for @family and @port exactly like new_any_sockaddr(),
 * then overwrite the address part with the loopback address of that family.
 * An unsupported family terminates the program through error(). The caller
 * frees the result.
 */
static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port)
{
	struct sockaddr *addr = new_any_sockaddr(family, port);

	if (family == AF_INET) {
		struct sockaddr_in *v4 = (struct sockaddr_in *)addr;

		v4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	} else if (family == AF_INET6) {
		struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)addr;

		v6->sin6_addr = in6addr_loopback;
	} else {
		error(1, 0, "Unsupported family %d", family);
	}
	return addr;
}
|
||||
|
||||
static void attach_ebpf(int fd, uint16_t mod)
|
||||
{
|
||||
static char bpf_log_buf[65536];
|
||||
static const char bpf_license[] = "GPL";
|
||||
|
||||
int bpf_fd;
|
||||
const struct bpf_insn prog[] = {
|
||||
/* BPF_MOV64_REG(BPF_REG_6, BPF_REG_1) */
|
||||
{ BPF_ALU64 | BPF_MOV | BPF_X, BPF_REG_6, BPF_REG_1, 0, 0 },
|
||||
/* BPF_LD_ABS(BPF_W, 0) R0 = (uint32_t)skb[0] */
|
||||
{ BPF_LD | BPF_ABS | BPF_W, 0, 0, 0, 0 },
|
||||
/* BPF_ALU64_IMM(BPF_MOD, BPF_REG_0, mod) */
|
||||
{ BPF_ALU64 | BPF_MOD | BPF_K, BPF_REG_0, 0, 0, mod },
|
||||
/* BPF_EXIT_INSN() */
|
||||
{ BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
|
||||
};
|
||||
union bpf_attr attr;
|
||||
|
||||
memset(&attr, 0, sizeof(attr));
|
||||
attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
|
||||
attr.insn_cnt = ARRAY_SIZE(prog);
|
||||
attr.insns = (uint64_t)prog;
|
||||
attr.license = (uint64_t)bpf_license;
|
||||
attr.log_buf = (uint64_t)bpf_log_buf;
|
||||
attr.log_size = sizeof(bpf_log_buf);
|
||||
attr.log_level = 1;
|
||||
attr.kern_version = 0;
|
||||
|
||||
bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
|
||||
if (bpf_fd < 0)
|
||||
error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf);
|
||||
|
||||
if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
|
||||
sizeof(bpf_fd)))
|
||||
error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");
|
||||
}
|
||||
|
||||
static void attach_cbpf(int fd, uint16_t mod)
|
||||
{
|
||||
struct sock_filter code[] = {
|
||||
/* A = (uint32_t)skb[0] */
|
||||
{ BPF_LD | BPF_W | BPF_ABS, 0, 0, 0 },
|
||||
/* A = A % mod */
|
||||
{ BPF_ALU | BPF_MOD, 0, 0, mod },
|
||||
/* return A */
|
||||
{ BPF_RET | BPF_A, 0, 0, 0 },
|
||||
};
|
||||
struct sock_fprog p = {
|
||||
.len = ARRAY_SIZE(code),
|
||||
.filter = code,
|
||||
};
|
||||
|
||||
if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p)))
|
||||
error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
|
||||
}
|
||||
|
||||
static void build_recv_group(const struct test_params p, int fd[], uint16_t mod,
|
||||
void (*attach_bpf)(int, uint16_t))
|
||||
{
|
||||
struct sockaddr * const addr =
|
||||
new_any_sockaddr(p.recv_family, p.recv_port);
|
||||
int i, opt;
|
||||
|
||||
for (i = 0; i < p.recv_socks; ++i) {
|
||||
fd[i] = socket(p.recv_family, p.protocol, 0);
|
||||
if (fd[i] < 0)
|
||||
error(1, errno, "failed to create recv %d", i);
|
||||
|
||||
opt = 1;
|
||||
if (setsockopt(fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
|
||||
sizeof(opt)))
|
||||
error(1, errno, "failed to set SO_REUSEPORT on %d", i);
|
||||
|
||||
if (i == 0)
|
||||
attach_bpf(fd[i], mod);
|
||||
|
||||
if (bind(fd[i], addr, sockaddr_size()))
|
||||
error(1, errno, "failed to bind recv socket %d", i);
|
||||
|
||||
if (p.protocol == SOCK_STREAM)
|
||||
if (listen(fd[i], p.recv_socks * 10))
|
||||
error(1, errno, "failed to listen on socket");
|
||||
}
|
||||
free(addr);
|
||||
}
|
||||
|
||||
static void send_from(struct test_params p, uint16_t sport, char *buf,
|
||||
size_t len)
|
||||
{
|
||||
struct sockaddr * const saddr = new_any_sockaddr(p.send_family, sport);
|
||||
struct sockaddr * const daddr =
|
||||
new_loopback_sockaddr(p.send_family, p.recv_port);
|
||||
const int fd = socket(p.send_family, p.protocol, 0);
|
||||
|
||||
if (fd < 0)
|
||||
error(1, errno, "failed to create send socket");
|
||||
|
||||
if (bind(fd, saddr, sockaddr_size()))
|
||||
error(1, errno, "failed to bind send socket");
|
||||
if (connect(fd, daddr, sockaddr_size()))
|
||||
error(1, errno, "failed to connect");
|
||||
|
||||
if (send(fd, buf, len, 0) < 0)
|
||||
error(1, errno, "failed to send message");
|
||||
|
||||
close(fd);
|
||||
free(saddr);
|
||||
free(daddr);
|
||||
}
|
||||
|
||||
/*
 * Send 2 * p.recv_socks messages and check that each one arrives on the
 * socket selected by the attached BPF program.
 *
 * Every message carries a 32-bit counter in network byte order and is sent
 * from its own source port (p.send_port_min + counter). After each send the
 * function waits on an epoll set containing all of @fd, reads the message
 * from the socket that became readable (accepting a connection first for
 * stream sockets), and verifies both the message size and that the index of
 * the receiving socket equals the payload value modulo @mod. Any mismatch or
 * system call failure terminates the program through error().
 */
static void test_recv_order(const struct test_params p, int fd[], int mod)
{
	char recv_buf[8], send_buf[8];
	struct msghdr msg;
	struct iovec recv_io = { recv_buf, 8 };
	struct epoll_event ev;
	int epfd, conn, i, sport, expected;
	uint32_t data, ndata;

	epfd = epoll_create(1);
	if (epfd < 0)
		error(1, errno, "failed to create epoll");
	for (i = 0; i < p.recv_socks; ++i) {
		ev.events = EPOLLIN;
		ev.data.fd = fd[i];
		if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd[i], &ev))
			error(1, errno, "failed to register sock %d epoll", i);
	}

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &recv_io;
	msg.msg_iovlen = 1;

	for (data = 0; data < p.recv_socks * 2; ++data) {
		sport = p.send_port_min + data;
		ndata = htonl(data);
		memcpy(send_buf, &ndata, sizeof(ndata));
		send_from(p, sport, send_buf, sizeof(ndata));

		i = epoll_wait(epfd, &ev, 1, -1);
		if (i < 0)
			error(1, errno, "epoll wait failed");

		if (p.protocol == SOCK_STREAM) {
			conn = accept(ev.data.fd, NULL, NULL);
			if (conn < 0)
				error(1, errno, "error accepting");
			i = recvmsg(conn, &msg, 0);
			close(conn);
		} else {
			i = recvmsg(ev.data.fd, &msg, 0);
		}
		if (i < 0)
			error(1, errno, "recvmsg error");
		if (i != sizeof(ndata))
			error(1, 0, "expected size %zu got %d",
			      sizeof(ndata), i);

		/* Translate the ready descriptor back into its group index. */
		for (i = 0; i < p.recv_socks; ++i)
			if (ev.data.fd == fd[i])
				break;
		memcpy(&ndata, recv_buf, sizeof(ndata));
		fprintf(stderr, "Socket %d: %u\n", i, ntohl(ndata));

		/*
		 * The attached programs compute (first payload word) % mod,
		 * so derive the expectation from the payload itself rather
		 * than from the source port, which only matches while
		 * send_port_min is a multiple of mod.
		 */
		expected = (int)(ntohl(ndata) % (uint32_t)mod);
		if (i != expected)
			error(1, 0, "expected socket %d", expected);
	}

	/* Release the epoll instance; this function runs once per scenario. */
	close(epfd);
}
|
||||
|
||||
static void test_reuseport_ebpf(const struct test_params p)
|
||||
{
|
||||
int i, fd[p.recv_socks];
|
||||
|
||||
fprintf(stderr, "Testing EBPF mod %zd...\n", p.recv_socks);
|
||||
build_recv_group(p, fd, p.recv_socks, attach_ebpf);
|
||||
test_recv_order(p, fd, p.recv_socks);
|
||||
|
||||
fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
|
||||
attach_ebpf(fd[0], p.recv_socks / 2);
|
||||
test_recv_order(p, fd, p.recv_socks / 2);
|
||||
|
||||
for (i = 0; i < p.recv_socks; ++i)
|
||||
close(fd[i]);
|
||||
}
|
||||
|
||||
static void test_reuseport_cbpf(const struct test_params p)
|
||||
{
|
||||
int i, fd[p.recv_socks];
|
||||
|
||||
fprintf(stderr, "Testing CBPF mod %zd...\n", p.recv_socks);
|
||||
build_recv_group(p, fd, p.recv_socks, attach_cbpf);
|
||||
test_recv_order(p, fd, p.recv_socks);
|
||||
|
||||
fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
|
||||
attach_cbpf(fd[0], p.recv_socks / 2);
|
||||
test_recv_order(p, fd, p.recv_socks / 2);
|
||||
|
||||
for (i = 0; i < p.recv_socks; ++i)
|
||||
close(fd[i]);
|
||||
}
|
||||
|
||||
static void test_extra_filter(const struct test_params p)
|
||||
{
|
||||
struct sockaddr * const addr =
|
||||
new_any_sockaddr(p.recv_family, p.recv_port);
|
||||
int fd1, fd2, opt;
|
||||
|
||||
fprintf(stderr, "Testing too many filters...\n");
|
||||
fd1 = socket(p.recv_family, p.protocol, 0);
|
||||
if (fd1 < 0)
|
||||
error(1, errno, "failed to create socket 1");
|
||||
fd2 = socket(p.recv_family, p.protocol, 0);
|
||||
if (fd2 < 0)
|
||||
error(1, errno, "failed to create socket 2");
|
||||
|
||||
opt = 1;
|
||||
if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
|
||||
error(1, errno, "failed to set SO_REUSEPORT on socket 1");
|
||||
if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
|
||||
error(1, errno, "failed to set SO_REUSEPORT on socket 2");
|
||||
|
||||
attach_ebpf(fd1, 10);
|
||||
attach_ebpf(fd2, 10);
|
||||
|
||||
if (bind(fd1, addr, sockaddr_size()))
|
||||
error(1, errno, "failed to bind recv socket 1");
|
||||
|
||||
if (!bind(fd2, addr, sockaddr_size()) && errno != EADDRINUSE)
|
||||
error(1, errno, "bind socket 2 should fail with EADDRINUSE");
|
||||
|
||||
free(addr);
|
||||
}
|
||||
|
||||
static void test_filter_no_reuseport(const struct test_params p)
|
||||
{
|
||||
struct sockaddr * const addr =
|
||||
new_any_sockaddr(p.recv_family, p.recv_port);
|
||||
const char bpf_license[] = "GPL";
|
||||
struct bpf_insn ecode[] = {
|
||||
{ BPF_ALU64 | BPF_MOV | BPF_K, BPF_REG_0, 0, 0, 10 },
|
||||
{ BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
|
||||
};
|
||||
struct sock_filter ccode[] = {{ BPF_RET | BPF_A, 0, 0, 0 }};
|
||||
union bpf_attr eprog;
|
||||
struct sock_fprog cprog;
|
||||
int fd, bpf_fd;
|
||||
|
||||
fprintf(stderr, "Testing filters on non-SO_REUSEPORT socket...\n");
|
||||
|
||||
memset(&eprog, 0, sizeof(eprog));
|
||||
eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
|
||||
eprog.insn_cnt = ARRAY_SIZE(ecode);
|
||||
eprog.insns = (uint64_t)ecode;
|
||||
eprog.license = (uint64_t)bpf_license;
|
||||
eprog.kern_version = 0;
|
||||
|
||||
memset(&cprog, 0, sizeof(cprog));
|
||||
cprog.len = ARRAY_SIZE(ccode);
|
||||
cprog.filter = ccode;
|
||||
|
||||
|
||||
bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &eprog, sizeof(eprog));
|
||||
if (bpf_fd < 0)
|
||||
error(1, errno, "ebpf error");
|
||||
fd = socket(p.recv_family, p.protocol, 0);
|
||||
if (fd < 0)
|
||||
error(1, errno, "failed to create socket 1");
|
||||
|
||||
if (bind(fd, addr, sockaddr_size()))
|
||||
error(1, errno, "failed to bind recv socket 1");
|
||||
|
||||
errno = 0;
|
||||
if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
|
||||
sizeof(bpf_fd)) || errno != EINVAL)
|
||||
error(1, errno, "setsockopt should have returned EINVAL");
|
||||
|
||||
errno = 0;
|
||||
if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &cprog,
|
||||
sizeof(cprog)) || errno != EINVAL)
|
||||
error(1, errno, "setsockopt should have returned EINVAL");
|
||||
|
||||
free(addr);
|
||||
}
|
||||
|
||||
/*
 * Attach an eBPF program to one unbound IPv4 UDP socket and a classic BPF
 * program to another, then close both. Note that SO_REUSEPORT is never set
 * on these sockets and neither is bound. Socket creation failures terminate
 * the program through error(), as do attach failures inside the helpers.
 */
static void test_filter_without_bind(void)
{
	int ebpf_sock;
	int cbpf_sock;

	fprintf(stderr, "Testing filter add without bind...\n");

	ebpf_sock = socket(AF_INET, SOCK_DGRAM, 0);
	if (ebpf_sock < 0)
		error(1, errno, "failed to create socket 1");

	cbpf_sock = socket(AF_INET, SOCK_DGRAM, 0);
	if (cbpf_sock < 0)
		error(1, errno, "failed to create socket 2");

	attach_ebpf(ebpf_sock, 10);
	attach_cbpf(cbpf_sock, 10);

	close(ebpf_sock);
	close(cbpf_sock);
}
|
||||
|
||||
|
||||
int main(void)
|
||||
{
|
||||
fprintf(stderr, "---- IPv4 UDP ----\n");
|
||||
test_reuseport_ebpf((struct test_params) {
|
||||
.recv_family = AF_INET,
|
||||
.send_family = AF_INET,
|
||||
.protocol = SOCK_DGRAM,
|
||||
.recv_socks = 10,
|
||||
.recv_port = 8000,
|
||||
.send_port_min = 9000});
|
||||
test_reuseport_cbpf((struct test_params) {
|
||||
.recv_family = AF_INET,
|
||||
.send_family = AF_INET,
|
||||
.protocol = SOCK_DGRAM,
|
||||
.recv_socks = 10,
|
||||
.recv_port = 8001,
|
||||
.send_port_min = 9020});
|
||||
test_extra_filter((struct test_params) {
|
||||
.recv_family = AF_INET,
|
||||
.protocol = SOCK_DGRAM,
|
||||
.recv_port = 8002});
|
||||
test_filter_no_reuseport((struct test_params) {
|
||||
.recv_family = AF_INET,
|
||||
.protocol = SOCK_DGRAM,
|
||||
.recv_port = 8008});
|
||||
|
||||
fprintf(stderr, "---- IPv6 UDP ----\n");
|
||||
test_reuseport_ebpf((struct test_params) {
|
||||
.recv_family = AF_INET6,
|
||||
.send_family = AF_INET6,
|
||||
.protocol = SOCK_DGRAM,
|
||||
.recv_socks = 10,
|
||||
.recv_port = 8003,
|
||||
.send_port_min = 9040});
|
||||
test_reuseport_cbpf((struct test_params) {
|
||||
.recv_family = AF_INET6,
|
||||
.send_family = AF_INET6,
|
||||
.protocol = SOCK_DGRAM,
|
||||
.recv_socks = 10,
|
||||
.recv_port = 8004,
|
||||
.send_port_min = 9060});
|
||||
test_extra_filter((struct test_params) {
|
||||
.recv_family = AF_INET6,
|
||||
.protocol = SOCK_DGRAM,
|
||||
.recv_port = 8005});
|
||||
test_filter_no_reuseport((struct test_params) {
|
||||
.recv_family = AF_INET6,
|
||||
.protocol = SOCK_DGRAM,
|
||||
.recv_port = 8009});
|
||||
|
||||
fprintf(stderr, "---- IPv6 UDP w/ mapped IPv4 ----\n");
|
||||
test_reuseport_ebpf((struct test_params) {
|
||||
.recv_family = AF_INET6,
|
||||
.send_family = AF_INET,
|
||||
.protocol = SOCK_DGRAM,
|
||||
.recv_socks = 10,
|
||||
.recv_port = 8006,
|
||||
.send_port_min = 9080});
|
||||
test_reuseport_cbpf((struct test_params) {
|
||||
.recv_family = AF_INET6,
|
||||
.send_family = AF_INET,
|
||||
.protocol = SOCK_DGRAM,
|
||||
.recv_socks = 10,
|
||||
.recv_port = 8007,
|
||||
.send_port_min = 9100});
|
||||
|
||||
|
||||
test_filter_without_bind();
|
||||
|
||||
fprintf(stderr, "SUCCESS\n");
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue