net: netfilter: Add kfuncs to allocate and insert CT
Introduce bpf_xdp_ct_alloc, bpf_skb_ct_alloc and bpf_ct_insert_entry kfuncs so that new conntrack entries can be allocated and inserted from XDP and TC programs. Introduce a bpf_nf_ct_tuple_parse utility routine to consolidate common code.

Extract the helper __nf_ct_set_timeout, used by both the ctnetlink and nf_conntrack_bpf code, out to nf_conntrack_core so that nf_conntrack_bpf does not need a dependency on CONFIG_NF_CT_NETLINK. This helper will later be reused to set the timeout of an allocated but not yet inserted CT entry.

The allocation functions return struct nf_conn___init instead of nf_conn, to distinguish an allocated CT from an already inserted or looked-up CT. This is later used to enforce restrictions on which kfuncs an allocated CT can be used with.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Co-developed-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220721134245.2450-8-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent aed8ee7feb
commit d7e79c97c0
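For orientation, here is a minimal sketch (not part of this patch) of how an XDP program might exercise the new kfuncs: allocate an entry with bpf_xdp_ct_alloc, insert it with bpf_ct_insert_entry, and release the reference returned on success. The program name, the locally mirrored bpf_ct_opts layout (bpf_ct_opts_local) and the addresses/ports are illustrative assumptions; the extern __ksym declarations follow the style used by the BPF selftests rather than anything added by this commit.

// SPDX-License-Identifier: GPL-2.0
/* Illustrative sketch only: allocate and insert a UDP conntrack entry
 * from XDP using the kfuncs introduced by this series.
 */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

struct nf_conn;
struct nf_conn___init;

/* Local mirror of the kernel's 12-byte struct bpf_ct_opts (assumption). */
struct bpf_ct_opts_local {
        s32 netns_id;
        s32 error;
        u8 l4proto;
        u8 dir;
        u8 reserved[2];
};

/* kfuncs added by this patch, resolved against kernel BTF at load time */
extern struct nf_conn___init *
bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
                 u32 tuple__sz, struct bpf_ct_opts_local *opts, u32 opts__sz) __ksym;
extern struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct) __ksym;
extern void bpf_ct_release(struct nf_conn *nfct) __ksym;

SEC("xdp")
int xdp_ct_add(struct xdp_md *ctx)
{
        struct bpf_ct_opts_local opts = {
                .netns_id = -1,                 /* BPF_F_CURRENT_NETNS */
                .l4proto = IPPROTO_UDP,
        };
        struct bpf_sock_tuple tup = {
                .ipv4.saddr = bpf_htonl(0xc0a80001),    /* 192.168.0.1 (example) */
                .ipv4.daddr = bpf_htonl(0xc0a80002),    /* 192.168.0.2 (example) */
                .ipv4.sport = bpf_htons(5555),
                .ipv4.dport = bpf_htons(7777),
        };
        struct nf_conn___init *ct;
        struct nf_conn *inserted;

        ct = bpf_xdp_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
        if (!ct)
                return XDP_PASS;        /* opts.error carries the errno */

        /* Insertion consumes the allocated reference; on success it returns
         * an acquired nf_conn which must still be released.
         */
        inserted = bpf_ct_insert_entry(ct);
        if (inserted)
                bpf_ct_release(inserted);

        return XDP_PASS;
}

char _license[] SEC("license") = "GPL";

The entry is allocated with a default 10 second timeout (the hard-coded value passed to __bpf_nf_ct_alloc_entry in this patch); later patches in the series add kfuncs to adjust timeout and status before insertion.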
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -84,4 +84,19 @@ void nf_conntrack_lock(spinlock_t *lock);
 
 extern spinlock_t nf_conntrack_expect_lock;
 
+/* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */
+
+#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+    (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES) || \
+    IS_ENABLED(CONFIG_NF_CT_NETLINK))
+
+static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
+{
+        if (timeout > INT_MAX)
+                timeout = INT_MAX;
+        WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
+}
+
+#endif
+
 #endif  /* _NF_CONNTRACK_CORE_H */
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -55,6 +55,94 @@ enum {
         NF_BPF_CT_OPTS_SZ = 12,
 };
 
+static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
+                                 u32 tuple_len, u8 protonum, u8 dir,
+                                 struct nf_conntrack_tuple *tuple)
+{
+        union nf_inet_addr *src = dir ? &tuple->dst.u3 : &tuple->src.u3;
+        union nf_inet_addr *dst = dir ? &tuple->src.u3 : &tuple->dst.u3;
+        union nf_conntrack_man_proto *sport = dir ? (void *)&tuple->dst.u
+                                                  : &tuple->src.u;
+        union nf_conntrack_man_proto *dport = dir ? &tuple->src.u
+                                                  : (void *)&tuple->dst.u;
+
+        if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
+                return -EPROTO;
+
+        memset(tuple, 0, sizeof(*tuple));
+
+        switch (tuple_len) {
+        case sizeof(bpf_tuple->ipv4):
+                tuple->src.l3num = AF_INET;
+                src->ip = bpf_tuple->ipv4.saddr;
+                sport->tcp.port = bpf_tuple->ipv4.sport;
+                dst->ip = bpf_tuple->ipv4.daddr;
+                dport->tcp.port = bpf_tuple->ipv4.dport;
+                break;
+        case sizeof(bpf_tuple->ipv6):
+                tuple->src.l3num = AF_INET6;
+                memcpy(src->ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
+                sport->tcp.port = bpf_tuple->ipv6.sport;
+                memcpy(dst->ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
+                dport->tcp.port = bpf_tuple->ipv6.dport;
+                break;
+        default:
+                return -EAFNOSUPPORT;
+        }
+        tuple->dst.protonum = protonum;
+        tuple->dst.dir = dir;
+
+        return 0;
+}
+
+static struct nf_conn *
+__bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
+                        u32 tuple_len, struct bpf_ct_opts *opts, u32 opts_len,
+                        u32 timeout)
+{
+        struct nf_conntrack_tuple otuple, rtuple;
+        struct nf_conn *ct;
+        int err;
+
+        if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+            opts_len != NF_BPF_CT_OPTS_SZ)
+                return ERR_PTR(-EINVAL);
+
+        if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
+                return ERR_PTR(-EINVAL);
+
+        err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
+                                    IP_CT_DIR_ORIGINAL, &otuple);
+        if (err < 0)
+                return ERR_PTR(err);
+
+        err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
+                                    IP_CT_DIR_REPLY, &rtuple);
+        if (err < 0)
+                return ERR_PTR(err);
+
+        if (opts->netns_id >= 0) {
+                net = get_net_ns_by_id(net, opts->netns_id);
+                if (unlikely(!net))
+                        return ERR_PTR(-ENONET);
+        }
+
+        ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
+                                GFP_ATOMIC);
+        if (IS_ERR(ct))
+                goto out;
+
+        memset(&ct->proto, 0, sizeof(ct->proto));
+        __nf_ct_set_timeout(ct, timeout * HZ);
+        ct->status |= IPS_CONFIRMED;
+
+out:
+        if (opts->netns_id >= 0)
+                put_net(net);
+
+        return ct;
+}
+
 static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
                                           struct bpf_sock_tuple *bpf_tuple,
                                           u32 tuple_len, struct bpf_ct_opts *opts,
@@ -63,6 +151,7 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
         struct nf_conntrack_tuple_hash *hash;
         struct nf_conntrack_tuple tuple;
         struct nf_conn *ct;
+        int err;
 
         if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
             opts_len != NF_BPF_CT_OPTS_SZ)
@@ -72,27 +161,10 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
         if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
                 return ERR_PTR(-EINVAL);
 
-        memset(&tuple, 0, sizeof(tuple));
-        switch (tuple_len) {
-        case sizeof(bpf_tuple->ipv4):
-                tuple.src.l3num = AF_INET;
-                tuple.src.u3.ip = bpf_tuple->ipv4.saddr;
-                tuple.src.u.tcp.port = bpf_tuple->ipv4.sport;
-                tuple.dst.u3.ip = bpf_tuple->ipv4.daddr;
-                tuple.dst.u.tcp.port = bpf_tuple->ipv4.dport;
-                break;
-        case sizeof(bpf_tuple->ipv6):
-                tuple.src.l3num = AF_INET6;
-                memcpy(tuple.src.u3.ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
-                tuple.src.u.tcp.port = bpf_tuple->ipv6.sport;
-                memcpy(tuple.dst.u3.ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
-                tuple.dst.u.tcp.port = bpf_tuple->ipv6.dport;
-                break;
-        default:
-                return ERR_PTR(-EAFNOSUPPORT);
-        }
-
-        tuple.dst.protonum = opts->l4proto;
+        err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
+                                    IP_CT_DIR_ORIGINAL, &tuple);
+        if (err < 0)
+                return ERR_PTR(err);
 
         if (opts->netns_id >= 0) {
                 net = get_net_ns_by_id(net, opts->netns_id);
@@ -116,6 +188,43 @@ __diag_push();
 __diag_ignore_all("-Wmissing-prototypes",
                   "Global functions as their definitions will be in nf_conntrack BTF");
 
+struct nf_conn___init {
+        struct nf_conn ct;
+};
+
+/* bpf_xdp_ct_alloc - Allocate a new CT entry
+ *
+ * Parameters:
+ * @xdp_ctx     - Pointer to ctx (xdp_md) in XDP program
+ *                Cannot be NULL
+ * @bpf_tuple   - Pointer to memory representing the tuple to look up
+ *                Cannot be NULL
+ * @tuple__sz   - Length of the tuple structure
+ *                Must be one of sizeof(bpf_tuple->ipv4) or
+ *                sizeof(bpf_tuple->ipv6)
+ * @opts        - Additional options for allocation (documented above)
+ *                Cannot be NULL
+ * @opts__sz    - Length of the bpf_ct_opts structure
+ *                Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn___init *
+bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
+                 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+        struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
+        struct nf_conn *nfct;
+
+        nfct = __bpf_nf_ct_alloc_entry(dev_net(ctx->rxq->dev), bpf_tuple, tuple__sz,
+                                       opts, opts__sz, 10);
+        if (IS_ERR(nfct)) {
+                if (opts)
+                        opts->error = PTR_ERR(nfct);
+                return NULL;
+        }
+
+        return (struct nf_conn___init *)nfct;
+}
+
 /* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
  *                     reference to it
  *
@@ -150,6 +259,40 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
         return nfct;
 }
 
+/* bpf_skb_ct_alloc - Allocate a new CT entry
+ *
+ * Parameters:
+ * @skb_ctx     - Pointer to ctx (__sk_buff) in TC program
+ *                Cannot be NULL
+ * @bpf_tuple   - Pointer to memory representing the tuple to look up
+ *                Cannot be NULL
+ * @tuple__sz   - Length of the tuple structure
+ *                Must be one of sizeof(bpf_tuple->ipv4) or
+ *                sizeof(bpf_tuple->ipv6)
+ * @opts        - Additional options for allocation (documented above)
+ *                Cannot be NULL
+ * @opts__sz    - Length of the bpf_ct_opts structure
+ *                Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn___init *
+bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
+                 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+        struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+        struct nf_conn *nfct;
+        struct net *net;
+
+        net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
+        nfct = __bpf_nf_ct_alloc_entry(net, bpf_tuple, tuple__sz, opts, opts__sz, 10);
+        if (IS_ERR(nfct)) {
+                if (opts)
+                        opts->error = PTR_ERR(nfct);
+                return NULL;
+        }
+
+        return (struct nf_conn___init *)nfct;
+}
+
 /* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
  *                     reference to it
  *
@@ -184,6 +327,26 @@ bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
         return nfct;
 }
 
+/* bpf_ct_insert_entry - Add the provided entry into a CT map
+ *
+ * This must be invoked for referenced PTR_TO_BTF_ID.
+ *
+ * @nfct__ref    - Pointer to referenced nf_conn___init object, obtained
+ *                 using bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
+ */
+struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct__ref)
+{
+        struct nf_conn *nfct = (struct nf_conn *)nfct__ref;
+        int err;
+
+        err = nf_conntrack_hash_check_insert(nfct);
+        if (err < 0) {
+                nf_conntrack_free(nfct);
+                return NULL;
+        }
+        return nfct;
+}
+
 /* bpf_ct_release - Release acquired nf_conn object
  *
  * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
@@ -204,8 +367,11 @@ void bpf_ct_release(struct nf_conn *nfct)
 __diag_pop()
 
 BTF_SET8_START(nf_ct_kfunc_set)
+BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_skb_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_ct_insert_entry, KF_ACQUIRE | KF_RET_NULL | KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE)
 BTF_SET8_END(nf_ct_kfunc_set)
 
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2025,9 +2025,7 @@ static int ctnetlink_change_timeout(struct nf_conn *ct,
 {
         u64 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
 
-        if (timeout > INT_MAX)
-                timeout = INT_MAX;
-        WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
+        __nf_ct_set_timeout(ct, timeout);
 
         if (test_bit(IPS_DYING_BIT, &ct->status))
                 return -ETIME;
@@ -2292,9 +2290,7 @@ ctnetlink_create_conntrack(struct net *net,
                 goto err1;
 
         timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
-        if (timeout > INT_MAX)
-                timeout = INT_MAX;
-        ct->timeout = (u32)timeout + nfct_time_stamp;
+        __nf_ct_set_timeout(ct, timeout);
 
         rcu_read_lock();
         if (cda[CTA_HELP]) {
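For completeness, a similar sketch (again, not part of this patch) of the same flow from the TC hook, using bpf_skb_ct_alloc in a classifier program. As in the XDP sketch, the section and program names, the local bpf_ct_opts mirror and the tuple values are assumptions for illustration only.

// SPDX-License-Identifier: GPL-2.0
/* Illustrative sketch only: allocate and insert a TCP conntrack entry
 * from a TC classifier using the kfuncs introduced by this series.
 */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

struct nf_conn;
struct nf_conn___init;

/* Local mirror of the kernel's 12-byte struct bpf_ct_opts (assumption). */
struct bpf_ct_opts_local {
        s32 netns_id;
        s32 error;
        u8 l4proto;
        u8 dir;
        u8 reserved[2];
};

extern struct nf_conn___init *
bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
                 u32 tuple__sz, struct bpf_ct_opts_local *opts, u32 opts__sz) __ksym;
extern struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct) __ksym;
extern void bpf_ct_release(struct nf_conn *nfct) __ksym;

SEC("tc")
int tc_ct_add(struct __sk_buff *ctx)
{
        struct bpf_ct_opts_local opts = {
                .netns_id = -1,                 /* BPF_F_CURRENT_NETNS */
                .l4proto = IPPROTO_TCP,
        };
        struct bpf_sock_tuple tup = {
                .ipv4.saddr = bpf_htonl(0x0a000001),    /* 10.0.0.1 (example) */
                .ipv4.daddr = bpf_htonl(0x0a000002),    /* 10.0.0.2 (example) */
                .ipv4.sport = bpf_htons(12345),
                .ipv4.dport = bpf_htons(80),
        };
        struct nf_conn___init *ct;
        struct nf_conn *inserted;

        ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
        if (!ct) {
                /* On failure the kfunc stores the errno in opts.error. */
                bpf_printk("ct alloc failed: %d", opts.error);
                return 0;       /* TC_ACT_OK */
        }

        inserted = bpf_ct_insert_entry(ct);
        if (inserted)
                bpf_ct_release(inserted);

        return 0;               /* TC_ACT_OK */
}

char _license[] SEC("license") = "GPL";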