bpf: implement getsockopt and setsockopt hooks
Implement new BPF_PROG_TYPE_CGROUP_SOCKOPT program type and BPF_CGROUP_{G,S}ETSOCKOPT cgroup hooks. BPF_CGROUP_SETSOCKOPT can modify user setsockopt arguments before passing them down to the kernel or bypass the kernel completely. BPF_CGROUP_GETSOCKOPT can inspect/modify getsockopt arguments that kernel returns. Both hooks reuse existing PTR_TO_PACKET{,_END} infrastructure. The buffer memory is pre-allocated (because I don't think there is a precedent for working with __user memory from bpf). This might be slow to do for each {s,g}etsockopt call, that's why I've added __cgroup_bpf_prog_array_is_empty that exits early if there is nothing attached to a cgroup. Note, however, that there is a race between __cgroup_bpf_prog_array_is_empty and BPF_PROG_RUN_ARRAY where cgroup program layout might have changed; this should not be a problem because in general there is a race between multiple calls to {s,g}etsockopt and user adding/removing bpf progs from a cgroup. The return code of the BPF program is handled as follows: * 0: EPERM * 1: success, continue with next BPF program in the cgroup chain v9: * allow overwriting setsockopt arguments (Alexei Starovoitov): * use set_fs (same as kernel_setsockopt) * buffer is always kzalloc'd (no small on-stack buffer) v8: * use s32 for optlen (Andrii Nakryiko) v7: * return only 0 or 1 (Alexei Starovoitov) * always run all progs (Alexei Starovoitov) * use optval=0 as kernel bypass in setsockopt (Alexei Starovoitov) (decided to use optval=-1 instead, optval=0 might be a valid input) * call getsockopt hook after kernel handlers (Alexei Starovoitov) v6: * rework cgroup chaining; stop as soon as bpf program returns 0 or 2; see patch with the documentation for the details * drop Andrii's and Martin's Acked-by (not sure they are comfortable with the new state of things) v5: * skip copy_to_user() and put_user() when ret == 0 (Martin Lau) v4: * don't export bpf_sk_fullsock helper (Martin Lau) * size != sizeof(__u64) for uapi pointers (Martin 
Lau) * offsetof instead of bpf_ctx_range when checking ctx access (Martin Lau) v3: * typos in BPF_PROG_CGROUP_SOCKOPT_RUN_ARRAY comments (Andrii Nakryiko) * reverse christmas tree in BPF_PROG_CGROUP_SOCKOPT_RUN_ARRAY (Andrii Nakryiko) * use __bpf_md_ptr instead of __u32 for optval{,_end} (Martin Lau) * use BPF_FIELD_SIZEOF() for consistency (Martin Lau) * new CG_SOCKOPT_ACCESS macro to wrap repeated parts v2: * moved bpf_sockopt_kern fields around to remove a hole (Martin Lau) * aligned bpf_sockopt_kern->buf to 8 bytes (Martin Lau) * bpf_prog_array_is_empty instead of bpf_prog_array_length (Martin Lau) * added [0,2] return code check to verifier (Martin Lau) * dropped unused buf[64] from the stack (Martin Lau) * use PTR_TO_SOCKET for bpf_sockopt->sk (Martin Lau) * dropped bpf_target_off from ctx rewrites (Martin Lau) * use return code for kernel bypass (Martin Lau & Andrii Nakryiko) Cc: Andrii Nakryiko <andriin@fb.com> Cc: Martin Lau <kafai@fb.com> Signed-off-by: Stanislav Fomichev <sdf@google.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
parent
3b1c667e47
commit
0d01da6afc
|
@ -124,6 +124,14 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
|
||||||
loff_t *ppos, void **new_buf,
|
loff_t *ppos, void **new_buf,
|
||||||
enum bpf_attach_type type);
|
enum bpf_attach_type type);
|
||||||
|
|
||||||
|
/*
 * Run the BPF_CGROUP_SETSOCKOPT programs for @sk.
 *
 * @level, @optname and @optlen are in/out: they are read to build the program
 * context and overwritten with the (possibly modified) values when the kernel
 * handler should still run.  In that case 0 is returned and *@kernel_optval
 * points to a kernel buffer holding the option value; the caller owns that
 * buffer and must kfree() it.
 *
 * Returns 0 to continue with the kernel handler, a positive value to bypass
 * the kernel handler, or a negative errno.
 */
int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
				       int *optname, char __user *optval,
				       int *optlen, char **kernel_optval);

/*
 * Run the BPF_CGROUP_GETSOCKOPT programs for @sk after the kernel handler.
 *
 * @max_optlen is the size of the temporary buffer handed to the programs and
 * @retval is the kernel handler's result.  Returns the value to hand back to
 * the getsockopt() caller: @retval unchanged when nothing is attached, the
 * programs' retval on success, or a negative errno.
 */
int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
				       int optname, char __user *optval,
				       int __user *optlen, int max_optlen,
				       int retval);
|
||||||
|
|
||||||
static inline enum bpf_cgroup_storage_type cgroup_storage_type(
|
static inline enum bpf_cgroup_storage_type cgroup_storage_type(
|
||||||
struct bpf_map *map)
|
struct bpf_map *map)
|
||||||
{
|
{
|
||||||
|
@ -286,6 +294,38 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
|
||||||
__ret; \
|
__ret; \
|
||||||
})
|
})
|
||||||
|
|
||||||
|
/*
 * Run the BPF_CGROUP_SETSOCKOPT hook for a setsockopt() call.
 *
 * Evaluates to 0 without calling into BPF when cgroup_bpf_enabled is false;
 * otherwise evaluates to the result of __cgroup_bpf_run_filter_setsockopt()
 * called with the arguments forwarded unchanged.
 */
#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
|
||||||
|
kernel_optval) \
|
||||||
|
({ \
|
||||||
|
int __ret = 0; \
|
||||||
|
if (cgroup_bpf_enabled) \
|
||||||
|
__ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \
|
||||||
|
optname, optval, \
|
||||||
|
optlen, \
|
||||||
|
kernel_optval); \
|
||||||
|
__ret; \
|
||||||
|
})
|
||||||
|
|
||||||
|
/*
 * Read the caller-supplied option length from the __user pointer @optlen.
 *
 * Evaluates to 0 when cgroup_bpf_enabled is false; the user pointer is not
 * touched in that case.  The return value of get_user() is deliberately not
 * checked here.
 * NOTE(review): the value seen after a faulting get_user() depends on
 * get_user() semantics that are not visible here -- confirm it is 0.
 */
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \
|
||||||
|
({ \
|
||||||
|
int __ret = 0; \
|
||||||
|
if (cgroup_bpf_enabled) \
|
||||||
|
get_user(__ret, optlen); \
|
||||||
|
__ret; \
|
||||||
|
})
|
||||||
|
|
||||||
|
/*
 * Run the BPF_CGROUP_GETSOCKOPT hook after the kernel getsockopt() handler.
 *
 * Evaluates to @retval unchanged when cgroup_bpf_enabled is false; otherwise
 * evaluates to the result of __cgroup_bpf_run_filter_getsockopt().
 *
 * @retval is expanded exactly once, so an argument with side effects is safe.
 */
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen,   \
				       max_optlen, retval)		       \
({									       \
	int __ret = (retval);						       \
	if (cgroup_bpf_enabled)						       \
		__ret = __cgroup_bpf_run_filter_getsockopt(sock, level,	       \
							   optname, optval,    \
							   optlen, max_optlen, \
							   __ret);	       \
	__ret;								       \
})
|
||||||
|
|
||||||
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
|
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
|
||||||
enum bpf_prog_type ptype, struct bpf_prog *prog);
|
enum bpf_prog_type ptype, struct bpf_prog *prog);
|
||||||
int cgroup_bpf_prog_detach(const union bpf_attr *attr,
|
int cgroup_bpf_prog_detach(const union bpf_attr *attr,
|
||||||
|
@ -357,6 +397,11 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
|
||||||
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
|
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
|
||||||
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
|
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
|
||||||
#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos,nbuf) ({ 0; })
|
#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos,nbuf) ({ 0; })
|
||||||
|
/*
 * Stub variants of the sockopt hooks that never call into BPF: the
 * max-optlen and setsockopt hooks evaluate to 0 and the getsockopt hook
 * passes @retval through unchanged.
 * NOTE(review): presumably the branch used when cgroup BPF support is
 * compiled out -- confirm against the enclosing preprocessor conditional.
 */
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; })
|
||||||
|
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \
|
||||||
|
optlen, max_optlen, retval) ({ retval; })
|
||||||
|
#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
|
||||||
|
kernel_optval) ({ 0; })
|
||||||
|
|
||||||
#define for_each_cgroup_storage_type(stype) for (; false; )
|
#define for_each_cgroup_storage_type(stype) for (; false; )
|
||||||
|
|
||||||
|
|
|
@ -518,6 +518,7 @@ struct bpf_prog_array {
|
||||||
struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
|
struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
|
||||||
void bpf_prog_array_free(struct bpf_prog_array *progs);
|
void bpf_prog_array_free(struct bpf_prog_array *progs);
|
||||||
int bpf_prog_array_length(struct bpf_prog_array *progs);
|
int bpf_prog_array_length(struct bpf_prog_array *progs);
|
||||||
|
bool bpf_prog_array_is_empty(struct bpf_prog_array *array);
|
||||||
int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs,
|
int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs,
|
||||||
__u32 __user *prog_ids, u32 cnt);
|
__u32 __user *prog_ids, u32 cnt);
|
||||||
|
|
||||||
|
@ -1051,6 +1052,7 @@ extern const struct bpf_func_proto bpf_spin_unlock_proto;
|
||||||
extern const struct bpf_func_proto bpf_get_local_storage_proto;
|
extern const struct bpf_func_proto bpf_get_local_storage_proto;
|
||||||
extern const struct bpf_func_proto bpf_strtol_proto;
|
extern const struct bpf_func_proto bpf_strtol_proto;
|
||||||
extern const struct bpf_func_proto bpf_strtoul_proto;
|
extern const struct bpf_func_proto bpf_strtoul_proto;
|
||||||
|
extern const struct bpf_func_proto bpf_tcp_sock_proto;
|
||||||
|
|
||||||
/* Shared helpers among cBPF and eBPF. */
|
/* Shared helpers among cBPF and eBPF. */
|
||||||
void bpf_user_rnd_init_once(void);
|
void bpf_user_rnd_init_once(void);
|
||||||
|
|
|
@ -30,6 +30,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
|
||||||
#ifdef CONFIG_CGROUP_BPF
|
#ifdef CONFIG_CGROUP_BPF
|
||||||
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
|
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
|
||||||
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl)
|
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl)
|
||||||
|
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt)
|
||||||
#endif
|
#endif
|
||||||
#ifdef CONFIG_BPF_LIRC_MODE2
|
#ifdef CONFIG_BPF_LIRC_MODE2
|
||||||
BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
|
BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
|
||||||
|
|
|
@ -1199,4 +1199,14 @@ struct bpf_sysctl_kern {
|
||||||
u64 tmp_reg;
|
u64 tmp_reg;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Kernel-side context handed to BPF_PROG_TYPE_CGROUP_SOCKOPT programs.
 * Accesses to the uapi struct bpf_sockopt are rewritten onto these fields.
 *
 * sk:         socket the {get,set}sockopt() call operates on
 * optval:     start of the kernel copy of the option value
 * optval_end: one past the last byte of that buffer
 * level:      socket option level
 * optname:    socket option name
 * optlen:     length of the option value
 * retval:     result of the kernel getsockopt() handler
 */
struct bpf_sockopt_kern {
|
||||||
|
struct sock *sk;
|
||||||
|
u8 *optval;
|
||||||
|
u8 *optval_end;
|
||||||
|
s32 level;
|
||||||
|
s32 optname;
|
||||||
|
s32 optlen;
|
||||||
|
s32 retval;
|
||||||
|
};
|
||||||
|
|
||||||
#endif /* __LINUX_FILTER_H__ */
|
#endif /* __LINUX_FILTER_H__ */
|
||||||
|
|
|
@ -170,6 +170,7 @@ enum bpf_prog_type {
|
||||||
BPF_PROG_TYPE_FLOW_DISSECTOR,
|
BPF_PROG_TYPE_FLOW_DISSECTOR,
|
||||||
BPF_PROG_TYPE_CGROUP_SYSCTL,
|
BPF_PROG_TYPE_CGROUP_SYSCTL,
|
||||||
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
|
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
|
||||||
|
BPF_PROG_TYPE_CGROUP_SOCKOPT,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum bpf_attach_type {
|
enum bpf_attach_type {
|
||||||
|
@ -194,6 +195,8 @@ enum bpf_attach_type {
|
||||||
BPF_CGROUP_SYSCTL,
|
BPF_CGROUP_SYSCTL,
|
||||||
BPF_CGROUP_UDP4_RECVMSG,
|
BPF_CGROUP_UDP4_RECVMSG,
|
||||||
BPF_CGROUP_UDP6_RECVMSG,
|
BPF_CGROUP_UDP6_RECVMSG,
|
||||||
|
BPF_CGROUP_GETSOCKOPT,
|
||||||
|
BPF_CGROUP_SETSOCKOPT,
|
||||||
__MAX_BPF_ATTACH_TYPE
|
__MAX_BPF_ATTACH_TYPE
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -3541,4 +3544,15 @@ struct bpf_sysctl {
|
||||||
*/
|
*/
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Context seen by BPF_PROG_TYPE_CGROUP_SOCKOPT programs.
 *
 * sk, optval and optval_end are read-only and must be accessed as 8-byte
 * values; optval/optval_end bound the option value buffer.
 * level and optname may be written only by BPF_CGROUP_SETSOCKOPT programs.
 * optlen may be written by both attach types.
 * retval may be read and written only by BPF_CGROUP_GETSOCKOPT programs.
 */
struct bpf_sockopt {
|
||||||
|
__bpf_md_ptr(struct bpf_sock *, sk);
|
||||||
|
__bpf_md_ptr(void *, optval);
|
||||||
|
__bpf_md_ptr(void *, optval_end);
|
||||||
|
|
||||||
|
__s32 level;
|
||||||
|
__s32 optname;
|
||||||
|
__s32 optlen;
|
||||||
|
__s32 retval;
|
||||||
|
};
|
||||||
|
|
||||||
#endif /* _UAPI__LINUX_BPF_H__ */
|
#endif /* _UAPI__LINUX_BPF_H__ */
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
#include <linux/bpf.h>
|
#include <linux/bpf.h>
|
||||||
#include <linux/bpf-cgroup.h>
|
#include <linux/bpf-cgroup.h>
|
||||||
#include <net/sock.h>
|
#include <net/sock.h>
|
||||||
|
#include <net/bpf_sk_storage.h>
|
||||||
|
|
||||||
#include "../cgroup/cgroup-internal.h"
|
#include "../cgroup/cgroup-internal.h"
|
||||||
|
|
||||||
|
@ -938,6 +939,188 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);
|
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);
|
||||||
|
|
||||||
|
static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
|
||||||
|
enum bpf_attach_type attach_type)
|
||||||
|
{
|
||||||
|
struct bpf_prog_array *prog_array;
|
||||||
|
bool empty;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
prog_array = rcu_dereference(cgrp->bpf.effective[attach_type]);
|
||||||
|
empty = bpf_prog_array_is_empty(prog_array);
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
return empty;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)
|
||||||
|
{
|
||||||
|
if (unlikely(max_optlen > PAGE_SIZE) || max_optlen < 0)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
ctx->optval = kzalloc(max_optlen, GFP_USER);
|
||||||
|
if (!ctx->optval)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
ctx->optval_end = ctx->optval + max_optlen;
|
||||||
|
ctx->optlen = max_optlen;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Release the option value buffer held in @ctx->optval. */
static void sockopt_free_buf(struct bpf_sockopt_kern *ctx)
|
||||||
|
{
|
||||||
|
kfree(ctx->optval);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Run the BPF_CGROUP_SETSOCKOPT programs attached to the cgroup of @sk.
 *
 * The user option value is copied into a kernel buffer that the programs
 * may inspect and modify.  Outcomes:
 *   0       - run the kernel handler; *@level, *@optname and *@optlen hold
 *             the programs' values and *@kernel_optval points to the kernel
 *             buffer, which the caller must free
 *   1       - a program set optlen to -1: bypass the kernel handler
 *   -EPERM  - the program chain returned 0
 *   -EFAULT - copying from user space failed, or optlen was set out of
 *             bounds
 *   other   - error from sockopt_alloc_buf()
 * Returns 0 immediately when cgroup BPF is disabled or nothing is attached.
 */
int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
				       int *optname, char __user *optval,
				       int *optlen, char **kernel_optval)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	struct bpf_sockopt_kern ctx = {
		.sk = sk,
		.level = *level,
		.optname = *optname,
	};
	int allowed;
	int err;

	/* Cheap opportunistic check first: skip the allocation and the
	 * socket lock when no program is attached to this hook.
	 */
	if (!cgroup_bpf_enabled)
		return 0;
	if (__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_SETSOCKOPT))
		return 0;

	err = sockopt_alloc_buf(&ctx, *optlen);
	if (err)
		return err;

	if (copy_from_user(ctx.optval, optval, *optlen)) {
		err = -EFAULT;
		goto free_buf;
	}

	lock_sock(sk);
	allowed = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT],
				     &ctx, BPF_PROG_RUN);
	release_sock(sk);

	if (!allowed) {
		err = -EPERM;
		goto free_buf;
	}

	if (ctx.optlen == -1) {
		/* optlen set to -1, bypass kernel */
		err = 1;
		goto free_buf;
	}

	if (ctx.optlen < -1 || ctx.optlen > *optlen) {
		/* optlen is out of bounds */
		err = -EFAULT;
		goto free_buf;
	}

	/* optlen within bounds: export any modifications and let the kernel
	 * handler run on the kernel buffer, which now belongs to the caller.
	 */
	*level = ctx.level;
	*optname = ctx.optname;
	*optlen = ctx.optlen;
	*kernel_optval = ctx.optval;

	return 0;

free_buf:
	sockopt_free_buf(&ctx);
	return err;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_setsockopt);
|
||||||
|
|
||||||
|
/*
 * Run the BPF_CGROUP_GETSOCKOPT programs attached to the cgroup of @sk.
 *
 * Called after the kernel getsockopt() handler.  @retval is that handler's
 * result and @max_optlen is the size of the temporary buffer given to the
 * programs.  When the handler succeeded, the value and length it wrote to
 * user space are copied back in so the programs can inspect and modify them.
 *
 * Returns:
 *   @retval  - cgroup BPF is disabled or nothing is attached
 *   -EPERM   - the program chain returned 0
 *   -EFAULT  - a user copy failed, optlen is negative or exceeds
 *              @max_optlen, or a program set retval to something other
 *              than 0 or the original @retval
 *   other <0 - error from sockopt_alloc_buf()
 *   otherwise the retval left in the context by the programs, after the
 *   buffer and optlen have been written back to user space.
 */
int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
				       int optname, char __user *optval,
				       int __user *optlen, int max_optlen,
				       int retval)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	struct bpf_sockopt_kern ctx = {
		.sk = sk,
		.level = level,
		.optname = optname,
		.retval = retval,
	};
	int ret;

	/* Opportunistic check to see whether we have any BPF program
	 * attached to the hook so we don't waste time allocating
	 * memory and locking the socket.
	 */
	if (!cgroup_bpf_enabled ||
	    __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT))
		return retval;

	ret = sockopt_alloc_buf(&ctx, max_optlen);
	if (ret)
		return ret;

	if (!retval) {
		/* If kernel getsockopt finished successfully,
		 * copy whatever was returned to the user back
		 * into our temporary buffer. Set optlen to the
		 * one that kernel returned as well to let
		 * BPF programs inspect the value.
		 */

		if (get_user(ctx.optlen, optlen)) {
			ret = -EFAULT;
			goto out;
		}

		/* *optlen lives in user memory and may have been rewritten
		 * since the kernel handler stored it; a negative value must
		 * never be used as a copy size.
		 */
		if (ctx.optlen < 0) {
			ret = -EFAULT;
			goto out;
		}

		if (ctx.optlen > max_optlen)
			ctx.optlen = max_optlen;

		if (copy_from_user(ctx.optval, optval, ctx.optlen) != 0) {
			ret = -EFAULT;
			goto out;
		}
	}

	lock_sock(sk);
	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
				 &ctx, BPF_PROG_RUN);
	release_sock(sk);

	if (!ret) {
		ret = -EPERM;
		goto out;
	}

	/* Programs may write optlen; reject anything outside the buffer,
	 * including negative lengths.
	 */
	if (ctx.optlen > max_optlen || ctx.optlen < 0) {
		ret = -EFAULT;
		goto out;
	}

	/* BPF programs only allowed to set retval to 0, not some
	 * arbitrary value.
	 */
	if (ctx.retval != 0 && ctx.retval != retval) {
		ret = -EFAULT;
		goto out;
	}

	if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
	    put_user(ctx.optlen, optlen)) {
		ret = -EFAULT;
		goto out;
	}

	ret = ctx.retval;

out:
	sockopt_free_buf(&ctx);
	return ret;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_getsockopt);
|
||||||
|
|
||||||
static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
|
static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
|
||||||
size_t *lenp)
|
size_t *lenp)
|
||||||
{
|
{
|
||||||
|
@ -1198,3 +1381,153 @@ const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
|
||||||
|
|
||||||
const struct bpf_prog_ops cg_sysctl_prog_ops = {
|
const struct bpf_prog_ops cg_sysctl_prog_ops = {
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const struct bpf_func_proto *
|
||||||
|
cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||||
|
{
|
||||||
|
switch (func_id) {
|
||||||
|
case BPF_FUNC_sk_storage_get:
|
||||||
|
return &bpf_sk_storage_get_proto;
|
||||||
|
case BPF_FUNC_sk_storage_delete:
|
||||||
|
return &bpf_sk_storage_delete_proto;
|
||||||
|
#ifdef CONFIG_INET
|
||||||
|
case BPF_FUNC_tcp_sock:
|
||||||
|
return &bpf_tcp_sock_proto;
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
return cgroup_base_func_proto(func_id, prog);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool cg_sockopt_is_valid_access(int off, int size,
|
||||||
|
enum bpf_access_type type,
|
||||||
|
const struct bpf_prog *prog,
|
||||||
|
struct bpf_insn_access_aux *info)
|
||||||
|
{
|
||||||
|
const int size_default = sizeof(__u32);
|
||||||
|
|
||||||
|
if (off < 0 || off >= sizeof(struct bpf_sockopt))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (off % size != 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (type == BPF_WRITE) {
|
||||||
|
switch (off) {
|
||||||
|
case offsetof(struct bpf_sockopt, retval):
|
||||||
|
if (size != size_default)
|
||||||
|
return false;
|
||||||
|
return prog->expected_attach_type ==
|
||||||
|
BPF_CGROUP_GETSOCKOPT;
|
||||||
|
case offsetof(struct bpf_sockopt, optname):
|
||||||
|
/* fallthrough */
|
||||||
|
case offsetof(struct bpf_sockopt, level):
|
||||||
|
if (size != size_default)
|
||||||
|
return false;
|
||||||
|
return prog->expected_attach_type ==
|
||||||
|
BPF_CGROUP_SETSOCKOPT;
|
||||||
|
case offsetof(struct bpf_sockopt, optlen):
|
||||||
|
return size == size_default;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (off) {
|
||||||
|
case offsetof(struct bpf_sockopt, sk):
|
||||||
|
if (size != sizeof(__u64))
|
||||||
|
return false;
|
||||||
|
info->reg_type = PTR_TO_SOCKET;
|
||||||
|
break;
|
||||||
|
case offsetof(struct bpf_sockopt, optval):
|
||||||
|
if (size != sizeof(__u64))
|
||||||
|
return false;
|
||||||
|
info->reg_type = PTR_TO_PACKET;
|
||||||
|
break;
|
||||||
|
case offsetof(struct bpf_sockopt, optval_end):
|
||||||
|
if (size != sizeof(__u64))
|
||||||
|
return false;
|
||||||
|
info->reg_type = PTR_TO_PACKET_END;
|
||||||
|
break;
|
||||||
|
case offsetof(struct bpf_sockopt, retval):
|
||||||
|
if (size != size_default)
|
||||||
|
return false;
|
||||||
|
return prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT;
|
||||||
|
default:
|
||||||
|
if (size != size_default)
|
||||||
|
return false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define CG_SOCKOPT_ACCESS_FIELD(T, F) \
|
||||||
|
T(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F), \
|
||||||
|
si->dst_reg, si->src_reg, \
|
||||||
|
offsetof(struct bpf_sockopt_kern, F))
|
||||||
|
|
||||||
|
static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
|
||||||
|
const struct bpf_insn *si,
|
||||||
|
struct bpf_insn *insn_buf,
|
||||||
|
struct bpf_prog *prog,
|
||||||
|
u32 *target_size)
|
||||||
|
{
|
||||||
|
struct bpf_insn *insn = insn_buf;
|
||||||
|
|
||||||
|
switch (si->off) {
|
||||||
|
case offsetof(struct bpf_sockopt, sk):
|
||||||
|
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, sk);
|
||||||
|
break;
|
||||||
|
case offsetof(struct bpf_sockopt, level):
|
||||||
|
if (type == BPF_WRITE)
|
||||||
|
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, level);
|
||||||
|
else
|
||||||
|
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, level);
|
||||||
|
break;
|
||||||
|
case offsetof(struct bpf_sockopt, optname):
|
||||||
|
if (type == BPF_WRITE)
|
||||||
|
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optname);
|
||||||
|
else
|
||||||
|
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optname);
|
||||||
|
break;
|
||||||
|
case offsetof(struct bpf_sockopt, optlen):
|
||||||
|
if (type == BPF_WRITE)
|
||||||
|
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optlen);
|
||||||
|
else
|
||||||
|
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
|
||||||
|
break;
|
||||||
|
case offsetof(struct bpf_sockopt, retval):
|
||||||
|
if (type == BPF_WRITE)
|
||||||
|
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);
|
||||||
|
else
|
||||||
|
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval);
|
||||||
|
break;
|
||||||
|
case offsetof(struct bpf_sockopt, optval):
|
||||||
|
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
|
||||||
|
break;
|
||||||
|
case offsetof(struct bpf_sockopt, optval_end):
|
||||||
|
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval_end);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return insn - insn_buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Verifier gen_prologue callback for cgroup sockopt programs.
 * Emits no instructions into @insn_buf and always returns 0.
 */
static int cg_sockopt_get_prologue(struct bpf_insn *insn_buf,
|
||||||
|
bool direct_write,
|
||||||
|
const struct bpf_prog *prog)
|
||||||
|
{
|
||||||
|
/* Nothing to do for the sockopt argument: the option value buffer is
 * kzalloc'ed before the programs run (see sockopt_alloc_buf()).
 */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Verifier callbacks for BPF_PROG_TYPE_CGROUP_SOCKOPT programs. */
const struct bpf_verifier_ops cg_sockopt_verifier_ops = {
|
||||||
|
.get_func_proto = cg_sockopt_func_proto,
|
||||||
|
.is_valid_access = cg_sockopt_is_valid_access,
|
||||||
|
.convert_ctx_access = cg_sockopt_convert_ctx_access,
|
||||||
|
.gen_prologue = cg_sockopt_get_prologue,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Program ops for cgroup sockopt programs; no callbacks are set. */
const struct bpf_prog_ops cg_sockopt_prog_ops = {
|
||||||
|
};
|
||||||
|
|
|
@ -1809,6 +1809,15 @@ int bpf_prog_array_length(struct bpf_prog_array *array)
|
||||||
return cnt;
|
return cnt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool bpf_prog_array_is_empty(struct bpf_prog_array *array)
|
||||||
|
{
|
||||||
|
struct bpf_prog_array_item *item;
|
||||||
|
|
||||||
|
for (item = array->items; item->prog; item++)
|
||||||
|
if (item->prog != &dummy_bpf_prog.prog)
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static bool bpf_prog_array_copy_core(struct bpf_prog_array *array,
|
static bool bpf_prog_array_copy_core(struct bpf_prog_array *array,
|
||||||
u32 *prog_ids,
|
u32 *prog_ids,
|
||||||
|
|
|
@ -1590,6 +1590,14 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
|
||||||
default:
|
default:
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
|
||||||
|
switch (expected_attach_type) {
|
||||||
|
case BPF_CGROUP_SETSOCKOPT:
|
||||||
|
case BPF_CGROUP_GETSOCKOPT:
|
||||||
|
return 0;
|
||||||
|
default:
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -1840,6 +1848,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
|
||||||
switch (prog->type) {
|
switch (prog->type) {
|
||||||
case BPF_PROG_TYPE_CGROUP_SOCK:
|
case BPF_PROG_TYPE_CGROUP_SOCK:
|
||||||
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
|
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
|
||||||
|
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
|
||||||
return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
|
return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
|
||||||
case BPF_PROG_TYPE_CGROUP_SKB:
|
case BPF_PROG_TYPE_CGROUP_SKB:
|
||||||
return prog->enforce_expected_attach_type &&
|
return prog->enforce_expected_attach_type &&
|
||||||
|
@ -1912,6 +1921,10 @@ static int bpf_prog_attach(const union bpf_attr *attr)
|
||||||
case BPF_CGROUP_SYSCTL:
|
case BPF_CGROUP_SYSCTL:
|
||||||
ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
|
ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
|
||||||
break;
|
break;
|
||||||
|
case BPF_CGROUP_GETSOCKOPT:
|
||||||
|
case BPF_CGROUP_SETSOCKOPT:
|
||||||
|
ptype = BPF_PROG_TYPE_CGROUP_SOCKOPT;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
@ -1995,6 +2008,10 @@ static int bpf_prog_detach(const union bpf_attr *attr)
|
||||||
case BPF_CGROUP_SYSCTL:
|
case BPF_CGROUP_SYSCTL:
|
||||||
ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
|
ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
|
||||||
break;
|
break;
|
||||||
|
case BPF_CGROUP_GETSOCKOPT:
|
||||||
|
case BPF_CGROUP_SETSOCKOPT:
|
||||||
|
ptype = BPF_PROG_TYPE_CGROUP_SOCKOPT;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
@ -2031,6 +2048,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
|
||||||
case BPF_CGROUP_SOCK_OPS:
|
case BPF_CGROUP_SOCK_OPS:
|
||||||
case BPF_CGROUP_DEVICE:
|
case BPF_CGROUP_DEVICE:
|
||||||
case BPF_CGROUP_SYSCTL:
|
case BPF_CGROUP_SYSCTL:
|
||||||
|
case BPF_CGROUP_GETSOCKOPT:
|
||||||
|
case BPF_CGROUP_SETSOCKOPT:
|
||||||
break;
|
break;
|
||||||
case BPF_LIRC_MODE2:
|
case BPF_LIRC_MODE2:
|
||||||
return lirc_prog_query(attr, uattr);
|
return lirc_prog_query(attr, uattr);
|
||||||
|
|
|
@ -2215,6 +2215,13 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
|
||||||
|
|
||||||
env->seen_direct_write = true;
|
env->seen_direct_write = true;
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
|
||||||
|
if (t == BPF_WRITE)
|
||||||
|
env->seen_direct_write = true;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -6066,6 +6073,7 @@ static int check_return_code(struct bpf_verifier_env *env)
|
||||||
case BPF_PROG_TYPE_SOCK_OPS:
|
case BPF_PROG_TYPE_SOCK_OPS:
|
||||||
case BPF_PROG_TYPE_CGROUP_DEVICE:
|
case BPF_PROG_TYPE_CGROUP_DEVICE:
|
||||||
case BPF_PROG_TYPE_CGROUP_SYSCTL:
|
case BPF_PROG_TYPE_CGROUP_SYSCTL:
|
||||||
|
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -5651,7 +5651,7 @@ BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
|
||||||
return (unsigned long)NULL;
|
return (unsigned long)NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct bpf_func_proto bpf_tcp_sock_proto = {
|
const struct bpf_func_proto bpf_tcp_sock_proto = {
|
||||||
.func = bpf_tcp_sock,
|
.func = bpf_tcp_sock,
|
||||||
.gpl_only = false,
|
.gpl_only = false,
|
||||||
.ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL,
|
.ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL,
|
||||||
|
|
30
net/socket.c
30
net/socket.c
|
@ -2051,6 +2051,8 @@ SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
|
||||||
static int __sys_setsockopt(int fd, int level, int optname,
|
static int __sys_setsockopt(int fd, int level, int optname,
|
||||||
char __user *optval, int optlen)
|
char __user *optval, int optlen)
|
||||||
{
|
{
|
||||||
|
mm_segment_t oldfs = get_fs();
|
||||||
|
char *kernel_optval = NULL;
|
||||||
int err, fput_needed;
|
int err, fput_needed;
|
||||||
struct socket *sock;
|
struct socket *sock;
|
||||||
|
|
||||||
|
@ -2063,6 +2065,22 @@ static int __sys_setsockopt(int fd, int level, int optname,
|
||||||
if (err)
|
if (err)
|
||||||
goto out_put;
|
goto out_put;
|
||||||
|
|
||||||
|
err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level,
|
||||||
|
&optname, optval, &optlen,
|
||||||
|
&kernel_optval);
|
||||||
|
|
||||||
|
if (err < 0) {
|
||||||
|
goto out_put;
|
||||||
|
} else if (err > 0) {
|
||||||
|
err = 0;
|
||||||
|
goto out_put;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (kernel_optval) {
|
||||||
|
set_fs(KERNEL_DS);
|
||||||
|
optval = (char __user __force *)kernel_optval;
|
||||||
|
}
|
||||||
|
|
||||||
if (level == SOL_SOCKET)
|
if (level == SOL_SOCKET)
|
||||||
err =
|
err =
|
||||||
sock_setsockopt(sock, level, optname, optval,
|
sock_setsockopt(sock, level, optname, optval,
|
||||||
|
@ -2071,6 +2089,11 @@ static int __sys_setsockopt(int fd, int level, int optname,
|
||||||
err =
|
err =
|
||||||
sock->ops->setsockopt(sock, level, optname, optval,
|
sock->ops->setsockopt(sock, level, optname, optval,
|
||||||
optlen);
|
optlen);
|
||||||
|
|
||||||
|
if (kernel_optval) {
|
||||||
|
set_fs(oldfs);
|
||||||
|
kfree(kernel_optval);
|
||||||
|
}
|
||||||
out_put:
|
out_put:
|
||||||
fput_light(sock->file, fput_needed);
|
fput_light(sock->file, fput_needed);
|
||||||
}
|
}
|
||||||
|
@ -2093,6 +2116,7 @@ static int __sys_getsockopt(int fd, int level, int optname,
|
||||||
{
|
{
|
||||||
int err, fput_needed;
|
int err, fput_needed;
|
||||||
struct socket *sock;
|
struct socket *sock;
|
||||||
|
int max_optlen;
|
||||||
|
|
||||||
sock = sockfd_lookup_light(fd, &err, &fput_needed);
|
sock = sockfd_lookup_light(fd, &err, &fput_needed);
|
||||||
if (sock != NULL) {
|
if (sock != NULL) {
|
||||||
|
@ -2100,6 +2124,8 @@ static int __sys_getsockopt(int fd, int level, int optname,
|
||||||
if (err)
|
if (err)
|
||||||
goto out_put;
|
goto out_put;
|
||||||
|
|
||||||
|
max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
|
||||||
|
|
||||||
if (level == SOL_SOCKET)
|
if (level == SOL_SOCKET)
|
||||||
err =
|
err =
|
||||||
sock_getsockopt(sock, level, optname, optval,
|
sock_getsockopt(sock, level, optname, optval,
|
||||||
|
@ -2108,6 +2134,10 @@ static int __sys_getsockopt(int fd, int level, int optname,
|
||||||
err =
|
err =
|
||||||
sock->ops->getsockopt(sock, level, optname, optval,
|
sock->ops->getsockopt(sock, level, optname, optval,
|
||||||
optlen);
|
optlen);
|
||||||
|
|
||||||
|
err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
|
||||||
|
optval, optlen,
|
||||||
|
max_optlen, err);
|
||||||
out_put:
|
out_put:
|
||||||
fput_light(sock->file, fput_needed);
|
fput_light(sock->file, fput_needed);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue