bpf: allow extended BPF programs access skb fields
introduce user accessible mirror of in-kernel 'struct sk_buff': struct __sk_buff { __u32 len; __u32 pkt_type; __u32 mark; __u32 queue_mapping; }; bpf programs can do: int bpf_prog(struct __sk_buff *skb) { __u32 var = skb->pkt_type; which will be compiled to bpf assembler as: dst_reg = *(u32 *)(src_reg + 4) // 4 == offsetof(struct __sk_buff, pkt_type) bpf verifier will check validity of access and will convert it to: dst_reg = *(u8 *)(src_reg + offsetof(struct sk_buff, __pkt_type_offset)) dst_reg &= 7 since skb->pkt_type is a bitfield. Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
a498cfe990
commit
9bac3d6d54
|
@ -103,6 +103,9 @@ struct bpf_verifier_ops {
|
|||
* with 'type' (read or write) is allowed
|
||||
*/
|
||||
bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
|
||||
|
||||
u32 (*convert_ctx_access)(int dst_reg, int src_reg, int ctx_off,
|
||||
struct bpf_insn *insn);
|
||||
};
|
||||
|
||||
struct bpf_prog_type_list {
|
||||
|
@ -133,7 +136,7 @@ struct bpf_map *bpf_map_get(struct fd f);
|
|||
void bpf_map_put(struct bpf_map *map);
|
||||
|
||||
/* verify correctness of eBPF program */
|
||||
int bpf_check(struct bpf_prog *fp, union bpf_attr *attr);
|
||||
int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
|
||||
#else
|
||||
static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl)
|
||||
{
|
||||
|
|
|
@ -170,4 +170,14 @@ enum bpf_func_id {
|
|||
__BPF_FUNC_MAX_ID,
|
||||
};
|
||||
|
||||
/* user accessible mirror of in-kernel sk_buff.
|
||||
* new fields can only be added to the end of this structure
|
||||
*/
|
||||
struct __sk_buff {
|
||||
__u32 len;
|
||||
__u32 pkt_type;
|
||||
__u32 mark;
|
||||
__u32 queue_mapping;
|
||||
};
|
||||
|
||||
#endif /* _UAPI__LINUX_BPF_H__ */
|
||||
|
|
|
@ -519,7 +519,7 @@ static int bpf_prog_load(union bpf_attr *attr)
|
|||
goto free_prog;
|
||||
|
||||
/* run eBPF verifier */
|
||||
err = bpf_check(prog, attr);
|
||||
err = bpf_check(&prog, attr);
|
||||
if (err < 0)
|
||||
goto free_used_maps;
|
||||
|
||||
|
|
|
@ -1620,11 +1620,10 @@ static int do_check(struct verifier_env *env)
|
|||
return err;
|
||||
|
||||
} else if (class == BPF_LDX) {
|
||||
if (BPF_MODE(insn->code) != BPF_MEM ||
|
||||
insn->imm != 0) {
|
||||
verbose("BPF_LDX uses reserved fields\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
enum bpf_reg_type src_reg_type;
|
||||
|
||||
/* check for reserved fields is already done */
|
||||
|
||||
/* check src operand */
|
||||
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
|
||||
if (err)
|
||||
|
@ -1643,6 +1642,29 @@ static int do_check(struct verifier_env *env)
|
|||
if (err)
|
||||
return err;
|
||||
|
||||
src_reg_type = regs[insn->src_reg].type;
|
||||
|
||||
if (insn->imm == 0 && BPF_SIZE(insn->code) == BPF_W) {
|
||||
/* saw a valid insn
|
||||
* dst_reg = *(u32 *)(src_reg + off)
|
||||
* use reserved 'imm' field to mark this insn
|
||||
*/
|
||||
insn->imm = src_reg_type;
|
||||
|
||||
} else if (src_reg_type != insn->imm &&
|
||||
(src_reg_type == PTR_TO_CTX ||
|
||||
insn->imm == PTR_TO_CTX)) {
|
||||
/* ABuser program is trying to use the same insn
|
||||
* dst_reg = *(u32*) (src_reg + off)
|
||||
* with different pointer types:
|
||||
* src_reg == ctx in one branch and
|
||||
* src_reg == stack|map in some other branch.
|
||||
* Reject it.
|
||||
*/
|
||||
verbose("same insn cannot be used with different pointers\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
} else if (class == BPF_STX) {
|
||||
if (BPF_MODE(insn->code) == BPF_XADD) {
|
||||
err = check_xadd(env, insn);
|
||||
|
@ -1790,6 +1812,13 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
|
|||
int i, j;
|
||||
|
||||
for (i = 0; i < insn_cnt; i++, insn++) {
|
||||
if (BPF_CLASS(insn->code) == BPF_LDX &&
|
||||
(BPF_MODE(insn->code) != BPF_MEM ||
|
||||
insn->imm != 0)) {
|
||||
verbose("BPF_LDX uses reserved fields\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
|
||||
struct bpf_map *map;
|
||||
struct fd f;
|
||||
|
@ -1881,6 +1910,92 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env)
|
|||
insn->src_reg = 0;
|
||||
}
|
||||
|
||||
static void adjust_branches(struct bpf_prog *prog, int pos, int delta)
|
||||
{
|
||||
struct bpf_insn *insn = prog->insnsi;
|
||||
int insn_cnt = prog->len;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < insn_cnt; i++, insn++) {
|
||||
if (BPF_CLASS(insn->code) != BPF_JMP ||
|
||||
BPF_OP(insn->code) == BPF_CALL ||
|
||||
BPF_OP(insn->code) == BPF_EXIT)
|
||||
continue;
|
||||
|
||||
/* adjust offset of jmps if necessary */
|
||||
if (i < pos && i + insn->off + 1 > pos)
|
||||
insn->off += delta;
|
||||
else if (i > pos && i + insn->off + 1 < pos)
|
||||
insn->off -= delta;
|
||||
}
|
||||
}
|
||||
|
||||
/* convert load instructions that access fields of 'struct __sk_buff'
|
||||
* into sequence of instructions that access fields of 'struct sk_buff'
|
||||
*/
|
||||
static int convert_ctx_accesses(struct verifier_env *env)
|
||||
{
|
||||
struct bpf_insn *insn = env->prog->insnsi;
|
||||
int insn_cnt = env->prog->len;
|
||||
struct bpf_insn insn_buf[16];
|
||||
struct bpf_prog *new_prog;
|
||||
u32 cnt;
|
||||
int i;
|
||||
|
||||
if (!env->prog->aux->ops->convert_ctx_access)
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < insn_cnt; i++, insn++) {
|
||||
if (insn->code != (BPF_LDX | BPF_MEM | BPF_W))
|
||||
continue;
|
||||
|
||||
if (insn->imm != PTR_TO_CTX) {
|
||||
/* clear internal mark */
|
||||
insn->imm = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
cnt = env->prog->aux->ops->
|
||||
convert_ctx_access(insn->dst_reg, insn->src_reg,
|
||||
insn->off, insn_buf);
|
||||
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
|
||||
verbose("bpf verifier is misconfigured\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (cnt == 1) {
|
||||
memcpy(insn, insn_buf, sizeof(*insn));
|
||||
continue;
|
||||
}
|
||||
|
||||
/* several new insns need to be inserted. Make room for them */
|
||||
insn_cnt += cnt - 1;
|
||||
new_prog = bpf_prog_realloc(env->prog,
|
||||
bpf_prog_size(insn_cnt),
|
||||
GFP_USER);
|
||||
if (!new_prog)
|
||||
return -ENOMEM;
|
||||
|
||||
new_prog->len = insn_cnt;
|
||||
|
||||
memmove(new_prog->insnsi + i + cnt, new_prog->insns + i + 1,
|
||||
sizeof(*insn) * (insn_cnt - i - cnt));
|
||||
|
||||
/* copy substitute insns in place of load instruction */
|
||||
memcpy(new_prog->insnsi + i, insn_buf, sizeof(*insn) * cnt);
|
||||
|
||||
/* adjust branches in the whole program */
|
||||
adjust_branches(new_prog, i, cnt - 1);
|
||||
|
||||
/* keep walking new program and skip insns we just inserted */
|
||||
env->prog = new_prog;
|
||||
insn = new_prog->insnsi + i + cnt - 1;
|
||||
i += cnt - 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void free_states(struct verifier_env *env)
|
||||
{
|
||||
struct verifier_state_list *sl, *sln;
|
||||
|
@ -1903,13 +2018,13 @@ static void free_states(struct verifier_env *env)
|
|||
kfree(env->explored_states);
|
||||
}
|
||||
|
||||
int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
|
||||
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
|
||||
{
|
||||
char __user *log_ubuf = NULL;
|
||||
struct verifier_env *env;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (prog->len <= 0 || prog->len > BPF_MAXINSNS)
|
||||
if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS)
|
||||
return -E2BIG;
|
||||
|
||||
/* 'struct verifier_env' can be global, but since it's not small,
|
||||
|
@ -1919,7 +2034,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
|
|||
if (!env)
|
||||
return -ENOMEM;
|
||||
|
||||
env->prog = prog;
|
||||
env->prog = *prog;
|
||||
|
||||
/* grab the mutex to protect few globals used by verifier */
|
||||
mutex_lock(&bpf_verifier_lock);
|
||||
|
@ -1951,7 +2066,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
|
|||
if (ret < 0)
|
||||
goto skip_full_check;
|
||||
|
||||
env->explored_states = kcalloc(prog->len,
|
||||
env->explored_states = kcalloc(env->prog->len,
|
||||
sizeof(struct verifier_state_list *),
|
||||
GFP_USER);
|
||||
ret = -ENOMEM;
|
||||
|
@ -1968,6 +2083,10 @@ skip_full_check:
|
|||
while (pop_stack(env, NULL) >= 0);
|
||||
free_states(env);
|
||||
|
||||
if (ret == 0)
|
||||
/* program is valid, convert *(u32*)(ctx + off) accesses */
|
||||
ret = convert_ctx_accesses(env);
|
||||
|
||||
if (log_level && log_len >= log_size - 1) {
|
||||
BUG_ON(log_len >= log_size);
|
||||
/* verifier log exceeded user supplied buffer */
|
||||
|
@ -1983,18 +2102,18 @@ skip_full_check:
|
|||
|
||||
if (ret == 0 && env->used_map_cnt) {
|
||||
/* if program passed verifier, update used_maps in bpf_prog_info */
|
||||
prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
|
||||
sizeof(env->used_maps[0]),
|
||||
GFP_KERNEL);
|
||||
env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
|
||||
sizeof(env->used_maps[0]),
|
||||
GFP_KERNEL);
|
||||
|
||||
if (!prog->aux->used_maps) {
|
||||
if (!env->prog->aux->used_maps) {
|
||||
ret = -ENOMEM;
|
||||
goto free_log_buf;
|
||||
}
|
||||
|
||||
memcpy(prog->aux->used_maps, env->used_maps,
|
||||
memcpy(env->prog->aux->used_maps, env->used_maps,
|
||||
sizeof(env->used_maps[0]) * env->used_map_cnt);
|
||||
prog->aux->used_map_cnt = env->used_map_cnt;
|
||||
env->prog->aux->used_map_cnt = env->used_map_cnt;
|
||||
|
||||
/* program is valid. Convert pseudo bpf_ld_imm64 into generic
|
||||
* bpf_ld_imm64 instructions
|
||||
|
@ -2006,11 +2125,12 @@ free_log_buf:
|
|||
if (log_level)
|
||||
vfree(log_buf);
|
||||
free_env:
|
||||
if (!prog->aux->used_maps)
|
||||
if (!env->prog->aux->used_maps)
|
||||
/* if we didn't copy map pointers into bpf_prog_info, release
|
||||
* them now. Otherwise free_bpf_prog_info() will release them.
|
||||
*/
|
||||
release_maps(env);
|
||||
*prog = env->prog;
|
||||
kfree(env);
|
||||
mutex_unlock(&bpf_verifier_lock);
|
||||
return ret;
|
||||
|
|
|
@ -150,10 +150,43 @@ static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
|
|||
return prandom_u32();
|
||||
}
|
||||
|
||||
static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
|
||||
struct bpf_insn *insn_buf)
|
||||
{
|
||||
struct bpf_insn *insn = insn_buf;
|
||||
|
||||
switch (skb_field) {
|
||||
case SKF_AD_MARK:
|
||||
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
|
||||
|
||||
*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
|
||||
offsetof(struct sk_buff, mark));
|
||||
break;
|
||||
|
||||
case SKF_AD_PKTTYPE:
|
||||
*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET());
|
||||
*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
|
||||
#ifdef __BIG_ENDIAN_BITFIELD
|
||||
*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
|
||||
#endif
|
||||
break;
|
||||
|
||||
case SKF_AD_QUEUE:
|
||||
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
|
||||
|
||||
*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
|
||||
offsetof(struct sk_buff, queue_mapping));
|
||||
break;
|
||||
}
|
||||
|
||||
return insn - insn_buf;
|
||||
}
|
||||
|
||||
static bool convert_bpf_extensions(struct sock_filter *fp,
|
||||
struct bpf_insn **insnp)
|
||||
{
|
||||
struct bpf_insn *insn = *insnp;
|
||||
u32 cnt;
|
||||
|
||||
switch (fp->k) {
|
||||
case SKF_AD_OFF + SKF_AD_PROTOCOL:
|
||||
|
@ -167,13 +200,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
|
|||
break;
|
||||
|
||||
case SKF_AD_OFF + SKF_AD_PKTTYPE:
|
||||
*insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
|
||||
PKT_TYPE_OFFSET());
|
||||
*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
|
||||
#ifdef __BIG_ENDIAN_BITFIELD
|
||||
insn++;
|
||||
*insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5);
|
||||
#endif
|
||||
cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
|
||||
insn += cnt - 1;
|
||||
break;
|
||||
|
||||
case SKF_AD_OFF + SKF_AD_IFINDEX:
|
||||
|
@ -197,10 +225,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
|
|||
break;
|
||||
|
||||
case SKF_AD_OFF + SKF_AD_MARK:
|
||||
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
|
||||
|
||||
*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
|
||||
offsetof(struct sk_buff, mark));
|
||||
cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
|
||||
insn += cnt - 1;
|
||||
break;
|
||||
|
||||
case SKF_AD_OFF + SKF_AD_RXHASH:
|
||||
|
@ -211,10 +237,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
|
|||
break;
|
||||
|
||||
case SKF_AD_OFF + SKF_AD_QUEUE:
|
||||
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
|
||||
|
||||
*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
|
||||
offsetof(struct sk_buff, queue_mapping));
|
||||
cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
|
||||
insn += cnt - 1;
|
||||
break;
|
||||
|
||||
case SKF_AD_OFF + SKF_AD_VLAN_TAG:
|
||||
|
@ -1151,13 +1175,55 @@ sk_filter_func_proto(enum bpf_func_id func_id)
|
|||
static bool sk_filter_is_valid_access(int off, int size,
|
||||
enum bpf_access_type type)
|
||||
{
|
||||
/* skb fields cannot be accessed yet */
|
||||
return false;
|
||||
/* only read is allowed */
|
||||
if (type != BPF_READ)
|
||||
return false;
|
||||
|
||||
/* check bounds */
|
||||
if (off < 0 || off >= sizeof(struct __sk_buff))
|
||||
return false;
|
||||
|
||||
/* disallow misaligned access */
|
||||
if (off % size != 0)
|
||||
return false;
|
||||
|
||||
/* all __sk_buff fields are __u32 */
|
||||
if (size != 4)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
|
||||
struct bpf_insn *insn_buf)
|
||||
{
|
||||
struct bpf_insn *insn = insn_buf;
|
||||
|
||||
switch (ctx_off) {
|
||||
case offsetof(struct __sk_buff, len):
|
||||
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
|
||||
|
||||
*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
|
||||
offsetof(struct sk_buff, len));
|
||||
break;
|
||||
|
||||
case offsetof(struct __sk_buff, mark):
|
||||
return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn);
|
||||
|
||||
case offsetof(struct __sk_buff, pkt_type):
|
||||
return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn);
|
||||
|
||||
case offsetof(struct __sk_buff, queue_mapping):
|
||||
return convert_skb_access(SKF_AD_QUEUE, dst_reg, src_reg, insn);
|
||||
}
|
||||
|
||||
return insn - insn_buf;
|
||||
}
|
||||
|
||||
static const struct bpf_verifier_ops sk_filter_ops = {
|
||||
.get_func_proto = sk_filter_func_proto,
|
||||
.is_valid_access = sk_filter_is_valid_access,
|
||||
.convert_ctx_access = sk_filter_convert_ctx_access,
|
||||
};
|
||||
|
||||
static struct bpf_prog_type_list sk_filter_type __read_mostly = {
|
||||
|
|
Loading…
Reference in New Issue