2016-09-21 18:43:57 +08:00
|
|
|
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of version 2 of the GNU General Public
|
|
|
|
* License as published by the Free Software Foundation.
|
|
|
|
*/
|
|
|
|
#ifndef _LINUX_BPF_VERIFIER_H
|
|
|
|
#define _LINUX_BPF_VERIFIER_H 1
|
|
|
|
|
|
|
|
#include <linux/bpf.h> /* for enum bpf_reg_type */
|
|
|
|
#include <linux/filter.h> /* for MAX_BPF_STACK */
|
|
|
|
|
2016-09-28 22:54:32 +08:00
|
|
|
/* Just some arbitrary values so we can safely do math without overflowing and
|
|
|
|
* are obviously wrong for any sort of memory access.
|
|
|
|
*/
|
|
|
|
#define BPF_REGISTER_MAX_RANGE (1024 * 1024 * 1024)
|
2016-11-15 04:45:36 +08:00
|
|
|
#define BPF_REGISTER_MIN_RANGE -1
|
2016-09-28 22:54:32 +08:00
|
|
|
|
2016-09-21 18:43:57 +08:00
|
|
|
struct bpf_reg_state {
|
|
|
|
enum bpf_reg_type type;
|
|
|
|
union {
|
|
|
|
/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
|
|
|
|
s64 imm;
|
|
|
|
|
|
|
|
/* valid when type == PTR_TO_PACKET* */
|
|
|
|
struct {
|
|
|
|
u16 off;
|
|
|
|
u16 range;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
|
|
|
|
* PTR_TO_MAP_VALUE_OR_NULL
|
|
|
|
*/
|
|
|
|
struct bpf_map *map_ptr;
|
|
|
|
};
|
2016-12-08 02:57:59 +08:00
|
|
|
u32 id;
|
|
|
|
/* Used to determine if any memory access using this register will
|
|
|
|
* result in a bad access. These two fields must be last.
|
|
|
|
* See states_equal()
|
|
|
|
*/
|
|
|
|
s64 min_value;
|
|
|
|
u64 max_value;
|
bpf: Track alignment of register values in the verifier.
Currently if we add only constant values to pointers we can fully
validate the alignment, and properly check if we need to reject the
program on !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS architectures.
However, once an unknown value is introduced we only allow byte sized
memory accesses which is too restrictive.
Add logic to track the known minimum alignment of register values,
and propagate this state into registers containing pointers.
The most common paradigm that makes use of this new logic is computing
the transport header using the IP header length field. For example:
struct ethhdr *ep = skb->data;
struct iphdr *iph = (struct iphdr *) (ep + 1);
struct tcphdr *th;
...
n = iph->ihl;
th = ((void *)iph + (n * 4));
port = th->dest;
The existing code will reject the load of th->dest because it cannot
validate that the alignment is at least 2 once "n * 4" is added the
the packet pointer.
In the new code, the register holding "n * 4" will have a reg->min_align
value of 4, because any value multiplied by 4 will be at least 4 byte
aligned. (actually, the eBPF code emitted by the compiler in this case
is most likely to use a shift left by 2, but the end result is identical)
At the critical addition:
th = ((void *)iph + (n * 4));
The register holding 'th' will start with reg->off value of 14. The
pointer addition will transform that reg into something that looks like:
reg->aux_off = 14
reg->aux_off_align = 4
Next, the verifier will look at the th->dest load, and it will see
a load offset of 2, and first check:
if (reg->aux_off_align % size)
which will pass because aux_off_align is 4. reg_off will be computed:
reg_off = reg->off;
...
reg_off += reg->aux_off;
plus we have off==2, and it will thus check:
if ((NET_IP_ALIGN + reg_off + off) % size != 0)
which evaluates to:
if ((NET_IP_ALIGN + 14 + 2) % size != 0)
On strict alignment architectures, NET_IP_ALIGN is 2, thus:
if ((2 + 14 + 2) % size != 0)
which passes.
These pointer transformations and checks work regardless of whether
the constant offset or the variable with known alignment is added
first to the pointer register.
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
2017-05-11 02:22:52 +08:00
|
|
|
u32 min_align;
|
|
|
|
u32 aux_off;
|
|
|
|
u32 aux_off_align;
|
bpf: fix mixed signed/unsigned derived min/max value bounds
Edward reported that there's an issue in min/max value bounds
tracking when signed and unsigned compares both provide hints
on limits when having unknown variables. E.g. a program such
as the following should have been rejected:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (18) r1 = 0xffff8a94cda93400
5: (85) call bpf_map_lookup_elem#1
6: (15) if r0 == 0x0 goto pc+7
R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp
7: (7a) *(u64 *)(r10 -16) = -8
8: (79) r1 = *(u64 *)(r10 -16)
9: (b7) r2 = -1
10: (2d) if r1 > r2 goto pc+3
R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0
R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp
11: (65) if r1 s> 0x1 goto pc+2
R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0,max_value=1
R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp
12: (0f) r0 += r1
13: (72) *(u8 *)(r0 +0) = 0
R0=map_value_adj(ks=8,vs=8,id=0),min_value=0,max_value=1 R1=inv,min_value=0,max_value=1
R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp
14: (b7) r0 = 0
15: (95) exit
What happens is that in the first part ...
8: (79) r1 = *(u64 *)(r10 -16)
9: (b7) r2 = -1
10: (2d) if r1 > r2 goto pc+3
... r1 carries an unsigned value, and is compared as unsigned
against a register carrying an immediate. Verifier deduces in
reg_set_min_max() that since the compare is unsigned and operation
is greater than (>), that in the fall-through/false case, r1's
minimum bound must be 0 and maximum bound must be r2. Latter is
larger than the bound and thus max value is reset back to being
'invalid' aka BPF_REGISTER_MAX_RANGE. Thus, r1 state is now
'R1=inv,min_value=0'. The subsequent test ...
11: (65) if r1 s> 0x1 goto pc+2
... is a signed compare of r1 with immediate value 1. Here,
verifier deduces in reg_set_min_max() that since the compare
is signed this time and operation is greater than (>), that
in the fall-through/false case, we can deduce that r1's maximum
bound must be 1, meaning with prior test, we result in r1 having
the following state: R1=inv,min_value=0,max_value=1. Given that
the actual value this holds is -8, the bounds are wrongly deduced.
When this is being added to r0 which holds the map_value(_adj)
type, then subsequent store access in above case will go through
check_mem_access() which invokes check_map_access_adj(), that
will then probe whether the map memory is in bounds based
on the min_value and max_value as well as access size since
the actual unknown value is min_value <= x <= max_value; commit
fce366a9dd0d ("bpf, verifier: fix alu ops against map_value{,
_adj} register types") provides some more explanation on the
semantics.
It's worth to note in this context that in the current code,
min_value and max_value tracking are used for two things, i)
dynamic map value access via check_map_access_adj() and since
commit 06c1c049721a ("bpf: allow helpers access to variable memory")
ii) also enforced at check_helper_mem_access() when passing a
memory address (pointer to packet, map value, stack) and length
pair to a helper and the length in this case is an unknown value
defining an access range through min_value/max_value in that
case. The min_value/max_value tracking is /not/ used in the
direct packet access case to track ranges. However, the issue
also affects case ii), for example, the following crafted program
based on the same principle must be rejected as well:
0: (b7) r2 = 0
1: (bf) r3 = r10
2: (07) r3 += -512
3: (7a) *(u64 *)(r10 -16) = -8
4: (79) r4 = *(u64 *)(r10 -16)
5: (b7) r6 = -1
6: (2d) if r4 > r6 goto pc+5
R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512
R4=inv,min_value=0 R6=imm-1,max_value=18446744073709551615,min_align=1 R10=fp
7: (65) if r4 s> 0x1 goto pc+4
R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512
R4=inv,min_value=0,max_value=1 R6=imm-1,max_value=18446744073709551615,min_align=1
R10=fp
8: (07) r4 += 1
9: (b7) r5 = 0
10: (6a) *(u16 *)(r10 -512) = 0
11: (85) call bpf_skb_load_bytes#26
12: (b7) r0 = 0
13: (95) exit
Meaning, while we initialize the max_value stack slot that the
verifier thinks we access in the [1,2] range, in reality we
pass -7 as length which is interpreted as u32 in the helper.
Thus, this issue is relevant also for the case of helper ranges.
Resetting both bounds in check_reg_overflow() in case only one
of them exceeds limits is also not enough as similar test can be
created that uses values which are within range, thus also here
learned min value in r1 is incorrect when mixed with later signed
test to create a range:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (18) r1 = 0xffff880ad081fa00
5: (85) call bpf_map_lookup_elem#1
6: (15) if r0 == 0x0 goto pc+7
R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp
7: (7a) *(u64 *)(r10 -16) = -8
8: (79) r1 = *(u64 *)(r10 -16)
9: (b7) r2 = 2
10: (3d) if r2 >= r1 goto pc+3
R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3
R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp
11: (65) if r1 s> 0x4 goto pc+2
R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0
R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp
12: (0f) r0 += r1
13: (72) *(u8 *)(r0 +0) = 0
R0=map_value_adj(ks=8,vs=8,id=0),min_value=3,max_value=4
R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp
14: (b7) r0 = 0
15: (95) exit
This leaves us with two options for fixing this: i) to invalidate
all prior learned information once we switch signed context, ii)
to track min/max signed and unsigned boundaries separately as
done in [0]. (Given latter introduces major changes throughout
the whole verifier, it's rather net-next material, thus this
patch follows option i), meaning we can derive bounds either
from only signed tests or only unsigned tests.) There is still the
case of adjust_reg_min_max_vals(), where we adjust bounds on ALU
operations, meaning programs like the following where boundaries
on the reg get mixed in context later on when bounds are merged
on the dst reg must get rejected, too:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (18) r1 = 0xffff89b2bf87ce00
5: (85) call bpf_map_lookup_elem#1
6: (15) if r0 == 0x0 goto pc+6
R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp
7: (7a) *(u64 *)(r10 -16) = -8
8: (79) r1 = *(u64 *)(r10 -16)
9: (b7) r2 = 2
10: (3d) if r2 >= r1 goto pc+2
R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3
R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp
11: (b7) r7 = 1
12: (65) if r7 s> 0x0 goto pc+2
R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3
R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,max_value=0 R10=fp
13: (b7) r0 = 0
14: (95) exit
from 12 to 15: R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0
R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,min_value=1 R10=fp
15: (0f) r7 += r1
16: (65) if r7 s> 0x4 goto pc+2
R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3
R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp
17: (0f) r0 += r7
18: (72) *(u8 *)(r0 +0) = 0
R0=map_value_adj(ks=8,vs=8,id=0),min_value=4,max_value=4 R1=inv,min_value=3
R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp
19: (b7) r0 = 0
20: (95) exit
Meaning, in adjust_reg_min_max_vals() we must also reset range
values on the dst when src/dst registers have mixed signed/
unsigned derived min/max value bounds with one unbounded value
as otherwise they can be added together deducing false boundaries.
Once both boundaries are established from either ALU ops or
compare operations w/o mixing signed/unsigned insns, then they
can safely be added to other regs also having both boundaries
established. Adding regs with one unbounded side to a map value
where the bounded side has been learned w/o mixing ops is
possible, but the resulting map value won't recover from that,
meaning such op is considered invalid on the time of actual
access. Invalid bounds are set on the dst reg in case i) src reg,
or ii) in case dst reg already had them. The only way to recover
would be to perform i) ALU ops but only 'add' is allowed on map
value types or ii) comparisons, but these are disallowed on
pointers in case they span a range. This is fine as only BPF_JEQ
and BPF_JNE may be performed on PTR_TO_MAP_VALUE_OR_NULL registers
which potentially turn them into PTR_TO_MAP_VALUE type depending
on the branch, so only here min/max value cannot be invalidated
for them.
In terms of state pruning, value_from_signed is considered
as well in states_equal() when dealing with adjusted map values.
With regards to breaking existing programs, there is a small
risk, but use-cases are rather quite narrow where this could
occur and mixing compares probably unlikely.
Joint work with Josef and Edward.
[0] https://lists.iovisor.org/pipermail/iovisor-dev/2017-June/000822.html
Fixes: 484611357c19 ("bpf: allow access into map value arrays")
Reported-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-07-21 06:00:21 +08:00
|
|
|
bool value_from_signed;
|
2016-09-21 18:43:57 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
enum bpf_stack_slot_type {
|
|
|
|
STACK_INVALID, /* nothing was stored in this stack slot */
|
|
|
|
STACK_SPILL, /* register spilled into stack */
|
|
|
|
STACK_MISC /* BPF program wrote some data into this slot */
|
|
|
|
};
|
|
|
|
|
|
|
|
#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
|
|
|
|
|
|
|
|
/* state of the program:
|
|
|
|
* type of all registers and stack info
|
|
|
|
*/
|
|
|
|
struct bpf_verifier_state {
|
|
|
|
struct bpf_reg_state regs[MAX_BPF_REG];
|
|
|
|
u8 stack_slot_type[MAX_BPF_STACK];
|
|
|
|
struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
|
|
|
|
};
|
|
|
|
|
|
|
|
/* linked list of verifier states used to prune search */
|
|
|
|
struct bpf_verifier_state_list {
|
|
|
|
struct bpf_verifier_state state;
|
|
|
|
struct bpf_verifier_state_list *next;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct bpf_insn_aux_data {
|
2017-03-16 09:26:42 +08:00
|
|
|
union {
|
|
|
|
enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
|
|
|
|
struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */
|
|
|
|
};
|
2017-06-23 06:07:39 +08:00
|
|
|
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
|
|
|
|
int converted_op_size; /* the valid value width after perceived conversion */
|
2016-09-21 18:43:57 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
|
|
|
|
|
2016-09-21 18:43:58 +08:00
|
|
|
struct bpf_verifier_env;
|
|
|
|
struct bpf_ext_analyzer_ops {
|
|
|
|
int (*insn_hook)(struct bpf_verifier_env *env,
|
|
|
|
int insn_idx, int prev_insn_idx);
|
|
|
|
};
|
|
|
|
|
2016-09-21 18:43:57 +08:00
|
|
|
/* single container for all structs
|
|
|
|
* one verifier_env per bpf_check() call
|
|
|
|
*/
|
|
|
|
struct bpf_verifier_env {
|
|
|
|
struct bpf_prog *prog; /* eBPF program being verified */
|
|
|
|
struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
|
|
|
|
int stack_size; /* number of states to be processed */
|
2017-05-11 02:38:07 +08:00
|
|
|
bool strict_alignment; /* perform strict pointer alignment checks */
|
2016-09-21 18:43:57 +08:00
|
|
|
struct bpf_verifier_state cur_state; /* current verifier state */
|
|
|
|
struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
|
2016-09-21 18:43:58 +08:00
|
|
|
const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */
|
|
|
|
void *analyzer_priv; /* pointer to external analyzer's private data */
|
2016-09-21 18:43:57 +08:00
|
|
|
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
|
|
|
|
u32 used_map_cnt; /* number of used maps */
|
|
|
|
u32 id_gen; /* used to generate unique reg IDs */
|
|
|
|
bool allow_ptr_leaks;
|
|
|
|
bool seen_direct_write;
|
2016-09-28 22:54:32 +08:00
|
|
|
bool varlen_map_value_access;
|
2016-09-21 18:43:57 +08:00
|
|
|
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
|
|
|
|
};
|
|
|
|
|
2016-09-21 18:43:58 +08:00
|
|
|
int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
|
|
|
|
void *priv);
|
|
|
|
|
2016-09-21 18:43:57 +08:00
|
|
|
#endif /* _LINUX_BPF_VERIFIER_H */
|