Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2018-01-09

The following pull-request contains BPF updates for your *net* tree.

The main changes are:

1) Prevent out-of-bounds speculation in BPF maps by masking the
   index after bounds checks in order to fix spectre v1, and
   add an option BPF_JIT_ALWAYS_ON into Kconfig that allows for
   removing the BPF interpreter from the kernel in favor of
   JIT-only mode to make spectre v2 harder, from Alexei.

2) Remove false sharing of map refcount with max_entries which
   was used in spectre v1, from Daniel.

3) Add a missing NULL psock check in sockmap in order to fix
   a race, from John.

4) Fix test_align BPF selftest case since a recent change in the
   verifier rejects bit-wise arithmetic on pointers earlier, but
   the matching test_align update was missing, from Alexei.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2018-01-10 11:17:21 -05:00
commit 661e4e33a9
11 changed files with 150 additions and 50 deletions

View File

@ -43,7 +43,14 @@ struct bpf_map_ops {
}; };
struct bpf_map { struct bpf_map {
atomic_t refcnt; /* 1st cacheline with read-mostly members of which some
* are also accessed in fast-path (e.g. ops, max_entries).
*/
const struct bpf_map_ops *ops ____cacheline_aligned;
struct bpf_map *inner_map_meta;
#ifdef CONFIG_SECURITY
void *security;
#endif
enum bpf_map_type map_type; enum bpf_map_type map_type;
u32 key_size; u32 key_size;
u32 value_size; u32 value_size;
@ -52,15 +59,17 @@ struct bpf_map {
u32 pages; u32 pages;
u32 id; u32 id;
int numa_node; int numa_node;
struct user_struct *user; bool unpriv_array;
const struct bpf_map_ops *ops; /* 7 bytes hole */
struct work_struct work;
/* 2nd cacheline with misc members to avoid false sharing
* particularly with refcounting.
*/
struct user_struct *user ____cacheline_aligned;
atomic_t refcnt;
atomic_t usercnt; atomic_t usercnt;
struct bpf_map *inner_map_meta; struct work_struct work;
char name[BPF_OBJ_NAME_LEN]; char name[BPF_OBJ_NAME_LEN];
#ifdef CONFIG_SECURITY
void *security;
#endif
}; };
/* function argument constraints */ /* function argument constraints */
@ -221,6 +230,7 @@ struct bpf_prog_aux {
struct bpf_array { struct bpf_array {
struct bpf_map map; struct bpf_map map;
u32 elem_size; u32 elem_size;
u32 index_mask;
/* 'ownership' of prog_array is claimed by the first program that /* 'ownership' of prog_array is claimed by the first program that
* is going to use this map or by the first program which FD is stored * is going to use this map or by the first program which FD is stored
* in the map to make sure that all callers and callees have the same * in the map to make sure that all callers and callees have the same

View File

@ -1396,6 +1396,13 @@ config BPF_SYSCALL
Enable the bpf() system call that allows to manipulate eBPF Enable the bpf() system call that allows to manipulate eBPF
programs and maps via file descriptors. programs and maps via file descriptors.
config BPF_JIT_ALWAYS_ON
bool "Permanently enable BPF JIT and remove BPF interpreter"
depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
help
Enables BPF JIT and removes BPF interpreter to avoid
speculative execution of BPF instructions by the interpreter.
config USERFAULTFD config USERFAULTFD
bool "Enable userfaultfd() system call" bool "Enable userfaultfd() system call"
select ANON_INODES select ANON_INODES

View File

@ -53,9 +53,10 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{ {
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
int numa_node = bpf_map_attr_numa_node(attr); int numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
bool unpriv = !capable(CAP_SYS_ADMIN);
struct bpf_array *array; struct bpf_array *array;
u64 array_size; u64 array_size;
u32 elem_size;
/* check sanity of attributes */ /* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 || if (attr->max_entries == 0 || attr->key_size != 4 ||
@ -72,11 +73,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
elem_size = round_up(attr->value_size, 8); elem_size = round_up(attr->value_size, 8);
max_entries = attr->max_entries;
index_mask = roundup_pow_of_two(max_entries) - 1;
if (unpriv)
/* round up array size to nearest power of 2,
* since cpu will speculate within index_mask limits
*/
max_entries = index_mask + 1;
array_size = sizeof(*array); array_size = sizeof(*array);
if (percpu) if (percpu)
array_size += (u64) attr->max_entries * sizeof(void *); array_size += (u64) max_entries * sizeof(void *);
else else
array_size += (u64) attr->max_entries * elem_size; array_size += (u64) max_entries * elem_size;
/* make sure there is no u32 overflow later in round_up() */ /* make sure there is no u32 overflow later in round_up() */
if (array_size >= U32_MAX - PAGE_SIZE) if (array_size >= U32_MAX - PAGE_SIZE)
@ -86,6 +96,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
array = bpf_map_area_alloc(array_size, numa_node); array = bpf_map_area_alloc(array_size, numa_node);
if (!array) if (!array)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
array->index_mask = index_mask;
array->map.unpriv_array = unpriv;
/* copy mandatory map attributes */ /* copy mandatory map attributes */
array->map.map_type = attr->map_type; array->map.map_type = attr->map_type;
@ -121,12 +133,13 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
if (unlikely(index >= array->map.max_entries)) if (unlikely(index >= array->map.max_entries))
return NULL; return NULL;
return array->value + array->elem_size * index; return array->value + array->elem_size * (index & array->index_mask);
} }
/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */ /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{ {
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_insn *insn = insn_buf; struct bpf_insn *insn = insn_buf;
u32 elem_size = round_up(map->value_size, 8); u32 elem_size = round_up(map->value_size, 8);
const int ret = BPF_REG_0; const int ret = BPF_REG_0;
@ -135,7 +148,12 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3); if (map->unpriv_array) {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
} else {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
}
if (is_power_of_2(elem_size)) { if (is_power_of_2(elem_size)) {
*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
@ -157,7 +175,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
if (unlikely(index >= array->map.max_entries)) if (unlikely(index >= array->map.max_entries))
return NULL; return NULL;
return this_cpu_ptr(array->pptrs[index]); return this_cpu_ptr(array->pptrs[index & array->index_mask]);
} }
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
@ -177,7 +195,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
*/ */
size = round_up(map->value_size, 8); size = round_up(map->value_size, 8);
rcu_read_lock(); rcu_read_lock();
pptr = array->pptrs[index]; pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size); bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
off += size; off += size;
@ -225,10 +243,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
return -EEXIST; return -EEXIST;
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
memcpy(this_cpu_ptr(array->pptrs[index]), memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
value, map->value_size); value, map->value_size);
else else
memcpy(array->value + array->elem_size * index, memcpy(array->value +
array->elem_size * (index & array->index_mask),
value, map->value_size); value, map->value_size);
return 0; return 0;
} }
@ -262,7 +281,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
*/ */
size = round_up(map->value_size, 8); size = round_up(map->value_size, 8);
rcu_read_lock(); rcu_read_lock();
pptr = array->pptrs[index]; pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size); bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
off += size; off += size;
@ -613,6 +632,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
static u32 array_of_map_gen_lookup(struct bpf_map *map, static u32 array_of_map_gen_lookup(struct bpf_map *map,
struct bpf_insn *insn_buf) struct bpf_insn *insn_buf)
{ {
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 elem_size = round_up(map->value_size, 8); u32 elem_size = round_up(map->value_size, 8);
struct bpf_insn *insn = insn_buf; struct bpf_insn *insn = insn_buf;
const int ret = BPF_REG_0; const int ret = BPF_REG_0;
@ -621,7 +641,12 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map,
*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5); if (map->unpriv_array) {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
} else {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
}
if (is_power_of_2(elem_size)) if (is_power_of_2(elem_size))
*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
else else

View File

@ -767,6 +767,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
} }
EXPORT_SYMBOL_GPL(__bpf_call_base); EXPORT_SYMBOL_GPL(__bpf_call_base);
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
/** /**
* __bpf_prog_run - run eBPF program on a given context * __bpf_prog_run - run eBPF program on a given context
* @ctx: is the data we are operating on * @ctx: is the data we are operating on
@ -1317,6 +1318,14 @@ EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
}; };
#else
/* Placeholder bpf_func used when CONFIG_BPF_JIT_ALWAYS_ON is set and the
 * interpreter is not built. It ignores both @ctx and @insn and always
 * returns 0.
 */
static unsigned int __bpf_prog_ret0(const void *ctx,
const struct bpf_insn *insn)
{
return 0;
}
#endif
bool bpf_prog_array_compatible(struct bpf_array *array, bool bpf_prog_array_compatible(struct bpf_array *array,
const struct bpf_prog *fp) const struct bpf_prog *fp)
{ {
@ -1364,9 +1373,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
*/ */
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
{ {
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1); u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1]; fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
#else
fp->bpf_func = __bpf_prog_ret0;
#endif
/* eBPF JITs can rewrite the program in case constant /* eBPF JITs can rewrite the program in case constant
* blinding is active. However, in case of error during * blinding is active. However, in case of error during
@ -1376,6 +1389,12 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
*/ */
if (!bpf_prog_is_dev_bound(fp->aux)) { if (!bpf_prog_is_dev_bound(fp->aux)) {
fp = bpf_int_jit_compile(fp); fp = bpf_int_jit_compile(fp);
#ifdef CONFIG_BPF_JIT_ALWAYS_ON
if (!fp->jited) {
*err = -ENOTSUPP;
return fp;
}
#endif
} else { } else {
*err = bpf_prog_offload_compile(fp); *err = bpf_prog_offload_compile(fp);
if (*err) if (*err)

View File

@ -591,8 +591,15 @@ static void sock_map_free(struct bpf_map *map)
write_lock_bh(&sock->sk_callback_lock); write_lock_bh(&sock->sk_callback_lock);
psock = smap_psock_sk(sock); psock = smap_psock_sk(sock);
smap_list_remove(psock, &stab->sock_map[i]); /* This check handles a racing sock event that can get the
smap_release_sock(psock, sock); * sk_callback_lock before this case but after xchg happens
* causing the refcnt to hit zero and sock user data (psock)
* to be null and queued for garbage collection.
*/
if (likely(psock)) {
smap_list_remove(psock, &stab->sock_map[i]);
smap_release_sock(psock, sock);
}
write_unlock_bh(&sock->sk_callback_lock); write_unlock_bh(&sock->sk_callback_lock);
} }
rcu_read_unlock(); rcu_read_unlock();

View File

@ -1729,6 +1729,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta); err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
if (err) if (err)
return err; return err;
if (func_id == BPF_FUNC_tail_call) {
if (meta.map_ptr == NULL) {
verbose(env, "verifier bug\n");
return -EINVAL;
}
env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
}
err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta); err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
if (err) if (err)
return err; return err;
@ -4456,6 +4463,35 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
*/ */
insn->imm = 0; insn->imm = 0;
insn->code = BPF_JMP | BPF_TAIL_CALL; insn->code = BPF_JMP | BPF_TAIL_CALL;
/* instead of changing every JIT dealing with tail_call
* emit two extra insns:
* if (index >= max_entries) goto out;
* index &= array->index_mask;
* to avoid out-of-bounds cpu speculation
*/
map_ptr = env->insn_aux_data[i + delta].map_ptr;
if (map_ptr == BPF_MAP_PTR_POISON) {
verbose(env, "tail_call obusing map_ptr\n");
return -EINVAL;
}
if (!map_ptr->unpriv_array)
continue;
insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
map_ptr->max_entries, 2);
insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
container_of(map_ptr,
struct bpf_array,
map)->index_mask);
insn_buf[2] = *insn;
cnt = 3;
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
continue; continue;
} }

View File

@ -6250,9 +6250,8 @@ static struct bpf_prog *generate_filter(int which, int *err)
return NULL; return NULL;
} }
} }
/* We don't expect to fail. */
if (*err) { if (*err) {
pr_cont("FAIL to attach err=%d len=%d\n", pr_cont("FAIL to prog_create err=%d len=%d\n",
*err, fprog.len); *err, fprog.len);
return NULL; return NULL;
} }
@ -6276,6 +6275,10 @@ static struct bpf_prog *generate_filter(int which, int *err)
* checks. * checks.
*/ */
fp = bpf_prog_select_runtime(fp, err); fp = bpf_prog_select_runtime(fp, err);
if (*err) {
pr_cont("FAIL to select_runtime err=%d\n", *err);
return NULL;
}
break; break;
} }
@ -6461,8 +6464,8 @@ static __init int test_bpf(void)
pass_cnt++; pass_cnt++;
continue; continue;
} }
err_cnt++;
return err; continue;
} }
pr_cont("jited:%u ", fp->jited); pr_cont("jited:%u ", fp->jited);

View File

@ -1054,11 +1054,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
*/ */
goto out_err_free; goto out_err_free;
/* We are guaranteed to never error here with cBPF to eBPF
* transitions, since there's no issue with type compatibility
* checks on program arrays.
*/
fp = bpf_prog_select_runtime(fp, &err); fp = bpf_prog_select_runtime(fp, &err);
if (err)
goto out_err_free;
kfree(old_prog); kfree(old_prog);
return fp; return fp;

View File

@ -325,7 +325,13 @@ static struct ctl_table net_core_table[] = {
.data = &bpf_jit_enable, .data = &bpf_jit_enable,
.maxlen = sizeof(int), .maxlen = sizeof(int),
.mode = 0644, .mode = 0644,
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
.proc_handler = proc_dointvec .proc_handler = proc_dointvec
#else
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
.extra2 = &one,
#endif
}, },
# ifdef CONFIG_HAVE_EBPF_JIT # ifdef CONFIG_HAVE_EBPF_JIT
{ {

View File

@ -2619,6 +2619,15 @@ out_fs:
core_initcall(sock_init); /* early initcall */ core_initcall(sock_init); /* early initcall */
/* Init-time hook (registered through pure_initcall): when
 * CONFIG_BPF_JIT_ALWAYS_ON is set, force bpf_jit_enable to 1; without
 * that option this function does nothing. Always returns 0.
 */
static int __init jit_init(void)
{
#ifdef CONFIG_BPF_JIT_ALWAYS_ON
bpf_jit_enable = 1;
#endif
return 0;
}
pure_initcall(jit_init);
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
void socket_seq_show(struct seq_file *seq) void socket_seq_show(struct seq_file *seq)
{ {

View File

@ -474,27 +474,7 @@ static struct bpf_align_test tests[] = {
.result = REJECT, .result = REJECT,
.matches = { .matches = {
{4, "R5=pkt(id=0,off=0,r=0,imm=0)"}, {4, "R5=pkt(id=0,off=0,r=0,imm=0)"},
/* ptr & 0x40 == either 0 or 0x40 */ /* R5 bitwise operator &= on pointer prohibited */
{5, "R5=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"},
/* ptr << 2 == unknown, (4n) */
{7, "R5=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
/* (4n) + 14 == (4n+2). We blow our bounds, because
* the add could overflow.
*/
{8, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
/* Checked s>=0 */
{10, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
/* packet pointer + nonnegative (4n+2) */
{12, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
{14, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
* We checked the bounds, but it might have been able
* to overflow if the packet pointer started in the
* upper half of the address space.
* So we did not get a 'range' on R6, and the access
* attempt will fail.
*/
{16, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
} }
}, },
{ {