bpf: Fix NULL pointer dereference in bpf_get_local_storage() helper
Jiri Olsa reported a bug ([1]) in the kernel where the cgroup local storage pointer may be NULL in the bpf_get_local_storage() helper. The bug uncovered two issues:
  (1) kprobe or tracepoint progs incorrectly set cgroup local storage before the prog run, and
  (2) due to the change from preempt_disable to migrate_disable, preemption is possible and the percpu storage might be overwritten by other tasks.

Issue (1) is fixed in [2]. This patch addresses issue (2).

The following shows how things can go wrong:
  task 1:  bpf_cgroup_storage_set() for percpu local storage
           preemption happens
  task 2:  bpf_cgroup_storage_set() for percpu local storage
           preemption happens
  task 1:  run bpf program

task 1 will effectively use the percpu local storage set by task 2, which will be either NULL or incorrect.

Instead of just one common local storage per cpu, this patch fixes the issue by permitting 8 local storages per cpu, each identified by a task_struct pointer. This allows at most 8 nested preemptions between bpf_cgroup_storage_set() and bpf_cgroup_storage_unset(). The percpu local storage slot is released (by calling bpf_cgroup_storage_unset()) by the same task after the bpf program has finished running. bpf_test_run() is also updated to use the new bpf_cgroup_storage_set() interface.

The patch is tested on top of [2] with the reproducer in [1]. Without this patch, the kernel emits errors within 2-3 minutes; with this patch, no error is seen after one hour.

  [1] https://lore.kernel.org/bpf/CAKH8qBuXCfUz=w8L+Fj74OaUpbosO29niYwTki7e3Ag044_aww@mail.gmail.com/T
  [2] https://lore.kernel.org/bpf/20210309185028.3763817-1-yhs@fb.com

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Roman Gushchin <guro@fb.com>
Link: https://lore.kernel.org/bpf/20210323055146.3334476-1-yhs@fb.com
commit b910eaaaa4 (parent a46410d5e4)
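To make the new contract concrete before the diff, here is a minimal caller-side sketch. The run_prog_with_cgroup_storage() wrapper is hypothetical and shown only for illustration; the real callers are BPF_PROG_RUN_ARRAY_FLAGS, __BPF_PROG_RUN_ARRAY and bpf_test_run() in the hunks below. bpf_cgroup_storage_set() now returns -EBUSY when all BPF_CGROUP_STORAGE_NEST_MAX per-cpu slots are busy, and the same task must call bpf_cgroup_storage_unset() once the program has finished.

/* Hypothetical caller, illustrating the new set/unset contract.
 * Not part of the patch; see the real users in the diff below.
 */
static int run_prog_with_cgroup_storage(struct bpf_prog *prog, void *ctx,
		struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
{
	int ret;

	/* Claim one of the BPF_CGROUP_STORAGE_NEST_MAX per-cpu slots;
	 * fails with -EBUSY if every slot is held by a preempted task.
	 */
	ret = bpf_cgroup_storage_set(storage);
	if (ret)
		return ret;

	ret = BPF_PROG_RUN(prog, ctx);

	/* Release the slot owned by current so it can be reused. */
	bpf_cgroup_storage_unset();
	return ret;
}

A slot stays claimed for the whole program run, so with migrate_disable() in the callers up to BPF_CGROUP_STORAGE_NEST_MAX tasks can be preempted inside that window on one cpu before bpf_cgroup_storage_set() starts failing.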
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -20,14 +20,25 @@ struct bpf_sock_ops_kern;
 struct bpf_cgroup_storage;
 struct ctl_table;
 struct ctl_table_header;
+struct task_struct;
 
 #ifdef CONFIG_CGROUP_BPF
 
 extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
 #define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
 
-DECLARE_PER_CPU(struct bpf_cgroup_storage*,
-		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+#define BPF_CGROUP_STORAGE_NEST_MAX	8
+
+struct bpf_cgroup_storage_info {
+	struct task_struct *task;
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
+};
+
+/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks
+ * to use bpf cgroup storage simultaneously.
+ */
+DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
+		bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
 
 #define for_each_cgroup_storage_type(stype) \
 	for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
@@ -161,13 +172,42 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type(
 	return BPF_CGROUP_STORAGE_SHARED;
 }
 
-static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
-					  *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
+static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
+					 *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
 {
 	enum bpf_cgroup_storage_type stype;
+	int i, err = 0;
+
+	preempt_disable();
+	for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
+		if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
+			continue;
+
+		this_cpu_write(bpf_cgroup_storage_info[i].task, current);
+		for_each_cgroup_storage_type(stype)
+			this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
+				       storage[stype]);
+		goto out;
+	}
+	err = -EBUSY;
+	WARN_ON_ONCE(1);
+
+out:
+	preempt_enable();
+	return err;
+}
+
+static inline void bpf_cgroup_storage_unset(void)
+{
+	int i;
+
+	for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
+		if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
+			continue;
 
-	for_each_cgroup_storage_type(stype)
-		this_cpu_write(bpf_cgroup_storage[stype], storage[stype]);
+		this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
+		return;
+	}
 }
 
 struct bpf_cgroup_storage *
@@ -448,8 +488,9 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
 	return -EINVAL;
 }
 
-static inline void bpf_cgroup_storage_set(
-	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
+static inline int bpf_cgroup_storage_set(
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
+static inline void bpf_cgroup_storage_unset(void) {}
 static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
 					    struct bpf_map *map) { return 0; }
 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1106,6 +1106,13 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
 /* BPF program asks to set CN on the packet. */
 #define BPF_RET_SET_CN						(1 << 0)
 
+/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY,
+ * if bpf_cgroup_storage_set() failed, the rest of programs
+ * will not execute. This should be a really rare scenario
+ * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of
+ * preemptions all between bpf_cgroup_storage_set() and
+ * bpf_cgroup_storage_unset() on the same cpu.
+ */
 #define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags)		\
 	({								\
 		struct bpf_prog_array_item *_item;			\
@@ -1118,10 +1125,12 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
 		_array = rcu_dereference(array);			\
 		_item = &_array->items[0];				\
 		while ((_prog = READ_ONCE(_item->prog))) {		\
-			bpf_cgroup_storage_set(_item->cgroup_storage);	\
+			if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))	\
+				break;					\
 			func_ret = func(_prog, ctx);			\
 			_ret &= (func_ret & 1);				\
 			*(ret_flags) |= (func_ret >> 1);		\
+			bpf_cgroup_storage_unset();			\
 			_item++;					\
 		}							\
 		rcu_read_unlock();					\
@@ -1142,9 +1151,14 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
 			goto _out;					\
 		_item = &_array->items[0];				\
 		while ((_prog = READ_ONCE(_item->prog))) {		\
-			if (set_cg_storage)				\
-				bpf_cgroup_storage_set(_item->cgroup_storage);	\
-			_ret &= func(_prog, ctx);			\
+			if (!set_cg_storage) {				\
+				_ret &= func(_prog, ctx);		\
+			} else {					\
+				if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))	\
+					break;				\
+				_ret &= func(_prog, ctx);		\
+				bpf_cgroup_storage_unset();		\
+			}						\
 			_item++;					\
 		}							\
 		_out:							\
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -382,8 +382,8 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
 };
 
 #ifdef CONFIG_CGROUP_BPF
-DECLARE_PER_CPU(struct bpf_cgroup_storage*,
-		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
+		bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
 
 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
 {
@@ -392,10 +392,17 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
 	 * verifier checks that its value is correct.
 	 */
 	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
-	struct bpf_cgroup_storage *storage;
+	struct bpf_cgroup_storage *storage = NULL;
 	void *ptr;
+	int i;
 
-	storage = this_cpu_read(bpf_cgroup_storage[stype]);
+	for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
+		if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
+			continue;
+
+		storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]);
+		break;
+	}
 
 	if (stype == BPF_CGROUP_STORAGE_SHARED)
 		ptr = &READ_ONCE(storage->buf)->data[0];
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -9,10 +9,11 @@
 #include <linux/slab.h>
 #include <uapi/linux/btf.h>
 
-DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
-
 #ifdef CONFIG_CGROUP_BPF
 
+DEFINE_PER_CPU(struct bpf_cgroup_storage_info,
+	       bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
+
 #include "../cgroup/cgroup-internal.h"
 
 #define LOCAL_STORAGE_CREATE_FLAG_MASK					\
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -106,12 +106,16 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
 
 	bpf_test_timer_enter(&t);
 	do {
-		bpf_cgroup_storage_set(storage);
+		ret = bpf_cgroup_storage_set(storage);
+		if (ret)
+			break;
+
 		if (xdp)
 			*retval = bpf_prog_run_xdp(prog, ctx);
 		else
 			*retval = BPF_PROG_RUN(prog, ctx);
 
+		bpf_cgroup_storage_unset();
 	} while (bpf_test_timer_continue(&t, repeat, &ret, time));
 	bpf_test_timer_leave(&t);
 