bpf: Check map->usercnt after timer->timer is assigned
[ Upstream commit fd381ce60a2d79cc967506208085336d3d268ae0 ]
When there are concurrent uref release and bpf timer init operations,
the following sequence diagram is possible. It will break the guarantee
provided by bpf_timer: bpf_timer will still be alive after userspace
application releases or unpins the map. It also will lead to kmemleak
for old kernel version which doesn't release bpf_timer when map is
released.
bpf program X:
bpf_timer_init()
lock timer->lock
read timer->timer as NULL
read map->usercnt != 0
process Y:
close(map_fd)
// put last uref
bpf_map_put_uref()
atomic_dec_and_test(map->usercnt)
array_map_free_timers()
bpf_timer_cancel_and_free()
// just return
read timer->timer is NULL
t = bpf_map_kmalloc_node()
timer->timer = t
unlock timer->lock
Fix the problem by checking map->usercnt after timer->timer is assigned,
so when there are concurrent uref release and bpf timer init, either
bpf_timer_cancel_and_free() from uref release reads a no-NULL timer
or the newly-added atomic64_read() returns a zero usercnt.
Because atomic_dec_and_test(map->usercnt) and READ_ONCE(timer->timer)
in bpf_timer_cancel_and_free() are not protected by a lock, so add
a memory barrier to guarantee the order between map->usercnt and
timer->timer. Also use WRITE_ONCE(timer->timer, x) to match the lockless
read of timer->timer in bpf_timer_cancel_and_free().
Reported-by: Hsin-Wei Hung <hsinweih@uci.edu>
Closes: https://lore.kernel.org/bpf/CABcoxUaT2k9hWsS1tNgXyoU3E-=PuOgMn737qK984fbFmfYixQ@mail.gmail.com
Fixes: b00628b1c7
("bpf: Introduce bpf timers.")
Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20231030063616.1653024-1-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
b8a4f0a27c
commit
77bf9287c5
|
@ -1176,13 +1176,6 @@ BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map
|
|||
ret = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
if (!atomic64_read(&map->usercnt)) {
|
||||
/* maps with timers must be either held by user space
|
||||
* or pinned in bpffs.
|
||||
*/
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
/* allocate hrtimer via map_kmalloc to use memcg accounting */
|
||||
t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node);
|
||||
if (!t) {
|
||||
|
@ -1195,7 +1188,21 @@ BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map
|
|||
rcu_assign_pointer(t->callback_fn, NULL);
|
||||
hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT);
|
||||
t->timer.function = bpf_timer_cb;
|
||||
timer->timer = t;
|
||||
WRITE_ONCE(timer->timer, t);
|
||||
/* Guarantee the order between timer->timer and map->usercnt. So
|
||||
* when there are concurrent uref release and bpf timer init, either
|
||||
* bpf_timer_cancel_and_free() called by uref release reads a no-NULL
|
||||
* timer or atomic64_read() below returns a zero usercnt.
|
||||
*/
|
||||
smp_mb();
|
||||
if (!atomic64_read(&map->usercnt)) {
|
||||
/* maps with timers must be either held by user space
|
||||
* or pinned in bpffs.
|
||||
*/
|
||||
WRITE_ONCE(timer->timer, NULL);
|
||||
kfree(t);
|
||||
ret = -EPERM;
|
||||
}
|
||||
out:
|
||||
__bpf_spin_unlock_irqrestore(&timer->lock);
|
||||
return ret;
|
||||
|
@ -1370,7 +1377,7 @@ void bpf_timer_cancel_and_free(void *val)
|
|||
/* The subsequent bpf_timer_start/cancel() helpers won't be able to use
|
||||
* this timer, since it won't be initialized.
|
||||
*/
|
||||
timer->timer = NULL;
|
||||
WRITE_ONCE(timer->timer, NULL);
|
||||
out:
|
||||
__bpf_spin_unlock_irqrestore(&timer->lock);
|
||||
if (!t)
|
||||
|
|
Loading…
Reference in New Issue