bpf: Address KCSAN report on bpf_lru_list
[ Upstream commit ee9fd0ac30 ]
KCSAN reported a data-race when accessing node->ref.
Although node->ref does not have to be accurate, take this
chance to use the more common READ_ONCE() and WRITE_ONCE()
pattern instead of data_race().

There are existing bpf_lru_node_is_ref() and bpf_lru_node_set_ref()
helpers. This patch adds bpf_lru_node_clear_ref() to do the
WRITE_ONCE(node->ref, 0) as well.
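
For context, a minimal sketch of the marked-access pattern this patch
settles on (the two helpers below mirror the diff that follows; the
data_race() line in the trailing comment only illustrates the
alternative mentioned above and is not part of the patch):

	/* node->ref is only an approximation of access frequency, so a
	 * racy read/write is functionally harmless, but plain accesses
	 * trip KCSAN. READ_ONCE()/WRITE_ONCE() mark the lockless
	 * accesses as intentional.
	 */
	static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node)
	{
		return READ_ONCE(node->ref);
	}

	static void bpf_lru_node_clear_ref(struct bpf_lru_node *node)
	{
		WRITE_ONCE(node->ref, 0);
	}

	/* The alternative would annotate the race site instead, e.g.
	 * data_race(node->ref = 0), which tells KCSAN the race is
	 * deliberate without marking the access itself.
	 */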
==================================================================
BUG: KCSAN: data-race in __bpf_lru_list_rotate / __htab_lru_percpu_map_update_elem
write to 0xffff888137038deb of 1 bytes by task 11240 on cpu 1:
__bpf_lru_node_move kernel/bpf/bpf_lru_list.c:113 [inline]
__bpf_lru_list_rotate_active kernel/bpf/bpf_lru_list.c:149 [inline]
__bpf_lru_list_rotate+0x1bf/0x750 kernel/bpf/bpf_lru_list.c:240
bpf_lru_list_pop_free_to_local kernel/bpf/bpf_lru_list.c:329 [inline]
bpf_common_lru_pop_free kernel/bpf/bpf_lru_list.c:447 [inline]
bpf_lru_pop_free+0x638/0xe20 kernel/bpf/bpf_lru_list.c:499
prealloc_lru_pop kernel/bpf/hashtab.c:290 [inline]
__htab_lru_percpu_map_update_elem+0xe7/0x820 kernel/bpf/hashtab.c:1316
bpf_percpu_hash_update+0x5e/0x90 kernel/bpf/hashtab.c:2313
bpf_map_update_value+0x2a9/0x370 kernel/bpf/syscall.c:200
generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1687
bpf_map_do_batch+0x2d9/0x3d0 kernel/bpf/syscall.c:4534
__sys_bpf+0x338/0x810
__do_sys_bpf kernel/bpf/syscall.c:5096 [inline]
__se_sys_bpf kernel/bpf/syscall.c:5094 [inline]
__x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5094
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

read to 0xffff888137038deb of 1 bytes by task 11241 on cpu 0:
bpf_lru_node_set_ref kernel/bpf/bpf_lru_list.h:70 [inline]
__htab_lru_percpu_map_update_elem+0x2f1/0x820 kernel/bpf/hashtab.c:1332
bpf_percpu_hash_update+0x5e/0x90 kernel/bpf/hashtab.c:2313
bpf_map_update_value+0x2a9/0x370 kernel/bpf/syscall.c:200
generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1687
bpf_map_do_batch+0x2d9/0x3d0 kernel/bpf/syscall.c:4534
__sys_bpf+0x338/0x810
__do_sys_bpf kernel/bpf/syscall.c:5096 [inline]
__se_sys_bpf kernel/bpf/syscall.c:5094 [inline]
__x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5094
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

value changed: 0x01 -> 0x00

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 11241 Comm: syz-executor.3 Not tainted 6.3.0-rc7-syzkaller-00136-g6a66fdd29ea1 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/30/2023
==================================================================
Reported-by: syzbot+ebe648a84e8784763f82@syzkaller.appspotmail.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20230511043748.1384166-1-martin.lau@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Jianping Liu <frankjpliu@tencent.com>
diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
--- a/kernel/bpf/bpf_lru_list.c
+++ b/kernel/bpf/bpf_lru_list.c
@@ -41,7 +41,12 @@ static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l)
 /* bpf_lru_node helpers */
 static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node)
 {
-	return node->ref;
+	return READ_ONCE(node->ref);
+}
+
+static void bpf_lru_node_clear_ref(struct bpf_lru_node *node)
+{
+	WRITE_ONCE(node->ref, 0);
 }
 
 static void bpf_lru_list_count_inc(struct bpf_lru_list *l,
@@ -89,7 +94,7 @@ static void __bpf_lru_node_move_in(struct bpf_lru_list *l,
 
 	bpf_lru_list_count_inc(l, tgt_type);
 	node->type = tgt_type;
-	node->ref = 0;
+	bpf_lru_node_clear_ref(node);
 	list_move(&node->list, &l->lists[tgt_type]);
 }
 
@@ -110,7 +115,7 @@ static void __bpf_lru_node_move(struct bpf_lru_list *l,
 		bpf_lru_list_count_inc(l, tgt_type);
 		node->type = tgt_type;
 	}
-	node->ref = 0;
+	bpf_lru_node_clear_ref(node);
 
 	/* If the moving node is the next_inactive_rotation candidate,
 	 * move the next_inactive_rotation pointer also.
@@ -353,7 +358,7 @@ static void __local_list_add_pending(struct bpf_lru *lru,
 	*(u32 *)((void *)node + lru->hash_offset) = hash;
 	node->cpu = cpu;
 	node->type = BPF_LRU_LOCAL_LIST_T_PENDING;
-	node->ref = 0;
+	bpf_lru_node_clear_ref(node);
 	list_add(&node->list, local_pending_list(loc_l));
 }
 
@@ -419,7 +424,7 @@ static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru,
 	if (!list_empty(free_list)) {
 		node = list_first_entry(free_list, struct bpf_lru_node, list);
 		*(u32 *)((void *)node + lru->hash_offset) = hash;
-		node->ref = 0;
+		bpf_lru_node_clear_ref(node);
 		__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE);
 	}
 
@@ -522,7 +527,7 @@ static void bpf_common_lru_push_free(struct bpf_lru *lru,
 		}
 
 		node->type = BPF_LRU_LOCAL_LIST_T_FREE;
-		node->ref = 0;
+		bpf_lru_node_clear_ref(node);
 		list_move(&node->list, local_free_list(loc_l));
 
 		raw_spin_unlock_irqrestore(&loc_l->lock, flags);
@@ -568,7 +573,7 @@ static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf,
 
 		node = (struct bpf_lru_node *)(buf + node_offset);
 		node->type = BPF_LRU_LIST_T_FREE;
-		node->ref = 0;
+		bpf_lru_node_clear_ref(node);
 		list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
 		buf += elem_size;
 	}
@@ -594,7 +599,7 @@ again:
 			node = (struct bpf_lru_node *)(buf + node_offset);
 			node->cpu = cpu;
 			node->type = BPF_LRU_LIST_T_FREE;
-			node->ref = 0;
+			bpf_lru_node_clear_ref(node);
 			list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
 			i++;
 			buf += elem_size;
diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h
--- a/kernel/bpf/bpf_lru_list.h
+++ b/kernel/bpf/bpf_lru_list.h
@@ -63,11 +63,8 @@ struct bpf_lru {
 
 static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node)
 {
-	/* ref is an approximation on access frequency. It does not
-	 * have to be very accurate. Hence, no protection is used.
-	 */
-	if (!node->ref)
-		node->ref = 1;
+	if (!READ_ONCE(node->ref))
+		WRITE_ONCE(node->ref, 1);
 }
 
 int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,