mm/vmscan.c: fix data races using kswapd_classzone_idx
pgdat->kswapd_classzone_idx could be accessed concurrently in wakeup_kswapd(). Plain writes and reads without any lock protection result in data races. Fix them by adding a pair of READ|WRITE_ONCE() as well as saving a branch (compilers might well optimize the original code in an unintentional way anyway). While at it, also take care of pgdat->kswapd_order and non-kswapd threads in allow_direct_reclaim(). The data races were reported by KCSAN,

 BUG: KCSAN: data-race in wakeup_kswapd / wakeup_kswapd

 write to 0xffff9f427ffff2dc of 4 bytes by task 7454 on cpu 13:
  wakeup_kswapd+0xf1/0x400
  wakeup_kswapd at mm/vmscan.c:3967
  wake_all_kswapds+0x59/0xc0
  wake_all_kswapds at mm/page_alloc.c:4241
  __alloc_pages_slowpath+0xdcc/0x1290
  __alloc_pages_slowpath at mm/page_alloc.c:4512
  __alloc_pages_nodemask+0x3bb/0x450
  alloc_pages_vma+0x8a/0x2c0
  do_anonymous_page+0x16e/0x6f0
  __handle_mm_fault+0xcd5/0xd40
  handle_mm_fault+0xfc/0x2f0
  do_page_fault+0x263/0x6f9
  page_fault+0x34/0x40

 1 lock held by mtest01/7454:
  #0: ffff9f425afe8808 (&mm->mmap_sem#2){++++}, at: do_page_fault+0x143/0x6f9
  do_user_addr_fault at arch/x86/mm/fault.c:1405
  (inlined by) do_page_fault at arch/x86/mm/fault.c:1539
 irq event stamp: 6944085
 count_memcg_event_mm+0x1a6/0x270
 count_memcg_event_mm+0x119/0x270
 __do_softirq+0x34c/0x57c
 irq_exit+0xa2/0xc0

 read to 0xffff9f427ffff2dc of 4 bytes by task 7472 on cpu 38:
  wakeup_kswapd+0xc8/0x400
  wake_all_kswapds+0x59/0xc0
  __alloc_pages_slowpath+0xdcc/0x1290
  __alloc_pages_nodemask+0x3bb/0x450
  alloc_pages_vma+0x8a/0x2c0
  do_anonymous_page+0x16e/0x6f0
  __handle_mm_fault+0xcd5/0xd40
  handle_mm_fault+0xfc/0x2f0
  do_page_fault+0x263/0x6f9
  page_fault+0x34/0x40

 1 lock held by mtest01/7472:
  #0: ffff9f425a9ac148 (&mm->mmap_sem#2){++++}, at: do_page_fault+0x143/0x6f9
 irq event stamp: 6793561
 count_memcg_event_mm+0x1a6/0x270
 count_memcg_event_mm+0x119/0x270
 __do_softirq+0x34c/0x57c
 irq_exit+0xa2/0xc0

 BUG: KCSAN: data-race in kswapd / wakeup_kswapd

 write to 0xffff90973ffff2dc of 4 bytes by task 820 on cpu 6:
  kswapd+0x27c/0x8d0
  kthread+0x1e0/0x200
  ret_from_fork+0x27/0x50

 read to 0xffff90973ffff2dc of 4 bytes by task 6299 on cpu 0:
  wakeup_kswapd+0xf3/0x450
  wake_all_kswapds+0x59/0xc0
  __alloc_pages_slowpath+0xdcc/0x1290
  __alloc_pages_nodemask+0x3bb/0x450
  alloc_pages_vma+0x8a/0x2c0
  do_anonymous_page+0x170/0x700
  __handle_mm_fault+0xc9f/0xd00
  handle_mm_fault+0xfc/0x2f0
  do_page_fault+0x263/0x6f9
  page_fault+0x34/0x40

Signed-off-by: Qian Cai <cai@lca.pw>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Marco Elver <elver@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Link: http://lkml.kernel.org/r/1582749472-5171-1-git-send-email-cai@lca.pw
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
6b700b5b3c
commit
5644e1fbbf
47
mm/vmscan.c
47
mm/vmscan.c
|
@ -3136,8 +3136,9 @@ static bool allow_direct_reclaim(pg_data_t *pgdat)
|
|||
|
||||
/* kswapd must be awake if processes are being throttled */
|
||||
if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) {
|
||||
pgdat->kswapd_classzone_idx = min(pgdat->kswapd_classzone_idx,
|
||||
(enum zone_type)ZONE_NORMAL);
|
||||
if (READ_ONCE(pgdat->kswapd_classzone_idx) > ZONE_NORMAL)
|
||||
WRITE_ONCE(pgdat->kswapd_classzone_idx, ZONE_NORMAL);
|
||||
|
||||
wake_up_interruptible(&pgdat->kswapd_wait);
|
||||
}
|
||||
|
||||
|
@ -3769,9 +3770,9 @@ out:
|
|||
static enum zone_type kswapd_classzone_idx(pg_data_t *pgdat,
|
||||
enum zone_type prev_classzone_idx)
|
||||
{
|
||||
if (pgdat->kswapd_classzone_idx == MAX_NR_ZONES)
|
||||
return prev_classzone_idx;
|
||||
return pgdat->kswapd_classzone_idx;
|
||||
enum zone_type curr_idx = READ_ONCE(pgdat->kswapd_classzone_idx);
|
||||
|
||||
return curr_idx == MAX_NR_ZONES ? prev_classzone_idx : curr_idx;
|
||||
}
|
||||
|
||||
static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order,
|
||||
|
@ -3815,8 +3816,11 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o
|
|||
* the previous request that slept prematurely.
|
||||
*/
|
||||
if (remaining) {
|
||||
pgdat->kswapd_classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx);
|
||||
pgdat->kswapd_order = max(pgdat->kswapd_order, reclaim_order);
|
||||
WRITE_ONCE(pgdat->kswapd_classzone_idx,
|
||||
kswapd_classzone_idx(pgdat, classzone_idx));
|
||||
|
||||
if (READ_ONCE(pgdat->kswapd_order) < reclaim_order)
|
||||
WRITE_ONCE(pgdat->kswapd_order, reclaim_order);
|
||||
}
|
||||
|
||||
finish_wait(&pgdat->kswapd_wait, &wait);
|
||||
|
@ -3893,12 +3897,12 @@ static int kswapd(void *p)
|
|||
tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
|
||||
set_freezable();
|
||||
|
||||
pgdat->kswapd_order = 0;
|
||||
pgdat->kswapd_classzone_idx = MAX_NR_ZONES;
|
||||
WRITE_ONCE(pgdat->kswapd_order, 0);
|
||||
WRITE_ONCE(pgdat->kswapd_classzone_idx, MAX_NR_ZONES);
|
||||
for ( ; ; ) {
|
||||
bool ret;
|
||||
|
||||
alloc_order = reclaim_order = pgdat->kswapd_order;
|
||||
alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order);
|
||||
classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx);
|
||||
|
||||
kswapd_try_sleep:
|
||||
|
@ -3906,10 +3910,10 @@ kswapd_try_sleep:
|
|||
classzone_idx);
|
||||
|
||||
/* Read the new order and classzone_idx */
|
||||
alloc_order = reclaim_order = pgdat->kswapd_order;
|
||||
alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order);
|
||||
classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx);
|
||||
pgdat->kswapd_order = 0;
|
||||
pgdat->kswapd_classzone_idx = MAX_NR_ZONES;
|
||||
WRITE_ONCE(pgdat->kswapd_order, 0);
|
||||
WRITE_ONCE(pgdat->kswapd_classzone_idx, MAX_NR_ZONES);
|
||||
|
||||
ret = try_to_freeze();
|
||||
if (kthread_should_stop())
|
||||
|
@ -3953,20 +3957,23 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
|
|||
enum zone_type classzone_idx)
|
||||
{
|
||||
pg_data_t *pgdat;
|
||||
enum zone_type curr_idx;
|
||||
|
||||
if (!managed_zone(zone))
|
||||
return;
|
||||
|
||||
if (!cpuset_zone_allowed(zone, gfp_flags))
|
||||
return;
|
||||
pgdat = zone->zone_pgdat;
|
||||
|
||||
if (pgdat->kswapd_classzone_idx == MAX_NR_ZONES)
|
||||
pgdat->kswapd_classzone_idx = classzone_idx;
|
||||
else
|
||||
pgdat->kswapd_classzone_idx = max(pgdat->kswapd_classzone_idx,
|
||||
classzone_idx);
|
||||
pgdat->kswapd_order = max(pgdat->kswapd_order, order);
|
||||
pgdat = zone->zone_pgdat;
|
||||
curr_idx = READ_ONCE(pgdat->kswapd_classzone_idx);
|
||||
|
||||
if (curr_idx == MAX_NR_ZONES || curr_idx < classzone_idx)
|
||||
WRITE_ONCE(pgdat->kswapd_classzone_idx, classzone_idx);
|
||||
|
||||
if (READ_ONCE(pgdat->kswapd_order) < order)
|
||||
WRITE_ONCE(pgdat->kswapd_order, order);
|
||||
|
||||
if (!waitqueue_active(&pgdat->kswapd_wait))
|
||||
return;
|
||||
|
||||
|
|
Loading…
Reference in New Issue