mm: multi-gen LRU: clarify scan_control flags

Among the flags in scan_control:
1. sc->may_swap, which indicates swap constraint due to memsw.max, is
   supported as usual.
2. sc->proactive, which indicates reclaim by memory.reclaim, may not
   opportunistically skip the aging path, since it is considered less
   latency sensitive.
3. !(sc->gfp_mask & __GFP_IO), which indicates IO constraint, lowers
   swappiness to prioritize file LRU, since clean file folios are more
   likely to exist.
4. sc->may_writepage and sc->may_unmap, which indicate opportunistic
   reclaim, are rejected, since unmapped clean folios are already
   prioritized. Scanning for more of them is likely futile and can
   cause high reclaim latency when there is a large number of memcgs.

The rest are handled by the existing code.

Link: https://lkml.kernel.org/r/20221222041905.2431096-8-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michael Larabel <Michael@MichaelLarabel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Yu Zhao 2022-12-21 21:19:05 -07:00 committed by Andrew Morton
parent e4dde56cd2
commit e9d4e1ee78
1 changed file with 28 additions and 28 deletions

View File

@ -3210,6 +3210,9 @@ static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc)
struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct mem_cgroup *memcg = lruvec_memcg(lruvec);
struct pglist_data *pgdat = lruvec_pgdat(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec);
if (!sc->may_swap)
return 0;
if (!can_demote(pgdat->node_id, sc) && if (!can_demote(pgdat->node_id, sc) &&
mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH) mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH)
return 0; return 0;
@ -4236,7 +4239,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
} while (err == -EAGAIN); } while (err == -EAGAIN);
} }
static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat) static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat, bool force_alloc)
{ {
struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk; struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk;
@ -4244,7 +4247,7 @@ static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat)
VM_WARN_ON_ONCE(walk); VM_WARN_ON_ONCE(walk);
walk = &pgdat->mm_walk; walk = &pgdat->mm_walk;
} else if (!pgdat && !walk) { } else if (!walk && force_alloc) {
VM_WARN_ON_ONCE(current_is_kswapd()); VM_WARN_ON_ONCE(current_is_kswapd());
walk = kzalloc(sizeof(*walk), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN); walk = kzalloc(sizeof(*walk), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
@ -4430,7 +4433,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
goto done; goto done;
} }
walk = set_mm_walk(NULL); walk = set_mm_walk(NULL, true);
if (!walk) { if (!walk) {
success = iterate_mm_list_nowalk(lruvec, max_seq); success = iterate_mm_list_nowalk(lruvec, max_seq);
goto done; goto done;
@ -4499,8 +4502,6 @@ static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc
struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MIN_SEQ(lruvec); DEFINE_MIN_SEQ(lruvec);
VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
/* see the comment on lru_gen_folio */ /* see the comment on lru_gen_folio */
gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]); gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
@ -4756,12 +4757,8 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca
{ {
bool success; bool success;
/* unmapping inhibited */
if (!sc->may_unmap && folio_mapped(folio))
return false;
/* swapping inhibited */ /* swapping inhibited */
if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) && if (!(sc->gfp_mask & __GFP_IO) &&
(folio_test_dirty(folio) || (folio_test_dirty(folio) ||
(folio_test_anon(folio) && !folio_test_swapcache(folio)))) (folio_test_anon(folio) && !folio_test_swapcache(folio))))
return false; return false;
@ -4858,9 +4855,8 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
__count_vm_events(PGSCAN_ANON + type, isolated); __count_vm_events(PGSCAN_ANON + type, isolated);
/* /*
* There might not be eligible pages due to reclaim_idx, may_unmap and * There might not be eligible folios due to reclaim_idx. Check the
* may_writepage. Check the remaining to prevent livelock if it's not * remaining to prevent livelock if it's not making progress.
* making progress.
*/ */
return isolated || !remaining ? scanned : 0; return isolated || !remaining ? scanned : 0;
} }
@ -5120,9 +5116,7 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool
struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MAX_SEQ(lruvec); DEFINE_MAX_SEQ(lruvec);
if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg) || if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg))
(mem_cgroup_below_low(sc->target_mem_cgroup, memcg) &&
!sc->memcg_low_reclaim))
return 0; return 0;
if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan)) if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
@ -5150,17 +5144,14 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
long nr_to_scan; long nr_to_scan;
unsigned long scanned = 0; unsigned long scanned = 0;
unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
int swappiness = get_swappiness(lruvec, sc);
/* clean file folios are more likely to exist */
if (swappiness && !(sc->gfp_mask & __GFP_IO))
swappiness = 1;
while (true) { while (true) {
int delta; int delta;
int swappiness;
if (sc->may_swap)
swappiness = get_swappiness(lruvec, sc);
else if (!cgroup_reclaim(sc) && get_swappiness(lruvec, sc))
swappiness = 1;
else
swappiness = 0;
nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness); nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
if (nr_to_scan <= 0) if (nr_to_scan <= 0)
@ -5291,12 +5282,13 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
struct blk_plug plug; struct blk_plug plug;
VM_WARN_ON_ONCE(global_reclaim(sc)); VM_WARN_ON_ONCE(global_reclaim(sc));
VM_WARN_ON_ONCE(!sc->may_writepage || !sc->may_unmap);
lru_add_drain(); lru_add_drain();
blk_start_plug(&plug); blk_start_plug(&plug);
set_mm_walk(lruvec_pgdat(lruvec)); set_mm_walk(NULL, sc->proactive);
if (try_to_shrink_lruvec(lruvec, sc)) if (try_to_shrink_lruvec(lruvec, sc))
lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG); lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG);
@ -5352,11 +5344,19 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *
VM_WARN_ON_ONCE(!global_reclaim(sc)); VM_WARN_ON_ONCE(!global_reclaim(sc));
/*
* Unmapped clean folios are already prioritized. Scanning for more of
* them is likely futile and can cause high reclaim latency when there
* is a large number of memcgs.
*/
if (!sc->may_writepage || !sc->may_unmap)
goto done;
lru_add_drain(); lru_add_drain();
blk_start_plug(&plug); blk_start_plug(&plug);
set_mm_walk(pgdat); set_mm_walk(pgdat, sc->proactive);
set_initial_priority(pgdat, sc); set_initial_priority(pgdat, sc);
@ -5374,7 +5374,7 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *
clear_mm_walk(); clear_mm_walk();
blk_finish_plug(&plug); blk_finish_plug(&plug);
done:
/* kswapd should never fail */ /* kswapd should never fail */
pgdat->kswapd_failures = 0; pgdat->kswapd_failures = 0;
} }
@ -5943,7 +5943,7 @@ static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
set_task_reclaim_state(current, &sc.reclaim_state); set_task_reclaim_state(current, &sc.reclaim_state);
flags = memalloc_noreclaim_save(); flags = memalloc_noreclaim_save();
blk_start_plug(&plug); blk_start_plug(&plug);
if (!set_mm_walk(NULL)) { if (!set_mm_walk(NULL, true)) {
err = -ENOMEM; err = -ENOMEM;
goto done; goto done;
} }