rue/mm: add memory cgroup async page reclaim mechanism

Introduce background page reclaim mechanism for memcg, it can
be configured according to the cgroup priorities for different
reclaim strategies.

Signed-off-by: Yulei Zhang <yuleixzhang@tencent.com>
Signed-off-by: Mengmeng Chen <bauerchen@tencent.com>
Signed-off-by: Chunguang Xu <brookxu@tencent.com>
Signed-off-by: Honglin Li <honglinli@tencent.com>
This commit is contained in:
Honglin Li 2023-08-31 15:38:34 +08:00 committed by Haisu Wang
parent 0d35c4c639
commit 56d80c4ea2
7 changed files with 494 additions and 7 deletions

View File

@ -109,6 +109,13 @@ Brief summary of control files.
memory.kmem.tcp.failcnt show the number of tcp buf memory usage
hits limits
memory.kmem.tcp.max_usage_in_bytes show max tcp buf memory usage recorded
memory.async_ratio ratio setting for async reclaim wmark
memory.async_high high limit for start async reclaim
memory.async_low low limit to strop async reclaim
memory.async_distance_factor the distance between async_high and async_low, valid
value is from 1 to 150000, the unit is in fractions
of 1000000
==================================== ==========================================
1. History
@ -971,7 +978,19 @@ Test:
(Expect a bunch of notifications, and eventually, the oom-killer will
trigger.)
12. TODO
12. Async reclaim
=================
Add memory usage water mark for async reclaim, memory.async_ratio. Valid
value is from 0 to 100, which represents percentage of memory.limit, the
actual value of percentage * memory.limit will be asigned to memory.async_high.
When charge pages to memory cgroup, it will schedule a work to reclaim
pages when the memory usage exceed the memory.async_high, and the work
will stop when the reclaim reachs memory.async_low. memory.async_ratio = 0
means async reclaim is disabled, both async_low and async_high
would be max, which is the default value.
13. TODO
========
1. Make per-cgroup scanner reclaim not-shared pages first

View File

@ -1275,6 +1275,70 @@ PAGE_SIZE multiple when read back.
The max memory usage recorded for the cgroup and its
descendants since the creation of the cgroup.
memory.async_ratio
A read-write single value file which exists on non-root
cgroups. The default is 0.
Memory usage water mark for async reclaim. Valid value is from 0
to 100, which represents percentage of memory.high, the actual value
of percentage * memory.high will be asigned to async_high. When
charge pages to memory cgroup, it will schedule a work to reclaim
pages when the memory usage exceed the async_high.
0 means async reclaim is disabled, both async_low and async_high
would be max, which is the default value, and the mechanism takes
effect.
memory.async_high
A read-only single value file which exists on non-root cgroups.
The default is max.
Memory usage high water mark for async reclaim, if a cgroup's
memory usage exceeds this limit will trigger the async reclaim
logic.
memory.async_low
A read-only single value file which exists on non-root cgroups.
The default is max.
Memory usage low water mark for async reclaim, if a reclaim work
is scheduled, it will try to reclaim (async_high - async_low)
pages.
memory.async_distance_factor
A read-write single value file which exists on non-root cgroups.
The default is 1.
Define the distance between async_high and async_low. Valid value is
from 1 to 150000, the unit is in fractions of 1000000. The default value of
1 means the distance between async_high and async_low is 0.01% of memory.high
of the cgroup. The maximum value is 150000, which 15% of memory.high.
memory.async_ratio_delta
A read-write single value file which exists on non-root
cgroups. The default is -1.
Memory usage watermark calculation factor for async reclaim. Valid
value is from 1 to 10, which will be used to calculate the memory.async_ratio
relay on cgroup priority, async_ratio = 100 - (MAX_PRIORITY - cgroup_prio) *
async_ratio_delta, and the result will be used to calculate the
async reclaim wmark.
Default value -1 is a show stop value which means the async reclaim wmark
calculation will not relay on the cgroup priority, it only depends on the
async_ratio value we manully set.
memory.async_distance_delta
A read-write single value file which exists on non-root cgroups.
The default is 1.
Per cgroup prioirty factor to define the distance between async_high
and async_low. Valid value is from 1 to 50, The formula is
async_distance_factor = async_distance_delta * (cgroup_priority_value + 1).
Async reclaim mechanism will use the result for reclaim distance
cacluation. And when async_ratio_delta == -1, this value won't
take effect either when cgroup priority changes.
memory.oom.group
A read-write single value file which exists on non-root
cgroups. The default value is "0".

View File

@ -338,6 +338,12 @@ struct mem_cgroup {
int reclaim_failed;
struct list_head prio_list;
struct list_head prio_list_async;
/* per cgroup memory async reclaim */
unsigned int async_wmark;
unsigned int async_distance_factor;
int async_wmark_delta;
unsigned int async_distance_delta;
struct work_struct async_work;
/*
* set > 0 if pages under this cgroup are moving to other cgroup.

View File

@ -25,6 +25,10 @@ struct page_counter {
atomic_long_t low_usage;
atomic_long_t children_low_usage;
/* async reclaim threshold */
unsigned long async_low;
unsigned long async_high;
unsigned long watermark;
unsigned long failcnt;
@ -76,6 +80,11 @@ int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages);
int page_counter_memparse(const char *buf, const char *max,
unsigned long *nr_pages);
void page_counter_set_async_high(struct page_counter *counter,
unsigned long nr_pages);
void page_counter_set_async_low(struct page_counter *counter,
unsigned long nr_pages);
static inline void page_counter_reset_watermark(struct page_counter *counter)
{
counter->watermark = page_counter_read(counter);

View File

@ -125,6 +125,11 @@ static atomic_long_t memcg_reclaimed_count;
static unsigned long memcg_reclaim_goal;
static int memcg_cur_reclaim_prio = CGROUP_PRIORITY_MAX;
static DEFINE_SPINLOCK(memcg_reclaim_prio_lock);
/* workqueue for async reclaim */
struct workqueue_struct *memcg_async_reclaim_wq;
#define ASYNC_DISTANCE_DIV 1000000
#define ASYNC_RATIO_DIV 100
#define ASYNC_DISTANCE_DEF 1
/* Whether legacy memory+swap accounting is active */
static bool do_memsw_account(void)
@ -1875,6 +1880,10 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
memcg_events(memcg, PGSTEAL_KSWAPD) +
memcg_events(memcg, PGSTEAL_DIRECT) +
memcg_events(memcg, PGSTEAL_KHUGEPAGED));
seq_buf_printf(s, "pgscan_in_background %lu\n",
memcg_events(memcg, PGSCAN_KSWAPD));
seq_buf_printf(s, "pgsteal_in_background %lu\n",
memcg_events(memcg, PGSTEAL_KSWAPD));
for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) {
if (memcg_vm_event_stat[i] == PGPGIN ||
@ -2652,6 +2661,32 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
return 0;
}
static bool need_memcg_async_reclaim(struct mem_cgroup *memcg)
{
if (!sysctl_vm_memory_qos)
return false;
return page_counter_read(&memcg->memory) > memcg->memory.async_high;
}
static void async_reclaim_func(struct work_struct *work)
{
struct mem_cgroup *memcg;
unsigned long nr_pages;
memcg = container_of(work, struct mem_cgroup, async_work);
nr_pages = page_counter_read(&memcg->memory) - memcg->memory.async_low;
if (nr_pages <= 0)
return;
nr_pages = min(nr_pages,
(memcg->memory.async_high - memcg->memory.async_low));
memcg_memory_event(memcg, MEMCG_HIGH);
try_to_free_mem_cgroup_pages(memcg, nr_pages, GFP_KERNEL, true);
}
static unsigned long reclaim_high(struct mem_cgroup *memcg,
unsigned int nr_pages,
gfp_t gfp_mask)
@ -2915,6 +2950,47 @@ out:
css_put(&memcg->css);
}
static void setup_async_wmark(struct mem_cgroup *memcg)
{
unsigned long high_throttle, low_throttle, distance;
unsigned long high = cgroup_subsys_on_dfl(memory_cgrp_subsys) ?
memcg->memory.high : memcg->memory.max;
if (memcg->async_wmark) {
high_throttle = (memcg->async_wmark * high) / ASYNC_RATIO_DIV;
distance = mult_frac(high,
memcg->async_distance_factor, ASYNC_DISTANCE_DIV);
if (distance >= high_throttle)
low_throttle = memcg->memory.low;
else
low_throttle = high_throttle - distance;
} else {
high_throttle = PAGE_COUNTER_MAX;
low_throttle = PAGE_COUNTER_MAX;
}
page_counter_set_async_high(&memcg->memory, high_throttle);
page_counter_set_async_low(&memcg->memory, low_throttle);
}
static void async_reclaim_reset_factor(struct mem_cgroup *memcg,
unsigned int new_prio)
{
unsigned int wmark, distance;
if (memcg->async_wmark_delta < 0)
return;
wmark = ASYNC_RATIO_DIV -
(CGROUP_PRIORITY_MAX - new_prio) * memcg->async_wmark_delta;
xchg(&memcg->async_wmark, wmark);
distance = memcg->async_distance_delta * (new_prio + 1);
xchg(&memcg->async_distance_factor, distance);
setup_async_wmark(memcg);
if (need_memcg_async_reclaim(memcg))
queue_work(memcg_async_reclaim_wq, &memcg->async_work);
}
static struct task_struct *memcg_priod;
static struct task_struct *memcg_priod_async;
static DECLARE_WAIT_QUEUE_HEAD(memcg_prio_reclaim_wq);
@ -3052,6 +3128,7 @@ int mem_cgroup_notify_prio_change(struct cgroup_subsys_state *css,
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
async_reclaim_reset_factor(memcg, new_prio);
return memcg_notify_prio_change(memcg, old_prio, new_prio);
}
@ -3239,6 +3316,12 @@ done_restock:
do {
bool mem_high, swap_high;
if (need_memcg_async_reclaim(memcg)) {
/* Kick off per memory cgroup async reclaim */
queue_work(memcg_async_reclaim_wq, &memcg->async_work);
break;
}
mem_high = page_counter_read(&memcg->memory) >
READ_ONCE(memcg->memory.high);
swap_high = page_counter_read(&memcg->swap) >
@ -3972,8 +4055,14 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
}
} while (true);
if (!ret && enlarge)
memcg_oom_recover(memcg);
if (!ret) {
setup_async_wmark(memcg);
if (need_memcg_async_reclaim(memcg))
queue_work(memcg_async_reclaim_wq, &memcg->async_work);
if (enlarge)
memcg_oom_recover(memcg);
}
return ret;
}
@ -4175,6 +4264,8 @@ enum {
RES_MAX_USAGE,
RES_FAILCNT,
RES_SOFT_LIMIT,
ASYNC_HIGH_LIMIT,
ASYNC_LOW_LIMIT,
};
static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
@ -4215,6 +4306,10 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
return counter->failcnt;
case RES_SOFT_LIMIT:
return (u64)READ_ONCE(memcg->soft_limit) * PAGE_SIZE;
case ASYNC_HIGH_LIMIT:
return (u64)counter->async_high * PAGE_SIZE;
case ASYNC_LOW_LIMIT:
return (u64)counter->async_low * PAGE_SIZE;
default:
BUG();
}
@ -4656,6 +4751,11 @@ static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
seq_buf_printf(s, "file_cost %lu\n", file_cost);
}
#endif
seq_buf_printf(s, "pgscan_in_background %lu\n",
memcg_events(memcg, PGSCAN_KSWAPD));
seq_buf_printf(s, "pgsteal_in_background %lu\n",
memcg_events(memcg, PGSTEAL_KSWAPD));
}
#ifdef CONFIG_TEXT_UNEVICTABLE
@ -5898,6 +5998,74 @@ static int mem_cgroup_unevictable_percent_write(struct cgroup_subsys_state *css,
}
#endif
static int memory_async_reclaim_wmark_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
seq_printf(m, "%d\n", READ_ONCE(memcg->async_wmark));
return 0;
}
static ssize_t memory_async_reclaim_wmark_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
int ret, wmark;
buf = strstrip(buf);
if (!buf)
return -EINVAL;
ret = kstrtoint(buf, 0, &wmark);
if (ret)
return ret;
if (wmark > 100)
return -EINVAL;
xchg(&memcg->async_wmark, wmark);
setup_async_wmark(memcg);
if (need_memcg_async_reclaim(memcg))
queue_work(memcg_async_reclaim_wq, &memcg->async_work);
return nbytes;
}
static int memory_async_distance_factor_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
seq_printf(m, "%d\n", READ_ONCE(memcg->async_distance_factor));
return 0;
}
static ssize_t memory_async_distance_factor_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
int ret, factor;
buf = strstrip(buf);
if (!buf)
return -EINVAL;
ret = kstrtoint(buf, 0, &factor);
if (ret)
return ret;
if ((factor > 150000) || (factor < 1))
return -EINVAL;
xchg(&memcg->async_distance_factor, factor);
setup_async_wmark(memcg);
return nbytes;
}
static int memory_oom_group_show(struct seq_file *m, void *v);
static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off);
@ -5972,6 +6140,30 @@ static struct cftype mem_cgroup_legacy_files[] = {
.seq_show = memory_oom_group_show,
.write = memory_oom_group_write,
},
{
.name = "async_ratio",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = memory_async_reclaim_wmark_show,
.write = memory_async_reclaim_wmark_write,
},
{
.name = "async_high",
.flags = CFTYPE_NOT_ON_ROOT,
.private = MEMFILE_PRIVATE(_MEM, ASYNC_HIGH_LIMIT),
.read_u64 = mem_cgroup_read_u64,
},
{
.name = "async_low",
.flags = CFTYPE_NOT_ON_ROOT,
.private = MEMFILE_PRIVATE(_MEM, ASYNC_LOW_LIMIT),
.read_u64 = mem_cgroup_read_u64,
},
{
.name = "async_distance_factor",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = memory_async_distance_factor_show,
.write = memory_async_distance_factor_write,
},
{
.name = "cgroup.event_control", /* XXX: for compat */
.write = memcg_write_event_control,
@ -6352,6 +6544,7 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent)
goto fail;
INIT_WORK(&memcg->high_work, high_work_func);
INIT_WORK(&memcg->async_work, async_reclaim_func);
INIT_LIST_HEAD(&memcg->oom_notify);
mutex_init(&memcg->thresholds_lock);
spin_lock_init(&memcg->move_lock);
@ -6413,6 +6606,12 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
#endif
WRITE_ONCE(memcg->swappiness, mem_cgroup_swappiness(parent));
WRITE_ONCE(memcg->oom_kill_disable, READ_ONCE(parent->oom_kill_disable));
memcg->async_wmark = parent->async_wmark;
memcg->async_distance_factor = parent->async_distance_factor ?
: ASYNC_DISTANCE_DEF;
memcg->async_wmark_delta = parent->async_wmark_delta;
memcg->async_distance_delta = parent->async_distance_delta ?
: ASYNC_DISTANCE_DEF;
#ifdef CONFIG_MEMCG_ZRAM
memcg->zram_prio = parent->zram_prio;
#endif
@ -6426,7 +6625,12 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
page_counter_init(&memcg->swap, NULL);
page_counter_init(&memcg->kmem, NULL);
page_counter_init(&memcg->tcpmem, NULL);
}
setup_async_wmark(memcg);
if (!parent) {
memcg->async_wmark_delta = -1;
root_mem_cgroup = memcg;
return &memcg->css;
}
@ -6483,6 +6687,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
spin_unlock(&memcg_idr_lock);
async_reclaim_reset_factor(memcg, memcg_get_prio(memcg));
memcg_notify_prio_change(memcg, 0, memcg_get_prio(memcg));
return 0;
@ -6514,6 +6719,8 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
page_counter_set_min(&memcg->memory, 0);
page_counter_set_low(&memcg->memory, 0);
page_counter_set_async_high(&memcg->memory, PAGE_COUNTER_MAX);
page_counter_set_async_low(&memcg->memory, PAGE_COUNTER_MAX);
memcg_offline_kmem(memcg);
reparent_shrinker_deferred(memcg);
@ -6557,6 +6764,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
vmpressure_cleanup(&memcg->vmpressure);
cancel_work_sync(&memcg->high_work);
cancel_work_sync(&memcg->async_work);
mem_cgroup_remove_from_trees(memcg);
free_shrinker_info(memcg);
mem_cgroup_free(memcg);
@ -6585,6 +6793,8 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
page_counter_set_max(&memcg->tcpmem, PAGE_COUNTER_MAX);
page_counter_set_min(&memcg->memory, 0);
page_counter_set_low(&memcg->memory, 0);
page_counter_set_async_high(&memcg->memory, PAGE_COUNTER_MAX);
page_counter_set_async_low(&memcg->memory, PAGE_COUNTER_MAX);
page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX);
WRITE_ONCE(memcg->soft_limit, PAGE_COUNTER_MAX);
page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX);
@ -7587,6 +7797,10 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
break;
}
setup_async_wmark(memcg);
if (need_memcg_async_reclaim(memcg))
queue_work(memcg_async_reclaim_wq, &memcg->async_work);
memcg_wb_domain_size_changed(memcg);
return nbytes;
}
@ -7640,6 +7854,10 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
break;
}
setup_async_wmark(memcg);
if (need_memcg_async_reclaim(memcg))
queue_work(memcg_async_reclaim_wq, &memcg->async_work);
memcg_wb_domain_size_changed(memcg);
return nbytes;
}
@ -7817,6 +8035,84 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
return nbytes;
}
static int memory_async_high_wmark_show(struct seq_file *m, void *v)
{
return seq_puts_memcg_tunable(m,
READ_ONCE(mem_cgroup_from_seq(m)->memory.async_high));
}
static int memory_async_low_wmark_show(struct seq_file *m, void *v)
{
return seq_puts_memcg_tunable(m,
READ_ONCE(mem_cgroup_from_seq(m)->memory.async_low));
}
static int memory_async_distance_delta_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
seq_printf(m, "%d\n", READ_ONCE(memcg->async_distance_delta));
return 0;
}
static ssize_t memory_async_distance_delta_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
int ret, delta;
buf = strstrip(buf);
if (!buf)
return -EINVAL;
ret = kstrtoint(buf, 0, &delta);
if (ret)
return ret;
if ((delta > 50) || (delta < 1))
return -EINVAL;
xchg(&memcg->async_distance_delta, delta);
async_reclaim_reset_factor(memcg, memcg_get_prio(memcg));
return nbytes;
}
static int memory_async_wmark_delta_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
seq_printf(m, "%d\n", READ_ONCE(memcg->async_wmark_delta));
return 0;
}
static ssize_t memory_async_wmark_delta_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
int ret, delta;
buf = strstrip(buf);
if (!buf)
return -EINVAL;
ret = kstrtoint(buf, 0, &delta);
if (ret)
return ret;
if (((delta > 10) || (delta < 1)) && (delta != -1))
return -EINVAL;
xchg(&memcg->async_wmark_delta, delta);
async_reclaim_reset_factor(memcg, memcg_get_prio(memcg));
return nbytes;
}
static struct cftype memory_files[] = {
{
.name = "current",
@ -7890,6 +8186,40 @@ static struct cftype memory_files[] = {
.flags = CFTYPE_NS_DELEGATABLE,
.write = memory_reclaim,
},
{
.name = "async_ratio",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = memory_async_reclaim_wmark_show,
.write = memory_async_reclaim_wmark_write,
},
{
.name = "async_high",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = memory_async_high_wmark_show,
},
{
.name = "async_low",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = memory_async_low_wmark_show,
},
{
.name = "async_distance_factor",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = memory_async_distance_factor_show,
.write = memory_async_distance_factor_write,
},
{
.name = "async_ratio_delta",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = memory_async_wmark_delta_show,
.write = memory_async_wmark_delta_write,
},
{
.name = "async_distance_delta",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = memory_async_distance_delta_show,
.write = memory_async_distance_delta_write,
},
{ } /* terminate */
};
@ -8479,6 +8809,13 @@ static int __init mem_cgroup_init(void)
{
int cpu, node;
memcg_async_reclaim_wq = alloc_workqueue("memcg_async_reclaim",
WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_FREEZABLE,
WQ_UNBOUND_MAX_ACTIVE);
if (!memcg_async_reclaim_wq)
return -ENOMEM;
/*
* Currently s32 type (can refer to struct batched_lruvec_stat) is
* used for per-memcg-per-cpu caching of per-node statistics. In order

View File

@ -234,6 +234,34 @@ void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
propagate_protected_usage(c, atomic_long_read(&c->usage));
}
/**
* page_counter_set_async_high - set the start throttle of memory for
* memcg async reclaim
* @counter: counter
* @nr_pages: value to set
*
* The caller must serialize invocations on the same counter.
*/
void page_counter_set_async_high(struct page_counter *counter,
unsigned long nr_pages)
{
xchg(&counter->async_high, nr_pages);
}
/**
* page_counter_set_async_low - set the stop throttle of memory for
* memcg async reclaim
* @counter: counter
* @nr_pages: value to set
*
* The caller must serialize invocations on the same counter.
*/
void page_counter_set_async_low(struct page_counter *counter,
unsigned long nr_pages)
{
xchg(&counter->async_low, nr_pages);
}
/**
* page_counter_memparse - memparse() for page counter limits
* @buf: string to parse

View File

@ -1163,6 +1163,24 @@ static int reclaimer_offset(void)
return PGSTEAL_DIRECT - PGSTEAL_KSWAPD;
}
static int rue_reclaimer_offset(struct scan_control *sc)
{
BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD !=
PGDEMOTE_DIRECT - PGDEMOTE_KSWAPD);
BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD !=
PGSCAN_DIRECT - PGSCAN_KSWAPD);
BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD !=
PGDEMOTE_KHUGEPAGED - PGDEMOTE_KSWAPD);
BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD !=
PGSCAN_KHUGEPAGED - PGSCAN_KSWAPD);
if (current_is_kswapd() || (cgroup_reclaim(sc) && current_work()))
return 0;
if (current_is_khugepaged())
return PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD;
return PGSTEAL_DIRECT - PGSTEAL_KSWAPD;
}
static inline int is_page_cache_freeable(struct folio *folio)
{
/*
@ -2676,7 +2694,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
&nr_scanned, sc, lru);
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
item = PGSCAN_KSWAPD + reclaimer_offset();
item = PGSCAN_KSWAPD + rue_reclaimer_offset(sc);
if (!cgroup_reclaim(sc))
__count_vm_events(item, nr_scanned);
__count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned);
@ -2693,7 +2711,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
move_folios_to_lru(lruvec, &folio_list);
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
item = PGSTEAL_KSWAPD + reclaimer_offset();
item = PGSTEAL_KSWAPD + rue_reclaimer_offset(sc);
if (!cgroup_reclaim(sc))
__count_vm_events(item, nr_reclaimed);
__count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed);
@ -5200,7 +5218,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
break;
}
item = PGSCAN_KSWAPD + reclaimer_offset();
item = PGSCAN_KSWAPD + rue_reclaimer_offset(sc);
if (!cgroup_reclaim(sc)) {
__count_vm_events(item, isolated);
__count_vm_events(PGREFILL, sorted);
@ -5390,7 +5408,7 @@ retry:
if (walk && walk->batched)
reset_batch_size(lruvec, walk);
item = PGSTEAL_KSWAPD + reclaimer_offset();
item = PGSTEAL_KSWAPD + rue_reclaimer_offset(sc);
if (!cgroup_reclaim(sc))
__count_vm_events(item, reclaimed);
__count_memcg_events(memcg, item, reclaimed);
@ -6717,6 +6735,12 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
sc->nr_scanned - scanned,
sc->nr_reclaimed - reclaimed);
if (cgroup_reclaim(sc) &&
((sc->nr_reclaimed >= sc->nr_to_reclaim))) {
mem_cgroup_iter_break(target_memcg, memcg);
break;
}
} while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)));
}