rue/mm: add memory cgroup async page reclaim mechanism
Introduce background page reclaim mechanism for memcg, it can be configured according to the cgroup priorities for different reclaim strategies. Signed-off-by: Yulei Zhang <yuleixzhang@tencent.com> Signed-off-by: Mengmeng Chen <bauerchen@tencent.com> Signed-off-by: Chunguang Xu <brookxu@tencent.com> Signed-off-by: Honglin Li <honglinli@tencent.com>
This commit is contained in:
parent
0d35c4c639
commit
56d80c4ea2
|
@ -109,6 +109,13 @@ Brief summary of control files.
|
|||
memory.kmem.tcp.failcnt show the number of tcp buf memory usage
|
||||
hits limits
|
||||
memory.kmem.tcp.max_usage_in_bytes show max tcp buf memory usage recorded
|
||||
memory.async_ratio ratio setting for async reclaim wmark
|
||||
memory.async_high high limit for start async reclaim
|
||||
memory.async_low low limit to stop async reclaim
|
||||
memory.async_distance_factor the distance between async_high and async_low, valid
|
||||
value is from 1 to 150000, the unit is in fractions
|
||||
of 1000000
|
||||
|
||||
==================================== ==========================================
|
||||
|
||||
1. History
|
||||
|
@ -971,7 +978,19 @@ Test:
|
|||
(Expect a bunch of notifications, and eventually, the oom-killer will
|
||||
trigger.)
|
||||
|
||||
12. TODO
|
||||
12. Async reclaim
|
||||
=================
|
||||
|
||||
Add memory usage water mark for async reclaim, memory.async_ratio. Valid
|
||||
value is from 0 to 100, which represents percentage of memory.limit, the
|
||||
actual value of percentage * memory.limit will be assigned to memory.async_high.
|
||||
When charge pages to memory cgroup, it will schedule a work to reclaim
|
||||
pages when the memory usage exceeds the memory.async_high, and the work
|
||||
will stop when the reclaim reaches memory.async_low. memory.async_ratio = 0
|
||||
means async reclaim is disabled, both async_low and async_high
|
||||
would be max, which is the default value.
|
||||
|
||||
13. TODO
|
||||
========
|
||||
|
||||
1. Make per-cgroup scanner reclaim not-shared pages first
|
||||
|
|
|
@ -1275,6 +1275,70 @@ PAGE_SIZE multiple when read back.
|
|||
The max memory usage recorded for the cgroup and its
|
||||
descendants since the creation of the cgroup.
|
||||
|
||||
memory.async_ratio
|
||||
A read-write single value file which exists on non-root
|
||||
cgroups. The default is 0.
|
||||
|
||||
Memory usage water mark for async reclaim. Valid value is from 0
|
||||
to 100, which represents percentage of memory.high, the actual value
|
||||
of percentage * memory.high will be assigned to async_high. When
|
||||
charge pages to memory cgroup, it will schedule a work to reclaim
|
||||
pages when the memory usage exceeds the async_high.
|
||||
|
||||
0 means async reclaim is disabled, both async_low and async_high
|
||||
would be max, which is the default value, and the mechanism takes no
|
||||
effect.
|
||||
|
||||
memory.async_high
|
||||
A read-only single value file which exists on non-root cgroups.
|
||||
The default is max.
|
||||
|
||||
Memory usage high water mark for async reclaim, if a cgroup's
|
||||
memory usage exceeds this limit will trigger the async reclaim
|
||||
logic.
|
||||
|
||||
memory.async_low
|
||||
A read-only single value file which exists on non-root cgroups.
|
||||
The default is max.
|
||||
|
||||
Memory usage low water mark for async reclaim, if a reclaim work
|
||||
is scheduled, it will try to reclaim (async_high - async_low)
|
||||
pages.
|
||||
|
||||
memory.async_distance_factor
|
||||
A read-write single value file which exists on non-root cgroups.
|
||||
The default is 1.
|
||||
|
||||
Define the distance between async_high and async_low. Valid value is
|
||||
from 1 to 150000, the unit is in fractions of 1000000. The default value of
|
||||
1 means the distance between async_high and async_low is 0.0001% of memory.high
|
||||
of the cgroup. The maximum value is 150000, which is 15% of memory.high.
|
||||
|
||||
memory.async_ratio_delta
|
||||
A read-write single value file which exists on non-root
|
||||
cgroups. The default is -1.
|
||||
|
||||
Memory usage watermark calculation factor for async reclaim. Valid
|
||||
value is from 1 to 10, which will be used to calculate the memory.async_ratio
|
||||
based on cgroup priority: async_ratio = 100 - (MAX_PRIORITY - cgroup_prio) *
|
||||
async_ratio_delta, and the result will be used to calculate the
|
||||
async reclaim wmark.
|
||||
|
||||
Default value -1 is a show stop value which means the async reclaim wmark
|
||||
calculation will not rely on the cgroup priority, it only depends on the
|
||||
async_ratio value we manually set.
|
||||
|
||||
memory.async_distance_delta
|
||||
A read-write single value file which exists on non-root cgroups.
|
||||
The default is 1.
|
||||
|
||||
Per cgroup priority factor to define the distance between async_high
|
||||
and async_low. Valid value is from 1 to 50. The formula is
|
||||
async_distance_factor = async_distance_delta * (cgroup_priority_value + 1).
|
||||
Async reclaim mechanism will use the result for reclaim distance
|
||||
calculation. And when async_ratio_delta == -1, this value won't
|
||||
take effect either when cgroup priority changes.
|
||||
|
||||
memory.oom.group
|
||||
A read-write single value file which exists on non-root
|
||||
cgroups. The default value is "0".
|
||||
|
|
|
@ -338,6 +338,12 @@ struct mem_cgroup {
|
|||
int reclaim_failed;
|
||||
struct list_head prio_list;
|
||||
struct list_head prio_list_async;
|
||||
/* per cgroup memory async reclaim */
|
||||
unsigned int async_wmark;
|
||||
unsigned int async_distance_factor;
|
||||
int async_wmark_delta;
|
||||
unsigned int async_distance_delta;
|
||||
struct work_struct async_work;
|
||||
|
||||
/*
|
||||
* set > 0 if pages under this cgroup are moving to other cgroup.
|
||||
|
|
|
@ -25,6 +25,10 @@ struct page_counter {
|
|||
atomic_long_t low_usage;
|
||||
atomic_long_t children_low_usage;
|
||||
|
||||
/* async reclaim threshold */
|
||||
unsigned long async_low;
|
||||
unsigned long async_high;
|
||||
|
||||
unsigned long watermark;
|
||||
unsigned long failcnt;
|
||||
|
||||
|
@ -76,6 +80,11 @@ int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages);
|
|||
int page_counter_memparse(const char *buf, const char *max,
|
||||
unsigned long *nr_pages);
|
||||
|
||||
void page_counter_set_async_high(struct page_counter *counter,
|
||||
unsigned long nr_pages);
|
||||
void page_counter_set_async_low(struct page_counter *counter,
|
||||
unsigned long nr_pages);
|
||||
|
||||
static inline void page_counter_reset_watermark(struct page_counter *counter)
|
||||
{
|
||||
counter->watermark = page_counter_read(counter);
|
||||
|
|
341
mm/memcontrol.c
341
mm/memcontrol.c
|
@ -125,6 +125,11 @@ static atomic_long_t memcg_reclaimed_count;
|
|||
static unsigned long memcg_reclaim_goal;
|
||||
static int memcg_cur_reclaim_prio = CGROUP_PRIORITY_MAX;
|
||||
static DEFINE_SPINLOCK(memcg_reclaim_prio_lock);
|
||||
/* workqueue for async reclaim */
|
||||
struct workqueue_struct *memcg_async_reclaim_wq;
|
||||
#define ASYNC_DISTANCE_DIV 1000000
|
||||
#define ASYNC_RATIO_DIV 100
|
||||
#define ASYNC_DISTANCE_DEF 1
|
||||
|
||||
/* Whether legacy memory+swap accounting is active */
|
||||
static bool do_memsw_account(void)
|
||||
|
@ -1875,6 +1880,10 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
|
|||
memcg_events(memcg, PGSTEAL_KSWAPD) +
|
||||
memcg_events(memcg, PGSTEAL_DIRECT) +
|
||||
memcg_events(memcg, PGSTEAL_KHUGEPAGED));
|
||||
seq_buf_printf(s, "pgscan_in_background %lu\n",
|
||||
memcg_events(memcg, PGSCAN_KSWAPD));
|
||||
seq_buf_printf(s, "pgsteal_in_background %lu\n",
|
||||
memcg_events(memcg, PGSTEAL_KSWAPD));
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) {
|
||||
if (memcg_vm_event_stat[i] == PGPGIN ||
|
||||
|
@ -2652,6 +2661,32 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static bool need_memcg_async_reclaim(struct mem_cgroup *memcg)
|
||||
{
|
||||
if (!sysctl_vm_memory_qos)
|
||||
return false;
|
||||
|
||||
return page_counter_read(&memcg->memory) > memcg->memory.async_high;
|
||||
}
|
||||
|
||||
static void async_reclaim_func(struct work_struct *work)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
unsigned long nr_pages;
|
||||
|
||||
memcg = container_of(work, struct mem_cgroup, async_work);
|
||||
nr_pages = page_counter_read(&memcg->memory) - memcg->memory.async_low;
|
||||
|
||||
if (nr_pages <= 0)
|
||||
return;
|
||||
|
||||
nr_pages = min(nr_pages,
|
||||
(memcg->memory.async_high - memcg->memory.async_low));
|
||||
memcg_memory_event(memcg, MEMCG_HIGH);
|
||||
try_to_free_mem_cgroup_pages(memcg, nr_pages, GFP_KERNEL, true);
|
||||
|
||||
}
|
||||
|
||||
static unsigned long reclaim_high(struct mem_cgroup *memcg,
|
||||
unsigned int nr_pages,
|
||||
gfp_t gfp_mask)
|
||||
|
@ -2915,6 +2950,47 @@ out:
|
|||
css_put(&memcg->css);
|
||||
}
|
||||
|
||||
static void setup_async_wmark(struct mem_cgroup *memcg)
|
||||
{
|
||||
unsigned long high_throttle, low_throttle, distance;
|
||||
unsigned long high = cgroup_subsys_on_dfl(memory_cgrp_subsys) ?
|
||||
memcg->memory.high : memcg->memory.max;
|
||||
|
||||
if (memcg->async_wmark) {
|
||||
high_throttle = (memcg->async_wmark * high) / ASYNC_RATIO_DIV;
|
||||
distance = mult_frac(high,
|
||||
memcg->async_distance_factor, ASYNC_DISTANCE_DIV);
|
||||
if (distance >= high_throttle)
|
||||
low_throttle = memcg->memory.low;
|
||||
else
|
||||
low_throttle = high_throttle - distance;
|
||||
} else {
|
||||
high_throttle = PAGE_COUNTER_MAX;
|
||||
low_throttle = PAGE_COUNTER_MAX;
|
||||
}
|
||||
page_counter_set_async_high(&memcg->memory, high_throttle);
|
||||
page_counter_set_async_low(&memcg->memory, low_throttle);
|
||||
}
|
||||
|
||||
static void async_reclaim_reset_factor(struct mem_cgroup *memcg,
|
||||
unsigned int new_prio)
|
||||
{
|
||||
unsigned int wmark, distance;
|
||||
|
||||
if (memcg->async_wmark_delta < 0)
|
||||
return;
|
||||
|
||||
wmark = ASYNC_RATIO_DIV -
|
||||
(CGROUP_PRIORITY_MAX - new_prio) * memcg->async_wmark_delta;
|
||||
xchg(&memcg->async_wmark, wmark);
|
||||
distance = memcg->async_distance_delta * (new_prio + 1);
|
||||
xchg(&memcg->async_distance_factor, distance);
|
||||
|
||||
setup_async_wmark(memcg);
|
||||
if (need_memcg_async_reclaim(memcg))
|
||||
queue_work(memcg_async_reclaim_wq, &memcg->async_work);
|
||||
}
|
||||
|
||||
static struct task_struct *memcg_priod;
|
||||
static struct task_struct *memcg_priod_async;
|
||||
static DECLARE_WAIT_QUEUE_HEAD(memcg_prio_reclaim_wq);
|
||||
|
@ -3052,6 +3128,7 @@ int mem_cgroup_notify_prio_change(struct cgroup_subsys_state *css,
|
|||
{
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
|
||||
|
||||
async_reclaim_reset_factor(memcg, new_prio);
|
||||
return memcg_notify_prio_change(memcg, old_prio, new_prio);
|
||||
}
|
||||
|
||||
|
@ -3239,6 +3316,12 @@ done_restock:
|
|||
do {
|
||||
bool mem_high, swap_high;
|
||||
|
||||
if (need_memcg_async_reclaim(memcg)) {
|
||||
/* Kick off per memory cgroup async reclaim */
|
||||
queue_work(memcg_async_reclaim_wq, &memcg->async_work);
|
||||
break;
|
||||
}
|
||||
|
||||
mem_high = page_counter_read(&memcg->memory) >
|
||||
READ_ONCE(memcg->memory.high);
|
||||
swap_high = page_counter_read(&memcg->swap) >
|
||||
|
@ -3972,8 +4055,14 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
|
|||
}
|
||||
} while (true);
|
||||
|
||||
if (!ret && enlarge)
|
||||
memcg_oom_recover(memcg);
|
||||
if (!ret) {
|
||||
setup_async_wmark(memcg);
|
||||
if (need_memcg_async_reclaim(memcg))
|
||||
queue_work(memcg_async_reclaim_wq, &memcg->async_work);
|
||||
|
||||
if (enlarge)
|
||||
memcg_oom_recover(memcg);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -4175,6 +4264,8 @@ enum {
|
|||
RES_MAX_USAGE,
|
||||
RES_FAILCNT,
|
||||
RES_SOFT_LIMIT,
|
||||
ASYNC_HIGH_LIMIT,
|
||||
ASYNC_LOW_LIMIT,
|
||||
};
|
||||
|
||||
static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
|
||||
|
@ -4215,6 +4306,10 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
|
|||
return counter->failcnt;
|
||||
case RES_SOFT_LIMIT:
|
||||
return (u64)READ_ONCE(memcg->soft_limit) * PAGE_SIZE;
|
||||
case ASYNC_HIGH_LIMIT:
|
||||
return (u64)counter->async_high * PAGE_SIZE;
|
||||
case ASYNC_LOW_LIMIT:
|
||||
return (u64)counter->async_low * PAGE_SIZE;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
@ -4656,6 +4751,11 @@ static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
|
|||
seq_buf_printf(s, "file_cost %lu\n", file_cost);
|
||||
}
|
||||
#endif
|
||||
|
||||
seq_buf_printf(s, "pgscan_in_background %lu\n",
|
||||
memcg_events(memcg, PGSCAN_KSWAPD));
|
||||
seq_buf_printf(s, "pgsteal_in_background %lu\n",
|
||||
memcg_events(memcg, PGSTEAL_KSWAPD));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
|
@ -5898,6 +5998,74 @@ static int mem_cgroup_unevictable_percent_write(struct cgroup_subsys_state *css,
|
|||
}
|
||||
#endif
|
||||
|
||||
static int memory_async_reclaim_wmark_show(struct seq_file *m, void *v)
|
||||
{
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
|
||||
|
||||
seq_printf(m, "%d\n", READ_ONCE(memcg->async_wmark));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * memory_async_reclaim_wmark_write - write handler for "async_ratio"
 * @of: open kernfs file identifying the target cgroup
 * @buf: user supplied text, parsed as an integer (base auto-detected)
 * @nbytes: number of bytes written
 * @off: file offset (unused)
 *
 * Accepts a percentage in the range 0..100, stores it as the cgroup's
 * async_wmark, recomputes the async watermarks and queues async reclaim if
 * usage is already above the new high watermark.
 *
 * Returns @nbytes on success, the kstrtoint() error on a parse failure, or
 * -EINVAL when the value is outside 0..100.
 */
static ssize_t memory_async_reclaim_wmark_write(struct kernfs_open_file *of,
				char *buf, size_t nbytes, loff_t off)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
	int ret, wmark;

	/* strstrip() returns a pointer into buf, so no NULL check is needed. */
	buf = strstrip(buf);

	ret = kstrtoint(buf, 0, &wmark);
	if (ret)
		return ret;

	/*
	 * async_wmark is an unsigned int: a negative input must be rejected
	 * here or it would be stored as a huge ratio.
	 */
	if (wmark < 0 || wmark > 100)
		return -EINVAL;

	xchg(&memcg->async_wmark, wmark);

	setup_async_wmark(memcg);
	if (need_memcg_async_reclaim(memcg))
		queue_work(memcg_async_reclaim_wq, &memcg->async_work);

	return nbytes;
}
|
||||
|
||||
static int memory_async_distance_factor_show(struct seq_file *m, void *v)
|
||||
{
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
|
||||
|
||||
seq_printf(m, "%d\n", READ_ONCE(memcg->async_distance_factor));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * memory_async_distance_factor_write - write handler for
 * "async_distance_factor"
 * @of: open kernfs file identifying the target cgroup
 * @buf: user supplied text, parsed as an integer (base auto-detected)
 * @nbytes: number of bytes written
 * @off: file offset (unused)
 *
 * Accepts a value in 1..150000, stores it as the cgroup's
 * async_distance_factor and recomputes the async watermarks.
 *
 * Returns @nbytes on success, the kstrtoint() error on a parse failure, or
 * -EINVAL when the value is out of range.
 */
static ssize_t memory_async_distance_factor_write(struct kernfs_open_file *of,
				char *buf, size_t nbytes, loff_t off)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
	int val;
	int err;

	buf = strstrip(buf);
	if (!buf)
		return -EINVAL;

	err = kstrtoint(buf, 0, &val);
	if (err)
		return err;

	if (val < 1 || val > 150000)
		return -EINVAL;

	xchg(&memcg->async_distance_factor, val);
	setup_async_wmark(memcg);

	return nbytes;
}
|
||||
|
||||
static int memory_oom_group_show(struct seq_file *m, void *v);
|
||||
static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
|
||||
char *buf, size_t nbytes, loff_t off);
|
||||
|
@ -5972,6 +6140,30 @@ static struct cftype mem_cgroup_legacy_files[] = {
|
|||
.seq_show = memory_oom_group_show,
|
||||
.write = memory_oom_group_write,
|
||||
},
|
||||
{
|
||||
.name = "async_ratio",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.seq_show = memory_async_reclaim_wmark_show,
|
||||
.write = memory_async_reclaim_wmark_write,
|
||||
},
|
||||
{
|
||||
.name = "async_high",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.private = MEMFILE_PRIVATE(_MEM, ASYNC_HIGH_LIMIT),
|
||||
.read_u64 = mem_cgroup_read_u64,
|
||||
},
|
||||
{
|
||||
.name = "async_low",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.private = MEMFILE_PRIVATE(_MEM, ASYNC_LOW_LIMIT),
|
||||
.read_u64 = mem_cgroup_read_u64,
|
||||
},
|
||||
{
|
||||
.name = "async_distance_factor",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.seq_show = memory_async_distance_factor_show,
|
||||
.write = memory_async_distance_factor_write,
|
||||
},
|
||||
{
|
||||
.name = "cgroup.event_control", /* XXX: for compat */
|
||||
.write = memcg_write_event_control,
|
||||
|
@ -6352,6 +6544,7 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent)
|
|||
goto fail;
|
||||
|
||||
INIT_WORK(&memcg->high_work, high_work_func);
|
||||
INIT_WORK(&memcg->async_work, async_reclaim_func);
|
||||
INIT_LIST_HEAD(&memcg->oom_notify);
|
||||
mutex_init(&memcg->thresholds_lock);
|
||||
spin_lock_init(&memcg->move_lock);
|
||||
|
@ -6413,6 +6606,12 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
|
|||
#endif
|
||||
WRITE_ONCE(memcg->swappiness, mem_cgroup_swappiness(parent));
|
||||
WRITE_ONCE(memcg->oom_kill_disable, READ_ONCE(parent->oom_kill_disable));
|
||||
memcg->async_wmark = parent->async_wmark;
|
||||
memcg->async_distance_factor = parent->async_distance_factor ?
|
||||
: ASYNC_DISTANCE_DEF;
|
||||
memcg->async_wmark_delta = parent->async_wmark_delta;
|
||||
memcg->async_distance_delta = parent->async_distance_delta ?
|
||||
: ASYNC_DISTANCE_DEF;
|
||||
#ifdef CONFIG_MEMCG_ZRAM
|
||||
memcg->zram_prio = parent->zram_prio;
|
||||
#endif
|
||||
|
@ -6426,7 +6625,12 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
|
|||
page_counter_init(&memcg->swap, NULL);
|
||||
page_counter_init(&memcg->kmem, NULL);
|
||||
page_counter_init(&memcg->tcpmem, NULL);
|
||||
}
|
||||
|
||||
setup_async_wmark(memcg);
|
||||
|
||||
if (!parent) {
|
||||
memcg->async_wmark_delta = -1;
|
||||
root_mem_cgroup = memcg;
|
||||
return &memcg->css;
|
||||
}
|
||||
|
@ -6483,6 +6687,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
|
|||
idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
|
||||
spin_unlock(&memcg_idr_lock);
|
||||
|
||||
async_reclaim_reset_factor(memcg, memcg_get_prio(memcg));
|
||||
memcg_notify_prio_change(memcg, 0, memcg_get_prio(memcg));
|
||||
|
||||
return 0;
|
||||
|
@ -6514,6 +6719,8 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
|
|||
|
||||
page_counter_set_min(&memcg->memory, 0);
|
||||
page_counter_set_low(&memcg->memory, 0);
|
||||
page_counter_set_async_high(&memcg->memory, PAGE_COUNTER_MAX);
|
||||
page_counter_set_async_low(&memcg->memory, PAGE_COUNTER_MAX);
|
||||
|
||||
memcg_offline_kmem(memcg);
|
||||
reparent_shrinker_deferred(memcg);
|
||||
|
@ -6557,6 +6764,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
|
|||
|
||||
vmpressure_cleanup(&memcg->vmpressure);
|
||||
cancel_work_sync(&memcg->high_work);
|
||||
cancel_work_sync(&memcg->async_work);
|
||||
mem_cgroup_remove_from_trees(memcg);
|
||||
free_shrinker_info(memcg);
|
||||
mem_cgroup_free(memcg);
|
||||
|
@ -6585,6 +6793,8 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
|
|||
page_counter_set_max(&memcg->tcpmem, PAGE_COUNTER_MAX);
|
||||
page_counter_set_min(&memcg->memory, 0);
|
||||
page_counter_set_low(&memcg->memory, 0);
|
||||
page_counter_set_async_high(&memcg->memory, PAGE_COUNTER_MAX);
|
||||
page_counter_set_async_low(&memcg->memory, PAGE_COUNTER_MAX);
|
||||
page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX);
|
||||
WRITE_ONCE(memcg->soft_limit, PAGE_COUNTER_MAX);
|
||||
page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX);
|
||||
|
@ -7587,6 +7797,10 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
|
|||
break;
|
||||
}
|
||||
|
||||
setup_async_wmark(memcg);
|
||||
if (need_memcg_async_reclaim(memcg))
|
||||
queue_work(memcg_async_reclaim_wq, &memcg->async_work);
|
||||
|
||||
memcg_wb_domain_size_changed(memcg);
|
||||
return nbytes;
|
||||
}
|
||||
|
@ -7640,6 +7854,10 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
|
|||
break;
|
||||
}
|
||||
|
||||
setup_async_wmark(memcg);
|
||||
if (need_memcg_async_reclaim(memcg))
|
||||
queue_work(memcg_async_reclaim_wq, &memcg->async_work);
|
||||
|
||||
memcg_wb_domain_size_changed(memcg);
|
||||
return nbytes;
|
||||
}
|
||||
|
@ -7817,6 +8035,84 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
|
|||
return nbytes;
|
||||
}
|
||||
|
||||
static int memory_async_high_wmark_show(struct seq_file *m, void *v)
|
||||
{
|
||||
return seq_puts_memcg_tunable(m,
|
||||
READ_ONCE(mem_cgroup_from_seq(m)->memory.async_high));
|
||||
}
|
||||
|
||||
static int memory_async_low_wmark_show(struct seq_file *m, void *v)
|
||||
{
|
||||
return seq_puts_memcg_tunable(m,
|
||||
READ_ONCE(mem_cgroup_from_seq(m)->memory.async_low));
|
||||
}
|
||||
|
||||
static int memory_async_distance_delta_show(struct seq_file *m, void *v)
|
||||
{
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
|
||||
|
||||
seq_printf(m, "%d\n", READ_ONCE(memcg->async_distance_delta));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * memory_async_distance_delta_write - write handler for
 * "async_distance_delta"
 * @of: open kernfs file identifying the target cgroup
 * @buf: user supplied text, parsed as an integer (base auto-detected)
 * @nbytes: number of bytes written
 * @off: file offset (unused)
 *
 * Accepts a value in 1..50, stores it as the cgroup's async_distance_delta
 * and re-derives the priority based tunables for the cgroup's current
 * priority.
 *
 * Returns @nbytes on success, the kstrtoint() error on a parse failure, or
 * -EINVAL when the value is out of range.
 */
static ssize_t memory_async_distance_delta_write(struct kernfs_open_file *of,
				char *buf, size_t nbytes, loff_t off)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
	int val;
	int err;

	buf = strstrip(buf);
	if (!buf)
		return -EINVAL;

	err = kstrtoint(buf, 0, &val);
	if (err)
		return err;

	if (val < 1 || val > 50)
		return -EINVAL;

	xchg(&memcg->async_distance_delta, val);
	async_reclaim_reset_factor(memcg, memcg_get_prio(memcg));

	return nbytes;
}
|
||||
|
||||
static int memory_async_wmark_delta_show(struct seq_file *m, void *v)
|
||||
{
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
|
||||
|
||||
seq_printf(m, "%d\n", READ_ONCE(memcg->async_wmark_delta));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * memory_async_wmark_delta_write - write handler for "async_ratio_delta"
 * @of: open kernfs file identifying the target cgroup
 * @buf: user supplied text, parsed as an integer (base auto-detected)
 * @nbytes: number of bytes written
 * @off: file offset (unused)
 *
 * Accepts either -1 or a value in 1..10, stores it as the cgroup's
 * async_wmark_delta and re-derives the priority based tunables for the
 * cgroup's current priority.
 *
 * Returns @nbytes on success, the kstrtoint() error on a parse failure, or
 * -EINVAL for any other value.
 */
static ssize_t memory_async_wmark_delta_write(struct kernfs_open_file *of,
				char *buf, size_t nbytes, loff_t off)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
	int val;
	int err;

	buf = strstrip(buf);
	if (!buf)
		return -EINVAL;

	err = kstrtoint(buf, 0, &val);
	if (err)
		return err;

	/* -1 is the one accepted value outside the 1..10 range. */
	if (val != -1 && (val < 1 || val > 10))
		return -EINVAL;

	xchg(&memcg->async_wmark_delta, val);
	async_reclaim_reset_factor(memcg, memcg_get_prio(memcg));

	return nbytes;
}
|
||||
|
||||
static struct cftype memory_files[] = {
|
||||
{
|
||||
.name = "current",
|
||||
|
@ -7890,6 +8186,40 @@ static struct cftype memory_files[] = {
|
|||
.flags = CFTYPE_NS_DELEGATABLE,
|
||||
.write = memory_reclaim,
|
||||
},
|
||||
{
|
||||
.name = "async_ratio",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.seq_show = memory_async_reclaim_wmark_show,
|
||||
.write = memory_async_reclaim_wmark_write,
|
||||
},
|
||||
{
|
||||
.name = "async_high",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.seq_show = memory_async_high_wmark_show,
|
||||
},
|
||||
{
|
||||
.name = "async_low",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.seq_show = memory_async_low_wmark_show,
|
||||
},
|
||||
{
|
||||
.name = "async_distance_factor",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.seq_show = memory_async_distance_factor_show,
|
||||
.write = memory_async_distance_factor_write,
|
||||
},
|
||||
{
|
||||
.name = "async_ratio_delta",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.seq_show = memory_async_wmark_delta_show,
|
||||
.write = memory_async_wmark_delta_write,
|
||||
},
|
||||
{
|
||||
.name = "async_distance_delta",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.seq_show = memory_async_distance_delta_show,
|
||||
.write = memory_async_distance_delta_write,
|
||||
},
|
||||
{ } /* terminate */
|
||||
};
|
||||
|
||||
|
@ -8479,6 +8809,13 @@ static int __init mem_cgroup_init(void)
|
|||
{
|
||||
int cpu, node;
|
||||
|
||||
memcg_async_reclaim_wq = alloc_workqueue("memcg_async_reclaim",
|
||||
WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_FREEZABLE,
|
||||
WQ_UNBOUND_MAX_ACTIVE);
|
||||
|
||||
if (!memcg_async_reclaim_wq)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* Currently s32 type (can refer to struct batched_lruvec_stat) is
|
||||
* used for per-memcg-per-cpu caching of per-node statistics. In order
|
||||
|
|
|
@ -234,6 +234,34 @@ void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
|
|||
propagate_protected_usage(c, atomic_long_read(&c->usage));
|
||||
}
|
||||
|
||||
/**
|
||||
* page_counter_set_async_high - set the start throttle of memory for
|
||||
* memcg async reclaim
|
||||
* @counter: counter
|
||||
* @nr_pages: value to set
|
||||
*
|
||||
* The caller must serialize invocations on the same counter.
|
||||
*/
|
||||
void page_counter_set_async_high(struct page_counter *counter,
|
||||
unsigned long nr_pages)
|
||||
{
|
||||
xchg(&counter->async_high, nr_pages);
|
||||
}
|
||||
|
||||
/**
|
||||
* page_counter_set_async_low - set the stop throttle of memory for
|
||||
* memcg async reclaim
|
||||
* @counter: counter
|
||||
* @nr_pages: value to set
|
||||
*
|
||||
* The caller must serialize invocations on the same counter.
|
||||
*/
|
||||
void page_counter_set_async_low(struct page_counter *counter,
|
||||
unsigned long nr_pages)
|
||||
{
|
||||
xchg(&counter->async_low, nr_pages);
|
||||
}
|
||||
|
||||
/**
|
||||
* page_counter_memparse - memparse() for page counter limits
|
||||
* @buf: string to parse
|
||||
|
|
32
mm/vmscan.c
32
mm/vmscan.c
|
@ -1163,6 +1163,24 @@ static int reclaimer_offset(void)
|
|||
return PGSTEAL_DIRECT - PGSTEAL_KSWAPD;
|
||||
}
|
||||
|
||||
static int rue_reclaimer_offset(struct scan_control *sc)
|
||||
{
|
||||
BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD !=
|
||||
PGDEMOTE_DIRECT - PGDEMOTE_KSWAPD);
|
||||
BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD !=
|
||||
PGSCAN_DIRECT - PGSCAN_KSWAPD);
|
||||
BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD !=
|
||||
PGDEMOTE_KHUGEPAGED - PGDEMOTE_KSWAPD);
|
||||
BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD !=
|
||||
PGSCAN_KHUGEPAGED - PGSCAN_KSWAPD);
|
||||
|
||||
if (current_is_kswapd() || (cgroup_reclaim(sc) && current_work()))
|
||||
return 0;
|
||||
if (current_is_khugepaged())
|
||||
return PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD;
|
||||
return PGSTEAL_DIRECT - PGSTEAL_KSWAPD;
|
||||
}
|
||||
|
||||
static inline int is_page_cache_freeable(struct folio *folio)
|
||||
{
|
||||
/*
|
||||
|
@ -2676,7 +2694,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
|
|||
&nr_scanned, sc, lru);
|
||||
|
||||
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
|
||||
item = PGSCAN_KSWAPD + reclaimer_offset();
|
||||
item = PGSCAN_KSWAPD + rue_reclaimer_offset(sc);
|
||||
if (!cgroup_reclaim(sc))
|
||||
__count_vm_events(item, nr_scanned);
|
||||
__count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned);
|
||||
|
@ -2693,7 +2711,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
|
|||
move_folios_to_lru(lruvec, &folio_list);
|
||||
|
||||
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
|
||||
item = PGSTEAL_KSWAPD + reclaimer_offset();
|
||||
item = PGSTEAL_KSWAPD + rue_reclaimer_offset(sc);
|
||||
if (!cgroup_reclaim(sc))
|
||||
__count_vm_events(item, nr_reclaimed);
|
||||
__count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed);
|
||||
|
@ -5200,7 +5218,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
|
|||
break;
|
||||
}
|
||||
|
||||
item = PGSCAN_KSWAPD + reclaimer_offset();
|
||||
item = PGSCAN_KSWAPD + rue_reclaimer_offset(sc);
|
||||
if (!cgroup_reclaim(sc)) {
|
||||
__count_vm_events(item, isolated);
|
||||
__count_vm_events(PGREFILL, sorted);
|
||||
|
@ -5390,7 +5408,7 @@ retry:
|
|||
if (walk && walk->batched)
|
||||
reset_batch_size(lruvec, walk);
|
||||
|
||||
item = PGSTEAL_KSWAPD + reclaimer_offset();
|
||||
item = PGSTEAL_KSWAPD + rue_reclaimer_offset(sc);
|
||||
if (!cgroup_reclaim(sc))
|
||||
__count_vm_events(item, reclaimed);
|
||||
__count_memcg_events(memcg, item, reclaimed);
|
||||
|
@ -6717,6 +6735,12 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
|
|||
sc->nr_scanned - scanned,
|
||||
sc->nr_reclaimed - reclaimed);
|
||||
|
||||
if (cgroup_reclaim(sc) &&
|
||||
((sc->nr_reclaimed >= sc->nr_to_reclaim))) {
|
||||
mem_cgroup_iter_break(target_memcg, memcg);
|
||||
break;
|
||||
}
|
||||
|
||||
} while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)));
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue