anolis: mm: unevictable: add memcg granularity core implementation
commit 179050e6e6a2758db0549e42e3e5841050271488 openAnolis Conflicts: add memory.text_unevictable_size to display the size of code segments. Picked from 5.4. Backport-reason: Add code-segment unevictable feature support [PATCH 7/8] ANBZ: #2674 This patch introduces the core implementation of code-section unevictability at memcg granularity; after this patch, memcg-granularity unevictability is fully functional. With this patch we implement the global switch "/sys/kernel/mm/unevictable/enabled" to enable or disable this feature, and we also implement "memory.allow_text_unevictable" and "memory.text_unevictable_percent" to enable and to limit the size of the unevictable code section per memcg. The unevictable text size can be checked through the memory.exstat interface. Signed-off-by: Xin Hao <xhao@linux.alibaba.com> Reviewed-by: Xu Yu <xuyu@linux.alibaba.com> Link: https://gitee.com/anolis/cloud-kernel/pulls/953 Signed-off-by: Xin Hao <vernhao@tencent.com>
This commit is contained in:
parent
19dac875c2
commit
03165bd0f4
|
@ -352,6 +352,12 @@ struct mem_cgroup {
|
|||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
bool allow_unevictable;
|
||||
unsigned int unevictable_percent;
|
||||
/*
|
||||
* the unevictable_size is larger than the real unevictable memory
|
||||
* size, due to there may be multiple tasks sharing the same memory,
|
||||
* such as binary and dynamic library sharing.
|
||||
*/
|
||||
atomic_long_t unevictable_size;
|
||||
#endif
|
||||
|
||||
KABI_RESERVE(1);
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
#ifndef _TEXT_UNEVICTABLE_H
|
||||
#define _TEXT_UNEVICTABLE_H
|
||||
|
||||
struct mem_cgroup;
|
||||
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
DECLARE_STATIC_KEY_FALSE(unevictable_enabled_key);
|
||||
|
||||
|
@ -10,10 +12,56 @@ static inline bool unevictable_enabled(void)
|
|||
{
|
||||
return static_branch_unlikely(&unevictable_enabled_key);
|
||||
}
|
||||
bool is_memcg_unevictable_enabled(struct mem_cgroup *memcg);
|
||||
void memcg_increase_unevict_size(struct mem_cgroup *memcg, unsigned long size);
|
||||
void memcg_decrease_unevict_size(struct mem_cgroup *memcg, unsigned long size);
|
||||
bool is_unevictable_size_overflow(struct mem_cgroup *memcg);
|
||||
unsigned long memcg_exstat_text_unevict_gather(struct mem_cgroup *memcg);
|
||||
void mem_cgroup_can_unevictable(struct task_struct *tsk, struct mem_cgroup *to);
|
||||
void mem_cgroup_cancel_unevictable(struct cgroup_taskset *tset);
|
||||
void memcg_all_processes_unevict(struct mem_cgroup *memcg, bool enable);
|
||||
void del_unevict_task(struct task_struct *tsk);
|
||||
void clean_task_unevict_size(struct task_struct *tsk);
|
||||
#else
|
||||
static inline bool unevictable_enabled(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline bool is_memcg_unevictable_enabled(struct mem_cgroup *memcg)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline void memcg_increase_unevict_size(struct mem_cgroup *memcg,
|
||||
unsigned long size)
|
||||
{
|
||||
}
|
||||
static inline void memcg_decrease_unevict_size(struct mem_cgroup *memcg,
|
||||
unsigned long size)
|
||||
{
|
||||
}
|
||||
static inline bool is_unevictable_size_overflow(struct mem_cgroup *memcg)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline unsigned long memcg_exstat_text_unevict_gather(struct mem_cgroup *memcg)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline void mem_cgroup_can_unevictable(struct task_struct *tsk,
|
||||
struct mem_cgroup *to)
|
||||
{
|
||||
}
|
||||
static inline void mem_cgroup_cancel_unevictable(struct cgroup_taskset *tset)
|
||||
{
|
||||
}
|
||||
static inline void memcg_all_processes_unevict(struct mem_cgroup *memcg, bool enable)
|
||||
{
|
||||
}
|
||||
static inline void del_unevict_task(struct task_struct *tsk)
|
||||
{
|
||||
}
|
||||
static inline void clean_task_unevict_size(struct task_struct *tsk)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -69,6 +69,9 @@
|
|||
#include <linux/rethook.h>
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/user_events.h>
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
#include <linux/unevictable.h>
|
||||
#endif
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/unistd.h>
|
||||
|
@ -856,6 +859,9 @@ void __noreturn do_exit(long code)
|
|||
tsk->exit_code = code;
|
||||
taskstats_exit(tsk, group_dead);
|
||||
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
clean_task_unevict_size(tsk);
|
||||
#endif
|
||||
exit_mm();
|
||||
|
||||
if (group_dead)
|
||||
|
|
|
@ -70,6 +70,9 @@
|
|||
#include <net/ip.h>
|
||||
#include "slab.h"
|
||||
#include "swap.h"
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
#include <linux/unevictable.h>
|
||||
#endif
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
|
@ -4214,6 +4217,18 @@ static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
|
|||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
/*
 * Seq_file handler for memory.text_unevictable_size: report this
 * memcg's accounted unevictable text size, in KiB.
 */
static int memcg_unevict_size_show(struct seq_file *m, void *v)
{
	struct mem_cgroup *mcg = mem_cgroup_from_css(seq_css(m));
	unsigned long bytes = memcg_exstat_text_unevict_gather(mcg);

	/* Shift converts bytes to KiB for display. */
	seq_printf(m, "unevictable_text_size_kb %lu\n", bytes >> 10);
	return 0;
}
|
||||
#endif
|
||||
|
||||
static u64 mem_cgroup_swappiness_read(struct cgroup_subsys_state *css,
|
||||
struct cftype *cft)
|
||||
{
|
||||
|
@ -5413,6 +5428,10 @@ static int mem_cgroup_allow_unevictable_write(struct cgroup_subsys_state *css,
|
|||
return 0;
|
||||
|
||||
memcg->allow_unevictable = val;
|
||||
if (val)
|
||||
memcg_all_processes_unevict(memcg, true);
|
||||
else
|
||||
memcg_all_processes_unevict(memcg, false);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -5455,6 +5474,10 @@ static struct cftype mem_cgroup_legacy_files[] = {
|
|||
.read_u64 = mem_cgroup_unevictable_percent_read,
|
||||
.write_u64 = mem_cgroup_unevictable_percent_write,
|
||||
},
|
||||
{
|
||||
.name = "text_unevictable_size",
|
||||
.seq_show = memcg_unevict_size_show,
|
||||
},
|
||||
#endif
|
||||
{
|
||||
.name = "max_usage_in_bytes",
|
||||
|
@ -5870,6 +5893,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
|
|||
page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX);
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
memcg->unevictable_percent = 100;
|
||||
atomic_long_set(&memcg->unevictable_size, 0);
|
||||
#endif
|
||||
if (parent) {
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
|
@ -6674,6 +6698,10 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
|
|||
if (!p)
|
||||
return 0;
|
||||
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
mem_cgroup_can_unevictable(p, memcg);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We are now committed to this value whatever it is. Changes in this
|
||||
* tunable will only affect upcoming migrations, not the current one.
|
||||
|
@ -6717,6 +6745,9 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
|
|||
|
||||
static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
|
||||
{
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
mem_cgroup_cancel_unevictable(tset);
|
||||
#endif
|
||||
if (mc.to)
|
||||
mem_cgroup_clear_mc();
|
||||
}
|
||||
|
|
206
mm/unevictable.c
206
mm/unevictable.c
|
@ -40,6 +40,11 @@
|
|||
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
DEFINE_STATIC_KEY_FALSE(unevictable_enabled_key);
|
||||
|
||||
#define for_each_mem_cgroup(iter) \
|
||||
for (iter = mem_cgroup_iter(NULL, NULL, NULL); \
|
||||
iter != NULL; \
|
||||
iter = mem_cgroup_iter(NULL, iter, NULL))
|
||||
#endif
|
||||
|
||||
struct evict_pids_t {
|
||||
|
@ -51,6 +56,9 @@ struct evict_pid_entry {
|
|||
struct list_head list;
|
||||
pid_t rootpid;
|
||||
u64 start_time;
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
u64 unevict_size;
|
||||
#endif
|
||||
struct task_struct *tsk;
|
||||
bool done;
|
||||
};
|
||||
|
@ -102,6 +110,10 @@ static void __evict_pid(struct evict_pid_entry *pid)
|
|||
if (!(mm->def_flags & VM_LOCKED)) {
|
||||
struct vm_area_struct *vma, *prev = NULL;
|
||||
vm_flags_t flag;
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
unsigned long size = 0;
|
||||
struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
|
||||
#endif
|
||||
|
||||
VMA_ITERATOR(vmi, mm, 0);
|
||||
mmap_write_lock(mm);
|
||||
|
@ -113,10 +125,18 @@ static void __evict_pid(struct evict_pid_entry *pid)
|
|||
flag = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
|
||||
mlock_fixup(&vmi, vma, &prev,
|
||||
vma->vm_start, vma->vm_end, flag);
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
size += vma->vm_end - vma->vm_start;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
mmap_write_unlock(mm);
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
memcg_decrease_unevict_size(memcg, size);
|
||||
css_put(&memcg->css);
|
||||
pid->unevict_size -= size;
|
||||
#endif
|
||||
}
|
||||
mmput(mm);
|
||||
}
|
||||
|
@ -250,6 +270,9 @@ static void add_unevict_task(struct task_struct *tsk)
|
|||
if (!result) {
|
||||
result = new_entry;
|
||||
result->rootpid = rootpid;
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
result->unevict_size = 0;
|
||||
#endif
|
||||
rb_link_node(&result->node, parent, link);
|
||||
rb_insert_color(&result->node, &base_tree->root);
|
||||
list_add_tail(&result->list, &pid_list);
|
||||
|
@ -294,6 +317,12 @@ static void unevict_pid(pid_t pid)
|
|||
get_task_struct(tsk);
|
||||
rcu_read_unlock();
|
||||
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
if (is_memcg_unevictable_enabled(mem_cgroup_from_task(tsk))) {
|
||||
put_task_struct(tsk);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
add_unevict_task(tsk);
|
||||
put_task_struct(tsk);
|
||||
}
|
||||
|
@ -431,6 +460,9 @@ static void execute_vm_lock(struct work_struct *unused)
|
|||
|
||||
mm = get_task_mm(tsk);
|
||||
if (mm && !(mm->def_flags & VM_LOCKED)) {
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
|
||||
#endif
|
||||
struct vm_area_struct *vma, *prev = NULL;
|
||||
vm_flags_t flag;
|
||||
|
||||
|
@ -438,6 +470,10 @@ static void execute_vm_lock(struct work_struct *unused)
|
|||
mmap_write_lock(mm);
|
||||
|
||||
for_each_vma(vmi, vma) {
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
if (is_unevictable_size_overflow(memcg))
|
||||
break;
|
||||
#endif
|
||||
if (vma->vm_file &&
|
||||
(vma->vm_flags & VM_EXEC) &&
|
||||
(vma->vm_flags & VM_READ)) {
|
||||
|
@ -445,6 +481,9 @@ static void execute_vm_lock(struct work_struct *unused)
|
|||
flag |= (VM_LOCKED | VM_LOCKONFAULT);
|
||||
mlock_fixup(&vmi, vma, &prev,
|
||||
vma->vm_start, vma->vm_end, flag);
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
result->unevict_size += vma->vm_end - vma->vm_start;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -452,6 +491,11 @@ static void execute_vm_lock(struct work_struct *unused)
|
|||
result->start_time = tsk->start_boottime;
|
||||
result->done = true;
|
||||
mmap_write_unlock(mm);
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
memcg_increase_unevict_size(memcg,
|
||||
result->unevict_size);
|
||||
css_put(&memcg->css);
|
||||
#endif
|
||||
} else {
|
||||
list_del(&result->list);
|
||||
__remove_entry(result);
|
||||
|
@ -546,6 +590,163 @@ const static struct proc_ops del_proc_fops = {
|
|||
};
|
||||
|
||||
#ifdef CONFIG_TEXT_UNEVICTABLE
|
||||
void clean_task_unevict_size(struct task_struct *tsk)
|
||||
{
|
||||
struct evict_pid_entry *result;
|
||||
struct mem_cgroup *memcg;
|
||||
|
||||
/*
|
||||
* There must make sure unevictable
|
||||
* function is finished.
|
||||
*/
|
||||
if (!tsk || !base_tree)
|
||||
return;
|
||||
|
||||
mutex_lock(&pid_mutex);
|
||||
result = lookup_unevict_entry(tsk);
|
||||
if (result) {
|
||||
if (result->unevict_size) {
|
||||
rcu_read_lock();
|
||||
memcg = mem_cgroup_from_task(tsk);
|
||||
memcg_decrease_unevict_size(memcg, result->unevict_size);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
list_del(&result->list);
|
||||
__remove_entry(result);
|
||||
mutex_unlock(&pid_mutex);
|
||||
kfree(result);
|
||||
} else
|
||||
mutex_unlock(&pid_mutex);
|
||||
}
|
||||
|
||||
bool is_memcg_unevictable_enabled(struct mem_cgroup *memcg)
|
||||
{
|
||||
if (!unevictable_enabled())
|
||||
return false;
|
||||
|
||||
if (!memcg)
|
||||
return false;
|
||||
|
||||
if (memcg->allow_unevictable)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void memcg_increase_unevict_size(struct mem_cgroup *memcg, unsigned long size)
|
||||
{
|
||||
atomic_long_add(size, &memcg->unevictable_size);
|
||||
}
|
||||
|
||||
void memcg_decrease_unevict_size(struct mem_cgroup *memcg, unsigned long size)
|
||||
{
|
||||
atomic_long_sub(size, &memcg->unevictable_size);
|
||||
}
|
||||
|
||||
bool is_unevictable_size_overflow(struct mem_cgroup *memcg)
|
||||
{
|
||||
struct page_counter *counter;
|
||||
u64 res_limit;
|
||||
u64 size;
|
||||
|
||||
counter = &memcg->memory;
|
||||
res_limit = (u64)counter->max * PAGE_SIZE;
|
||||
size = atomic_long_read(&memcg->unevictable_size);
|
||||
size = size * 100 / res_limit;
|
||||
if (size >= memcg->unevictable_percent)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned long memcg_exstat_text_unevict_gather(struct mem_cgroup *memcg)
|
||||
{
|
||||
return atomic_long_read(&memcg->unevictable_size);
|
||||
}
|
||||
|
||||
void mem_cgroup_can_unevictable(struct task_struct *tsk, struct mem_cgroup *to)
|
||||
{
|
||||
struct mem_cgroup *from;
|
||||
|
||||
if (!unevictable_enabled())
|
||||
return;
|
||||
|
||||
from = mem_cgroup_from_task(tsk);
|
||||
VM_BUG_ON(from == to);
|
||||
|
||||
if (to->allow_unevictable && !from->allow_unevictable) {
|
||||
add_unevict_task(tsk);
|
||||
schedule_delayed_work(&evict_work, HZ);
|
||||
}
|
||||
|
||||
if (!to->allow_unevictable && from->allow_unevictable)
|
||||
del_unevict_task(tsk);
|
||||
}
|
||||
|
||||
void mem_cgroup_cancel_unevictable(struct cgroup_taskset *tset)
|
||||
{
|
||||
struct task_struct *tsk;
|
||||
struct cgroup_subsys_state *dst_css;
|
||||
struct mem_cgroup *memcg;
|
||||
|
||||
if (!unevictable_enabled())
|
||||
return;
|
||||
|
||||
cgroup_taskset_for_each(tsk, dst_css, tset) {
|
||||
memcg = mem_cgroup_from_task(tsk);
|
||||
|
||||
if (memcg->allow_unevictable)
|
||||
del_unevict_task(tsk);
|
||||
}
|
||||
}
|
||||
|
||||
static inline int schedule_unevict_task(struct task_struct *tsk, void *arg)
|
||||
{
|
||||
add_unevict_task(tsk);
|
||||
schedule_delayed_work(&evict_work, HZ);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* mem_cgroup_scan_tasks() callback: release @tsk's locked text. */
static inline int schedule_evict_task(struct task_struct *tsk, void *arg)
{
	del_unevict_task(tsk);
	return 0;
}
|
||||
|
||||
static inline void make_all_memcg_evictable(void)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
|
||||
for_each_mem_cgroup(memcg) {
|
||||
if (!memcg->allow_unevictable)
|
||||
continue;
|
||||
mem_cgroup_scan_tasks(memcg, schedule_unevict_task, NULL);
|
||||
memcg->allow_unevictable = 0;
|
||||
memcg->unevictable_percent = 100;
|
||||
atomic_long_set(&memcg->unevictable_size, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Lock (@enable == true) or unlock (@enable == false) the text of every
 * process in @memcg's hierarchy; a NULL @memcg means the root memcg,
 * i.e. all processes.
 */
void memcg_all_processes_unevict(struct mem_cgroup *memcg, bool enable)
{
	struct mem_cgroup *target;

	if (!unevictable_enabled())
		return;

	target = memcg ? memcg : root_mem_cgroup;
	mem_cgroup_scan_tasks(target,
			      enable ? schedule_unevict_task
				     : schedule_evict_task,
			      NULL);
}
|
||||
|
||||
static int __init setup_unevictable(char *s)
|
||||
{
|
||||
if (!strcmp(s, "1"))
|
||||
|
@ -573,9 +774,10 @@ static ssize_t unevictable_enabled_store(struct kobject *kobj,
|
|||
|
||||
if (!strncmp(buf, "1", 1))
|
||||
static_branch_enable(&unevictable_enabled_key);
|
||||
else if (!strncmp(buf, "0", 1))
|
||||
else if (!strncmp(buf, "0", 1)) {
|
||||
static_branch_disable(&unevictable_enabled_key);
|
||||
else
|
||||
make_all_memcg_evictable();
|
||||
} else
|
||||
ret = -EINVAL;
|
||||
|
||||
mutex_unlock(&mutex);
|
||||
|
|
Loading…
Reference in New Issue