866 lines
18 KiB
C
866 lines
18 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Pin Process Code Section:
|
|
* echo PID > /proc/unevictable/add_pid
|
|
* echo PID > /proc/unevictable/del_pid
|
|
* cat /proc/unevictable/add_pid
|
|
*
|
|
* Copyright (C) 2019 Alibaba
|
|
* Author: Xunlei Pang <xlpang@linux.alibaba.com>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*/
|
|
#include <linux/types.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
#include <linux/sched/mm.h>
#include <linux/swap.h>
#include <linux/ksm.h>
#include <linux/hugetlb.h>
#include <linux/math64.h>
#include <linux/rbtree.h>
#include <linux/uaccess.h>
#include <linux/kprobes.h>
#include <linux/workqueue.h>
#include <linux/pid_namespace.h>
#ifdef CONFIG_TEXT_UNEVICTABLE
#include <linux/unevictable.h>
#endif
|
|
|
|
#define PROC_NAME "unevictable"	/* name of the /proc directory */
#define NAME_BUF 8			/* max bytes accepted per PID write (incl. NUL) */

#ifdef CONFIG_TEXT_UNEVICTABLE
/* Global feature switch; toggled by the "unevictable=" boot param and sysfs. */
DEFINE_STATIC_KEY_FALSE(unevictable_enabled_key);

/* Walk every memory cgroup in the hierarchy via mem_cgroup_iter(). */
#define for_each_mem_cgroup(iter) \
	for (iter = mem_cgroup_iter(NULL, NULL, NULL); \
	     iter != NULL; \
	     iter = mem_cgroup_iter(NULL, iter, NULL))
#endif
|
|
|
|
/* Container for the rbtree of pinned pids; one instance (base_tree). */
struct evict_pids_t {
	struct rb_root root;
};
|
|
|
|
/*
 * One pinned process.  Linked into both base_tree (keyed by rootpid)
 * and pid_list; both are protected by pid_mutex.
 */
struct evict_pid_entry {
	struct rb_node node;		/* link in base_tree, key = rootpid */
	struct list_head list;		/* link in pid_list */
	pid_t rootpid;			/* pid as seen from &init_pid_ns */
	u64 start_time;			/* tsk->start_boottime at pin time (detects pid reuse) */
#ifdef CONFIG_TEXT_UNEVICTABLE
	u64 unevict_size;		/* bytes of text currently mlocked for this task */
#endif
	struct task_struct *tsk;	/* task observed at pin time; compared, not referenced */
	bool done;			/* true once execute_vm_lock() has pinned the VMAs */
};
|
|
|
|
static void execute_vm_lock(struct work_struct *unused);
/* rbtree of pinned pids, keyed by init-namespace pid; guarded by pid_mutex */
static struct evict_pids_t *base_tree;
static DEFINE_MUTEX(pid_mutex);

/* List of all evict_pid_entry objects; guarded by pid_mutex */
LIST_HEAD(pid_list);
static int proc_pids_count;	/* number of entries in base_tree / pid_list */

/* Deferred worker that mlocks the text VMAs of queued pids. */
static DECLARE_DELAYED_WORK(evict_work, execute_vm_lock);

/* NOTE(review): this struct appears unused in this file — verify before removal. */
struct proc_pids_t {
	struct rb_root proc_pids_tree;
};
|
|
|
/* Called with pid_mutex held always */
|
|
static void __remove_entry(struct evict_pid_entry *pid)
|
|
{
|
|
if (pid == NULL)
|
|
return;
|
|
|
|
rb_erase(&pid->node, &base_tree->root);
|
|
proc_pids_count--;
|
|
}
|
|
|
|
/*
 * Undo the mlock pinning previously applied to @pid's executable file
 * mappings.
 *
 * The task is looked up by its init-namespace pid and only touched when
 * both the cached task pointer and start_boottime still match the values
 * recorded at pin time, i.e. the pid was not recycled by a new process.
 * For each file-backed VMA with VM_READ|VM_EXEC the VM_LOCKED bits are
 * cleared via mlock_fixup() under mmap_write_lock.  An mm with VM_LOCKED
 * in def_flags (mlockall semantics) is left alone.
 *
 * Should not be called in atomic context (i.e. hrtimer): takes mmap_lock
 * and may sleep.
 */
static void __evict_pid(struct evict_pid_entry *pid)
{
	struct task_struct *tsk;
	struct mm_struct *mm;

	if (!pid)
		return;

	/* Pin the task so it outlives the RCU section. */
	rcu_read_lock();
	tsk = find_task_by_pid_ns(pid->rootpid, &init_pid_ns);
	if (tsk)
		get_task_struct(tsk);
	rcu_read_unlock();

	if (!tsk)
		return;

	/* Guard against pid reuse: the entry must refer to this very task. */
	if (tsk == pid->tsk && pid->start_time == tsk->start_boottime) {
		mm = get_task_mm(tsk);
		if (mm) {
			if (!(mm->def_flags & VM_LOCKED)) {
				struct vm_area_struct *vma, *prev = NULL;
				vm_flags_t flag;
#ifdef CONFIG_TEXT_UNEVICTABLE
				unsigned long size = 0;
				struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
#endif

				VMA_ITERATOR(vmi, mm, 0);
				mmap_write_lock(mm);

				for_each_vma(vmi, vma) {
					/* Only text mappings were pinned: file-backed, R+X. */
					if (vma->vm_file &&
					    (vma->vm_flags & VM_EXEC) &&
					    (vma->vm_flags & VM_READ)) {
						flag = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
						mlock_fixup(&vmi, vma, &prev,
							    vma->vm_start, vma->vm_end, flag);
#ifdef CONFIG_TEXT_UNEVICTABLE
						size += vma->vm_end - vma->vm_start;
#endif
					}
				}

				mmap_write_unlock(mm);
#ifdef CONFIG_TEXT_UNEVICTABLE
				/* Return the unpinned bytes to memcg accounting. */
				memcg_decrease_unevict_size(memcg, size);
				css_put(&memcg->css);
				pid->unevict_size -= size;
#endif
			}
			mmput(mm);
		}
	}
	put_task_struct(tsk);
}
|
|
|
|
static struct evict_pid_entry *lookup_unevict_entry(struct task_struct *tsk)
|
|
{
|
|
struct evict_pid_entry *entry, *result;
|
|
struct rb_node *parent = NULL;
|
|
struct rb_node **link;
|
|
pid_t rootpid;
|
|
|
|
if (!tsk)
|
|
return NULL;
|
|
|
|
rcu_read_lock();
|
|
get_task_struct(tsk);
|
|
rootpid = __task_pid_nr_ns(tsk, PIDTYPE_PID, &init_pid_ns);
|
|
put_task_struct(tsk);
|
|
rcu_read_unlock();
|
|
|
|
result = NULL;
|
|
link = &base_tree->root.rb_node;
|
|
/*maybe unevictable feature not ready */
|
|
while (*link) {
|
|
parent = *link;
|
|
entry = rb_entry(parent, struct evict_pid_entry, node);
|
|
if (rootpid < entry->rootpid)
|
|
link = &(*link)->rb_left;
|
|
else if (rootpid > entry->rootpid)
|
|
link = &(*link)->rb_right;
|
|
else {
|
|
result = entry;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
 * Remove @tsk from the pinned set and unpin its text pages.
 *
 * Special case: a NULL @tsk means "garbage collect" — walk pid_list and
 * drop every entry whose init-namespace pid no longer resolves to a live
 * task.
 *
 * pid_mutex is dropped before __evict_pid() because unpinning takes
 * mmap_lock and may sleep; the entry is already unlinked at that point,
 * so no other path can reach it.
 */
void del_unevict_task(struct task_struct *tsk)
{
	struct evict_pid_entry *result;

	if (!tsk) {
		struct evict_pid_entry *pid_entry, *tmp;

		mutex_lock(&pid_mutex);
		list_for_each_entry_safe(pid_entry, tmp, &pid_list, list) {
			/* Liveness check only; no reference taken. */
			rcu_read_lock();
			tsk = find_task_by_pid_ns(pid_entry->rootpid,
						  &init_pid_ns);
			rcu_read_unlock();
			if (!tsk) {
				list_del(&pid_entry->list);
				__remove_entry(pid_entry);
				kfree(pid_entry);
			}
		}
		mutex_unlock(&pid_mutex);
		return;
	}

	mutex_lock(&pid_mutex);
	result = lookup_unevict_entry(tsk);
	if (result) {
		list_del(&result->list);
		__remove_entry(result);
		/* Unlock before the sleeping unpin; entry is now private. */
		mutex_unlock(&pid_mutex);
		__evict_pid(result);
		kfree(result);
	} else
		mutex_unlock(&pid_mutex);
}
|
|
|
|
/*
 * Resolve @pid in the caller's pid namespace and undo its text pinning.
 * Invalid or dead pids are ignored.
 */
static void evict_pid(pid_t pid)
{
	struct task_struct *task = NULL;

	if (pid <= 0)
		return;

	rcu_read_lock();
	task = find_task_by_pid_ns(pid, task_active_pid_ns(current));
	if (task)
		get_task_struct(task);
	rcu_read_unlock();

	if (!task)
		return;

	del_unevict_task(task);
	put_task_struct(task);
}
|
|
|
|
/*
 * Insert @tsk into the pinned set (keyed by its init-namespace pid).
 *
 * A new entry is preallocated before taking pid_mutex (GFP_NOWAIT —
 * presumably so callers on cgroup-attach paths never block in reclaim;
 * allocation failure silently drops the request).  If an entry already
 * exists, it is revalidated: a dead pid drops the entry, a recycled pid
 * (different task pointer or start_boottime) clears ->done so the
 * worker re-pins it.
 *
 * The actual mlocking is done later by execute_vm_lock().
 */
static void add_unevict_task(struct task_struct *tsk)
{
	struct evict_pid_entry *entry, *new_entry, *result;
	struct rb_node *parent = NULL;
	struct rb_node **link;
	pid_t rootpid;

	if (!tsk)
		return;

	new_entry = kzalloc(sizeof(*new_entry), GFP_NOWAIT);
	if (!new_entry)
		return;

	result = NULL;
	get_task_struct(tsk);
	rootpid = __task_pid_nr_ns(tsk, PIDTYPE_PID, &init_pid_ns);
	put_task_struct(tsk);
	mutex_lock(&pid_mutex);
	/* Standard rbtree descent; remember insertion point in parent/link. */
	link = &base_tree->root.rb_node;
	while (*link) {
		parent = *link;
		entry = rb_entry(parent, struct evict_pid_entry, node);
		if (rootpid < entry->rootpid) {
			link = &(*link)->rb_left;
		} else if (rootpid > entry->rootpid) {
			link = &(*link)->rb_right;
		} else {
			result = entry;
			break;
		}
	}

	if (!result) {
		/* Not present: consume the preallocated entry. */
		result = new_entry;
		result->rootpid = rootpid;
#ifdef CONFIG_TEXT_UNEVICTABLE
		result->unevict_size = 0;
#endif
		rb_link_node(&result->node, parent, link);
		rb_insert_color(&result->node, &base_tree->root);
		list_add_tail(&result->list, &pid_list);
		proc_pids_count++;
		mutex_unlock(&pid_mutex);
	} else {
		/* Already present: revalidate against the live task. */
		rcu_read_lock();
		tsk = find_task_by_pid_ns(rootpid, &init_pid_ns);
		if (tsk)
			get_task_struct(tsk);
		rcu_read_unlock();
		if (!tsk) {
			/* Pid died meanwhile: discard the stale entry. */
			list_del(&result->list);
			__remove_entry(result);
			mutex_unlock(&pid_mutex);
			kfree(result);
			kfree(new_entry);
			return;
		} else if (tsk != result->tsk ||
			   result->start_time != tsk->start_boottime) {
			/* Pid was recycled: force the worker to re-pin. */
			result->done = false;
		}
		put_task_struct(tsk);
		mutex_unlock(&pid_mutex);
		kfree(new_entry);
	}
}
|
|
|
|
/*
 * Resolve @pid in the caller's pid namespace and queue it for text
 * pinning.  Skipped when the task's memcg already pins text globally.
 */
static void unevict_pid(pid_t pid)
{
	struct task_struct *task = NULL;

	if (pid <= 0)
		return;

	rcu_read_lock();
	task = find_task_by_pid_ns(pid, task_active_pid_ns(current));
	if (task)
		get_task_struct(task);
	rcu_read_unlock();

	if (!task)
		return;

#ifdef CONFIG_TEXT_UNEVICTABLE
	if (is_memcg_unevictable_enabled(mem_cgroup_from_task(task))) {
		put_task_struct(task);
		return;
	}
#endif
	add_unevict_task(task);
	put_task_struct(task);
}
|
|
|
|
/*
 * Snapshot of pinned pids taken for one read of /proc/unevictable/add_pid.
 * Allocated in add_pid_start(), freed in add_pid_stop().
 *
 * pids[] is a C99 flexible array member; the previous GNU zero-length
 * array form (pids[0]) is deprecated in the kernel and defeats
 * bounds checking.
 */
struct add_pid_seq_context {
	int idx;	/* current position of the seq_file iterator */
	int count;	/* number of valid pids in pids[] */
	int pids[];	/* namespace-translated pids, sized at allocation */
};
|
|
|
|
/*
 * Note there exists a race condition that we may get inconsistent snapshots
 * of pid array if call add_pid_start() more than one round due to users add
 * or delete the pid. However, I think it's acceptable because the pid may
 * still change even we get a consistent snapshot to show.
 */
/*
 * seq_file .start: build a snapshot of all live pinned pids, translated
 * into the reading process's pid namespace.  Entries whose task has died
 * are garbage-collected in passing.  Returns the snapshot context, or
 * NULL at end-of-sequence / on allocation failure.
 */
static void *add_pid_start(struct seq_file *m, loff_t *pos)
{
	struct add_pid_seq_context *ctx = NULL;
	struct evict_pid_entry *pid_entry;
	struct task_struct *tsk;
	struct evict_pid_entry *tmp;
	pid_t pid;

	mutex_lock(&pid_mutex);
	if (*pos >= proc_pids_count)
		goto done;
	/* Sized by proc_pids_count, which can only shrink below. */
	ctx = kvzalloc(sizeof(*ctx) + proc_pids_count * sizeof(int), GFP_KERNEL);
	if (unlikely(!ctx))
		goto done;

	if (proc_pids_count > 0) {
		list_for_each_entry_safe(pid_entry, tmp, &pid_list, list) {
			rcu_read_lock();
			tsk = find_task_by_pid_ns(pid_entry->rootpid,
						  &init_pid_ns);
			if (tsk) {
				get_task_struct(tsk);
				/* Translate into the reader's namespace. */
				pid = __task_pid_nr_ns(tsk, PIDTYPE_PID,
						       task_active_pid_ns(current));
				put_task_struct(tsk);
			} else {
				pid = -1;
			}
			rcu_read_unlock();

			if (pid != -1) {
				ctx->pids[ctx->count++] = pid;
			} else {
				/* Task is gone: drop the stale entry now. */
				list_del(&pid_entry->list);
				__remove_entry(pid_entry);
				kfree(pid_entry);
			}
		}
	}
	if (*pos >= ctx->count)
		goto done;
	mutex_unlock(&pid_mutex);
	ctx->idx = *pos;
	m->private = ctx;	/* freed by add_pid_stop() */
	return ctx;
done:
	mutex_unlock(&pid_mutex);
	kvfree(ctx);
	return NULL;
}
|
|
|
|
/* seq_file .next: advance to the following snapshot slot, if any. */
static void *add_pid_next(struct seq_file *m, void *p, loff_t *pos)
{
	struct add_pid_seq_context *ctx = p;

	++*pos;
	ctx->idx = *pos;
	if (ctx->idx >= ctx->count)
		return NULL;
	return ctx;
}
|
|
|
|
static void add_pid_stop(struct seq_file *m, void *p)
|
|
{
|
|
kvfree(m->private);
|
|
m->private = NULL;
|
|
}
|
|
|
|
static int add_pid_show(struct seq_file *m, void *p)
|
|
{
|
|
struct add_pid_seq_context *ctx = p;
|
|
|
|
seq_printf(m, "%d", ctx->pids[ctx->idx]);
|
|
seq_putc(m, (ctx->idx == ctx->count - 1) ? '\n' : ',');
|
|
return 0;
|
|
}
|
|
|
|
/* seq_file iterator backing reads of /proc/unevictable/add_pid. */
static const struct seq_operations seq_add_pid_op = {
	.start = add_pid_start,
	.next = add_pid_next,
	.stop = add_pid_stop,
	.show = add_pid_show,
};
|
|
|
|
/* open() handler for /proc/unevictable/add_pid: attach the seq iterator. */
static int proc_open_add_pid(struct inode *inode, struct file *file)
{
	return seq_open(file, &seq_add_pid_op);
}
|
|
|
|
/*
 * Delayed-work handler: walk pid_list and mlock (VM_LOCKED|VM_LOCKONFAULT)
 * the file-backed R+X VMAs of every entry not yet marked done.
 *
 * Uses mutex_trylock and simply returns when pid_mutex is contended;
 * NOTE(review): the work is not rescheduled in that case, so a pid queued
 * while the mutex was held may never get pinned — confirm this is intended.
 * Entries whose task died, or whose mm is gone / already mlockall'd
 * (VM_LOCKED in def_flags), are removed from the set.
 */
static void execute_vm_lock(struct work_struct *unused)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct evict_pid_entry *result, *tmp;
	pid_t rootpid;

	if (!mutex_trylock(&pid_mutex)) {
		goto out;
	}

	if (proc_pids_count <= 0) {
		mutex_unlock(&pid_mutex);
		goto out;
	}

	list_for_each_entry_safe(result, tmp, &pid_list, list) {
		rootpid = result->rootpid;
		if (result->done || rootpid <= 0)
			continue;

		rcu_read_lock();
		tsk = find_task_by_pid_ns(rootpid, &init_pid_ns);
		if (tsk)
			get_task_struct(tsk);
		rcu_read_unlock();
		if (!tsk) {
			/* Task died before we got to it: drop the entry. */
			list_del(&result->list);
			__remove_entry(result);
			kfree(result);
			continue;
		}

		mm = get_task_mm(tsk);
		if (mm && !(mm->def_flags & VM_LOCKED)) {
#ifdef CONFIG_TEXT_UNEVICTABLE
			struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
#endif
			struct vm_area_struct *vma, *prev = NULL;
			vm_flags_t flag;

			VMA_ITERATOR(vmi, mm, 0);
			mmap_write_lock(mm);

			for_each_vma(vmi, vma) {
#ifdef CONFIG_TEXT_UNEVICTABLE
				/* Stop pinning once the memcg percent cap is hit. */
				if (is_unevictable_size_overflow(memcg))
					break;
#endif
				/* Pin text only: file-backed, readable, executable. */
				if (vma->vm_file &&
				    (vma->vm_flags & VM_EXEC) &&
				    (vma->vm_flags & VM_READ)) {
					flag = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
					flag |= (VM_LOCKED | VM_LOCKONFAULT);
					mlock_fixup(&vmi, vma, &prev,
						    vma->vm_start, vma->vm_end, flag);
#ifdef CONFIG_TEXT_UNEVICTABLE
					result->unevict_size += vma->vm_end - vma->vm_start;
#endif
				}
			}

			/* Record identity so later passes detect pid reuse. */
			result->tsk = tsk;
			result->start_time = tsk->start_boottime;
			result->done = true;
			mmap_write_unlock(mm);
#ifdef CONFIG_TEXT_UNEVICTABLE
			memcg_increase_unevict_size(memcg,
						    result->unevict_size);
			css_put(&memcg->css);
#endif
		} else {
			/* No mm, or mlockall already covers it: drop the entry. */
			list_del(&result->list);
			__remove_entry(result);
			kfree(result);
		}

		if (mm)
			mmput(mm);
		if (tsk)
			put_task_struct(tsk);
	}
	mutex_unlock(&pid_mutex);

out:
	return;
}
|
|
|
|
|
|
static ssize_t proc_write_add_pid(struct file *file,
|
|
const char __user *buffer, size_t count, loff_t *ppos)
|
|
{
|
|
char buf[NAME_BUF];
|
|
int err;
|
|
long pid;
|
|
int ret = count;
|
|
|
|
if (count > NAME_BUF - 1) {
|
|
ret = -EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
memset(buf, 0, sizeof(buf));
|
|
if (copy_from_user(buf, buffer, count)) {
|
|
ret = -EFAULT;
|
|
goto out;
|
|
}
|
|
|
|
err = kstrtol(strstrip(buf), 0, &pid);
|
|
if (err || pid <= 0) {
|
|
ret = -EINVAL;
|
|
goto out;
|
|
} else {
|
|
unevict_pid((pid_t)pid);
|
|
schedule_delayed_work(&evict_work, HZ);
|
|
}
|
|
|
|
out:
|
|
return ret;
|
|
}
|
|
|
|
static ssize_t proc_write_del_pid(struct file *file,
|
|
const char __user *buffer, size_t count, loff_t *ppos)
|
|
{
|
|
char buf[NAME_BUF];
|
|
int err;
|
|
long pid;
|
|
int ret = count;
|
|
|
|
memset(buf, 0, sizeof(buf));
|
|
if (count > NAME_BUF - 1) {
|
|
ret = -EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
if (copy_from_user(buf, buffer, count)) {
|
|
ret = -EFAULT;
|
|
goto out;
|
|
}
|
|
|
|
err = kstrtol(strstrip(buf), 0, &pid);
|
|
if (err || pid <= 0) {
|
|
ret = -EINVAL;
|
|
goto out;
|
|
} else {
|
|
evict_pid(pid);
|
|
}
|
|
|
|
out:
|
|
return ret;
|
|
}
|
|
|
|
const static struct proc_ops add_proc_fops = {
|
|
.proc_open = proc_open_add_pid,
|
|
.proc_read = seq_read,
|
|
.proc_write = proc_write_add_pid,
|
|
.proc_lseek = seq_lseek,
|
|
.proc_release = seq_release,
|
|
};
|
|
|
|
const static struct proc_ops del_proc_fops = {
|
|
.proc_write = proc_write_del_pid,
|
|
};
|
|
|
|
#ifdef CONFIG_TEXT_UNEVICTABLE
|
|
/*
 * Exit-path hook: drop @tsk's entry from the pinned set and return its
 * accounted bytes to the memcg counter.  Unlike del_unevict_task(), the
 * VMAs are not touched — presumably the exiting mm is being torn down
 * anyway.
 */
void clean_task_unevict_size(struct task_struct *tsk)
{
	struct evict_pid_entry *result;
	struct mem_cgroup *memcg;

	/*
	 * There must make sure unevictable
	 * function is finished (base_tree is allocated at module init).
	 */
	if (!tsk || !base_tree)
		return;

	mutex_lock(&pid_mutex);
	result = lookup_unevict_entry(tsk);
	if (result) {
		if (result->unevict_size) {
			/* RCU protects the task->memcg association. */
			rcu_read_lock();
			memcg = mem_cgroup_from_task(tsk);
			memcg_decrease_unevict_size(memcg, result->unevict_size);
			rcu_read_unlock();
		}
		list_del(&result->list);
		__remove_entry(result);
		mutex_unlock(&pid_mutex);
		kfree(result);
	} else
		mutex_unlock(&pid_mutex);
}
|
|
|
|
bool is_memcg_unevictable_enabled(struct mem_cgroup *memcg)
|
|
{
|
|
if (!unevictable_enabled())
|
|
return false;
|
|
|
|
if (!memcg)
|
|
return false;
|
|
|
|
if (memcg->allow_unevictable)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
/* Account @size more bytes of pinned text to @memcg. */
void memcg_increase_unevict_size(struct mem_cgroup *memcg, unsigned long size)
{
	atomic_long_add(size, &memcg->unevictable_size);
}
|
|
|
|
/* Return @size bytes of previously pinned text to @memcg's budget. */
void memcg_decrease_unevict_size(struct mem_cgroup *memcg, unsigned long size)
{
	atomic_long_sub(size, &memcg->unevictable_size);
}
|
|
|
|
bool is_unevictable_size_overflow(struct mem_cgroup *memcg)
|
|
{
|
|
struct page_counter *counter;
|
|
u64 res_limit;
|
|
u64 size;
|
|
|
|
counter = &memcg->memory;
|
|
res_limit = (u64)counter->max * PAGE_SIZE;
|
|
size = atomic_long_read(&memcg->unevictable_size);
|
|
size = size * 100 / res_limit;
|
|
if (size >= memcg->unevictable_percent)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
/* Report @memcg's current pinned-text byte count for exstat output. */
unsigned long memcg_exstat_text_unevict_gather(struct mem_cgroup *memcg)
{
	return atomic_long_read(&memcg->unevictable_size);
}
|
|
|
|
void mem_cgroup_can_unevictable(struct task_struct *tsk, struct mem_cgroup *to)
|
|
{
|
|
struct mem_cgroup *from;
|
|
|
|
if (!unevictable_enabled())
|
|
return;
|
|
|
|
from = mem_cgroup_from_task(tsk);
|
|
VM_BUG_ON(from == to);
|
|
|
|
if (to->allow_unevictable && !from->allow_unevictable) {
|
|
add_unevict_task(tsk);
|
|
schedule_delayed_work(&evict_work, HZ);
|
|
}
|
|
|
|
if (!to->allow_unevictable && from->allow_unevictable)
|
|
del_unevict_task(tsk);
|
|
}
|
|
|
|
void mem_cgroup_cancel_unevictable(struct cgroup_taskset *tset)
|
|
{
|
|
struct task_struct *tsk;
|
|
struct cgroup_subsys_state *dst_css;
|
|
struct mem_cgroup *memcg;
|
|
|
|
if (!unevictable_enabled())
|
|
return;
|
|
|
|
cgroup_taskset_for_each(tsk, dst_css, tset) {
|
|
memcg = mem_cgroup_from_task(tsk);
|
|
|
|
if (memcg->allow_unevictable)
|
|
del_unevict_task(tsk);
|
|
}
|
|
}
|
|
|
|
/*
 * mem_cgroup_scan_tasks() callback: queue @tsk for pinning and kick the
 * worker.  Always returns 0 to continue the scan.
 */
static inline int schedule_unevict_task(struct task_struct *tsk, void *arg)
{
	add_unevict_task(tsk);
	schedule_delayed_work(&evict_work, HZ);

	return 0;
}
|
|
|
|
/*
 * mem_cgroup_scan_tasks() callback: unpin @tsk.  Always returns 0 to
 * continue the scan.
 */
static inline int schedule_evict_task(struct task_struct *tsk, void *arg)
{
	del_unevict_task(tsk);

	return 0;
}
|
|
|
|
/*
 * Global disable path: for every memcg that had pinning enabled, unpin
 * its tasks and reset its unevictable state to defaults.
 *
 * NOTE(review): the callback is schedule_unevict_task (which pins),
 * not schedule_evict_task — looks inverted for a disable path; confirm
 * against the original intent before relying on it.
 */
static inline void make_all_memcg_evictable(void)
{
	struct mem_cgroup *memcg;

	for_each_mem_cgroup(memcg) {
		if (!memcg->allow_unevictable)
			continue;
		mem_cgroup_scan_tasks(memcg, schedule_unevict_task, NULL);
		memcg->allow_unevictable = 0;
		memcg->unevictable_percent = 100;
		atomic_long_set(&memcg->unevictable_size, 0);
	}
}
|
|
|
|
/*
 * Pin (@enable) or unpin the text of every task in @memcg's hierarchy;
 * a NULL @memcg means the root cgroup, i.e. all processes.
 */
void memcg_all_processes_unevict(struct mem_cgroup *memcg, bool enable)
{
	struct mem_cgroup *target;

	if (!unevictable_enabled())
		return;

	target = memcg ? memcg : root_mem_cgroup;

	mem_cgroup_scan_tasks(target,
			      enable ? schedule_unevict_task : schedule_evict_task,
			      NULL);
}
|
|
|
|
/*
 * Parse the "unevictable=" boot parameter: "1" enables the feature,
 * "0" disables it, anything else is ignored.
 */
static int __init setup_unevictable(char *s)
{
	if (strcmp(s, "1") == 0)
		static_branch_enable(&unevictable_enabled_key);
	else if (strcmp(s, "0") == 0)
		static_branch_disable(&unevictable_enabled_key);

	return 1;
}
|
|
__setup("unevictable=", setup_unevictable);
|
|
|
|
#ifdef CONFIG_SYSFS
|
|
static ssize_t unevictable_enabled_show(struct kobject *kobj,
|
|
struct kobj_attribute *attr, char *buf)
|
|
{
|
|
return sprintf(buf, "%d\n", !!static_branch_unlikely(&unevictable_enabled_key));
|
|
}
|
|
/*
 * sysfs store for /sys/kernel/mm/unevictable/enabled: "1" enables the
 * feature; "0" disables it and reverts every memcg to evictable.  Any
 * other input returns -EINVAL.  Serialized by a local mutex so
 * enable/disable cannot interleave.
 */
static ssize_t unevictable_enabled_store(struct kobject *kobj,
		struct kobj_attribute *attr,
		const char *buf, size_t count)
{
	static DEFINE_MUTEX(mutex);
	ssize_t ret = count;

	mutex_lock(&mutex);

	if (!strncmp(buf, "1", 1))
		static_branch_enable(&unevictable_enabled_key);
	else if (!strncmp(buf, "0", 1)) {
		static_branch_disable(&unevictable_enabled_key);
		make_all_memcg_evictable();
	} else
		ret = -EINVAL;

	mutex_unlock(&mutex);
	return ret;
}
|
|
/* /sys/kernel/mm/unevictable/enabled, rw for root (0644). */
static struct kobj_attribute unevictable_enabled_attr =
	__ATTR(enabled, 0644, unevictable_enabled_show,
	       unevictable_enabled_store);

static struct attribute *unevictable_attrs[] = {
	&unevictable_enabled_attr.attr,
	NULL,
};

static struct attribute_group unevictable_attr_group = {
	.attrs = unevictable_attrs,
};
|
|
|
|
/*
 * Create /sys/kernel/mm/unevictable and its attribute group.
 * Returns 0 on success or a negative errno; the kobject is dropped on
 * group-creation failure.
 */
static int __init unevictable_init_sysfs(void)
{
	int err;
	struct kobject *unevictable_kobj;

	unevictable_kobj = kobject_create_and_add("unevictable", mm_kobj);
	if (!unevictable_kobj) {
		pr_err("failed to create unevictable kobject\n");
		return -ENOMEM;
	}
	err = sysfs_create_group(unevictable_kobj, &unevictable_attr_group);
	if (err) {
		pr_err("failed to register unevictable group\n");
		goto delete_obj;
	}
	return 0;

delete_obj:
	kobject_put(unevictable_kobj);
	return err;
}
|
|
#endif /* CONFIG_SYSFS */
|
|
#endif /* CONFIG_TEXT_UNEVICTABLE */
|
|
|
|
/*
 * Module init: create /proc/unevictable/{add_pid,del_pid}, allocate the
 * pid rbtree, and (when configured) register the sysfs toggle.
 * Returns 0 on success, -ENOMEM with all proc entries removed otherwise.
 *
 * Fix: the original placed the failure pr_err() immediately after
 * "return 0;", making it unreachable dead code; it now sits on the
 * shared error path.
 */
static int __init unevictable_init(void)
{
	struct proc_dir_entry *monitor_dir, *add_pid_file, *del_pid_file;

	monitor_dir = proc_mkdir(PROC_NAME, NULL);
	if (!monitor_dir)
		goto out;

	add_pid_file = proc_create("add_pid", 0600,
				   monitor_dir, &add_proc_fops);
	if (!add_pid_file)
		goto out_dir;

	del_pid_file = proc_create("del_pid", 0200,
				   monitor_dir, &del_proc_fops);
	if (!del_pid_file)
		goto out_add_pid;

	base_tree = kzalloc(sizeof(*base_tree), GFP_KERNEL);
	if (!base_tree)
		goto out_del_pid;

	INIT_LIST_HEAD(&pid_list);

#if defined(CONFIG_SYSFS) && defined(CONFIG_TEXT_UNEVICTABLE)
	/* Non-fatal: the proc interface still works without the sysfs knob. */
	if (unevictable_init_sysfs())
		pr_err("memcg text unevictable sysfs create failed\n");
#endif
	return 0;

out_del_pid:
	remove_proc_entry("del_pid", monitor_dir);
out_add_pid:
	remove_proc_entry("add_pid", monitor_dir);
out_dir:
	remove_proc_entry(PROC_NAME, NULL);
out:
	pr_err("unevictpid create proc dir failed\n");
	return -ENOMEM;
}
|
|
|
|
module_init(unevictable_init);
|