mm, oom: reintroduce /proc/pid/oom_adj

This is mostly a revert of 01dc52ebdf ("oom: remove deprecated oom_adj")
from Davidlohr Bueso.

It reintroduces /proc/pid/oom_adj for backwards compatibility with earlier
kernels.  It simply scales the value linearly when /proc/pid/oom_score_adj
is written.

The major difference is that its scheduled removal is no longer included
in Documentation/feature-removal-schedule.txt.  We do warn users with a
single printk, though, to suggest the more powerful and supported
/proc/pid/oom_score_adj interface.

Reported-by: Artem S. Tashkinov <t.artem@lycos.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
David Rientjes 2012-11-12 17:53:04 -08:00 committed by Linus Torvalds
parent f4bcd79c88
commit fa0cbbf145
3 changed files with 130 additions and 4 deletions

View File

@ -33,7 +33,7 @@ Table of Contents
2 Modifying System Parameters
3 Per-Process Parameters
3.1 /proc/<pid>/oom_score_adj - Adjust the oom-killer
3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj - Adjust the oom-killer
score
3.2 /proc/<pid>/oom_score - Display current oom-killer score
3.3 /proc/<pid>/io - Display the IO accounting fields
@ -1320,10 +1320,10 @@ of the kernel.
CHAPTER 3: PER-PROCESS PARAMETERS
------------------------------------------------------------------------------
3.1 /proc/<pid>/oom_score_adj- Adjust the oom-killer score
3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj- Adjust the oom-killer score
--------------------------------------------------------------------------------
This file can be used to adjust the badness heuristic used to select which
These file can be used to adjust the badness heuristic used to select which
process gets killed in out of memory conditions.
The badness heuristic assigns a value to each candidate task ranging from 0
@ -1361,6 +1361,12 @@ same system, cpuset, mempolicy, or memory controller resources to use at least
equivalent to discounting 50% of the task's allowed memory from being considered
as scoring against the task.
For backwards compatibility with previous kernels, /proc/<pid>/oom_adj may also
be used to tune the badness score. Its acceptable values range from -16
(OOM_ADJUST_MIN) to +15 (OOM_ADJUST_MAX) and a special value of -17
(OOM_DISABLE) to disable oom killing entirely for that task. Its value is
scaled linearly with /proc/<pid>/oom_score_adj.
The value of /proc/<pid>/oom_score_adj may be reduced no lower than the last
value set by a CAP_SYS_RESOURCE process. To reduce the value any lower
requires CAP_SYS_RESOURCE.
@ -1375,7 +1381,9 @@ minimal amount of work.
-------------------------------------------------------------
This file can be used to check the current score used by the oom-killer is for
any given <pid>.
any given <pid>. Use it together with /proc/<pid>/oom_score_adj to tune which
process should be killed in an out-of-memory situation.
3.3 /proc/<pid>/io - Display the IO accounting fields
-------------------------------------------------------

View File

@ -873,6 +873,113 @@ static const struct file_operations proc_environ_operations = {
.release = mem_release,
};
static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
loff_t *ppos)
{
struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
char buffer[PROC_NUMBUF];
int oom_adj = OOM_ADJUST_MIN;
size_t len;
unsigned long flags;
if (!task)
return -ESRCH;
if (lock_task_sighand(task, &flags)) {
if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
oom_adj = OOM_ADJUST_MAX;
else
oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
OOM_SCORE_ADJ_MAX;
unlock_task_sighand(task, &flags);
}
put_task_struct(task);
len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
return simple_read_from_buffer(buf, count, ppos, buffer, len);
}
static ssize_t oom_adj_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct task_struct *task;
char buffer[PROC_NUMBUF];
int oom_adj;
unsigned long flags;
int err;
memset(buffer, 0, sizeof(buffer));
if (count > sizeof(buffer) - 1)
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count)) {
err = -EFAULT;
goto out;
}
err = kstrtoint(strstrip(buffer), 0, &oom_adj);
if (err)
goto out;
if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) &&
oom_adj != OOM_DISABLE) {
err = -EINVAL;
goto out;
}
task = get_proc_task(file->f_path.dentry->d_inode);
if (!task) {
err = -ESRCH;
goto out;
}
task_lock(task);
if (!task->mm) {
err = -EINVAL;
goto err_task_lock;
}
if (!lock_task_sighand(task, &flags)) {
err = -ESRCH;
goto err_task_lock;
}
/*
* Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
* value is always attainable.
*/
if (oom_adj == OOM_ADJUST_MAX)
oom_adj = OOM_SCORE_ADJ_MAX;
else
oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
if (oom_adj < task->signal->oom_score_adj &&
!capable(CAP_SYS_RESOURCE)) {
err = -EACCES;
goto err_sighand;
}
/*
* /proc/pid/oom_adj is provided for legacy purposes, ask users to use
* /proc/pid/oom_score_adj instead.
*/
printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
current->comm, task_pid_nr(current), task_pid_nr(task),
task_pid_nr(task));
task->signal->oom_score_adj = oom_adj;
trace_oom_score_adj_update(task);
err_sighand:
unlock_task_sighand(task, &flags);
err_task_lock:
task_unlock(task);
put_task_struct(task);
out:
return err < 0 ? err : count;
}
static const struct file_operations proc_oom_adj_operations = {
.read = oom_adj_read,
.write = oom_adj_write,
.llseek = generic_file_llseek,
};
static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
@ -2598,6 +2705,7 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("cgroup", S_IRUGO, proc_cgroup_operations),
#endif
INF("oom_score", S_IRUGO, proc_oom_score),
REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
@ -2964,6 +3072,7 @@ static const struct pid_entry tid_base_stuff[] = {
REG("cgroup", S_IRUGO, proc_cgroup_operations),
#endif
INF("oom_score", S_IRUGO, proc_oom_score),
REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),

View File

@ -8,4 +8,13 @@
#define OOM_SCORE_ADJ_MIN (-1000)
#define OOM_SCORE_ADJ_MAX 1000
/*
* /proc/<pid>/oom_adj set to -17 protects from the oom killer for legacy
* purposes.
*/
#define OOM_DISABLE (-17)
/* inclusive */
#define OOM_ADJUST_MIN (-16)
#define OOM_ADJUST_MAX 15
#endif /* _UAPI__INCLUDE_LINUX_OOM_H */