2010-03-08 08:41:34 +08:00
|
|
|
#include <linux/proc_fs.h>
|
|
|
|
#include <linux/nsproxy.h>
|
|
|
|
#include <linux/ptrace.h>
|
|
|
|
#include <linux/namei.h>
|
|
|
|
#include <linux/file.h>
|
|
|
|
#include <linux/utsname.h>
|
|
|
|
#include <net/net_namespace.h>
|
|
|
|
#include <linux/ipc_namespace.h>
|
|
|
|
#include <linux/pid_namespace.h>
|
2012-07-26 21:24:06 +08:00
|
|
|
#include <linux/user_namespace.h>
|
2010-03-08 08:41:34 +08:00
|
|
|
#include "internal.h"
|
|
|
|
|
|
|
|
|
|
|
|
static const struct proc_ns_operations *ns_entries[] = {
|
2010-03-08 10:14:23 +08:00
|
|
|
#ifdef CONFIG_NET_NS
|
|
|
|
&netns_operations,
|
|
|
|
#endif
|
2010-03-08 10:43:27 +08:00
|
|
|
#ifdef CONFIG_UTS_NS
|
|
|
|
&utsns_operations,
|
|
|
|
#endif
|
2010-03-08 10:48:39 +08:00
|
|
|
#ifdef CONFIG_IPC_NS
|
|
|
|
&ipcns_operations,
|
|
|
|
#endif
|
2010-03-08 10:17:03 +08:00
|
|
|
#ifdef CONFIG_PID_NS
|
|
|
|
&pidns_operations,
|
2012-07-26 21:24:06 +08:00
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_USER_NS
|
|
|
|
&userns_operations,
|
2010-03-08 10:17:03 +08:00
|
|
|
#endif
|
2010-03-08 10:49:36 +08:00
|
|
|
&mntns_operations,
|
2010-03-08 08:41:34 +08:00
|
|
|
};
|
|
|
|
|
2015-11-17 23:20:54 +08:00
|
|
|
static const char *proc_ns_get_link(struct dentry *dentry,
|
2015-12-30 04:58:39 +08:00
|
|
|
struct inode *inode,
|
|
|
|
struct delayed_call *done)
|
2011-06-19 08:48:18 +08:00
|
|
|
{
|
2014-11-01 23:10:28 +08:00
|
|
|
const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
|
2011-06-19 08:48:18 +08:00
|
|
|
struct task_struct *task;
|
2013-03-09 16:14:45 +08:00
|
|
|
struct path ns_path;
|
2011-06-19 08:48:18 +08:00
|
|
|
void *error = ERR_PTR(-EACCES);
|
|
|
|
|
2015-11-17 23:20:54 +08:00
|
|
|
if (!dentry)
|
|
|
|
return ERR_PTR(-ECHILD);
|
|
|
|
|
2011-06-19 08:48:18 +08:00
|
|
|
task = get_proc_task(inode);
|
|
|
|
if (!task)
|
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 22:57:28 +08:00
|
|
|
return error;
|
2011-06-19 08:48:18 +08:00
|
|
|
|
ptrace: use fsuid, fsgid, effective creds for fs access checks
By checking the effective credentials instead of the real UID / permitted
capabilities, ensure that the calling process actually intended to use its
credentials.
To ensure that all ptrace checks use the correct caller credentials (e.g.
in case out-of-tree code or newly added code omits the PTRACE_MODE_*CREDS
flag), use two new flags and require one of them to be set.
The problem was that when a privileged task had temporarily dropped its
privileges, e.g. by calling setreuid(0, user_uid), with the intent to
perform following syscalls with the credentials of a user, it still passed
ptrace access checks that the user would not be able to pass.
While an attacker should not be able to convince the privileged task to
perform a ptrace() syscall, this is a problem because the ptrace access
check is reused for things in procfs.
In particular, the following somewhat interesting procfs entries only rely
on ptrace access checks:
/proc/$pid/stat - uses the check for determining whether pointers
should be visible, useful for bypassing ASLR
/proc/$pid/maps - also useful for bypassing ASLR
/proc/$pid/cwd - useful for gaining access to restricted
directories that contain files with lax permissions, e.g. in
this scenario:
lrwxrwxrwx root root /proc/13020/cwd -> /root/foobar
drwx------ root root /root
drwxr-xr-x root root /root/foobar
-rw-r--r-- root root /root/foobar/secret
Therefore, on a system where a root-owned mode 6755 binary changes its
effective credentials as described and then dumps a user-specified file,
this could be used by an attacker to reveal the memory layout of root's
processes or reveal the contents of files he is not allowed to access
(through /proc/$pid/cwd).
[akpm@linux-foundation.org: fix warning]
Signed-off-by: Jann Horn <jann@thejh.net>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Casey Schaufler <casey@schaufler-ca.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: "Serge E. Hallyn" <serge.hallyn@ubuntu.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-01-21 07:00:04 +08:00
|
|
|
if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
|
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 22:57:28 +08:00
|
|
|
error = ns_get_path(&ns_path, task, ns_ops);
|
|
|
|
if (!error)
|
2015-05-03 01:37:52 +08:00
|
|
|
nd_jump_link(&ns_path);
|
2011-06-19 08:48:18 +08:00
|
|
|
}
|
|
|
|
put_task_struct(task);
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * ->readlink() for a namespace symlink.
 *
 * Builds the link text for the task's namespace with ns_get_name() and
 * copies it to the user buffer with readlink_copy().
 *
 * Returns -EACCES if the task no longer exists or the caller fails the
 * PTRACE_MODE_READ_FSCREDS check, a negative value from ns_get_name()
 * if that fails, and otherwise the result of readlink_copy().
 */
static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
	struct inode *inode = d_inode(dentry);
	struct task_struct *task = get_proc_task(inode);
	char name[50];
	int res;

	if (!task)
		return -EACCES;

	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
		res = -EACCES;
		goto out_put_task;
	}

	res = ns_get_name(name, sizeof(name), task, PROC_I(inode)->ns_ops);
	if (res < 0)
		goto out_put_task;

	res = readlink_copy(buffer, buflen, name);

out_put_task:
	/* Drop the reference taken by get_proc_task(). */
	put_task_struct(task);
	return res;
}
|
|
|
|
|
|
|
|
static const struct inode_operations proc_ns_link_inode_operations = {
|
|
|
|
.readlink = proc_ns_readlink,
|
2015-11-17 23:20:54 +08:00
|
|
|
.get_link = proc_ns_get_link,
|
2011-06-19 08:48:18 +08:00
|
|
|
.setattr = proc_setattr,
|
|
|
|
};
|
|
|
|
|
2013-06-15 15:15:20 +08:00
|
|
|
static int proc_ns_instantiate(struct inode *dir,
|
2010-03-08 08:41:34 +08:00
|
|
|
struct dentry *dentry, struct task_struct *task, const void *ptr)
|
|
|
|
{
|
|
|
|
const struct proc_ns_operations *ns_ops = ptr;
|
|
|
|
struct inode *inode;
|
|
|
|
struct proc_inode *ei;
|
|
|
|
|
|
|
|
inode = proc_pid_make_inode(dir->i_sb, task);
|
|
|
|
if (!inode)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
ei = PROC_I(inode);
|
2011-06-19 08:48:18 +08:00
|
|
|
inode->i_mode = S_IFLNK|S_IRWXUGO;
|
|
|
|
inode->i_op = &proc_ns_link_inode_operations;
|
2014-11-01 23:10:28 +08:00
|
|
|
ei->ns_ops = ns_ops;
|
2010-03-08 08:41:34 +08:00
|
|
|
|
2012-03-24 06:02:55 +08:00
|
|
|
d_set_d_op(dentry, &pid_dentry_operations);
|
2010-03-08 08:41:34 +08:00
|
|
|
d_add(dentry, inode);
|
|
|
|
/* Close the race of the process dying before we return the dentry */
|
2012-06-11 04:03:43 +08:00
|
|
|
if (pid_revalidate(dentry, 0))
|
2013-06-15 15:15:20 +08:00
|
|
|
return 0;
|
2010-03-08 08:41:34 +08:00
|
|
|
out:
|
2013-06-15 15:15:20 +08:00
|
|
|
return -ENOENT;
|
2010-03-08 08:41:34 +08:00
|
|
|
}
|
|
|
|
|
2013-05-17 00:07:31 +08:00
|
|
|
static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx)
|
2010-03-08 08:41:34 +08:00
|
|
|
{
|
2013-05-17 00:07:31 +08:00
|
|
|
struct task_struct *task = get_proc_task(file_inode(file));
|
2010-03-08 08:41:34 +08:00
|
|
|
const struct proc_ns_operations **entry, **last;
|
|
|
|
|
|
|
|
if (!task)
|
2013-05-17 00:07:31 +08:00
|
|
|
return -ENOENT;
|
2010-03-08 08:41:34 +08:00
|
|
|
|
2013-05-17 00:07:31 +08:00
|
|
|
if (!dir_emit_dots(file, ctx))
|
|
|
|
goto out;
|
|
|
|
if (ctx->pos >= 2 + ARRAY_SIZE(ns_entries))
|
|
|
|
goto out;
|
|
|
|
entry = ns_entries + (ctx->pos - 2);
|
|
|
|
last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
|
|
|
|
while (entry <= last) {
|
|
|
|
const struct proc_ns_operations *ops = *entry;
|
|
|
|
if (!proc_fill_cache(file, ctx, ops->name, strlen(ops->name),
|
|
|
|
proc_ns_instantiate, task, ops))
|
|
|
|
break;
|
|
|
|
ctx->pos++;
|
|
|
|
entry++;
|
|
|
|
}
|
2010-03-08 08:41:34 +08:00
|
|
|
out:
|
|
|
|
put_task_struct(task);
|
2013-05-17 00:07:31 +08:00
|
|
|
return 0;
|
2010-03-08 08:41:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
const struct file_operations proc_ns_dir_operations = {
|
|
|
|
.read = generic_read_dir,
|
2013-05-17 00:07:31 +08:00
|
|
|
.iterate = proc_ns_dir_readdir,
|
2010-03-08 08:41:34 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static struct dentry *proc_ns_dir_lookup(struct inode *dir,
|
2012-06-11 05:13:09 +08:00
|
|
|
struct dentry *dentry, unsigned int flags)
|
2010-03-08 08:41:34 +08:00
|
|
|
{
|
2013-06-15 15:15:20 +08:00
|
|
|
int error;
|
2010-03-08 08:41:34 +08:00
|
|
|
struct task_struct *task = get_proc_task(dir);
|
|
|
|
const struct proc_ns_operations **entry, **last;
|
|
|
|
unsigned int len = dentry->d_name.len;
|
|
|
|
|
2013-06-15 15:15:20 +08:00
|
|
|
error = -ENOENT;
|
2010-03-08 08:41:34 +08:00
|
|
|
|
|
|
|
if (!task)
|
|
|
|
goto out_no_task;
|
|
|
|
|
2012-03-29 05:42:52 +08:00
|
|
|
last = &ns_entries[ARRAY_SIZE(ns_entries)];
|
|
|
|
for (entry = ns_entries; entry < last; entry++) {
|
2010-03-08 08:41:34 +08:00
|
|
|
if (strlen((*entry)->name) != len)
|
|
|
|
continue;
|
|
|
|
if (!memcmp(dentry->d_name.name, (*entry)->name, len))
|
|
|
|
break;
|
|
|
|
}
|
2012-03-29 05:42:52 +08:00
|
|
|
if (entry == last)
|
2010-03-08 08:41:34 +08:00
|
|
|
goto out;
|
|
|
|
|
|
|
|
error = proc_ns_instantiate(dir, dentry, task, *entry);
|
|
|
|
out:
|
|
|
|
put_task_struct(task);
|
|
|
|
out_no_task:
|
2013-06-15 15:15:20 +08:00
|
|
|
return ERR_PTR(error);
|
2010-03-08 08:41:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
const struct inode_operations proc_ns_dir_inode_operations = {
|
|
|
|
.lookup = proc_ns_dir_lookup,
|
|
|
|
.getattr = pid_getattr,
|
|
|
|
.setattr = proc_setattr,
|
|
|
|
};
|