take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now. It's not mountable (not even registered, so it's not in /proc/filesystems, etc.). Files on it *are* bindable - we explicitly permit that in do_loopback(). This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well. get_proc_ns() is a macro now (it simply returns ->i_private; it would have been an inline, if not for a header-ordering headache). proc_ns_inode() is an ex-parrot. The interface used in procfs is ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops). Dentries and inodes are never hashed; a non-counting reference to dentry is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path() if present. See ns_get_path()/ns_prune_dentry()/nsfs_evict() for details of that mechanism. As a result, proc_ns_follow_link() has stopped poking in nd->path.mnt; it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets from ns_get_path(). Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
f77c80142e
commit
e149ed2b80
|
@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
|
|||
attr.o bad_inode.o file.o filesystems.o namespace.o \
|
||||
seq_file.o xattr.o libfs.o fs-writeback.o \
|
||||
pnode.o splice.o sync.o utimes.o \
|
||||
stack.o fs_struct.o statfs.o fs_pin.o
|
||||
stack.o fs_struct.o statfs.o fs_pin.o nsfs.o
|
||||
|
||||
ifeq ($(CONFIG_BLOCK),y)
|
||||
obj-y += buffer.o block_dev.o direct-io.o mpage.o
|
||||
|
|
|
@ -147,3 +147,8 @@ extern const struct file_operations pipefifo_fops;
|
|||
*/
|
||||
extern void sb_pin_kill(struct super_block *sb);
|
||||
extern void mnt_pin_kill(struct mount *m);
|
||||
|
||||
/*
|
||||
* fs/nsfs.c
|
||||
*/
|
||||
/* Defined as a const object in fs/nsfs.c; the declaration must carry the same qualifier. */
extern const struct dentry_operations ns_dentry_operations;
|
||||
|
|
|
@ -1569,8 +1569,8 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
|
|||
static bool is_mnt_ns_file(struct dentry *dentry)
|
||||
{
|
||||
/* Is this a proxy for a mount namespace? */
|
||||
struct inode *inode = dentry->d_inode;
|
||||
return proc_ns_inode(inode) && dentry->d_fsdata == &mntns_operations;
|
||||
return dentry->d_op == &ns_dentry_operations &&
|
||||
dentry->d_fsdata == &mntns_operations;
|
||||
}
|
||||
|
||||
struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
|
||||
|
@ -2016,7 +2016,10 @@ static int do_loopback(struct path *path, const char *old_name,
|
|||
if (IS_MNT_UNBINDABLE(old))
|
||||
goto out2;
|
||||
|
||||
if (!check_mnt(parent) || !check_mnt(old))
|
||||
if (!check_mnt(parent))
|
||||
goto out2;
|
||||
|
||||
if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations)
|
||||
goto out2;
|
||||
|
||||
if (!recurse && has_locked_children(old, old_path.dentry))
|
||||
|
|
|
@ -0,0 +1,161 @@
|
|||
#include <linux/mount.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/proc_ns.h>
|
||||
#include <linux/magic.h>
|
||||
#include <linux/ktime.h>
|
||||
|
||||
/* Mount of the nsfs pseudo-filesystem; assigned by nsfs_init() from kern_mount(). */
static struct vfsmount *nsfs_mnt;
|
||||
|
||||
/*
 * File operations installed on every nsfs inode (see ns_get_path()).
 * proc_ns_fget() recognises a namespace file by comparing ->f_op
 * against the address of this table.
 */
static const struct file_operations ns_file_operations = {
|
||||
.llseek = no_llseek,
|
||||
};
|
||||
|
||||
static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
|
||||
{
|
||||
struct inode *inode = dentry->d_inode;
|
||||
const struct proc_ns_operations *ns_ops = dentry->d_fsdata;
|
||||
|
||||
return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
|
||||
ns_ops->name, inode->i_ino);
|
||||
}
|
||||
|
||||
static void ns_prune_dentry(struct dentry *dentry)
|
||||
{
|
||||
struct inode *inode = dentry->d_inode;
|
||||
if (inode) {
|
||||
struct ns_common *ns = inode->i_private;
|
||||
atomic_long_set(&ns->stashed, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Dentry operations for nsfs.  Passed to mount_pseudo() in nsfs_mount();
 * code outside this file compares ->d_op against this table to detect
 * namespace dentries.
 *   ->d_prune  clears the pointer stashed in ns_common (ns_prune_dentry()).
 *   ->d_delete is the generic always_delete_dentry helper.
 *   ->d_dname  generates the "<name>:[<ino>]" name on demand (ns_dname()).
 */
const struct dentry_operations ns_dentry_operations =
|
||||
{
|
||||
.d_prune = ns_prune_dentry,
|
||||
.d_delete = always_delete_dentry,
|
||||
.d_dname = ns_dname,
|
||||
};
|
||||
|
||||
static void nsfs_evict(struct inode *inode)
|
||||
{
|
||||
struct ns_common *ns = inode->i_private;
|
||||
clear_inode(inode);
|
||||
ns->ops->put(ns);
|
||||
}
|
||||
|
||||
void *ns_get_path(struct path *path, struct task_struct *task,
|
||||
const struct proc_ns_operations *ns_ops)
|
||||
{
|
||||
struct vfsmount *mnt = mntget(nsfs_mnt);
|
||||
struct qstr qname = { .name = "", };
|
||||
struct dentry *dentry;
|
||||
struct inode *inode;
|
||||
struct ns_common *ns;
|
||||
unsigned long d;
|
||||
|
||||
again:
|
||||
ns = ns_ops->get(task);
|
||||
if (!ns) {
|
||||
mntput(mnt);
|
||||
return ERR_PTR(-ENOENT);
|
||||
}
|
||||
rcu_read_lock();
|
||||
d = atomic_long_read(&ns->stashed);
|
||||
if (!d)
|
||||
goto slow;
|
||||
dentry = (struct dentry *)d;
|
||||
if (!lockref_get_not_dead(&dentry->d_lockref))
|
||||
goto slow;
|
||||
rcu_read_unlock();
|
||||
ns_ops->put(ns);
|
||||
got_it:
|
||||
path->mnt = mnt;
|
||||
path->dentry = dentry;
|
||||
return NULL;
|
||||
slow:
|
||||
rcu_read_unlock();
|
||||
inode = new_inode_pseudo(mnt->mnt_sb);
|
||||
if (!inode) {
|
||||
ns_ops->put(ns);
|
||||
mntput(mnt);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
inode->i_ino = ns->inum;
|
||||
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
|
||||
inode->i_flags |= S_IMMUTABLE;
|
||||
inode->i_mode = S_IFREG | S_IRUGO;
|
||||
inode->i_fop = &ns_file_operations;
|
||||
inode->i_private = ns;
|
||||
|
||||
dentry = d_alloc_pseudo(mnt->mnt_sb, &qname);
|
||||
if (!dentry) {
|
||||
iput(inode);
|
||||
mntput(mnt);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
d_instantiate(dentry, inode);
|
||||
dentry->d_fsdata = (void *)ns_ops;
|
||||
d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
|
||||
if (d) {
|
||||
d_delete(dentry); /* make sure ->d_prune() does nothing */
|
||||
dput(dentry);
|
||||
cpu_relax();
|
||||
goto again;
|
||||
}
|
||||
goto got_it;
|
||||
}
|
||||
|
||||
/*
 * ns_get_name - format the "<name>:[<inum>]" label of a task's namespace
 * @buf:    destination buffer
 * @size:   size of @buf in bytes
 * @task:   task whose namespace is looked up via @ns_ops->get()
 * @ns_ops: operations of the namespace type wanted
 *
 * Returns whatever snprintf() returns for the formatted label, or -ENOENT
 * if @ns_ops->get() yields no namespace.  The namespace reference is
 * dropped before returning.
 */
int ns_get_name(char *buf, size_t size, struct task_struct *task,
			const struct proc_ns_operations *ns_ops)
{
	struct ns_common *ns = ns_ops->get(task);
	int len;

	if (!ns)
		return -ENOENT;

	len = snprintf(buf, size, "%s:[%u]", ns_ops->name, ns->inum);
	ns_ops->put(ns);
	return len;
}
|
||||
|
||||
struct file *proc_ns_fget(int fd)
|
||||
{
|
||||
struct file *file;
|
||||
|
||||
file = fget(fd);
|
||||
if (!file)
|
||||
return ERR_PTR(-EBADF);
|
||||
|
||||
if (file->f_op != &ns_file_operations)
|
||||
goto out_invalid;
|
||||
|
||||
return file;
|
||||
|
||||
out_invalid:
|
||||
fput(file);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
/*
 * Superblock operations for nsfs, handed to mount_pseudo() in nsfs_mount().
 * ->evict_inode is where the namespace pinned by an inode gets released
 * (nsfs_evict()).
 */
static const struct super_operations nsfs_ops = {
|
||||
.statfs = simple_statfs,
|
||||
.evict_inode = nsfs_evict,
|
||||
};
|
||||
static struct dentry *nsfs_mount(struct file_system_type *fs_type,
|
||||
int flags, const char *dev_name, void *data)
|
||||
{
|
||||
return mount_pseudo(fs_type, "nsfs:", &nsfs_ops,
|
||||
&ns_dentry_operations, NSFS_MAGIC);
|
||||
}
|
||||
/*
 * The nsfs filesystem type.  In the code visible here it is only ever
 * mounted internally through kern_mount() in nsfs_init(); no call that
 * registers it as a user-mountable type appears in this file.
 */
static struct file_system_type nsfs = {
|
||||
.name = "nsfs",
|
||||
.mount = nsfs_mount,
|
||||
.kill_sb = kill_anon_super,
|
||||
};
|
||||
|
||||
/*
 * nsfs_init - create the internal nsfs mount at boot
 *
 * Stores the result of kern_mount() in nsfs_mnt and panics if mounting
 * failed.  MS_NOUSER is then cleared on the superblock.
 * NOTE(review): presumably needed so that nsfs files can be bind-mounted
 * (see the ns_dentry_operations check in do_loopback()) - confirm.
 */
void __init nsfs_init(void)
{
	struct vfsmount *mnt = kern_mount(&nsfs);

	nsfs_mnt = mnt;
	if (IS_ERR(mnt))
		panic("can't set nsfs up\n");

	mnt->mnt_sb->s_flags &= ~MS_NOUSER;
}
|
|
@ -32,7 +32,6 @@ static void proc_evict_inode(struct inode *inode)
|
|||
{
|
||||
struct proc_dir_entry *de;
|
||||
struct ctl_table_header *head;
|
||||
struct ns_common *ns;
|
||||
|
||||
truncate_inode_pages_final(&inode->i_data);
|
||||
clear_inode(inode);
|
||||
|
@ -49,10 +48,6 @@ static void proc_evict_inode(struct inode *inode)
|
|||
RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
|
||||
sysctl_head_put(head);
|
||||
}
|
||||
/* Release any associated namespace */
|
||||
ns = PROC_I(inode)->ns.ns;
|
||||
if (ns && ns->ops)
|
||||
ns->ops->put(ns);
|
||||
}
|
||||
|
||||
static struct kmem_cache * proc_inode_cachep;
|
||||
|
|
|
@ -1,10 +1,6 @@
|
|||
#include <linux/proc_fs.h>
|
||||
#include <linux/nsproxy.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/fs_struct.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/path.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/utsname.h>
|
||||
|
@ -34,139 +30,45 @@ static const struct proc_ns_operations *ns_entries[] = {
|
|||
&mntns_operations,
|
||||
};
|
||||
|
||||
static const struct file_operations ns_file_operations = {
|
||||
.llseek = no_llseek,
|
||||
};
|
||||
|
||||
static const struct inode_operations ns_inode_operations = {
|
||||
.setattr = proc_setattr,
|
||||
};
|
||||
|
||||
static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
|
||||
{
|
||||
struct inode *inode = dentry->d_inode;
|
||||
const struct proc_ns_operations *ns_ops = dentry->d_fsdata;
|
||||
|
||||
return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
|
||||
ns_ops->name, inode->i_ino);
|
||||
}
|
||||
|
||||
const struct dentry_operations ns_dentry_operations =
|
||||
{
|
||||
.d_delete = always_delete_dentry,
|
||||
.d_dname = ns_dname,
|
||||
};
|
||||
|
||||
static struct dentry *proc_ns_get_dentry(struct super_block *sb,
|
||||
struct task_struct *task, const struct proc_ns_operations *ns_ops)
|
||||
{
|
||||
struct dentry *dentry, *result;
|
||||
struct inode *inode;
|
||||
struct proc_inode *ei;
|
||||
struct qstr qname = { .name = "", };
|
||||
struct ns_common *ns;
|
||||
|
||||
ns = ns_ops->get(task);
|
||||
if (!ns)
|
||||
return ERR_PTR(-ENOENT);
|
||||
|
||||
dentry = d_alloc_pseudo(sb, &qname);
|
||||
if (!dentry) {
|
||||
ns_ops->put(ns);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
dentry->d_fsdata = (void *)ns_ops;
|
||||
|
||||
inode = iget_locked(sb, ns->inum);
|
||||
if (!inode) {
|
||||
dput(dentry);
|
||||
ns_ops->put(ns);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
ei = PROC_I(inode);
|
||||
if (inode->i_state & I_NEW) {
|
||||
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
|
||||
inode->i_op = &ns_inode_operations;
|
||||
inode->i_mode = S_IFREG | S_IRUGO;
|
||||
inode->i_fop = &ns_file_operations;
|
||||
ei->ns.ns_ops = ns_ops;
|
||||
ei->ns.ns = ns;
|
||||
unlock_new_inode(inode);
|
||||
} else {
|
||||
ns_ops->put(ns);
|
||||
}
|
||||
|
||||
d_set_d_op(dentry, &ns_dentry_operations);
|
||||
result = d_instantiate_unique(dentry, inode);
|
||||
if (result) {
|
||||
dput(dentry);
|
||||
dentry = result;
|
||||
}
|
||||
|
||||
return dentry;
|
||||
}
|
||||
|
||||
static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
|
||||
{
|
||||
struct inode *inode = dentry->d_inode;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct proc_inode *ei = PROC_I(inode);
|
||||
const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops;
|
||||
struct task_struct *task;
|
||||
struct path ns_path;
|
||||
void *error = ERR_PTR(-EACCES);
|
||||
|
||||
task = get_proc_task(inode);
|
||||
if (!task)
|
||||
goto out;
|
||||
return error;
|
||||
|
||||
if (!ptrace_may_access(task, PTRACE_MODE_READ))
|
||||
goto out_put_task;
|
||||
|
||||
ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns.ns_ops);
|
||||
if (IS_ERR(ns_path.dentry)) {
|
||||
error = ERR_CAST(ns_path.dentry);
|
||||
goto out_put_task;
|
||||
if (ptrace_may_access(task, PTRACE_MODE_READ)) {
|
||||
error = ns_get_path(&ns_path, task, ns_ops);
|
||||
if (!error)
|
||||
nd_jump_link(nd, &ns_path);
|
||||
}
|
||||
|
||||
ns_path.mnt = mntget(nd->path.mnt);
|
||||
nd_jump_link(nd, &ns_path);
|
||||
error = NULL;
|
||||
|
||||
out_put_task:
|
||||
put_task_struct(task);
|
||||
out:
|
||||
return error;
|
||||
}
|
||||
|
||||
static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
|
||||
{
|
||||
struct inode *inode = dentry->d_inode;
|
||||
struct proc_inode *ei = PROC_I(inode);
|
||||
const struct proc_ns_operations *ns_ops = ei->ns.ns_ops;
|
||||
const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops;
|
||||
struct task_struct *task;
|
||||
struct ns_common *ns;
|
||||
char name[50];
|
||||
int res = -EACCES;
|
||||
|
||||
task = get_proc_task(inode);
|
||||
if (!task)
|
||||
goto out;
|
||||
return res;
|
||||
|
||||
if (!ptrace_may_access(task, PTRACE_MODE_READ))
|
||||
goto out_put_task;
|
||||
|
||||
res = -ENOENT;
|
||||
ns = ns_ops->get(task);
|
||||
if (!ns)
|
||||
goto out_put_task;
|
||||
|
||||
snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns->inum);
|
||||
res = readlink_copy(buffer, buflen, name);
|
||||
ns_ops->put(ns);
|
||||
out_put_task:
|
||||
if (ptrace_may_access(task, PTRACE_MODE_READ)) {
|
||||
res = ns_get_name(name, sizeof(name), task, ns_ops);
|
||||
if (res >= 0)
|
||||
res = readlink_copy(buffer, buflen, name);
|
||||
}
|
||||
put_task_struct(task);
|
||||
out:
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -268,31 +170,3 @@ const struct inode_operations proc_ns_dir_inode_operations = {
|
|||
.getattr = pid_getattr,
|
||||
.setattr = proc_setattr,
|
||||
};
|
||||
|
||||
struct file *proc_ns_fget(int fd)
|
||||
{
|
||||
struct file *file;
|
||||
|
||||
file = fget(fd);
|
||||
if (!file)
|
||||
return ERR_PTR(-EBADF);
|
||||
|
||||
if (file->f_op != &ns_file_operations)
|
||||
goto out_invalid;
|
||||
|
||||
return file;
|
||||
|
||||
out_invalid:
|
||||
fput(file);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
struct ns_common *get_proc_ns(struct inode *inode)
|
||||
{
|
||||
return PROC_I(inode)->ns.ns;
|
||||
}
|
||||
|
||||
bool proc_ns_inode(struct inode *inode)
|
||||
{
|
||||
return inode->i_fop == &ns_file_operations;
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
struct proc_ns_operations;
|
||||
|
||||
/*
 * State shared by all namespace types that is needed to expose a
 * namespace as a file.
 */
struct ns_common {
|
||||
/*
 * Non-counting pointer to the nsfs dentry for this namespace, stored as
 * an unsigned long; 0 when none is stashed.  Written with cmpxchg in
 * ns_get_path() and reset to 0 by ns_prune_dentry() and ns_alloc_inum().
 */
atomic_long_t stashed;
|
||||
/* Per-type operations; nsfs_evict() calls ->put() through this. */
const struct proc_ns_operations *ops;
|
||||
/* Number used as the nsfs inode number and printed in "<name>:[<inum>]". */
unsigned int inum;
|
||||
};
|
||||
|
|
|
@ -4,9 +4,11 @@
|
|||
#ifndef _LINUX_PROC_NS_H
|
||||
#define _LINUX_PROC_NS_H
|
||||
|
||||
#include <linux/ns_common.h>
|
||||
|
||||
struct pid_namespace;
|
||||
struct nsproxy;
|
||||
struct ns_common;
|
||||
struct path;
|
||||
|
||||
struct proc_ns_operations {
|
||||
const char *name;
|
||||
|
@ -38,35 +40,38 @@ enum {
|
|||
|
||||
extern int pid_ns_prepare_proc(struct pid_namespace *ns);
|
||||
extern void pid_ns_release_proc(struct pid_namespace *ns);
|
||||
extern struct file *proc_ns_fget(int fd);
|
||||
extern struct ns_common *get_proc_ns(struct inode *);
|
||||
extern int proc_alloc_inum(unsigned int *pino);
|
||||
extern void proc_free_inum(unsigned int inum);
|
||||
extern bool proc_ns_inode(struct inode *inode);
|
||||
|
||||
#else /* CONFIG_PROC_FS */
|
||||
|
||||
static inline int pid_ns_prepare_proc(struct pid_namespace *ns) { return 0; }
|
||||
static inline void pid_ns_release_proc(struct pid_namespace *ns) {}
|
||||
|
||||
static inline struct file *proc_ns_fget(int fd)
|
||||
{
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
static inline struct ns_common *get_proc_ns(struct inode *inode) { return NULL; }
|
||||
|
||||
static inline int proc_alloc_inum(unsigned int *inum)
|
||||
{
|
||||
*inum = 1;
|
||||
return 0;
|
||||
}
|
||||
static inline void proc_free_inum(unsigned int inum) {}
|
||||
static inline bool proc_ns_inode(struct inode *inode) { return false; }
|
||||
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
|
||||
#define ns_alloc_inum(ns) proc_alloc_inum(&(ns)->inum)
|
||||
/*
 * Initialise the file-related part of a namespace: clear the stashed
 * dentry pointer, then allocate ->inum through proc_alloc_inum().
 * Returns the result of proc_alloc_inum().
 */
static inline int ns_alloc_inum(struct ns_common *ns)
|
||||
{
|
||||
/* No nsfs dentry exists yet for a freshly set up namespace. */
atomic_long_set(&ns->stashed, 0);
|
||||
return proc_alloc_inum(&ns->inum);
|
||||
}
|
||||
|
||||
#define ns_free_inum(ns) proc_free_inum((ns)->inum)
|
||||
|
||||
extern struct file *proc_ns_fget(int fd);
|
||||
#define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
|
||||
extern void *ns_get_path(struct path *path, struct task_struct *task,
|
||||
const struct proc_ns_operations *ns_ops);
|
||||
|
||||
extern int ns_get_name(char *buf, size_t size, struct task_struct *task,
|
||||
const struct proc_ns_operations *ns_ops);
|
||||
extern void nsfs_init(void);
|
||||
|
||||
#endif /* _LINUX_PROC_NS_H */
|
||||
|
|
|
@ -72,5 +72,6 @@
|
|||
#define MTD_INODE_FS_MAGIC 0x11307854
|
||||
#define ANON_INODE_FS_MAGIC 0x09041934
|
||||
#define BTRFS_TEST_MAGIC 0x73727279
|
||||
#define NSFS_MAGIC 0x6e736673
|
||||
|
||||
#endif /* __LINUX_MAGIC_H__ */
|
||||
|
|
|
@ -78,6 +78,7 @@
|
|||
#include <linux/context_tracking.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/proc_ns.h>
|
||||
|
||||
#include <asm/io.h>
|
||||
#include <asm/bugs.h>
|
||||
|
@ -660,6 +661,7 @@ asmlinkage __visible void __init start_kernel(void)
|
|||
/* rootfs populating might need page-writeback */
|
||||
page_writeback_init();
|
||||
proc_root_init();
|
||||
nsfs_init();
|
||||
cgroup_init();
|
||||
cpuset_init();
|
||||
taskstats_init_early();
|
||||
|
|
Loading…
Reference in New Issue