cgroup: fix cgroup_path() vs rename() race
rename() will change dentry->d_name. The result of this race can be worse than seeing partially rewritten name, but we might access a stale pointer because rename() will re-allocate memory to hold a longer name. As accessing dentry->name must be protected by dentry->d_lock or parent inode's i_mutex, while on the other hand cgroup-path() can be called with some irq-safe spinlocks held, we can't generate cgroup path using dentry->d_name. Alternatively we make a copy of dentry->d_name and save it in cgrp->name when a cgroup is created, and update cgrp->name at rename(). v5: use flexible array instead of zero-size array. v4: - allocate root_cgroup_name and all root_cgroup->name points to it. - add cgroup_name() wrapper. v3: use kfree_rcu() instead of synchronize_rcu() in user-visible path. v2: make cgrp->name RCU safe. Signed-off-by: Li Zefan <lizefan@huawei.com> Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
parent
6dbe51c251
commit
65dff759d2
|
@ -247,9 +247,7 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
|
|||
{
|
||||
int ret;
|
||||
|
||||
rcu_read_lock();
|
||||
ret = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
|
||||
rcu_read_unlock();
|
||||
if (ret)
|
||||
strncpy(buf, "<unavailable>", buflen);
|
||||
return ret;
|
||||
|
|
|
@ -150,6 +150,11 @@ enum {
|
|||
CGRP_CPUSET_CLONE_CHILDREN,
|
||||
};
|
||||
|
||||
struct cgroup_name {
|
||||
struct rcu_head rcu_head;
|
||||
char name[];
|
||||
};
|
||||
|
||||
struct cgroup {
|
||||
unsigned long flags; /* "unsigned long" so bitops work */
|
||||
|
||||
|
@ -172,6 +177,19 @@ struct cgroup {
|
|||
struct cgroup *parent; /* my parent */
|
||||
struct dentry *dentry; /* cgroup fs entry, RCU protected */
|
||||
|
||||
/*
|
||||
* This is a copy of dentry->d_name, and it's needed because
|
||||
* we can't use dentry->d_name in cgroup_path().
|
||||
*
|
||||
* You must acquire rcu_read_lock() to access cgrp->name, and
|
||||
* the only place that can change it is rename(), which is
|
||||
* protected by parent dir's i_mutex.
|
||||
*
|
||||
* Normally you should use cgroup_name() wrapper rather than
|
||||
* access it directly.
|
||||
*/
|
||||
struct cgroup_name __rcu *name;
|
||||
|
||||
/* Private pointers for each registered subsystem */
|
||||
struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
|
||||
|
||||
|
@ -404,6 +422,12 @@ struct cgroup_scanner {
|
|||
void *data;
|
||||
};
|
||||
|
||||
/* Caller should hold rcu_read_lock() */
|
||||
static inline const char *cgroup_name(const struct cgroup *cgrp)
|
||||
{
|
||||
return rcu_dereference(cgrp->name)->name;
|
||||
}
|
||||
|
||||
int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
|
||||
int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
|
||||
|
||||
|
|
106
kernel/cgroup.c
106
kernel/cgroup.c
|
@ -238,6 +238,8 @@ static DEFINE_SPINLOCK(hierarchy_id_lock);
|
|||
/* dummytop is a shorthand for the dummy hierarchy's top cgroup */
|
||||
#define dummytop (&rootnode.top_cgroup)
|
||||
|
||||
static struct cgroup_name root_cgroup_name = { .name = "/" };
|
||||
|
||||
/* This flag indicates whether tasks in the fork and exit paths should
|
||||
* check for fork/exit handlers to call. This avoids us having to do
|
||||
* extra work in the fork/exit path if none of the subsystems need to
|
||||
|
@ -859,6 +861,17 @@ static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
|
|||
return inode;
|
||||
}
|
||||
|
||||
static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
|
||||
{
|
||||
struct cgroup_name *name;
|
||||
|
||||
name = kmalloc(sizeof(*name) + dentry->d_name.len + 1, GFP_KERNEL);
|
||||
if (!name)
|
||||
return NULL;
|
||||
strcpy(name->name, dentry->d_name.name);
|
||||
return name;
|
||||
}
|
||||
|
||||
static void cgroup_free_fn(struct work_struct *work)
|
||||
{
|
||||
struct cgroup *cgrp = container_of(work, struct cgroup, free_work);
|
||||
|
@ -889,6 +902,7 @@ static void cgroup_free_fn(struct work_struct *work)
|
|||
simple_xattrs_free(&cgrp->xattrs);
|
||||
|
||||
ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
|
||||
kfree(rcu_dereference_raw(cgrp->name));
|
||||
kfree(cgrp);
|
||||
}
|
||||
|
||||
|
@ -1421,6 +1435,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
|
|||
INIT_LIST_HEAD(&root->allcg_list);
|
||||
root->number_of_cgroups = 1;
|
||||
cgrp->root = root;
|
||||
cgrp->name = &root_cgroup_name;
|
||||
cgrp->top_cgroup = cgrp;
|
||||
init_cgroup_housekeeping(cgrp);
|
||||
list_add_tail(&cgrp->allcg_node, &root->allcg_list);
|
||||
|
@ -1769,49 +1784,45 @@ static struct kobject *cgroup_kobj;
|
|||
* @buf: the buffer to write the path into
|
||||
* @buflen: the length of the buffer
|
||||
*
|
||||
* Called with cgroup_mutex held or else with an RCU-protected cgroup
|
||||
* reference. Writes path of cgroup into buf. Returns 0 on success,
|
||||
* -errno on error.
|
||||
* Writes path of cgroup into buf. Returns 0 on success, -errno on error.
|
||||
*
|
||||
* We can't generate cgroup path using dentry->d_name, as accessing
|
||||
* dentry->name must be protected by irq-unsafe dentry->d_lock or parent
|
||||
* inode's i_mutex, while on the other hand cgroup_path() can be called
|
||||
* with some irq-safe spinlocks held.
|
||||
*/
|
||||
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
|
||||
{
|
||||
struct dentry *dentry = cgrp->dentry;
|
||||
int ret = -ENAMETOOLONG;
|
||||
char *start;
|
||||
|
||||
rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(),
|
||||
"cgroup_path() called without proper locking");
|
||||
|
||||
if (cgrp == dummytop) {
|
||||
/*
|
||||
* Inactive subsystems have no dentry for their root
|
||||
* cgroup
|
||||
*/
|
||||
strcpy(buf, "/");
|
||||
return 0;
|
||||
}
|
||||
|
||||
start = buf + buflen - 1;
|
||||
|
||||
*start = '\0';
|
||||
for (;;) {
|
||||
int len = dentry->d_name.len;
|
||||
|
||||
rcu_read_lock();
|
||||
while (cgrp) {
|
||||
const char *name = cgroup_name(cgrp);
|
||||
int len;
|
||||
|
||||
len = strlen(name);
|
||||
if ((start -= len) < buf)
|
||||
return -ENAMETOOLONG;
|
||||
memcpy(start, dentry->d_name.name, len);
|
||||
cgrp = cgrp->parent;
|
||||
if (!cgrp)
|
||||
goto out;
|
||||
memcpy(start, name, len);
|
||||
|
||||
if (!cgrp->parent)
|
||||
break;
|
||||
|
||||
dentry = cgrp->dentry;
|
||||
if (!cgrp->parent)
|
||||
continue;
|
||||
if (--start < buf)
|
||||
return -ENAMETOOLONG;
|
||||
goto out;
|
||||
*start = '/';
|
||||
|
||||
cgrp = cgrp->parent;
|
||||
}
|
||||
ret = 0;
|
||||
memmove(buf, start, buf + buflen - start);
|
||||
return 0;
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cgroup_path);
|
||||
|
||||
|
@ -2537,13 +2548,40 @@ static int cgroup_file_release(struct inode *inode, struct file *file)
|
|||
static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
|
||||
struct inode *new_dir, struct dentry *new_dentry)
|
||||
{
|
||||
int ret;
|
||||
struct cgroup_name *name, *old_name;
|
||||
struct cgroup *cgrp;
|
||||
|
||||
/*
|
||||
* It's convinient to use parent dir's i_mutex to protected
|
||||
* cgrp->name.
|
||||
*/
|
||||
lockdep_assert_held(&old_dir->i_mutex);
|
||||
|
||||
if (!S_ISDIR(old_dentry->d_inode->i_mode))
|
||||
return -ENOTDIR;
|
||||
if (new_dentry->d_inode)
|
||||
return -EEXIST;
|
||||
if (old_dir != new_dir)
|
||||
return -EIO;
|
||||
return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
|
||||
|
||||
cgrp = __d_cgrp(old_dentry);
|
||||
|
||||
name = cgroup_alloc_name(new_dentry);
|
||||
if (!name)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = simple_rename(old_dir, old_dentry, new_dir, new_dentry);
|
||||
if (ret) {
|
||||
kfree(name);
|
||||
return ret;
|
||||
}
|
||||
|
||||
old_name = cgrp->name;
|
||||
rcu_assign_pointer(cgrp->name, name);
|
||||
|
||||
kfree_rcu(old_name, rcu_head);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct simple_xattrs *__d_xattrs(struct dentry *dentry)
|
||||
|
@ -4158,6 +4196,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
|
|||
umode_t mode)
|
||||
{
|
||||
struct cgroup *cgrp;
|
||||
struct cgroup_name *name;
|
||||
struct cgroupfs_root *root = parent->root;
|
||||
int err = 0;
|
||||
struct cgroup_subsys *ss;
|
||||
|
@ -4168,9 +4207,14 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
|
|||
if (!cgrp)
|
||||
return -ENOMEM;
|
||||
|
||||
name = cgroup_alloc_name(dentry);
|
||||
if (!name)
|
||||
goto err_free_cgrp;
|
||||
rcu_assign_pointer(cgrp->name, name);
|
||||
|
||||
cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL);
|
||||
if (cgrp->id < 0)
|
||||
goto err_free_cgrp;
|
||||
goto err_free_name;
|
||||
|
||||
/*
|
||||
* Only live parents can have children. Note that the liveliness
|
||||
|
@ -4276,6 +4320,8 @@ err_free_all:
|
|||
deactivate_super(sb);
|
||||
err_free_id:
|
||||
ida_simple_remove(&root->cgroup_ida, cgrp->id);
|
||||
err_free_name:
|
||||
kfree(rcu_dereference_raw(cgrp->name));
|
||||
err_free_cgrp:
|
||||
kfree(cgrp);
|
||||
return err;
|
||||
|
|
Loading…
Reference in New Issue