Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull usernamespace mount fixes from Eric Biederman: "Way back in October Andrey Vagin reported that umount(MNT_DETACH) could be used to defeat MNT_LOCKED. As I worked to fix this I discovered that combined with mount propagation and an appropriate selection of shared subtrees a reference to a directory on an unmounted filesystem is not necessary. That MNT_DETACH is allowed in user namespaces in a form that can break MNT_LOCKED comes from my early misunderstanding of what MNT_DETACH does. To avoid breaking existing userspace the conflict between MNT_DETACH and MNT_LOCKED is fixed by leaving mounts that are locked to their parents in the mount hash table until the last reference goes away. While investigating this issue I also found an issue with __detach_mounts. The code was unnecessarily and incorrectly triggering mount propagation, resulting in too many mounts going away when a directory is deleted and too many CPU cycles being burned while doing that. Looking some more I realized that __detach_mounts, by only keeping mounts connected that were MNT_LOCKED, still had the potential to leak information, so I tweaked the code to keep everything locked together that possibly could be. This code was almost ready last cycle but Al invented fs_pin which slightly simplifies this code but required rewrites and retesting, and I have not been in top form for a while so it took me a while to get all of that done. Similarly, this pull request is late because I have been feeling absolutely miserable all week. The issue of being able to escape a bind mount has not yet been addressed, as the fixes are not yet mature" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: mnt: Update detach_mounts to leave mounts connected mnt: Fix the error check in __detach_mounts mnt: Honor MNT_LOCKED when detaching mounts fs_pin: Allow for the possibility that m_list or s_list go unused. 
mnt: Factor umount_mnt from umount_tree mnt: Factor out unhash_mnt from detach_mnt and umount_tree mnt: Fail collect_mounts when applied to unmounted mounts mnt: Don't propagate unmounts to locked mounts mnt: On an unmount propagate clearing of MNT_LOCKED mnt: Delay removal from the mount hash. mnt: Add MNT_UMOUNT flag mnt: In umount_tree reuse mnt_list instead of mnt_hash mnt: Don't propagate umounts in __detach_mounts mnt: Improve the umount_tree flags mnt: Use hlist_move_list in namespace_unlock
This commit is contained in:
commit
8f502d5b9e
|
@ -9,8 +9,8 @@ static DEFINE_SPINLOCK(pin_lock);
|
|||
void pin_remove(struct fs_pin *pin)
|
||||
{
|
||||
spin_lock(&pin_lock);
|
||||
hlist_del(&pin->m_list);
|
||||
hlist_del(&pin->s_list);
|
||||
hlist_del_init(&pin->m_list);
|
||||
hlist_del_init(&pin->s_list);
|
||||
spin_unlock(&pin_lock);
|
||||
spin_lock_irq(&pin->wait.lock);
|
||||
pin->done = 1;
|
||||
|
|
142
fs/namespace.c
142
fs/namespace.c
|
@ -632,14 +632,17 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
|
|||
*/
|
||||
struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
|
||||
{
|
||||
struct mount *p, *res;
|
||||
res = p = __lookup_mnt(mnt, dentry);
|
||||
struct mount *p, *res = NULL;
|
||||
p = __lookup_mnt(mnt, dentry);
|
||||
if (!p)
|
||||
goto out;
|
||||
if (!(p->mnt.mnt_flags & MNT_UMOUNT))
|
||||
res = p;
|
||||
hlist_for_each_entry_continue(p, mnt_hash) {
|
||||
if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
|
||||
break;
|
||||
res = p;
|
||||
if (!(p->mnt.mnt_flags & MNT_UMOUNT))
|
||||
res = p;
|
||||
}
|
||||
out:
|
||||
return res;
|
||||
|
@ -795,10 +798,8 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
|
|||
/*
|
||||
* vfsmount lock must be held for write
|
||||
*/
|
||||
static void detach_mnt(struct mount *mnt, struct path *old_path)
|
||||
static void unhash_mnt(struct mount *mnt)
|
||||
{
|
||||
old_path->dentry = mnt->mnt_mountpoint;
|
||||
old_path->mnt = &mnt->mnt_parent->mnt;
|
||||
mnt->mnt_parent = mnt;
|
||||
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
|
||||
list_del_init(&mnt->mnt_child);
|
||||
|
@ -808,6 +809,26 @@ static void detach_mnt(struct mount *mnt, struct path *old_path)
|
|||
mnt->mnt_mp = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* vfsmount lock must be held for write
|
||||
*/
|
||||
static void detach_mnt(struct mount *mnt, struct path *old_path)
|
||||
{
|
||||
old_path->dentry = mnt->mnt_mountpoint;
|
||||
old_path->mnt = &mnt->mnt_parent->mnt;
|
||||
unhash_mnt(mnt);
|
||||
}
|
||||
|
||||
/*
|
||||
* vfsmount lock must be held for write
|
||||
*/
|
||||
static void umount_mnt(struct mount *mnt)
|
||||
{
|
||||
/* old mountpoint will be dropped when we can do that */
|
||||
mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint;
|
||||
unhash_mnt(mnt);
|
||||
}
|
||||
|
||||
/*
|
||||
* vfsmount lock must be held for write
|
||||
*/
|
||||
|
@ -1078,6 +1099,13 @@ static void mntput_no_expire(struct mount *mnt)
|
|||
rcu_read_unlock();
|
||||
|
||||
list_del(&mnt->mnt_instance);
|
||||
|
||||
if (unlikely(!list_empty(&mnt->mnt_mounts))) {
|
||||
struct mount *p, *tmp;
|
||||
list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
|
||||
umount_mnt(p);
|
||||
}
|
||||
}
|
||||
unlock_mount_hash();
|
||||
|
||||
if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
|
||||
|
@ -1298,17 +1326,15 @@ static HLIST_HEAD(unmounted); /* protected by namespace_sem */
|
|||
|
||||
static void namespace_unlock(void)
|
||||
{
|
||||
struct hlist_head head = unmounted;
|
||||
struct hlist_head head;
|
||||
|
||||
if (likely(hlist_empty(&head))) {
|
||||
up_write(&namespace_sem);
|
||||
return;
|
||||
}
|
||||
hlist_move_list(&unmounted, &head);
|
||||
|
||||
head.first->pprev = &head.first;
|
||||
INIT_HLIST_HEAD(&unmounted);
|
||||
up_write(&namespace_sem);
|
||||
|
||||
if (likely(hlist_empty(&head)))
|
||||
return;
|
||||
|
||||
synchronize_rcu();
|
||||
|
||||
group_pin_kill(&head);
|
||||
|
@ -1319,49 +1345,63 @@ static inline void namespace_lock(void)
|
|||
down_write(&namespace_sem);
|
||||
}
|
||||
|
||||
enum umount_tree_flags {
|
||||
UMOUNT_SYNC = 1,
|
||||
UMOUNT_PROPAGATE = 2,
|
||||
UMOUNT_CONNECTED = 4,
|
||||
};
|
||||
/*
|
||||
* mount_lock must be held
|
||||
* namespace_sem must be held for write
|
||||
* how = 0 => just this tree, don't propagate
|
||||
* how = 1 => propagate; we know that nobody else has reference to any victims
|
||||
* how = 2 => lazy umount
|
||||
*/
|
||||
void umount_tree(struct mount *mnt, int how)
|
||||
static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
|
||||
{
|
||||
HLIST_HEAD(tmp_list);
|
||||
LIST_HEAD(tmp_list);
|
||||
struct mount *p;
|
||||
|
||||
if (how & UMOUNT_PROPAGATE)
|
||||
propagate_mount_unlock(mnt);
|
||||
|
||||
/* Gather the mounts to umount */
|
||||
for (p = mnt; p; p = next_mnt(p, mnt)) {
|
||||
hlist_del_init_rcu(&p->mnt_hash);
|
||||
hlist_add_head(&p->mnt_hash, &tmp_list);
|
||||
p->mnt.mnt_flags |= MNT_UMOUNT;
|
||||
list_move(&p->mnt_list, &tmp_list);
|
||||
}
|
||||
|
||||
hlist_for_each_entry(p, &tmp_list, mnt_hash)
|
||||
/* Hide the mounts from mnt_mounts */
|
||||
list_for_each_entry(p, &tmp_list, mnt_list) {
|
||||
list_del_init(&p->mnt_child);
|
||||
}
|
||||
|
||||
if (how)
|
||||
/* Add propagated mounts to the tmp_list */
|
||||
if (how & UMOUNT_PROPAGATE)
|
||||
propagate_umount(&tmp_list);
|
||||
|
||||
while (!hlist_empty(&tmp_list)) {
|
||||
p = hlist_entry(tmp_list.first, struct mount, mnt_hash);
|
||||
hlist_del_init_rcu(&p->mnt_hash);
|
||||
while (!list_empty(&tmp_list)) {
|
||||
bool disconnect;
|
||||
p = list_first_entry(&tmp_list, struct mount, mnt_list);
|
||||
list_del_init(&p->mnt_expire);
|
||||
list_del_init(&p->mnt_list);
|
||||
__touch_mnt_namespace(p->mnt_ns);
|
||||
p->mnt_ns = NULL;
|
||||
if (how < 2)
|
||||
if (how & UMOUNT_SYNC)
|
||||
p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
|
||||
|
||||
pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, &unmounted);
|
||||
disconnect = !(((how & UMOUNT_CONNECTED) &&
|
||||
mnt_has_parent(p) &&
|
||||
(p->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) ||
|
||||
IS_MNT_LOCKED_AND_LAZY(p));
|
||||
|
||||
pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt,
|
||||
disconnect ? &unmounted : NULL);
|
||||
if (mnt_has_parent(p)) {
|
||||
hlist_del_init(&p->mnt_mp_list);
|
||||
put_mountpoint(p->mnt_mp);
|
||||
mnt_add_count(p->mnt_parent, -1);
|
||||
/* old mountpoint will be dropped when we can do that */
|
||||
p->mnt_ex_mountpoint = p->mnt_mountpoint;
|
||||
p->mnt_mountpoint = p->mnt.mnt_root;
|
||||
p->mnt_parent = p;
|
||||
p->mnt_mp = NULL;
|
||||
if (!disconnect) {
|
||||
/* Don't forget about p */
|
||||
list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
|
||||
} else {
|
||||
umount_mnt(p);
|
||||
}
|
||||
}
|
||||
change_mnt_propagation(p, MS_PRIVATE);
|
||||
}
|
||||
|
@ -1447,14 +1487,14 @@ static int do_umount(struct mount *mnt, int flags)
|
|||
|
||||
if (flags & MNT_DETACH) {
|
||||
if (!list_empty(&mnt->mnt_list))
|
||||
umount_tree(mnt, 2);
|
||||
umount_tree(mnt, UMOUNT_PROPAGATE);
|
||||
retval = 0;
|
||||
} else {
|
||||
shrink_submounts(mnt);
|
||||
retval = -EBUSY;
|
||||
if (!propagate_mount_busy(mnt, 2)) {
|
||||
if (!list_empty(&mnt->mnt_list))
|
||||
umount_tree(mnt, 1);
|
||||
umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
|
||||
retval = 0;
|
||||
}
|
||||
}
|
||||
|
@ -1480,13 +1520,20 @@ void __detach_mounts(struct dentry *dentry)
|
|||
|
||||
namespace_lock();
|
||||
mp = lookup_mountpoint(dentry);
|
||||
if (!mp)
|
||||
if (IS_ERR_OR_NULL(mp))
|
||||
goto out_unlock;
|
||||
|
||||
lock_mount_hash();
|
||||
while (!hlist_empty(&mp->m_list)) {
|
||||
mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
|
||||
umount_tree(mnt, 2);
|
||||
if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
|
||||
struct mount *p, *tmp;
|
||||
list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
|
||||
hlist_add_head(&p->mnt_umount.s_list, &unmounted);
|
||||
umount_mnt(p);
|
||||
}
|
||||
}
|
||||
else umount_tree(mnt, UMOUNT_CONNECTED);
|
||||
}
|
||||
unlock_mount_hash();
|
||||
put_mountpoint(mp);
|
||||
|
@ -1648,7 +1695,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
|
|||
out:
|
||||
if (res) {
|
||||
lock_mount_hash();
|
||||
umount_tree(res, 0);
|
||||
umount_tree(res, UMOUNT_SYNC);
|
||||
unlock_mount_hash();
|
||||
}
|
||||
return q;
|
||||
|
@ -1660,8 +1707,11 @@ struct vfsmount *collect_mounts(struct path *path)
|
|||
{
|
||||
struct mount *tree;
|
||||
namespace_lock();
|
||||
tree = copy_tree(real_mount(path->mnt), path->dentry,
|
||||
CL_COPY_ALL | CL_PRIVATE);
|
||||
if (!check_mnt(real_mount(path->mnt)))
|
||||
tree = ERR_PTR(-EINVAL);
|
||||
else
|
||||
tree = copy_tree(real_mount(path->mnt), path->dentry,
|
||||
CL_COPY_ALL | CL_PRIVATE);
|
||||
namespace_unlock();
|
||||
if (IS_ERR(tree))
|
||||
return ERR_CAST(tree);
|
||||
|
@ -1672,7 +1722,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
|
|||
{
|
||||
namespace_lock();
|
||||
lock_mount_hash();
|
||||
umount_tree(real_mount(mnt), 0);
|
||||
umount_tree(real_mount(mnt), UMOUNT_SYNC);
|
||||
unlock_mount_hash();
|
||||
namespace_unlock();
|
||||
}
|
||||
|
@ -1855,7 +1905,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
|
|||
out_cleanup_ids:
|
||||
while (!hlist_empty(&tree_list)) {
|
||||
child = hlist_entry(tree_list.first, struct mount, mnt_hash);
|
||||
umount_tree(child, 0);
|
||||
umount_tree(child, UMOUNT_SYNC);
|
||||
}
|
||||
unlock_mount_hash();
|
||||
cleanup_group_ids(source_mnt, NULL);
|
||||
|
@ -2035,7 +2085,7 @@ static int do_loopback(struct path *path, const char *old_name,
|
|||
err = graft_tree(mnt, parent, mp);
|
||||
if (err) {
|
||||
lock_mount_hash();
|
||||
umount_tree(mnt, 0);
|
||||
umount_tree(mnt, UMOUNT_SYNC);
|
||||
unlock_mount_hash();
|
||||
}
|
||||
out2:
|
||||
|
@ -2406,7 +2456,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
|
|||
while (!list_empty(&graveyard)) {
|
||||
mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
|
||||
touch_mnt_namespace(mnt->mnt_ns);
|
||||
umount_tree(mnt, 1);
|
||||
umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
|
||||
}
|
||||
unlock_mount_hash();
|
||||
namespace_unlock();
|
||||
|
@ -2477,7 +2527,7 @@ static void shrink_submounts(struct mount *mnt)
|
|||
m = list_first_entry(&graveyard, struct mount,
|
||||
mnt_expire);
|
||||
touch_mnt_namespace(m->mnt_ns);
|
||||
umount_tree(m, 1);
|
||||
umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
60
fs/pnode.c
60
fs/pnode.c
|
@ -361,6 +361,46 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear MNT_LOCKED when it can be shown to be safe.
|
||||
*
|
||||
* mount_lock must be held for write
|
||||
*/
|
||||
void propagate_mount_unlock(struct mount *mnt)
|
||||
{
|
||||
struct mount *parent = mnt->mnt_parent;
|
||||
struct mount *m, *child;
|
||||
|
||||
BUG_ON(parent == mnt);
|
||||
|
||||
for (m = propagation_next(parent, parent); m;
|
||||
m = propagation_next(m, parent)) {
|
||||
child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
|
||||
if (child)
|
||||
child->mnt.mnt_flags &= ~MNT_LOCKED;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark all mounts that the MNT_LOCKED logic will allow to be unmounted.
|
||||
*/
|
||||
static void mark_umount_candidates(struct mount *mnt)
|
||||
{
|
||||
struct mount *parent = mnt->mnt_parent;
|
||||
struct mount *m;
|
||||
|
||||
BUG_ON(parent == mnt);
|
||||
|
||||
for (m = propagation_next(parent, parent); m;
|
||||
m = propagation_next(m, parent)) {
|
||||
struct mount *child = __lookup_mnt_last(&m->mnt,
|
||||
mnt->mnt_mountpoint);
|
||||
if (child && (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m))) {
|
||||
SET_MNT_MARK(child);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* NOTE: unmounting 'mnt' naturally propagates to all other mounts its
|
||||
* parent propagates to.
|
||||
|
@ -378,13 +418,16 @@ static void __propagate_umount(struct mount *mnt)
|
|||
struct mount *child = __lookup_mnt_last(&m->mnt,
|
||||
mnt->mnt_mountpoint);
|
||||
/*
|
||||
* umount the child only if the child has no
|
||||
* other children
|
||||
* umount the child only if the child has no children
|
||||
* and the child is marked safe to unmount.
|
||||
*/
|
||||
if (child && list_empty(&child->mnt_mounts)) {
|
||||
if (!child || !IS_MNT_MARKED(child))
|
||||
continue;
|
||||
CLEAR_MNT_MARK(child);
|
||||
if (list_empty(&child->mnt_mounts)) {
|
||||
list_del_init(&child->mnt_child);
|
||||
hlist_del_init_rcu(&child->mnt_hash);
|
||||
hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash);
|
||||
child->mnt.mnt_flags |= MNT_UMOUNT;
|
||||
list_move_tail(&child->mnt_list, &mnt->mnt_list);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -396,11 +439,14 @@ static void __propagate_umount(struct mount *mnt)
|
|||
*
|
||||
* vfsmount lock must be held for write
|
||||
*/
|
||||
int propagate_umount(struct hlist_head *list)
|
||||
int propagate_umount(struct list_head *list)
|
||||
{
|
||||
struct mount *mnt;
|
||||
|
||||
hlist_for_each_entry(mnt, list, mnt_hash)
|
||||
list_for_each_entry_reverse(mnt, list, mnt_list)
|
||||
mark_umount_candidates(mnt);
|
||||
|
||||
list_for_each_entry(mnt, list, mnt_list)
|
||||
__propagate_umount(mnt);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -19,6 +19,9 @@
|
|||
#define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
|
||||
#define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED)
|
||||
#define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED)
|
||||
#define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED)
|
||||
#define IS_MNT_LOCKED_AND_LAZY(m) \
|
||||
(((m)->mnt.mnt_flags & (MNT_LOCKED|MNT_SYNC_UMOUNT)) == MNT_LOCKED)
|
||||
|
||||
#define CL_EXPIRE 0x01
|
||||
#define CL_SLAVE 0x02
|
||||
|
@ -40,14 +43,14 @@ static inline void set_mnt_shared(struct mount *mnt)
|
|||
void change_mnt_propagation(struct mount *, int);
|
||||
int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
|
||||
struct hlist_head *);
|
||||
int propagate_umount(struct hlist_head *);
|
||||
int propagate_umount(struct list_head *);
|
||||
int propagate_mount_busy(struct mount *, int);
|
||||
void propagate_mount_unlock(struct mount *);
|
||||
void mnt_release_group_id(struct mount *);
|
||||
int get_dominating_id(struct mount *mnt, const struct path *root);
|
||||
unsigned int mnt_get_count(struct mount *mnt);
|
||||
void mnt_set_mountpoint(struct mount *, struct mountpoint *,
|
||||
struct mount *);
|
||||
void umount_tree(struct mount *, int);
|
||||
struct mount *copy_tree(struct mount *, struct dentry *, int);
|
||||
bool is_path_reachable(struct mount *, struct dentry *,
|
||||
const struct path *root);
|
||||
|
|
|
@ -13,6 +13,8 @@ struct vfsmount;
|
|||
static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *))
|
||||
{
|
||||
init_waitqueue_head(&p->wait);
|
||||
INIT_HLIST_NODE(&p->s_list);
|
||||
INIT_HLIST_NODE(&p->m_list);
|
||||
p->kill = kill;
|
||||
}
|
||||
|
||||
|
|
|
@ -61,6 +61,7 @@ struct mnt_namespace;
|
|||
#define MNT_DOOMED 0x1000000
|
||||
#define MNT_SYNC_UMOUNT 0x2000000
|
||||
#define MNT_MARKED 0x4000000
|
||||
#define MNT_UMOUNT 0x8000000
|
||||
|
||||
struct vfsmount {
|
||||
struct dentry *mnt_root; /* root of the mounted tree */
|
||||
|
|
Loading…
Reference in New Issue