fs.move_mount.move_mount_set_group.v5.15

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCYS3iBAAKCRCRxhvAZXjc
 olWeAP9CK0NMvXM4eZDQH8LZ7Bg3COvYoGhwuWFoLtHnvYHZ/AEA0jvoe8jH1ekK
 wYVkuquIE4Dw735mpjIOThByUUP3CQE=
 =+ham
 -----END PGP SIGNATURE-----

Merge tag 'fs.move_mount.move_mount_set_group.v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux

Pull move_mount updates from Christian Brauner:
 "This contains an extension to the move_mount() syscall making it
  possible to add a single private mount into an existing propagation
  tree.

  The use-case comes from the criu folks who have been struggling with
  restoring complex mount trees for a long time. Variations of this work
  have been discussed at Plumbers before, e.g.

      https://www.linuxplumbersconf.org/event/7/contributions/640/

  The extension to move_mount() enables criu to restore any set of mount
  namespaces, mount trees and sharing group trees without introducing
  yet more complexity into mount propagation itself.

  The changes required to criu to make use of this and restore complex
  propagation trees are available at

      https://github.com/Snorch/criu/commits/mount-v2-poc

  A cleaned-up version of this will go up for merging into the main criu
  repo after this lands"

* tag 'fs.move_mount.move_mount_set_group.v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux:
  tests: add move_mount(MOVE_MOUNT_SET_GROUP) selftest
  move_mount: allow to add a mount into an existing group
This commit is contained in:
Linus Torvalds 2021-08-31 11:54:02 -07:00
commit 1dd5915a5c
7 changed files with 463 additions and 2 deletions

View File

@ -2694,6 +2694,78 @@ out:
return ret;
}
/*
 * do_set_group - give the mount at @to_path the propagation group of the
 *                mount at @from_path
 * @from_path: path on the mount that supplies the group; must be the root of
 *             that mount
 * @to_path:   path on the mount that receives the group; must be the root of
 *             that mount
 *
 * Called from the move_mount() syscall when MOVE_MOUNT_SET_GROUP is set,
 * instead of do_move_mount(). Every check and every list update below runs
 * with namespace_lock() held.
 *
 * Returns 0 on success, -EPERM if the caller lacks CAP_SYS_ADMIN in the user
 * namespace of either mount's mount namespace, and -EINVAL if either mount is
 * not mounted or any of the structural checks below fails.
 */
static int do_set_group(struct path *from_path, struct path *to_path)
{
struct mount *from, *to;
int err;
from = real_mount(from_path->mnt);
to = real_mount(to_path->mnt);
namespace_lock();
err = -EINVAL;
/* To and From must be mounted */
if (!is_mounted(&from->mnt))
goto out;
if (!is_mounted(&to->mnt))
goto out;
err = -EPERM;
/* We should be allowed to modify mount namespaces of both mounts */
/*
 * NOTE(review): mnt_ns is dereferenced here; presumably the is_mounted()
 * checks above guarantee it is a valid namespace pointer - confirm.
 */
if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
goto out;
if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
goto out;
err = -EINVAL;
/* To and From paths should be mount roots */
if (from_path->dentry != from_path->mnt->mnt_root)
goto out;
if (to_path->dentry != to_path->mnt->mnt_root)
goto out;
/* Setting sharing groups is only allowed across same superblock */
if (from->mnt.mnt_sb != to->mnt.mnt_sb)
goto out;
/* From mount root should be wider than To mount root */
if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root))
goto out;
/* From mount should not have locked children in place of To's root */
if (has_locked_children(from, to->mnt.mnt_root))
goto out;
/* Setting sharing groups is only allowed on private mounts */
if (IS_MNT_SHARED(to) || IS_MNT_SLAVE(to))
goto out;
/* From should not be private */
if (!IS_MNT_SHARED(from) && !IS_MNT_SLAVE(from))
goto out;
/*
 * From is a slave: make To a slave of the same master.
 * NOTE(review): To is linked at the head of the master's slave list rather
 * than next to From - confirm that propagation traversal does not depend on
 * the ordering of that list.
 */
if (IS_MNT_SLAVE(from)) {
struct mount *m = from->mnt_master;
list_add(&to->mnt_slave, &m->mnt_slave_list);
to->mnt_master = m;
}
/*
 * From is shared: To takes the same group id and is linked into From's
 * share list. Only the set_mnt_shared() call is wrapped in the mount hash
 * lock; the id assignment and list insertion are done outside it.
 */
if (IS_MNT_SHARED(from)) {
to->mnt_group_id = from->mnt_group_id;
list_add(&to->mnt_share, &from->mnt_share);
lock_mount_hash();
set_mnt_shared(to);
unlock_mount_hash();
}
err = 0;
/* Single exit: every path, success or failure, drops the namespace lock. */
out:
namespace_unlock();
return err;
}
static int do_move_mount(struct path *old_path, struct path *new_path)
{
struct mnt_namespace *ns;
@ -3678,7 +3750,10 @@ SYSCALL_DEFINE5(move_mount,
if (ret < 0)
goto out_to;
ret = do_move_mount(&from_path, &to_path);
if (flags & MOVE_MOUNT_SET_GROUP)
ret = do_set_group(&from_path, &to_path);
else
ret = do_move_mount(&from_path, &to_path);
out_to:
path_put(&to_path);

View File

@ -73,7 +73,8 @@
#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */
#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */
#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
#define MOVE_MOUNT__MASK 0x00000077
#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */
#define MOVE_MOUNT__MASK 0x00000177
/*
* fsopen() flags.

View File

@ -35,6 +35,7 @@ TARGETS += memory-hotplug
TARGETS += mincore
TARGETS += mount
TARGETS += mount_setattr
TARGETS += move_mount_set_group
TARGETS += mqueue
TARGETS += nci
TARGETS += net

View File

@ -0,0 +1 @@
move_mount_set_group_test

View File

@ -0,0 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for move_mount_set_group selftests.
CFLAGS = -g -I../../../../usr/include/ -Wall -O2
TEST_GEN_FILES += move_mount_set_group_test
include ../lib.mk

View File

@ -0,0 +1 @@
CONFIG_USER_NS=y

View File

@ -0,0 +1,375 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/mount.h>
#include <sys/wait.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdarg.h>
#include <sys/syscall.h>
#include "../kselftest_harness.h"
#ifndef CLONE_NEWNS
#define CLONE_NEWNS 0x00020000
#endif
#ifndef CLONE_NEWUSER
#define CLONE_NEWUSER 0x10000000
#endif
#ifndef MS_SHARED
#define MS_SHARED (1 << 20)
#endif
#ifndef MS_PRIVATE
#define MS_PRIVATE (1<<18)
#endif
#ifndef MOVE_MOUNT_SET_GROUP
#define MOVE_MOUNT_SET_GROUP 0x00000100
#endif
#ifndef MOVE_MOUNT_F_EMPTY_PATH
#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004
#endif
#ifndef MOVE_MOUNT_T_EMPTY_PATH
#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040
#endif
/*
 * write() wrapper that transparently restarts the call when it is
 * interrupted by a signal (EINTR). Any other outcome - success, a short
 * write, or a different error - is handed back to the caller unchanged.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t n = write(fd, buf, count);

		if (n >= 0 || errno != EINTR)
			return n;
	}
}
/*
 * Open an existing file write-only (refusing to follow a trailing symlink)
 * and write exactly @count bytes from @buf into it.
 *
 * Returns 0 when the whole buffer was written, -1 if the file could not be
 * opened, the write failed, or fewer than @count bytes were written.
 */
static int write_file(const char *path, const void *buf, size_t count)
{
	ssize_t written;
	int fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);

	if (fd < 0)
		return -1;

	written = write_nointr(fd, buf, count);
	close(fd);

	/* A short write counts as failure just like an error does. */
	return (written >= 0 && (size_t)written == count) ? 0 : -1;
}
/*
 * Move the calling process into a new user namespace in which it is root.
 *
 * The caller's real uid and gid (sampled before unsharing) are each mapped
 * to id 0 inside the new namespace, after which the process switches to
 * gid 0 and uid 0.
 *
 * Returns 0 on success, -1 if any step fails.
 */
static int create_and_enter_userns(void)
{
	uid_t uid = getuid();
	gid_t gid = getgid();
	char map[100];

	if (unshare(CLONE_NEWUSER))
		return -1;

	/*
	 * Write "deny" to setgroups before touching gid_map. A missing
	 * setgroups file (ENOENT) is tolerated; any other failure is fatal.
	 */
	if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
	    errno != ENOENT)
		return -1;

	/*
	 * uid_t/gid_t are unsigned; format them as such so that ids above
	 * INT_MAX are not written as negative numbers.
	 */
	snprintf(map, sizeof(map), "0 %u 1", (unsigned int)uid);
	if (write_file("/proc/self/uid_map", map, strlen(map)))
		return -1;

	snprintf(map, sizeof(map), "0 %u 1", (unsigned int)gid);
	if (write_file("/proc/self/gid_map", map, strlen(map)))
		return -1;

	/* Group first, then user. */
	if (setgid(0))
		return -1;

	if (setuid(0))
		return -1;

	return 0;
}
/*
 * Enter a fresh user namespace and a fresh mount namespace, then mark the
 * whole mount tree under "/" recursively private.
 *
 * The three steps run in order and stop at the first failure.
 * Returns 0 on success, -1 otherwise.
 */
static int prepare_unpriv_mountns(void)
{
	bool failed = create_and_enter_userns() ||
		      unshare(CLONE_NEWNS) ||
		      mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);

	return failed ? -1 : 0;
}
/*
 * Return a pointer into @src positioned just past @nfields fields, where a
 * field ends at a single space or tab. Consecutive separators are not
 * collapsed: each one terminates a (possibly empty) field.
 *
 * If the string ends first, the pointer to its terminating NUL is returned.
 * The result is never NULL and @src is not modified.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;

	for (int left = nfields; left > 0; left--) {
		/* Jump to the separator (or NUL) that ends the current field. */
		cur += strcspn(cur, " \t");
		if (*cur == '\0')
			return cur;
		cur++;
	}

	return cur;
}
/*
 * Truncate @word in place at its first space or tab. If it contains
 * neither, the existing terminating NUL is simply rewritten.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
/*
 * Report whether a mount at @path is listed as shared in
 * /proc/self/mountinfo.
 *
 * Each line is split on single spaces/tabs: the fifth field is compared
 * against @path, and for a matching line the seventh field is searched for
 * the substring "shared:". Scanning stops at the first line that matches
 * both; a line that matches @path but is not shared does not stop the scan.
 *
 * Returns true if such a line exists, false otherwise (including when
 * mountinfo cannot be opened). The line buffer and the stream are released
 * on every path.
 */
static bool is_shared_mount(const char *path)
{
	bool shared = false;
	size_t len = 0;
	char *line = NULL;
	FILE *f;

	f = fopen("/proc/self/mountinfo", "re");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *opts, *target;

		/*
		 * get_field() always returns a pointer into the line (at worst
		 * its terminating NUL), so no NULL checks are needed. Both
		 * fields must be located before either one is NUL-terminated.
		 */
		target = get_field(line, 4);
		opts = get_field(target, 2);

		null_endofword(target);
		if (strcmp(target, path) != 0)
			continue;

		null_endofword(opts);
		if (strstr(opts, "shared:")) {
			shared = true;
			break;
		}
	}

	free(line);
	fclose(f);
	return shared;
}
/* Attempt to de-conflict with the selftests tree. */
#ifndef SKIP
#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
#endif
#define SET_GROUP_FROM "/tmp/move_mount_set_group_supported_from"
#define SET_GROUP_TO "/tmp/move_mount_set_group_supported_to"
/*
 * Probe whether the running kernel accepts
 * move_mount(MOVE_MOUNT_SET_GROUP).
 *
 * A scratch tmpfs is mounted on /tmp and made private. Inside it a second
 * tmpfs is mounted on SET_GROUP_FROM, bind-mounted onto SET_GROUP_TO, and
 * then marked shared, after which the syscall is attempted from FROM to TO.
 * The scratch /tmp mount is lazily detached after the attempt.
 *
 * Returns 1 if the syscall succeeded, 0 if it failed, and -1 if the
 * scaffolding could not be set up (in which case nothing is unmounted).
 */
static int move_mount_set_group_supported(void)
{
	const unsigned long tmpfs_flags = MS_NOATIME | MS_NODEV;
	const char *tmpfs_opts = "size=100000,mode=700";
	int rc;

	/* Private scratch area on /tmp. */
	if (mount("testing", "/tmp", "tmpfs", tmpfs_flags, tmpfs_opts) ||
	    mount(NULL, "/tmp", NULL, MS_PRIVATE, 0))
		return -1;

	if (mkdir(SET_GROUP_FROM, 0777) || mkdir(SET_GROUP_TO, 0777))
		return -1;

	/* FROM: shared tmpfs. TO: bind mount of FROM taken before sharing. */
	if (mount("testing", SET_GROUP_FROM, "tmpfs", tmpfs_flags, tmpfs_opts) ||
	    mount(SET_GROUP_FROM, SET_GROUP_TO, NULL, MS_BIND, NULL) ||
	    mount(NULL, SET_GROUP_FROM, NULL, MS_SHARED, 0))
		return -1;

	rc = syscall(SYS_move_mount, AT_FDCWD, SET_GROUP_FROM,
		     AT_FDCWD, SET_GROUP_TO, MOVE_MOUNT_SET_GROUP);

	umount2("/tmp", MNT_DETACH);

	return rc < 0 ? 0 : 1;
}
/* Fixture with no per-test data members; its setup and teardown only manipulate mounts. */
FIXTURE(move_mount_set_group) {
};
#define SET_GROUP_A "/tmp/A"
/*
 * Per-test setup: enter new user and mount namespaces, skip the test if
 * move_mount(MOVE_MOUNT_SET_GROUP) is not supported, then mount a tmpfs on
 * /tmp and a second tmpfs on SET_GROUP_A inside it.
 */
FIXTURE_SETUP(move_mount_set_group)
{
int ret;
ASSERT_EQ(prepare_unpriv_mountns(), 0);
/* ret < 0: probe scaffolding failed; ret == 0: syscall rejected the flag. */
ret = move_mount_set_group_supported();
ASSERT_GE(ret, 0);
if (!ret)
SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
/*
 * Result deliberately ignored. NOTE(review): the probe already detaches
 * its own /tmp mount on this path, so this call may have nothing of ours
 * left to unmount - confirm it is still wanted.
 */
umount2("/tmp", MNT_DETACH);
ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
"size=100000,mode=700"), 0);
ASSERT_EQ(mkdir(SET_GROUP_A, 0777), 0);
ASSERT_EQ(mount("testing", SET_GROUP_A, "tmpfs", MS_NOATIME | MS_NODEV,
"size=100000,mode=700"), 0);
}
/*
 * Per-test teardown: lazily detach whatever is mounted on /tmp, unless the
 * support probe reports the feature as unavailable.
 */
FIXTURE_TEARDOWN(move_mount_set_group)
{
int ret;
/*
 * NOTE(review): this re-runs the full probe, which mounts and detaches a
 * new tmpfs on /tmp before the umount2() below - confirm that repeating
 * the probe here is intended rather than reusing the setup result.
 */
ret = move_mount_set_group_supported();
ASSERT_GE(ret, 0);
if (!ret)
SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
/* Result ignored. */
umount2("/tmp", MNT_DETACH);
}
#define __STACK_SIZE (8 * 1024 * 1024)
/*
 * Start @fn(@arg) in a child created with clone(), on a freshly
 * malloc()ed stack of __STACK_SIZE bytes. SIGCHLD is always OR-ed into
 * @flags.
 *
 * Returns the clone()/__clone2() result, or -ENOMEM if the stack could
 * not be allocated. The stack buffer is never freed by this function.
 */
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
{
	void *stack = malloc(__STACK_SIZE);

	if (!stack)
		return -ENOMEM;

	flags |= SIGCHLD;

#ifdef __ia64__
	/* __clone2() takes the stack base plus its size. */
	return __clone2(fn, stack, __STACK_SIZE, flags, arg, NULL);
#else
	/* clone() is given the address one past the end of the buffer. */
	return clone(fn, stack + __STACK_SIZE, flags, arg, NULL);
#endif
}
/*
 * Block until child @pid changes state, retrying when waitpid() is
 * interrupted by a signal.
 *
 * Returns the child's exit status if it exited normally, or -1 if
 * waitpid() failed for a reason other than EINTR or the child did not
 * exit normally.
 */
static int wait_for_pid(pid_t pid)
{
	int wstatus;

	while (waitpid(pid, &wstatus, 0) == -1) {
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1;
}
/*
 * Argument/result record handed to get_nestedns_mount_cb(): the caller sets
 * @shared, the callback fills in the three file descriptors.
 */
struct child_args {
int unsfd;	/* out: fd opened on /proc/self/ns/user in the child */
int mntnsfd;	/* out: fd opened on /proc/self/ns/mnt in the child */
bool shared;	/* in: when true, the child marks SET_GROUP_A as MS_SHARED */
int mntfd;	/* out: fd opened on SET_GROUP_A in the child */
};
/*
 * clone() callback: enter new user and mount namespaces, optionally mark
 * SET_GROUP_A shared, and record file descriptors for the new user
 * namespace, the new mount namespace and SET_GROUP_A in the struct
 * child_args passed as @data.
 *
 * Returns 0 on success and 1 on the first failure; descriptors opened
 * before a failure are left open and stay recorded in the struct.
 */
static int get_nestedns_mount_cb(void *data)
{
	struct child_args *args = data;
	int fd;

	if (prepare_unpriv_mountns() != 0)
		return 1;

	if (args->shared &&
	    mount(NULL, SET_GROUP_A, NULL, MS_SHARED, 0) != 0)
		return 1;

	fd = open("/proc/self/ns/user", O_RDONLY);
	if (fd < 0)
		return 1;
	args->unsfd = fd;

	fd = open("/proc/self/ns/mnt", O_RDONLY);
	if (fd < 0)
		return 1;
	args->mntnsfd = fd;

	fd = open(SET_GROUP_A, O_RDONLY);
	if (fd < 0)
		return 1;
	args->mntfd = fd;

	return 0;
}
/*
 * Copy a sharing group between mounts that live in two different nested
 * user+mount namespaces.
 *
 * Two children are run through get_nestedns_mount_cb(): the first marks its
 * SET_GROUP_A mount shared, the second leaves it as is. Each child hands
 * back fds for its namespaces and its mount through its child_args. The
 * parent then calls move_mount(MOVE_MOUNT_SET_GROUP) from the first mount
 * fd to the second, enters the second child's mount namespace and expects
 * SET_GROUP_A to be reported as shared there.
 */
TEST_F(move_mount_set_group, complex_sharing_copying)
{
struct child_args ca_from = {
.shared = true,
};
struct child_args ca_to = {
.shared = false,
};
pid_t pid;
int ret;
ret = move_mount_set_group_supported();
ASSERT_GE(ret, 0);
if (!ret)
SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
/*
 * The children are cloned with CLONE_VFORK | CLONE_VM | CLONE_FILES;
 * presumably this is what lets the fds and child_args fields written by
 * the child be used from the parent afterwards - see clone(2).
 */
pid = do_clone(get_nestedns_mount_cb, (void *)&ca_from, CLONE_VFORK |
CLONE_VM | CLONE_FILES); ASSERT_GT(pid, 0);
ASSERT_EQ(wait_for_pid(pid), 0);
pid = do_clone(get_nestedns_mount_cb, (void *)&ca_to, CLONE_VFORK |
CLONE_VM | CLONE_FILES); ASSERT_GT(pid, 0);
ASSERT_EQ(wait_for_pid(pid), 0);
/* Both mounts are addressed purely by fd, hence the two EMPTY_PATH flags. */
ASSERT_EQ(syscall(SYS_move_mount, ca_from.mntfd, "",
ca_to.mntfd, "", MOVE_MOUNT_SET_GROUP
| MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH),
0);
/* Switch to the receiving child's mount namespace to inspect its mountinfo. */
ASSERT_EQ(setns(ca_to.mntnsfd, CLONE_NEWNS), 0);
ASSERT_EQ(is_shared_mount(SET_GROUP_A), 1);
}
TEST_HARNESS_MAIN