close-range-openat2-v5.11

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCX9dpfgAKCRCRxhvAZXjc
 oo5kAP9PrqQAfEe9+CNlnOb4ZawcZaa3osUkr/ZkfoxI/dO2awEAgGCgWQ5PLtQF
 gtfz6I5IT2sc3G4D+nGZxef6Q29J2Qc=
 =fZNu
 -----END PGP SIGNATURE-----

Merge tag 'close-range-openat2-v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux

Pull close_range/openat2 updates from Christian Brauner:
 "This contains a fix for openat2() to make RESOLVE_BENEATH and
  RESOLVE_IN_ROOT mutually exclusive. It doesn't make sense to specify
  both at the same time. The openat2() selftests have been extended to
  verify that these two flags can't be specified together.

  This also adds the CLOSE_RANGE_CLOEXEC flag to close_range(), which
  allows marking a range of file descriptors as close-on-exec without
  actually closing them.

  This is useful in general, but the use case that triggered the patch is
  installing a seccomp profile in the calling task before exec. If the
  seccomp profile wants to block the close_range() syscall, it obviously
  can't use it to close all fds before exec. If it calls close_range()
  before installing the seccomp profile, it needs to take care not to
  close fds that it will still need before the exec, meaning it would
  have to call close_range() multiple times on different ranges and then
  still fall back to closing fds one by one right before the exec.

  CLOSE_RANGE_CLOEXEC solves this problem by relying on the exec
  codepath to get rid of the unwanted fds. The close_range() tests have
  been expanded to verify that CLOSE_RANGE_CLOEXEC works."

* tag 'close-range-openat2-v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux:
  selftests: core: add tests for CLOSE_RANGE_CLOEXEC
  fs, close_range: add flag CLOSE_RANGE_CLOEXEC
  selftests: openat2: add RESOLVE_ conflict test
  openat2: reject RESOLVE_BENEATH|RESOLVE_IN_ROOT
This commit is contained in:
Linus Torvalds 2020-12-15 19:11:47 -08:00
commit 345d4ab5e0
5 changed files with 122 additions and 11 deletions
fs
include/uapi/linux
tools/testing/selftests

View File

@ -674,6 +674,35 @@ int __close_fd(struct files_struct *files, unsigned fd)
}
EXPORT_SYMBOL(__close_fd); /* for ksys_close() */
/*
 * __range_cloexec() - mark every descriptor in [fd, max_fd] close-on-exec.
 * @cur_fds: files_struct whose descriptor table is updated
 * @fd:      first descriptor of the range (inclusive)
 * @max_fd:  last descriptor of the range (inclusive)
 *
 * No file is closed here; only bits in the table's close_on_exec bitmap are
 * set. An empty range (fd > max_fd) is a no-op.
 */
static inline void __range_cloexec(struct files_struct *cur_fds,
				   unsigned int fd, unsigned int max_fd)
{
	struct fdtable *fdt;

	if (fd > max_fd)
		return;

	spin_lock(&cur_fds->file_lock);
	fdt = files_fdtable(cur_fds);
	/*
	 * Clamp against the table that is actually being modified, while
	 * file_lock is held. The caller's bound was computed before the lock
	 * was taken, so do not rely on it alone to keep bitmap_set() inside
	 * the close_on_exec bitmap.
	 */
	if (max_fd >= fdt->max_fds)
		max_fd = fdt->max_fds - 1;
	if (fd <= max_fd)
		bitmap_set(fdt->close_on_exec, fd, max_fd - fd + 1);
	spin_unlock(&cur_fds->file_lock);
}
/*
 * __range_close() - close every open descriptor in [fd, max_fd].
 * @cur_fds: files_struct the descriptors belong to
 * @fd:      first descriptor of the range (inclusive)
 * @max_fd:  last descriptor of the range (inclusive)
 *
 * Slots for which pick_file() returns NULL are skipped. cond_resched() is
 * called after each file that is closed.
 */
static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
				 unsigned int max_fd)
{
	for (; fd <= max_fd; fd++) {
		struct file *victim = pick_file(cur_fds, fd);

		if (victim) {
			filp_close(victim, cur_fds);
			cond_resched();
		}
	}
}
/**
* __close_range() - Close all file descriptors in a given range.
*
@ -689,7 +718,7 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
struct task_struct *me = current;
struct files_struct *cur_fds = me->files, *fds = NULL;
if (flags & ~CLOSE_RANGE_UNSHARE)
if (flags & ~(CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC))
return -EINVAL;
if (fd > max_fd)
@ -727,16 +756,11 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
}
max_fd = min(max_fd, cur_max);
while (fd <= max_fd) {
struct file *file;
file = pick_file(cur_fds, fd++);
if (!file)
continue;
filp_close(file, cur_fds);
cond_resched();
}
if (flags & CLOSE_RANGE_CLOEXEC)
__range_cloexec(cur_fds, fd, max_fd);
else
__range_close(cur_fds, fd, max_fd);
if (fds) {
/*

View File

@ -1010,6 +1010,10 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
if (how->resolve & ~VALID_RESOLVE_FLAGS)
return -EINVAL;
/* Scoping flags are mutually exclusive. */
if ((how->resolve & RESOLVE_BENEATH) && (how->resolve & RESOLVE_IN_ROOT))
return -EINVAL;
/* Deal with the mode. */
if (WILL_CREATE(flags)) {
if (how->mode & ~S_IALLUGO)

View File

@ -5,5 +5,8 @@
/* Unshare the file descriptor table before closing file descriptors. */
#define CLOSE_RANGE_UNSHARE (1U << 1)
/* Set the FD_CLOEXEC bit instead of closing the file descriptor. */
#define CLOSE_RANGE_CLOEXEC (1U << 2)
#endif /* _UAPI_LINUX_CLOSE_RANGE_H */

View File

@ -11,6 +11,7 @@
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <sys/resource.h>
#include "../kselftest_harness.h"
#include "../clone3/clone3_selftests.h"
@ -23,6 +24,10 @@
#define CLOSE_RANGE_UNSHARE (1U << 1)
#endif
#ifndef CLOSE_RANGE_CLOEXEC
#define CLOSE_RANGE_CLOEXEC (1U << 2)
#endif
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
unsigned int flags)
{
@ -224,4 +229,73 @@ TEST(close_range_unshare_capped)
EXPECT_EQ(0, WEXITSTATUS(status));
}
/*
 * Verify that close_range() with CLOSE_RANGE_CLOEXEC sets FD_CLOEXEC on the
 * requested descriptors while leaving them open, does not touch descriptors
 * outside the requested ranges, and still works after RLIMIT_NOFILE has been
 * lowered below the descriptors in use.
 */
TEST(close_range_cloexec)
{
	int i, ret;
	int open_fds[101];
	struct rlimit rlimit;

	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
		int fd;

		fd = open("/dev/null", O_RDONLY);
		ASSERT_GE(fd, 0) {
			if (errno == ENOENT)
				XFAIL(return, "Skipping test since /dev/null does not exist");
		}

		open_fds[i] = fd;
	}

	/* Probe for syscall and flag support before running the real checks. */
	ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
	if (ret < 0) {
		if (errno == ENOSYS)
			XFAIL(return, "close_range() syscall not supported");
		if (errno == EINVAL)
			XFAIL(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
	}

	/* Ensure the FD_CLOEXEC bit is set also with a resource limit in place.  */
	ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
	rlimit.rlim_cur = 25;
	ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));

	/*
	 * Set close-on-exec for two ranges: [0-50] and [75-100].
	 * NOTE(review): the range bounds are descriptor values taken from
	 * open_fds[], so this assumes open() handed out ascending descriptor
	 * numbers above; confirm if the setup loop ever changes.
	 */
	ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC);
	ASSERT_EQ(0, ret);
	ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC);
	ASSERT_EQ(0, ret);

	for (i = 0; i <= 50; i++) {
		int flags = fcntl(open_fds[i], F_GETFD);

		EXPECT_GT(flags, -1);
		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
	}

	/* Descriptors between the two ranges must be left untouched. */
	for (i = 51; i <= 74; i++) {
		int flags = fcntl(open_fds[i], F_GETFD);

		EXPECT_GT(flags, -1);
		EXPECT_EQ(flags & FD_CLOEXEC, 0);
	}

	for (i = 75; i <= 100; i++) {
		int flags = fcntl(open_fds[i], F_GETFD);

		EXPECT_GT(flags, -1);
		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
	}

	/* Test a common pattern.  */
	ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC);
	/* Fail at the cause if the syscall itself was rejected. */
	ASSERT_EQ(0, ret);
	for (i = 0; i <= 100; i++) {
		int flags = fcntl(open_fds[i], F_GETFD);

		EXPECT_GT(flags, -1);
		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
	}
}
TEST_HARNESS_MAIN

View File

@ -155,7 +155,7 @@ struct flag_test {
int err;
};
#define NUM_OPENAT2_FLAG_TESTS 23
#define NUM_OPENAT2_FLAG_TESTS 24
void test_openat2_flags(void)
{
@ -210,6 +210,12 @@ void test_openat2_flags(void)
.how.flags = O_TMPFILE | O_RDWR,
.how.mode = 0x0000A00000000000ULL, .err = -EINVAL },
/* ->resolve flags must not conflict. */
{ .name = "incompatible resolve flags (BENEATH | IN_ROOT)",
.how.flags = O_RDONLY,
.how.resolve = RESOLVE_BENEATH | RESOLVE_IN_ROOT,
.err = -EINVAL },
/* ->resolve must only contain RESOLVE_* flags. */
{ .name = "invalid how.resolve and O_RDONLY",
.how.flags = O_RDONLY,