close-range-openat2-v5.11
-----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCX9dpfgAKCRCRxhvAZXjc oo5kAP9PrqQAfEe9+CNlnOb4ZawcZaa3osUkr/ZkfoxI/dO2awEAgGCgWQ5PLtQF gtfz6I5IT2sc3G4D+nGZxef6Q29J2Qc= =fZNu -----END PGP SIGNATURE----- Merge tag 'close-range-openat2-v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux Pull close_range/openat2 updates from Christian Brauner: "This contains a fix for openat2() to make RESOLVE_BENEATH and RESOLVE_IN_ROOT mutually exclusive. It doesn't make sense to specify both at the same time. The openat2() selftests have been extended to verify that these two flags can't be specified together. This also adds the CLOSE_RANGE_CLOEXEC flag to close_range(), which allows marking a range of file descriptors as close-on-exec without actually closing them. This is useful in general, but the use-case that triggered the patch is installing a seccomp profile in the calling task before exec. If the seccomp profile wants to block the close_range() syscall, it obviously can't use it to close all fds before exec. If it calls close_range() before installing the seccomp profile, it needs to take care not to close fds that it will still need before the exec, meaning it would have to call close_range() multiple times on different ranges and then still fall back to closing fds one by one right before the exec. CLOSE_RANGE_CLOEXEC solves this problem by relying on the exec codepath to get rid of the unwanted fds. The close_range() tests have been expanded to verify that CLOSE_RANGE_CLOEXEC works" * tag 'close-range-openat2-v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux: selftests: core: add tests for CLOSE_RANGE_CLOEXEC fs, close_range: add flag CLOSE_RANGE_CLOEXEC selftests: openat2: add RESOLVE_ conflict test openat2: reject RESOLVE_BENEATH|RESOLVE_IN_ROOT
This commit is contained in:
commit
345d4ab5e0
44
fs/file.c
44
fs/file.c
|
@ -674,6 +674,35 @@ int __close_fd(struct files_struct *files, unsigned fd)
|
|||
}
|
||||
EXPORT_SYMBOL(__close_fd); /* for ksys_close() */
|
||||
|
||||
static inline void __range_cloexec(struct files_struct *cur_fds,
|
||||
unsigned int fd, unsigned int max_fd)
|
||||
{
|
||||
struct fdtable *fdt;
|
||||
|
||||
if (fd > max_fd)
|
||||
return;
|
||||
|
||||
spin_lock(&cur_fds->file_lock);
|
||||
fdt = files_fdtable(cur_fds);
|
||||
bitmap_set(fdt->close_on_exec, fd, max_fd - fd + 1);
|
||||
spin_unlock(&cur_fds->file_lock);
|
||||
}
|
||||
|
||||
/*
 * Close every open file descriptor in the inclusive range [fd, max_fd]
 * of @cur_fds.  Slots for which pick_file() returns no file are skipped.
 */
static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
				 unsigned int max_fd)
{
	for (; fd <= max_fd; fd++) {
		struct file *victim = pick_file(cur_fds, fd);

		if (victim) {
			filp_close(victim, cur_fds);
			/* Yield between closes; the range may be long. */
			cond_resched();
		}
	}
}
|
||||
|
||||
/**
|
||||
* __close_range() - Close all file descriptors in a given range.
|
||||
*
|
||||
|
@ -689,7 +718,7 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
|
|||
struct task_struct *me = current;
|
||||
struct files_struct *cur_fds = me->files, *fds = NULL;
|
||||
|
||||
if (flags & ~CLOSE_RANGE_UNSHARE)
|
||||
if (flags & ~(CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC))
|
||||
return -EINVAL;
|
||||
|
||||
if (fd > max_fd)
|
||||
|
@ -727,16 +756,11 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
|
|||
}
|
||||
|
||||
max_fd = min(max_fd, cur_max);
|
||||
while (fd <= max_fd) {
|
||||
struct file *file;
|
||||
|
||||
file = pick_file(cur_fds, fd++);
|
||||
if (!file)
|
||||
continue;
|
||||
|
||||
filp_close(file, cur_fds);
|
||||
cond_resched();
|
||||
}
|
||||
if (flags & CLOSE_RANGE_CLOEXEC)
|
||||
__range_cloexec(cur_fds, fd, max_fd);
|
||||
else
|
||||
__range_close(cur_fds, fd, max_fd);
|
||||
|
||||
if (fds) {
|
||||
/*
|
||||
|
|
|
@ -1010,6 +1010,10 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
|
|||
if (how->resolve & ~VALID_RESOLVE_FLAGS)
|
||||
return -EINVAL;
|
||||
|
||||
/* Scoping flags are mutually exclusive. */
|
||||
if ((how->resolve & RESOLVE_BENEATH) && (how->resolve & RESOLVE_IN_ROOT))
|
||||
return -EINVAL;
|
||||
|
||||
/* Deal with the mode. */
|
||||
if (WILL_CREATE(flags)) {
|
||||
if (how->mode & ~S_IALLUGO)
|
||||
|
|
|
@ -5,5 +5,8 @@
|
|||
/* Unshare the file descriptor table before closing file descriptors. */
|
||||
#define CLOSE_RANGE_UNSHARE (1U << 1)
|
||||
|
||||
/* Set the FD_CLOEXEC bit instead of closing the file descriptor. */
|
||||
#define CLOSE_RANGE_CLOEXEC (1U << 2)
|
||||
|
||||
#endif /* _UAPI_LINUX_CLOSE_RANGE_H */
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include <string.h>
|
||||
#include <syscall.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/resource.h>
|
||||
|
||||
#include "../kselftest_harness.h"
|
||||
#include "../clone3/clone3_selftests.h"
|
||||
|
@ -23,6 +24,10 @@
|
|||
#define CLOSE_RANGE_UNSHARE (1U << 1)
|
||||
#endif
|
||||
|
||||
#ifndef CLOSE_RANGE_CLOEXEC
|
||||
#define CLOSE_RANGE_CLOEXEC (1U << 2)
|
||||
#endif
|
||||
|
||||
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
|
||||
unsigned int flags)
|
||||
{
|
||||
|
@ -224,4 +229,73 @@ TEST(close_range_unshare_capped)
|
|||
EXPECT_EQ(0, WEXITSTATUS(status));
|
||||
}
|
||||
|
||||
TEST(close_range_cloexec)
|
||||
{
|
||||
int i, ret;
|
||||
int open_fds[101];
|
||||
struct rlimit rlimit;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
|
||||
int fd;
|
||||
|
||||
fd = open("/dev/null", O_RDONLY);
|
||||
ASSERT_GE(fd, 0) {
|
||||
if (errno == ENOENT)
|
||||
XFAIL(return, "Skipping test since /dev/null does not exist");
|
||||
}
|
||||
|
||||
open_fds[i] = fd;
|
||||
}
|
||||
|
||||
ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
|
||||
if (ret < 0) {
|
||||
if (errno == ENOSYS)
|
||||
XFAIL(return, "close_range() syscall not supported");
|
||||
if (errno == EINVAL)
|
||||
XFAIL(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
|
||||
}
|
||||
|
||||
/* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */
|
||||
ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
|
||||
rlimit.rlim_cur = 25;
|
||||
ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
|
||||
|
||||
/* Set close-on-exec for two ranges: [0-50] and [75-100]. */
|
||||
ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC);
|
||||
ASSERT_EQ(0, ret);
|
||||
ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
for (i = 0; i <= 50; i++) {
|
||||
int flags = fcntl(open_fds[i], F_GETFD);
|
||||
|
||||
EXPECT_GT(flags, -1);
|
||||
EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
|
||||
}
|
||||
|
||||
for (i = 51; i <= 74; i++) {
|
||||
int flags = fcntl(open_fds[i], F_GETFD);
|
||||
|
||||
EXPECT_GT(flags, -1);
|
||||
EXPECT_EQ(flags & FD_CLOEXEC, 0);
|
||||
}
|
||||
|
||||
for (i = 75; i <= 100; i++) {
|
||||
int flags = fcntl(open_fds[i], F_GETFD);
|
||||
|
||||
EXPECT_GT(flags, -1);
|
||||
EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
|
||||
}
|
||||
|
||||
/* Test a common pattern. */
|
||||
ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC);
|
||||
for (i = 0; i <= 100; i++) {
|
||||
int flags = fcntl(open_fds[i], F_GETFD);
|
||||
|
||||
EXPECT_GT(flags, -1);
|
||||
EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
TEST_HARNESS_MAIN
|
||||
|
|
|
@ -155,7 +155,7 @@ struct flag_test {
|
|||
int err;
|
||||
};
|
||||
|
||||
#define NUM_OPENAT2_FLAG_TESTS 23
|
||||
#define NUM_OPENAT2_FLAG_TESTS 24
|
||||
|
||||
void test_openat2_flags(void)
|
||||
{
|
||||
|
@ -210,6 +210,12 @@ void test_openat2_flags(void)
|
|||
.how.flags = O_TMPFILE | O_RDWR,
|
||||
.how.mode = 0x0000A00000000000ULL, .err = -EINVAL },
|
||||
|
||||
/* ->resolve flags must not conflict. */
|
||||
{ .name = "incompatible resolve flags (BENEATH | IN_ROOT)",
|
||||
.how.flags = O_RDONLY,
|
||||
.how.resolve = RESOLVE_BENEATH | RESOLVE_IN_ROOT,
|
||||
.err = -EINVAL },
|
||||
|
||||
/* ->resolve must only contain RESOLVE_* flags. */
|
||||
{ .name = "invalid how.resolve and O_RDONLY",
|
||||
.how.flags = O_RDONLY,
|
||||
|
|
Loading…
Reference in New Issue