seccomp updates for v5.10-rc1

- heavily refactor seccomp selftests (and clone3 selftests dependency) to
   fix powerpc (Kees Cook, Thadeu Lima de Souza Cascardo)
 - fix style issue in selftests (Zou Wei)
 - upgrade "unknown action" from KILL_THREAD to KILL_PROCESS (Rich Felker)
 - replace task_pt_regs(current) with current_pt_regs() (Denis Efremov)
 - fix corner-case race in USER_NOTIF (Jann Horn)
 - make CONFIG_SECCOMP no longer per-arch (YiFei Zhu)
 -----BEGIN PGP SIGNATURE-----
 
 iQJKBAABCgA0FiEEpcP2jyKd1g9yPm4TiXL039xtwCYFAl+E1LAWHGtlZXNjb29r
 QGNocm9taXVtLm9yZwAKCRCJcvTf3G3AJgRfD/0cq7W51+o34719vefC+oZaMjJJ
 Bd5HYshmr6NRpMqn0OhtT9kVi6OeV0sK0VJeNxSISDIaGNJ8xCI9YhnXwzY+7myK
 +IQu3i2Hv7dlWvTaXWFLL+mvfk6WopLntFGGJQ8KPMnP2gcfH2AZmOeAKGFGhBDe
 NwpAUZ9zriXg9JCQp6u0FzPJgk8KfgfHjUY6Hsa095gg0aPSJhc8bWEUNBQwjCe6
 uIcxDP/zK2WWaEhO9BfHt6/VTcXw7QgTLS3yM+pwBCgR1JHs7HMhtgcwPT410qES
 LmYD8OiHmv5AZhDjcCcNipKEv3ZnxkLnpU/6hfaKM4zn/DoaR/zbfjO9U017rcNV
 9gf7k5siAP7DH48IFlqf4Erzd3xyF0OJDnVfC7NiPtggPfO9aWOHJJZCuJRQOdrN
 qPMjkaQzFb02qb501PLEn55F24OLDjz1vFOqpkJm2/XamOBVV4uiRKmfpNEo/MOf
 QkhSvzvwEFErWwzPH95uFyVhs42stwnM3ppnwtya2+U5kxXdNvbAR8N5leH7siaU
 ab+YJIHW59+BxXTlKgXIcqBP/6RqJWJtuT9OqGs0K2A7FhQSexh5MOm+9vvGgIwZ
 Qjyijku8dB3aV94BNGnlJq6BV+4Hc6EGadh7h3b8GiRAUTYo0pk5G/iKL6Ii+R6p
 0msJENqalKFtNCr70w==
 =a4u2
 -----END PGP SIGNATURE-----

Merge tag 'seccomp-v5.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux

Pull seccomp updates from Kees Cook:
 "The bulk of the changes are with the seccomp selftests to accommodate
  some powerpc-specific behavioral characteristics. Additional cleanups,
  fixes, and improvements are also included:

   - heavily refactor seccomp selftests (and clone3 selftests
     dependency) to fix powerpc (Kees Cook, Thadeu Lima de Souza
     Cascardo)

   - fix style issue in selftests (Zou Wei)

   - upgrade "unknown action" from KILL_THREAD to KILL_PROCESS (Rich
     Felker)

   - replace task_pt_regs(current) with current_pt_regs() (Denis
     Efremov)

   - fix corner-case race in USER_NOTIF (Jann Horn)

   - make CONFIG_SECCOMP no longer per-arch (YiFei Zhu)"

* tag 'seccomp-v5.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux: (23 commits)
  seccomp: Make duplicate listener detection non-racy
  seccomp: Move config option SECCOMP to arch/Kconfig
  selftests/clone3: Avoid OS-defined clone_args
  selftests/seccomp: powerpc: Set syscall return during ptrace syscall exit
  selftests/seccomp: Allow syscall nr and ret value to be set separately
  selftests/seccomp: Record syscall during ptrace entry
  selftests/seccomp: powerpc: Fix seccomp return value testing
  selftests/seccomp: Remove SYSCALL_NUM_RET_SHARE_REG in favor of SYSCALL_RET_SET
  selftests/seccomp: Avoid redundant register flushes
  selftests/seccomp: Convert REGSET calls into ARCH_GETREG/ARCH_SETREG
  selftests/seccomp: Convert HAVE_GETREG into ARCH_GETREG/ARCH_SETREG
  selftests/seccomp: Remove syscall setting #ifdefs
  selftests/seccomp: mips: Remove O32-specific macro
  selftests/seccomp: arm64: Define SYSCALL_NUM_SET macro
  selftests/seccomp: arm: Define SYSCALL_NUM_SET macro
  selftests/seccomp: mips: Define SYSCALL_NUM_SET macro
  selftests/seccomp: Provide generic syscall setting macro
  selftests/seccomp: Refactor arch register macros to avoid xtensa special case
  selftests/seccomp: Use __NR_mknodat instead of __NR_mknod
  selftests/seccomp: Use bitwise instead of arithmetic operator for flags
  ...
This commit is contained in:
Linus Torvalds 2020-10-13 16:33:43 -07:00
commit 8b05418b25
23 changed files with 399 additions and 439 deletions

View File

@ -450,10 +450,23 @@ config ARCH_WANT_OLD_COMPAT_IPC
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
bool
config HAVE_ARCH_SECCOMP_FILTER
config HAVE_ARCH_SECCOMP
bool
help
An arch should select this symbol to support seccomp mode 1 (the fixed
syscall policy), and must provide an overrides for __NR_seccomp_sigreturn,
and compat syscalls if the asm-generic/seccomp.h defaults need adjustment:
- __NR_seccomp_read_32
- __NR_seccomp_write_32
- __NR_seccomp_exit_32
- __NR_seccomp_sigreturn_32
config HAVE_ARCH_SECCOMP_FILTER
bool
select HAVE_ARCH_SECCOMP
help
An arch should select this symbol if it provides all of these things:
- all the requirements for HAVE_ARCH_SECCOMP
- syscall_get_arch()
- syscall_get_arguments()
- syscall_rollback()
@ -464,6 +477,23 @@ config HAVE_ARCH_SECCOMP_FILTER
results in the system call being skipped immediately.
- seccomp syscall wired up
config SECCOMP
prompt "Enable seccomp to safely execute untrusted bytecode"
def_bool y
depends on HAVE_ARCH_SECCOMP
help
This kernel feature is useful for number crunching applications
that may need to handle untrusted bytecode during their
execution. By using pipes or other transports made available
to the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in their
own address space using seccomp. Once seccomp is enabled via
prctl(PR_SET_SECCOMP) or the seccomp() syscall, it cannot be
disabled and the task is only allowed to execute a few safe
syscalls defined by each seccomp mode.
If unsure, say Y.
config SECCOMP_FILTER
def_bool y
depends on HAVE_ARCH_SECCOMP_FILTER && SECCOMP && NET

View File

@ -68,6 +68,7 @@ config ARM
select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_SECCOMP
select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
@ -1618,20 +1619,6 @@ config UACCESS_WITH_MEMCPY
However, if the CPU data cache is using a write-allocate mode,
this option is unlikely to provide any performance gain.
config SECCOMP
bool
prompt "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
config PARAVIRT
bool "Enable paravirtualization code"
help

View File

@ -1041,19 +1041,6 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK
config CC_HAVE_SHADOW_CALL_STACK
def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
config PARAVIRT
bool "Enable paravirtualization code"
help

View File

@ -309,16 +309,3 @@ endmenu
source "arch/csky/Kconfig.platforms"
source "kernel/Kconfig.hz"
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.

View File

@ -26,6 +26,7 @@ config MICROBLAZE
select GENERIC_SCHED_CLOCK
select HAVE_ARCH_HASH
select HAVE_ARCH_KGDB
select HAVE_ARCH_SECCOMP
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
@ -120,23 +121,6 @@ config CMDLINE_FORCE
Set this to have arguments from the default kernel command string
override those passed by the boot loader.
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
default y
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via /proc/<pid>/seccomp, it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.
endmenu
menu "Kernel features"

View File

@ -3006,23 +3006,6 @@ config PHYSICAL_START
specified in the "crashkernel=YM@XM" command line boot parameter
passed to the panic-ed kernel).
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
default y
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via /proc/<pid>/seccomp, it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.
config MIPS_O32_FP64_SUPPORT
bool "Support for O32 binaries using 64-bit FP" if !CPU_MIPSR6
depends on 32BIT || MIPS32_O32

View File

@ -378,19 +378,3 @@ endmenu
source "drivers/parisc/Kconfig"
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.

View File

@ -946,23 +946,6 @@ config ARCH_WANTS_FREEZER_CONTROL
source "kernel/power/Kconfig"
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
default y
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via /proc/<pid>/seccomp, it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.
config PPC_MEM_KEYS
prompt "PowerPC Memory Protection Keys"
def_bool y

View File

@ -334,19 +334,6 @@ menu "Kernel features"
source "kernel/Kconfig.hz"
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
config RISCV_SBI_V01
bool "SBI v0.1 support"
default y

View File

@ -792,23 +792,6 @@ config CRASH_DUMP
endmenu
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via /proc/<pid>/seccomp, it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y.
config CCW
def_bool y

View File

@ -600,22 +600,6 @@ config PHYSICAL_START
where the fail safe kernel needs to run at a different address
than the panic-ed kernel.
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl, it cannot be disabled and the task is only
allowed to execute a few safe syscalls defined by each seccomp
mode.
If unsure, say N.
config SMP
bool "Symmetric multi-processing support"
depends on SYS_SUPPORTS_SMP

View File

@ -23,6 +23,7 @@ config SPARC
select HAVE_OPROFILE
select HAVE_ARCH_KGDB if !SMP || SPARC64
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_SECCOMP if SPARC64
select HAVE_EXIT_THREAD
select HAVE_PCI
select SYSCTL_EXCEPTION_TRACE
@ -227,23 +228,6 @@ config EARLYFB
help
Say Y here to enable a faster early framebuffer boot console.
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on SPARC64 && PROC_FS
default y
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via /proc/<pid>/seccomp, it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
depends on SPARC64 && SMP

View File

@ -173,22 +173,6 @@ config PGTABLE_LEVELS
default 3 if 3_LEVEL_PGTABLES
default 2
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y.
config UML_TIME_TRAVEL_SUPPORT
bool
prompt "Support time-travel mode (e.g. for test execution)"

View File

@ -1970,22 +1970,6 @@ config EFI_MIXED
If unsure, say N.
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.
source "kernel/Kconfig.hz"
config KEXEC

View File

@ -217,20 +217,6 @@ config HOTPLUG_CPU
Say N if you want to disable CPU hotplug.
config SECCOMP
bool
prompt "Enable seccomp to safely compute untrusted bytecode"
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
config FAST_SYSCALL_XTENSA
bool "Enable fast atomic syscalls"
default n

View File

@ -196,6 +196,10 @@ struct seccomp_filter {
*/
static void populate_seccomp_data(struct seccomp_data *sd)
{
/*
* Instead of using current_pt_reg(), we're already doing the work
* to safely fetch "current", so just use "task" everywhere below.
*/
struct task_struct *task = current;
struct pt_regs *regs = task_pt_regs(task);
unsigned long args[6];
@ -910,7 +914,7 @@ out:
if (flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE)
return 0;
syscall_set_return_value(current, task_pt_regs(current),
syscall_set_return_value(current, current_pt_regs(),
err, ret);
return -1;
}
@ -943,13 +947,13 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
/* Set low-order bits as an errno, capped at MAX_ERRNO. */
if (data > MAX_ERRNO)
data = MAX_ERRNO;
syscall_set_return_value(current, task_pt_regs(current),
syscall_set_return_value(current, current_pt_regs(),
-data, 0);
goto skip;
case SECCOMP_RET_TRAP:
/* Show the handler the original registers. */
syscall_rollback(current, task_pt_regs(current));
syscall_rollback(current, current_pt_regs());
/* Let the filter pass back 16 bits of data. */
seccomp_send_sigsys(this_syscall, data);
goto skip;
@ -962,7 +966,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
/* ENOSYS these calls if there is no tracer attached. */
if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
syscall_set_return_value(current,
task_pt_regs(current),
current_pt_regs(),
-ENOSYS, 0);
goto skip;
}
@ -982,7 +986,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
if (fatal_signal_pending(current))
goto skip;
/* Check if the tracer forced the syscall to be skipped. */
this_syscall = syscall_get_nr(current, task_pt_regs(current));
this_syscall = syscall_get_nr(current, current_pt_regs());
if (this_syscall < 0)
goto skip;
@ -1020,20 +1024,20 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
default:
seccomp_log(this_syscall, SIGSYS, action, true);
/* Dump core only if this is the last remaining thread. */
if (action == SECCOMP_RET_KILL_PROCESS ||
if (action != SECCOMP_RET_KILL_THREAD ||
get_nr_threads(current) == 1) {
kernel_siginfo_t info;
/* Show the original registers in the dump. */
syscall_rollback(current, task_pt_regs(current));
syscall_rollback(current, current_pt_regs());
/* Trigger a manual coredump since do_exit skips it. */
seccomp_init_siginfo(&info, this_syscall, data);
do_coredump(&info);
}
if (action == SECCOMP_RET_KILL_PROCESS)
do_group_exit(SIGSYS);
else
if (action == SECCOMP_RET_KILL_THREAD)
do_exit(SIGSYS);
else
do_group_exit(SIGSYS);
}
unreachable();
@ -1060,7 +1064,7 @@ int __secure_computing(const struct seccomp_data *sd)
return 0;
this_syscall = sd ? sd->nr :
syscall_get_nr(current, task_pt_regs(current));
syscall_get_nr(current, current_pt_regs());
switch (mode) {
case SECCOMP_MODE_STRICT:
@ -1472,13 +1476,7 @@ static const struct file_operations seccomp_notify_ops = {
static struct file *init_listener(struct seccomp_filter *filter)
{
struct file *ret = ERR_PTR(-EBUSY);
struct seccomp_filter *cur;
for (cur = current->seccomp.filter; cur; cur = cur->prev) {
if (cur->notif)
goto out;
}
struct file *ret;
ret = ERR_PTR(-ENOMEM);
filter->notif = kzalloc(sizeof(*(filter->notif)), GFP_KERNEL);
@ -1504,6 +1502,31 @@ out:
return ret;
}
/*
* Does @new_child have a listener while an ancestor also has a listener?
* If so, we'll want to reject this filter.
* This only has to be tested for the current process, even in the TSYNC case,
* because TSYNC installs @child with the same parent on all threads.
* Note that @new_child is not hooked up to its parent at this point yet, so
* we use current->seccomp.filter.
*/
static bool has_duplicate_listener(struct seccomp_filter *new_child)
{
struct seccomp_filter *cur;
/* must be protected against concurrent TSYNC */
lockdep_assert_held(&current->sighand->siglock);
if (!new_child->notif)
return false;
for (cur = current->seccomp.filter; cur; cur = cur->prev) {
if (cur->notif)
return true;
}
return false;
}
/**
* seccomp_set_mode_filter: internal function for setting seccomp filter
* @flags: flags to change filter behavior
@ -1575,6 +1598,11 @@ static long seccomp_set_mode_filter(unsigned int flags,
if (!seccomp_may_assign_mode(seccomp_mode))
goto out;
if (has_duplicate_listener(prepared)) {
ret = -EBUSY;
goto out;
}
ret = seccomp_attach_filter(flags, prepared);
if (ret)
goto out;

View File

@ -20,13 +20,6 @@
#include "../kselftest.h"
#include "clone3_selftests.h"
/*
* Different sizes of struct clone_args
*/
#ifndef CLONE3_ARGS_SIZE_V0
#define CLONE3_ARGS_SIZE_V0 64
#endif
enum test_mode {
CLONE3_ARGS_NO_TEST,
CLONE3_ARGS_ALL_0,
@ -38,13 +31,13 @@ enum test_mode {
static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
{
struct clone_args args = {
struct __clone_args args = {
.flags = flags,
.exit_signal = SIGCHLD,
};
struct clone_args_extended {
struct clone_args args;
struct __clone_args args;
__aligned_u64 excess_space[2];
} args_ext;
@ -52,11 +45,11 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
int status;
memset(&args_ext, 0, sizeof(args_ext));
if (size > sizeof(struct clone_args))
if (size > sizeof(struct __clone_args))
args_ext.excess_space[1] = 1;
if (size == 0)
size = sizeof(struct clone_args);
size = sizeof(struct __clone_args);
switch (test_mode) {
case CLONE3_ARGS_ALL_0:
@ -77,9 +70,9 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
break;
}
memcpy(&args_ext.args, &args, sizeof(struct clone_args));
memcpy(&args_ext.args, &args, sizeof(struct __clone_args));
pid = sys_clone3((struct clone_args *)&args_ext, size);
pid = sys_clone3((struct __clone_args *)&args_ext, size);
if (pid < 0) {
ksft_print_msg("%s - Failed to create new process\n",
strerror(errno));
@ -144,14 +137,14 @@ int main(int argc, char *argv[])
else
ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
/* Do a clone3() with CLONE3_ARGS_SIZE_V0. */
test_clone3(0, CLONE3_ARGS_SIZE_V0, 0, CLONE3_ARGS_NO_TEST);
/* Do a clone3() with CLONE_ARGS_SIZE_VER0. */
test_clone3(0, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST);
/* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 */
test_clone3(0, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
/* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 */
test_clone3(0, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
/* Do a clone3() with sizeof(struct clone_args) + 8 */
test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
/* Do a clone3() with exit_signal having highest 32 bits non-zero */
test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG);
@ -165,31 +158,31 @@ int main(int argc, char *argv[])
/* Do a clone3() with NSIG < exit_signal < CSIG */
test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG);
test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
test_clone3(0, sizeof(struct clone_args) + 16, -E2BIG,
test_clone3(0, sizeof(struct __clone_args) + 16, -E2BIG,
CLONE3_ARGS_ALL_0);
test_clone3(0, sizeof(struct clone_args) * 2, -E2BIG,
test_clone3(0, sizeof(struct __clone_args) * 2, -E2BIG,
CLONE3_ARGS_ALL_0);
/* Do a clone3() with > page size */
test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST);
/* Do a clone3() with CLONE3_ARGS_SIZE_V0 in a new PID NS. */
/* Do a clone3() with CLONE_ARGS_SIZE_VER0 in a new PID NS. */
if (uid == 0)
test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0, 0,
test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0, 0,
CLONE3_ARGS_NO_TEST);
else
ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
/* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 in a new PID NS */
test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL,
/* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS */
test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL,
CLONE3_ARGS_NO_TEST);
/* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */
if (uid == 0)
test_clone3(CLONE_NEWPID, sizeof(struct clone_args) + 8, 0,
test_clone3(CLONE_NEWPID, sizeof(struct __clone_args) + 8, 0,
CLONE3_ARGS_NO_TEST);
else
ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");

View File

@ -44,13 +44,13 @@ static int call_clone3_set_tid(struct __test_metadata *_metadata,
int status;
pid_t pid = -1;
struct clone_args args = {
struct __clone_args args = {
.exit_signal = SIGCHLD,
.set_tid = ptr_to_u64(set_tid),
.set_tid_size = set_tid_size,
};
pid = sys_clone3(&args, sizeof(struct clone_args));
pid = sys_clone3(&args, sizeof(args));
if (pid < 0) {
TH_LOG("%s - Failed to create new process", strerror(errno));
return -errno;

View File

@ -47,7 +47,7 @@ static void test_clone3_clear_sighand(void)
{
int ret;
pid_t pid;
struct clone_args args = {};
struct __clone_args args = {};
struct sigaction act;
/*

View File

@ -19,13 +19,11 @@
#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
#endif
#ifndef CLONE_ARGS_SIZE_VER0
#define CLONE_ARGS_SIZE_VER0 64
#endif
#ifndef __NR_clone3
#define __NR_clone3 -1
struct clone_args {
#endif
struct __clone_args {
__aligned_u64 flags;
__aligned_u64 pidfd;
__aligned_u64 child_tid;
@ -34,15 +32,21 @@ struct clone_args {
__aligned_u64 stack;
__aligned_u64 stack_size;
__aligned_u64 tls;
#define CLONE_ARGS_SIZE_VER1 80
#ifndef CLONE_ARGS_SIZE_VER0
#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
#endif
__aligned_u64 set_tid;
__aligned_u64 set_tid_size;
#define CLONE_ARGS_SIZE_VER2 88
#ifndef CLONE_ARGS_SIZE_VER1
#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
#endif
__aligned_u64 cgroup;
#ifndef CLONE_ARGS_SIZE_VER2
#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
#endif
};
#endif /* __NR_clone3 */
static pid_t sys_clone3(struct clone_args *args, size_t size)
static pid_t sys_clone3(struct __clone_args *args, size_t size)
{
fflush(stdout);
fflush(stderr);
@ -52,7 +56,7 @@ static pid_t sys_clone3(struct clone_args *args, size_t size)
static inline void test_clone3_supported(void)
{
pid_t pid;
struct clone_args args = {};
struct __clone_args args = {};
if (__NR_clone3 < 0)
ksft_exit_skip("clone3() syscall is not supported\n");

View File

@ -46,14 +46,14 @@ static int call_clone3_set_tid(pid_t *set_tid,
int status;
pid_t pid = -1;
struct clone_args args = {
struct __clone_args args = {
.flags = flags,
.exit_signal = SIGCHLD,
.set_tid = ptr_to_u64(set_tid),
.set_tid_size = set_tid_size,
};
pid = sys_clone3(&args, sizeof(struct clone_args));
pid = sys_clone3(&args, sizeof(args));
if (pid < 0) {
ksft_print_msg("%s - Failed to create new process\n",
strerror(errno));

View File

@ -75,7 +75,7 @@ static int sys_waitid(int which, pid_t pid, int options)
pid_t create_child(int *pidfd, unsigned flags)
{
struct clone_args args = {
struct __clone_args args = {
.flags = CLONE_PIDFD | flags,
.exit_signal = SIGCHLD,
.pidfd = ptr_to_u64(pidfd),

View File

@ -774,8 +774,15 @@ void *kill_thread(void *data)
return (void *)SIBLING_EXIT_UNKILLED;
}
enum kill_t {
KILL_THREAD,
KILL_PROCESS,
RET_UNKNOWN
};
/* Prepare a thread that will kill itself or both of us. */
void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
void kill_thread_or_group(struct __test_metadata *_metadata,
enum kill_t kill_how)
{
pthread_t thread;
void *status;
@ -791,11 +798,12 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
.len = (unsigned short)ARRAY_SIZE(filter_thread),
.filter = filter_thread,
};
int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAAA;
struct sock_filter filter_process[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
BPF_STMT(BPF_RET|BPF_K, kill),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog_process = {
@ -808,13 +816,15 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
}
ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
kill_process ? &prog_process : &prog_thread));
kill_how == KILL_THREAD ? &prog_thread
: &prog_process));
/*
* Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
* flag cannot be downgraded by a new filter.
*/
ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
if (kill_how == KILL_PROCESS)
ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
/* Start a thread that will exit immediately. */
ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
@ -842,7 +852,7 @@ TEST(KILL_thread)
child_pid = fork();
ASSERT_LE(0, child_pid);
if (child_pid == 0) {
kill_thread_or_group(_metadata, false);
kill_thread_or_group(_metadata, KILL_THREAD);
_exit(38);
}
@ -861,7 +871,7 @@ TEST(KILL_process)
child_pid = fork();
ASSERT_LE(0, child_pid);
if (child_pid == 0) {
kill_thread_or_group(_metadata, true);
kill_thread_or_group(_metadata, KILL_PROCESS);
_exit(38);
}
@ -872,6 +882,27 @@ TEST(KILL_process)
ASSERT_EQ(SIGSYS, WTERMSIG(status));
}
TEST(KILL_unknown)
{
int status;
pid_t child_pid;
child_pid = fork();
ASSERT_LE(0, child_pid);
if (child_pid == 0) {
kill_thread_or_group(_metadata, RET_UNKNOWN);
_exit(38);
}
ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
/* If the entire process was killed, we'll see SIGSYS. */
EXPECT_TRUE(WIFSIGNALED(status)) {
TH_LOG("Unknown SECCOMP_RET is only killing the thread?");
}
ASSERT_EQ(SIGSYS, WTERMSIG(status));
}
/* TODO(wad) add 64-bit versus 32-bit arg tests. */
TEST(arg_out_of_range)
{
@ -1667,70 +1698,148 @@ TEST_F(TRACE_poke, getpid_runs_normally)
}
#if defined(__x86_64__)
# define ARCH_REGS struct user_regs_struct
# define SYSCALL_NUM orig_rax
# define SYSCALL_RET rax
# define ARCH_REGS struct user_regs_struct
# define SYSCALL_NUM(_regs) (_regs).orig_rax
# define SYSCALL_RET(_regs) (_regs).rax
#elif defined(__i386__)
# define ARCH_REGS struct user_regs_struct
# define SYSCALL_NUM orig_eax
# define SYSCALL_RET eax
# define ARCH_REGS struct user_regs_struct
# define SYSCALL_NUM(_regs) (_regs).orig_eax
# define SYSCALL_RET(_regs) (_regs).eax
#elif defined(__arm__)
# define ARCH_REGS struct pt_regs
# define SYSCALL_NUM ARM_r7
# define SYSCALL_RET ARM_r0
# define ARCH_REGS struct pt_regs
# define SYSCALL_NUM(_regs) (_regs).ARM_r7
# ifndef PTRACE_SET_SYSCALL
# define PTRACE_SET_SYSCALL 23
# endif
# define SYSCALL_NUM_SET(_regs, _nr) \
EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr))
# define SYSCALL_RET(_regs) (_regs).ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS struct user_pt_regs
# define SYSCALL_NUM regs[8]
# define SYSCALL_RET regs[0]
# define ARCH_REGS struct user_pt_regs
# define SYSCALL_NUM(_regs) (_regs).regs[8]
# ifndef NT_ARM_SYSTEM_CALL
# define NT_ARM_SYSTEM_CALL 0x404
# endif
# define SYSCALL_NUM_SET(_regs, _nr) \
do { \
struct iovec __v; \
typeof(_nr) __nr = (_nr); \
__v.iov_base = &__nr; \
__v.iov_len = sizeof(__nr); \
EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee, \
NT_ARM_SYSTEM_CALL, &__v)); \
} while (0)
# define SYSCALL_RET(_regs) (_regs).regs[0]
#elif defined(__riscv) && __riscv_xlen == 64
# define ARCH_REGS struct user_regs_struct
# define SYSCALL_NUM a7
# define SYSCALL_RET a0
# define ARCH_REGS struct user_regs_struct
# define SYSCALL_NUM(_regs) (_regs).a7
# define SYSCALL_RET(_regs) (_regs).a0
#elif defined(__csky__)
# define ARCH_REGS struct pt_regs
#if defined(__CSKYABIV2__)
# define SYSCALL_NUM regs[3]
#else
# define SYSCALL_NUM regs[9]
#endif
# define SYSCALL_RET a0
# define ARCH_REGS struct pt_regs
# if defined(__CSKYABIV2__)
# define SYSCALL_NUM(_regs) (_regs).regs[3]
# else
# define SYSCALL_NUM(_regs) (_regs).regs[9]
# endif
# define SYSCALL_RET(_regs) (_regs).a0
#elif defined(__hppa__)
# define ARCH_REGS struct user_regs_struct
# define SYSCALL_NUM gr[20]
# define SYSCALL_RET gr[28]
# define ARCH_REGS struct user_regs_struct
# define SYSCALL_NUM(_regs) (_regs).gr[20]
# define SYSCALL_RET(_regs) (_regs).gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS struct pt_regs
# define SYSCALL_NUM gpr[0]
# define SYSCALL_RET gpr[3]
# define ARCH_REGS struct pt_regs
# define SYSCALL_NUM(_regs) (_regs).gpr[0]
# define SYSCALL_RET(_regs) (_regs).gpr[3]
# define SYSCALL_RET_SET(_regs, _val) \
do { \
typeof(_val) _result = (_val); \
/* \
* A syscall error is signaled by CR0 SO bit \
* and the code is stored as a positive value. \
*/ \
if (_result < 0) { \
SYSCALL_RET(_regs) = -result; \
(_regs).ccr |= 0x10000000; \
} else { \
SYSCALL_RET(_regs) = result; \
(_regs).ccr &= ~0x10000000; \
} \
} while (0)
# define SYSCALL_RET_SET_ON_PTRACE_EXIT
#elif defined(__s390__)
# define ARCH_REGS s390_regs
# define SYSCALL_NUM gprs[2]
# define SYSCALL_RET gprs[2]
# define SYSCALL_NUM_RET_SHARE_REG
# define ARCH_REGS s390_regs
# define SYSCALL_NUM(_regs) (_regs).gprs[2]
# define SYSCALL_RET_SET(_regs, _val) \
TH_LOG("Can't modify syscall return on this architecture")
#elif defined(__mips__)
# define ARCH_REGS struct pt_regs
# define SYSCALL_NUM regs[2]
# define SYSCALL_SYSCALL_NUM regs[4]
# define SYSCALL_RET regs[2]
# define SYSCALL_NUM_RET_SHARE_REG
# include <asm/unistd_nr_n32.h>
# include <asm/unistd_nr_n64.h>
# include <asm/unistd_nr_o32.h>
# define ARCH_REGS struct pt_regs
# define SYSCALL_NUM(_regs) \
({ \
typeof((_regs).regs[2]) _nr; \
if ((_regs).regs[2] == __NR_O32_Linux) \
_nr = (_regs).regs[4]; \
else \
_nr = (_regs).regs[2]; \
_nr; \
})
# define SYSCALL_NUM_SET(_regs, _nr) \
do { \
if ((_regs).regs[2] == __NR_O32_Linux) \
(_regs).regs[4] = _nr; \
else \
(_regs).regs[2] = _nr; \
} while (0)
# define SYSCALL_RET_SET(_regs, _val) \
TH_LOG("Can't modify syscall return on this architecture")
#elif defined(__xtensa__)
# define ARCH_REGS struct user_pt_regs
# define SYSCALL_NUM syscall
# define ARCH_REGS struct user_pt_regs
# define SYSCALL_NUM(_regs) (_regs).syscall
/*
* On xtensa syscall return value is in the register
* a2 of the current window which is not fixed.
*/
#define SYSCALL_RET(reg) a[(reg).windowbase * 4 + 2]
#define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2]
#elif defined(__sh__)
# define ARCH_REGS struct pt_regs
# define SYSCALL_NUM gpr[3]
# define SYSCALL_RET gpr[0]
# define ARCH_REGS struct pt_regs
# define SYSCALL_NUM(_regs) (_regs).gpr[3]
# define SYSCALL_RET(_regs) (_regs).gpr[0]
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
/*
* Most architectures can change the syscall by just updating the
* associated register. This is the default if not defined above.
*/
#ifndef SYSCALL_NUM_SET
# define SYSCALL_NUM_SET(_regs, _nr) \
do { \
SYSCALL_NUM(_regs) = (_nr); \
} while (0)
#endif
/*
* Most architectures can change the syscall return value by just
* writing to the SYSCALL_RET register. This is the default if not
* defined above. If an architecture cannot set the return value
* (for example when the syscall and return value register is
* shared), report it with TH_LOG() in an arch-specific definition
* of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined.
*/
#if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET)
# error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch"
#endif
#ifndef SYSCALL_RET_SET
# define SYSCALL_RET_SET(_regs, _val) \
do { \
SYSCALL_RET(_regs) = (_val); \
} while (0)
#endif
/* When the syscall return can't be changed, stub out the tests for it. */
#ifdef SYSCALL_NUM_RET_SHARE_REG
#ifndef SYSCALL_RET
# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action) \
@ -1745,116 +1854,92 @@ TEST_F(TRACE_poke, getpid_runs_normally)
} while (0)
#endif
/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
/*
* Some architectures (e.g. powerpc) can only set syscall
* return values on syscall exit during ptrace.
*/
const bool ptrace_entry_set_syscall_nr = true;
const bool ptrace_entry_set_syscall_ret =
#ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT
true;
#else
false;
#endif
/*
* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
* architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
*/
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
#define HAVE_GETREGS
# define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs))
# define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs))
#else
# define ARCH_GETREGS(_regs) ({ \
struct iovec __v; \
__v.iov_base = &(_regs); \
__v.iov_len = sizeof(_regs); \
ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v); \
})
# define ARCH_SETREGS(_regs) ({ \
struct iovec __v; \
__v.iov_base = &(_regs); \
__v.iov_len = sizeof(_regs); \
ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v); \
})
#endif
/* Architecture-specific syscall fetching routine. */
int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
{
ARCH_REGS regs;
#ifdef HAVE_GETREGS
EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
TH_LOG("PTRACE_GETREGS failed");
EXPECT_EQ(0, ARCH_GETREGS(regs)) {
return -1;
}
#else
struct iovec iov;
iov.iov_base = &regs;
iov.iov_len = sizeof(regs);
EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
TH_LOG("PTRACE_GETREGSET failed");
return -1;
}
#endif
#if defined(__mips__)
if (regs.SYSCALL_NUM == __NR_O32_Linux)
return regs.SYSCALL_SYSCALL_NUM;
#endif
return regs.SYSCALL_NUM;
return SYSCALL_NUM(regs);
}
/* Architecture-specific syscall changing routine. */
void change_syscall(struct __test_metadata *_metadata,
pid_t tracee, int syscall, int result)
void __change_syscall(struct __test_metadata *_metadata,
pid_t tracee, long *syscall, long *ret)
{
int ret;
ARCH_REGS regs;
#ifdef HAVE_GETREGS
ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
#else
struct iovec iov;
iov.iov_base = &regs;
iov.iov_len = sizeof(regs);
ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
#endif
EXPECT_EQ(0, ret) {}
ARCH_REGS orig, regs;
#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
defined(__s390__) || defined(__hppa__) || defined(__riscv) || \
defined(__xtensa__) || defined(__csky__) || defined(__sh__)
{
regs.SYSCALL_NUM = syscall;
}
#elif defined(__mips__)
{
if (regs.SYSCALL_NUM == __NR_O32_Linux)
regs.SYSCALL_SYSCALL_NUM = syscall;
else
regs.SYSCALL_NUM = syscall;
/* Do not get/set registers if we have nothing to do. */
if (!syscall && !ret)
return;
EXPECT_EQ(0, ARCH_GETREGS(regs)) {
return;
}
orig = regs;
#elif defined(__arm__)
# ifndef PTRACE_SET_SYSCALL
# define PTRACE_SET_SYSCALL 23
# endif
{
ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
EXPECT_EQ(0, ret);
}
if (syscall)
SYSCALL_NUM_SET(regs, *syscall);
#elif defined(__aarch64__)
# ifndef NT_ARM_SYSTEM_CALL
# define NT_ARM_SYSTEM_CALL 0x404
# endif
{
iov.iov_base = &syscall;
iov.iov_len = sizeof(syscall);
ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
&iov);
EXPECT_EQ(0, ret);
}
if (ret)
SYSCALL_RET_SET(regs, *ret);
#else
ASSERT_EQ(1, 0) {
TH_LOG("How is the syscall changed on this architecture?");
}
#endif
/* Flush any register changes made. */
if (memcmp(&orig, &regs, sizeof(orig)) != 0)
EXPECT_EQ(0, ARCH_SETREGS(regs));
}
/* If syscall is skipped, change return value. */
if (syscall == -1)
#ifdef SYSCALL_NUM_RET_SHARE_REG
TH_LOG("Can't modify syscall return on this architecture");
/* Change only syscall number. */
void change_syscall_nr(struct __test_metadata *_metadata,
pid_t tracee, long syscall)
{
__change_syscall(_metadata, tracee, &syscall, NULL);
}
#elif defined(__xtensa__)
regs.SYSCALL_RET(regs) = result;
#else
regs.SYSCALL_RET = result;
#endif
/* Change syscall return value (and set syscall number to -1). */
void change_syscall_ret(struct __test_metadata *_metadata,
pid_t tracee, long ret)
{
long syscall = -1;
#ifdef HAVE_GETREGS
ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
#else
iov.iov_base = &regs;
iov.iov_len = sizeof(regs);
ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
#endif
EXPECT_EQ(0, ret);
__change_syscall(_metadata, tracee, &syscall, &ret);
}
void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
@ -1872,17 +1957,17 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
case 0x1002:
/* change getpid to getppid. */
EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
change_syscall(_metadata, tracee, __NR_getppid, 0);
change_syscall_nr(_metadata, tracee, __NR_getppid);
break;
case 0x1003:
/* skip gettid with valid return code. */
EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
change_syscall(_metadata, tracee, -1, 45000);
change_syscall_ret(_metadata, tracee, 45000);
break;
case 0x1004:
/* skip openat with error. */
EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
change_syscall(_metadata, tracee, -1, -ESRCH);
change_syscall_ret(_metadata, tracee, -ESRCH);
break;
case 0x1005:
/* do nothing (allow getppid) */
@ -1897,12 +1982,21 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
}
FIXTURE(TRACE_syscall) {
struct sock_fprog prog;
pid_t tracer, mytid, mypid, parent;
long syscall_nr;
};
void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
int status, void *args)
{
int ret, nr;
int ret;
unsigned long msg;
static bool entry;
long syscall_nr_val, syscall_ret_val;
long *syscall_nr = NULL, *syscall_ret = NULL;
FIXTURE_DATA(TRACE_syscall) *self = args;
/*
* The traditional way to tell PTRACE_SYSCALL entry/exit
@ -1916,24 +2010,48 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
: PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
if (!entry)
/*
* Some architectures only support setting return values during
* syscall exit under ptrace, and on exit the syscall number may
* no longer be available. Therefore, save the initial sycall
* number here, so it can be examined during both entry and exit
* phases.
*/
if (entry)
self->syscall_nr = get_syscall(_metadata, tracee);
/*
* Depending on the architecture's syscall setting abilities, we
* pick which things to set during this phase (entry or exit).
*/
if (entry == ptrace_entry_set_syscall_nr)
syscall_nr = &syscall_nr_val;
if (entry == ptrace_entry_set_syscall_ret)
syscall_ret = &syscall_ret_val;
/* Now handle the actual rewriting cases. */
switch (self->syscall_nr) {
case __NR_getpid:
syscall_nr_val = __NR_getppid;
/* Never change syscall return for this case. */
syscall_ret = NULL;
break;
case __NR_gettid:
syscall_nr_val = -1;
syscall_ret_val = 45000;
break;
case __NR_openat:
syscall_nr_val = -1;
syscall_ret_val = -ESRCH;
break;
default:
/* Unhandled, do nothing. */
return;
}
nr = get_syscall(_metadata, tracee);
if (nr == __NR_getpid)
change_syscall(_metadata, tracee, __NR_getppid, 0);
if (nr == __NR_gettid)
change_syscall(_metadata, tracee, -1, 45000);
if (nr == __NR_openat)
change_syscall(_metadata, tracee, -1, -ESRCH);
__change_syscall(_metadata, tracee, syscall_nr, syscall_ret);
}
FIXTURE(TRACE_syscall) {
struct sock_fprog prog;
pid_t tracer, mytid, mypid, parent;
};
FIXTURE_VARIANT(TRACE_syscall) {
/*
* All of the SECCOMP_RET_TRACE behaviors can be tested with either
@ -1992,7 +2110,7 @@ FIXTURE_SETUP(TRACE_syscall)
self->tracer = setup_trace_fixture(_metadata,
variant->use_ptrace ? tracer_ptrace
: tracer_seccomp,
NULL, variant->use_ptrace);
self, variant->use_ptrace);
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret);
@ -3142,11 +3260,11 @@ skip:
static int user_notif_syscall(int nr, unsigned int flags)
{
struct sock_filter filter[] = {
BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF),
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog = {
@ -3699,7 +3817,7 @@ TEST(user_notification_filter_empty)
long ret;
int status;
struct pollfd pollfd;
struct clone_args args = {
struct __clone_args args = {
.flags = CLONE_FILES,
.exit_signal = SIGCHLD,
};
@ -3715,7 +3833,7 @@ TEST(user_notification_filter_empty)
if (pid == 0) {
int listener;
listener = user_notif_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER);
listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
if (listener < 0)
_exit(EXIT_FAILURE);
@ -3753,7 +3871,7 @@ TEST(user_notification_filter_empty_threaded)
long ret;
int status;
struct pollfd pollfd;
struct clone_args args = {
struct __clone_args args = {
.flags = CLONE_FILES,
.exit_signal = SIGCHLD,
};