2017-11-01 22:08:43 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
2012-10-13 17:46:48 +08:00
|
|
|
#ifndef _UAPI_LINUX_PTRACE_H
|
|
|
|
#define _UAPI_LINUX_PTRACE_H
|
|
|
|
/* ptrace.h */
|
|
|
|
/* structs and defines to help the user use the ptrace system call. */
|
|
|
|
|
|
|
|
/* <asm/ptrace.h>, included at the end of this file, has the defines to get at the registers. */
|
|
|
|
|
2013-05-01 06:27:59 +08:00
|
|
|
#include <linux/types.h>
|
2012-10-13 17:46:48 +08:00
|
|
|
|
|
|
|
#define PTRACE_TRACEME 0
|
|
|
|
#define PTRACE_PEEKTEXT 1
|
|
|
|
#define PTRACE_PEEKDATA 2
|
|
|
|
#define PTRACE_PEEKUSR 3
|
|
|
|
#define PTRACE_POKETEXT 4
|
|
|
|
#define PTRACE_POKEDATA 5
|
|
|
|
#define PTRACE_POKEUSR 6
|
|
|
|
#define PTRACE_CONT 7
|
|
|
|
#define PTRACE_KILL 8
|
|
|
|
#define PTRACE_SINGLESTEP 9
|
|
|
|
|
|
|
|
#define PTRACE_ATTACH 16
|
|
|
|
#define PTRACE_DETACH 17
|
|
|
|
|
|
|
|
#define PTRACE_SYSCALL 24
|
|
|
|
|
|
|
|
/* 0x4200-0x4300 are reserved for architecture-independent additions. */
|
|
|
|
#define PTRACE_SETOPTIONS 0x4200
|
|
|
|
#define PTRACE_GETEVENTMSG 0x4201
|
|
|
|
#define PTRACE_GETSIGINFO 0x4202
|
|
|
|
#define PTRACE_SETSIGINFO 0x4203
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Generic ptrace interface that exports the architecture specific regsets
|
|
|
|
* using the corresponding NT_* types (which are also used in the core dump).
|
|
|
|
* Please note that the NT_PRSTATUS note type in a core dump contains a full
|
|
|
|
* 'struct elf_prstatus'. But the user_regset for NT_PRSTATUS contains just the
|
|
|
|
* elf_gregset_t that is the pr_reg field of 'struct elf_prstatus'. For all the
|
|
|
|
* other user_regset flavors, the user_regset layout and the ELF core dump note
|
|
|
|
* payload are exactly the same layout.
|
|
|
|
*
|
|
|
|
* This interface usage is as follows:
|
|
|
|
* struct iovec iov = { buf, len};
|
|
|
|
*
|
|
|
|
* ret = ptrace(PTRACE_GETREGSET/PTRACE_SETREGSET, pid, NT_XXX_TYPE, &iov);
|
|
|
|
*
|
|
|
|
* On successful completion, iov.iov_len will be updated by the kernel,
|
|
|
|
* specifying how much the kernel has written/read to/from the user's iov.iov_base.
|
|
|
|
*/
|
|
|
|
#define PTRACE_GETREGSET 0x4204
|
|
|
|
#define PTRACE_SETREGSET 0x4205
|
|
|
|
|
|
|
|
#define PTRACE_SEIZE 0x4206
|
|
|
|
#define PTRACE_INTERRUPT 0x4207
|
|
|
|
#define PTRACE_LISTEN 0x4208
|
|
|
|
|
2013-05-01 06:27:59 +08:00
|
|
|
#define PTRACE_PEEKSIGINFO 0x4209
|
|
|
|
|
|
|
|
/*
 * Describes which queued siginfo entries to read and how many.
 * NOTE(review): presumably the argument block for PTRACE_PEEKSIGINFO
 * (defined just above) -- confirm against the ptrace implementation.
 */
struct ptrace_peeksiginfo_args {
|
|
|
|
__u64 off; /* from which siginfo to start */
|
|
|
|
__u32 flags; /* presumably PTRACE_PEEKSIGINFO_* bits -- TODO confirm */
|
|
|
|
__s32 nr; /* how many siginfos to take */
|
|
|
|
};
|
|
|
|
|
2013-07-04 06:08:12 +08:00
|
|
|
#define PTRACE_GETSIGMASK 0x420a
|
|
|
|
#define PTRACE_SETSIGMASK 0x420b
|
|
|
|
|
2015-10-27 08:23:59 +08:00
|
|
|
#define PTRACE_SECCOMP_GET_FILTER 0x420c
|
2017-10-11 23:39:21 +08:00
|
|
|
#define PTRACE_SECCOMP_GET_METADATA 0x420d
|
|
|
|
|
|
|
|
/*
 * In/out record describing one seccomp filter: the caller supplies
 * filter_off, and flags is filled in as output.
 * NOTE(review): presumably used with PTRACE_SECCOMP_GET_METADATA
 * (defined just above) -- confirm against the ptrace implementation.
 */
struct seccomp_metadata {
|
2018-02-21 10:47:45 +08:00
|
|
|
__u64 filter_off; /* Input: which filter */
|
|
|
|
__u64 flags; /* Output: filter's flags */
|
2017-10-11 23:39:21 +08:00
|
|
|
};
|
2015-10-27 08:23:59 +08:00
|
|
|
|
ptrace: add PTRACE_GET_SYSCALL_INFO request
PTRACE_GET_SYSCALL_INFO is a generic ptrace API that lets ptracer obtain
details of the syscall the tracee is blocked in.
There are two reasons for a special syscall-related ptrace request.
Firstly, with the current ptrace API there are cases when ptracer cannot
retrieve necessary information about syscalls. Some examples include:
* The notorious int-0x80-from-64-bit-task issue. See [1] for details.
In short, if a 64-bit task performs a syscall through int 0x80, its
tracer has no reliable means to find out that the syscall was, in
fact, a compat syscall, and misidentifies it.
* Syscall-enter-stop and syscall-exit-stop look the same for the
tracer. Common practice is to keep track of the sequence of
ptrace-stops in order not to mix the two syscall-stops up. But it is
not as simple as it looks; for example, strace had a (just recently
fixed) long-standing bug where attaching strace to a tracee that is
performing the execve system call led to the tracer identifying the
following syscall-exit-stop as syscall-enter-stop, which messed up
all the state tracking.
* Since the introduction of commit 84d77d3f06e7 ("ptrace: Don't allow
accessing an undumpable mm"), both PTRACE_PEEKDATA and
process_vm_readv become unavailable when the process dumpable flag is
cleared. On such architectures as ia64 this results in all syscall
arguments being unavailable for the tracer.
Secondly, ptracers also have to support a lot of arch-specific code for
obtaining information about the tracee. For some architectures, this
requires a ptrace(PTRACE_PEEKUSER, ...) invocation for every syscall
argument and return value.
ptrace(2) man page:
long ptrace(enum __ptrace_request request, pid_t pid,
void *addr, void *data);
...
PTRACE_GET_SYSCALL_INFO
Retrieve information about the syscall that caused the stop.
The information is placed into the buffer pointed by "data"
argument, which should be a pointer to a buffer of type
"struct ptrace_syscall_info".
The "addr" argument contains the size of the buffer pointed to
by "data" argument (i.e., sizeof(struct ptrace_syscall_info)).
The return value contains the number of bytes available
to be written by the kernel.
If the size of data to be written by the kernel exceeds the size
specified by "addr" argument, the output is truncated.
[ldv@altlinux.org: selftests/seccomp/seccomp_bpf: update for PTRACE_GET_SYSCALL_INFO]
Link: http://lkml.kernel.org/r/20190708182904.GA12332@altlinux.org
Link: http://lkml.kernel.org/r/20190510152842.GF28558@altlinux.org
Signed-off-by: Elvira Khabirova <lineprinter@altlinux.org>
Co-developed-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: Eugene Syromyatnikov <esyr@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Greentime Hu <greentime@andestech.com>
Cc: Helge Deller <deller@gmx.de> [parisc]
Cc: James E.J. Bottomley <jejb@parisc-linux.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: kbuild test robot <lkp@intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vincent Chen <deanbo422@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-07-17 07:29:42 +08:00
|
|
|
#define PTRACE_GET_SYSCALL_INFO 0x420e
|
|
|
|
/* Values stored in the op field of struct ptrace_syscall_info. */
#define PTRACE_SYSCALL_INFO_NONE 0
|
|
|
|
#define PTRACE_SYSCALL_INFO_ENTRY 1
|
|
|
|
#define PTRACE_SYSCALL_INFO_EXIT 2
|
|
|
|
#define PTRACE_SYSCALL_INFO_SECCOMP 3
|
|
|
|
|
|
|
|
/*
 * Buffer filled in by the PTRACE_GET_SYSCALL_INFO request.
 *
 * op holds one of the PTRACE_SYSCALL_INFO_* values. pad[3] fills the
 * three bytes between the one-byte op and the 32-bit arch field.
 * The anonymous union carries three alternative layouts:
 *   entry   - syscall number and six arguments;
 *   exit    - return value and an is_error flag;
 *   seccomp - syscall number, six arguments and ret_data.
 * NOTE(review): presumably op selects which union member is valid
 * (ENTRY -> entry, EXIT -> exit, SECCOMP -> seccomp) -- confirm
 * against the ptrace implementation.
 */
struct ptrace_syscall_info {
|
|
|
|
__u8 op; /* PTRACE_SYSCALL_INFO_* */
|
2020-08-01 23:20:44 +08:00
|
|
|
__u8 pad[3];
|
|
|
|
__u32 arch;
|
ptrace: add PTRACE_GET_SYSCALL_INFO request
PTRACE_GET_SYSCALL_INFO is a generic ptrace API that lets ptracer obtain
details of the syscall the tracee is blocked in.
There are two reasons for a special syscall-related ptrace request.
Firstly, with the current ptrace API there are cases when ptracer cannot
retrieve necessary information about syscalls. Some examples include:
* The notorious int-0x80-from-64-bit-task issue. See [1] for details.
In short, if a 64-bit task performs a syscall through int 0x80, its
tracer has no reliable means to find out that the syscall was, in
fact, a compat syscall, and misidentifies it.
* Syscall-enter-stop and syscall-exit-stop look the same for the
tracer. Common practice is to keep track of the sequence of
ptrace-stops in order not to mix the two syscall-stops up. But it is
not as simple as it looks; for example, strace had a (just recently
fixed) long-standing bug where attaching strace to a tracee that is
performing the execve system call led to the tracer identifying the
following syscall-exit-stop as syscall-enter-stop, which messed up
all the state tracking.
* Since the introduction of commit 84d77d3f06e7 ("ptrace: Don't allow
accessing an undumpable mm"), both PTRACE_PEEKDATA and
process_vm_readv become unavailable when the process dumpable flag is
cleared. On such architectures as ia64 this results in all syscall
arguments being unavailable for the tracer.
Secondly, ptracers also have to support a lot of arch-specific code for
obtaining information about the tracee. For some architectures, this
requires a ptrace(PTRACE_PEEKUSER, ...) invocation for every syscall
argument and return value.
ptrace(2) man page:
long ptrace(enum __ptrace_request request, pid_t pid,
void *addr, void *data);
...
PTRACE_GET_SYSCALL_INFO
Retrieve information about the syscall that caused the stop.
The information is placed into the buffer pointed by "data"
argument, which should be a pointer to a buffer of type
"struct ptrace_syscall_info".
The "addr" argument contains the size of the buffer pointed to
by "data" argument (i.e., sizeof(struct ptrace_syscall_info)).
The return value contains the number of bytes available
to be written by the kernel.
If the size of data to be written by the kernel exceeds the size
specified by "addr" argument, the output is truncated.
[ldv@altlinux.org: selftests/seccomp/seccomp_bpf: update for PTRACE_GET_SYSCALL_INFO]
Link: http://lkml.kernel.org/r/20190708182904.GA12332@altlinux.org
Link: http://lkml.kernel.org/r/20190510152842.GF28558@altlinux.org
Signed-off-by: Elvira Khabirova <lineprinter@altlinux.org>
Co-developed-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: Eugene Syromyatnikov <esyr@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Greentime Hu <greentime@andestech.com>
Cc: Helge Deller <deller@gmx.de> [parisc]
Cc: James E.J. Bottomley <jejb@parisc-linux.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: kbuild test robot <lkp@intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vincent Chen <deanbo422@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-07-17 07:29:42 +08:00
|
|
|
__u64 instruction_pointer;
|
|
|
|
__u64 stack_pointer;
|
|
|
|
union {
|
|
|
|
struct {
|
|
|
|
__u64 nr;
|
|
|
|
__u64 args[6];
|
|
|
|
} entry;
|
|
|
|
struct {
|
|
|
|
__s64 rval;
|
|
|
|
__u8 is_error;
|
|
|
|
} exit;
|
|
|
|
struct {
|
|
|
|
__u64 nr;
|
|
|
|
__u64 args[6];
|
|
|
|
__u32 ret_data;
|
|
|
|
} seccomp;
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
2021-02-26 21:51:56 +08:00
|
|
|
#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f
|
|
|
|
|
|
|
|
/*
 * Snapshot of a task's rseq registration: ABI pointer, ABI size,
 * signature and flags. The trailing pad field brings the structure
 * to 24 bytes, a multiple of the 8-byte alignment of its __u64 member.
 * NOTE(review): presumably the result buffer for
 * PTRACE_GET_RSEQ_CONFIGURATION (defined just above) -- confirm
 * against the ptrace implementation.
 */
struct ptrace_rseq_configuration {
|
|
|
|
__u64 rseq_abi_pointer;
|
|
|
|
__u32 rseq_abi_size;
|
|
|
|
__u32 signature;
|
|
|
|
__u32 flags;
|
|
|
|
__u32 pad;
|
|
|
|
};
|
|
|
|
|
ptrace: add PTRACE_GET_SYSCALL_INFO request
PTRACE_GET_SYSCALL_INFO is a generic ptrace API that lets ptracer obtain
details of the syscall the tracee is blocked in.
There are two reasons for a special syscall-related ptrace request.
Firstly, with the current ptrace API there are cases when ptracer cannot
retrieve necessary information about syscalls. Some examples include:
* The notorious int-0x80-from-64-bit-task issue. See [1] for details.
In short, if a 64-bit task performs a syscall through int 0x80, its
tracer has no reliable means to find out that the syscall was, in
fact, a compat syscall, and misidentifies it.
* Syscall-enter-stop and syscall-exit-stop look the same for the
tracer. Common practice is to keep track of the sequence of
ptrace-stops in order not to mix the two syscall-stops up. But it is
not as simple as it looks; for example, strace had a (just recently
fixed) long-standing bug where attaching strace to a tracee that is
performing the execve system call led to the tracer identifying the
following syscall-exit-stop as syscall-enter-stop, which messed up
all the state tracking.
* Since the introduction of commit 84d77d3f06e7 ("ptrace: Don't allow
accessing an undumpable mm"), both PTRACE_PEEKDATA and
process_vm_readv become unavailable when the process dumpable flag is
cleared. On such architectures as ia64 this results in all syscall
arguments being unavailable for the tracer.
Secondly, ptracers also have to support a lot of arch-specific code for
obtaining information about the tracee. For some architectures, this
requires a ptrace(PTRACE_PEEKUSER, ...) invocation for every syscall
argument and return value.
ptrace(2) man page:
long ptrace(enum __ptrace_request request, pid_t pid,
void *addr, void *data);
...
PTRACE_GET_SYSCALL_INFO
Retrieve information about the syscall that caused the stop.
The information is placed into the buffer pointed by "data"
argument, which should be a pointer to a buffer of type
"struct ptrace_syscall_info".
The "addr" argument contains the size of the buffer pointed to
by "data" argument (i.e., sizeof(struct ptrace_syscall_info)).
The return value contains the number of bytes available
to be written by the kernel.
If the size of data to be written by the kernel exceeds the size
specified by "addr" argument, the output is truncated.
[ldv@altlinux.org: selftests/seccomp/seccomp_bpf: update for PTRACE_GET_SYSCALL_INFO]
Link: http://lkml.kernel.org/r/20190708182904.GA12332@altlinux.org
Link: http://lkml.kernel.org/r/20190510152842.GF28558@altlinux.org
Signed-off-by: Elvira Khabirova <lineprinter@altlinux.org>
Co-developed-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: Eugene Syromyatnikov <esyr@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Greentime Hu <greentime@andestech.com>
Cc: Helge Deller <deller@gmx.de> [parisc]
Cc: James E.J. Bottomley <jejb@parisc-linux.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: kbuild test robot <lkp@intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vincent Chen <deanbo422@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-07-17 07:29:42 +08:00
|
|
|
/*
|
|
|
|
* These values are stored in task->ptrace_message
|
2022-01-28 02:15:32 +08:00
|
|
|
* by ptrace_stop to describe the current syscall-stop.
|
ptrace: add PTRACE_GET_SYSCALL_INFO request
PTRACE_GET_SYSCALL_INFO is a generic ptrace API that lets ptracer obtain
details of the syscall the tracee is blocked in.
There are two reasons for a special syscall-related ptrace request.
Firstly, with the current ptrace API there are cases when ptracer cannot
retrieve necessary information about syscalls. Some examples include:
* The notorious int-0x80-from-64-bit-task issue. See [1] for details.
In short, if a 64-bit task performs a syscall through int 0x80, its
tracer has no reliable means to find out that the syscall was, in
fact, a compat syscall, and misidentifies it.
* Syscall-enter-stop and syscall-exit-stop look the same for the
tracer. Common practice is to keep track of the sequence of
ptrace-stops in order not to mix the two syscall-stops up. But it is
not as simple as it looks; for example, strace had a (just recently
fixed) long-standing bug where attaching strace to a tracee that is
performing the execve system call led to the tracer identifying the
following syscall-exit-stop as syscall-enter-stop, which messed up
all the state tracking.
* Since the introduction of commit 84d77d3f06e7 ("ptrace: Don't allow
accessing an undumpable mm"), both PTRACE_PEEKDATA and
process_vm_readv become unavailable when the process dumpable flag is
cleared. On such architectures as ia64 this results in all syscall
arguments being unavailable for the tracer.
Secondly, ptracers also have to support a lot of arch-specific code for
obtaining information about the tracee. For some architectures, this
requires a ptrace(PTRACE_PEEKUSER, ...) invocation for every syscall
argument and return value.
ptrace(2) man page:
long ptrace(enum __ptrace_request request, pid_t pid,
void *addr, void *data);
...
PTRACE_GET_SYSCALL_INFO
Retrieve information about the syscall that caused the stop.
The information is placed into the buffer pointed by "data"
argument, which should be a pointer to a buffer of type
"struct ptrace_syscall_info".
The "addr" argument contains the size of the buffer pointed to
by "data" argument (i.e., sizeof(struct ptrace_syscall_info)).
The return value contains the number of bytes available
to be written by the kernel.
If the size of data to be written by the kernel exceeds the size
specified by "addr" argument, the output is truncated.
[ldv@altlinux.org: selftests/seccomp/seccomp_bpf: update for PTRACE_GET_SYSCALL_INFO]
Link: http://lkml.kernel.org/r/20190708182904.GA12332@altlinux.org
Link: http://lkml.kernel.org/r/20190510152842.GF28558@altlinux.org
Signed-off-by: Elvira Khabirova <lineprinter@altlinux.org>
Co-developed-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: Eugene Syromyatnikov <esyr@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Greentime Hu <greentime@andestech.com>
Cc: Helge Deller <deller@gmx.de> [parisc]
Cc: James E.J. Bottomley <jejb@parisc-linux.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: kbuild test robot <lkp@intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vincent Chen <deanbo422@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-07-17 07:29:42 +08:00
|
|
|
*/
|
|
|
|
#define PTRACE_EVENTMSG_SYSCALL_ENTRY 1
|
|
|
|
#define PTRACE_EVENTMSG_SYSCALL_EXIT 2
|
|
|
|
|
2013-05-01 06:27:59 +08:00
|
|
|
/* Read signals from a shared (process wide) queue */
|
|
|
|
#define PTRACE_PEEKSIGINFO_SHARED (1 << 0)
|
|
|
|
|
2012-10-13 17:46:48 +08:00
|
|
|
/* Wait extended result codes for the above trace options. */
|
|
|
|
#define PTRACE_EVENT_FORK 1
|
|
|
|
#define PTRACE_EVENT_VFORK 2
|
|
|
|
#define PTRACE_EVENT_CLONE 3
|
|
|
|
#define PTRACE_EVENT_EXEC 4
|
|
|
|
#define PTRACE_EVENT_VFORK_DONE 5
|
|
|
|
#define PTRACE_EVENT_EXIT 6
|
|
|
|
#define PTRACE_EVENT_SECCOMP 7
|
|
|
|
/* Extended result codes which are enabled by means other than options. */
|
|
|
|
#define PTRACE_EVENT_STOP 128
|
|
|
|
|
|
|
|
/* Options set using PTRACE_SETOPTIONS or using PTRACE_SEIZE @data param */
|
|
|
|
#define PTRACE_O_TRACESYSGOOD 1
|
|
|
|
#define PTRACE_O_TRACEFORK (1 << PTRACE_EVENT_FORK)
|
|
|
|
#define PTRACE_O_TRACEVFORK (1 << PTRACE_EVENT_VFORK)
|
|
|
|
#define PTRACE_O_TRACECLONE (1 << PTRACE_EVENT_CLONE)
|
|
|
|
#define PTRACE_O_TRACEEXEC (1 << PTRACE_EVENT_EXEC)
|
|
|
|
#define PTRACE_O_TRACEVFORKDONE (1 << PTRACE_EVENT_VFORK_DONE)
|
|
|
|
#define PTRACE_O_TRACEEXIT (1 << PTRACE_EVENT_EXIT)
|
|
|
|
#define PTRACE_O_TRACESECCOMP (1 << PTRACE_EVENT_SECCOMP)
|
|
|
|
|
2012-12-18 08:03:07 +08:00
|
|
|
/* eventless options */
|
seccomp: add ptrace options for suspend/resume
This patch is the first step in enabling checkpoint/restore of processes
with seccomp enabled.
One of the things CRIU does while dumping tasks is inject code into them
via ptrace to collect information that is only available to the process
itself. However, if we are in a seccomp mode where these processes are
prohibited from making these syscalls, then what CRIU does kills the task.
This patch adds a new ptrace option, PTRACE_O_SUSPEND_SECCOMP, that enables
a task from the init user namespace which has CAP_SYS_ADMIN and no seccomp
filters to disable (and re-enable) seccomp filters for another task so that
they can be successfully dumped (and restored). We restrict the set of
processes that can disable seccomp through ptrace because although today
ptrace can be used to bypass seccomp, there is some discussion of closing
this loophole in the future and we would like this patch to not depend on
that behavior and be future proofed for when it is removed.
Note that seccomp can be suspended before any filters are actually
installed; this behavior is useful on criu restore, so that we can suspend
seccomp, restore the filters, unmap our restore code from the restored
process' address space, and then resume the task by detaching and have the
filters resumed as well.
v2 changes:
* require that the tracer have no seccomp filters installed
* drop TIF_NOTSC manipulation from the patch
* change from ptrace command to a ptrace option and use this ptrace option
as the flag to check. This means that as soon as the tracer
detaches/dies, seccomp is re-enabled and, as a corollary, one cannot
disable seccomp across PTRACE_ATTACHs.
v3 changes:
* get rid of various #ifdefs everywhere
* report more sensible errors when PTRACE_O_SUSPEND_SECCOMP is incorrectly
used
v4 changes:
* get rid of may_suspend_seccomp() in favor of a capable() check in ptrace
directly
v5 changes:
* check that seccomp is not enabled (or suspended) on the tracer
Signed-off-by: Tycho Andersen <tycho.andersen@canonical.com>
CC: Will Drewry <wad@chromium.org>
CC: Roland McGrath <roland@hack.frob.com>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Andy Lutomirski <luto@amacapital.net>
[kees: access seccomp.mode through seccomp_mode() instead]
Signed-off-by: Kees Cook <keescook@chromium.org>
2015-06-13 23:02:48 +08:00
|
|
|
#define PTRACE_O_EXITKILL (1 << 20)
|
|
|
|
#define PTRACE_O_SUSPEND_SECCOMP (1 << 21)
|
2012-12-18 08:03:07 +08:00
|
|
|
|
seccomp: add ptrace options for suspend/resume
This patch is the first step in enabling checkpoint/restore of processes
with seccomp enabled.
One of the things CRIU does while dumping tasks is inject code into them
via ptrace to collect information that is only available to the process
itself. However, if we are in a seccomp mode where these processes are
prohibited from making these syscalls, then what CRIU does kills the task.
This patch adds a new ptrace option, PTRACE_O_SUSPEND_SECCOMP, that enables
a task from the init user namespace which has CAP_SYS_ADMIN and no seccomp
filters to disable (and re-enable) seccomp filters for another task so that
they can be successfully dumped (and restored). We restrict the set of
processes that can disable seccomp through ptrace because although today
ptrace can be used to bypass seccomp, there is some discussion of closing
this loophole in the future and we would like this patch to not depend on
that behavior and be future proofed for when it is removed.
Note that seccomp can be suspended before any filters are actually
installed; this behavior is useful on criu restore, so that we can suspend
seccomp, restore the filters, unmap our restore code from the restored
process' address space, and then resume the task by detaching and have the
filters resumed as well.
v2 changes:
* require that the tracer have no seccomp filters installed
* drop TIF_NOTSC manipulation from the patch
* change from ptrace command to a ptrace option and use this ptrace option
as the flag to check. This means that as soon as the tracer
detaches/dies, seccomp is re-enabled and, as a corollary, one cannot
disable seccomp across PTRACE_ATTACHs.
v3 changes:
* get rid of various #ifdefs everywhere
* report more sensible errors when PTRACE_O_SUSPEND_SECCOMP is incorrectly
used
v4 changes:
* get rid of may_suspend_seccomp() in favor of a capable() check in ptrace
directly
v5 changes:
* check that seccomp is not enabled (or suspended) on the tracer
Signed-off-by: Tycho Andersen <tycho.andersen@canonical.com>
CC: Will Drewry <wad@chromium.org>
CC: Roland McGrath <roland@hack.frob.com>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Andy Lutomirski <luto@amacapital.net>
[kees: access seccomp.mode through seccomp_mode() instead]
Signed-off-by: Kees Cook <keescook@chromium.org>
2015-06-13 23:02:48 +08:00
|
|
|
/*
 * Union of every option bit defined in this header: 0x000000ff covers
 * PTRACE_O_TRACESYSGOOD (bit 0) and the PTRACE_O_TRACE* bits derived
 * from PTRACE_EVENT_* values 1..7; the eventless options are OR-ed in
 * explicitly.
 */
#define PTRACE_O_MASK (\
|
|
|
|
0x000000ff | PTRACE_O_EXITKILL | PTRACE_O_SUSPEND_SECCOMP)
|
2012-10-13 17:46:48 +08:00
|
|
|
|
|
|
|
#include <asm/ptrace.h>
|
|
|
|
|
|
|
|
|
|
|
|
#endif /* _UAPI_LINUX_PTRACE_H */
|