2012-06-01 07:26:44 +08:00
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/syscalls.h>
|
|
|
|
#include <linux/fdtable.h>
|
|
|
|
#include <linux/string.h>
|
|
|
|
#include <linux/random.h>
|
|
|
|
#include <linux/module.h>
|
2012-12-21 07:05:21 +08:00
|
|
|
#include <linux/ptrace.h>
|
2012-06-01 07:26:44 +08:00
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/cache.h>
|
|
|
|
#include <linux/bug.h>
|
|
|
|
#include <linux/err.h>
|
|
|
|
#include <linux/kcmp.h>
|
|
|
|
|
|
|
|
#include <asm/unistd.h>
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We don't expose the real in-memory order of objects for security reasons.
|
|
|
|
* But still the comparison results should be suitable for sorting. So we
|
|
|
|
* obfuscate kernel pointer values and compare the products instead.
|
|
|
|
*
|
|
|
|
* The obfuscation is done in two steps. First we xor the kernel pointer with
|
|
|
|
* a random value, which puts pointer into a new position in a reordered space.
|
|
|
|
* Secondly we multiply the xor product with a large odd random number to
|
|
|
|
* permute its bits even more (the odd multiplier guarantees that the product
|
|
|
|
* is unique even after the high bits are truncated, since any odd number is
|
|
|
|
* relatively prime to 2^n).
|
|
|
|
*
|
|
|
|
* Note also that the obfuscation itself is invisible to userspace and if needed
|
|
|
|
* it can be changed to an alternate scheme.
|
|
|
|
*/
|
|
|
|
static unsigned long cookies[KCMP_TYPES][2] __read_mostly;
|
|
|
|
|
|
|
|
static long kptr_obfuscate(long v, int type)
|
|
|
|
{
|
|
|
|
return (v ^ cookies[type][0]) * cookies[type][1];
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* 0 - equal, i.e. v1 = v2
|
|
|
|
* 1 - less than, i.e. v1 < v2
|
|
|
|
* 2 - greater than, i.e. v1 > v2
|
|
|
|
* 3 - not equal but ordering unavailable (reserved for future)
|
|
|
|
*/
|
|
|
|
static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type)
|
|
|
|
{
|
kcmp: fix standard comparison bug
The C operator <= defines a perfectly fine total ordering on the set of
values representable in a long. However, unlike its namesake in the
integers, it is not translation invariant, meaning that we do not have
"b <= c" iff "a+b <= a+c" for all a,b,c.
This means that it is always wrong to try to boil down the relationship
between two longs to a question about the sign of their difference,
because the resulting relation [a LEQ b iff a-b <= 0] is neither
anti-symmetric or transitive. The former is due to -LONG_MIN==LONG_MIN
(take any two a,b with a-b = LONG_MIN; then a LEQ b and b LEQ a, but a !=
b). The latter can either be seen observing that x LEQ x+1 for all x,
implying x LEQ x+1 LEQ x+2 ... LEQ x-1 LEQ x; or more directly with the
simple example a=LONG_MIN, b=0, c=1, for which a-b < 0, b-c < 0, but a-c >
0.
Note that it makes absolutely no difference that a transmogrying bijection
has been applied before the comparison is done. In fact, had the
obfuscation not been done, one could probably not observe the bug
(assuming all values being compared always lie in one half of the address
space, the mathematical value of a-b is always representable in a long).
As it stands, one can easily obtain three file descriptors exhibiting the
non-transitivity of kcmp().
Side note 1: I can't see that ensuring the MSB of the multiplier is
set serves any purpose other than obfuscating the obfuscating code.
Side note 2:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <sys/syscall.h>
enum kcmp_type {
KCMP_FILE,
KCMP_VM,
KCMP_FILES,
KCMP_FS,
KCMP_SIGHAND,
KCMP_IO,
KCMP_SYSVSEM,
KCMP_TYPES,
};
pid_t pid;
int kcmp(pid_t pid1, pid_t pid2, int type,
unsigned long idx1, unsigned long idx2)
{
return syscall(SYS_kcmp, pid1, pid2, type, idx1, idx2);
}
int cmp_fd(int fd1, int fd2)
{
int c = kcmp(pid, pid, KCMP_FILE, fd1, fd2);
if (c < 0) {
perror("kcmp");
exit(1);
}
assert(0 <= c && c < 3);
return c;
}
int cmp_fdp(const void *a, const void *b)
{
static const int normalize[] = {0, -1, 1};
return normalize[cmp_fd(*(int*)a, *(int*)b)];
}
#define MAX 100 /* This is plenty; I've seen it trigger for MAX==3 */
int main(int argc, char *argv[])
{
int r, s, count = 0;
int REL[3] = {0,0,0};
int fd[MAX];
pid = getpid();
while (count < MAX) {
r = open("/dev/null", O_RDONLY);
if (r < 0)
break;
fd[count++] = r;
}
printf("opened %d file descriptors\n", count);
for (r = 0; r < count; ++r) {
for (s = r+1; s < count; ++s) {
REL[cmp_fd(fd[r], fd[s])]++;
}
}
printf("== %d\t< %d\t> %d\n", REL[0], REL[1], REL[2]);
qsort(fd, count, sizeof(fd[0]), cmp_fdp);
memset(REL, 0, sizeof(REL));
for (r = 0; r < count; ++r) {
for (s = r+1; s < count; ++s) {
REL[cmp_fd(fd[r], fd[s])]++;
}
}
printf("== %d\t< %d\t> %d\n", REL[0], REL[1], REL[2]);
return (REL[0] + REL[2] != 0);
}
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
"Eric W. Biederman" <ebiederm@xmission.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-09-10 05:51:01 +08:00
|
|
|
long t1, t2;
|
2012-06-01 07:26:44 +08:00
|
|
|
|
kcmp: fix standard comparison bug
The C operator <= defines a perfectly fine total ordering on the set of
values representable in a long. However, unlike its namesake in the
integers, it is not translation invariant, meaning that we do not have
"b <= c" iff "a+b <= a+c" for all a,b,c.
This means that it is always wrong to try to boil down the relationship
between two longs to a question about the sign of their difference,
because the resulting relation [a LEQ b iff a-b <= 0] is neither
anti-symmetric or transitive. The former is due to -LONG_MIN==LONG_MIN
(take any two a,b with a-b = LONG_MIN; then a LEQ b and b LEQ a, but a !=
b). The latter can either be seen observing that x LEQ x+1 for all x,
implying x LEQ x+1 LEQ x+2 ... LEQ x-1 LEQ x; or more directly with the
simple example a=LONG_MIN, b=0, c=1, for which a-b < 0, b-c < 0, but a-c >
0.
Note that it makes absolutely no difference that a transmogrying bijection
has been applied before the comparison is done. In fact, had the
obfuscation not been done, one could probably not observe the bug
(assuming all values being compared always lie in one half of the address
space, the mathematical value of a-b is always representable in a long).
As it stands, one can easily obtain three file descriptors exhibiting the
non-transitivity of kcmp().
Side note 1: I can't see that ensuring the MSB of the multiplier is
set serves any purpose other than obfuscating the obfuscating code.
Side note 2:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <sys/syscall.h>
enum kcmp_type {
KCMP_FILE,
KCMP_VM,
KCMP_FILES,
KCMP_FS,
KCMP_SIGHAND,
KCMP_IO,
KCMP_SYSVSEM,
KCMP_TYPES,
};
pid_t pid;
int kcmp(pid_t pid1, pid_t pid2, int type,
unsigned long idx1, unsigned long idx2)
{
return syscall(SYS_kcmp, pid1, pid2, type, idx1, idx2);
}
int cmp_fd(int fd1, int fd2)
{
int c = kcmp(pid, pid, KCMP_FILE, fd1, fd2);
if (c < 0) {
perror("kcmp");
exit(1);
}
assert(0 <= c && c < 3);
return c;
}
int cmp_fdp(const void *a, const void *b)
{
static const int normalize[] = {0, -1, 1};
return normalize[cmp_fd(*(int*)a, *(int*)b)];
}
#define MAX 100 /* This is plenty; I've seen it trigger for MAX==3 */
int main(int argc, char *argv[])
{
int r, s, count = 0;
int REL[3] = {0,0,0};
int fd[MAX];
pid = getpid();
while (count < MAX) {
r = open("/dev/null", O_RDONLY);
if (r < 0)
break;
fd[count++] = r;
}
printf("opened %d file descriptors\n", count);
for (r = 0; r < count; ++r) {
for (s = r+1; s < count; ++s) {
REL[cmp_fd(fd[r], fd[s])]++;
}
}
printf("== %d\t< %d\t> %d\n", REL[0], REL[1], REL[2]);
qsort(fd, count, sizeof(fd[0]), cmp_fdp);
memset(REL, 0, sizeof(REL));
for (r = 0; r < count; ++r) {
for (s = r+1; s < count; ++s) {
REL[cmp_fd(fd[r], fd[s])]++;
}
}
printf("== %d\t< %d\t> %d\n", REL[0], REL[1], REL[2]);
return (REL[0] + REL[2] != 0);
}
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
"Eric W. Biederman" <ebiederm@xmission.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-09-10 05:51:01 +08:00
|
|
|
t1 = kptr_obfuscate((long)v1, type);
|
|
|
|
t2 = kptr_obfuscate((long)v2, type);
|
2012-06-01 07:26:44 +08:00
|
|
|
|
kcmp: fix standard comparison bug
The C operator <= defines a perfectly fine total ordering on the set of
values representable in a long. However, unlike its namesake in the
integers, it is not translation invariant, meaning that we do not have
"b <= c" iff "a+b <= a+c" for all a,b,c.
This means that it is always wrong to try to boil down the relationship
between two longs to a question about the sign of their difference,
because the resulting relation [a LEQ b iff a-b <= 0] is neither
anti-symmetric or transitive. The former is due to -LONG_MIN==LONG_MIN
(take any two a,b with a-b = LONG_MIN; then a LEQ b and b LEQ a, but a !=
b). The latter can either be seen observing that x LEQ x+1 for all x,
implying x LEQ x+1 LEQ x+2 ... LEQ x-1 LEQ x; or more directly with the
simple example a=LONG_MIN, b=0, c=1, for which a-b < 0, b-c < 0, but a-c >
0.
Note that it makes absolutely no difference that a transmogrying bijection
has been applied before the comparison is done. In fact, had the
obfuscation not been done, one could probably not observe the bug
(assuming all values being compared always lie in one half of the address
space, the mathematical value of a-b is always representable in a long).
As it stands, one can easily obtain three file descriptors exhibiting the
non-transitivity of kcmp().
Side note 1: I can't see that ensuring the MSB of the multiplier is
set serves any purpose other than obfuscating the obfuscating code.
Side note 2:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <sys/syscall.h>
enum kcmp_type {
KCMP_FILE,
KCMP_VM,
KCMP_FILES,
KCMP_FS,
KCMP_SIGHAND,
KCMP_IO,
KCMP_SYSVSEM,
KCMP_TYPES,
};
pid_t pid;
int kcmp(pid_t pid1, pid_t pid2, int type,
unsigned long idx1, unsigned long idx2)
{
return syscall(SYS_kcmp, pid1, pid2, type, idx1, idx2);
}
int cmp_fd(int fd1, int fd2)
{
int c = kcmp(pid, pid, KCMP_FILE, fd1, fd2);
if (c < 0) {
perror("kcmp");
exit(1);
}
assert(0 <= c && c < 3);
return c;
}
int cmp_fdp(const void *a, const void *b)
{
static const int normalize[] = {0, -1, 1};
return normalize[cmp_fd(*(int*)a, *(int*)b)];
}
#define MAX 100 /* This is plenty; I've seen it trigger for MAX==3 */
int main(int argc, char *argv[])
{
int r, s, count = 0;
int REL[3] = {0,0,0};
int fd[MAX];
pid = getpid();
while (count < MAX) {
r = open("/dev/null", O_RDONLY);
if (r < 0)
break;
fd[count++] = r;
}
printf("opened %d file descriptors\n", count);
for (r = 0; r < count; ++r) {
for (s = r+1; s < count; ++s) {
REL[cmp_fd(fd[r], fd[s])]++;
}
}
printf("== %d\t< %d\t> %d\n", REL[0], REL[1], REL[2]);
qsort(fd, count, sizeof(fd[0]), cmp_fdp);
memset(REL, 0, sizeof(REL));
for (r = 0; r < count; ++r) {
for (s = r+1; s < count; ++s) {
REL[cmp_fd(fd[r], fd[s])]++;
}
}
printf("== %d\t< %d\t> %d\n", REL[0], REL[1], REL[2]);
return (REL[0] + REL[2] != 0);
}
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
"Eric W. Biederman" <ebiederm@xmission.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-09-10 05:51:01 +08:00
|
|
|
return (t1 < t2) | ((t1 > t2) << 1);
|
2012-06-01 07:26:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* The caller must have pinned the task */
|
|
|
|
static struct file *
|
|
|
|
get_file_raw_ptr(struct task_struct *task, unsigned int idx)
|
|
|
|
{
|
|
|
|
struct file *file = NULL;
|
|
|
|
|
|
|
|
task_lock(task);
|
|
|
|
rcu_read_lock();
|
|
|
|
|
|
|
|
if (task->files)
|
|
|
|
file = fcheck_files(task->files, idx);
|
|
|
|
|
|
|
|
rcu_read_unlock();
|
|
|
|
task_unlock(task);
|
|
|
|
|
|
|
|
return file;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Release the mutexes taken by kcmp_lock().  When both arguments refer to
 * the same mutex it is unlocked exactly once.
 */
static void kcmp_unlock(struct mutex *m1, struct mutex *m2)
{
	if (unlikely(m1 == m2)) {
		mutex_unlock(m1);
		return;
	}

	mutex_unlock(m2);
	mutex_unlock(m1);
}
|
|
|
|
|
|
|
|
static int kcmp_lock(struct mutex *m1, struct mutex *m2)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (m2 > m1)
|
|
|
|
swap(m1, m2);
|
|
|
|
|
|
|
|
err = mutex_lock_killable(m1);
|
|
|
|
if (!err && likely(m1 != m2)) {
|
|
|
|
err = mutex_lock_killable_nested(m2, SINGLE_DEPTH_NESTING);
|
|
|
|
if (err)
|
|
|
|
mutex_unlock(m1);
|
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * kcmp - compare a kernel resource of two tasks.
 *
 * @pid1, @pid2: tasks to compare, looked up in the caller's PID namespace.
 * @type:        which resource to compare (KCMP_*).
 * @idx1, @idx2: file descriptor numbers in the respective task; only used
 *               for KCMP_FILE.
 *
 * Returns the kcmp_ptr() result (0, 1 or 2) on success, or a negative
 * errno:
 *   -ESRCH       either task was not found
 *   -EPERM       ptrace_may_access() denied read access to a task
 *   -EBADF       KCMP_FILE and a descriptor did not resolve to a file
 *   -EOPNOTSUPP  KCMP_SYSVSEM without CONFIG_SYSVIPC
 *   -EINVAL      unknown @type
 * or the error returned by kcmp_lock().
 */
SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
		unsigned long, idx1, unsigned long, idx2)
{
	struct task_struct *task1, *task2;
	struct mutex *guard1, *guard2;
	int ret;

	/*
	 * Tasks are looked up in caller's PID namespace only.
	 */
	rcu_read_lock();
	task1 = find_task_by_vpid(pid1);
	task2 = find_task_by_vpid(pid2);
	if (!task1 || !task2) {
		rcu_read_unlock();
		return -ESRCH;
	}

	/* Pin both tasks before leaving the RCU read-side section. */
	get_task_struct(task1);
	get_task_struct(task2);
	rcu_read_unlock();

	guard1 = &task1->signal->cred_guard_mutex;
	guard2 = &task2->signal->cred_guard_mutex;

	/*
	 * One should have enough rights to inspect task details.
	 */
	ret = kcmp_lock(guard1, guard2);
	if (ret)
		goto out_put;

	if (!(ptrace_may_access(task1, PTRACE_MODE_READ_REALCREDS) &&
	      ptrace_may_access(task2, PTRACE_MODE_READ_REALCREDS))) {
		ret = -EPERM;
		goto out_unlock;
	}

	switch (type) {
	case KCMP_FILE: {
		struct file *filp1 = get_file_raw_ptr(task1, idx1);
		struct file *filp2 = get_file_raw_ptr(task2, idx2);

		ret = (filp1 && filp2) ?
			kcmp_ptr(filp1, filp2, KCMP_FILE) : -EBADF;
		break;
	}
	case KCMP_VM:
		ret = kcmp_ptr(task1->mm, task2->mm, KCMP_VM);
		break;
	case KCMP_FILES:
		ret = kcmp_ptr(task1->files, task2->files, KCMP_FILES);
		break;
	case KCMP_FS:
		ret = kcmp_ptr(task1->fs, task2->fs, KCMP_FS);
		break;
	case KCMP_SIGHAND:
		ret = kcmp_ptr(task1->sighand, task2->sighand, KCMP_SIGHAND);
		break;
	case KCMP_IO:
		ret = kcmp_ptr(task1->io_context, task2->io_context, KCMP_IO);
		break;
	case KCMP_SYSVSEM:
#ifdef CONFIG_SYSVIPC
		ret = kcmp_ptr(task1->sysvsem.undo_list,
			       task2->sysvsem.undo_list,
			       KCMP_SYSVSEM);
#else
		ret = -EOPNOTSUPP;
#endif
		break;
	default:
		ret = -EINVAL;
		break;
	}

out_unlock:
	kcmp_unlock(guard1, guard2);
out_put:
	put_task_struct(task1);
	put_task_struct(task2);

	return ret;
}
|
|
|
|
|
|
|
|
static __init int kcmp_cookies_init(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
get_random_bytes(cookies, sizeof(cookies));
|
|
|
|
|
|
|
|
for (i = 0; i < KCMP_TYPES; i++)
|
|
|
|
cookies[i][1] |= (~(~0UL >> 1) | 1);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
arch_initcall(kcmp_cookies_init);
|