Merge branch 'x86-entry-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 entry updates from Ingo Molnar: "This contains x32 and compat syscall improvements, the biggest one of which splits x32 syscalls into their own table, which allows new syscalls to share the x32 and x86-64 number - which turns the 512-547 special syscall numbers range into a legacy wart that won't be extended going forward" * 'x86-entry-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/syscalls: Split the x32 syscalls into their own table x86/syscalls: Disallow compat entries for all types of 64-bit syscalls x86/syscalls: Use the compat versions of rt_sigsuspend() and rt_sigprocmask() x86/syscalls: Make __X32_SYSCALL_BIT be unsigned long
This commit is contained in:
commit
e0d60a1e68
|
@ -285,15 +285,16 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
|
|||
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
|
||||
nr = syscall_trace_enter(regs);
|
||||
|
||||
/*
|
||||
* NB: Native and x32 syscalls are dispatched from the same
|
||||
* table. The only functional difference is the x32 bit in
|
||||
* regs->orig_ax, which changes the behavior of some syscalls.
|
||||
*/
|
||||
nr &= __SYSCALL_MASK;
|
||||
if (likely(nr < NR_syscalls)) {
|
||||
nr = array_index_nospec(nr, NR_syscalls);
|
||||
regs->ax = sys_call_table[nr](regs);
|
||||
#ifdef CONFIG_X86_X32_ABI
|
||||
} else if (likely((nr & __X32_SYSCALL_BIT) &&
|
||||
(nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) {
|
||||
nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT,
|
||||
X32_NR_syscalls);
|
||||
regs->ax = x32_sys_call_table[nr](regs);
|
||||
#endif
|
||||
}
|
||||
|
||||
syscall_return_slowpath(regs);
|
||||
|
|
|
@ -10,10 +10,13 @@
|
|||
/* this is a lie, but it does not hurt as sys_ni_syscall just returns -ENOSYS */
|
||||
extern asmlinkage long sys_ni_syscall(const struct pt_regs *);
|
||||
#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
|
||||
#define __SYSCALL_X32(nr, sym, qual) __SYSCALL_64(nr, sym, qual)
|
||||
#include <asm/syscalls_64.h>
|
||||
#undef __SYSCALL_64
|
||||
#undef __SYSCALL_X32
|
||||
|
||||
#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
|
||||
#define __SYSCALL_X32(nr, sym, qual)
|
||||
|
||||
asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
|
||||
/*
|
||||
|
@ -23,3 +26,25 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
|
|||
[0 ... __NR_syscall_max] = &sys_ni_syscall,
|
||||
#include <asm/syscalls_64.h>
|
||||
};
|
||||
|
||||
#undef __SYSCALL_64
|
||||
#undef __SYSCALL_X32
|
||||
|
||||
#ifdef CONFIG_X86_X32_ABI
|
||||
|
||||
#define __SYSCALL_64(nr, sym, qual)
|
||||
#define __SYSCALL_X32(nr, sym, qual) [nr] = sym,
|
||||
|
||||
asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = {
|
||||
/*
|
||||
* Smells like a compiler bug -- it doesn't work
|
||||
* when the & below is removed.
|
||||
*/
|
||||
[0 ... __NR_syscall_x32_max] = &sys_ni_syscall,
|
||||
#include <asm/syscalls_64.h>
|
||||
};
|
||||
|
||||
#undef __SYSCALL_64
|
||||
#undef __SYSCALL_X32
|
||||
|
||||
#endif
|
||||
|
|
|
@ -186,11 +186,11 @@
|
|||
172 i386 prctl sys_prctl __ia32_sys_prctl
|
||||
173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn
|
||||
174 i386 rt_sigaction sys_rt_sigaction __ia32_compat_sys_rt_sigaction
|
||||
175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_sys_rt_sigprocmask
|
||||
175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_compat_sys_rt_sigprocmask
|
||||
176 i386 rt_sigpending sys_rt_sigpending __ia32_compat_sys_rt_sigpending
|
||||
177 i386 rt_sigtimedwait sys_rt_sigtimedwait_time32 __ia32_compat_sys_rt_sigtimedwait_time32
|
||||
178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo __ia32_compat_sys_rt_sigqueueinfo
|
||||
179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_sys_rt_sigsuspend
|
||||
179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_compat_sys_rt_sigsuspend
|
||||
180 i386 pread64 sys_pread64 __ia32_compat_sys_x86_pread
|
||||
181 i386 pwrite64 sys_pwrite64 __ia32_compat_sys_x86_pwrite
|
||||
182 i386 chown sys_chown16 __ia32_sys_chown16
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
#!/bin/sh
|
||||
#!/bin/bash
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
in="$1"
|
||||
out="$2"
|
||||
|
||||
syscall_macro() {
|
||||
abi="$1"
|
||||
nr="$2"
|
||||
entry="$3"
|
||||
local abi="$1"
|
||||
local nr="$2"
|
||||
local entry="$3"
|
||||
|
||||
# Entry can be either just a function name or "function/qualifier"
|
||||
real_entry="${entry%%/*}"
|
||||
|
@ -21,14 +21,14 @@ syscall_macro() {
|
|||
}
|
||||
|
||||
emit() {
|
||||
abi="$1"
|
||||
nr="$2"
|
||||
entry="$3"
|
||||
compat="$4"
|
||||
umlentry=""
|
||||
local abi="$1"
|
||||
local nr="$2"
|
||||
local entry="$3"
|
||||
local compat="$4"
|
||||
local umlentry=""
|
||||
|
||||
if [ "$abi" = "64" -a -n "$compat" ]; then
|
||||
echo "a compat entry for a 64-bit syscall makes no sense" >&2
|
||||
if [ "$abi" != "I386" -a -n "$compat" ]; then
|
||||
echo "a compat entry ($abi: $compat) for a 64-bit syscall makes no sense" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
@ -62,14 +62,17 @@ grep '^[0-9]' "$in" | sort -n | (
|
|||
while read nr abi name entry compat; do
|
||||
abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
|
||||
if [ "$abi" = "COMMON" -o "$abi" = "64" ]; then
|
||||
# COMMON is the same as 64, except that we don't expect X32
|
||||
# programs to use it. Our expectation has nothing to do with
|
||||
# any generated code, so treat them the same.
|
||||
emit 64 "$nr" "$entry" "$compat"
|
||||
if [ "$abi" = "COMMON" ]; then
|
||||
# COMMON means that this syscall exists in the same form for
|
||||
# 64-bit and X32.
|
||||
echo "#ifdef CONFIG_X86_X32_ABI"
|
||||
emit X32 "$nr" "$entry" "$compat"
|
||||
echo "#endif"
|
||||
fi
|
||||
elif [ "$abi" = "X32" ]; then
|
||||
# X32 is equivalent to 64 on an X32-compatible kernel.
|
||||
echo "#ifdef CONFIG_X86_X32_ABI"
|
||||
emit 64 "$nr" "$entry" "$compat"
|
||||
emit X32 "$nr" "$entry" "$compat"
|
||||
echo "#endif"
|
||||
elif [ "$abi" = "I386" ]; then
|
||||
emit "$abi" "$nr" "$entry" "$compat"
|
||||
|
|
|
@ -36,6 +36,10 @@ extern const sys_call_ptr_t sys_call_table[];
|
|||
extern const sys_call_ptr_t ia32_sys_call_table[];
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_X32_ABI
|
||||
extern const sys_call_ptr_t x32_sys_call_table[];
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Only the low 32 bits of orig_ax are meaningful, so we return int.
|
||||
* This importantly ignores the high bits on 64-bit, so comparisons
|
||||
|
|
|
@ -5,12 +5,6 @@
|
|||
#include <uapi/asm/unistd.h>
|
||||
|
||||
|
||||
# ifdef CONFIG_X86_X32_ABI
|
||||
# define __SYSCALL_MASK (~(__X32_SYSCALL_BIT))
|
||||
# else
|
||||
# define __SYSCALL_MASK (~0)
|
||||
# endif
|
||||
|
||||
# ifdef CONFIG_X86_32
|
||||
|
||||
# include <asm/unistd_32.h>
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#define _UAPI_ASM_X86_UNISTD_H
|
||||
|
||||
/* x32 syscall flag bit */
|
||||
#define __X32_SYSCALL_BIT 0x40000000
|
||||
#define __X32_SYSCALL_BIT 0x40000000UL
|
||||
|
||||
#ifndef __KERNEL__
|
||||
# ifdef __i386__
|
||||
|
|
|
@ -6,13 +6,28 @@
|
|||
#include <asm/ia32.h>
|
||||
|
||||
#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
|
||||
#define __SYSCALL_X32(nr, sym, qual)
|
||||
static char syscalls_64[] = {
|
||||
#include <asm/syscalls_64.h>
|
||||
};
|
||||
#undef __SYSCALL_64
|
||||
#undef __SYSCALL_X32
|
||||
|
||||
#ifdef CONFIG_X86_X32_ABI
|
||||
#define __SYSCALL_64(nr, sym, qual)
|
||||
#define __SYSCALL_X32(nr, sym, qual) [nr] = 1,
|
||||
static char syscalls_x32[] = {
|
||||
#include <asm/syscalls_64.h>
|
||||
};
|
||||
#undef __SYSCALL_64
|
||||
#undef __SYSCALL_X32
|
||||
#endif
|
||||
|
||||
#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
|
||||
static char syscalls_ia32[] = {
|
||||
#include <asm/syscalls_32.h>
|
||||
};
|
||||
#undef __SYSCALL_I386
|
||||
|
||||
#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS)
|
||||
#include <asm/kvm_para.h>
|
||||
|
@ -80,6 +95,11 @@ int main(void)
|
|||
DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
|
||||
DEFINE(NR_syscalls, sizeof(syscalls_64));
|
||||
|
||||
#ifdef CONFIG_X86_X32_ABI
|
||||
DEFINE(__NR_syscall_x32_max, sizeof(syscalls_x32) - 1);
|
||||
DEFINE(X32_NR_syscalls, sizeof(syscalls_x32));
|
||||
#endif
|
||||
|
||||
DEFINE(__NR_syscall_compat_max, sizeof(syscalls_ia32) - 1);
|
||||
DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32));
|
||||
|
||||
|
|
|
@ -17,7 +17,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap
|
|||
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
|
||||
test_FCMOV test_FCOMI test_FISTTP \
|
||||
vdso_restorer
|
||||
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
|
||||
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering
|
||||
# Some selftests require 32bit support enabled also on 64bit systems
|
||||
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
|
||||
|
||||
|
|
|
@ -0,0 +1,89 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* syscall_numbering.c - tests that x32-only, wrongly x32-tagged and out-of-range syscall numbers fail with ENOSYS
|
||||
* Copyright (c) 2018 Andrew Lutomirski
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <syscall.h>
|
||||
|
||||
static int nerrs;
|
||||
|
||||
#define X32_BIT 0x40000000UL
|
||||
|
||||
/*
 * check_enosys - invoke syscall @nr with all-zero arguments and verify that
 * it fails with ENOSYS.
 *
 * @nr: raw syscall number handed to syscall()
 * @ok: set to false on any other outcome; never set to true here, so a
 *      single flag can accumulate the verdict of several checks
 *
 * A [FAIL] line is printed to stdout for every unexpected outcome.
 */
static void check_enosys(unsigned long nr, bool *ok)
{
	long ret;
	int err;

	/* If this fails, a segfault is reasonably likely. */
	fflush(stdout);

	/* Start from a clean errno so a stale ENOSYS cannot mask a success. */
	errno = 0;
	ret = syscall(nr, 0, 0, 0, 0, 0, 0);
	/* Snapshot errno before printf() gets a chance to overwrite it. */
	err = errno;

	/*
	 * syscall() signals failure only by returning -1 with errno set;
	 * zero and every other return value mean the call succeeded.
	 */
	if (ret != -1) {
		printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr);
		*ok = false;
	} else if (err != ENOSYS) {
		printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, err);
		*ok = false;
	}
}
|
||||
|
||||
static void test_x32_without_x32_bit(void)
|
||||
{
|
||||
bool ok = true;
|
||||
|
||||
/*
|
||||
* Syscalls 512-547 are "x32" syscalls. They are intended to be
|
||||
* called with the x32 (0x40000000) bit set. Calling them without
|
||||
* the x32 bit set is nonsense and should not work.
|
||||
*/
|
||||
printf("[RUN]\tChecking syscalls 512-547\n");
|
||||
for (int i = 512; i <= 547; i++)
|
||||
check_enosys(i, &ok);
|
||||
|
||||
/*
|
||||
* Check that a handful of 64-bit-only syscalls are rejected if the x32
|
||||
* bit is set.
|
||||
*/
|
||||
printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n");
|
||||
check_enosys(16 | X32_BIT, &ok); /* ioctl */
|
||||
check_enosys(19 | X32_BIT, &ok); /* readv */
|
||||
check_enosys(20 | X32_BIT, &ok); /* writev */
|
||||
|
||||
/*
|
||||
* Check some syscalls with high bits set.
|
||||
*/
|
||||
printf("[RUN]\tChecking numbers above 2^32-1\n");
|
||||
check_enosys((1UL << 32), &ok);
|
||||
check_enosys(X32_BIT | (1UL << 32), &ok);
|
||||
|
||||
if (!ok)
|
||||
nerrs++;
|
||||
else
|
||||
printf("[OK]\tThey all returned -ENOSYS\n");
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
/*
|
||||
* Anyone diagnosing a failure will want to know whether the kernel
|
||||
* supports x32. Tell them.
|
||||
*/
|
||||
printf("\tChecking for x32...");
|
||||
fflush(stdout);
|
||||
if (syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0) >= 0) {
|
||||
printf(" supported\n");
|
||||
} else if (errno == ENOSYS) {
|
||||
printf(" not supported\n");
|
||||
} else {
|
||||
printf(" confused\n");
|
||||
}
|
||||
|
||||
test_x32_without_x32_bit();
|
||||
|
||||
return nerrs ? 1 : 0;
|
||||
}
|
Loading…
Reference in New Issue