Merge branch 'x86-entry-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 entry updates from Ingo Molnar:
 "This contains x32 and compat syscall improvements, the biggest one of
  which splits x32 syscalls into their own table, which allows new
  syscalls to share the x32 and x86-64 number - which turns the
  512-547 special syscall numbers range into a legacy wart that won't be
  extended going forward"

* 'x86-entry-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/syscalls: Split the x32 syscalls into their own table
  x86/syscalls: Disallow compat entries for all types of 64-bit syscalls
  x86/syscalls: Use the compat versions of rt_sigsuspend() and rt_sigprocmask()
  x86/syscalls: Make __X32_SYSCALL_BIT be unsigned long
This commit is contained in:
Linus Torvalds 2019-09-16 19:06:29 -07:00
commit e0d60a1e68
10 changed files with 168 additions and 32 deletions

View File

@ -285,15 +285,16 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
nr = syscall_trace_enter(regs);
/*
* NB: Native and x32 syscalls are dispatched from the same
* table. The only functional difference is the x32 bit in
* regs->orig_ax, which changes the behavior of some syscalls.
*/
nr &= __SYSCALL_MASK;
if (likely(nr < NR_syscalls)) {
nr = array_index_nospec(nr, NR_syscalls);
regs->ax = sys_call_table[nr](regs);
#ifdef CONFIG_X86_X32_ABI
} else if (likely((nr & __X32_SYSCALL_BIT) &&
(nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) {
nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT,
X32_NR_syscalls);
regs->ax = x32_sys_call_table[nr](regs);
#endif
}
syscall_return_slowpath(regs);

View File

@ -10,10 +10,13 @@
/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */
extern asmlinkage long sys_ni_syscall(const struct pt_regs *);
#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
#define __SYSCALL_X32(nr, sym, qual) __SYSCALL_64(nr, sym, qual)
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
#undef __SYSCALL_X32
#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
#define __SYSCALL_X32(nr, sym, qual)
asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
/*
@ -23,3 +26,25 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
[0 ... __NR_syscall_max] = &sys_ni_syscall,
#include <asm/syscalls_64.h>
};
#undef __SYSCALL_64
#undef __SYSCALL_X32
#ifdef CONFIG_X86_X32_ABI
#define __SYSCALL_64(nr, sym, qual)
#define __SYSCALL_X32(nr, sym, qual) [nr] = sym,
asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = {
/*
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
*/
[0 ... __NR_syscall_x32_max] = &sys_ni_syscall,
#include <asm/syscalls_64.h>
};
#undef __SYSCALL_64
#undef __SYSCALL_X32
#endif

View File

@ -186,11 +186,11 @@
172 i386 prctl sys_prctl __ia32_sys_prctl
173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn
174 i386 rt_sigaction sys_rt_sigaction __ia32_compat_sys_rt_sigaction
175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_sys_rt_sigprocmask
175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_compat_sys_rt_sigprocmask
176 i386 rt_sigpending sys_rt_sigpending __ia32_compat_sys_rt_sigpending
177 i386 rt_sigtimedwait sys_rt_sigtimedwait_time32 __ia32_compat_sys_rt_sigtimedwait_time32
178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo __ia32_compat_sys_rt_sigqueueinfo
179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_sys_rt_sigsuspend
179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_compat_sys_rt_sigsuspend
180 i386 pread64 sys_pread64 __ia32_compat_sys_x86_pread
181 i386 pwrite64 sys_pwrite64 __ia32_compat_sys_x86_pwrite
182 i386 chown sys_chown16 __ia32_sys_chown16

View File

@ -1,13 +1,13 @@
#!/bin/sh
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
in="$1"
out="$2"
syscall_macro() {
abi="$1"
nr="$2"
entry="$3"
local abi="$1"
local nr="$2"
local entry="$3"
# Entry can be either just a function name or "function/qualifier"
real_entry="${entry%%/*}"
@ -21,14 +21,14 @@ syscall_macro() {
}
emit() {
abi="$1"
nr="$2"
entry="$3"
compat="$4"
umlentry=""
local abi="$1"
local nr="$2"
local entry="$3"
local compat="$4"
local umlentry=""
if [ "$abi" = "64" -a -n "$compat" ]; then
echo "a compat entry for a 64-bit syscall makes no sense" >&2
if [ "$abi" != "I386" -a -n "$compat" ]; then
echo "a compat entry ($abi: $compat) for a 64-bit syscall makes no sense" >&2
exit 1
fi
@ -62,14 +62,17 @@ grep '^[0-9]' "$in" | sort -n | (
while read nr abi name entry compat; do
abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
if [ "$abi" = "COMMON" -o "$abi" = "64" ]; then
# COMMON is the same as 64, except that we don't expect X32
# programs to use it. Our expectation has nothing to do with
# any generated code, so treat them the same.
emit 64 "$nr" "$entry" "$compat"
elif [ "$abi" = "X32" ]; then
# X32 is equivalent to 64 on an X32-compatible kernel.
if [ "$abi" = "COMMON" ]; then
# COMMON means that this syscall exists in the same form for
# 64-bit and X32.
echo "#ifdef CONFIG_X86_X32_ABI"
emit 64 "$nr" "$entry" "$compat"
emit X32 "$nr" "$entry" "$compat"
echo "#endif"
fi
elif [ "$abi" = "X32" ]; then
echo "#ifdef CONFIG_X86_X32_ABI"
emit X32 "$nr" "$entry" "$compat"
echo "#endif"
elif [ "$abi" = "I386" ]; then
emit "$abi" "$nr" "$entry" "$compat"

View File

@ -36,6 +36,10 @@ extern const sys_call_ptr_t sys_call_table[];
extern const sys_call_ptr_t ia32_sys_call_table[];
#endif
#ifdef CONFIG_X86_X32_ABI
extern const sys_call_ptr_t x32_sys_call_table[];
#endif
/*
* Only the low 32 bits of orig_ax are meaningful, so we return int.
* This importantly ignores the high bits on 64-bit, so comparisons

View File

@ -5,12 +5,6 @@
#include <uapi/asm/unistd.h>
# ifdef CONFIG_X86_X32_ABI
# define __SYSCALL_MASK (~(__X32_SYSCALL_BIT))
# else
# define __SYSCALL_MASK (~0)
# endif
# ifdef CONFIG_X86_32
# include <asm/unistd_32.h>

View File

@ -3,7 +3,7 @@
#define _UAPI_ASM_X86_UNISTD_H
/* x32 syscall flag bit */
#define __X32_SYSCALL_BIT 0x40000000
#define __X32_SYSCALL_BIT 0x40000000UL
#ifndef __KERNEL__
# ifdef __i386__

View File

@ -6,13 +6,28 @@
#include <asm/ia32.h>
#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
#define __SYSCALL_X32(nr, sym, qual)
static char syscalls_64[] = {
#include <asm/syscalls_64.h>
};
#undef __SYSCALL_64
#undef __SYSCALL_X32
#ifdef CONFIG_X86_X32_ABI
#define __SYSCALL_64(nr, sym, qual)
#define __SYSCALL_X32(nr, sym, qual) [nr] = 1,
static char syscalls_x32[] = {
#include <asm/syscalls_64.h>
};
#undef __SYSCALL_64
#undef __SYSCALL_X32
#endif
#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls_ia32[] = {
#include <asm/syscalls_32.h>
};
#undef __SYSCALL_I386
#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS)
#include <asm/kvm_para.h>
@ -80,6 +95,11 @@ int main(void)
DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
DEFINE(NR_syscalls, sizeof(syscalls_64));
#ifdef CONFIG_X86_X32_ABI
DEFINE(__NR_syscall_x32_max, sizeof(syscalls_x32) - 1);
DEFINE(X32_NR_syscalls, sizeof(syscalls_x32));
#endif
DEFINE(__NR_syscall_compat_max, sizeof(syscalls_ia32) - 1);
DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32));

View File

@ -17,7 +17,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering
# Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall

View File

@ -0,0 +1,89 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* syscall_arg_fault.c - tests faults 32-bit fast syscall stack args
* Copyright (c) 2018 Andrew Lutomirski
*/
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <unistd.h>
#include <syscall.h>
static int nerrs;
#define X32_BIT 0x40000000UL
static void check_enosys(unsigned long nr, bool *ok)
{
/* If this fails, a segfault is reasonably likely. */
fflush(stdout);
long ret = syscall(nr, 0, 0, 0, 0, 0, 0);
if (ret == 0) {
printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr);
*ok = false;
} else if (errno != ENOSYS) {
printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, errno);
*ok = false;
}
}
static void test_x32_without_x32_bit(void)
{
bool ok = true;
/*
* Syscalls 512-547 are "x32" syscalls. They are intended to be
* called with the x32 (0x40000000) bit set. Calling them without
* the x32 bit set is nonsense and should not work.
*/
printf("[RUN]\tChecking syscalls 512-547\n");
for (int i = 512; i <= 547; i++)
check_enosys(i, &ok);
/*
* Check that a handful of 64-bit-only syscalls are rejected if the x32
* bit is set.
*/
printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n");
check_enosys(16 | X32_BIT, &ok); /* ioctl */
check_enosys(19 | X32_BIT, &ok); /* readv */
check_enosys(20 | X32_BIT, &ok); /* writev */
/*
* Check some syscalls with high bits set.
*/
printf("[RUN]\tChecking numbers above 2^32-1\n");
check_enosys((1UL << 32), &ok);
check_enosys(X32_BIT | (1UL << 32), &ok);
if (!ok)
nerrs++;
else
printf("[OK]\tThey all returned -ENOSYS\n");
}
int main()
{
/*
* Anyone diagnosing a failure will want to know whether the kernel
* supports x32. Tell them.
*/
printf("\tChecking for x32...");
fflush(stdout);
if (syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0) >= 0) {
printf(" supported\n");
} else if (errno == ENOSYS) {
printf(" not supported\n");
} else {
printf(" confused\n");
}
test_x32_without_x32_bit();
return nerrs ? 1 : 0;
}