x86/entry/64: Always run ptregs-using syscalls on the slow path

64-bit syscalls currently have an optimization in which they are
called with partial pt_regs.  A small handful require full
pt_regs.
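
For context, "partial" means the fast path saves only the registers that the
C calling convention clobbers anyway; the callee-saved registers at the top of
struct pt_regs stay unsaved until SAVE_EXTRA_REGS runs.  A rough sketch of the
layout, paraphrased from arch/x86/include/asm/ptrace.h of this era (details
approximate, for illustration only):

  /* Illustrative sketch of the x86-64 struct pt_regs layout. */
  struct pt_regs {
          /*
           * Callee-saved registers: not saved by the 64-bit fast path.
           * Only SAVE_EXTRA_REGS (i.e. the slow path and the ptregs stubs)
           * fills these in, which is why clone/fork/vfork/execve/sigreturn
           * need full pt_regs.
           */
          unsigned long r15, r14, r13, r12, bp, bx;

          /* Caller-clobbered registers: always saved on syscall entry. */
          unsigned long r11, r10, r9, r8, ax, cx, dx, si, di;

          unsigned long orig_ax;  /* syscall number / original RAX */

          /* Return frame: what the CPU needs to go back to user mode. */
          unsigned long ip, cs, flags, sp, ss;
  };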

In the 32-bit and compat cases, I cleaned this up by forcing
full pt_regs for all syscalls.  The performance hit doesn't
really matter as the affected system calls are fundamentally
heavy and this is the 32-bit compat case.

I want to clean up the 64-bit case as well, but I don't want to
hurt fast path performance.  To do that, I want to force the
syscalls that use pt_regs onto the slow path.  This will enable
us to make slow path syscalls be real ABI-compliant C functions.

Use the new syscall entry qualification machinery for this.
'stub_clone' is now 'stub_clone/ptregs'.

The next patch will eliminate the stubs, and we'll just have
'sys_clone/ptregs'.
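
To make the expansion concrete, here is a minimal user-space sketch of the
selection that the new __SYSCALL_64_QUAL_* macros perform (the real macros are
in the arch/x86/entry/syscall_64.c hunk below; strings stand in for the
function pointers the real sys_call_table holds, and the two example rows are
taken from syscall_64.tbl):

  #include <stdio.h>

  /* Mimics the qualifier selection added in this patch (illustration only). */
  #define __SYSCALL_64_QUAL_(sym)        #sym             /* unqualified: use sym itself     */
  #define __SYSCALL_64_QUAL_ptregs(sym)  "ptregs_" #sym   /* sym/ptregs: go through the stub */
  #define __SYSCALL_64(nr, sym, qual)    [nr] = __SYSCALL_64_QUAL_##qual(sym),

  static const char *sys_call_table[64] = {
          /* What the generated <asm/syscalls_64.h> would emit for two rows: */
          __SYSCALL_64(0,  sys_read,   )          /* "0  common read   sys_read"          */
          __SYSCALL_64(56, stub_clone, ptregs)    /* "56 common clone  stub_clone/ptregs" */
  };

  int main(void)
  {
          printf("nr 0  -> %s\n", sys_call_table[0]);   /* sys_read          */
          printf("nr 56 -> %s\n", sys_call_table[56]);  /* ptregs_stub_clone */
          return 0;
  }

The kernel uses the same ##qual trick twice in syscall_64.c (once for the
extern prototypes, once for the table initializers), and the assembly side
instantiates a matching ptregs_<sym> stub that loads the real handler's
address into RAX and jumps to stub_ptregs_64.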

As of this patch, two-phase entry tracing is no longer used.  It
has served its purpose (namely a huge speedup on some workloads
prior to more general opportunistic SYSRET support), and once
the dust settles I'll send patches to back it out.

The implementation is heavily based on a patch from Brian Gerst:

  http://lkml.kernel.org/g/1449666173-15366-1-git-send-email-brgerst@gmail.com

Originally-From: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/b9beda88460bcefec6e7d792bd44eca9b760b0c4.1454022279.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Commit 302f5b260c (parent cfcbadb49d)
Author:    Andy Lutomirski <luto@kernel.org>, 2016-01-28 15:11:25 -08:00
Committer: Ingo Molnar <mingo@kernel.org>
3 changed files with 55 additions and 24 deletions

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -182,7 +182,15 @@ entry_SYSCALL_64_fastpath:
 #endif
 	ja	1f				/* return -ENOSYS (already in pt_regs->ax) */
 	movq	%r10, %rcx
+
+	/*
+	 * This call instruction is handled specially in stub_ptregs_64.
+	 * It might end up jumping to the slow path. If it jumps, RAX is
+	 * clobbered.
+	 */
 	call	*sys_call_table(, %rax, 8)
+.Lentry_SYSCALL_64_after_fastpath_call:
+
 	movq	%rax, RAX(%rsp)
 1:
 	/*
@@ -235,25 +243,13 @@ GLOBAL(int_ret_from_sys_call_irqs_off)
 
 	/* Do syscall entry tracing */
 tracesys:
-	movq	%rsp, %rdi
-	movl	$AUDIT_ARCH_X86_64, %esi
-	call	syscall_trace_enter_phase1
-	test	%rax, %rax
-	jnz	tracesys_phase2			/* if needed, run the slow path */
-	RESTORE_C_REGS_EXCEPT_RAX		/* else restore clobbered regs */
-	movq	ORIG_RAX(%rsp), %rax
-	jmp	entry_SYSCALL_64_fastpath	/* and return to the fast path */
-
-tracesys_phase2:
 	SAVE_EXTRA_REGS
 	movq	%rsp, %rdi
-	movl	$AUDIT_ARCH_X86_64, %esi
-	movq	%rax, %rdx
-	call	syscall_trace_enter_phase2
+	call	syscall_trace_enter
 
 	/*
 	 * Reload registers from stack in case ptrace changed them.
-	 * We don't reload %rax because syscall_trace_entry_phase2() returned
+	 * We don't reload %rax because syscall_trace_enter() returned
 	 * the value it wants us to use in the table lookup.
 	 */
 	RESTORE_C_REGS_EXCEPT_RAX
@@ -355,6 +351,38 @@ opportunistic_sysret_failed:
 	jmp	restore_c_regs_and_iret
 END(entry_SYSCALL_64)
 
+ENTRY(stub_ptregs_64)
+	/*
+	 * Syscalls marked as needing ptregs land here.
+	 * If we are on the fast path, we need to save the extra regs.
+	 * If we are on the slow path, the extra regs are already saved.
+	 *
+	 * RAX stores a pointer to the C function implementing the syscall.
+	 */
+	cmpq	$.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
+	jne	1f
+
+	/* Called from fast path -- pop return address and jump to slow path */
+	popq	%rax
+	jmp	tracesys			/* called from fast path */
+
+1:
+	/* Called from C */
+	jmp	*%rax				/* called from C */
+END(stub_ptregs_64)
+
+.macro ptregs_stub func
+ENTRY(ptregs_\func)
+	leaq	\func(%rip), %rax
+	jmp	stub_ptregs_64
+END(ptregs_\func)
+.endm
+
+/* Instantiate ptregs_stub for each ptregs-using syscall */
+#define __SYSCALL_64_QUAL_(sym)
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
+#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
+#include <asm/syscalls_64.h>
+
 .macro FORK_LIKE func
 ENTRY(stub_\func)

diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -6,11 +6,14 @@
 #include <asm/asm-offsets.h>
 #include <asm/syscall.h>
 
-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_64_QUAL_(sym) sym
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
+
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 #include <asm/syscalls_64.h>
 #undef __SYSCALL_64
 
-#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
+#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
 
 extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);

diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -21,7 +21,7 @@
 12	common	brk			sys_brk
 13	64	rt_sigaction		sys_rt_sigaction
 14	common	rt_sigprocmask		sys_rt_sigprocmask
-15	64	rt_sigreturn		stub_rt_sigreturn
+15	64	rt_sigreturn		stub_rt_sigreturn/ptregs
 16	64	ioctl			sys_ioctl
 17	common	pread64			sys_pread64
 18	common	pwrite64		sys_pwrite64
@@ -62,10 +62,10 @@
 53	common	socketpair		sys_socketpair
 54	64	setsockopt		sys_setsockopt
 55	64	getsockopt		sys_getsockopt
-56	common	clone			stub_clone
-57	common	fork			stub_fork
-58	common	vfork			stub_vfork
-59	64	execve			stub_execve
+56	common	clone			stub_clone/ptregs
+57	common	fork			stub_fork/ptregs
+58	common	vfork			stub_vfork/ptregs
+59	64	execve			stub_execve/ptregs
 60	common	exit			sys_exit
 61	common	wait4			sys_wait4
 62	common	kill			sys_kill
@@ -328,7 +328,7 @@
 319	common	memfd_create		sys_memfd_create
 320	common	kexec_file_load		sys_kexec_file_load
 321	common	bpf			sys_bpf
-322	64	execveat		stub_execveat
+322	64	execveat		stub_execveat/ptregs
 323	common	userfaultfd		sys_userfaultfd
 324	common	membarrier		sys_membarrier
 325	common	mlock2			sys_mlock2
@@ -346,7 +346,7 @@
 517	x32	recvfrom		compat_sys_recvfrom
 518	x32	sendmsg			compat_sys_sendmsg
 519	x32	recvmsg			compat_sys_recvmsg
-520	x32	execve			stub_x32_execve
+520	x32	execve			stub_x32_execve/ptregs
 521	x32	ptrace			compat_sys_ptrace
 522	x32	rt_sigpending		compat_sys_rt_sigpending
 523	x32	rt_sigtimedwait		compat_sys_rt_sigtimedwait
@@ -371,4 +371,4 @@
 542	x32	getsockopt		compat_sys_getsockopt
 543	x32	io_setup		compat_sys_io_setup
 544	x32	io_submit		compat_sys_io_submit
-545	x32	execveat		stub_x32_execveat
+545	x32	execveat		stub_x32_execveat/ptregs