x86/entry/64: Always run ptregs-using syscalls on the slow path
64-bit syscalls currently have an optimization in which they are called with partial pt_regs. A small handful require full pt_regs. In the 32-bit and compat cases, I cleaned this up by forcing full pt_regs for all syscalls. The performance hit doesn't really matter as the affected system calls are fundamentally heavy and this is the 32-bit compat case. I want to clean up the 64-bit case as well, but I don't want to hurt fast path performance. To do that, I want to force the syscalls that use pt_regs onto the slow path. This will enable us to make slow path syscalls be real ABI-compliant C functions. Use the new syscall entry qualification machinery for this. 'stub_clone' is now 'stub_clone/ptregs'. The next patch will eliminate the stubs, and we'll just have 'sys_clone/ptregs'. As of this patch, two-phase entry tracing is no longer used. It has served its purpose (namely a huge speedup on some workloads prior to more general opportunistic SYSRET support), and once the dust settles I'll send patches to back it out. The implementation is heavily based on a patch from Brian Gerst: http://lkml.kernel.org/g/1449666173-15366-1-git-send-email-brgerst@gmail.com Originally-From: Brian Gerst <brgerst@gmail.com> Signed-off-by: Andy Lutomirski <luto@kernel.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Borislav Petkov <bp@alien8.de> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/b9beda88460bcefec6e7d792bd44eca9b760b0c4.1454022279.git.luto@kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
cfcbadb49d
commit
302f5b260c
|
@ -182,7 +182,15 @@ entry_SYSCALL_64_fastpath:
|
|||
#endif
|
||||
ja 1f /* return -ENOSYS (already in pt_regs->ax) */
|
||||
movq %r10, %rcx
|
||||
|
||||
/*
|
||||
* This call instruction is handled specially in stub_ptregs_64.
|
||||
* It might end up jumping to the slow path. If it jumps, RAX is
|
||||
* clobbered.
|
||||
*/
|
||||
call *sys_call_table(, %rax, 8)
|
||||
.Lentry_SYSCALL_64_after_fastpath_call:
|
||||
|
||||
movq %rax, RAX(%rsp)
|
||||
1:
|
||||
/*
|
||||
|
@ -235,25 +243,13 @@ GLOBAL(int_ret_from_sys_call_irqs_off)
|
|||
|
||||
/* Do syscall entry tracing */
|
||||
tracesys:
|
||||
movq %rsp, %rdi
|
||||
movl $AUDIT_ARCH_X86_64, %esi
|
||||
call syscall_trace_enter_phase1
|
||||
test %rax, %rax
|
||||
jnz tracesys_phase2 /* if needed, run the slow path */
|
||||
RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */
|
||||
movq ORIG_RAX(%rsp), %rax
|
||||
jmp entry_SYSCALL_64_fastpath /* and return to the fast path */
|
||||
|
||||
tracesys_phase2:
|
||||
SAVE_EXTRA_REGS
|
||||
movq %rsp, %rdi
|
||||
movl $AUDIT_ARCH_X86_64, %esi
|
||||
movq %rax, %rdx
|
||||
call syscall_trace_enter_phase2
|
||||
call syscall_trace_enter
|
||||
|
||||
/*
|
||||
* Reload registers from stack in case ptrace changed them.
|
||||
* We don't reload %rax because syscall_trace_entry_phase2() returned
|
||||
* We don't reload %rax because syscall_trace_enter() returned
|
||||
* the value it wants us to use in the table lookup.
|
||||
*/
|
||||
RESTORE_C_REGS_EXCEPT_RAX
|
||||
|
@ -355,6 +351,38 @@ opportunistic_sysret_failed:
|
|||
jmp restore_c_regs_and_iret
|
||||
END(entry_SYSCALL_64)
|
||||
|
||||
ENTRY(stub_ptregs_64)
|
||||
/*
|
||||
* Syscalls marked as needing ptregs land here.
|
||||
* If we are on the fast path, we need to save the extra regs.
|
||||
* If we are on the slow path, the extra regs are already saved.
|
||||
*
|
||||
* RAX stores a pointer to the C function implementing the syscall.
|
||||
*/
|
||||
cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
|
||||
jne 1f
|
||||
|
||||
/* Called from fast path -- pop return address and jump to slow path */
|
||||
popq %rax
|
||||
jmp tracesys /* called from fast path */
|
||||
|
||||
1:
|
||||
/* Called from C */
|
||||
jmp *%rax /* called from C */
|
||||
END(stub_ptregs_64)
|
||||
|
||||
.macro ptregs_stub func
|
||||
ENTRY(ptregs_\func)
|
||||
leaq \func(%rip), %rax
|
||||
jmp stub_ptregs_64
|
||||
END(ptregs_\func)
|
||||
.endm
|
||||
|
||||
/* Instantiate ptregs_stub for each ptregs-using syscall */
|
||||
#define __SYSCALL_64_QUAL_(sym)
|
||||
#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
|
||||
#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
|
||||
#include <asm/syscalls_64.h>
|
||||
|
||||
.macro FORK_LIKE func
|
||||
ENTRY(stub_\func)
|
||||
|
|
|
@ -6,11 +6,14 @@
|
|||
#include <asm/asm-offsets.h>
|
||||
#include <asm/syscall.h>
|
||||
|
||||
#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
|
||||
#define __SYSCALL_64_QUAL_(sym) sym
|
||||
#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
|
||||
|
||||
#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
|
||||
#include <asm/syscalls_64.h>
|
||||
#undef __SYSCALL_64
|
||||
|
||||
#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
|
||||
#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
|
||||
|
||||
extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
12 common brk sys_brk
|
||||
13 64 rt_sigaction sys_rt_sigaction
|
||||
14 common rt_sigprocmask sys_rt_sigprocmask
|
||||
15 64 rt_sigreturn stub_rt_sigreturn
|
||||
15 64 rt_sigreturn stub_rt_sigreturn/ptregs
|
||||
16 64 ioctl sys_ioctl
|
||||
17 common pread64 sys_pread64
|
||||
18 common pwrite64 sys_pwrite64
|
||||
|
@ -62,10 +62,10 @@
|
|||
53 common socketpair sys_socketpair
|
||||
54 64 setsockopt sys_setsockopt
|
||||
55 64 getsockopt sys_getsockopt
|
||||
56 common clone stub_clone
|
||||
57 common fork stub_fork
|
||||
58 common vfork stub_vfork
|
||||
59 64 execve stub_execve
|
||||
56 common clone stub_clone/ptregs
|
||||
57 common fork stub_fork/ptregs
|
||||
58 common vfork stub_vfork/ptregs
|
||||
59 64 execve stub_execve/ptregs
|
||||
60 common exit sys_exit
|
||||
61 common wait4 sys_wait4
|
||||
62 common kill sys_kill
|
||||
|
@ -328,7 +328,7 @@
|
|||
319 common memfd_create sys_memfd_create
|
||||
320 common kexec_file_load sys_kexec_file_load
|
||||
321 common bpf sys_bpf
|
||||
322 64 execveat stub_execveat
|
||||
322 64 execveat stub_execveat/ptregs
|
||||
323 common userfaultfd sys_userfaultfd
|
||||
324 common membarrier sys_membarrier
|
||||
325 common mlock2 sys_mlock2
|
||||
|
@ -346,7 +346,7 @@
|
|||
517 x32 recvfrom compat_sys_recvfrom
|
||||
518 x32 sendmsg compat_sys_sendmsg
|
||||
519 x32 recvmsg compat_sys_recvmsg
|
||||
520 x32 execve stub_x32_execve
|
||||
520 x32 execve stub_x32_execve/ptregs
|
||||
521 x32 ptrace compat_sys_ptrace
|
||||
522 x32 rt_sigpending compat_sys_rt_sigpending
|
||||
523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait
|
||||
|
@ -371,4 +371,4 @@
|
|||
542 x32 getsockopt compat_sys_getsockopt
|
||||
543 x32 io_setup compat_sys_io_setup
|
||||
544 x32 io_submit compat_sys_io_submit
|
||||
545 x32 execveat stub_x32_execveat
|
||||
545 x32 execveat stub_x32_execveat/ptregs
|
||||
|
|
Loading…
Reference in New Issue