x86/entry/64: Clean up and document espfix64 stack setup

The espfix64 setup code was a bit inscrutable and contained an
unnecessary push of RAX.  Remove that push, update all the stack
offsets to match, and document the whole mess.

Reported-By: Borislav Petkov <bp@alien8.de>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/e5459eb10cf1175c8b36b840bc425f210d045f35.1473717910.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Andy Lutomirski 2016-09-12 15:05:51 -07:00 committed by Ingo Molnar
parent 1ef0199a1a
commit 85063fac1f
1 changed files with 53 additions and 11 deletions

View File

@ -586,27 +586,69 @@ native_irq_return_iret:
#ifdef CONFIG_X86_ESPFIX64
native_irq_return_ldt:
pushq %rax
pushq %rdi
/*
* We are running with user GSBASE. All GPRs contain their user
* values. We have a percpu ESPFIX stack that is eight slots
* long (see ESPFIX_STACK_SIZE). espfix_waddr points to the bottom
* of the ESPFIX stack.
*
* We clobber RAX and RDI in this code. We stash RDI on the
* normal stack and RAX on the ESPFIX stack.
*
* The ESPFIX stack layout we set up looks like this:
*
* --- top of ESPFIX stack ---
* SS
* RSP
* RFLAGS
* CS
* RIP <-- RSP points here when we're done
* RAX <-- espfix_waddr points here
* --- bottom of ESPFIX stack ---
*/
pushq %rdi /* Stash user RDI */
SWAPGS
movq PER_CPU_VAR(espfix_waddr), %rdi
movq %rax, (0*8)(%rdi) /* RAX */
movq (2*8)(%rsp), %rax /* RIP */
movq %rax, (0*8)(%rdi) /* user RAX */
movq (1*8)(%rsp), %rax /* user RIP */
movq %rax, (1*8)(%rdi)
movq (3*8)(%rsp), %rax /* CS */
movq (2*8)(%rsp), %rax /* user CS */
movq %rax, (2*8)(%rdi)
movq (4*8)(%rsp), %rax /* RFLAGS */
movq (3*8)(%rsp), %rax /* user RFLAGS */
movq %rax, (3*8)(%rdi)
movq (6*8)(%rsp), %rax /* SS */
movq (5*8)(%rsp), %rax /* user SS */
movq %rax, (5*8)(%rdi)
movq (5*8)(%rsp), %rax /* RSP */
movq (4*8)(%rsp), %rax /* user RSP */
movq %rax, (4*8)(%rdi)
andl $0xffff0000, %eax
popq %rdi
/* Now RAX == RSP. */
andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */
popq %rdi /* Restore user RDI */
/*
* espfix_stack[31:16] == 0. The page tables are set up such that
* (espfix_stack | (X & 0xffff0000)) points to a read-only alias of
* espfix_waddr for any X. That is, there are 65536 RO aliases of
* the same page. Set up RSP so that RSP[31:16] contains the
* respective 16 bits of the /userspace/ RSP and RSP nonetheless
* still points to an RO alias of the ESPFIX stack.
*/
orq PER_CPU_VAR(espfix_stack), %rax
SWAPGS
movq %rax, %rsp
popq %rax
/*
* At this point, we cannot write to the stack any more, but we can
* still read.
*/
popq %rax /* Restore user RAX */
/*
* RSP now points to an ordinary IRET frame, except that the page
* is read-only and RSP[31:16] are preloaded with the userspace
* values. We can now IRET back to userspace.
*/
jmp native_irq_return_iret
#endif
END(common_interrupt)