x86/asm: Rewrite sync_core() to use IRET-to-self
Aside from being excessively slow, CPUID is problematic: Linux runs on a handful of CPUs that don't have CPUID. Use IRET-to-self instead. IRET-to-self works everywhere, so it makes testing easy. For reference, on my laptop, IRET-to-self is ~110ns, CPUID(eax=1, ecx=0) is ~83ns on native and very very slow under KVM, and MOV-to-CR2 is ~42ns. While we're at it: sync_core() serves a very specific purpose. Document it. Signed-off-by: Andy Lutomirski <luto@kernel.org> Cc: Juergen Gross <jgross@suse.com> Cc: One Thousand Gnomes <gnomes@lxorguk.ukuu.org.uk> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Brian Gerst <brgerst@gmail.com> Cc: Matthew Whitehead <tedheadster@gmail.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Henrique de Moraes Holschuh <hmh@hmh.eng.br> Cc: Andrew Cooper <andrew.cooper3@citrix.com> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: xen-devel <Xen-devel@lists.xen.org> Link: http://lkml.kernel.org/r/5c79f0225f68bc8c40335612bf624511abb78941.1481307769.git.luto@kernel.org Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
This commit is contained in:
parent
484d0e5c79
commit
c198b121b1
|
@ -602,33 +602,69 @@ static __always_inline void cpu_relax(void)
|
|||
rep_nop();
|
||||
}
|
||||
|
||||
/* Stop speculative execution and prefetching of modified code. */
|
||||
/*
|
||||
* This function forces the icache and prefetched instruction stream to
|
||||
* catch up with reality in two very specific cases:
|
||||
*
|
||||
* a) Text was modified using one virtual address and is about to be executed
|
||||
* from the same physical page at a different virtual address.
|
||||
*
|
||||
* b) Text was modified on a different CPU, may subsequently be
|
||||
* executed on this CPU, and you want to make sure the new version
|
||||
* gets executed. This generally means you're calling this in an IPI.
|
||||
*
|
||||
* If you're calling this for a different reason, you're probably doing
|
||||
* it wrong.
|
||||
*/
|
||||
static inline void sync_core(void)
|
||||
{
|
||||
int tmp;
|
||||
/*
|
||||
* There are quite a few ways to do this. IRET-to-self is nice
|
||||
* because it works on every CPU, at any CPL (so it's compatible
|
||||
* with paravirtualization), and it never exits to a hypervisor.
|
||||
* The only down sides are that it's a bit slow (it seems to be
|
||||
* a bit more than 2x slower than the fastest options) and that
|
||||
* it unmasks NMIs. The "push %cs" is needed because, in
|
||||
* paravirtual environments, __KERNEL_CS may not be a valid CS
|
||||
* value when we do IRET directly.
|
||||
*
|
||||
* In case NMI unmasking or performance ever becomes a problem,
|
||||
* the next best option appears to be MOV-to-CR2 and an
|
||||
* unconditional jump. That sequence also works on all CPUs,
|
||||
* but it will fault at CPL3 (i.e. Xen PV and lguest).
|
||||
*
|
||||
* CPUID is the conventional way, but it's nasty: it doesn't
|
||||
* exist on some 486-like CPUs, and it usually exits to a
|
||||
* hypervisor.
|
||||
*
|
||||
* Like all of Linux's memory ordering operations, this is a
|
||||
* compiler barrier as well.
|
||||
*/
|
||||
register void *__sp asm(_ASM_SP);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* Do a CPUID if available, otherwise do a jump. The jump
|
||||
* can conveniently enough be the jump around CPUID.
|
||||
*/
|
||||
asm volatile("cmpl %2,%1\n\t"
|
||||
"jl 1f\n\t"
|
||||
"cpuid\n"
|
||||
"1:"
|
||||
: "=a" (tmp)
|
||||
: "rm" (boot_cpu_data.cpuid_level), "ri" (0), "0" (1)
|
||||
: "ebx", "ecx", "edx", "memory");
|
||||
asm volatile (
|
||||
"pushfl\n\t"
|
||||
"pushl %%cs\n\t"
|
||||
"pushl $1f\n\t"
|
||||
"iret\n\t"
|
||||
"1:"
|
||||
: "+r" (__sp) : : "memory");
|
||||
#else
|
||||
/*
|
||||
* CPUID is a barrier to speculative execution.
|
||||
* Prefetched instructions are automatically
|
||||
* invalidated when modified.
|
||||
*/
|
||||
asm volatile("cpuid"
|
||||
: "=a" (tmp)
|
||||
: "0" (1)
|
||||
: "ebx", "ecx", "edx", "memory");
|
||||
unsigned int tmp;
|
||||
|
||||
asm volatile (
|
||||
"mov %%ss, %0\n\t"
|
||||
"pushq %q0\n\t"
|
||||
"pushq %%rsp\n\t"
|
||||
"addq $8, (%%rsp)\n\t"
|
||||
"pushfq\n\t"
|
||||
"mov %%cs, %0\n\t"
|
||||
"pushq %q0\n\t"
|
||||
"pushq $1f\n\t"
|
||||
"iretq\n\t"
|
||||
"1:"
|
||||
: "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue