Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar: "A handful of x86 fixes: - a syscall ABI fix, fixing an Android breakage - a Xen PV guest fix relating to the RTC device, causing a non-working console - a Xen guest syscall stack frame fix - an MCE hotplug CPU crash fix" * 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/numachip: Fix NumaConnect2 MMCFG PCI access x86/entry: Restore traditional SYSENTER calling convention x86/entry: Fix some comments x86/paravirt: Prevent rtc_cmos platform device init on PV guests x86/xen: Avoid fast syscall path for Xen PV guests x86/mce: Ensure offline CPUs don't participate in rendezvous process
This commit is contained in:
commit
650e5455d8
|
@ -421,7 +421,7 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
|
|||
regs->ip = landing_pad;
|
||||
|
||||
/*
|
||||
* Fetch ECX from where the vDSO stashed it.
|
||||
* Fetch EBP from where the vDSO stashed it.
|
||||
*
|
||||
* WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
|
||||
*/
|
||||
|
@ -432,10 +432,10 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
|
|||
* Micro-optimization: the pointer we're following is explicitly
|
||||
* 32 bits, so it can't be out of range.
|
||||
*/
|
||||
__get_user(*(u32 *)&regs->cx,
|
||||
__get_user(*(u32 *)&regs->bp,
|
||||
(u32 __user __force *)(unsigned long)(u32)regs->sp)
|
||||
#else
|
||||
get_user(*(u32 *)&regs->cx,
|
||||
get_user(*(u32 *)&regs->bp,
|
||||
(u32 __user __force *)(unsigned long)(u32)regs->sp)
|
||||
#endif
|
||||
) {
|
||||
|
|
|
@ -292,7 +292,7 @@ ENTRY(entry_SYSENTER_32)
|
|||
movl TSS_sysenter_sp0(%esp), %esp
|
||||
sysenter_past_esp:
|
||||
pushl $__USER_DS /* pt_regs->ss */
|
||||
pushl %ecx /* pt_regs->cx */
|
||||
pushl %ebp /* pt_regs->sp (stashed in bp) */
|
||||
pushfl /* pt_regs->flags (except IF = 0) */
|
||||
orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
|
||||
pushl $__USER_CS /* pt_regs->cs */
|
||||
|
@ -308,8 +308,9 @@ sysenter_past_esp:
|
|||
|
||||
movl %esp, %eax
|
||||
call do_fast_syscall_32
|
||||
testl %eax, %eax
|
||||
jz .Lsyscall_32_done
|
||||
/* XEN PV guests always use IRET path */
|
||||
ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
|
||||
"jmp .Lsyscall_32_done", X86_FEATURE_XENPV
|
||||
|
||||
/* Opportunistic SYSEXIT */
|
||||
TRACE_IRQS_ON /* User mode traces as IRQs on. */
|
||||
|
|
|
@ -63,7 +63,7 @@ ENTRY(entry_SYSENTER_compat)
|
|||
|
||||
/* Construct struct pt_regs on stack */
|
||||
pushq $__USER32_DS /* pt_regs->ss */
|
||||
pushq %rcx /* pt_regs->sp */
|
||||
pushq %rbp /* pt_regs->sp (stashed in bp) */
|
||||
|
||||
/*
|
||||
* Push flags. This is nasty. First, interrupts are currently
|
||||
|
@ -82,14 +82,14 @@ ENTRY(entry_SYSENTER_compat)
|
|||
pushq %rdi /* pt_regs->di */
|
||||
pushq %rsi /* pt_regs->si */
|
||||
pushq %rdx /* pt_regs->dx */
|
||||
pushq %rcx /* pt_regs->cx (will be overwritten) */
|
||||
pushq %rcx /* pt_regs->cx */
|
||||
pushq $-ENOSYS /* pt_regs->ax */
|
||||
pushq %r8 /* pt_regs->r8 = 0 */
|
||||
pushq %r8 /* pt_regs->r9 = 0 */
|
||||
pushq %r8 /* pt_regs->r10 = 0 */
|
||||
pushq %r8 /* pt_regs->r11 = 0 */
|
||||
pushq %rbx /* pt_regs->rbx */
|
||||
pushq %rbp /* pt_regs->rbp */
|
||||
pushq %rbp /* pt_regs->rbp (will be overwritten) */
|
||||
pushq %r8 /* pt_regs->r12 = 0 */
|
||||
pushq %r8 /* pt_regs->r13 = 0 */
|
||||
pushq %r8 /* pt_regs->r14 = 0 */
|
||||
|
@ -121,8 +121,9 @@ sysenter_flags_fixed:
|
|||
|
||||
movq %rsp, %rdi
|
||||
call do_fast_syscall_32
|
||||
testl %eax, %eax
|
||||
jz .Lsyscall_32_done
|
||||
/* XEN PV guests always use IRET path */
|
||||
ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
|
||||
"jmp .Lsyscall_32_done", X86_FEATURE_XENPV
|
||||
jmp sysret32_from_system_call
|
||||
|
||||
sysenter_fix_flags:
|
||||
|
@ -178,7 +179,7 @@ ENTRY(entry_SYSCALL_compat)
|
|||
pushq %rdi /* pt_regs->di */
|
||||
pushq %rsi /* pt_regs->si */
|
||||
pushq %rdx /* pt_regs->dx */
|
||||
pushq %rcx /* pt_regs->cx (will be overwritten) */
|
||||
pushq %rbp /* pt_regs->cx (stashed in bp) */
|
||||
pushq $-ENOSYS /* pt_regs->ax */
|
||||
xorq %r8,%r8
|
||||
pushq %r8 /* pt_regs->r8 = 0 */
|
||||
|
@ -186,7 +187,7 @@ ENTRY(entry_SYSCALL_compat)
|
|||
pushq %r8 /* pt_regs->r10 = 0 */
|
||||
pushq %r8 /* pt_regs->r11 = 0 */
|
||||
pushq %rbx /* pt_regs->rbx */
|
||||
pushq %rbp /* pt_regs->rbp */
|
||||
pushq %rbp /* pt_regs->rbp (will be overwritten) */
|
||||
pushq %r8 /* pt_regs->r12 = 0 */
|
||||
pushq %r8 /* pt_regs->r13 = 0 */
|
||||
pushq %r8 /* pt_regs->r14 = 0 */
|
||||
|
@ -200,8 +201,9 @@ ENTRY(entry_SYSCALL_compat)
|
|||
|
||||
movq %rsp, %rdi
|
||||
call do_fast_syscall_32
|
||||
testl %eax, %eax
|
||||
jz .Lsyscall_32_done
|
||||
/* XEN PV guests always use IRET path */
|
||||
ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
|
||||
"jmp .Lsyscall_32_done", X86_FEATURE_XENPV
|
||||
|
||||
/* Opportunistic SYSRET */
|
||||
sysret32_from_system_call:
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Code for the vDSO. This version uses the old int $0x80 method.
|
||||
* AT_SYSINFO entry point
|
||||
*/
|
||||
|
||||
#include <asm/dwarf2.h>
|
||||
|
@ -21,35 +21,67 @@ __kernel_vsyscall:
|
|||
/*
|
||||
* Reshuffle regs so that any of the entry instructions
|
||||
* will preserve enough state.
|
||||
*
|
||||
* A really nice entry sequence would be:
|
||||
* pushl %edx
|
||||
* pushl %ecx
|
||||
* movl %esp, %ecx
|
||||
*
|
||||
* Unfortunately, naughty Android versions between July and December
|
||||
* 2015 actually hardcode the traditional Linux SYSENTER entry
|
||||
* sequence. That is severely broken for a number of reasons (ask
|
||||
* anyone with an AMD CPU, for example). Nonetheless, we try to keep
|
||||
* it working approximately as well as it ever worked.
|
||||
*
|
||||
* This link may elucidate some of the history:
|
||||
* https://android-review.googlesource.com/#/q/Iac3295376d61ef83e713ac9b528f3b50aa780cd7
|
||||
* personally, I find it hard to understand what's going on there.
|
||||
*
|
||||
* Note to future user developers: DO NOT USE SYSENTER IN YOUR CODE.
|
||||
* Execute an indirect call to the address in the AT_SYSINFO auxv
|
||||
* entry. That is the ONLY correct way to make a fast 32-bit system
|
||||
* call on Linux. (Open-coding int $0x80 is also fine, but it's
|
||||
* slow.)
|
||||
*/
|
||||
pushl %edx
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
CFI_REL_OFFSET edx, 0
|
||||
pushl %ecx
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
CFI_REL_OFFSET ecx, 0
|
||||
movl %esp, %ecx
|
||||
pushl %edx
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
CFI_REL_OFFSET edx, 0
|
||||
pushl %ebp
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
CFI_REL_OFFSET ebp, 0
|
||||
|
||||
#define SYSENTER_SEQUENCE "movl %esp, %ebp; sysenter"
|
||||
#define SYSCALL_SEQUENCE "movl %ecx, %ebp; syscall"
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */
|
||||
ALTERNATIVE_2 "", "sysenter", X86_FEATURE_SYSENTER32, \
|
||||
"syscall", X86_FEATURE_SYSCALL32
|
||||
ALTERNATIVE_2 "", SYSENTER_SEQUENCE, X86_FEATURE_SYSENTER32, \
|
||||
SYSCALL_SEQUENCE, X86_FEATURE_SYSCALL32
|
||||
#else
|
||||
ALTERNATIVE "", "sysenter", X86_FEATURE_SEP
|
||||
ALTERNATIVE "", SYSENTER_SEQUENCE, X86_FEATURE_SEP
|
||||
#endif
|
||||
|
||||
/* Enter using int $0x80 */
|
||||
movl (%esp), %ecx
|
||||
int $0x80
|
||||
GLOBAL(int80_landing_pad)
|
||||
|
||||
/* Restore ECX and EDX in case they were clobbered. */
|
||||
popl %ecx
|
||||
CFI_RESTORE ecx
|
||||
/*
|
||||
* Restore EDX and ECX in case they were clobbered. EBP is not
|
||||
* clobbered (the kernel restores it), but it's cleaner and
|
||||
* probably faster to pop it than to adjust ESP using addl.
|
||||
*/
|
||||
popl %ebp
|
||||
CFI_RESTORE ebp
|
||||
CFI_ADJUST_CFA_OFFSET -4
|
||||
popl %edx
|
||||
CFI_RESTORE edx
|
||||
CFI_ADJUST_CFA_OFFSET -4
|
||||
popl %ecx
|
||||
CFI_RESTORE ecx
|
||||
CFI_ADJUST_CFA_OFFSET -4
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
|
||||
|
|
|
@ -216,6 +216,7 @@
|
|||
#define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */
|
||||
#define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */
|
||||
#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
|
||||
#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
|
||||
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
|
||||
|
|
|
@ -19,6 +19,12 @@ static inline int paravirt_enabled(void)
|
|||
return pv_info.paravirt_enabled;
|
||||
}
|
||||
|
||||
static inline int paravirt_has_feature(unsigned int feature)
|
||||
{
|
||||
WARN_ON_ONCE(!pv_info.paravirt_enabled);
|
||||
return (pv_info.features & feature);
|
||||
}
|
||||
|
||||
static inline void load_sp0(struct tss_struct *tss,
|
||||
struct thread_struct *thread)
|
||||
{
|
||||
|
|
|
@ -70,9 +70,14 @@ struct pv_info {
|
|||
#endif
|
||||
|
||||
int paravirt_enabled;
|
||||
unsigned int features; /* valid only if paravirt_enabled is set */
|
||||
const char *name;
|
||||
};
|
||||
|
||||
#define paravirt_has(x) paravirt_has_feature(PV_SUPPORTED_##x)
|
||||
/* Supported features */
|
||||
#define PV_SUPPORTED_RTC (1<<0)
|
||||
|
||||
struct pv_init_ops {
|
||||
/*
|
||||
* Patch may replace one of the defined code sequences with
|
||||
|
|
|
@ -472,6 +472,7 @@ static inline unsigned long current_top_of_stack(void)
|
|||
#else
|
||||
#define __cpuid native_cpuid
|
||||
#define paravirt_enabled() 0
|
||||
#define paravirt_has(x) 0
|
||||
|
||||
static inline void load_sp0(struct tss_struct *tss,
|
||||
struct thread_struct *thread)
|
||||
|
|
|
@ -193,20 +193,17 @@ static int __init numachip_system_init(void)
|
|||
case 1:
|
||||
init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
|
||||
numachip_apic_icr_write = numachip1_apic_icr_write;
|
||||
x86_init.pci.arch_init = pci_numachip_init;
|
||||
break;
|
||||
case 2:
|
||||
init_extra_mapping_uc(NUMACHIP2_LCSR_BASE, NUMACHIP2_LCSR_SIZE);
|
||||
numachip_apic_icr_write = numachip2_apic_icr_write;
|
||||
|
||||
/* Use MCFG config cycles rather than locked CF8 cycles */
|
||||
raw_pci_ops = &pci_mmcfg;
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
|
||||
x86_init.pci.arch_init = pci_numachip_init;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -999,6 +999,17 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
|||
int flags = MF_ACTION_REQUIRED;
|
||||
int lmce = 0;
|
||||
|
||||
/* If this CPU is offline, just bail out. */
|
||||
if (cpu_is_offline(smp_processor_id())) {
|
||||
u64 mcgstatus;
|
||||
|
||||
mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
|
||||
if (mcgstatus & MCG_STATUS_RIPV) {
|
||||
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
ist_enter(regs);
|
||||
|
||||
this_cpu_inc(mce_exception_count);
|
||||
|
|
|
@ -200,6 +200,9 @@ static __init int add_rtc_cmos(void)
|
|||
}
|
||||
#endif
|
||||
|
||||
if (paravirt_enabled() && !paravirt_has(RTC))
|
||||
return -ENODEV;
|
||||
|
||||
platform_device_register(&rtc_device);
|
||||
dev_info(&rtc_device.dev,
|
||||
"registered platform RTC device (no PNP device found)\n");
|
||||
|
|
|
@ -1414,6 +1414,7 @@ __init void lguest_init(void)
|
|||
pv_info.kernel_rpl = 1;
|
||||
/* Everyone except Xen runs with this set. */
|
||||
pv_info.shared_kernel_pmd = 1;
|
||||
pv_info.features = 0;
|
||||
|
||||
/*
|
||||
* We set up all the lguest overrides for sensitive operations. These
|
||||
|
|
|
@ -1192,7 +1192,7 @@ static const struct pv_info xen_info __initconst = {
|
|||
#ifdef CONFIG_X86_64
|
||||
.extra_user_64bit_cs = FLAT_USER_CS64,
|
||||
#endif
|
||||
|
||||
.features = 0,
|
||||
.name = "Xen",
|
||||
};
|
||||
|
||||
|
@ -1535,6 +1535,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
|
|||
|
||||
/* Install Xen paravirt ops */
|
||||
pv_info = xen_info;
|
||||
if (xen_initial_domain())
|
||||
pv_info.features |= PV_SUPPORTED_RTC;
|
||||
pv_init_ops = xen_init_ops;
|
||||
pv_apic_ops = xen_apic_ops;
|
||||
if (!xen_pvh_domain()) {
|
||||
|
@ -1886,8 +1888,10 @@ EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
|
|||
|
||||
static void xen_set_cpu_features(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (xen_pv_domain())
|
||||
if (xen_pv_domain()) {
|
||||
clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
|
||||
set_cpu_cap(c, X86_FEATURE_XENPV);
|
||||
}
|
||||
}
|
||||
|
||||
const struct hypervisor_x86 x86_hyper_xen = {
|
||||
|
|
Loading…
Reference in New Issue