Early fixes for x86. Instead of the (botched) revert, the
lockdep/might_sleep splat has a real fix provided by Andrea.
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.22 (GNU/Linux)

iQEcBAABAgAGBQJYV/xYAAoJEL/70l94x66DalgH/0dMqN9nGDJfCR3tmEJEs4AH
WiFFsu7rHfXac2QeO5Eq30XLFghQjlcj9GkBYgF33wU/VMQ7xcDje+lHKQZ8e4b/
gc4BSwDGPpWW6IXfVVeMz9Zxfi7WE6J03kQQ+uxDbwaF65AsrSQQq5SCgcmo8Gof
SS138Ws6v5/Us/UAJNI8x5xhcJnE4p4YV2yzTIJkKmZDEAYY9Y9CuKd3na4lRjoc
Seuigldo1zY6oYF1Xqwb1efik1xTH4oJdmG8dnf1fvBw0P70Gs4s1eFXm6TFQhTK
mdZxfxMw0YtzxroAMwXNjPcTy+Sn4Zx47dpHMspnN2cPePw53YfPS25en9/W09g=
=jQbY
-----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
 "Early fixes for x86. Instead of the (botched) revert, the
  lockdep/might_sleep splat has a real fix provided by Andrea"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm: nVMX: Allow L1 to intercept software exceptions (#BP and #OF)
  kvm: take srcu lock around kvm_steal_time_set_preempted()
  kvm: fix schedule in atomic in kvm_steal_time_set_preempted()
  KVM: hyperv: fix locking of struct kvm_hv fields
  KVM: x86: Expose Intel AVX512IFMA/AVX512VBMI/SHA features to guest.
  kvm: nVMX: Correct a VMX instruction error code for VMPTRLD
commit 45d36906e2
Documentation/virtual/kvm/locking.txt
@@ -13,8 +13,12 @@ The acquisition orders for mutexes are as follows:
 - kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
   them together is quite rare.
 
-For spinlocks, kvm_lock is taken outside kvm->mmu_lock. Everything
-else is a leaf: no other lock is taken inside the critical sections.
+On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
+
+For spinlocks, kvm_lock is taken outside kvm->mmu_lock.
+
+Everything else is a leaf: no other lock is taken inside the critical
+sections.
 
 2: Exception
 ------------
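Editor's aside: the documentation hunk above records a new ordering rule, with vcpu->mutex as the outer lock and kvm->arch.hyperv.hv_lock as the inner one. As a rough illustration of why a single documented order avoids deadlock, here is a minimal userspace sketch; plain pthreads, and the names vcpu_mutex, hv_lock and hv_guest_os_id are stand-ins rather than the kernel objects. Both threads take the outer mutex before the inner one, so neither can block on a lock the other already holds.

/* Illustrative only: a userspace analogue of the documented lock order.
 * Both threads acquire "vcpu_mutex" (outer) before "hv_lock" (inner);
 * taking them in the opposite order in one thread could deadlock.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t vcpu_mutex = PTHREAD_MUTEX_INITIALIZER; /* outer */
static pthread_mutex_t hv_lock    = PTHREAD_MUTEX_INITIALIZER; /* inner */
static int hv_guest_os_id;  /* stand-in for a field protected by hv_lock */

static void *worker(void *arg)
{
        int id = *(int *)arg;

        pthread_mutex_lock(&vcpu_mutex);   /* outer lock first */
        pthread_mutex_lock(&hv_lock);      /* inner lock second */
        hv_guest_os_id = id;
        printf("thread %d set hv_guest_os_id\n", id);
        pthread_mutex_unlock(&hv_lock);
        pthread_mutex_unlock(&vcpu_mutex);
        return NULL;
}

int main(void)
{
        pthread_t t1, t2;
        int a = 1, b = 2;

        pthread_create(&t1, NULL, worker, &a);
        pthread_create(&t2, NULL, worker, &b);
        pthread_join(t1, NULL);
        pthread_join(t2, NULL);
        return 0;
}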
arch/x86/include/asm/kvm_host.h
@@ -704,6 +704,7 @@ struct kvm_apic_map {
 
 /* Hyper-V emulation context */
 struct kvm_hv {
+        struct mutex hv_lock;
         u64 hv_guest_os_id;
         u64 hv_hypercall;
         u64 hv_tsc_page;
arch/x86/kvm/cpuid.c
@@ -373,16 +373,17 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
         const u32 kvm_cpuid_7_0_ebx_x86_features =
                 F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
                 F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
-                F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
-                F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
-                F(AVX512BW) | F(AVX512VL);
+                F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
+                F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
+                F(SHA_NI) | F(AVX512BW) | F(AVX512VL);
 
         /* cpuid 0xD.1.eax */
         const u32 kvm_cpuid_D_1_eax_x86_features =
                 F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
 
         /* cpuid 7.0.ecx*/
-        const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/;
+        const u32 kvm_cpuid_7_0_ecx_x86_features =
+                F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/;
 
         /* cpuid 7.0.edx*/
         const u32 kvm_cpuid_7_0_edx_x86_features =
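Editor's aside: the cpuid.c hunk above lets a guest see AVX512IFMA, AVX512VBMI and SHA-NI in CPUID leaf 7. A guest can verify the result with a query like the sketch below; the bit positions used (EBX[21] = AVX512IFMA, EBX[29] = SHA, ECX[1] = AVX512VBMI) follow the Intel SDM feature-bit assignments, and __get_cpuid_count comes from the compiler's cpuid.h. This is an illustration to run inside a guest, not part of the patch.

/* Guest-side check for the newly exposed features: query CPUID leaf 7,
 * subleaf 0, and test the feature bits (EBX[21]=AVX512IFMA, EBX[29]=SHA,
 * ECX[1]=AVX512VBMI, per the Intel SDM bit assignments).
 */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
                puts("CPUID leaf 7 not supported");
                return 1;
        }
        printf("AVX512IFMA: %s\n", (ebx & (1u << 21)) ? "yes" : "no");
        printf("SHA-NI:     %s\n", (ebx & (1u << 29)) ? "yes" : "no");
        printf("AVX512VBMI: %s\n", (ecx & (1u <<  1)) ? "yes" : "no");
        return 0;
}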
arch/x86/kvm/hyperv.c
@@ -852,6 +852,10 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
         if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
                 return;
 
+        mutex_lock(&kvm->arch.hyperv.hv_lock);
+        if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+                goto out_unlock;
+
         gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
         /*
          * Because the TSC parameters only vary when there is a
@@ -859,7 +863,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
          */
         if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
                                     &tsc_seq, sizeof(tsc_seq))))
-                return;
+                goto out_unlock;
 
         /*
          * While we're computing and writing the parameters, force the
@@ -868,15 +872,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
         hv->tsc_ref.tsc_sequence = 0;
         if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
                             &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
-                return;
+                goto out_unlock;
 
         if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
-                return;
+                goto out_unlock;
 
         /* Ensure sequence is zero before writing the rest of the struct. */
         smp_wmb();
         if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
-                return;
+                goto out_unlock;
 
         /*
          * Now switch to the TSC page mechanism by writing the sequence.
@@ -891,6 +895,8 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
         hv->tsc_ref.tsc_sequence = tsc_seq;
         kvm_write_guest(kvm, gfn_to_gpa(gfn),
                         &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
+out_unlock:
+        mutex_unlock(&kvm->arch.hyperv.hv_lock);
 }
 
 static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
@@ -1142,9 +1148,9 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
         if (kvm_hv_msr_partition_wide(msr)) {
                 int r;
 
-                mutex_lock(&vcpu->kvm->lock);
+                mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
                 r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
-                mutex_unlock(&vcpu->kvm->lock);
+                mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
                 return r;
         } else
                 return kvm_hv_set_msr(vcpu, msr, data, host);
@@ -1155,9 +1161,9 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
         if (kvm_hv_msr_partition_wide(msr)) {
                 int r;
 
-                mutex_lock(&vcpu->kvm->lock);
+                mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
                 r = kvm_hv_get_msr_pw(vcpu, msr, pdata);
-                mutex_unlock(&vcpu->kvm->lock);
+                mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
                 return r;
         } else
                 return kvm_hv_get_msr(vcpu, msr, pdata);
@@ -1165,7 +1171,7 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 
 bool kvm_hv_hypercall_enabled(struct kvm *kvm)
 {
-        return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
+        return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
 }
 
 static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
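Editor's aside: because the hyperv.c hunks above introduce kvm->arch.hyperv.hv_lock, every early return taken after the lock is acquired becomes goto out_unlock, so no path can leave kvm_hv_setup_tsc_page() with the mutex held. Below is a minimal, purely illustrative userspace sketch of that cleanup pattern; the pthread mutex and the helper names are hypothetical stand-ins, not the kernel code, and only the shape of the change is shown.

/* Illustrative sketch of the "goto out_unlock" cleanup pattern used above:
 * once the lock is held, every failure path jumps to a single label that
 * releases it.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t hv_lock = PTHREAD_MUTEX_INITIALIZER;

static bool read_guest(int *value)      /* stand-in for kvm_read_guest() */
{
        *value = 42;
        return true;
}

static void setup_tsc_page(void)
{
        int seq;

        pthread_mutex_lock(&hv_lock);

        if (!read_guest(&seq))
                goto out_unlock;        /* not "return": the lock is held */

        printf("sequence read back: %d\n", seq);

out_unlock:
        pthread_mutex_unlock(&hv_lock);
}

int main(void)
{
        setup_tsc_page();
        return 0;
}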
arch/x86/kvm/vmx.c
@@ -1389,10 +1389,10 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
         return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
 }
 
-static inline bool is_exception(u32 intr_info)
+static inline bool is_nmi(u32 intr_info)
 {
         return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
-                == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK);
+                == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
 }
 
 static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
@@ -5728,7 +5728,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
         if (is_machine_check(intr_info))
                 return handle_machine_check(vcpu);
 
-        if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
+        if (is_nmi(intr_info))
                 return 1;  /* already handled by vmx_vcpu_run() */
 
         if (is_no_device(intr_info)) {
@@ -7122,7 +7122,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
 
                 if (vmptr == vmx->nested.vmxon_ptr) {
                         nested_vmx_failValid(vcpu,
-                                             VMXERR_VMCLEAR_VMXON_POINTER);
+                                             VMXERR_VMPTRLD_VMXON_POINTER);
                         return kvm_skip_emulated_instruction(vcpu);
                 }
                 break;
@@ -8170,7 +8170,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 
         switch (exit_reason) {
         case EXIT_REASON_EXCEPTION_NMI:
-                if (!is_exception(intr_info))
+                if (is_nmi(intr_info))
                         return false;
                 else if (is_page_fault(intr_info))
                         return enable_ept;
@@ -8765,8 +8765,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
                 kvm_machine_check();
 
         /* We need to handle NMIs before interrupts are enabled */
-        if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
-            (exit_intr_info & INTR_INFO_VALID_MASK)) {
+        if (is_nmi(exit_intr_info)) {
                 kvm_before_handle_nmi(&vmx->vcpu);
                 asm("int $2");
                 kvm_after_handle_nmi(&vmx->vcpu);
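Editor's aside: the vmx.c hunks rename the misleadingly named is_exception() helper to is_nmi() and use it consistently in handle_exception(), the nested exit-handling switch and vmx_complete_atomic_exit(), so that software exceptions such as #BP and #OF can be reflected to L1. The standalone sketch below decodes the VMX interrupt-information word the same way; the constants mirror the Intel SDM layout (bit 31 is the valid flag, bits 10:8 the event type) as also defined in arch/x86/include/asm/vmx.h, restated here only so the example compiles on its own.

/* Standalone illustration of the decoding behind is_nmi(): event type
 * 2 = NMI, 3 = hardware exception, 6 = software exception (INT3/INTO).
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define INTR_INFO_VALID_MASK      0x80000000u
#define INTR_INFO_INTR_TYPE_MASK  0x00000700u
#define INTR_TYPE_NMI_INTR        (2u << 8)
#define INTR_TYPE_HARD_EXCEPTION  (3u << 8)
#define INTR_TYPE_SOFT_EXCEPTION  (6u << 8)

static bool is_nmi(uint32_t intr_info)
{
        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
                == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
}

int main(void)
{
        /* vector 2 delivered as an NMI vs. vector 3 (#BP) as a software exception */
        uint32_t nmi_info = INTR_INFO_VALID_MASK | INTR_TYPE_NMI_INTR | 2;
        uint32_t bp_info  = INTR_INFO_VALID_MASK | INTR_TYPE_SOFT_EXCEPTION | 3;

        printf("nmi_info: is_nmi=%d\n", is_nmi(nmi_info));  /* prints 1 */
        printf("bp_info:  is_nmi=%d\n", is_nmi(bp_info));   /* prints 0 */
        return 0;
}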
arch/x86/kvm/x86.c
@@ -2844,7 +2844,24 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
+        int idx;
+        /*
+         * Disable page faults because we're in atomic context here.
+         * kvm_write_guest_offset_cached() would call might_fault()
+         * that relies on pagefault_disable() to tell if there's a
+         * bug. NOTE: the write to guest memory may not go through if
+         * during postcopy live migration or if there's heavy guest
+         * paging.
+         */
+        pagefault_disable();
+        /*
+         * kvm_memslots() will be called by
+         * kvm_write_guest_offset_cached() so take the srcu lock.
+         */
+        idx = srcu_read_lock(&vcpu->kvm->srcu);
         kvm_steal_time_set_preempted(vcpu);
+        srcu_read_unlock(&vcpu->kvm->srcu, idx);
+        pagefault_enable();
         kvm_x86_ops->vcpu_put(vcpu);
         kvm_put_guest_fpu(vcpu);
         vcpu->arch.last_host_tsc = rdtsc();
@@ -7881,6 +7898,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
         raw_spin_lock_init(&kvm->arch.tsc_write_lock);
         mutex_init(&kvm->arch.apic_map_lock);
+        mutex_init(&kvm->arch.hyperv.hv_lock);
         spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
 
         kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
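Editor's aside: the kvm_arch_vcpu_put() hunk brackets kvm_steal_time_set_preempted() with pagefault_disable()/pagefault_enable() because the function runs in atomic context, and with srcu_read_lock()/srcu_read_unlock() because the cached guest write looks up kvm_memslots(). The userspace analogue below only illustrates that bracketing; a pthread_rwlock_t stands in for SRCU, a thread-local flag stands in for pagefault_disable(), and every name in it is hypothetical rather than a kernel API.

/* Userspace analogue of the bracketing above: guard the "memslots" lookup
 * with a read-side lock and wrap the possibly-faulting write in a
 * "no sleeping allowed" flag. Illustrative only.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t slots_lock = PTHREAD_RWLOCK_INITIALIZER;
static _Thread_local bool pagefaults_disabled;
static int steal_time_preempted;   /* stand-in for the guest-visible flag */

static void write_guest_cached(int value)
{
        /* In the kernel this write may fault; here we only check the flag. */
        if (pagefaults_disabled)
                printf("write performed without sleeping\n");
        steal_time_preempted = value;
}

static void vcpu_put(void)
{
        pagefaults_disabled = true;           /* pagefault_disable()   */
        pthread_rwlock_rdlock(&slots_lock);   /* srcu_read_lock()      */
        write_guest_cached(1);                /* set_preempted()       */
        pthread_rwlock_unlock(&slots_lock);   /* srcu_read_unlock()    */
        pagefaults_disabled = false;          /* pagefault_enable()    */
}

int main(void)
{
        vcpu_put();
        return 0;
}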