* Xen timer fixes
* Documentation formatting fixes
* Make rseq selftest compatible with glibc-2.35
* Fix handling of illegal LEA reg, reg
* Cleanup creation of debugfs entries
* Fix steal time cache handling bug
* Fixes for MMIO caching
* Optimize computation of number of LBRs
* Fix uninitialized field in guest_maxphyaddr < host_maxphyaddr path

-----BEGIN PGP SIGNATURE-----

iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmL0qwIUHHBib256aW5p
QHJlZGhhdC5jb20ACgkQv/vSX3jHroML1gf/SK6by+Gi0r7WSkrDjU94PKZ8D6Y3
fErMhratccc9IfL3p90IjCVhEngfdQf5UVHExA5TswgHHAJTpECzuHya9TweQZc5
2rrTvufup0MNALfzkSijrcI80CBvrJc6JyOCkv0BLp7yqXUrnrm0OOMV2XniS7y0
YNn2ZCy44tLqkNiQrLhJQg3EsXu9l7okGpHSVO6iZwC7KKHvYkbscVFa/AOlaAwK
WOZBB+1Ee+/pWhxsngM1GwwM3ZNU/jXOSVjew5plnrD4U7NYXIDATszbZAuNyxqV
5gi+wvTF1x9dC6Tgd3qF7ouAqtT51BdRYaI9aYHOYgvzqdNFHWJu3XauDQ==
=vI6Q
-----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more kvm updates from Paolo Bonzini:

 - Xen timer fixes

 - Documentation formatting fixes

 - Make rseq selftest compatible with glibc-2.35

 - Fix handling of illegal LEA reg, reg

 - Cleanup creation of debugfs entries

 - Fix steal time cache handling bug

 - Fixes for MMIO caching

 - Optimize computation of number of LBRs

 - Fix uninitialized field in guest_maxphyaddr < host_maxphyaddr path

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (26 commits)
  KVM: x86/MMU: properly format KVM_CAP_VM_DISABLE_NX_HUGE_PAGES capability table
  Documentation: KVM: extend KVM_CAP_VM_DISABLE_NX_HUGE_PAGES heading underline
  KVM: VMX: Adjust number of LBR records for PERF_CAPABILITIES at refresh
  KVM: VMX: Use proper type-safe functions for vCPU => LBRs helpers
  KVM: x86: Refresh PMU after writes to MSR_IA32_PERF_CAPABILITIES
  KVM: selftests: Test all possible "invalid" PERF_CAPABILITIES.LBR_FMT vals
  KVM: selftests: Use getcpu() instead of sched_getcpu() in rseq_test
  KVM: selftests: Make rseq compatible with glibc-2.35
  KVM: Actually create debugfs in kvm_create_vm()
  KVM: Pass the name of the VM fd to kvm_create_vm_debugfs()
  KVM: Get an fd before creating the VM
  KVM: Shove vcpu stats_id init into kvm_vcpu_init()
  KVM: Shove vm stats_id init into kvm_create_vm()
  KVM: x86/mmu: Add sanity check that MMIO SPTE mask doesn't overlap gen
  KVM: x86/mmu: rename trace function name for asynchronous page fault
  KVM: x86/xen: Stop Xen timer before changing IRQ
  KVM: x86/xen: Initialize Xen timer only once
  KVM: SVM: Disable SEV-ES support if MMIO caching is disable
  KVM: x86/mmu: Fully re-evaluate MMIO caching when SPTE masks change
  KVM: x86: Tag kvm_mmu_x86_module_init() with __init
  ...
commit e18a90427c
@@ -8262,15 +8262,15 @@ dump related UV data. Also the vcpu ioctl `KVM_S390_PV_CPU_COMMAND` is
 available and supports the `KVM_PV_DUMP_CPU` subcommand.
 
 8.38 KVM_CAP_VM_DISABLE_NX_HUGE_PAGES
----------------------------
+-------------------------------------
 
-:Capability KVM_CAP_VM_DISABLE_NX_HUGE_PAGES
+:Capability: KVM_CAP_VM_DISABLE_NX_HUGE_PAGES
 :Architectures: x86
 :Type: vm
 :Parameters: arg[0] must be 0.
-:Returns 0 on success, -EPERM if the userspace process does not
-	 have CAP_SYS_BOOT, -EINVAL if args[0] is not 0 or any vCPUs have been
-	 created.
+:Returns: 0 on success, -EPERM if the userspace process does not
+	  have CAP_SYS_BOOT, -EINVAL if args[0] is not 0 or any vCPUs have been
+	  created.
 
 This capability disables the NX huge pages mitigation for iTLB MULTIHIT.
 
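For reference, a minimal userspace sketch of how the capability documented above would be enabled, assuming the standard KVM_ENABLE_CAP ioctl on the VM fd and a uapi <linux/kvm.h> recent enough to define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES. Per the documentation it must be issued by a task with CAP_SYS_BOOT, with args[0] set to 0, and before any vCPU is created; error handling is trimmed to the essentials and this is not taken from the patch itself:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	/* Open the KVM subsystem and create a plain VM (machine type 0). */
	int sys_fd = open("/dev/kvm", O_RDWR);
	int vm_fd = ioctl(sys_fd, KVM_CREATE_VM, 0);

	/* Must be done before creating vCPUs and requires CAP_SYS_BOOT. */
	struct kvm_enable_cap cap;
	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_VM_DISABLE_NX_HUGE_PAGES;
	cap.args[0] = 0;	/* arg[0] must be 0 per the documentation above */

	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
		perror("KVM_ENABLE_CAP(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES)");
	return 0;
}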
@@ -1704,7 +1704,7 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
 #define kvm_arch_pmi_in_guest(vcpu) \
 	((vcpu) && (vcpu)->arch.handling_intr_from_guest)
 
-void kvm_mmu_x86_module_init(void);
+void __init kvm_mmu_x86_module_init(void);
 int kvm_mmu_vendor_module_init(void);
 void kvm_mmu_vendor_module_exit(void);
 
@@ -4578,6 +4578,10 @@ static const struct mode_dual mode_dual_63 = {
 	N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
 };
 
+static const struct instr_dual instr_dual_8d = {
+	D(DstReg | SrcMem | ModRM | NoAccess), N
+};
+
 static const struct opcode opcode_table[256] = {
 	/* 0x00 - 0x07 */
 	F6ALU(Lock, em_add),

@@ -4634,7 +4638,7 @@ static const struct opcode opcode_table[256] = {
 	I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
 	I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
 	I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
-	D(ModRM | SrcMem | NoAccess | DstReg),
+	ID(0, &instr_dual_8d),
 	I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
 	G(0, group1A),
 	/* 0x90 - 0x97 */
@@ -2284,10 +2284,12 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u64 val;
 
-	if (apic_x2apic_mode(apic))
-		kvm_lapic_msr_read(apic, offset, &val);
-	else
+	if (apic_x2apic_mode(apic)) {
+		if (KVM_BUG_ON(kvm_lapic_msr_read(apic, offset, &val), vcpu->kvm))
+			return;
+	} else {
 		val = kvm_lapic_get_reg(apic, offset);
+	}
 
 	/*
 	 * ICR is a single 64-bit register when x2APIC is enabled. For legacy
@@ -6,6 +6,8 @@
 #include "kvm_cache_regs.h"
 #include "cpuid.h"
 
+extern bool __read_mostly enable_mmio_caching;
+
 #define PT_WRITABLE_SHIFT 1
 #define PT_USER_SHIFT 2
 
@@ -4164,7 +4164,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 	if (!fault->prefetch && kvm_can_do_async_pf(vcpu)) {
 		trace_kvm_try_async_get_page(fault->addr, fault->gfn);
 		if (kvm_find_async_pf_gfn(vcpu, fault->gfn)) {
-			trace_kvm_async_pf_doublefault(fault->addr, fault->gfn);
+			trace_kvm_async_pf_repeated_fault(fault->addr, fault->gfn);
 			kvm_make_request(KVM_REQ_APF_HALT, vcpu);
 			return RET_PF_RETRY;
 		} else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) {

@@ -6697,11 +6697,15 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
 /*
  * nx_huge_pages needs to be resolved to true/false when kvm.ko is loaded, as
  * its default value of -1 is technically undefined behavior for a boolean.
+ * Forward the module init call to SPTE code so that it too can handle module
+ * params that need to be resolved/snapshot.
  */
-void kvm_mmu_x86_module_init(void)
+void __init kvm_mmu_x86_module_init(void)
 {
 	if (nx_huge_pages == -1)
 		__set_nx_huge_pages(get_nx_auto_mode());
 
+	kvm_mmu_spte_module_init();
 }
 
 /*
@@ -20,7 +20,9 @@
 #include <asm/vmx.h>
 
 bool __read_mostly enable_mmio_caching = true;
+static bool __ro_after_init allow_mmio_caching;
 module_param_named(mmio_caching, enable_mmio_caching, bool, 0444);
+EXPORT_SYMBOL_GPL(enable_mmio_caching);
 
 u64 __read_mostly shadow_host_writable_mask;
 u64 __read_mostly shadow_mmu_writable_mask;

@@ -43,6 +45,18 @@ u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
 
 u8 __read_mostly shadow_phys_bits;
 
+void __init kvm_mmu_spte_module_init(void)
+{
+	/*
+	 * Snapshot userspace's desire to allow MMIO caching.  Whether or not
+	 * KVM can actually enable MMIO caching depends on vendor-specific
+	 * hardware capabilities and other module params that can't be resolved
+	 * until the vendor module is loaded, i.e. enable_mmio_caching can and
+	 * will change when the vendor module is (re)loaded.
+	 */
+	allow_mmio_caching = enable_mmio_caching;
+}
+
 static u64 generation_mmio_spte_mask(u64 gen)
 {
 	u64 mask;

@@ -340,9 +354,23 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask)
 	BUG_ON((u64)(unsigned)access_mask != access_mask);
 	WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask);
 
+	/*
+	 * Reset to the original module param value to honor userspace's desire
+	 * to (dis)allow MMIO caching.  Update the param itself so that
+	 * userspace can see whether or not KVM is actually using MMIO caching.
+	 */
+	enable_mmio_caching = allow_mmio_caching;
 	if (!enable_mmio_caching)
 		mmio_value = 0;
 
+	/*
+	 * The mask must contain only bits that are carved out specifically for
+	 * the MMIO SPTE mask, e.g. to ensure there's no overlap with the MMIO
+	 * generation.
+	 */
+	if (WARN_ON(mmio_mask & ~SPTE_MMIO_ALLOWED_MASK))
+		mmio_value = 0;
+
 	/*
 	 * Disable MMIO caching if the MMIO value collides with the bits that
 	 * are used to hold the relocated GFN when the L1TF mitigation is
@@ -5,8 +5,6 @@
 
 #include "mmu_internal.h"
 
-extern bool __read_mostly enable_mmio_caching;
-
 /*
  * A MMU present SPTE is backed by actual memory and may or may not be present
  * in hardware.  E.g. MMIO SPTEs are not considered present.  Use bit 11, as it

@@ -125,6 +123,20 @@ static_assert(!(EPT_SPTE_MMU_WRITABLE & SHADOW_ACC_TRACK_SAVED_MASK));
 static_assert(!(SPTE_MMU_PRESENT_MASK &
 		(MMIO_SPTE_GEN_LOW_MASK | MMIO_SPTE_GEN_HIGH_MASK)));
 
+/*
+ * The SPTE MMIO mask must NOT overlap the MMIO generation bits or the
+ * MMU-present bit.  The generation obviously co-exists with the magic MMIO
+ * mask/value, and MMIO SPTEs are considered !MMU-present.
+ *
+ * The SPTE MMIO mask is allowed to use hardware "present" bits (i.e. all EPT
+ * RWX bits), all physical address bits (legal PA bits are used for "fast" MMIO
+ * and so they're off-limits for generation; additional checks ensure the mask
+ * doesn't overlap legal PA bits), and bit 63 (carved out for future usage).
+ */
+#define SPTE_MMIO_ALLOWED_MASK (BIT_ULL(63) | GENMASK_ULL(51, 12) | GENMASK_ULL(2, 0))
+static_assert(!(SPTE_MMIO_ALLOWED_MASK &
+		(SPTE_MMU_PRESENT_MASK | MMIO_SPTE_GEN_LOW_MASK | MMIO_SPTE_GEN_HIGH_MASK)));
+
 #define MMIO_SPTE_GEN_LOW_BITS (MMIO_SPTE_GEN_LOW_END - MMIO_SPTE_GEN_LOW_START + 1)
 #define MMIO_SPTE_GEN_HIGH_BITS (MMIO_SPTE_GEN_HIGH_END - MMIO_SPTE_GEN_HIGH_START + 1)
 

@@ -450,6 +462,7 @@ static inline u64 restore_acc_track_spte(u64 spte)
 
 u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn);
 
+void __init kvm_mmu_spte_module_init(void);
 void kvm_mmu_reset_all_pte_masks(void);
 
 #endif
@@ -22,6 +22,7 @@
 #include <asm/trapnr.h>
 #include <asm/fpu/xcr.h>
 
+#include "mmu.h"
 #include "x86.h"
 #include "svm.h"
 #include "svm_ops.h"

@@ -2221,6 +2222,15 @@ void __init sev_hardware_setup(void)
 	if (!sev_es_enabled)
 		goto out;
 
+	/*
+	 * SEV-ES requires MMIO caching as KVM doesn't have access to the guest
+	 * instruction stream, i.e. can't emulate in response to a #NPF and
+	 * instead relies on #NPF(RSVD) being reflected into the guest as #VC
+	 * (the guest can then do a #VMGEXIT to request MMIO emulation).
+	 */
+	if (!enable_mmio_caching)
+		goto out;
+
 	/* Does the CPU support SEV-ES? */
 	if (!boot_cpu_has(X86_FEATURE_SEV_ES))
 		goto out;
@@ -5034,13 +5034,16 @@ static __init int svm_hardware_setup(void)
 	/* Setup shadow_me_value and shadow_me_mask */
 	kvm_mmu_set_me_spte_mask(sme_me_mask, sme_me_mask);
 
-	/* Note, SEV setup consumes npt_enabled. */
+	svm_adjust_mmio_mask();
+
+	/*
+	 * Note, SEV setup consumes npt_enabled and enable_mmio_caching (which
+	 * may be modified by svm_adjust_mmio_mask()).
+	 */
 	sev_hardware_setup();
 
 	svm_hv_hardware_setup();
 
-	svm_adjust_mmio_mask();
-
 	for_each_possible_cpu(cpu) {
 		r = svm_cpu_init(cpu);
 		if (r)
@@ -171,13 +171,6 @@ static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
 	return get_gp_pmc(pmu, msr, MSR_IA32_PMC0);
 }
 
-bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
-{
-	struct x86_pmu_lbr *lbr = vcpu_to_lbr_records(vcpu);
-
-	return lbr->nr && (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_LBR_FMT);
-}
-
 static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu *vcpu, u32 index)
 {
 	struct x86_pmu_lbr *records = vcpu_to_lbr_records(vcpu);

@@ -592,7 +585,9 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 	bitmap_set(pmu->all_valid_pmc_idx,
 		INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);
 
-	if (cpuid_model_is_consistent(vcpu))
+	perf_capabilities = vcpu_get_perf_capabilities(vcpu);
+	if (cpuid_model_is_consistent(vcpu) &&
+	    (perf_capabilities & PMU_CAP_LBR_FMT))
 		x86_perf_get_lbr(&lbr_desc->records);
 	else
 		lbr_desc->records.nr = 0;

@@ -600,7 +595,6 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 	if (lbr_desc->records.nr)
 		bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1);
 
-	perf_capabilities = vcpu_get_perf_capabilities(vcpu);
 	if (perf_capabilities & PERF_CAP_PEBS_FORMAT) {
 		if (perf_capabilities & PERF_CAP_PEBS_BASELINE) {
 			pmu->pebs_enable_mask = counter_mask;
@@ -6,6 +6,7 @@
 
 #include <asm/kvm.h>
 #include <asm/intel_pt.h>
+#include <asm/perf_event.h>
 
 #include "capabilities.h"
 #include "../kvm_cache_regs.h"

@@ -104,15 +105,6 @@ static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
 	return pmu->version > 1;
 }
 
-#define vcpu_to_lbr_desc(vcpu) (&to_vmx(vcpu)->lbr_desc)
-#define vcpu_to_lbr_records(vcpu) (&to_vmx(vcpu)->lbr_desc.records)
-
-void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu);
-bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu);
-
-int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
-void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu);
-
 struct lbr_desc {
 	/* Basic info about guest LBR records. */
 	struct x86_pmu_lbr records;

@@ -542,6 +534,25 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 	return container_of(vcpu, struct vcpu_vmx, vcpu);
 }
 
+static inline struct lbr_desc *vcpu_to_lbr_desc(struct kvm_vcpu *vcpu)
+{
+	return &to_vmx(vcpu)->lbr_desc;
+}
+
+static inline struct x86_pmu_lbr *vcpu_to_lbr_records(struct kvm_vcpu *vcpu)
+{
+	return &vcpu_to_lbr_desc(vcpu)->records;
+}
+
+static inline bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
+{
+	return !!vcpu_to_lbr_records(vcpu)->nr;
+}
+
+void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu);
+int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
+void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu);
+
 static inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -3413,6 +3413,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 	struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
 	struct kvm_steal_time __user *st;
 	struct kvm_memslots *slots;
+	gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
 	u64 steal;
 	u32 version;
 

@@ -3430,13 +3431,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 	slots = kvm_memslots(vcpu->kvm);
 
 	if (unlikely(slots->generation != ghc->generation ||
+		     gpa != ghc->gpa ||
 		     kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
-		gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
-
 		/* We rely on the fact that it fits in a single page. */
 		BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);
 
-		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
+		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st)) ||
 		    kvm_is_error_hva(ghc->hva) || !ghc->memslot)
 			return;
 	}

@@ -3545,9 +3545,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 
 		vcpu->arch.perf_capabilities = data;
-
+		kvm_pmu_refresh(vcpu);
 		return 0;
-		}
+	}
 	case MSR_EFER:
 		return set_efer(vcpu, msr_info);
 	case MSR_K7_HWCR:

@@ -4714,6 +4714,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 	struct kvm_steal_time __user *st;
 	struct kvm_memslots *slots;
 	static const u8 preempted = KVM_VCPU_PREEMPTED;
+	gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
 
 	/*
 	 * The vCPU can be marked preempted if and only if the VM-Exit was on

@@ -4741,6 +4742,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 	slots = kvm_memslots(vcpu->kvm);
 
 	if (unlikely(slots->generation != ghc->generation ||
+		     gpa != ghc->gpa ||
 		     kvm_is_error_hva(ghc->hva) || !ghc->memslot))
 		return;
 

@@ -13019,6 +13021,7 @@ void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_c
 		fault.error_code = error_code;
 		fault.nested_page_fault = false;
 		fault.address = gva;
+		fault.async_page_fault = false;
 	}
 	vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault);
 }
@@ -707,23 +707,24 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 		break;
 
 	case KVM_XEN_VCPU_ATTR_TYPE_TIMER:
-		if (data->u.timer.port) {
-			if (data->u.timer.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) {
-				r = -EINVAL;
-				break;
-			}
-			vcpu->arch.xen.timer_virq = data->u.timer.port;
+		if (data->u.timer.port &&
+		    data->u.timer.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) {
+			r = -EINVAL;
+			break;
+		}
+
+		if (!vcpu->arch.xen.timer.function)
 			kvm_xen_init_timer(vcpu);
 
-			/* Restart the timer if it's set */
-			if (data->u.timer.expires_ns)
-				kvm_xen_start_timer(vcpu, data->u.timer.expires_ns,
-						    data->u.timer.expires_ns -
-						    get_kvmclock_ns(vcpu->kvm));
-		} else if (kvm_xen_timer_enabled(vcpu)) {
-			kvm_xen_stop_timer(vcpu);
-			vcpu->arch.xen.timer_virq = 0;
-		}
+		/* Stop the timer (if it's running) before changing the vector */
+		kvm_xen_stop_timer(vcpu);
+		vcpu->arch.xen.timer_virq = data->u.timer.port;
+
+		/* Start the timer if the new value has a valid vector+expiry. */
+		if (data->u.timer.port && data->u.timer.expires_ns)
+			kvm_xen_start_timer(vcpu, data->u.timer.expires_ns,
+					    data->u.timer.expires_ns -
+					    get_kvmclock_ns(vcpu->kvm));
+
 		r = 0;
 		break;
@@ -282,7 +282,7 @@ DEFINE_EVENT(kvm_async_get_page_class, kvm_try_async_get_page,
 	TP_ARGS(gva, gfn)
 );
 
-DEFINE_EVENT(kvm_async_get_page_class, kvm_async_pf_doublefault,
+DEFINE_EVENT(kvm_async_get_page_class, kvm_async_pf_repeated_fault,
 
 	TP_PROTO(u64 gva, u64 gfn),
 
@@ -4,6 +4,8 @@ include ../../../build/Build.include
 all:
 
 top_srcdir = ../../../..
+include $(top_srcdir)/scripts/subarch.include
+ARCH ?= $(SUBARCH)
 
 # For cross-builds to work, UNAME_M has to map to ARCH and arch specific
 # directories and targets in this Makefile. "uname -m" doesn't map to

@@ -197,7 +199,8 @@ endif
 CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
 	-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
 	-I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
-	-I$(<D) -Iinclude/$(UNAME_M) -I.. $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
+	-I$(<D) -Iinclude/$(UNAME_M) -I ../rseq -I.. $(EXTRA_CFLAGS) \
+	$(KHDR_INCLUDES)
 
 no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
         $(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie)

@@ -206,7 +209,7 @@ no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
 pgste-option = $(call try-run, echo 'int main() { return 0; }' | \
 	$(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste)
 
-
+LDLIBS += -ldl
 LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
 
 # After inclusion, $(OUTPUT) is defined and
@@ -20,15 +20,7 @@
 #include "processor.h"
 #include "test_util.h"
 
-static __thread volatile struct rseq __rseq = {
-	.cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
-};
-
-/*
- * Use an arbitrary, bogus signature for configuring rseq, this test does not
- * actually enter an rseq critical section.
- */
-#define RSEQ_SIG 0xdeadbeef
+#include "../rseq/rseq.c"
 
 /*
  * Any bug related to task migration is likely to be timing-dependent; perform

@@ -49,12 +41,16 @@ static void guest_code(void)
 	GUEST_SYNC(0);
 }
 
-static void sys_rseq(int flags)
+/*
+ * We have to perform direct system call for getcpu() because it's
+ * not available until glic 2.29.
+ */
+static void sys_getcpu(unsigned *cpu)
 {
 	int r;
 
-	r = syscall(__NR_rseq, &__rseq, sizeof(__rseq), flags, RSEQ_SIG);
-	TEST_ASSERT(!r, "rseq failed, errno = %d (%s)", errno, strerror(errno));
+	r = syscall(__NR_getcpu, cpu, NULL, NULL);
+	TEST_ASSERT(!r, "getcpu failed, errno = %d (%s)", errno, strerror(errno));
 }
 
 static int next_cpu(int cpu)

@@ -101,7 +97,7 @@ static void *migration_worker(void *__rseq_tid)
 		atomic_inc(&seq_cnt);
 
 		/*
-		 * Ensure the odd count is visible while sched_getcpu() isn't
+		 * Ensure the odd count is visible while getcpu() isn't
 		 * stable, i.e. while changing affinity is in-progress.
 		 */
 		smp_wmb();

@@ -142,10 +138,10 @@ static void *migration_worker(void *__rseq_tid)
 		 * check completes.
 		 *
 		 * 3. To ensure the read-side makes efficient forward progress,
-		 *    e.g. if sched_getcpu() involves a syscall. Stalling the
-		 *    read-side means the test will spend more time waiting for
-		 *    sched_getcpu() to stabilize and less time trying to hit
-		 *    the timing-dependent bug.
+		 *    e.g. if getcpu() involves a syscall. Stalling the read-side
+		 *    means the test will spend more time waiting for getcpu()
+		 *    to stabilize and less time trying to hit the timing-dependent
+		 *    bug.
 		 *
 		 * Because any bug in this area is likely to be timing-dependent,
 		 * run with a range of delays at 1us intervals from 1us to 10us

@@ -218,7 +214,9 @@ int main(int argc, char *argv[])
 
 	calc_min_max_cpu();
 
-	sys_rseq(0);
+	r = rseq_register_current_thread();
+	TEST_ASSERT(!r, "rseq_register_current_thread failed, errno = %d (%s)",
+		    errno, strerror(errno));
 
 	/*
 	 * Create and run a dummy VM that immediately exits to userspace via

@@ -238,9 +236,9 @@ int main(int argc, char *argv[])
 
 		/*
 		 * Verify rseq's CPU matches sched's CPU.  Ensure migration
-		 * doesn't occur between sched_getcpu() and reading the rseq
-		 * cpu_id by rereading both if the sequence count changes, or
-		 * if the count is odd (migration in-progress).
+		 * doesn't occur between getcpu() and reading the rseq cpu_id
+		 * by rereading both if the sequence count changes, or if the
+		 * count is odd (migration in-progress).
 		 */
 		do {
 			/*

@@ -250,13 +248,13 @@ int main(int argc, char *argv[])
 			snapshot = atomic_read(&seq_cnt) & ~1;
 
 			/*
-			 * Ensure reading sched_getcpu() and rseq.cpu_id
-			 * complete in a single "no migration" window, i.e. are
-			 * not reordered across the seq_cnt reads.
+			 * Ensure calling getcpu() and reading rseq.cpu_id complete
+			 * in a single "no migration" window, i.e. are not reordered
+			 * across the seq_cnt reads.
 			 */
 			smp_rmb();
-			cpu = sched_getcpu();
-			rseq_cpu = READ_ONCE(__rseq.cpu_id);
+			sys_getcpu(&cpu);
+			rseq_cpu = rseq_current_cpu_raw();
 			smp_rmb();
 		} while (snapshot != atomic_read(&seq_cnt));
 

@@ -267,9 +265,9 @@ int main(int argc, char *argv[])
 	/*
 	 * Sanity check that the test was able to enter the guest a reasonable
 	 * number of times, e.g. didn't get stalled too often/long waiting for
-	 * sched_getcpu() to stabilize.  A 2:1 migration:KVM_RUN ratio is a
-	 * fairly conservative ratio on x86-64, which can do _more_ KVM_RUNs
-	 * than migrations given the 1us+ delay in the migration task.
+	 * getcpu() to stabilize.  A 2:1 migration:KVM_RUN ratio is a fairly
+	 * conservative ratio on x86-64, which can do _more_ KVM_RUNs than
+	 * migrations given the 1us+ delay in the migration task.
 	 */
 	TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2),
 		    "Only performed %d KVM_RUNs, task stalled too much?\n", i);

@@ -278,7 +276,7 @@ int main(int argc, char *argv[])
 
 	kvm_vm_free(vm);
 
-	sys_rseq(RSEQ_FLAG_UNREGISTER);
+	rseq_unregister_current_thread();
 
 	return 0;
 }
@@ -59,6 +59,7 @@ int main(int argc, char *argv[])
 	int ret;
 	union cpuid10_eax eax;
 	union perf_capabilities host_cap;
+	uint64_t val;
 
 	host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
 	host_cap.capabilities &= (PMU_CAP_FW_WRITES | PMU_CAP_LBR_FMT);

@@ -91,11 +92,17 @@
 	vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.lbr_format);
 	ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), (u64)host_cap.lbr_format);
 
-	/* testcase 3, check invalid LBR format is rejected */
-	/* Note, on Arch LBR capable platforms, LBR_FMT in perf capability msr is 0x3f,
-	 * to avoid the failure, use a true invalid format 0x30 for the test. */
-	ret = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0x30);
-	TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
+	/*
+	 * Testcase 3, check that an "invalid" LBR format is rejected.  Only an
+	 * exact match of the host's format (and 0/disabled) is allowed.
+	 */
+	for (val = 1; val <= PMU_CAP_LBR_FMT; val++) {
+		if (val == (host_cap.capabilities & PMU_CAP_LBR_FMT))
+			continue;
+
+		ret = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val);
+		TEST_ASSERT(!ret, "Bad LBR FMT = 0x%lx didn't fail", val);
+	}
 
 	printf("Completed perf capability tests.\n");
 	kvm_vm_free(vm);
@@ -484,6 +484,10 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	vcpu->ready = false;
 	preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
 	vcpu->last_used_slot = NULL;
+
+	/* Fill the stats id string for the vcpu */
+	snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d",
+		 task_pid_nr(current), id);
 }
 
 static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)

@@ -1017,21 +1021,21 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm)
 	}
 }
 
-static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
+static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname)
 {
 	static DEFINE_MUTEX(kvm_debugfs_lock);
 	struct dentry *dent;
 	char dir_name[ITOA_MAX_LEN * 2];
 	struct kvm_stat_data *stat_data;
 	const struct _kvm_stats_desc *pdesc;
-	int i, ret;
+	int i, ret = -ENOMEM;
 	int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc +
 				      kvm_vcpu_stats_header.num_desc;
 
 	if (!debugfs_initialized())
 		return 0;
 
-	snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd);
+	snprintf(dir_name, sizeof(dir_name), "%d-%s", task_pid_nr(current), fdname);
 	mutex_lock(&kvm_debugfs_lock);
 	dent = debugfs_lookup(dir_name, kvm_debugfs_dir);
 	if (dent) {

@@ -1050,13 +1054,13 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
 					 sizeof(*kvm->debugfs_stat_data),
 					 GFP_KERNEL_ACCOUNT);
 	if (!kvm->debugfs_stat_data)
-		return -ENOMEM;
+		goto out_err;
 
 	for (i = 0; i < kvm_vm_stats_header.num_desc; ++i) {
 		pdesc = &kvm_vm_stats_desc[i];
 		stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT);
 		if (!stat_data)
-			return -ENOMEM;
+			goto out_err;
 
 		stat_data->kvm = kvm;
 		stat_data->desc = pdesc;

@@ -1071,7 +1075,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
 		pdesc = &kvm_vcpu_stats_desc[i];
 		stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT);
 		if (!stat_data)
-			return -ENOMEM;
+			goto out_err;
 
 		stat_data->kvm = kvm;
 		stat_data->desc = pdesc;

@@ -1083,12 +1087,13 @@
 	}
 
 	ret = kvm_arch_create_vm_debugfs(kvm);
-	if (ret) {
-		kvm_destroy_vm_debugfs(kvm);
-		return i;
-	}
+	if (ret)
+		goto out_err;
 
 	return 0;
+out_err:
+	kvm_destroy_vm_debugfs(kvm);
+	return ret;
 }
 
 /*

@@ -1119,7 +1124,7 @@ int __weak kvm_arch_create_vm_debugfs(struct kvm *kvm)
 	return 0;
 }
 
-static struct kvm *kvm_create_vm(unsigned long type)
+static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
 {
 	struct kvm *kvm = kvm_arch_alloc_vm();
 	struct kvm_memslots *slots;

@@ -1155,6 +1160,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	 */
 	kvm->debugfs_dentry = ERR_PTR(-ENOENT);
 
+	snprintf(kvm->stats_id, sizeof(kvm->stats_id), "kvm-%d",
+		 task_pid_nr(current));
+
 	if (init_srcu_struct(&kvm->srcu))
 		goto out_err_no_srcu;
 	if (init_srcu_struct(&kvm->irq_srcu))

@@ -1205,7 +1213,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
 
 	r = kvm_arch_post_init_vm(kvm);
 	if (r)
-		goto out_err;
+		goto out_err_mmu_notifier;
 
 	mutex_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);

@@ -1221,12 +1229,18 @@
 	 */
 	if (!try_module_get(kvm_chardev_ops.owner)) {
 		r = -ENODEV;
-		goto out_err;
+		goto out_err_mmu_notifier;
 	}
 
+	r = kvm_create_vm_debugfs(kvm, fdname);
+	if (r)
+		goto out_err;
+
 	return kvm;
 
 out_err:
+	module_put(kvm_chardev_ops.owner);
+out_err_mmu_notifier:
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	if (kvm->mmu_notifier.ops)
 		mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);

@@ -3916,10 +3930,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 	if (r)
 		goto unlock_vcpu_destroy;
 
-	/* Fill the stats id string for the vcpu */
-	snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d",
-		 task_pid_nr(current), id);
-
 	/* Now it's all set up, let userspace reach it */
 	kvm_get_kvm(kvm);
 	r = create_vcpu_fd(vcpu);

@@ -4886,28 +4896,30 @@ EXPORT_SYMBOL_GPL(file_is_kvm);
 
 static int kvm_dev_ioctl_create_vm(unsigned long type)
 {
-	int r;
+	char fdname[ITOA_MAX_LEN + 1];
+	int r, fd;
 	struct kvm *kvm;
 	struct file *file;
 
-	kvm = kvm_create_vm(type);
-	if (IS_ERR(kvm))
-		return PTR_ERR(kvm);
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0)
+		return fd;
+
+	snprintf(fdname, sizeof(fdname), "%d", fd);
+
+	kvm = kvm_create_vm(type, fdname);
+	if (IS_ERR(kvm)) {
+		r = PTR_ERR(kvm);
+		goto put_fd;
+	}
+
 #ifdef CONFIG_KVM_MMIO
 	r = kvm_coalesced_mmio_init(kvm);
 	if (r < 0)
 		goto put_kvm;
 #endif
-	r = get_unused_fd_flags(O_CLOEXEC);
-	if (r < 0)
-		goto put_kvm;
-
-	snprintf(kvm->stats_id, sizeof(kvm->stats_id),
-			"kvm-%d", task_pid_nr(current));
-
 	file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
 	if (IS_ERR(file)) {
-		put_unused_fd(r);
 		r = PTR_ERR(file);
 		goto put_kvm;
 	}

@@ -4918,18 +4930,15 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 	 * cases it will be called by the final fput(file) and will take
 	 * care of doing kvm_put_kvm(kvm).
 	 */
-	if (kvm_create_vm_debugfs(kvm, r) < 0) {
-		put_unused_fd(r);
-		fput(file);
-		return -ENOMEM;
-	}
 	kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm);
 
-	fd_install(r, file);
-	return r;
+	fd_install(fd, file);
+	return fd;
 
 put_kvm:
 	kvm_put_kvm(kvm);
+put_fd:
+	put_unused_fd(fd);
 	return r;
 }
 