Merge branch 'kvm-5.16-fixes' into kvm-master
* Fix misuse of gfn-to-pfn cache when recording guest steal time / preempted status
* Fix selftests on APICv machines
* Fix sparse warnings
* Fix detection of KVM features in CPUID
* Cleanups for bogus writes to MSR_KVM_PV_EOI_EN
* Fixes and cleanups for MSR bitmap handling
* Cleanups for INVPCID
* Make x86 KVM_SOFT_MAX_VCPUS consistent with other architectures
This commit is contained in:
commit
f5396f2d82
|
@ -38,7 +38,6 @@
|
|||
#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
|
||||
|
||||
#define KVM_MAX_VCPUS 1024
|
||||
#define KVM_SOFT_MAX_VCPUS 710
|
||||
|
||||
/*
|
||||
* In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs
|
||||
|
@ -725,6 +724,7 @@ struct kvm_vcpu_arch {
|
|||
|
||||
int cpuid_nent;
|
||||
struct kvm_cpuid_entry2 *cpuid_entries;
|
||||
u32 kvm_cpuid_base;
|
||||
|
||||
u64 reserved_gpa_bits;
|
||||
int maxphyaddr;
|
||||
|
@ -748,7 +748,7 @@ struct kvm_vcpu_arch {
|
|||
u8 preempted;
|
||||
u64 msr_val;
|
||||
u64 last_steal;
|
||||
struct gfn_to_pfn_cache cache;
|
||||
struct gfn_to_hva_cache cache;
|
||||
} st;
|
||||
|
||||
u64 l1_tsc_offset;
|
||||
|
@ -1034,6 +1034,7 @@ struct kvm_x86_msr_filter {
|
|||
#define APICV_INHIBIT_REASON_IRQWIN 3
|
||||
#define APICV_INHIBIT_REASON_PIT_REINJ 4
|
||||
#define APICV_INHIBIT_REASON_X2APIC 5
|
||||
#define APICV_INHIBIT_REASON_BLOCKIRQ 6
|
||||
|
||||
struct kvm_arch {
|
||||
unsigned long n_used_mmu_pages;
|
||||
|
|
|
@ -806,11 +806,14 @@ static inline u32 amd_get_nodes_per_socket(void) { return 0; }
|
|||
static inline u32 amd_get_highest_perf(void) { return 0; }
|
||||
#endif
|
||||
|
||||
/*
 * Iterate @function over every CPUID leaf at which a hypervisor signature
 * may be placed: 0x40000000 up to (but not including) 0x40010000, in steps
 * of 0x100.  @function must be a modifiable integer lvalue; it is
 * parenthesized so that any lvalue expression can be passed safely.
 */
#define for_each_possible_hypervisor_cpuid_base(function) \
	for ((function) = 0x40000000; (function) < 0x40010000; (function) += 0x100)
|
||||
|
||||
static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
|
||||
{
|
||||
uint32_t base, eax, signature[3];
|
||||
|
||||
for (base = 0x40000000; base < 0x40010000; base += 0x100) {
|
||||
for_each_possible_hypervisor_cpuid_base(base) {
|
||||
cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
|
||||
|
||||
if (!memcmp(sig, signature, 12) &&
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
* should be used to determine that a VM is running under KVM.
|
||||
*/
|
||||
#define KVM_CPUID_SIGNATURE 0x40000000
|
||||
#define KVM_SIGNATURE "KVMKVMKVM\0\0\0"
|
||||
|
||||
/* This CPUID returns two feature bitmaps in eax, edx. Before enabling
|
||||
* a particular paravirtualization, the appropriate feature bit should
|
||||
|
|
|
@ -809,7 +809,7 @@ static noinline uint32_t __kvm_cpuid_base(void)
|
|||
return 0; /* So we don't blow up on old processors */
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
|
||||
return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);
|
||||
return hypervisor_cpuid_base(KVM_SIGNATURE, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -99,11 +99,45 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u32 function;
|
||||
struct kvm_cpuid_entry2 *entry;
|
||||
|
||||
vcpu->arch.kvm_cpuid_base = 0;
|
||||
|
||||
for_each_possible_hypervisor_cpuid_base(function) {
|
||||
entry = kvm_find_cpuid_entry(vcpu, function, 0);
|
||||
|
||||
if (entry) {
|
||||
u32 signature[3];
|
||||
|
||||
signature[0] = entry->ebx;
|
||||
signature[1] = entry->ecx;
|
||||
signature[2] = entry->edx;
|
||||
|
||||
BUILD_BUG_ON(sizeof(signature) > sizeof(KVM_SIGNATURE));
|
||||
if (!memcmp(signature, KVM_SIGNATURE, sizeof(signature))) {
|
||||
vcpu->arch.kvm_cpuid_base = function;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u32 base = vcpu->arch.kvm_cpuid_base;
|
||||
|
||||
if (!base)
|
||||
return NULL;
|
||||
|
||||
return kvm_find_cpuid_entry(vcpu, base | KVM_CPUID_FEATURES, 0);
|
||||
}
|
||||
|
||||
void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
|
||||
struct kvm_cpuid_entry2 *best = kvm_find_kvm_cpuid_features(vcpu);
|
||||
|
||||
/*
|
||||
* save the feature bitmap to avoid cpuid lookup for every PV
|
||||
|
@ -142,7 +176,7 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
|
|||
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
|
||||
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
|
||||
best = kvm_find_kvm_cpuid_features(vcpu);
|
||||
if (kvm_hlt_in_guest(vcpu->kvm) && best &&
|
||||
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
|
||||
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
|
||||
|
@ -239,6 +273,26 @@ u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu)
|
|||
return rsvd_bits(cpuid_maxphyaddr(vcpu), 63);
|
||||
}
|
||||
|
||||
static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
|
||||
int nent)
|
||||
{
|
||||
int r;
|
||||
|
||||
r = kvm_check_cpuid(e2, nent);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
kvfree(vcpu->arch.cpuid_entries);
|
||||
vcpu->arch.cpuid_entries = e2;
|
||||
vcpu->arch.cpuid_nent = nent;
|
||||
|
||||
kvm_update_kvm_cpuid_base(vcpu);
|
||||
kvm_update_cpuid_runtime(vcpu);
|
||||
kvm_vcpu_after_set_cpuid(vcpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* when an old userspace process fills a new kernel module */
|
||||
int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
|
||||
struct kvm_cpuid *cpuid,
|
||||
|
@ -275,18 +329,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
|
|||
e2[i].padding[2] = 0;
|
||||
}
|
||||
|
||||
r = kvm_check_cpuid(e2, cpuid->nent);
|
||||
if (r) {
|
||||
r = kvm_set_cpuid(vcpu, e2, cpuid->nent);
|
||||
if (r)
|
||||
kvfree(e2);
|
||||
goto out_free_cpuid;
|
||||
}
|
||||
|
||||
kvfree(vcpu->arch.cpuid_entries);
|
||||
vcpu->arch.cpuid_entries = e2;
|
||||
vcpu->arch.cpuid_nent = cpuid->nent;
|
||||
|
||||
kvm_update_cpuid_runtime(vcpu);
|
||||
kvm_vcpu_after_set_cpuid(vcpu);
|
||||
|
||||
out_free_cpuid:
|
||||
kvfree(e);
|
||||
|
@ -310,20 +355,11 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
|
|||
return PTR_ERR(e2);
|
||||
}
|
||||
|
||||
r = kvm_check_cpuid(e2, cpuid->nent);
|
||||
if (r) {
|
||||
r = kvm_set_cpuid(vcpu, e2, cpuid->nent);
|
||||
if (r)
|
||||
kvfree(e2);
|
||||
return r;
|
||||
}
|
||||
|
||||
kvfree(vcpu->arch.cpuid_entries);
|
||||
vcpu->arch.cpuid_entries = e2;
|
||||
vcpu->arch.cpuid_nent = cpuid->nent;
|
||||
|
||||
kvm_update_cpuid_runtime(vcpu);
|
||||
kvm_vcpu_after_set_cpuid(vcpu);
|
||||
|
||||
return 0;
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
|
||||
|
@ -871,8 +907,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
|
|||
}
|
||||
break;
|
||||
case KVM_CPUID_SIGNATURE: {
|
||||
static const char signature[12] = "KVMKVMKVM\0\0";
|
||||
const u32 *sigptr = (const u32 *)signature;
|
||||
const u32 *sigptr = (const u32 *)KVM_SIGNATURE;
|
||||
entry->eax = KVM_CPUID_FEATURES;
|
||||
entry->ebx = sigptr[0];
|
||||
entry->ecx = sigptr[1];
|
||||
|
|
|
@ -1472,7 +1472,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
|
|||
|
||||
if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
|
||||
hv_vcpu->hv_vapic = data;
|
||||
if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
|
||||
if (kvm_lapic_set_pv_eoi(vcpu, 0, 0))
|
||||
return 1;
|
||||
break;
|
||||
}
|
||||
|
@ -1490,7 +1490,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
|
|||
return 1;
|
||||
hv_vcpu->hv_vapic = data;
|
||||
kvm_vcpu_mark_page_dirty(vcpu, gfn);
|
||||
if (kvm_lapic_enable_pv_eoi(vcpu,
|
||||
if (kvm_lapic_set_pv_eoi(vcpu,
|
||||
gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
|
||||
sizeof(struct hv_vp_assist_page)))
|
||||
return 1;
|
||||
|
|
|
@ -2856,25 +2856,30 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Handle a write of @data to the PV EOI MSR value tracked for @vcpu.
 *
 * The address is @data with KVM_MSR_ENABLED masked off and must be 4-byte
 * aligned, otherwise 1 is returned.  When the enable bit is set, the
 * gfn-to-hva cache is (re)initialized; if the address is unchanged and @len
 * fits in the cached length, the cached length is reused.  A cache init
 * failure is returned as-is and leaves the stored MSR value untouched.
 * Otherwise @data is recorded in vcpu->arch.pv_eoi.msr_val and 0 is returned.
 */
int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
{
	struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
	u64 addr = data & ~KVM_MSR_ENABLED;

	if (!IS_ALIGNED(addr, 4))
		return 1;

	if (data & KVM_MSR_ENABLED) {
		unsigned long new_len = len;
		int ret;

		if (addr == ghc->gpa && len <= ghc->len)
			new_len = ghc->len;

		ret = kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
		if (ret)
			return ret;
	}

	vcpu->arch.pv_eoi.msr_val = data;

	return 0;
}
|
||||
|
||||
int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
|
||||
|
|
|
@ -127,7 +127,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
|
|||
int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
|
||||
int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
|
||||
|
||||
int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
|
||||
int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
|
||||
void kvm_lapic_exit(void);
|
||||
|
||||
#define VEC_POS(v) ((v) & (32 - 1))
|
||||
|
|
|
@ -3191,17 +3191,17 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
|||
new_spte |= PT_WRITABLE_MASK;
|
||||
|
||||
/*
|
||||
* Do not fix write-permission on the large spte. Since
|
||||
* we only dirty the first page into the dirty-bitmap in
|
||||
* Do not fix write-permission on the large spte when
|
||||
* dirty logging is enabled. Since we only dirty the
|
||||
* first page into the dirty-bitmap in
|
||||
* fast_pf_fix_direct_spte(), other pages are missed
|
||||
* if its slot has dirty logging enabled.
|
||||
*
|
||||
* Instead, we let the slow page fault path create a
|
||||
* normal spte to fix the access.
|
||||
*
|
||||
* See the comments in kvm_arch_commit_memory_region().
|
||||
*/
|
||||
if (sp->role.level > PG_LEVEL_4K)
|
||||
if (sp->role.level > PG_LEVEL_4K &&
|
||||
kvm_slot_dirty_track_enabled(fault->slot))
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -897,7 +897,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
|
|||
struct kvm_page_fault *fault,
|
||||
struct tdp_iter *iter)
|
||||
{
|
||||
struct kvm_mmu_page *sp = sptep_to_sp(iter->sptep);
|
||||
struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(iter->sptep));
|
||||
u64 new_spte;
|
||||
int ret = RET_PF_FIXED;
|
||||
bool wrprot = false;
|
||||
|
|
|
@ -319,7 +319,7 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
/* check if idx is a valid index to access PMU */
|
||||
int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
|
||||
bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
|
||||
{
|
||||
return kvm_x86_ops.pmu_ops->is_valid_rdpmc_ecx(vcpu, idx);
|
||||
}
|
||||
|
|
|
@ -32,7 +32,7 @@ struct kvm_pmu_ops {
|
|||
struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
|
||||
unsigned int idx, u64 *mask);
|
||||
struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr);
|
||||
int (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
|
||||
bool (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
|
||||
bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
|
||||
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
|
||||
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
|
||||
|
@ -149,7 +149,7 @@ void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
|
|||
void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
|
||||
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
|
||||
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
|
||||
int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
|
||||
bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
|
||||
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
|
||||
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
|
||||
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
|
||||
|
|
|
@ -904,7 +904,8 @@ bool svm_check_apicv_inhibit_reasons(ulong bit)
|
|||
BIT(APICV_INHIBIT_REASON_NESTED) |
|
||||
BIT(APICV_INHIBIT_REASON_IRQWIN) |
|
||||
BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
|
||||
BIT(APICV_INHIBIT_REASON_X2APIC);
|
||||
BIT(APICV_INHIBIT_REASON_X2APIC) |
|
||||
BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
|
||||
|
||||
return supported & BIT(bit);
|
||||
}
|
||||
|
|
|
@ -181,14 +181,13 @@ static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
|
|||
return get_gp_pmc_amd(pmu, base + pmc_idx, PMU_TYPE_COUNTER);
|
||||
}
|
||||
|
||||
/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
|
||||
static int amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
|
||||
static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
|
||||
idx &= ~(3u << 30);
|
||||
|
||||
return (idx >= pmu->nr_arch_gp_counters);
|
||||
return idx < pmu->nr_arch_gp_counters;
|
||||
}
|
||||
|
||||
/* idx is the ECX register of RDPMC instruction */
|
||||
|
|
|
@ -3121,11 +3121,6 @@ static int invpcid_interception(struct kvm_vcpu *vcpu)
|
|||
type = svm->vmcb->control.exit_info_2;
|
||||
gva = svm->vmcb->control.exit_info_1;
|
||||
|
||||
if (type > 3) {
|
||||
kvm_inject_gp(vcpu, 0);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return kvm_handle_invpcid(vcpu, type, gva);
|
||||
}
|
||||
|
||||
|
|
|
@ -525,67 +525,19 @@ static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
|
|||
}
|
||||
|
||||
/*
|
||||
* Check if MSR is intercepted for L01 MSR bitmap.
|
||||
* For x2APIC MSRs, ignore the vmcs01 bitmap. L1 can enable x2APIC without L1
|
||||
* itself utilizing x2APIC. All MSRs were previously set to be intercepted,
|
||||
* only the "disable intercept" case needs to be handled.
|
||||
*/
|
||||
static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
|
||||
static void nested_vmx_disable_intercept_for_x2apic_msr(unsigned long *msr_bitmap_l1,
|
||||
unsigned long *msr_bitmap_l0,
|
||||
u32 msr, int type)
|
||||
{
|
||||
unsigned long *msr_bitmap;
|
||||
int f = sizeof(unsigned long);
|
||||
if (type & MSR_TYPE_R && !vmx_test_msr_bitmap_read(msr_bitmap_l1, msr))
|
||||
vmx_clear_msr_bitmap_read(msr_bitmap_l0, msr);
|
||||
|
||||
if (!cpu_has_vmx_msr_bitmap())
|
||||
return true;
|
||||
|
||||
msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
|
||||
|
||||
if (msr <= 0x1fff) {
|
||||
return !!test_bit(msr, msr_bitmap + 0x800 / f);
|
||||
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
|
||||
msr &= 0x1fff;
|
||||
return !!test_bit(msr, msr_bitmap + 0xc00 / f);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* If a msr is allowed by L0, we should check whether it is allowed by L1.
|
||||
* The corresponding bit will be cleared unless both of L0 and L1 allow it.
|
||||
*/
|
||||
static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
|
||||
unsigned long *msr_bitmap_nested,
|
||||
u32 msr, int type)
|
||||
{
|
||||
int f = sizeof(unsigned long);
|
||||
|
||||
/*
|
||||
* See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
|
||||
* have the write-low and read-high bitmap offsets the wrong way round.
|
||||
* We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
|
||||
*/
|
||||
if (msr <= 0x1fff) {
|
||||
if (type & MSR_TYPE_R &&
|
||||
!test_bit(msr, msr_bitmap_l1 + 0x000 / f))
|
||||
/* read-low */
|
||||
__clear_bit(msr, msr_bitmap_nested + 0x000 / f);
|
||||
|
||||
if (type & MSR_TYPE_W &&
|
||||
!test_bit(msr, msr_bitmap_l1 + 0x800 / f))
|
||||
/* write-low */
|
||||
__clear_bit(msr, msr_bitmap_nested + 0x800 / f);
|
||||
|
||||
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
|
||||
msr &= 0x1fff;
|
||||
if (type & MSR_TYPE_R &&
|
||||
!test_bit(msr, msr_bitmap_l1 + 0x400 / f))
|
||||
/* read-high */
|
||||
__clear_bit(msr, msr_bitmap_nested + 0x400 / f);
|
||||
|
||||
if (type & MSR_TYPE_W &&
|
||||
!test_bit(msr, msr_bitmap_l1 + 0xc00 / f))
|
||||
/* write-high */
|
||||
__clear_bit(msr, msr_bitmap_nested + 0xc00 / f);
|
||||
|
||||
}
|
||||
if (type & MSR_TYPE_W && !vmx_test_msr_bitmap_write(msr_bitmap_l1, msr))
|
||||
vmx_clear_msr_bitmap_write(msr_bitmap_l0, msr);
|
||||
}
|
||||
|
||||
static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
|
||||
|
@ -600,6 +552,34 @@ static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
|
|||
}
|
||||
}
|
||||
|
||||
#define BUILD_NVMX_MSR_INTERCEPT_HELPER(rw) \
|
||||
static inline \
|
||||
void nested_vmx_set_msr_##rw##_intercept(struct vcpu_vmx *vmx, \
|
||||
unsigned long *msr_bitmap_l1, \
|
||||
unsigned long *msr_bitmap_l0, u32 msr) \
|
||||
{ \
|
||||
if (vmx_test_msr_bitmap_##rw(vmx->vmcs01.msr_bitmap, msr) || \
|
||||
vmx_test_msr_bitmap_##rw(msr_bitmap_l1, msr)) \
|
||||
vmx_set_msr_bitmap_##rw(msr_bitmap_l0, msr); \
|
||||
else \
|
||||
vmx_clear_msr_bitmap_##rw(msr_bitmap_l0, msr); \
|
||||
}
|
||||
BUILD_NVMX_MSR_INTERCEPT_HELPER(read)
|
||||
BUILD_NVMX_MSR_INTERCEPT_HELPER(write)
|
||||
|
||||
static inline void nested_vmx_set_intercept_for_msr(struct vcpu_vmx *vmx,
|
||||
unsigned long *msr_bitmap_l1,
|
||||
unsigned long *msr_bitmap_l0,
|
||||
u32 msr, int types)
|
||||
{
|
||||
if (types & MSR_TYPE_R)
|
||||
nested_vmx_set_msr_read_intercept(vmx, msr_bitmap_l1,
|
||||
msr_bitmap_l0, msr);
|
||||
if (types & MSR_TYPE_W)
|
||||
nested_vmx_set_msr_write_intercept(vmx, msr_bitmap_l1,
|
||||
msr_bitmap_l0, msr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Merge L0's and L1's MSR bitmap, return false to indicate that
|
||||
* we do not use the hardware.
|
||||
|
@ -607,10 +587,11 @@ static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
|
|||
static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
|
||||
struct vmcs12 *vmcs12)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
int msr;
|
||||
unsigned long *msr_bitmap_l1;
|
||||
unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
|
||||
struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map;
|
||||
unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap;
|
||||
struct kvm_host_map *map = &vmx->nested.msr_bitmap_map;
|
||||
|
||||
/* Nothing to do if the MSR bitmap is not in use. */
|
||||
if (!cpu_has_vmx_msr_bitmap() ||
|
||||
|
@ -625,7 +606,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
|
|||
/*
|
||||
* To keep the control flow simple, pay eight 8-byte writes (sixteen
|
||||
* 4-byte writes on 32-bit systems) up front to enable intercepts for
|
||||
* the x2APIC MSR range and selectively disable them below.
|
||||
* the x2APIC MSR range and selectively toggle those relevant to L2.
|
||||
*/
|
||||
enable_x2apic_msr_intercepts(msr_bitmap_l0);
|
||||
|
||||
|
@ -644,61 +625,44 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
|
|||
}
|
||||
}
|
||||
|
||||
nested_vmx_disable_intercept_for_msr(
|
||||
nested_vmx_disable_intercept_for_x2apic_msr(
|
||||
msr_bitmap_l1, msr_bitmap_l0,
|
||||
X2APIC_MSR(APIC_TASKPRI),
|
||||
MSR_TYPE_R | MSR_TYPE_W);
|
||||
|
||||
if (nested_cpu_has_vid(vmcs12)) {
|
||||
nested_vmx_disable_intercept_for_msr(
|
||||
nested_vmx_disable_intercept_for_x2apic_msr(
|
||||
msr_bitmap_l1, msr_bitmap_l0,
|
||||
X2APIC_MSR(APIC_EOI),
|
||||
MSR_TYPE_W);
|
||||
nested_vmx_disable_intercept_for_msr(
|
||||
nested_vmx_disable_intercept_for_x2apic_msr(
|
||||
msr_bitmap_l1, msr_bitmap_l0,
|
||||
X2APIC_MSR(APIC_SELF_IPI),
|
||||
MSR_TYPE_W);
|
||||
}
|
||||
}
|
||||
|
||||
/* KVM unconditionally exposes the FS/GS base MSRs to L1. */
|
||||
#ifdef CONFIG_X86_64
|
||||
nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
|
||||
MSR_FS_BASE, MSR_TYPE_RW);
|
||||
|
||||
nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
|
||||
MSR_GS_BASE, MSR_TYPE_RW);
|
||||
|
||||
nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
|
||||
MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Checking the L0->L1 bitmap is trying to verify two things:
|
||||
*
|
||||
* 1. L0 gave a permission to L1 to actually passthrough the MSR. This
|
||||
* ensures that we do not accidentally generate an L02 MSR bitmap
|
||||
* from the L12 MSR bitmap that is too permissive.
|
||||
* 2. That L1 or L2s have actually used the MSR. This avoids
|
||||
* unnecessarily merging of the bitmap if the MSR is unused. This
|
||||
* works properly because we only update the L01 MSR bitmap lazily.
|
||||
* So even if L0 should pass L1 these MSRs, the L01 bitmap is only
|
||||
* updated to reflect this when L1 (or its L2s) actually write to
|
||||
* the MSR.
|
||||
* Always check vmcs01's bitmap to honor userspace MSR filters and any
|
||||
* other runtime changes to vmcs01's bitmap, e.g. dynamic pass-through.
|
||||
*/
|
||||
if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL))
|
||||
nested_vmx_disable_intercept_for_msr(
|
||||
msr_bitmap_l1, msr_bitmap_l0,
|
||||
MSR_IA32_SPEC_CTRL,
|
||||
MSR_TYPE_R | MSR_TYPE_W);
|
||||
#ifdef CONFIG_X86_64
|
||||
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
|
||||
MSR_FS_BASE, MSR_TYPE_RW);
|
||||
|
||||
if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD))
|
||||
nested_vmx_disable_intercept_for_msr(
|
||||
msr_bitmap_l1, msr_bitmap_l0,
|
||||
MSR_IA32_PRED_CMD,
|
||||
MSR_TYPE_W);
|
||||
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
|
||||
MSR_GS_BASE, MSR_TYPE_RW);
|
||||
|
||||
kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false);
|
||||
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
|
||||
MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
|
||||
#endif
|
||||
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
|
||||
MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);
|
||||
|
||||
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
|
||||
MSR_IA32_PRED_CMD, MSR_TYPE_W);
|
||||
|
||||
kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -5379,7 +5343,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
|
|||
struct {
|
||||
u64 eptp, gpa;
|
||||
} operand;
|
||||
int i, r;
|
||||
int i, r, gpr_index;
|
||||
|
||||
if (!(vmx->nested.msrs.secondary_ctls_high &
|
||||
SECONDARY_EXEC_ENABLE_EPT) ||
|
||||
|
@ -5392,7 +5356,8 @@ static int handle_invept(struct kvm_vcpu *vcpu)
|
|||
return 1;
|
||||
|
||||
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
|
||||
type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
|
||||
gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
|
||||
type = kvm_register_read(vcpu, gpr_index);
|
||||
|
||||
types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
|
||||
|
||||
|
@ -5459,7 +5424,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
|
|||
u64 gla;
|
||||
} operand;
|
||||
u16 vpid02;
|
||||
int r;
|
||||
int r, gpr_index;
|
||||
|
||||
if (!(vmx->nested.msrs.secondary_ctls_high &
|
||||
SECONDARY_EXEC_ENABLE_VPID) ||
|
||||
|
@ -5472,7 +5437,8 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
|
|||
return 1;
|
||||
|
||||
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
|
||||
type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
|
||||
gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
|
||||
type = kvm_register_read(vcpu, gpr_index);
|
||||
|
||||
types = (vmx->nested.msrs.vpid_caps &
|
||||
VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
|
||||
|
|
|
@ -118,16 +118,15 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
|
|||
}
|
||||
}
|
||||
|
||||
/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
|
||||
static int intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
|
||||
static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
bool fixed = idx & (1u << 30);
|
||||
|
||||
idx &= ~(3u << 30);
|
||||
|
||||
return (!fixed && idx >= pmu->nr_arch_gp_counters) ||
|
||||
(fixed && idx >= pmu->nr_arch_fixed_counters);
|
||||
return fixed ? idx < pmu->nr_arch_fixed_counters
|
||||
: idx < pmu->nr_arch_gp_counters;
|
||||
}
|
||||
|
||||
static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
|
||||
|
|
|
@ -769,24 +769,13 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
|
|||
/*
|
||||
* Check if MSR is intercepted for currently loaded MSR bitmap.
|
||||
*/
|
||||
static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
|
||||
static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
|
||||
{
|
||||
unsigned long *msr_bitmap;
|
||||
int f = sizeof(unsigned long);
|
||||
|
||||
if (!cpu_has_vmx_msr_bitmap())
|
||||
if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS))
|
||||
return true;
|
||||
|
||||
msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
|
||||
|
||||
if (msr <= 0x1fff) {
|
||||
return !!test_bit(msr, msr_bitmap + 0x800 / f);
|
||||
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
|
||||
msr &= 0x1fff;
|
||||
return !!test_bit(msr, msr_bitmap + 0xc00 / f);
|
||||
}
|
||||
|
||||
return true;
|
||||
return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap,
|
||||
MSR_IA32_SPEC_CTRL);
|
||||
}
|
||||
|
||||
static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
|
||||
|
@ -3697,46 +3686,6 @@ void free_vpid(int vpid)
|
|||
spin_unlock(&vmx_vpid_lock);
|
||||
}
|
||||
|
||||
static void vmx_clear_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
|
||||
{
|
||||
int f = sizeof(unsigned long);
|
||||
|
||||
if (msr <= 0x1fff)
|
||||
__clear_bit(msr, msr_bitmap + 0x000 / f);
|
||||
else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
|
||||
__clear_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
|
||||
}
|
||||
|
||||
static void vmx_clear_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
|
||||
{
|
||||
int f = sizeof(unsigned long);
|
||||
|
||||
if (msr <= 0x1fff)
|
||||
__clear_bit(msr, msr_bitmap + 0x800 / f);
|
||||
else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
|
||||
__clear_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
|
||||
}
|
||||
|
||||
static void vmx_set_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
|
||||
{
|
||||
int f = sizeof(unsigned long);
|
||||
|
||||
if (msr <= 0x1fff)
|
||||
__set_bit(msr, msr_bitmap + 0x000 / f);
|
||||
else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
|
||||
__set_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
|
||||
}
|
||||
|
||||
static void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
|
||||
{
|
||||
int f = sizeof(unsigned long);
|
||||
|
||||
if (msr <= 0x1fff)
|
||||
__set_bit(msr, msr_bitmap + 0x800 / f);
|
||||
else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
|
||||
__set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
|
||||
}
|
||||
|
||||
void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
|
@ -5494,6 +5443,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
|
|||
u64 pcid;
|
||||
u64 gla;
|
||||
} operand;
|
||||
int gpr_index;
|
||||
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
|
||||
kvm_queue_exception(vcpu, UD_VECTOR);
|
||||
|
@ -5501,12 +5451,8 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
|
||||
type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
|
||||
|
||||
if (type > 3) {
|
||||
kvm_inject_gp(vcpu, 0);
|
||||
return 1;
|
||||
}
|
||||
gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
|
||||
type = kvm_register_read(vcpu, gpr_index);
|
||||
|
||||
/* According to the Intel instruction reference, the memory operand
|
||||
* is read even if it isn't needed (e.g., for type==all)
|
||||
|
@ -6749,7 +6695,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
* If the L02 MSR bitmap does not intercept the MSR, then we need to
|
||||
* save it.
|
||||
*/
|
||||
if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
|
||||
if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
|
||||
vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
|
||||
|
||||
x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
|
||||
|
@ -7563,7 +7509,8 @@ static void hardware_unsetup(void)
|
|||
/*
 * Return true if APICv inhibit reason @bit is one of the reasons listed
 * here: DISABLE, HYPERV or BLOCKIRQ.
 */
static bool vmx_check_apicv_inhibit_reasons(ulong bit)
{
	const ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
				BIT(APICV_INHIBIT_REASON_HYPERV) |
				BIT(APICV_INHIBIT_REASON_BLOCKIRQ);

	return !!(supported & BIT(bit));
}
|
||||
|
|
|
@ -400,6 +400,34 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
|
|||
|
||||
void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
|
||||
|
||||
/*
|
||||
* Note, early Intel manuals have the write-low and read-high bitmap offsets
|
||||
* the wrong way round. The bitmaps control MSRs 0x00000000-0x00001fff and
|
||||
* 0xc0000000-0xc0001fff. The former (low) uses bytes 0-0x3ff for reads and
|
||||
* 0x800-0xbff for writes. The latter (high) uses 0x400-0x7ff for reads and
|
||||
* 0xc00-0xfff for writes. MSRs not covered by either of the ranges always
|
||||
* VM-Exit.
|
||||
*/
|
||||
#define __BUILD_VMX_MSR_BITMAP_HELPER(rtype, action, bitop, access, base) \
|
||||
static inline rtype vmx_##action##_msr_bitmap_##access(unsigned long *bitmap, \
|
||||
u32 msr) \
|
||||
{ \
|
||||
int f = sizeof(unsigned long); \
|
||||
\
|
||||
if (msr <= 0x1fff) \
|
||||
return bitop##_bit(msr, bitmap + base / f); \
|
||||
else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) \
|
||||
return bitop##_bit(msr & 0x1fff, bitmap + (base + 0x400) / f); \
|
||||
return (rtype)true; \
|
||||
}
|
||||
#define BUILD_VMX_MSR_BITMAP_HELPERS(ret_type, action, bitop) \
|
||||
__BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, read, 0x0) \
|
||||
__BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, write, 0x800)
|
||||
|
||||
BUILD_VMX_MSR_BITMAP_HELPERS(bool, test, test)
|
||||
BUILD_VMX_MSR_BITMAP_HELPERS(void, clear, __clear)
|
||||
BUILD_VMX_MSR_BITMAP_HELPERS(void, set, __set)
|
||||
|
||||
static inline u8 vmx_get_rvi(void)
|
||||
{
|
||||
return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
|
||||
|
@ -522,4 +550,9 @@ static inline bool vmx_guest_state_valid(struct kvm_vcpu *vcpu)
|
|||
|
||||
void dump_vmcs(struct kvm_vcpu *vcpu);
|
||||
|
||||
/*
 * Extract bits 31:28 of the VMX instruction-information value, which the
 * callers use as the index of a general-purpose register to read.
 */
static inline int vmx_get_instr_info_reg2(u32 vmx_instr_info)
{
	const u32 reg2 = (vmx_instr_info >> 28) & 0xf;

	return reg2;
}
|
||||
|
||||
#endif /* __KVM_X86_VMX_H */
|
||||
|
|
|
@ -3260,8 +3260,11 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
|
|||
|
||||
static void record_steal_time(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_host_map map;
|
||||
struct kvm_steal_time *st;
|
||||
struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
|
||||
struct kvm_steal_time __user *st;
|
||||
struct kvm_memslots *slots;
|
||||
u64 steal;
|
||||
u32 version;
|
||||
|
||||
if (kvm_xen_msr_enabled(vcpu->kvm)) {
|
||||
kvm_xen_runstate_set_running(vcpu);
|
||||
|
@ -3271,47 +3274,83 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
|
|||
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
|
||||
return;
|
||||
|
||||
/* -EAGAIN is returned in atomic context so we can just return. */
|
||||
if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
|
||||
&map, &vcpu->arch.st.cache, false))
|
||||
if (WARN_ON_ONCE(current->mm != vcpu->kvm->mm))
|
||||
return;
|
||||
|
||||
st = map.hva +
|
||||
offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
|
||||
slots = kvm_memslots(vcpu->kvm);
|
||||
|
||||
if (unlikely(slots->generation != ghc->generation ||
|
||||
kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
|
||||
gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
|
||||
|
||||
/* We rely on the fact that it fits in a single page. */
|
||||
BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);
|
||||
|
||||
if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
|
||||
kvm_is_error_hva(ghc->hva) || !ghc->memslot)
|
||||
return;
|
||||
}
|
||||
|
||||
st = (struct kvm_steal_time __user *)ghc->hva;
|
||||
if (!user_access_begin(st, sizeof(*st)))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Doing a TLB flush here, on the guest's behalf, can avoid
|
||||
* expensive IPIs.
|
||||
*/
|
||||
if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
|
||||
u8 st_preempted = xchg(&st->preempted, 0);
|
||||
u8 st_preempted = 0;
|
||||
int err = -EFAULT;
|
||||
|
||||
asm volatile("1: xchgb %0, %2\n"
|
||||
"xor %1, %1\n"
|
||||
"2:\n"
|
||||
_ASM_EXTABLE_UA(1b, 2b)
|
||||
: "+r" (st_preempted),
|
||||
"+&r" (err)
|
||||
: "m" (st->preempted));
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
user_access_end();
|
||||
|
||||
vcpu->arch.st.preempted = 0;
|
||||
|
||||
trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
|
||||
st_preempted & KVM_VCPU_FLUSH_TLB);
|
||||
if (st_preempted & KVM_VCPU_FLUSH_TLB)
|
||||
kvm_vcpu_flush_tlb_guest(vcpu);
|
||||
|
||||
if (!user_access_begin(st, sizeof(*st)))
|
||||
goto dirty;
|
||||
} else {
|
||||
st->preempted = 0;
|
||||
unsafe_put_user(0, &st->preempted, out);
|
||||
vcpu->arch.st.preempted = 0;
|
||||
}
|
||||
|
||||
vcpu->arch.st.preempted = 0;
|
||||
unsafe_get_user(version, &st->version, out);
|
||||
if (version & 1)
|
||||
version += 1; /* first time write, random junk */
|
||||
|
||||
if (st->version & 1)
|
||||
st->version += 1; /* first time write, random junk */
|
||||
|
||||
st->version += 1;
|
||||
version += 1;
|
||||
unsafe_put_user(version, &st->version, out);
|
||||
|
||||
smp_wmb();
|
||||
|
||||
st->steal += current->sched_info.run_delay -
|
||||
unsafe_get_user(steal, &st->steal, out);
|
||||
steal += current->sched_info.run_delay -
|
||||
vcpu->arch.st.last_steal;
|
||||
vcpu->arch.st.last_steal = current->sched_info.run_delay;
|
||||
unsafe_put_user(steal, &st->steal, out);
|
||||
|
||||
smp_wmb();
|
||||
version += 1;
|
||||
unsafe_put_user(version, &st->version, out);
|
||||
|
||||
st->version += 1;
|
||||
|
||||
kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
|
||||
out:
|
||||
user_access_end();
|
||||
dirty:
|
||||
mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
|
||||
}
|
||||
|
||||
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
|
@ -3517,7 +3556,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
|||
if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
|
||||
return 1;
|
||||
|
||||
if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
|
||||
if (kvm_lapic_set_pv_eoi(vcpu, data, sizeof(u8)))
|
||||
return 1;
|
||||
break;
|
||||
|
||||
|
@ -4137,7 +4176,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
r = !static_call(kvm_x86_cpu_has_accelerated_tpr)();
|
||||
break;
|
||||
case KVM_CAP_NR_VCPUS:
|
||||
r = KVM_SOFT_MAX_VCPUS;
|
||||
r = num_online_cpus();
|
||||
break;
|
||||
case KVM_CAP_MAX_VCPUS:
|
||||
r = KVM_MAX_VCPUS;
|
||||
|
@ -4351,8 +4390,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
|||
|
||||
static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_host_map map;
|
||||
struct kvm_steal_time *st;
|
||||
struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
|
||||
struct kvm_steal_time __user *st;
|
||||
struct kvm_memslots *slots;
|
||||
static const u8 preempted = KVM_VCPU_PREEMPTED;
|
||||
|
||||
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
|
||||
return;
|
||||
|
@ -4360,16 +4401,23 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
|
|||
if (vcpu->arch.st.preempted)
|
||||
return;
|
||||
|
||||
if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
|
||||
&vcpu->arch.st.cache, true))
|
||||
/* This happens on process exit */
|
||||
if (unlikely(current->mm != vcpu->kvm->mm))
|
||||
return;
|
||||
|
||||
st = map.hva +
|
||||
offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
|
||||
slots = kvm_memslots(vcpu->kvm);
|
||||
|
||||
st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
|
||||
if (unlikely(slots->generation != ghc->generation ||
|
||||
kvm_is_error_hva(ghc->hva) || !ghc->memslot))
|
||||
return;
|
||||
|
||||
kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
|
||||
st = (struct kvm_steal_time __user *)ghc->hva;
|
||||
BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted));
|
||||
|
||||
if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
|
||||
vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
|
||||
|
||||
mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
|
@ -7334,7 +7382,9 @@ static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
|
|||
static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
|
||||
u32 pmc)
|
||||
{
|
||||
return kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc);
|
||||
if (kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc))
|
||||
return 0;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
|
||||
|
@ -10570,6 +10620,24 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm)
|
||||
{
|
||||
bool inhibit = false;
|
||||
struct kvm_vcpu *vcpu;
|
||||
int i;
|
||||
|
||||
down_write(&kvm->arch.apicv_update_lock);
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ) {
|
||||
inhibit = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
__kvm_request_apicv_update(kvm, !inhibit, APICV_INHIBIT_REASON_BLOCKIRQ);
|
||||
up_write(&kvm->arch.apicv_update_lock);
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
|
||||
struct kvm_guest_debug *dbg)
|
||||
{
|
||||
|
@ -10622,6 +10690,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
|
|||
|
||||
static_call(kvm_x86_update_exception_bitmap)(vcpu);
|
||||
|
||||
kvm_arch_vcpu_guestdbg_update_apicv_inhibit(vcpu->kvm);
|
||||
|
||||
r = 0;
|
||||
|
||||
out:
|
||||
|
@ -10865,11 +10935,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
|||
|
||||
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
|
||||
int idx;
|
||||
|
||||
kvm_release_pfn(cache->pfn, cache->dirty, cache);
|
||||
|
||||
kvmclock_reset(vcpu);
|
||||
|
||||
static_call(kvm_x86_vcpu_free)(vcpu);
|
||||
|
@ -12281,7 +12348,8 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
|
|||
return kvm_skip_emulated_instruction(vcpu);
|
||||
|
||||
default:
|
||||
BUG(); /* We have already checked above that type <= 3 */
|
||||
kvm_inject_gp(vcpu, 0);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_handle_invpcid);
|
||||
|
|
Loading…
Reference in New Issue