KVM: MMU: Add 5 level EPT & Shadow page table support.
Extend the shadow paging code so that a 5-level shadow page table can be constructed when the VM is running in 5-level paging mode. Also extend the EPT code so that a 5-level EPT table can be constructed when the VM's MAXPHYADDR exceeds 48 bits. Unlike the shadow logic, KVM should still use a 4-level EPT table for a VM whose physical address width does not exceed 48 bits, even when the VM is running in 5-level paging mode. Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com> [Unconditionally reset the MMU context in kvm_update_cpuid, because changing MAXPHYADDR invalidates the reserved-bit bitmasks. - Paolo] Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
2a7266a8f9
commit
855feb6736
|
@ -315,7 +315,7 @@ struct kvm_pio_request {
|
|||
int size;
|
||||
};
|
||||
|
||||
#define PT64_ROOT_MAX_LEVEL 4
|
||||
#define PT64_ROOT_MAX_LEVEL 5
|
||||
|
||||
struct rsvd_bits_validate {
|
||||
u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL];
|
||||
|
@ -323,9 +323,9 @@ struct rsvd_bits_validate {
|
|||
};
|
||||
|
||||
/*
|
||||
* x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
|
||||
* 32-bit). The kvm_mmu structure abstracts the details of the current mmu
|
||||
* mode.
|
||||
* x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
|
||||
* and 2-level 32-bit). The kvm_mmu structure abstracts the details of the
|
||||
* current mmu mode.
|
||||
*/
|
||||
struct kvm_mmu {
|
||||
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
|
||||
|
@ -982,7 +982,7 @@ struct kvm_x86_ops {
|
|||
void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
|
||||
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
|
||||
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
|
||||
int (*get_tdp_level)(void);
|
||||
int (*get_tdp_level)(struct kvm_vcpu *vcpu);
|
||||
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
|
||||
int (*get_lpage_level)(void);
|
||||
bool (*rdtscp_supported)(void);
|
||||
|
|
|
@ -453,6 +453,7 @@ enum vmcs_field {
|
|||
|
||||
#define VMX_EPT_EXECUTE_ONLY_BIT (1ull)
|
||||
#define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6)
|
||||
#define VMX_EPT_PAGE_WALK_5_BIT (1ull << 7)
|
||||
#define VMX_EPTP_UC_BIT (1ull << 8)
|
||||
#define VMX_EPTP_WB_BIT (1ull << 14)
|
||||
#define VMX_EPT_2MB_PAGE_BIT (1ull << 16)
|
||||
|
@ -471,6 +472,7 @@ enum vmcs_field {
|
|||
#define VMX_EPT_MT_EPTE_SHIFT 3
|
||||
#define VMX_EPTP_PWL_MASK 0x38ull
|
||||
#define VMX_EPTP_PWL_4 0x18ull
|
||||
#define VMX_EPTP_PWL_5 0x20ull
|
||||
#define VMX_EPTP_AD_ENABLE_BIT (1ull << 6)
|
||||
#define VMX_EPTP_MT_MASK 0x7ull
|
||||
#define VMX_EPTP_MT_WB 0x6ull
|
||||
|
|
|
@ -136,6 +136,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
|
|||
|
||||
/* Update physical-address width */
|
||||
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
|
||||
kvm_mmu_reset_context(vcpu);
|
||||
|
||||
kvm_pmu_refresh(vcpu);
|
||||
return 0;
|
||||
|
|
|
@ -3322,8 +3322,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
|
|||
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
|
||||
return;
|
||||
|
||||
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL &&
|
||||
(vcpu->arch.mmu.root_level == PT64_ROOT_4LEVEL ||
|
||||
if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL &&
|
||||
(vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL ||
|
||||
vcpu->arch.mmu.direct_map)) {
|
||||
hpa_t root = vcpu->arch.mmu.root_hpa;
|
||||
|
||||
|
@ -3375,13 +3375,14 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
|
|||
struct kvm_mmu_page *sp;
|
||||
unsigned i;
|
||||
|
||||
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) {
|
||||
if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL) {
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
if(make_mmu_pages_available(vcpu) < 0) {
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
return 1;
|
||||
}
|
||||
sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_4LEVEL, 1, ACC_ALL);
|
||||
sp = kvm_mmu_get_page(vcpu, 0, 0,
|
||||
vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
|
||||
++sp->root_count;
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
vcpu->arch.mmu.root_hpa = __pa(sp->spt);
|
||||
|
@ -3425,7 +3426,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
|
|||
* Do we shadow a long mode page table? If so we need to
|
||||
* write-protect the guests page table root.
|
||||
*/
|
||||
if (vcpu->arch.mmu.root_level == PT64_ROOT_4LEVEL) {
|
||||
if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
|
||||
hpa_t root = vcpu->arch.mmu.root_hpa;
|
||||
|
||||
MMU_WARN_ON(VALID_PAGE(root));
|
||||
|
@ -3435,8 +3436,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
|
|||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
return 1;
|
||||
}
|
||||
sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_4LEVEL,
|
||||
0, ACC_ALL);
|
||||
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
|
||||
vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
|
||||
root = __pa(sp->spt);
|
||||
++sp->root_count;
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
|
@ -3531,7 +3532,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
|
|||
|
||||
vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
|
||||
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
|
||||
if (vcpu->arch.mmu.root_level == PT64_ROOT_4LEVEL) {
|
||||
if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
|
||||
hpa_t root = vcpu->arch.mmu.root_hpa;
|
||||
sp = page_header(root);
|
||||
mmu_sync_children(vcpu, sp);
|
||||
|
@ -4057,6 +4058,12 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
|
|||
rsvd_check->rsvd_bits_mask[1][0] =
|
||||
rsvd_check->rsvd_bits_mask[0][0];
|
||||
break;
|
||||
case PT64_ROOT_5LEVEL:
|
||||
rsvd_check->rsvd_bits_mask[0][4] = exb_bit_rsvd |
|
||||
nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
|
||||
rsvd_bits(maxphyaddr, 51);
|
||||
rsvd_check->rsvd_bits_mask[1][4] =
|
||||
rsvd_check->rsvd_bits_mask[0][4];
|
||||
case PT64_ROOT_4LEVEL:
|
||||
rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
|
||||
nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
|
||||
|
@ -4098,6 +4105,8 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
|
|||
{
|
||||
u64 bad_mt_xwr;
|
||||
|
||||
rsvd_check->rsvd_bits_mask[0][4] =
|
||||
rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
|
||||
rsvd_check->rsvd_bits_mask[0][3] =
|
||||
rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
|
||||
rsvd_check->rsvd_bits_mask[0][2] =
|
||||
|
@ -4107,6 +4116,7 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
|
|||
rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
|
||||
|
||||
/* large page */
|
||||
rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4];
|
||||
rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3];
|
||||
rsvd_check->rsvd_bits_mask[1][2] =
|
||||
rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
|
||||
|
@ -4367,7 +4377,10 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
|
|||
static void paging64_init_context(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu *context)
|
||||
{
|
||||
paging64_init_context_common(vcpu, context, PT64_ROOT_4LEVEL);
|
||||
int root_level = is_la57_mode(vcpu) ?
|
||||
PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
|
||||
|
||||
paging64_init_context_common(vcpu, context, root_level);
|
||||
}
|
||||
|
||||
static void paging32_init_context(struct kvm_vcpu *vcpu,
|
||||
|
@ -4408,7 +4421,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
|
|||
context->sync_page = nonpaging_sync_page;
|
||||
context->invlpg = nonpaging_invlpg;
|
||||
context->update_pte = nonpaging_update_pte;
|
||||
context->shadow_root_level = kvm_x86_ops->get_tdp_level();
|
||||
context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
|
||||
context->root_hpa = INVALID_PAGE;
|
||||
context->direct_map = true;
|
||||
context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
|
||||
|
@ -4422,7 +4435,8 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
|
|||
context->root_level = 0;
|
||||
} else if (is_long_mode(vcpu)) {
|
||||
context->nx = is_nx(vcpu);
|
||||
context->root_level = PT64_ROOT_4LEVEL;
|
||||
context->root_level = is_la57_mode(vcpu) ?
|
||||
PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
|
||||
reset_rsvds_bits_mask(vcpu, context);
|
||||
context->gva_to_gpa = paging64_gva_to_gpa;
|
||||
} else if (is_pae(vcpu)) {
|
||||
|
@ -4479,7 +4493,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
|
|||
|
||||
MMU_WARN_ON(VALID_PAGE(context->root_hpa));
|
||||
|
||||
context->shadow_root_level = kvm_x86_ops->get_tdp_level();
|
||||
context->shadow_root_level = PT64_ROOT_4LEVEL;
|
||||
|
||||
context->nx = true;
|
||||
context->ept_ad = accessed_dirty;
|
||||
|
@ -4488,7 +4502,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
|
|||
context->sync_page = ept_sync_page;
|
||||
context->invlpg = ept_invlpg;
|
||||
context->update_pte = ept_update_pte;
|
||||
context->root_level = context->shadow_root_level;
|
||||
context->root_level = PT64_ROOT_4LEVEL;
|
||||
context->root_hpa = INVALID_PAGE;
|
||||
context->direct_map = false;
|
||||
context->base_role.ad_disabled = !accessed_dirty;
|
||||
|
@ -4533,7 +4547,8 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
|
|||
g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested;
|
||||
} else if (is_long_mode(vcpu)) {
|
||||
g_context->nx = is_nx(vcpu);
|
||||
g_context->root_level = PT64_ROOT_4LEVEL;
|
||||
g_context->root_level = is_la57_mode(vcpu) ?
|
||||
PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
|
||||
reset_rsvds_bits_mask(vcpu, g_context);
|
||||
g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
|
||||
} else if (is_pae(vcpu)) {
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
#define PT32_DIR_PSE36_MASK \
|
||||
(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
|
||||
|
||||
#define PT64_ROOT_5LEVEL 5
|
||||
#define PT64_ROOT_4LEVEL 4
|
||||
#define PT32_ROOT_LEVEL 2
|
||||
#define PT32E_ROOT_LEVEL 3
|
||||
|
|
|
@ -62,11 +62,11 @@ static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
|
|||
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
|
||||
return;
|
||||
|
||||
if (vcpu->arch.mmu.root_level == PT64_ROOT_4LEVEL) {
|
||||
if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
|
||||
hpa_t root = vcpu->arch.mmu.root_hpa;
|
||||
|
||||
sp = page_header(root);
|
||||
__mmu_spte_walk(vcpu, sp, fn, PT64_ROOT_4LEVEL);
|
||||
__mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu.root_level);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -581,7 +581,7 @@ static inline void invlpga(unsigned long addr, u32 asid)
|
|||
asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
|
||||
}
|
||||
|
||||
static int get_npt_level(void)
|
||||
static int get_npt_level(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
return PT64_ROOT_4LEVEL;
|
||||
|
@ -2402,7 +2402,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
|
|||
vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
|
||||
vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr;
|
||||
vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
|
||||
vcpu->arch.mmu.shadow_root_level = get_npt_level();
|
||||
vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu);
|
||||
reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
|
||||
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
|
||||
}
|
||||
|
|
|
@ -1207,6 +1207,11 @@ static inline bool cpu_has_vmx_ept_mt_wb(void)
|
|||
return vmx_capability.ept & VMX_EPTP_WB_BIT;
|
||||
}
|
||||
|
||||
/*
 * Tests VMX_EPT_PAGE_WALK_5_BIT in vmx_capability.ept; the result is
 * true when that bit is set. get_ept_level() uses this as one of the
 * two conditions for choosing a 5-level EPT.
 */
static inline bool cpu_has_vmx_ept_5levels(void)
|
||||
{
|
||||
return vmx_capability.ept & VMX_EPT_PAGE_WALK_5_BIT;
|
||||
}
|
||||
|
||||
static inline bool cpu_has_vmx_ept_ad_bits(void)
|
||||
{
|
||||
return vmx_capability.ept & VMX_EPT_AD_BIT;
|
||||
|
@ -4304,9 +4309,18 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
|
|||
vmx->emulation_required = emulation_required(vcpu);
|
||||
}
|
||||
|
||||
/*
 * Selects the EPT depth for @vcpu.
 *
 * Returns 5 only when cpu_has_vmx_ept_5levels() is true AND
 * cpuid_maxphyaddr(vcpu) is greater than 48; returns 4 in every other
 * case. The guest's own paging mode is not consulted here, so a guest
 * using 5-level paging with a physical address width of 48 bits or
 * fewer still gets a 4-level EPT.
 */
static int get_ept_level(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* Both the capability bit and a >48-bit guest MAXPHYADDR are required. */
if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
|
||||
return 5;
|
||||
return 4;
|
||||
}
|
||||
|
||||
static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
|
||||
{
|
||||
u64 eptp = VMX_EPTP_MT_WB | VMX_EPTP_PWL_4;
|
||||
u64 eptp = VMX_EPTP_MT_WB;
|
||||
|
||||
eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;
|
||||
|
||||
if (enable_ept_ad_bits &&
|
||||
(!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
|
||||
|
@ -9612,11 +9626,6 @@ static void __init vmx_check_processor_compat(void *rtn)
|
|||
}
|
||||
}
|
||||
|
||||
static int get_ept_level(void)
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
|
||||
static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
|
||||
{
|
||||
u8 cache;
|
||||
|
|
|
@ -62,6 +62,16 @@ static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu)
|
|||
return cs_l;
|
||||
}
|
||||
|
||||
/*
 * Reports whether @vcpu has both EFER_LMA set in vcpu->arch.efer and
 * X86_CR4_LA57 set in CR4 (read through kvm_read_cr4_bits()). Callers
 * in this change use the result to pick PT64_ROOT_5LEVEL over
 * PT64_ROOT_4LEVEL.
 *
 * Without CONFIG_X86_64 the function always returns 0.
 */
static inline bool is_la57_mode(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
return (vcpu->arch.efer & EFER_LMA) &&
|
||||
kvm_read_cr4_bits(vcpu, X86_CR4_LA57);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
|
||||
|
|
Loading…
Reference in New Issue