KVM: x86/mmu: Optimize and clean up so called "last nonleaf level" logic

Drop the pre-computed last_nonleaf_level, which is arguably wrong and at
best confusing.  Per the comment:

  Can have large pages at levels 2..last_nonleaf_level-1.

the intent of the variable would appear to be to track what levels can
_legally_ have large pages, but that intent doesn't align with reality.
The computed value will be wrong for 5-level paging, or if 1gb pages are
not supported.

The flawed code is not a problem in practice, because except for 32-bit
PSE paging, bit 7 is reserved if large pages aren't supported at the
level.  Take advantage of this invariant and simply omit the level magic
math for 64-bit page tables (including PAE).

For 32-bit paging (non-PAE), the adjustments are needed purely because
bit 7 is ignored if PSE=0.  Retain that logic as is, but make
is_last_gpte() unique per PTTYPE so that the PSE check is avoided for
PAE and EPT paging.  In the spirit of avoiding branches, bump the "last
nonleaf level" for 32-bit PSE paging by adding the PSE bit itself.

Note, bit 7 is ignored or has other meaning in CR3/EPTP, but despite
FNAME(walk_addr_generic) briefly grabbing CR3/EPTP in "pte", they are
not PTEs and will blow up all the other gpte helpers.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-51-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Sean Christopherson 2021-06-22 10:57:35 -07:00 committed by Paolo Bonzini
parent 616007c866
commit 7cd138db5c
3 changed files with 30 additions and 35 deletions

View File

@ -457,9 +457,6 @@ struct kvm_mmu {
struct rsvd_bits_validate guest_rsvd_check;
/* Can have large pages at levels 2..last_nonleaf_level-1. */
u8 last_nonleaf_level;
u64 pdptrs[4]; /* pae */
};

View File

@ -4071,26 +4071,6 @@ static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
return false;
}
static inline bool is_last_gpte(struct kvm_mmu *mmu,
unsigned level, unsigned gpte)
{
/*
* The RHS has bit 7 set iff level < mmu->last_nonleaf_level.
* If it is clear, there are no large pages at this level, so clear
* PT_PAGE_SIZE_MASK in gpte if that is the case.
*/
gpte &= level - mmu->last_nonleaf_level;
/*
* PG_LEVEL_4K always terminates. The RHS has bit 7 set
* iff level <= PG_LEVEL_4K, which for our purpose means
* level == PG_LEVEL_4K; set PT_PAGE_SIZE_MASK in gpte then.
*/
gpte |= level - PG_LEVEL_4K - 1;
return gpte & PT_PAGE_SIZE_MASK;
}
#define PTTYPE_EPT 18 /* arbitrary */
#define PTTYPE PTTYPE_EPT
#include "paging_tmpl.h"
@ -4491,15 +4471,6 @@ static void update_pkru_bitmask(struct kvm_mmu *mmu)
}
}
static void update_last_nonleaf_level(struct kvm_mmu *mmu)
{
unsigned root_level = mmu->root_level;
mmu->last_nonleaf_level = root_level;
if (root_level == PT32_ROOT_LEVEL && is_cr4_pse(mmu))
mmu->last_nonleaf_level++;
}
static void reset_guest_paging_metadata(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu)
{
@ -4509,7 +4480,6 @@ static void reset_guest_paging_metadata(struct kvm_vcpu *vcpu,
reset_rsvds_bits_mask(vcpu, mmu);
update_permission_bitmask(mmu, false);
update_pkru_bitmask(mmu);
update_last_nonleaf_level(mmu);
}
static void paging64_init_context(struct kvm_mmu *context)
@ -4783,7 +4753,6 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
context->direct_map = false;
update_permission_bitmask(context, true);
update_last_nonleaf_level(context);
update_pkru_bitmask(context);
reset_rsvds_bits_mask_ept(vcpu, context, execonly);
reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);

View File

@ -305,6 +305,35 @@ static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte)
return pkeys;
}
static inline bool FNAME(is_last_gpte)(struct kvm_mmu *mmu,
unsigned int level, unsigned int gpte)
{
/*
* For EPT and PAE paging (both variants), bit 7 is either reserved at
* all level or indicates a huge page (ignoring CR3/EPTP). In either
* case, bit 7 being set terminates the walk.
*/
#if PTTYPE == 32
/*
* 32-bit paging requires special handling because bit 7 is ignored if
* CR4.PSE=0, not reserved. Clear bit 7 in the gpte if the level is
* greater than the last level for which bit 7 is the PAGE_SIZE bit.
*
* The RHS has bit 7 set iff level < (2 + PSE). If it is clear, bit 7
* is not reserved and does not indicate a large page at this level,
* so clear PT_PAGE_SIZE_MASK in gpte if that is the case.
*/
gpte &= level - (PT32_ROOT_LEVEL + mmu->mmu_role.ext.cr4_pse);
#endif
/*
* PG_LEVEL_4K always terminates. The RHS has bit 7 set
* iff level <= PG_LEVEL_4K, which for our purpose means
* level == PG_LEVEL_4K; set PT_PAGE_SIZE_MASK in gpte then.
*/
gpte |= level - PG_LEVEL_4K - 1;
return gpte & PT_PAGE_SIZE_MASK;
}
/*
* Fetch a guest pte for a guest virtual address, or for an L2's GPA.
*/
@ -421,7 +450,7 @@ retry_walk:
/* Convert to ACC_*_MASK flags for struct guest_walker. */
walker->pt_access[walker->level - 1] = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
} while (!is_last_gpte(mmu, walker->level, pte));
} while (!FNAME(is_last_gpte)(mmu, walker->level, pte));
pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;