kvm: x86: Add multi-entry LRU cache for previous CR3s

Adds support for storing multiple previous CR3/root_hpa pairs maintained
as an LRU cache, so that the lockless CR3 switch path can be used when
switching back to any of them.

Signed-off-by: Junaid Shahid <junaids@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
commit b94742c958
parent faff87588d
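
The key mechanism in this patch is the swap-based LRU walk added as
cached_root_available() in the mmu.c hunk below: the current root is pushed
into prev_roots[] while the array is searched, which keeps the entries
ordered most-recently-used first, and on a miss the least-recently-used
entry falls out for the caller to free. Here is a minimal standalone sketch
of that scheme, for illustration only (the types, values, and helper names
are made up, and the kernel version also matches the page role, not just
the CR3):

#include <stdio.h>
#include <stdbool.h>

#define NUM_PREV_ROOTS 3
#define INVALID_ROOT   (-1L)

struct root_info { long cr3; long hpa; };

static struct root_info current_root = { 100, 0xA };
static struct root_info prev_roots[NUM_PREV_ROOTS] = {
        { 200, 0xB }, { 300, 0xC }, { INVALID_ROOT, INVALID_ROOT },
};

static void swap_roots(struct root_info *a, struct root_info *b)
{
        struct root_info tmp = *a;
        *a = *b;
        *b = tmp;
}

/*
 * Mirrors the walk in cached_root_available() below: the current root is
 * pushed into prev_roots[] as we search, keeping the array ordered
 * most-recently-used first.  On a hit the matching entry becomes the
 * current root; on a miss the least-recently-used entry falls out in
 * 'root', and the caller would free it.
 */
static bool cached_root_available(long new_cr3)
{
        struct root_info root = current_root;
        int i;

        for (i = 0; i < NUM_PREV_ROOTS; i++) {
                swap_roots(&root, &prev_roots[i]);
                if (root.cr3 == new_cr3 && root.hpa != INVALID_ROOT)
                        break;
        }

        current_root = root;
        return i < NUM_PREV_ROOTS;
}

int main(void)
{
        bool hit = cached_root_available(300);  /* hit on prev_roots[1] */
        printf("hit=%d current cr3=%ld\n", hit, current_root.cr3);

        hit = cached_root_available(999);       /* miss: LRU slot falls out */
        printf("hit=%d current cr3=%ld\n", hit, current_root.cr3);
        return 0;
}

On the first call prev_roots[1] matches, so it becomes the current root; on
the second call nothing matches, the old current root is cached at the MRU
position, and the invalid LRU slot falls out.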
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -335,6 +335,8 @@ struct kvm_mmu_root_info {
 #define KVM_MMU_ROOT_INFO_INVALID \
         ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE })
 
+#define KVM_MMU_NUM_PREV_ROOTS 3
+
 /*
  * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
  * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the
@@ -363,7 +365,7 @@ struct kvm_mmu {
         u8 shadow_root_level;
         u8 ept_ad;
         bool direct_map;
-        struct kvm_mmu_root_info prev_root;
+        struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS];
 
         /*
          * Bitmap; bit set = permission fault
@@ -1296,7 +1298,7 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state,
 }
 
 #define KVM_MMU_ROOT_CURRENT          BIT(0)
-#define KVM_MMU_ROOT_PREVIOUS         BIT(1)
+#define KVM_MMU_ROOT_PREVIOUS(i)      BIT(1+i)
 #define KVM_MMU_ROOTS_ALL             (~0UL)
 
 int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3445,17 +3445,25 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free)
         LIST_HEAD(invalid_list);
         struct kvm_mmu *mmu = &vcpu->arch.mmu;
         bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT;
-        bool free_prev_root = roots_to_free & KVM_MMU_ROOT_PREVIOUS;
+
+        BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG);
 
         /* Before acquiring the MMU lock, see if we need to do any real work. */
-        if (!(free_active_root && VALID_PAGE(mmu->root_hpa)) &&
-            !(free_prev_root && VALID_PAGE(mmu->prev_root.hpa)))
-                return;
+        if (!(free_active_root && VALID_PAGE(mmu->root_hpa))) {
+                for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+                        if ((roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) &&
+                            VALID_PAGE(mmu->prev_roots[i].hpa))
+                                break;
+
+                if (i == KVM_MMU_NUM_PREV_ROOTS)
+                        return;
+        }
 
         spin_lock(&vcpu->kvm->mmu_lock);
 
-        if (free_prev_root)
-                mmu_free_root_page(vcpu->kvm, &mmu->prev_root.hpa,
-                                   &invalid_list);
+        for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+                if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i))
+                        mmu_free_root_page(vcpu->kvm, &mmu->prev_roots[i].hpa,
+                                           &invalid_list);
 
         if (free_active_root) {
@@ -4064,6 +4072,38 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
         context->nx = false;
 }
 
+/*
+ * Find out if a previously cached root matching the new CR3/role is available.
+ * The current root is also inserted into the cache.
+ * If a matching root was found, it is assigned to kvm_mmu->root_hpa and true is
+ * returned.
+ * Otherwise, the LRU root from the cache is assigned to kvm_mmu->root_hpa and
+ * false is returned. This root should now be freed by the caller.
+ */
+static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
+                                  union kvm_mmu_page_role new_role)
+{
+        uint i;
+        struct kvm_mmu_root_info root;
+        struct kvm_mmu *mmu = &vcpu->arch.mmu;
+
+        root.cr3 = mmu->get_cr3(vcpu);
+        root.hpa = mmu->root_hpa;
+
+        for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+                swap(root, mmu->prev_roots[i]);
+
+                if (new_cr3 == root.cr3 && VALID_PAGE(root.hpa) &&
+                    page_header(root.hpa) != NULL &&
+                    new_role.word == page_header(root.hpa)->role.word)
+                        break;
+        }
+
+        mmu->root_hpa = root.hpa;
+
+        return i < KVM_MMU_NUM_PREV_ROOTS;
+}
+
 static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
                             union kvm_mmu_page_role new_role,
                             bool skip_tlb_flush)
@@ -4077,18 +4117,10 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
          */
         if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
             mmu->root_level >= PT64_ROOT_4LEVEL) {
-                gpa_t prev_cr3 = mmu->prev_root.cr3;
-
                 if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT))
                         return false;
 
-                swap(mmu->root_hpa, mmu->prev_root.hpa);
-                mmu->prev_root.cr3 = mmu->get_cr3(vcpu);
-
-                if (new_cr3 == prev_cr3 &&
-                    VALID_PAGE(mmu->root_hpa) &&
-                    page_header(mmu->root_hpa) != NULL &&
-                    new_role.word == page_header(mmu->root_hpa)->role.word) {
+                if (cached_root_available(vcpu, new_cr3, new_role)) {
                         /*
                          * It is possible that the cached previous root page is
                          * obsolete because of a change in the MMU
@@ -4854,8 +4886,12 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots)
 {
         if (reset_roots) {
+                uint i;
+
                 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
-                vcpu->arch.mmu.prev_root = KVM_MMU_ROOT_INFO_INVALID;
+
+                for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+                        vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
         }
 
         if (mmu_is_nested(vcpu))
@@ -5225,6 +5261,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 {
         struct kvm_mmu *mmu = &vcpu->arch.mmu;
+        int i;
 
         /* INVLPG on a * non-canonical address is a NOP according to the SDM. */
         if (is_noncanonical_address(gva, vcpu))
@@ -5235,16 +5272,17 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
         /*
          * INVLPG is required to invalidate any global mappings for the VA,
          * irrespective of PCID. Since it would take us roughly similar amount
-         * of work to determine whether the prev_root mapping of the VA is
-         * marked global, or to just sync it blindly, so we might as well just
-         * always sync it.
+         * of work to determine whether any of the prev_root mappings of the VA
+         * is marked global, or to just sync it blindly, so we might as well
+         * just always sync it.
          *
-         * Mappings not reachable via the current cr3 or the prev_root.cr3 will
-         * be synced when switching to that cr3, so nothing needs to be done
-         * here for them.
+         * Mappings not reachable via the current cr3 or the prev_roots will be
+         * synced when switching to that cr3, so nothing needs to be done here
+         * for them.
          */
-        if (VALID_PAGE(mmu->prev_root.hpa))
-                mmu->invlpg(vcpu, gva, mmu->prev_root.hpa);
+        for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+                if (VALID_PAGE(mmu->prev_roots[i].hpa))
+                        mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
 
         kvm_x86_ops->tlb_flush_gva(vcpu, gva);
         ++vcpu->stat.invlpg;
@@ -5255,17 +5293,20 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
 {
         struct kvm_mmu *mmu = &vcpu->arch.mmu;
         bool tlb_flush = false;
+        uint i;
 
         if (pcid == kvm_get_active_pcid(vcpu)) {
                 mmu->invlpg(vcpu, gva, mmu->root_hpa);
                 tlb_flush = true;
         }
 
-        if (VALID_PAGE(mmu->prev_root.hpa) &&
-            pcid == kvm_get_pcid(vcpu, mmu->prev_root.cr3)) {
-                mmu->invlpg(vcpu, gva, mmu->prev_root.hpa);
-                tlb_flush = true;
+        for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+                if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
+                    pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].cr3)) {
+                        mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+                        tlb_flush = true;
+                }
         }
 
         if (tlb_flush)
                 kvm_x86_ops->tlb_flush_gva(vcpu, gva);
@@ -5273,9 +5314,9 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
         ++vcpu->stat.invlpg;
 
         /*
-         * Mappings not reachable via the current cr3 or the prev_root.cr3 will
-         * be synced when switching to that cr3, so nothing needs to be done
-         * here for them.
+         * Mappings not reachable via the current cr3 or the prev_roots will be
+         * synced when switching to that cr3, so nothing needs to be done here
+         * for them.
          */
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva);
@@ -5321,12 +5362,16 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 
 int kvm_mmu_create(struct kvm_vcpu *vcpu)
 {
+        uint i;
+
         vcpu->arch.walk_mmu = &vcpu->arch.mmu;
         vcpu->arch.mmu.root_hpa = INVALID_PAGE;
-        vcpu->arch.mmu.prev_root = KVM_MMU_ROOT_INFO_INVALID;
         vcpu->arch.mmu.translate_gpa = translate_gpa;
         vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
 
+        for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+                vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
+
         return alloc_mmu_pages(vcpu);
 }
 
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8788,6 +8788,8 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
         bool pcid_enabled;
         gva_t gva;
         struct x86_exception e;
+        unsigned i;
+        unsigned long roots_to_free = 0;
         struct {
                 u64 pcid;
                 u64 gla;
@@ -8846,12 +8848,14 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
                         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
                 }
 
-                if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_root.cr3)
-                    == operand.pcid)
-                        kvm_mmu_free_roots(vcpu, KVM_MMU_ROOT_PREVIOUS);
+                for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+                        if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_roots[i].cr3)
+                            == operand.pcid)
+                                roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
 
+                kvm_mmu_free_roots(vcpu, roots_to_free);
                 /*
-                 * If neither the current cr3 nor the prev_root.cr3 use the
+                 * If neither the current cr3 nor any of the prev_roots use the
                  * given PCID, then nothing needs to be done here because a
                  * resync will happen anyway before switching to any other CR3.
                  */
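
A note on the mask encoding used in the handle_invpcid() hunk above:
roots_to_free is built from the reworked KVM_MMU_ROOT_* bits, with BIT(0)
selecting the current root and BIT(1+i) selecting prev_roots[i]. A toy
illustration in plain C (the BIT() definition here merely stands in for the
kernel's, and the matching slots are invented for the example):

#include <stdio.h>

#define BIT(n)                   (1UL << (n))
#define KVM_MMU_ROOT_CURRENT     BIT(0)
#define KVM_MMU_ROOT_PREVIOUS(i) BIT(1 + (i))
#define KVM_MMU_ROOTS_ALL        (~0UL)

int main(void)
{
        unsigned long roots_to_free = 0;
        int i;

        /* Pretend prev_roots[0] and prev_roots[2] match the invalidated PCID. */
        roots_to_free |= KVM_MMU_ROOT_PREVIOUS(0);
        roots_to_free |= KVM_MMU_ROOT_PREVIOUS(2);

        for (i = 0; i < 3; i++)
                if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i))
                        printf("would free prev_roots[%d]\n", i);

        printf("current root freed: %s\n",
               (roots_to_free & KVM_MMU_ROOT_CURRENT) ? "yes" : "no");
        return 0;
}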