kvm: x86: Add multi-entry LRU cache for previous CR3s

Adds support for storing multiple previous CR3/root_hpa pairs maintained
as an LRU cache, so that the lockless CR3 switch path can be used when
switching back to any of them.

Signed-off-by: Junaid Shahid <junaids@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Junaid Shahid 2018-06-27 14:59:20 -07:00 committed by Paolo Bonzini
parent faff87588d
commit b94742c958
3 changed files with 92 additions and 41 deletions

View File

@ -335,6 +335,8 @@ struct kvm_mmu_root_info {
#define KVM_MMU_ROOT_INFO_INVALID \ #define KVM_MMU_ROOT_INFO_INVALID \
((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE }) ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE })
#define KVM_MMU_NUM_PREV_ROOTS 3
/* /*
* x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit, * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
* and 2-level 32-bit). The kvm_mmu structure abstracts the details of the * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the
@ -363,7 +365,7 @@ struct kvm_mmu {
u8 shadow_root_level; u8 shadow_root_level;
u8 ept_ad; u8 ept_ad;
bool direct_map; bool direct_map;
struct kvm_mmu_root_info prev_root; struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS];
/* /*
* Bitmap; bit set = permission fault * Bitmap; bit set = permission fault
@ -1295,9 +1297,9 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state,
return !!(*irq_state); return !!(*irq_state);
} }
#define KVM_MMU_ROOT_CURRENT BIT(0) #define KVM_MMU_ROOT_CURRENT BIT(0)
#define KVM_MMU_ROOT_PREVIOUS BIT(1) #define KVM_MMU_ROOT_PREVIOUS(i) BIT(1+i)
#define KVM_MMU_ROOTS_ALL (~0UL) #define KVM_MMU_ROOTS_ALL (~0UL)
int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level); int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);

View File

@ -3445,18 +3445,26 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free)
LIST_HEAD(invalid_list); LIST_HEAD(invalid_list);
struct kvm_mmu *mmu = &vcpu->arch.mmu; struct kvm_mmu *mmu = &vcpu->arch.mmu;
bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT; bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT;
bool free_prev_root = roots_to_free & KVM_MMU_ROOT_PREVIOUS;
BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG);
/* Before acquiring the MMU lock, see if we need to do any real work. */ /* Before acquiring the MMU lock, see if we need to do any real work. */
if (!(free_active_root && VALID_PAGE(mmu->root_hpa)) && if (!(free_active_root && VALID_PAGE(mmu->root_hpa))) {
!(free_prev_root && VALID_PAGE(mmu->prev_root.hpa))) for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
return; if ((roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) &&
VALID_PAGE(mmu->prev_roots[i].hpa))
break;
if (i == KVM_MMU_NUM_PREV_ROOTS)
return;
}
spin_lock(&vcpu->kvm->mmu_lock); spin_lock(&vcpu->kvm->mmu_lock);
if (free_prev_root) for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
mmu_free_root_page(vcpu->kvm, &mmu->prev_root.hpa, if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i))
&invalid_list); mmu_free_root_page(vcpu->kvm, &mmu->prev_roots[i].hpa,
&invalid_list);
if (free_active_root) { if (free_active_root) {
if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL && if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
@ -4064,6 +4072,38 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
context->nx = false; context->nx = false;
} }
/*
* Find out if a previously cached root matching the new CR3/role is available.
* The current root is also inserted into the cache.
* If a matching root was found, it is assigned to kvm_mmu->root_hpa and true is
* returned.
* Otherwise, the LRU root from the cache is assigned to kvm_mmu->root_hpa and
* false is returned. This root should now be freed by the caller.
*/
static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
union kvm_mmu_page_role new_role)
{
uint i;
struct kvm_mmu_root_info root;
struct kvm_mmu *mmu = &vcpu->arch.mmu;
root.cr3 = mmu->get_cr3(vcpu);
root.hpa = mmu->root_hpa;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
swap(root, mmu->prev_roots[i]);
if (new_cr3 == root.cr3 && VALID_PAGE(root.hpa) &&
page_header(root.hpa) != NULL &&
new_role.word == page_header(root.hpa)->role.word)
break;
}
mmu->root_hpa = root.hpa;
return i < KVM_MMU_NUM_PREV_ROOTS;
}
static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
union kvm_mmu_page_role new_role, union kvm_mmu_page_role new_role,
bool skip_tlb_flush) bool skip_tlb_flush)
@ -4077,18 +4117,10 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
*/ */
if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL && if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
mmu->root_level >= PT64_ROOT_4LEVEL) { mmu->root_level >= PT64_ROOT_4LEVEL) {
gpa_t prev_cr3 = mmu->prev_root.cr3;
if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT)) if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT))
return false; return false;
swap(mmu->root_hpa, mmu->prev_root.hpa); if (cached_root_available(vcpu, new_cr3, new_role)) {
mmu->prev_root.cr3 = mmu->get_cr3(vcpu);
if (new_cr3 == prev_cr3 &&
VALID_PAGE(mmu->root_hpa) &&
page_header(mmu->root_hpa) != NULL &&
new_role.word == page_header(mmu->root_hpa)->role.word) {
/* /*
* It is possible that the cached previous root page is * It is possible that the cached previous root page is
* obsolete because of a change in the MMU * obsolete because of a change in the MMU
@ -4854,8 +4886,12 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots) void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots)
{ {
if (reset_roots) { if (reset_roots) {
uint i;
vcpu->arch.mmu.root_hpa = INVALID_PAGE; vcpu->arch.mmu.root_hpa = INVALID_PAGE;
vcpu->arch.mmu.prev_root = KVM_MMU_ROOT_INFO_INVALID;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
} }
if (mmu_is_nested(vcpu)) if (mmu_is_nested(vcpu))
@ -5225,6 +5261,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{ {
struct kvm_mmu *mmu = &vcpu->arch.mmu; struct kvm_mmu *mmu = &vcpu->arch.mmu;
int i;
/* INVLPG on a * non-canonical address is a NOP according to the SDM. */ /* INVLPG on a * non-canonical address is a NOP according to the SDM. */
if (is_noncanonical_address(gva, vcpu)) if (is_noncanonical_address(gva, vcpu))
@ -5235,16 +5272,17 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
/* /*
* INVLPG is required to invalidate any global mappings for the VA, * INVLPG is required to invalidate any global mappings for the VA,
* irrespective of PCID. Since it would take us roughly similar amount * irrespective of PCID. Since it would take us roughly similar amount
* of work to determine whether the prev_root mapping of the VA is * of work to determine whether any of the prev_root mappings of the VA
* marked global, or to just sync it blindly, so we might as well just * is marked global, or to just sync it blindly, so we might as well
* always sync it. * just always sync it.
* *
* Mappings not reachable via the current cr3 or the prev_root.cr3 will * Mappings not reachable via the current cr3 or the prev_roots will be
* be synced when switching to that cr3, so nothing needs to be done * synced when switching to that cr3, so nothing needs to be done here
* here for them. * for them.
*/ */
if (VALID_PAGE(mmu->prev_root.hpa)) for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
mmu->invlpg(vcpu, gva, mmu->prev_root.hpa); if (VALID_PAGE(mmu->prev_roots[i].hpa))
mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
kvm_x86_ops->tlb_flush_gva(vcpu, gva); kvm_x86_ops->tlb_flush_gva(vcpu, gva);
++vcpu->stat.invlpg; ++vcpu->stat.invlpg;
@ -5255,16 +5293,19 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
{ {
struct kvm_mmu *mmu = &vcpu->arch.mmu; struct kvm_mmu *mmu = &vcpu->arch.mmu;
bool tlb_flush = false; bool tlb_flush = false;
uint i;
if (pcid == kvm_get_active_pcid(vcpu)) { if (pcid == kvm_get_active_pcid(vcpu)) {
mmu->invlpg(vcpu, gva, mmu->root_hpa); mmu->invlpg(vcpu, gva, mmu->root_hpa);
tlb_flush = true; tlb_flush = true;
} }
if (VALID_PAGE(mmu->prev_root.hpa) && for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
pcid == kvm_get_pcid(vcpu, mmu->prev_root.cr3)) { if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
mmu->invlpg(vcpu, gva, mmu->prev_root.hpa); pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].cr3)) {
tlb_flush = true; mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
tlb_flush = true;
}
} }
if (tlb_flush) if (tlb_flush)
@ -5273,9 +5314,9 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
++vcpu->stat.invlpg; ++vcpu->stat.invlpg;
/* /*
* Mappings not reachable via the current cr3 or the prev_root.cr3 will * Mappings not reachable via the current cr3 or the prev_roots will be
* be synced when switching to that cr3, so nothing needs to be done * synced when switching to that cr3, so nothing needs to be done here
* here for them. * for them.
*/ */
} }
EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva); EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva);
@ -5321,12 +5362,16 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
int kvm_mmu_create(struct kvm_vcpu *vcpu) int kvm_mmu_create(struct kvm_vcpu *vcpu)
{ {
uint i;
vcpu->arch.walk_mmu = &vcpu->arch.mmu; vcpu->arch.walk_mmu = &vcpu->arch.mmu;
vcpu->arch.mmu.root_hpa = INVALID_PAGE; vcpu->arch.mmu.root_hpa = INVALID_PAGE;
vcpu->arch.mmu.prev_root = KVM_MMU_ROOT_INFO_INVALID;
vcpu->arch.mmu.translate_gpa = translate_gpa; vcpu->arch.mmu.translate_gpa = translate_gpa;
vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
return alloc_mmu_pages(vcpu); return alloc_mmu_pages(vcpu);
} }

View File

@ -8788,6 +8788,8 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
bool pcid_enabled; bool pcid_enabled;
gva_t gva; gva_t gva;
struct x86_exception e; struct x86_exception e;
unsigned i;
unsigned long roots_to_free = 0;
struct { struct {
u64 pcid; u64 pcid;
u64 gla; u64 gla;
@ -8846,12 +8848,14 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
} }
if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_root.cr3) for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
== operand.pcid) if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_roots[i].cr3)
kvm_mmu_free_roots(vcpu, KVM_MMU_ROOT_PREVIOUS); == operand.pcid)
roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
kvm_mmu_free_roots(vcpu, roots_to_free);
/* /*
* If neither the current cr3 nor the prev_root.cr3 use the * If neither the current cr3 nor any of the prev_roots use the
* given PCID, then nothing needs to be done here because a * given PCID, then nothing needs to be done here because a
* resync will happen anyway before switching to any other CR3. * resync will happen anyway before switching to any other CR3.
*/ */