kvm: Fix page ageing bugs
1. We were calling clear_flush_young_notify in unmap_one, but we are within
   an mmu notifier invalidate range scope. The spte exists no more (due to
   range_start) and the accessed bit info has already been propagated (due
   to kvm_pfn_set_accessed). Simply call clear_flush_young.

2. We clear_flush_young on a primary MMU PMD, but this may be mapped as a
   collection of PTEs by the secondary MMU (e.g. during log-dirty). This
   required expanding the interface of the clear_flush_young mmu notifier,
   so a lot of code has been trivially touched.

3. In the absence of shadow_accessed_mask (e.g. EPT A bit), we emulate the
   accessed bit by blowing the spte. This requires proper synchronizing with
   MMU notifier consumers, like every other removal of spte's does.

Signed-off-by: Andres Lagar-Cavilla <andreslc@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
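On the consumer side, point 2 boils down to walking the [start, end) range one
page at a time instead of handling a single address. A minimal sketch of a
clear_flush_young implementation against the widened hook is below;
my_notifier_flush_page() is a hypothetical driver-private helper, not an API
introduced by this patch (the real in-tree consumer updated this way is the
mn_clear_flush_young hunk further down):

/* Minimal sketch, assuming a driver-private my_notifier_flush_page() helper. */
static int my_clear_flush_young(struct mmu_notifier *mn,
				struct mm_struct *mm,
				unsigned long start,
				unsigned long end)
{
	/* The range can span several pages when the primary MMU aged a PMD. */
	for (; start < end; start += PAGE_SIZE)
		my_notifier_flush_page(mn, start);

	return 0;	/* no accessed-bit information to report back */
}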
parent 8a9522d2fe
commit 5712846808
@@ -170,7 +170,8 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 
 /* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
+			      unsigned long end)
 {
 	return 0;
 }
@@ -180,7 +180,8 @@ int kvm_unmap_hva_range(struct kvm *kvm,
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
 /* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
+			      unsigned long end)
 {
 	return 0;
 }
@@ -56,7 +56,7 @@
 extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 extern int kvm_unmap_hva_range(struct kvm *kvm,
 			       unsigned long start, unsigned long end);
-extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
@@ -243,7 +243,7 @@ struct kvmppc_ops {
 	int (*unmap_hva)(struct kvm *kvm, unsigned long hva);
 	int (*unmap_hva_range)(struct kvm *kvm, unsigned long start,
 			       unsigned long end);
-	int (*age_hva)(struct kvm *kvm, unsigned long hva);
+	int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end);
 	int (*test_age_hva)(struct kvm *kvm, unsigned long hva);
 	void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte);
 	void (*mmu_destroy)(struct kvm_vcpu *vcpu);
@@ -851,9 +851,9 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 	return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
 }
 
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
 {
-	return kvm->arch.kvm_ops->age_hva(kvm, hva);
+	return kvm->arch.kvm_ops->age_hva(kvm, start, end);
 }
 
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
@@ -17,7 +17,8 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
 extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva);
 extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start,
 			unsigned long end);
-extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva);
+extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start,
+			  unsigned long end);
 extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva);
 extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte);
 
@@ -1002,11 +1002,11 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	return ret;
 }
 
-int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
 {
 	if (!kvm->arch.using_mmu_notifiers)
 		return 0;
-	return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+	return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp);
 }
 
 static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
@@ -295,7 +295,8 @@ static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
 	return 0;
 }
 
-static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva)
+static int kvm_age_hva_pr(struct kvm *kvm, unsigned long start,
+			  unsigned long end)
 {
 	/* XXX could be more clever ;) */
 	return 0;
@@ -732,7 +732,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 	return 0;
 }
 
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
 {
 	/* XXX could be more clever ;) */
 	return 0;
@@ -1035,7 +1035,7 @@ asmlinkage void kvm_spurious_fault(void);
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
-int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
@@ -1417,18 +1417,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	struct rmap_iterator uninitialized_var(iter);
 	int young = 0;
 
-	/*
-	 * In case of absence of EPT Access and Dirty Bits supports,
-	 * emulate the accessed bit for EPT, by checking if this page has
-	 * an EPT mapping, and clearing it if it does. On the next access,
-	 * a new EPT mapping will be established.
-	 * This has some overhead, but not as much as the cost of swapping
-	 * out actively used pages or breaking up actively used hugepages.
-	 */
-	if (!shadow_accessed_mask) {
-		young = kvm_unmap_rmapp(kvm, rmapp, slot, gfn, level, data);
-		goto out;
-	}
+	BUG_ON(!shadow_accessed_mask);
 
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
 	     sptep = rmap_get_next(&iter)) {
@@ -1440,7 +1429,6 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 					 (unsigned long *)sptep);
 		}
 	}
-out:
 	trace_kvm_age_page(gfn, level, slot, young);
 	return young;
 }
@@ -1489,9 +1477,29 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 	kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
 {
-	return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
+	/*
+	 * In case of absence of EPT Access and Dirty Bits supports,
+	 * emulate the accessed bit for EPT, by checking if this page has
+	 * an EPT mapping, and clearing it if it does. On the next access,
+	 * a new EPT mapping will be established.
+	 * This has some overhead, but not as much as the cost of swapping
+	 * out actively used pages or breaking up actively used hugepages.
+	 */
+	if (!shadow_accessed_mask) {
+		/*
+		 * We are holding the kvm->mmu_lock, and we are blowing up
+		 * shadow PTEs. MMU notifier consumers need to be kept at bay.
+		 * This is correct as long as we don't decouple the mmu_lock
+		 * protected regions (like invalidate_range_start|end does).
+		 */
+		kvm->mmu_notifier_seq++;
+		return kvm_handle_hva_range(kvm, start, end, 0,
+					    kvm_unmap_rmapp);
+	}
+
+	return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
 }
 
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
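The kvm->mmu_notifier_seq bump in the hunk above is what keeps page-fault
handlers from re-installing an spte that this path has just blown away. For
context, the existing KVM fault side (not part of this diff) already follows
roughly the pattern sketched here, simplified and with details elided:

	/* Simplified sketch of the existing fault-path protocol. */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();
	pfn = gfn_to_pfn(kvm, gfn);		/* may sleep, mmu_lock not held */

	spin_lock(&kvm->mmu_lock);
	if (mmu_notifier_retry(kvm, mmu_seq))	/* seq moved: sptes were zapped */
		goto out_unlock;		/* bail out and retry the fault */
	/* ... safe to (re)install the spte here ... */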
@@ -402,9 +402,11 @@ static void __mn_flush_page(struct mmu_notifier *mn,
 
 static int mn_clear_flush_young(struct mmu_notifier *mn,
 				struct mm_struct *mm,
-				unsigned long address)
+				unsigned long start,
+				unsigned long end)
 {
-	__mn_flush_page(mn, address);
+	for (; start < end; start += PAGE_SIZE)
+		__mn_flush_page(mn, start);
 
 	return 0;
 }
@@ -57,10 +57,13 @@ struct mmu_notifier_ops {
 	 * pte. This way the VM will provide proper aging to the
 	 * accesses to the page through the secondary MMUs and not
 	 * only to the ones through the Linux pte.
+	 * Start-end is necessary in case the secondary MMU is mapping the page
+	 * at a smaller granularity than the primary MMU.
 	 */
 	int (*clear_flush_young)(struct mmu_notifier *mn,
 				 struct mm_struct *mm,
-				 unsigned long address);
+				 unsigned long start,
+				 unsigned long end);
 
 	/*
 	 * test_young is called to check the young/accessed bitflag in
@@ -175,7 +178,8 @@ extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
 extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
 extern void __mmu_notifier_release(struct mm_struct *mm);
 extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
-					  unsigned long address);
+					  unsigned long start,
+					  unsigned long end);
 extern int __mmu_notifier_test_young(struct mm_struct *mm,
 				     unsigned long address);
 extern void __mmu_notifier_change_pte(struct mm_struct *mm,
|
@ -194,10 +198,11 @@ static inline void mmu_notifier_release(struct mm_struct *mm)
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
|
static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
|
||||||
unsigned long address)
|
unsigned long start,
|
||||||
|
unsigned long end)
|
||||||
{
|
{
|
||||||
if (mm_has_notifiers(mm))
|
if (mm_has_notifiers(mm))
|
||||||
return __mmu_notifier_clear_flush_young(mm, address);
|
return __mmu_notifier_clear_flush_young(mm, start, end);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -255,7 +260,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 	unsigned long ___address = __address;				\
 	__young = ptep_clear_flush_young(___vma, ___address, __ptep);	\
 	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
-						  ___address);		\
+						  ___address,		\
+						  ___address +		\
+							PAGE_SIZE);	\
 	__young;							\
 })
 
@@ -266,7 +273,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 	unsigned long ___address = __address;				\
 	__young = pmdp_clear_flush_young(___vma, ___address, __pmdp);	\
 	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
-						  ___address);		\
+						  ___address,		\
+						  ___address +		\
+							PMD_SIZE);	\
 	__young;							\
 })
 
@@ -301,7 +310,8 @@ static inline void mmu_notifier_release(struct mm_struct *mm)
 }
 
 static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
-					  unsigned long address)
+					  unsigned long start,
+					  unsigned long end)
 {
 	return 0;
 }
@@ -107,7 +107,8 @@ void __mmu_notifier_release(struct mm_struct *mm)
  * existed or not.
  */
 int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
-					unsigned long address)
+					unsigned long start,
+					unsigned long end)
 {
 	struct mmu_notifier *mn;
 	int young = 0, id;
@@ -115,7 +116,7 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
 	id = srcu_read_lock(&srcu);
 	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
 		if (mn->ops->clear_flush_young)
-			young |= mn->ops->clear_flush_young(mn, mm, address);
+			young |= mn->ops->clear_flush_young(mn, mm, start, end);
 	}
 	srcu_read_unlock(&srcu, id);
 
@@ -1355,7 +1355,11 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 			continue;	/* don't unmap */
 		}
 
-		if (ptep_clear_flush_young_notify(vma, address, pte))
+		/*
+		 * No need for _notify because we're within an
+		 * mmu_notifier_invalidate_range_ {start|end} scope.
+		 */
+		if (ptep_clear_flush_young(vma, address, pte))
 			continue;
 
 		/* Nuke the page table entry. */
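The reason the plain ptep_clear_flush_young is enough here is the invalidate
scope the commit message describes for point 1: by the time this loop runs,
range_start has already torn down the secondary-MMU sptes for the whole
cluster, so a _notify variant would have nothing left to age. Schematically
(a simplified sketch of the surrounding, unchanged code, not part of the diff):

	mmu_notifier_invalidate_range_start(mm, start, end);
	for (; address < end; pte++, address += PAGE_SIZE) {
		/* secondary-MMU mappings for this range are already gone */
		if (ptep_clear_flush_young(vma, address, pte))
			continue;
		/* ... unmap the pte ... */
	}
	mmu_notifier_invalidate_range_end(mm, start, end);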
@@ -369,7 +369,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 
 static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 					      struct mm_struct *mm,
-					      unsigned long address)
+					      unsigned long start,
+					      unsigned long end)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
 	int young, idx;
@@ -377,7 +378,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 
-	young = kvm_age_hva(kvm, address);
+	young = kvm_age_hva(kvm, start, end);
 	if (young)
 		kvm_flush_remote_tlbs(kvm);
 