From 523b3999e5f620cb5ccce6a7ca2780a4cab579a2 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Thu, 10 Sep 2020 14:33:51 +0100 Subject: [PATCH 1/3] KVM: arm64: Try PMD block mappings if PUD mappings are not supported When userspace uses hugetlbfs for the VM memory, user_mem_abort() tries to use the same block size to map the faulting IPA in stage 2. If stage 2 cannot use the same block mapping because the block size doesn't fit in the memslot or the memslot is not properly aligned, user_mem_abort() will fall back to a page mapping, regardless of the block size. We can do better for PUD backed hugetlbfs by checking if a PMD block mapping is supported before deciding to use a page. vma_pagesize is an unsigned long, use 1UL instead of 1ULL when assigning its value. Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200910133351.118191-1-alexandru.elisei@arm.com --- arch/arm64/kvm/mmu.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 21b70abf65a7..852497bd6d1e 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -779,16 +779,25 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, else vma_shift = PAGE_SHIFT; - vma_pagesize = 1ULL << vma_shift; if (logging_active || - (vma->vm_flags & VM_PFNMAP) || - !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) { + (vma->vm_flags & VM_PFNMAP)) { force_pte = true; - vma_pagesize = PAGE_SIZE; + vma_shift = PAGE_SHIFT; } + if (vma_shift == PUD_SHIFT && + !fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE)) + vma_shift = PMD_SHIFT; + + if (vma_shift == PMD_SHIFT && + !fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) { + force_pte = true; + vma_shift = PAGE_SHIFT; + } + + vma_pagesize = 1UL << vma_shift; if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE) - fault_ipa &= huge_page_mask(hstate_vma(vma)); + fault_ipa &= ~(vma_pagesize - 
1); gfn = fault_ipa >> PAGE_SHIFT; mmap_read_unlock(current->mm); From ada329e6b5b406f33fae665e62caff7814409906 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 15 Sep 2020 18:04:42 +0100 Subject: [PATCH 2/3] KVM: arm64: Do not flush memslot if FWB is supported As a result of a KVM_SET_USER_MEMORY_REGION ioctl, KVM flushes the dcache for the memslot being changed to ensure a consistent view of memory between the host and the guest: the host runs with caches enabled, and it is possible for the data written by the hypervisor to still be in the caches, but the guest is running with stage 1 disabled, meaning data accesses are to Device-nGnRnE memory, bypassing the caches entirely. Flushing the dcache is not necessary when KVM enables FWB, because it forces the guest to use cacheable memory accesses. The current behaviour does not change, as the dcache flush helpers execute the cache operation only if FWB is not enabled, but walking the stage 2 table is avoided. Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200915170442.131635-1-alexandru.elisei@arm.com --- arch/arm64/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 852497bd6d1e..4d68c160a7b5 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1344,7 +1344,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, spin_lock(&kvm->mmu_lock); if (ret) unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size); - else + else if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) stage2_flush_memslot(kvm, memslot); spin_unlock(&kvm->mmu_lock); out: From c9c0279cc02b4e161686de7ccd1973357f29db8c Mon Sep 17 00:00:00 2001 From: Xiaofei Tan Date: Thu, 17 Sep 2020 09:47:49 +0800 Subject: [PATCH 3/3] KVM: arm64: Fix doc warnings in mmu code Fix the following warnings caused by a mismatch between function parameters and comments. 
arch/arm64/kvm/mmu.c:128: warning: Function parameter or member 'mmu' not described in '__unmap_stage2_range' arch/arm64/kvm/mmu.c:128: warning: Function parameter or member 'may_block' not described in '__unmap_stage2_range' arch/arm64/kvm/mmu.c:128: warning: Excess function parameter 'kvm' description in '__unmap_stage2_range' arch/arm64/kvm/mmu.c:499: warning: Function parameter or member 'writable' not described in 'kvm_phys_addr_ioremap' arch/arm64/kvm/mmu.c:538: warning: Function parameter or member 'mmu' not described in 'stage2_wp_range' arch/arm64/kvm/mmu.c:538: warning: Excess function parameter 'kvm' description in 'stage2_wp_range' Signed-off-by: Xiaofei Tan Signed-off-by: Marc Zyngier Acked-by: Will Deacon Link: https://lore.kernel.org/r/1600307269-50957-1-git-send-email-tanxiaofei@huawei.com --- arch/arm64/kvm/mmu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 4d68c160a7b5..c5c26a9cb85b 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -114,9 +114,10 @@ static bool kvm_is_device_pfn(unsigned long pfn) */ /** * unmap_stage2_range -- Clear stage2 page table entries to unmap a range - * @kvm: The VM pointer + * @mmu: The KVM stage-2 MMU pointer * @start: The intermediate physical base address of the range to unmap * @size: The size of the area to unmap + * @may_block: Whether or not we are permitted to block * * Clear a range of stage-2 mappings, lowering the various ref-counts. 
Must * be called while holding mmu_lock (unless for freeing the stage2 pgd before @@ -493,6 +494,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) * @guest_ipa: The IPA at which to insert the mapping * @pa: The physical address of the device * @size: The size of the mapping + * @writable: Whether or not to create a writable mapping */ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t pa, unsigned long size, bool writable) @@ -530,7 +532,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, /** * stage2_wp_range() - write protect stage2 memory region range - * @kvm: The KVM pointer + * @mmu: The KVM stage-2 MMU pointer * @addr: Start address of range * @end: End address of range */