!245 KVM: arm64: Add support for FEAT_TLBIRANGE

Merge pull request !245 from 谢晓东/linux-5.4/devel
2024-11-08 08:45:16 +00:00 · 2024-11-08 08:45:16 +00:00 · f2abf181fe
parent f94b2a0c57 1b2905d358
commit f2abf181fe
9 changed files with 224 additions and 70 deletions
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@ -58,6 +58,8 @@ extern char __kvm_hyp_init_end[];
 extern char __kvm_hyp_vector[];
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_range(struct kvm *kvm,
 					phys_addr_t start, unsigned long pages);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
 extern void __kvm_flush_cpu_context(struct kvm_vcpu *vcpu);
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@ -671,6 +671,8 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
 void kvm_set_ipa_limit(void);
 #define __KVM_HAVE_ARCH_VM_ALLOC
 #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
 #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
 struct kvm *kvm_arch_alloc_vm(void);
 void kvm_arch_free_vm(struct kvm *kvm);
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@ -282,16 +282,77 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
 */
 #define MAX_TLBI_OPS	PTRS_PER_PTE
 /*
 * __flush_tlb_range_op - Perform TLBI operation upon a range
 *
 * @op:	TLBI instruction that operates on a range (has 'r' prefix)
 * @start:	The start address of the range
 * @pages:	Range as the number of pages from 'start'
 * @stride:	Flush granularity
 * @asid:	The ASID of the task (0 for IPA instructions)
 * @tlb_level:	Translation Table level hint, if known
 * @tlbi_user:	If 'true', call an additional __tlbi_user()
 *              (typically for user ASIDs). 'false' for IPA instructions
 *
 * 'op' is given without the 'r' prefix: the non-range path uses it as
 * is, and the range path pastes 'r' in front of it (r##op).
 *
 * 'start' and 'pages' are updated in place as the range is consumed,
 * so callers must pass modifiable lvalues and must not rely on their
 * values afterwards. The macro body declares its own 'num', 'scale'
 * and 'addr' locals.
 *
 * When the CPU does not support TLB range operations, flush the TLB
 * entries one by one at the granularity of 'stride'. If the TLB
 * range ops are supported, then:
 *
 * 1. If 'pages' is odd, flush the first page through non-range
 *    operations;
 *
 * 2. For remaining pages: the minimum range granularity is decided
 *    by 'scale', so multiple range TLBI operations may be required.
 *    Start from scale = 0, flush the corresponding number of pages
 *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
 *    until no pages left.
 *
 * Note that certain ranges can be represented by either num = 31 and
 * scale or num = 0 and scale + 1. The loop below favours the latter
 * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
 */
 #define __flush_tlb_range_op(op, start, pages, stride,			\
 				asid, tlb_level, tlbi_user)		\
 do {									\
 	int num = 0;							\
 	int scale = 0;							\
 	unsigned long addr;						\
 									\
 	while (pages > 0) {						\
 		if (!system_supports_tlb_range() ||			\
 		    pages % 2 == 1) {					\
 			addr = __TLBI_VADDR(start, asid);		\
 			__tlbi_level(op, addr, tlb_level);		\
 			if (tlbi_user)					\
 				__tlbi_user_level(op, addr, tlb_level);	\
 			start += stride;				\
 			pages -= stride >> PAGE_SHIFT;			\
 			continue;					\
 		}							\
 									\
 		num = __TLBI_RANGE_NUM(pages, scale);			\
 		if (num >= 0) {						\
 			addr = __TLBI_VADDR_RANGE(start, asid, scale,	\
 						  num, tlb_level);	\
 			__tlbi(r##op, addr);				\
 			if (tlbi_user)					\
 				__tlbi_user(r##op, addr);		\
 			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
 			pages -= __TLBI_RANGE_PAGES(num, scale);	\
 		}							\
 		scale++;						\
 	}								\
 } while (0)
 /*
 * Stage-2 (IPA) wrapper around __flush_tlb_range_op(): passes ASID 0 and
 * 'false' for tlbi_user, so no additional __tlbi_user() call is made.
 */
 #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
 	__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
 static inline void __flush_tlb_range(struct vm_area_struct *vma,
 				     unsigned long start, unsigned long end,
 				     unsigned long stride, bool last_level,
 				     int tlb_level)
 {
-	int num = 0;
+	unsigned long asid, pages;
 	int scale = 0;
 	unsigned long asid = ASID(vma->vm_mm);
 	unsigned long addr;
 	unsigned long pages;
 	start = round_down(start, stride);
 	end = round_up(end, stride);
@ -311,57 +372,13 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
 	}
 	dsb(ishst);
 	asid = ASID(vma->vm_mm);
-	/*
+	if (last_level)
-	 * When the CPU does not support TLB range operations, flush the TLB
+		__flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
-	 * entries one by one at the granularity of 'stride'. If the the TLB
+	else
-	 * range ops are supported, then:
+		__flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
 	 *
 	 * 1. If 'pages' is odd, flush the first page through non-range
 	 *    operations;
 	 *
 	 * 2. For remaining pages: the minimum range granularity is decided
 	 *    by 'scale', so multiple range TLBI operations may be required.
 	 *    Start from scale = 0, flush the corresponding number of pages
 	 *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
 	 *    until no pages left.
 	 *
 	 * Note that certain ranges can be represented by either num = 31 and
 	 * scale or num = 0 and scale + 1. The loop below favours the latter
 	 * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
 	 */
 	while (pages > 0) {
 		if (!system_supports_tlb_range() ||
 		    pages % 2 == 1) {
 			addr = __TLBI_VADDR(start, asid);
 			if (last_level) {
 				__tlbi_level(vale1is, addr, tlb_level);
 				__tlbi_user_level(vale1is, addr, tlb_level);
 			} else {
 				__tlbi_level(vae1is, addr, tlb_level);
 				__tlbi_user_level(vae1is, addr, tlb_level);
 			}
 			start += stride;
 			pages -= stride >> PAGE_SHIFT;
 			continue;
 		}
 		num = __TLBI_RANGE_NUM(pages, scale);
 		if (num >= 0) {
 			addr = __TLBI_VADDR_RANGE(start, asid, scale,
 						  num, tlb_level);
 			if (last_level) {
 				__tlbi(rvale1is, addr);
 				__tlbi_user(rvale1is, addr);
 			} else {
 				__tlbi(rvae1is, addr);
 				__tlbi_user(rvae1is, addr);
 			}
 			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
 			pages -= __TLBI_RANGE_PAGES(num, scale);
 		}
 		scale++;
 	}
 	dsb(ish);
 }
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@ -24,7 +24,6 @@ config KVM
 	select MMU_NOTIFIER
 	select PREEMPT_NOTIFIERS
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select HAVE_KVM_ARCH_TLB_FLUSH_ALL
 	select KVM_MMIO
 	select KVM_ARM_HOST
 	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@ -165,6 +165,41 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 	__tlb_switch_to_host(kvm, &cxt);
 }
 /*
 * __kvm_tlb_flush_vmid_range - invalidate stage-2 TLB entries for an IPA range
 * @kvm:	VM whose VMID is targeted; converted with kern_hyp_va() before use
 * @start:	base IPA of the range, rounded down to a PAGE_SIZE boundary
 * @pages:	number of PAGE_SIZE pages to invalidate starting at @start
 *
 * Switches to the guest's VMID, invalidates the range with ipas2e1is
 * operations (range form when available), then invalidates all stage-1
 * entries for the VMID with vmalle1is before switching back to the host.
 */
 void __hyp_text __kvm_tlb_flush_vmid_range(struct kvm *kvm,
 				phys_addr_t start, unsigned long pages)
 {
 	struct tlb_inv_context cxt;
 	unsigned long stride;
 	/*
 	 * Since the range of addresses may not be mapped at
 	 * the same level, assume the worst case as PAGE_SIZE
 	 */
 	stride = PAGE_SIZE;
 	start = round_down(start, stride);
 	/*
 	 * Issue the store barrier on every configuration, not only with
 	 * VHE, so that earlier page-table writes are ordered before the
 	 * TLBIs below. NOTE(review): __tlb_switch_to_guest() is not assumed
 	 * to provide this barrier on non-VHE -- confirm.
 	 */
 	dsb(ishst);
 	/* Switch to requested VMID */
 	kvm = kern_hyp_va(kvm);
 	__tlb_switch_to_guest(kvm, &cxt);
 	/* Level hint 0: the mapping level of the range is not known here */
 	__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
 	/* Complete the stage-2 invalidation before dropping stage-1 entries */
 	dsb(ish);
 	__tlbi(vmalle1is);
 	dsb(ish);
 	isb();
 	/* See the comment in __kvm_tlb_flush_vmid_ipa() */
 	if (!has_vhe() && icache_is_vpipt())
 		__flush_icache_all();
 	__tlb_switch_to_host(kvm, &cxt);
 }
 void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
 {
 	struct tlb_inv_context cxt;
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@ -813,6 +813,9 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible);
 void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
 void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
 				   const struct kvm_memory_slot *memslot);
 void kvm_reload_remote_mmus(struct kvm *kvm);
 bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
@ -917,11 +920,23 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
 }
 #endif
-#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
+#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
-static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
 	return -ENOTSUPP;
 }
 #else
 int kvm_arch_flush_remote_tlbs(struct kvm *kvm);
 #endif
 /*
 * Ranged remote TLB flush hook. Architectures that define
 * __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE provide their own
 * implementation; otherwise this stub reports the operation as
 * unsupported by returning -EOPNOTSUPP.
 */
 #ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
 static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
 						    gfn_t gfn, u64 nr_pages)
 {
 	return -EOPNOTSUPP;
 }
 #else
 int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
 #endif
 #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@ -36,9 +36,6 @@ config HAVE_KVM_CPU_RELAX_INTERCEPT
 config KVM_VFIO
       bool
 config HAVE_KVM_ARCH_TLB_FLUSH_ALL
       bool
 config HAVE_KVM_INVALID_WAKEUPS
       bool
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@ -49,14 +49,15 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
 }
 /**
- * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
+ * kvm_arch_flush_remote_tlbs() - flush all VM TLB entries for v7/8
 * @kvm:	pointer to kvm structure.
 *
 * Interface to HYP function to flush all VM TLB entries
 */
-void kvm_flush_remote_tlbs(struct kvm *kvm)
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
 	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
 	return 0;
 }
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
@ -64,6 +65,48 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
 }
 /**
 * kvm_tlb_flush_vmid_range() - flush stage-2 TLB entries covering an IPA range
 *
 * @kvm:	pointer to kvm structure
 * @addr:	base Intermediate physical address of the range
 * @size:	length of the range in bytes, counted from @addr
 *
 * Without range TLBI support the whole VMID is flushed instead. Otherwise
 * the range is handed to HYP in chunks of at most MAX_TLBI_RANGE_PAGES pages.
 */
 void kvm_tlb_flush_vmid_range(struct kvm *kvm,
 			      phys_addr_t addr, size_t size)
 {
 	unsigned long remaining, chunk;
 	if (!system_supports_tlb_range()) {
 		kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
 		return;
 	}
 	for (remaining = size >> PAGE_SHIFT; remaining > 0; remaining -= chunk) {
 		chunk = min(remaining, MAX_TLBI_RANGE_PAGES);
 		kvm_call_hyp(__kvm_tlb_flush_vmid_range, kvm, addr, chunk);
 		/* Advance past the pages just handed to HYP */
 		addr += chunk << PAGE_SHIFT;
 	}
 }
 static bool stage2_unmap_defer_tlb_flush(void)
 {
 	/*
 	 * If FEAT_TLBIRANGE is implemented, defer the individual
 	 * TLB invalidations until the entire walk is finished, and
 	 * then use the range-based TLBI instructions to do the
 	 * invalidations. Condition deferred TLB invalidation on the
 	 * system supporting FWB as the optimization is entirely
 	 * pointless when the unmap walker needs to perform CMOs.
 	 */
 	return system_supports_tlb_range() && cpus_have_const_cap(ARM64_HAS_STAGE2_FWB);
 }
 /*
 * D-Cache management functions. They take the page table entries by
 * value, as they are flushing the cache using the kernel mapping (or
@ -84,6 +127,15 @@ static void kvm_flush_dcache_pud(pud_t pud)
 	__kvm_flush_dcache_pud(pud);
 }
 /*
 * Arch hook for ranged remote TLB flushes: converts the guest frame
 * number and page count into a byte range and flushes it for this VM.
 * Always returns 0.
 */
 int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
 				     gfn_t gfn, u64 nr_pages)
 {
 	phys_addr_t base = gfn << PAGE_SHIFT;
 	size_t len = nr_pages << PAGE_SHIFT;
 	kvm_tlb_flush_vmid_range(kvm, base, len);
 	return 0;
 }
 static bool kvm_is_device_pfn(unsigned long pfn)
 {
 	return !pfn_valid(pfn);
@ -249,9 +301,10 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
 	do {
 		if (!pte_none(*pte)) {
 			pte_t old_pte = *pte;
 			kvm_set_pte(pte, __pte(0));
-			kvm_tlb_flush_vmid_ipa(kvm, addr);
+			
 			if (!stage2_unmap_defer_tlb_flush())
 				kvm_tlb_flush_vmid_ipa(kvm, addr);
 			/* No need to invalidate the cache for device mappings */
 			if (!kvm_is_device_pfn(pte_pfn(old_pte)))
@ -354,6 +407,11 @@ static void __unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size,
 		next = stage2_pgd_addr_end(kvm, addr, end);
 		if (!stage2_pgd_none(kvm, *pgd))
 			unmap_stage2_puds(kvm, pgd, addr, next);
 		if (stage2_unmap_defer_tlb_flush())
 			/* Perform the deferred TLB invalidations */
 			kvm_tlb_flush_vmid_range(kvm, addr, size);
 		/*
 		 * If the range is too large, release the kvm->mmu_lock
 		 * to prevent starvation and lockup detector warnings.
@ -1553,7 +1611,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
 	spin_lock(&kvm->mmu_lock);
 	stage2_wp_range(kvm, start, end);
 	spin_unlock(&kvm->mmu_lock);
-	kvm_flush_remote_tlbs(kvm);
+	kvm_flush_remote_tlbs_memslot(kvm, memslot);
 }
 /**
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@ -295,7 +295,33 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 	return called;
 }
-#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
+void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
 {
 	/*
 	 * A non-zero return means the architecture's range-based TLB
 	 * invalidation is unsupported or could not be performed for
 	 * whatever reason; flush the entire TLBs in that case.
 	 */
 	if (kvm_arch_flush_remote_tlbs_range(kvm, gfn, nr_pages))
 		kvm_flush_remote_tlbs(kvm);
 }
 void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
 				   const struct kvm_memory_slot *memslot)
 {
 	/*
 	 * All current use cases for flushing the TLBs for a specific memslot
 	 * are related to dirty logging, and many do the TLB flush out of
 	 * mmu_lock. The interaction between the various operations on memslot
 	 * must be serialized by slots_locks to ensure the TLB flush from one
 	 * operation is observed by any other operation on the same memslot.
 	 */
 	lockdep_assert_held(&kvm->slots_lock);
 	kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
 }
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
 	/*
@ -315,13 +341,12 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 	 * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
 	 * barrier here.
 	 */
-	if (!kvm_arch_flush_remote_tlb(kvm)
+	if (!kvm_arch_flush_remote_tlbs(kvm)
 	    || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
 		++kvm->stat.remote_tlb_flush;
 	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 #endif
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
@ -1317,7 +1342,8 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
 		}
 		spin_unlock(&kvm->mmu_lock);
 	}
-
+	if (flush)
 		kvm_flush_remote_tlbs_memslot(kvm, memslot);
 	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
 		return -EFAULT;
 	return 0;
@ -1394,6 +1420,9 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,
 	}
 	spin_unlock(&kvm->mmu_lock);
 	if(flush)
 		kvm_flush_remote_tlbs_memslot(kvm, memslot);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_clear_dirty_log_protect);