!245 KVM: arm64: Add support for FEAT_TLBIRANGE
Merge pull request !245 from 谢晓东/linux-5.4/devel
This commit is contained in:
commit
f2abf181fe
|
@ -58,6 +58,8 @@ extern char __kvm_hyp_init_end[];
|
|||
extern char __kvm_hyp_vector[];
|
||||
|
||||
extern void __kvm_flush_vm_context(void);
|
||||
extern void __kvm_tlb_flush_vmid_range(struct kvm *kvm,
|
||||
phys_addr_t start, unsigned long pages);
|
||||
extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
|
||||
extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
|
||||
extern void __kvm_flush_cpu_context(struct kvm_vcpu *vcpu);
|
||||
|
|
|
@ -671,6 +671,8 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
|
|||
void kvm_set_ipa_limit(void);
|
||||
|
||||
#define __KVM_HAVE_ARCH_VM_ALLOC
|
||||
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
|
||||
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
|
||||
struct kvm *kvm_arch_alloc_vm(void);
|
||||
void kvm_arch_free_vm(struct kvm *kvm);
|
||||
|
||||
|
|
|
@ -282,16 +282,77 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
|
|||
*/
|
||||
#define MAX_TLBI_OPS PTRS_PER_PTE
|
||||
|
||||
/*
|
||||
* __flush_tlb_range_op - Perform TLBI operation upon a range
|
||||
*
|
||||
* @op: TLBI instruction that operates on a range (has 'r' prefix)
|
||||
* @start: The start address of the range
|
||||
* @pages: Range as the number of pages from 'start'
|
||||
* @stride: Flush granularity
|
||||
* @asid: The ASID of the task (0 for IPA instructions)
|
||||
* @tlb_level: Translation Table level hint, if known
|
||||
* @tlbi_user: If 'true', call an additional __tlbi_user()
|
||||
* (typically for user ASIDs). 'flase' for IPA instructions
|
||||
*
|
||||
* When the CPU does not support TLB range operations, flush the TLB
|
||||
* entries one by one at the granularity of 'stride'. If the TLB
|
||||
* range ops are supported, then:
|
||||
*
|
||||
* 1. If 'pages' is odd, flush the first page through non-range
|
||||
* operations;
|
||||
*
|
||||
* 2. For remaining pages: the minimum range granularity is decided
|
||||
* by 'scale', so multiple range TLBI operations may be required.
|
||||
* Start from scale = 0, flush the corresponding number of pages
|
||||
* ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
|
||||
* until no pages left.
|
||||
*
|
||||
* Note that certain ranges can be represented by either num = 31 and
|
||||
* scale or num = 0 and scale + 1. The loop below favours the latter
|
||||
* since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
|
||||
*/
|
||||
#define __flush_tlb_range_op(op, start, pages, stride, \
|
||||
asid, tlb_level, tlbi_user) \
|
||||
do { \
|
||||
int num = 0; \
|
||||
int scale = 0; \
|
||||
unsigned long addr; \
|
||||
\
|
||||
while (pages > 0) { \
|
||||
if (!system_supports_tlb_range() || \
|
||||
pages % 2 == 1) { \
|
||||
addr = __TLBI_VADDR(start, asid); \
|
||||
__tlbi_level(op, addr, tlb_level); \
|
||||
if (tlbi_user) \
|
||||
__tlbi_user_level(op, addr, tlb_level); \
|
||||
start += stride; \
|
||||
pages -= stride >> PAGE_SHIFT; \
|
||||
continue; \
|
||||
} \
|
||||
\
|
||||
num = __TLBI_RANGE_NUM(pages, scale); \
|
||||
if (num >= 0) { \
|
||||
addr = __TLBI_VADDR_RANGE(start, asid, scale, \
|
||||
num, tlb_level); \
|
||||
__tlbi(r##op, addr); \
|
||||
if (tlbi_user) \
|
||||
__tlbi_user(r##op, addr); \
|
||||
start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
|
||||
pages -= __TLBI_RANGE_PAGES(num, scale); \
|
||||
} \
|
||||
scale++; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
|
||||
__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
|
||||
|
||||
static inline void __flush_tlb_range(struct vm_area_struct *vma,
|
||||
unsigned long start, unsigned long end,
|
||||
unsigned long stride, bool last_level,
|
||||
int tlb_level)
|
||||
{
|
||||
int num = 0;
|
||||
int scale = 0;
|
||||
unsigned long asid = ASID(vma->vm_mm);
|
||||
unsigned long addr;
|
||||
unsigned long pages;
|
||||
unsigned long asid, pages;
|
||||
|
||||
start = round_down(start, stride);
|
||||
end = round_up(end, stride);
|
||||
|
@ -311,57 +372,13 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
|
|||
}
|
||||
|
||||
dsb(ishst);
|
||||
asid = ASID(vma->vm_mm);
|
||||
|
||||
if (last_level)
|
||||
__flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
|
||||
else
|
||||
__flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
|
||||
|
||||
/*
|
||||
* When the CPU does not support TLB range operations, flush the TLB
|
||||
* entries one by one at the granularity of 'stride'. If the the TLB
|
||||
* range ops are supported, then:
|
||||
*
|
||||
* 1. If 'pages' is odd, flush the first page through non-range
|
||||
* operations;
|
||||
*
|
||||
* 2. For remaining pages: the minimum range granularity is decided
|
||||
* by 'scale', so multiple range TLBI operations may be required.
|
||||
* Start from scale = 0, flush the corresponding number of pages
|
||||
* ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
|
||||
* until no pages left.
|
||||
*
|
||||
* Note that certain ranges can be represented by either num = 31 and
|
||||
* scale or num = 0 and scale + 1. The loop below favours the latter
|
||||
* since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
|
||||
*/
|
||||
while (pages > 0) {
|
||||
if (!system_supports_tlb_range() ||
|
||||
pages % 2 == 1) {
|
||||
addr = __TLBI_VADDR(start, asid);
|
||||
if (last_level) {
|
||||
__tlbi_level(vale1is, addr, tlb_level);
|
||||
__tlbi_user_level(vale1is, addr, tlb_level);
|
||||
} else {
|
||||
__tlbi_level(vae1is, addr, tlb_level);
|
||||
__tlbi_user_level(vae1is, addr, tlb_level);
|
||||
}
|
||||
start += stride;
|
||||
pages -= stride >> PAGE_SHIFT;
|
||||
continue;
|
||||
}
|
||||
|
||||
num = __TLBI_RANGE_NUM(pages, scale);
|
||||
if (num >= 0) {
|
||||
addr = __TLBI_VADDR_RANGE(start, asid, scale,
|
||||
num, tlb_level);
|
||||
if (last_level) {
|
||||
__tlbi(rvale1is, addr);
|
||||
__tlbi_user(rvale1is, addr);
|
||||
} else {
|
||||
__tlbi(rvae1is, addr);
|
||||
__tlbi_user(rvae1is, addr);
|
||||
}
|
||||
start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
|
||||
pages -= __TLBI_RANGE_PAGES(num, scale);
|
||||
}
|
||||
scale++;
|
||||
}
|
||||
dsb(ish);
|
||||
}
|
||||
|
||||
|
|
|
@ -24,7 +24,6 @@ config KVM
|
|||
select MMU_NOTIFIER
|
||||
select PREEMPT_NOTIFIERS
|
||||
select HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
|
||||
select KVM_MMIO
|
||||
select KVM_ARM_HOST
|
||||
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
|
|
|
@ -165,6 +165,41 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
|
|||
__tlb_switch_to_host(kvm, &cxt);
|
||||
}
|
||||
|
||||
void __hyp_text __kvm_tlb_flush_vmid_range(struct kvm *kvm,
|
||||
phys_addr_t start, unsigned long pages)
|
||||
{
|
||||
struct tlb_inv_context cxt;
|
||||
unsigned long stride;
|
||||
|
||||
/*
|
||||
* Since the range of addresses may not be mapped at
|
||||
* the same level, assume the worst case as PAGE_SIZE
|
||||
*/
|
||||
stride = PAGE_SIZE;
|
||||
start = round_down(start, stride);
|
||||
|
||||
|
||||
if (has_vhe())
|
||||
dsb(ishst);
|
||||
|
||||
/* Switch to requested VMID */
|
||||
kvm = kern_hyp_va(kvm);
|
||||
__tlb_switch_to_guest(kvm, &cxt);
|
||||
|
||||
__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
|
||||
|
||||
dsb(ish);
|
||||
__tlbi(vmalle1is);
|
||||
dsb(ish);
|
||||
isb();
|
||||
|
||||
/* See the comment in __kvm_tlb_flush_vmid_ipa() */
|
||||
if (!has_vhe() && icache_is_vpipt())
|
||||
__flush_icache_all();
|
||||
|
||||
__tlb_switch_to_host(kvm, &cxt);
|
||||
}
|
||||
|
||||
void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
|
||||
{
|
||||
struct tlb_inv_context cxt;
|
||||
|
|
|
@ -813,6 +813,9 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target);
|
|||
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible);
|
||||
|
||||
void kvm_flush_remote_tlbs(struct kvm *kvm);
|
||||
void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
|
||||
void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *memslot);
|
||||
void kvm_reload_remote_mmus(struct kvm *kvm);
|
||||
|
||||
bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
|
||||
|
@ -917,11 +920,23 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
|
||||
static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
|
||||
#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
|
||||
static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
|
||||
{
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
#else
|
||||
int kvm_arch_flush_remote_tlbs(struct kvm *kvm);
|
||||
#endif
|
||||
|
||||
#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
|
||||
static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
|
||||
gfn_t gfn, u64 nr_pages)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
#else
|
||||
int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
|
||||
#endif
|
||||
|
||||
#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
|
||||
|
|
|
@ -36,9 +36,6 @@ config HAVE_KVM_CPU_RELAX_INTERCEPT
|
|||
config KVM_VFIO
|
||||
bool
|
||||
|
||||
config HAVE_KVM_ARCH_TLB_FLUSH_ALL
|
||||
bool
|
||||
|
||||
config HAVE_KVM_INVALID_WAKEUPS
|
||||
bool
|
||||
|
||||
|
|
|
@ -49,14 +49,15 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
|
|||
}
|
||||
|
||||
/**
|
||||
* kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
|
||||
* kvm_arch_flush_remote_tlbs() - flush all VM TLB entries for v7/8
|
||||
* @kvm: pointer to kvm structure.
|
||||
*
|
||||
* Interface to HYP function to flush all VM TLB entries
|
||||
*/
|
||||
void kvm_flush_remote_tlbs(struct kvm *kvm)
|
||||
int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
|
||||
{
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
|
||||
|
@ -64,6 +65,48 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
|
|||
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries
|
||||
*
|
||||
* @kvm: pointer to kvm structure
|
||||
* @addr: The base Intermediate physical address from which to invalidate
|
||||
* @size: Size of the range from the base to invalidate
|
||||
*/
|
||||
|
||||
void kvm_tlb_flush_vmid_range(struct kvm *kvm,
|
||||
phys_addr_t addr, size_t size)
|
||||
{
|
||||
unsigned long pages, inval_pages;
|
||||
|
||||
if (!system_supports_tlb_range()) {
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
|
||||
return;
|
||||
}
|
||||
|
||||
pages = size >> PAGE_SHIFT;
|
||||
while (pages > 0) {
|
||||
inval_pages = min(pages, MAX_TLBI_RANGE_PAGES);
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid_range,
|
||||
kvm, addr, inval_pages);
|
||||
|
||||
addr += inval_pages << PAGE_SHIFT;
|
||||
pages -= inval_pages;
|
||||
}
|
||||
}
|
||||
|
||||
static bool stage2_unmap_defer_tlb_flush(void)
|
||||
{
|
||||
/*
|
||||
* If FEAT_TLBIRANGE is implemented, defer the individual
|
||||
* TLB invalidations until the entire walk is finished, and
|
||||
* then use the range-based TLBI instructions to do the
|
||||
* invalidations. Condition deferred TLB invalidation on the
|
||||
* system supporting FWB as the optimization is entirely
|
||||
* pointless when the unmap walker needs to perform CMOs.
|
||||
*/
|
||||
return system_supports_tlb_range() && cpus_have_const_cap(ARM64_HAS_STAGE2_FWB);
|
||||
}
|
||||
|
||||
/*
|
||||
* D-Cache management functions. They take the page table entries by
|
||||
* value, as they are flushing the cache using the kernel mapping (or
|
||||
|
@ -84,6 +127,15 @@ static void kvm_flush_dcache_pud(pud_t pud)
|
|||
__kvm_flush_dcache_pud(pud);
|
||||
}
|
||||
|
||||
|
||||
int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
|
||||
gfn_t gfn, u64 nr_pages)
|
||||
{
|
||||
kvm_tlb_flush_vmid_range(kvm,gfn << PAGE_SHIFT,
|
||||
nr_pages << PAGE_SHIFT);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool kvm_is_device_pfn(unsigned long pfn)
|
||||
{
|
||||
return !pfn_valid(pfn);
|
||||
|
@ -249,9 +301,10 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
|
|||
do {
|
||||
if (!pte_none(*pte)) {
|
||||
pte_t old_pte = *pte;
|
||||
|
||||
kvm_set_pte(pte, __pte(0));
|
||||
kvm_tlb_flush_vmid_ipa(kvm, addr);
|
||||
|
||||
if (!stage2_unmap_defer_tlb_flush())
|
||||
kvm_tlb_flush_vmid_ipa(kvm, addr);
|
||||
|
||||
/* No need to invalidate the cache for device mappings */
|
||||
if (!kvm_is_device_pfn(pte_pfn(old_pte)))
|
||||
|
@ -354,6 +407,11 @@ static void __unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size,
|
|||
next = stage2_pgd_addr_end(kvm, addr, end);
|
||||
if (!stage2_pgd_none(kvm, *pgd))
|
||||
unmap_stage2_puds(kvm, pgd, addr, next);
|
||||
|
||||
if (stage2_unmap_defer_tlb_flush())
|
||||
/* Perform the deferred TLB invalidations */
|
||||
kvm_tlb_flush_vmid_range(kvm, addr, size);
|
||||
|
||||
/*
|
||||
* If the range is too large, release the kvm->mmu_lock
|
||||
* to prevent starvation and lockup detector warnings.
|
||||
|
@ -1553,7 +1611,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
|
|||
spin_lock(&kvm->mmu_lock);
|
||||
stage2_wp_range(kvm, start, end);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
kvm_flush_remote_tlbs_memslot(kvm, memslot);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -295,7 +295,33 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
|
|||
return called;
|
||||
}
|
||||
|
||||
#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
|
||||
void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
|
||||
{
|
||||
if (!kvm_arch_flush_remote_tlbs_range(kvm, gfn, nr_pages))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Fall back to a flushing entire TLBs if the architecture range-based
|
||||
* TLB invalidation is unsupported or can't be performed for whatever
|
||||
* reason.
|
||||
*/
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
|
||||
void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *memslot)
|
||||
{
|
||||
/*
|
||||
* All current use cases for flushing the TLBs for a specific memslot
|
||||
* are related to dirty logging, and many do the TLB flush out of
|
||||
* mmu_lock. The interaction between the various operations on memslot
|
||||
* must be serialized by slots_locks to ensure the TLB flush from one
|
||||
* operation is observed by any other operation on the same memslot.
|
||||
*/
|
||||
lockdep_assert_held(&kvm->slots_lock);
|
||||
kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
|
||||
}
|
||||
|
||||
void kvm_flush_remote_tlbs(struct kvm *kvm)
|
||||
{
|
||||
/*
|
||||
|
@ -315,13 +341,12 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
|
|||
* kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
|
||||
* barrier here.
|
||||
*/
|
||||
if (!kvm_arch_flush_remote_tlb(kvm)
|
||||
if (!kvm_arch_flush_remote_tlbs(kvm)
|
||||
|| kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
|
||||
++kvm->stat.remote_tlb_flush;
|
||||
cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
|
||||
#endif
|
||||
|
||||
void kvm_reload_remote_mmus(struct kvm *kvm)
|
||||
{
|
||||
|
@ -1317,7 +1342,8 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
|
|||
}
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs_memslot(kvm, memslot);
|
||||
if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
|
@ -1393,6 +1419,9 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,
|
|||
}
|
||||
}
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
|
||||
if(flush)
|
||||
kvm_flush_remote_tlbs_memslot(kvm, memslot);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue