KVM: PPC: Book3S HV: Lockless tlbie for HPT hcalls
tlbies to an LPAR do not have to be serialised since POWER4/PPC970,
after which the MMU_FTR_LOCKLESS_TLBIE feature was introduced to
avoid tlbie locking.
Since commit c17b98cf60 ("KVM: PPC: Book3S HV: Remove code for PPC970
processors"), KVM no longer supports processors that do not have this
feature, so the tlbie locking can be removed completely.
A sanity check for the feature is put in kvmppc_mmu_hv_init.
Testing was done on a POWER9 system in HPT mode, with a -smp 32 guest
in HPT mode. 32 instances of the powerpc fork benchmark from selftests
were run with --fork, and the results measured.
Without this patch, total throughput was about 13.5K/sec, and this is
the top of the host profile:
74.52% [k] do_tlbies
2.95% [k] kvmppc_book3s_hv_page_fault
1.80% [k] calc_checksum
1.80% [k] kvmppc_vcpu_run_hv
1.49% [k] kvmppc_run_core
After this patch, throughput was about 51K/sec, with this profile:
21.28% [k] do_tlbies
5.26% [k] kvmppc_run_core
4.88% [k] kvmppc_book3s_hv_page_fault
3.30% [k] _raw_spin_lock_irqsave
3.25% [k] gup_pgd_range
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
parent
f19d1f367a
commit
b755745147
|
@@ -269,7 +269,6 @@ struct kvm_arch {
|
|||
unsigned long host_lpcr;
|
||||
unsigned long sdr1;
|
||||
unsigned long host_sdr1;
|
||||
int tlbie_lock;
|
||||
unsigned long lpcr;
|
||||
unsigned long vrma_slb_v;
|
||||
int mmu_ready;
|
||||
|
|
|
@@ -272,6 +272,9 @@ int kvmppc_mmu_hv_init(void)
|
|||
if (!cpu_has_feature(CPU_FTR_HVMODE))
|
||||
return -EINVAL;
|
||||
|
||||
if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
|
||||
return -EINVAL;
|
||||
|
||||
/* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
|
||||
host_lpid = mfspr(SPRN_LPID);
|
||||
rsvd_lpid = LPID_RSVD;
|
||||
|
|
|
@@ -435,24 +435,6 @@ static inline int is_mmio_hpte(unsigned long v, unsigned long r)
|
|||
(HPTE_R_KEY_HI | HPTE_R_KEY_LO));
|
||||
}
|
||||
|
||||
/*
 * try_lock_tlbie() - try to take the word-sized lock at *lock.
 *
 * @lock: address of the lock word; 0 is treated as "free".
 *
 * The asm sequence loads *lock with lwarx into 'old' (%1). If the loaded
 * value is non-zero it branches straight to label 2 without storing, so a
 * lock that is already held is left untouched. Otherwise it tries to store
 * 'token' (%3, LOCK_TOKEN) with stwcx.; if that conditional store does not
 * succeed the sequence restarts from label 1. The isync is executed only on
 * the path where the store succeeded (presumably as the acquire barrier --
 * confirm against the Power ISA locking guidance).
 *
 * The function does not wait for a held lock to be released; it reports the
 * outcome and leaves retrying to the caller.
 *
 * NOTE(review): 'tmp' is declared as early-clobber output %0 but the asm
 * template never references %0, so it only reserves a register.
 *
 * Return: 1 if the lock word was observed as 0 (lock taken by this call),
 *         0 if it was observed as non-zero (lock held elsewhere).
 */
static inline int try_lock_tlbie(unsigned int *lock)
|
||||
{
|
||||
unsigned int tmp, old;
|
||||
unsigned int token = LOCK_TOKEN;
|
||||
|
||||
asm volatile("1:lwarx %1,0,%2\n"
|
||||
" cmpwi cr0,%1,0\n"
|
||||
" bne 2f\n"
|
||||
" stwcx. %3,0,%2\n"
|
||||
" bne- 1b\n"
|
||||
" isync\n"
|
||||
"2:"
|
||||
: "=&r" (tmp), "=&r" (old)
|
||||
: "r" (lock), "r" (token)
|
||||
: "cc", "memory");
|
||||
/* 'old' holds the value lwarx read: zero means we stored the token. */
return old == 0;
|
||||
}
|
||||
|
||||
static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
|
||||
long npages, int global, bool need_sync)
|
||||
{
|
||||
|
@@ -464,8 +446,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
|
|||
* the RS field, this is backwards-compatible with P7 and P8.
|
||||
*/
|
||||
if (global) {
|
||||
while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
|
||||
cpu_relax();
|
||||
if (need_sync)
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
for (i = 0; i < npages; ++i) {
|
||||
|
@@ -484,7 +464,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
|
|||
}
|
||||
|
||||
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
|
||||
kvm->arch.tlbie_lock = 0;
|
||||
} else {
|
||||
if (need_sync)
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
|
|
Loading…
Reference in New Issue