From 434398ab5eed03dbc0075af9436e871712bfb45a Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Tue, 23 Nov 2021 19:52:27 +1000
Subject: [PATCH] KVM: PPC: Book3S HV P9: Avoid cpu_in_guest atomics on entry
 and exit

cpu_in_guest is set to determine if a CPU needs to be IPI'ed to exit
the guest and notice the need_tlb_flush bit.

This can be implemented as a global per-CPU pointer to the currently
running guest instead of per-guest cpumasks, saving 2 atomics per
entry/exit.

P7/8 doesn't require cpu_in_guest, nor does a nested HV (only the L0
does), so move it to the P9 HV path.

Signed-off-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20211123095231.1036501-50-npiggin@gmail.com
---
 arch/powerpc/include/asm/kvm_book3s_64.h |  1 -
 arch/powerpc/include/asm/kvm_host.h      |  1 -
 arch/powerpc/kvm/book3s_hv.c             | 39 +++++++++++++-----------
 3 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 96f0fda50a07..fe07558173ef 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -44,7 +44,6 @@ struct kvm_nested_guest {
 	struct mutex tlb_lock;		/* serialize page faults and tlbies */
 	struct kvm_nested_guest *next;
 	cpumask_t need_tlb_flush;
-	cpumask_t cpu_in_guest;
 	short prev_cpu[NR_CPUS];
 	u8 radix;			/* is this nested guest radix */
 };
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d7004412b859..17263276189e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -287,7 +287,6 @@ struct kvm_arch {
 	u32 online_vcores;
 	atomic_t hpte_mod_interest;
 	cpumask_t need_tlb_flush;
-	cpumask_t cpu_in_guest;
 	u8 radix;
 	u8 fwnmi_enabled;
 	u8 secure_guest;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 4056605d3367..00c1e102c103 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3009,19 +3009,18 @@ static void kvmppc_release_hwthread(int cpu)
 	tpaca->kvm_hstate.kvm_split_mode = NULL;
 }
 
+static DEFINE_PER_CPU(struct kvm *, cpu_in_guest);
+
 static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
 {
 	struct kvm_nested_guest *nested = vcpu->arch.nested;
-	cpumask_t *cpu_in_guest, *need_tlb_flush;
+	cpumask_t *need_tlb_flush;
 	int i;
 
-	if (nested) {
+	if (nested)
 		need_tlb_flush = &nested->need_tlb_flush;
-		cpu_in_guest = &nested->cpu_in_guest;
-	} else {
+	else
 		need_tlb_flush = &kvm->arch.need_tlb_flush;
-		cpu_in_guest = &kvm->arch.cpu_in_guest;
-	}
 
 	cpu = cpu_first_tlb_thread_sibling(cpu);
 	for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
@@ -3029,16 +3028,21 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
 		cpumask_set_cpu(i, need_tlb_flush);
 
 	/*
-	 * Make sure setting of bit in need_tlb_flush precedes
-	 * testing of cpu_in_guest bits.  The matching barrier on
-	 * the other side is the first smp_mb() in kvmppc_run_core().
+	 * Make sure setting of bit in need_tlb_flush precedes testing of
+	 * cpu_in_guest. The matching barrier on the other side is hwsync
+	 * when switching to guest MMU mode, which happens between
+	 * cpu_in_guest being set to the guest kvm, and need_tlb_flush bit
+	 * being tested.
 	 */
 	smp_mb();
 
 	for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
-					i += cpu_tlb_thread_sibling_step())
-		if (cpumask_test_cpu(i, cpu_in_guest))
+					i += cpu_tlb_thread_sibling_step()) {
+		struct kvm *running = *per_cpu_ptr(&cpu_in_guest, i);
+
+		if (running == kvm)
 			smp_call_function_single(i, do_nothing, NULL, 1);
+	}
 }
 
 static void do_migrate_away_vcpu(void *arg)
@@ -3105,7 +3109,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 {
 	int cpu;
 	struct paca_struct *tpaca;
-	struct kvm *kvm = vc->kvm;
 
 	cpu = vc->pcpu;
 	if (vcpu) {
@@ -3116,7 +3119,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 		cpu += vcpu->arch.ptid;
 		vcpu->cpu = vc->pcpu;
 		vcpu->arch.thread_cpu = cpu;
-		cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
 	}
 	tpaca = paca_ptrs[cpu];
 	tpaca->kvm_hstate.kvm_vcpu = vcpu;
@@ -3847,7 +3849,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		kvmppc_release_hwthread(pcpu + i);
 		if (sip && sip->napped[i])
 			kvmppc_ipi_thread(pcpu + i);
-		cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
 	}
 
 	spin_unlock(&vc->lock);
@@ -4015,8 +4016,14 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 		}
 
 	} else {
+		struct kvm *kvm = vcpu->kvm;
+
 		kvmppc_xive_push_vcpu(vcpu);
+
+		__this_cpu_write(cpu_in_guest, kvm);
 		trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb);
+		__this_cpu_write(cpu_in_guest, NULL);
+
 		if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
 		    !(vcpu->arch.shregs.msr & MSR_PR)) {
 			unsigned long req = kvmppc_get_gpr(vcpu, 3);
@@ -4041,7 +4048,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 		}
 		kvmppc_xive_pull_vcpu(vcpu);
 
-		if (kvm_is_radix(vcpu->kvm))
+		if (kvm_is_radix(kvm))
 			vcpu->arch.slb_max = 0;
 	}
 
@@ -4531,8 +4538,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	powerpc_local_irq_pmu_restore(flags);
 
-	cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest);
-
 	preempt_enable();
 
 	/*
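
Not part of the patch: the ordering the new comment relies on is the classic store-buffering
pattern (flusher: set need_tlb_flush, smp_mb(), read cpu_in_guest; entry: set cpu_in_guest,
hwsync at the guest MMU switch, test need_tlb_flush). Below is a minimal user-space sketch of
that protocol using C11 atomics, for illustration only; NCPUS, request_flush() and enter_guest()
are made-up names, seq_cst fences stand in for smp_mb()/hwsync, and printf stands in for the IPI.

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	#define NCPUS 4

	struct kvm { int id; };

	/* Per-CPU "which guest is this CPU running" pointer, as in the patch. */
	static _Atomic(struct kvm *) cpu_in_guest[NCPUS];
	/* Per-CPU flush-request flag standing in for the need_tlb_flush cpumask bit. */
	static atomic_bool need_tlb_flush[NCPUS];

	static void send_ipi(int cpu)
	{
		/* Stand-in for smp_call_function_single(i, do_nothing, NULL, 1). */
		printf("IPI -> cpu %d\n", cpu);
	}

	/* Flusher side, mirroring radix_flush_cpu() after the patch. */
	static void request_flush(struct kvm *kvm, int cpu)
	{
		atomic_store_explicit(&need_tlb_flush[cpu], true, memory_order_relaxed);
		/* smp_mb(): the flag store must be ordered before reading cpu_in_guest. */
		atomic_thread_fence(memory_order_seq_cst);
		if (atomic_load_explicit(&cpu_in_guest[cpu], memory_order_relaxed) == kvm)
			send_ipi(cpu);
	}

	/* Entry side, mirroring the P9 path in kvmhv_p9_guest_entry(). */
	static bool enter_guest(struct kvm *kvm, int cpu)
	{
		bool flush;

		atomic_store_explicit(&cpu_in_guest[cpu], kvm, memory_order_relaxed);
		/* hwsync at the switch to guest MMU mode: the pointer store must be
		 * ordered before need_tlb_flush is tested. */
		atomic_thread_fence(memory_order_seq_cst);
		flush = atomic_exchange_explicit(&need_tlb_flush[cpu], false,
						 memory_order_relaxed);
		/* ... flush if requested, then run the guest ... */
		atomic_store_explicit(&cpu_in_guest[cpu], NULL, memory_order_relaxed);
		return flush;
	}

	int main(void)
	{
		struct kvm vm = { .id = 1 };

		request_flush(&vm, 0);	/* no IPI: cpu 0 is not in this guest yet */
		printf("flush seen on entry: %d\n", enter_guest(&vm, 0));
		return 0;
	}

With both fences in place the store-buffering outcome is impossible: the flusher cannot miss
cpu_in_guest while the entering CPU also misses need_tlb_flush, so a flush request is either
noticed on entry or forces an IPI, which is the property the new kernel comment describes.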