From 6654f9dfcb88fea3b9affc180dc3c04333d0f306 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 18 Aug 2021 20:21:30 +0000 Subject: [PATCH 1/4] KVM: arm64: Fix read-side race on updates to vcpu reset state KVM correctly serializes writes to a vCPU's reset state, however since we do not take the KVM lock on the read side it is entirely possible to read state from two different reset requests. Cure the race for now by taking the KVM lock when reading the reset_state structure. Fixes: 358b28f09f0a ("arm/arm64: KVM: Allow a VCPU to fully reset itself") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210818202133.1106786-2-oupton@google.com --- arch/arm64/kvm/reset.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index cba7872d69a8..d862441b03b1 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -210,10 +210,16 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu) */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { + struct vcpu_reset_state reset_state; int ret; bool loaded; u32 pstate; + mutex_lock(&vcpu->kvm->lock); + reset_state = vcpu->arch.reset_state; + WRITE_ONCE(vcpu->arch.reset_state.reset, false); + mutex_unlock(&vcpu->kvm->lock); + /* Reset PMU outside of the non-preemptible section */ kvm_pmu_vcpu_reset(vcpu); @@ -276,8 +282,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) * Additional reset state handling that PSCI may have imposed on us. * Must be done after all the sys_reg reset. 
*/ - if (vcpu->arch.reset_state.reset) { - unsigned long target_pc = vcpu->arch.reset_state.pc; + if (reset_state.reset) { + unsigned long target_pc = reset_state.pc; /* Gracefully handle Thumb2 entry point */ if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) { @@ -286,13 +292,11 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) } /* Propagate caller endianness */ - if (vcpu->arch.reset_state.be) + if (reset_state.be) kvm_vcpu_set_be(vcpu); *vcpu_pc(vcpu) = target_pc; - vcpu_set_reg(vcpu, 0, vcpu->arch.reset_state.r0); - - vcpu->arch.reset_state.reset = false; + vcpu_set_reg(vcpu, 0, reset_state.r0); } /* Reset timer */ From 6826c6849b46aaa91300201213701eb861af4ba0 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 18 Aug 2021 20:21:31 +0000 Subject: [PATCH 2/4] KVM: arm64: Handle PSCI resets before userspace touches vCPU state The CPU_ON PSCI call takes a payload that KVM uses to configure a destination vCPU to run. This payload is non-architectural state and not exposed through any existing UAPI. Effectively, we have a race between CPU_ON and userspace saving/restoring a guest: if the target vCPU isn't ran again before the VMM saves its state, the requested PC and context ID are lost. When restored, the target vCPU will be runnable and start executing at its old PC. We can avoid this race by making sure the reset payload is serviced before userspace can access a vCPU's state. Fixes: 358b28f09f0a ("arm/arm64: KVM: Allow a VCPU to fully reset itself") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210818202133.1106786-3-oupton@google.com --- arch/arm64/kvm/arm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..8ce9996bcbfd 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1215,6 +1215,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (copy_from_user(&reg, argp, sizeof(reg))) break; + /* + * We could owe a reset due to PSCI.
Handle the pending reset + * here to ensure userspace register accesses are ordered after + * the reset. + */ + if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) + kvm_reset_vcpu(vcpu); + if (ioctl == KVM_SET_ONE_REG) r = kvm_arm_set_reg(vcpu, &reg); else From e10ecb4d6c0761ca545b3946df1707a41f9f845e Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 18 Aug 2021 20:21:32 +0000 Subject: [PATCH 3/4] KVM: arm64: Enforce reserved bits for PSCI target affinities According to the PSCI specification, ARM DEN 0022D, 5.1.4 "CPU_ON", the CPU_ON function takes a target_cpu argument that is bit-compatible with the affinity fields in MPIDR_EL1. All other bits in the argument are RES0. Note that the same constraints apply to the target_affinity argument for the AFFINITY_INFO call. Enforce the spec by returning INVALID_PARAMS if a guest incorrectly sets a RES0 bit. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210818202133.1106786-4-oupton@google.com --- arch/arm64/kvm/psci.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index db4056ecccfd..74c47d420253 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -59,6 +59,12 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) kvm_vcpu_kick(vcpu); } +static inline bool kvm_psci_valid_affinity(struct kvm_vcpu *vcpu, + unsigned long affinity) +{ + return !(affinity & ~MPIDR_HWID_BITMASK); +} + static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) { struct vcpu_reset_state *reset_state; @@ -66,9 +72,9 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) struct kvm_vcpu *vcpu = NULL; unsigned long cpu_id; - cpu_id = smccc_get_arg1(source_vcpu) & MPIDR_HWID_BITMASK; - if (vcpu_mode_is_32bit(source_vcpu)) - cpu_id &= ~((u32) 0); + cpu_id = smccc_get_arg1(source_vcpu); + if (!kvm_psci_valid_affinity(source_vcpu, cpu_id)) + return PSCI_RET_INVALID_PARAMS; vcpu =
kvm_mpidr_to_vcpu(kvm, cpu_id); @@ -126,6 +132,9 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) target_affinity = smccc_get_arg1(vcpu); lowest_affinity_level = smccc_get_arg2(vcpu); + if (!kvm_psci_valid_affinity(vcpu, target_affinity)) + return PSCI_RET_INVALID_PARAMS; + /* Determine target affinity mask */ target_affinity_mask = psci_affinity_mask(lowest_affinity_level); if (!target_affinity_mask) From cb97cf95c44021278b7637731bc0928026bc29ab Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 18 Aug 2021 20:21:33 +0000 Subject: [PATCH 4/4] selftests: KVM: Introduce psci_cpu_on_test Introduce a test for aarch64 that ensures CPU resets induced by PSCI are reflected in the target vCPU's state, even if the target is never run again. This is a regression test for a race between vCPU migration and PSCI. Reviewed-by: Andrew Jones Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210818202133.1106786-5-oupton@google.com --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/aarch64/psci_cpu_on_test.c | 121 ++++++++++++++++++ .../selftests/kvm/include/aarch64/processor.h | 3 + 4 files changed, 126 insertions(+) create mode 100644 tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 0709af0144c8..98053d3afbda 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only /aarch64/debug-exceptions /aarch64/get-reg-list +/aarch64/psci_cpu_on_test /aarch64/vgic_init /s390x/memop /s390x/resets diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 5832f510a16c..5d05801ab816 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -86,6 +86,7 @@ TEST_GEN_PROGS_x86_64 += 
kvm_binary_stats_test TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list +TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test diff --git a/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c b/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c new file mode 100644 index 000000000000..018c269990e1 --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * psci_cpu_on_test - Test that the observable state of a vCPU targeted by the + * CPU_ON PSCI call matches what the caller requested. + * + * Copyright (c) 2021 Google LLC. + * + * This is a regression test for a race between KVM servicing the PSCI call and + * userspace reading the vCPUs registers. + */ + +#define _GNU_SOURCE + +#include <linux/psci.h> + +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" + +#define VCPU_ID_SOURCE 0 +#define VCPU_ID_TARGET 1 + +#define CPU_ON_ENTRY_ADDR 0xfeedf00dul +#define CPU_ON_CONTEXT_ID 0xdeadc0deul + +static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr, + uint64_t context_id) +{ + register uint64_t x0 asm("x0") = PSCI_0_2_FN64_CPU_ON; + register uint64_t x1 asm("x1") = target_cpu; + register uint64_t x2 asm("x2") = entry_addr; + register uint64_t x3 asm("x3") = context_id; + + asm("hvc #0" + : "=r"(x0) + : "r"(x0), "r"(x1), "r"(x2), "r"(x3) + : "memory"); + + return x0; +} + +static uint64_t psci_affinity_info(uint64_t target_affinity, + uint64_t lowest_affinity_level) +{ + register uint64_t x0 asm("x0") = PSCI_0_2_FN64_AFFINITY_INFO; + register uint64_t x1 asm("x1") = target_affinity; + register uint64_t x2 asm("x2") = lowest_affinity_level; + + asm("hvc #0" + : "=r"(x0) + : "r"(x0), "r"(x1), "r"(x2) + : "memory"); + + return x0; +} + +static void guest_main(uint64_t target_cpu) +{ +
GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID)); + uint64_t target_state; + + do { + target_state = psci_affinity_info(target_cpu, 0); + + GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) || + (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF)); + } while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON); + + GUEST_DONE(); +} + +int main(void) +{ + uint64_t target_mpidr, obs_pc, obs_x0; + struct kvm_vcpu_init init; + struct kvm_vm *vm; + struct ucall uc; + + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + kvm_vm_elf_load(vm, program_invocation_name); + ucall_init(vm, NULL); + + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); + + aarch64_vcpu_add_default(vm, VCPU_ID_SOURCE, &init, guest_main); + + /* + * make sure the target is already off when executing the test. + */ + init.features[0] |= (1 << KVM_ARM_VCPU_POWER_OFF); + aarch64_vcpu_add_default(vm, VCPU_ID_TARGET, &init, guest_main); + + get_reg(vm, VCPU_ID_TARGET, ARM64_SYS_REG(MPIDR_EL1), &target_mpidr); + vcpu_args_set(vm, VCPU_ID_SOURCE, 1, target_mpidr & MPIDR_HWID_BITMASK); + vcpu_run(vm, VCPU_ID_SOURCE); + + switch (get_ucall(vm, VCPU_ID_SOURCE, &uc)) { + case UCALL_DONE: + break; + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], __FILE__, + uc.args[1]); + break; + default: + TEST_FAIL("Unhandled ucall: %lu", uc.cmd); + } + + get_reg(vm, VCPU_ID_TARGET, ARM64_CORE_REG(regs.pc), &obs_pc); + get_reg(vm, VCPU_ID_TARGET, ARM64_CORE_REG(regs.regs[0]), &obs_x0); + + TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR, + "unexpected target cpu pc: %lx (expected: %lx)", + obs_pc, CPU_ON_ENTRY_ADDR); + TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID, + "unexpected target context id: %lx (expected: %lx)", + obs_x0, CPU_ON_CONTEXT_ID); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index 
27dc5c2e56b9..c0273aefa63d 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -17,6 +17,7 @@ #define CPACR_EL1 3, 0, 1, 0, 2 #define TCR_EL1 3, 0, 2, 0, 2 #define MAIR_EL1 3, 0, 10, 2, 0 +#define MPIDR_EL1 3, 0, 0, 0, 5 #define TTBR0_EL1 3, 0, 2, 0, 0 #define SCTLR_EL1 3, 0, 1, 0, 0 #define VBAR_EL1 3, 0, 12, 0, 0 @@ -40,6 +41,8 @@ (0xfful << (4 * 8)) | \ (0xbbul << (5 * 8))) +#define MPIDR_HWID_BITMASK (0xff00fffffful) + static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t *addr) { struct kvm_one_reg reg;