Merge branch kvm-arm64/misc-5.15 into kvmarm-master/next

* kvm-arm64/misc-5.15:
  : Misc improvements for 5.15:
  :
  : - Account the number of VMID-wide TLB invalidations as
  :   remote TLB flushes
  : - Fix comments in the VGIC code
  : - Cleanup the PMU IMPDEF identification
  : - Streamline the TGRAN2 usage
  : - Avoid advertising a 52bit IPA range for non-64KB configs
  : - Avoid spurious signalling when a HW-mapped interrupt is in the
  :   A+P state on entry, and in the P state on exit, but that the
  :   physical line is not pending anymore.
  : - Bunch of minor cleanups
  KVM: arm64: vgic: Resample HW pending state on deactivation
  KVM: arm64: vgic: Drop WARN from vgic_get_irq
  KVM: arm64: Drop unused REQUIRES_VIRT
  KVM: arm64: Drop check_kvm_target_cpu() based percpu probe
  KVM: arm64: Drop init_common_resources()
  KVM: arm64: Use ARM64_MIN_PARANGE_BITS as the minimum supported IPA
  arm64/mm: Add remaining ID_AA64MMFR0_PARANGE_ macros
  KVM: arm64: Restrict IPA size to maximum 48 bits on 4K and 16K page size
  arm64/mm: Define ID_AA64MMFR0_TGRAN_2_SHIFT
  KVM: arm64: perf: Replace '0xf' instances with ID_AA64DFR0_PMUVER_IMP_DEF
  KVM: arm64: Fix comments related to GICv2 PMR reporting
  KVM: arm64: Count VMID-wide TLB invalidations
  arm64/kexec: Test page size support with new TGRAN range values

Signed-off-by: Marc Zyngier <maz@kernel.org>
This commit is contained in:
Marc Zyngier 2021-08-20 12:14:56 +01:00
commit 3ce5db8a59
14 changed files with 112 additions and 135 deletions

View File

@ -657,7 +657,8 @@ static inline bool system_supports_4kb_granule(void)
val = cpuid_feature_extract_unsigned_field(mmfr0,
ID_AA64MMFR0_TGRAN4_SHIFT);
return val == ID_AA64MMFR0_TGRAN4_SUPPORTED;
return (val >= ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN) &&
(val <= ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX);
}
static inline bool system_supports_64kb_granule(void)
@ -669,7 +670,8 @@ static inline bool system_supports_64kb_granule(void)
val = cpuid_feature_extract_unsigned_field(mmfr0,
ID_AA64MMFR0_TGRAN64_SHIFT);
return val == ID_AA64MMFR0_TGRAN64_SUPPORTED;
return (val >= ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN) &&
(val <= ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX);
}
static inline bool system_supports_16kb_granule(void)
@ -681,7 +683,8 @@ static inline bool system_supports_16kb_granule(void)
val = cpuid_feature_extract_unsigned_field(mmfr0,
ID_AA64MMFR0_TGRAN16_SHIFT);
return val == ID_AA64MMFR0_TGRAN16_SUPPORTED;
return (val >= ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN) &&
(val <= ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX);
}
static inline bool system_supports_mixed_endian_el0(void)
@ -781,13 +784,13 @@ extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
{
switch (parange) {
case 0: return 32;
case 1: return 36;
case 2: return 40;
case 3: return 42;
case 4: return 44;
case 5: return 48;
case 6: return 52;
case ID_AA64MMFR0_PARANGE_32: return 32;
case ID_AA64MMFR0_PARANGE_36: return 36;
case ID_AA64MMFR0_PARANGE_40: return 40;
case ID_AA64MMFR0_PARANGE_42: return 42;
case ID_AA64MMFR0_PARANGE_44: return 44;
case ID_AA64MMFR0_PARANGE_48: return 48;
case ID_AA64MMFR0_PARANGE_52: return 52;
/*
* A future PE could use a value unknown to the kernel.
* However, by the "D10.1.4 Principles of the ID scheme

View File

@ -66,7 +66,7 @@ DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
extern unsigned int kvm_sve_max_vl;
int kvm_arm_init_sve(void);
int __attribute_const__ kvm_target_cpu(void);
u32 __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);

View File

@ -847,15 +847,26 @@
#define ID_AA64MMFR0_ASID_SHIFT 4
#define ID_AA64MMFR0_PARANGE_SHIFT 0
#define ID_AA64MMFR0_TGRAN4_NI 0xf
#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0
#define ID_AA64MMFR0_TGRAN64_NI 0xf
#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0
#define ID_AA64MMFR0_TGRAN16_NI 0x0
#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1
#define ID_AA64MMFR0_TGRAN4_NI 0xf
#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN 0x0
#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX 0x7
#define ID_AA64MMFR0_TGRAN64_NI 0xf
#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN 0x0
#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX 0x7
#define ID_AA64MMFR0_TGRAN16_NI 0x0
#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN 0x1
#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX 0xf
#define ID_AA64MMFR0_PARANGE_32 0x0
#define ID_AA64MMFR0_PARANGE_36 0x1
#define ID_AA64MMFR0_PARANGE_40 0x2
#define ID_AA64MMFR0_PARANGE_42 0x3
#define ID_AA64MMFR0_PARANGE_44 0x4
#define ID_AA64MMFR0_PARANGE_48 0x5
#define ID_AA64MMFR0_PARANGE_52 0x6
#define ARM64_MIN_PARANGE_BITS 32
#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT 0x0
#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE 0x1
#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN 0x2
@ -1028,16 +1039,19 @@
#if defined(CONFIG_ARM64_4K_PAGES)
#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT
#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED
#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0x7
#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN
#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX
#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN4_2_SHIFT
#elif defined(CONFIG_ARM64_16K_PAGES)
#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT
#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN16_SUPPORTED
#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0xF
#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN
#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX
#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN16_2_SHIFT
#elif defined(CONFIG_ARM64_64K_PAGES)
#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT
#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN64_SUPPORTED
#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0x7
#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN
#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX
#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN64_2_SHIFT
#endif
#define MVFR2_FPMISC_SHIFT 4

View File

@ -42,10 +42,6 @@
#include <kvm/arm_pmu.h>
#include <kvm/arm_psci.h>
#ifdef REQUIRES_VIRT
__asm__(".arch_extension virt");
#endif
static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
@ -1035,7 +1031,7 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init)
{
unsigned int i, ret;
int phys_target = kvm_target_cpu();
u32 phys_target = kvm_target_cpu();
if (init->target != phys_target)
return -EINVAL;
@ -1696,11 +1692,6 @@ static bool init_psci_relay(void)
return true;
}
static int init_common_resources(void)
{
return kvm_set_ipa_limit();
}
static int init_subsystems(void)
{
int err = 0;
@ -2015,11 +2006,6 @@ static int finalize_hyp_mode(void)
return 0;
}
static void check_kvm_target_cpu(void *ret)
{
*(int *)ret = kvm_target_cpu();
}
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
{
struct kvm_vcpu *vcpu;
@ -2079,7 +2065,6 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
int kvm_arch_init(void *opaque)
{
int err;
int ret, cpu;
bool in_hyp_mode;
if (!is_hyp_mode_available()) {
@ -2094,15 +2079,7 @@ int kvm_arch_init(void *opaque)
kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \
"Only trusted guests should be used on this system.\n");
for_each_online_cpu(cpu) {
smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
if (ret < 0) {
kvm_err("Error, CPU %d not supported!\n", cpu);
return -ENODEV;
}
}
err = init_common_resources();
err = kvm_set_ipa_limit();
if (err)
return err;

View File

@ -842,7 +842,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
return 0;
}
int __attribute_const__ kvm_target_cpu(void)
u32 __attribute_const__ kvm_target_cpu(void)
{
unsigned long implementor = read_cpuid_implementor();
unsigned long part_number = read_cpuid_part_number();
@ -874,7 +874,7 @@ int __attribute_const__ kvm_target_cpu(void)
int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
{
int target = kvm_target_cpu();
u32 target = kvm_target_cpu();
if (target < 0)
return -ENODEV;

View File

@ -81,6 +81,7 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
++kvm->stat.generic.remote_tlb_flush;
}
static bool kvm_is_device_pfn(unsigned long pfn)

View File

@ -50,7 +50,7 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
int kvm_perf_init(void)
{
if (kvm_pmu_probe_pmuver() != 0xf && !is_protected_kvm_enabled())
if (kvm_pmu_probe_pmuver() != ID_AA64DFR0_PMUVER_IMP_DEF && !is_protected_kvm_enabled())
static_branch_enable(&kvm_arm_pmu_available);
return perf_register_guest_info_callbacks(&kvm_guest_cbs);

View File

@ -745,7 +745,7 @@ int kvm_pmu_probe_pmuver(void)
struct perf_event_attr attr = { };
struct perf_event *event;
struct arm_pmu *pmu;
int pmuver = 0xf;
int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF;
/*
* Create a dummy event that only counts user cycles. As we'll never
@ -770,7 +770,7 @@ int kvm_pmu_probe_pmuver(void)
if (IS_ERR(event)) {
pr_err_once("kvm: pmu event creation failed %ld\n",
PTR_ERR(event));
return 0xf;
return ID_AA64DFR0_PMUVER_IMP_DEF;
}
if (event->pmu) {
@ -923,7 +923,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
if (!vcpu->kvm->arch.pmuver)
vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();
if (vcpu->kvm->arch.pmuver == 0xf)
if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
return -ENODEV;
switch (attr->attr) {

View File

@ -311,31 +311,26 @@ u32 get_kvm_ipa_limit(void)
int kvm_set_ipa_limit(void)
{
unsigned int parange, tgran_2;
unsigned int parange;
u64 mmfr0;
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
parange = cpuid_feature_extract_unsigned_field(mmfr0,
ID_AA64MMFR0_PARANGE_SHIFT);
/*
* IPA size beyond 48 bits could not be supported
* on either 4K or 16K page size. Hence let's cap
* it to 48 bits, in case it's reported as larger
* on the system.
*/
if (PAGE_SIZE != SZ_64K)
parange = min(parange, (unsigned int)ID_AA64MMFR0_PARANGE_48);
/*
* Check with ARMv8.5-GTG that our PAGE_SIZE is supported at
* Stage-2. If not, things will stop very quickly.
*/
switch (PAGE_SIZE) {
default:
case SZ_4K:
tgran_2 = ID_AA64MMFR0_TGRAN4_2_SHIFT;
break;
case SZ_16K:
tgran_2 = ID_AA64MMFR0_TGRAN16_2_SHIFT;
break;
case SZ_64K:
tgran_2 = ID_AA64MMFR0_TGRAN64_2_SHIFT;
break;
}
switch (cpuid_feature_extract_unsigned_field(mmfr0, tgran_2)) {
switch (cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_TGRAN_2_SHIFT)) {
case ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE:
kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n");
return -EINVAL;
@ -369,7 +364,7 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
if (phys_shift) {
if (phys_shift > kvm_ipa_limit ||
phys_shift < 32)
phys_shift < ARM64_MIN_PARANGE_BITS)
return -EINVAL;
} else {
phys_shift = KVM_PHYS_SHIFT;

View File

@ -282,7 +282,7 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu,
case GIC_CPU_PRIMASK:
/*
* Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
* the PMR field as GICH_VMCR.VMPriMask rather than
* PMR field as GICH_VMCR.VMPriMask rather than
* GICC_PMR.Priority, so we expose the upper five bits of
* priority mask to userspace using the lower bits in the
* unsigned long.
@ -329,7 +329,7 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
case GIC_CPU_PRIMASK:
/*
* Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
* the PMR field as GICH_VMCR.VMPriMask rather than
* PMR field as GICH_VMCR.VMPriMask rather than
* GICC_PMR.Priority, so we expose the upper five bits of
* priority mask to userspace using the lower bits in the
* unsigned long.

View File

@ -60,6 +60,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
u32 val = cpuif->vgic_lr[lr];
u32 cpuid, intid = val & GICH_LR_VIRTUALID;
struct vgic_irq *irq;
bool deactivated;
/* Extract the source vCPU id from the LR */
cpuid = val & GICH_LR_PHYSID_CPUID;
@ -75,7 +76,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
raw_spin_lock(&irq->irq_lock);
/* Always preserve the active bit */
/* Always preserve the active bit, note deactivation */
deactivated = irq->active && !(val & GICH_LR_ACTIVE_BIT);
irq->active = !!(val & GICH_LR_ACTIVE_BIT);
if (irq->active && vgic_irq_is_sgi(intid))
@ -96,36 +98,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
if (irq->config == VGIC_CONFIG_LEVEL && !(val & GICH_LR_STATE))
irq->pending_latch = false;
/*
* Level-triggered mapped IRQs are special because we only
* observe rising edges as input to the VGIC.
*
* If the guest never acked the interrupt we have to sample
* the physical line and set the line level, because the
* device state could have changed or we simply need to
* process the still pending interrupt later.
*
* If this causes us to lower the level, we have to also clear
* the physical active state, since we will otherwise never be
* told when the interrupt becomes asserted again.
*
* Another case is when the interrupt requires a helping hand
* on deactivation (no HW deactivation, for example).
*/
if (vgic_irq_is_mapped_level(irq)) {
bool resample = false;
if (val & GICH_LR_PENDING_BIT) {
irq->line_level = vgic_get_phys_line_level(irq);
resample = !irq->line_level;
} else if (vgic_irq_needs_resampling(irq) &&
!(irq->active || irq->pending_latch)) {
resample = true;
}
if (resample)
vgic_irq_set_phys_active(irq, false);
}
/* Handle resampling for mapped interrupts if required */
vgic_irq_handle_resampling(irq, deactivated, val & GICH_LR_PENDING_BIT);
raw_spin_unlock(&irq->irq_lock);
vgic_put_irq(vcpu->kvm, irq);

View File

@ -46,6 +46,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
u32 intid, cpuid;
struct vgic_irq *irq;
bool is_v2_sgi = false;
bool deactivated;
cpuid = val & GICH_LR_PHYSID_CPUID;
cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
@ -68,7 +69,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
raw_spin_lock(&irq->irq_lock);
/* Always preserve the active bit */
/* Always preserve the active bit, note deactivation */
deactivated = irq->active && !(val & ICH_LR_ACTIVE_BIT);
irq->active = !!(val & ICH_LR_ACTIVE_BIT);
if (irq->active && is_v2_sgi)
@ -89,36 +91,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE))
irq->pending_latch = false;
/*
* Level-triggered mapped IRQs are special because we only
* observe rising edges as input to the VGIC.
*
* If the guest never acked the interrupt we have to sample
* the physical line and set the line level, because the
* device state could have changed or we simply need to
* process the still pending interrupt later.
*
* If this causes us to lower the level, we have to also clear
* the physical active state, since we will otherwise never be
* told when the interrupt becomes asserted again.
*
* Another case is when the interrupt requires a helping hand
* on deactivation (no HW deactivation, for example).
*/
if (vgic_irq_is_mapped_level(irq)) {
bool resample = false;
if (val & ICH_LR_PENDING_BIT) {
irq->line_level = vgic_get_phys_line_level(irq);
resample = !irq->line_level;
} else if (vgic_irq_needs_resampling(irq) &&
!(irq->active || irq->pending_latch)) {
resample = true;
}
if (resample)
vgic_irq_set_phys_active(irq, false);
}
/* Handle resampling for mapped interrupts if required */
vgic_irq_handle_resampling(irq, deactivated, val & ICH_LR_PENDING_BIT);
raw_spin_unlock(&irq->irq_lock);
vgic_put_irq(vcpu->kvm, irq);

View File

@ -106,7 +106,6 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
if (intid >= VGIC_MIN_LPI)
return vgic_get_lpi(kvm, intid);
WARN(1, "Looking up struct vgic_irq for reserved INTID");
return NULL;
}
@ -1022,3 +1021,41 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
return map_is_active;
}
/*
* Level-triggered mapped IRQs are special because we only observe rising
* edges as input to the VGIC.
*
* If the guest never acked the interrupt we have to sample the physical
* line and set the line level, because the device state could have changed
* or we simply need to process the still pending interrupt later.
*
* We could also have entered the guest with the interrupt active+pending.
* On the next exit, we need to re-evaluate the pending state, as it could
* otherwise result in a spurious interrupt by injecting a now potentially
* stale pending state.
*
* If this causes us to lower the level, we have to also clear the physical
* active state, since we will otherwise never be told when the interrupt
* becomes asserted again.
*
* Another case is when the interrupt requires a helping hand on
* deactivation (no HW deactivation, for example).
*/
void vgic_irq_handle_resampling(struct vgic_irq *irq,
bool lr_deactivated, bool lr_pending)
{
if (vgic_irq_is_mapped_level(irq)) {
bool resample = false;
if (unlikely(vgic_irq_needs_resampling(irq))) {
resample = !(irq->active || irq->pending_latch);
} else if (lr_pending || (lr_deactivated && irq->line_level)) {
irq->line_level = vgic_get_phys_line_level(irq);
resample = !irq->line_level;
}
if (resample)
vgic_irq_set_phys_active(irq, false);
}
}

View File

@ -169,6 +169,8 @@ void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active);
bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
unsigned long flags);
void vgic_kick_vcpus(struct kvm *kvm);
void vgic_irq_handle_resampling(struct vgic_irq *irq,
bool lr_deactivated, bool lr_pending);
int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
phys_addr_t addr, phys_addr_t alignment);