Bugfixes for ARM, x86 and tools.
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "Bugfixes for ARM, x86 and tools"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  tools/kvm_stat: Exempt time-based counters
  KVM: mmu: Fix SPTE encoding of MMIO generation upper half
  kvm: x86/mmu: Use cpuid to determine max gfn
  kvm: svm: de-allocate svm_cpu_data for all cpus in svm_cpu_uninit()
  selftests: kvm/set_memory_region_test: Fix race in move region test
  KVM: arm64: Add usage of stage 2 fault lookup level in user_mem_abort()
  KVM: arm64: Fix handling of merging tables into a block entry
  KVM: arm64: Fix memory leak on stage2 update of a valid PTE
commit 7b1b868e1d
@@ -455,7 +455,7 @@ If the generation number of the spte does not equal the global generation
 number, it will ignore the cached MMIO information and handle the page
 fault through the slow path.
 
-Since only 19 bits are used to store generation-number on mmio spte, all
+Since only 18 bits are used to store generation-number on mmio spte, all
 pages are zapped when there is an overflow.
 
 Unfortunately, a single memory access might access kvm_memslots(kvm) multiple

@@ -104,6 +104,7 @@
 /* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */
 #define ESR_ELx_FSC        (0x3F)
 #define ESR_ELx_FSC_TYPE   (0x3C)
+#define ESR_ELx_FSC_LEVEL  (0x03)
 #define ESR_ELx_FSC_EXTABT (0x10)
 #define ESR_ELx_FSC_SERROR (0x11)
 #define ESR_ELx_FSC_ACCESS (0x08)

@@ -350,6 +350,11 @@ static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
         return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE;
 }
 
+static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu)
+{
+        return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL;
+}
+
 static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
 {
         switch (kvm_vcpu_trap_get_fault(vcpu)) {

@@ -470,6 +470,15 @@ static bool stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
         if (!kvm_block_mapping_supported(addr, end, phys, level))
                 return false;
 
+        /*
+         * If the PTE was already valid, drop the refcount on the table
+         * early, as it will be bumped-up again in stage2_map_walk_leaf().
+         * This ensures that the refcount stays constant across a valid to
+         * valid PTE update.
+         */
+        if (kvm_pte_valid(*ptep))
+                put_page(virt_to_page(ptep));
+
         if (kvm_set_valid_leaf_pte(ptep, phys, data->attr, level))
                 goto out;
 

@@ -493,7 +502,13 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
                 return 0;
 
         kvm_set_invalid_pte(ptep);
-        kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, 0);
+
+        /*
+         * Invalidate the whole stage-2, as we may have numerous leaf
+         * entries below us which would otherwise need invalidating
+         * individually.
+         */
+        kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
         data->anchor = ptep;
         return 0;
 }

@@ -754,10 +754,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         gfn_t gfn;
         kvm_pfn_t pfn;
         bool logging_active = memslot_is_logging(memslot);
-        unsigned long vma_pagesize;
+        unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
+        unsigned long vma_pagesize, fault_granule;
         enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
         struct kvm_pgtable *pgt;
 
+        fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level);
         write_fault = kvm_is_write_fault(vcpu);
         exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
         VM_BUG_ON(write_fault && exec_fault);

@@ -896,7 +898,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         else if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
                 prot |= KVM_PGTABLE_PROT_X;
 
-        if (fault_status == FSC_PERM && !(logging_active && writable)) {
+        /*
+         * Under the premise of getting a FSC_PERM fault, we just need to relax
+         * permissions only if vma_pagesize equals fault_granule. Otherwise,
+         * kvm_pgtable_stage2_map() should be called to change block size.
+         */
+        if (fault_status == FSC_PERM && vma_pagesize == fault_granule) {
                 ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
         } else {
                 ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,

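The fault_granule introduced in the two hunks above is the amount of IPA space
covered by one stage-2 entry at the lookup level reported in the fault syndrome.
A minimal standalone sketch of that arithmetic, assuming 4KiB pages
(PAGE_SHIFT == 12); the level_shift() helper below is illustrative only and
simply mirrors the ARM64_HW_PGTABLE_LEVEL_SHIFT() formula for a 4KiB granule:

#include <stdio.h>

#define PAGE_SHIFT 12   /* assumed 4KiB pages */

/* Bits of IPA covered below a stage-2 entry at the given lookup level. */
static unsigned int level_shift(unsigned int level)
{
        return (PAGE_SHIFT - 3) * (4 - level) + 3;
}

int main(void)
{
        /* Level 3 maps 4KiB pages, level 2 maps 2MiB blocks, level 1 maps 1GiB blocks. */
        for (unsigned int level = 1; level <= 3; level++) {
                unsigned long long fault_granule = 1ULL << level_shift(level);

                printf("fault at level %u -> granule %llu KiB\n",
                       level, fault_granule >> 10);
        }
        return 0;
}

If the faulting entry already covers vma_pagesize (vma_pagesize == fault_granule),
relaxing permissions in place is enough; otherwise the map path has to rebuild
the mapping at a different block size.
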
@@ -40,8 +40,8 @@ static u64 generation_mmio_spte_mask(u64 gen)
         WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
         BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK);
 
-        mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK;
-        mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK;
+        mask = (gen << MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_SPTE_GEN_LOW_MASK;
+        mask |= (gen << MMIO_SPTE_GEN_HIGH_SHIFT) & MMIO_SPTE_GEN_HIGH_MASK;
         return mask;
 }
 

@@ -56,11 +56,11 @@
 #define SPTE_MMU_WRITEABLE  (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
 
 /*
- * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of
+ * Due to limited space in PTEs, the MMIO generation is a 18 bit subset of
  * the memslots generation and is derived as follows:
  *
  * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11
- * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61
+ * Bits 9-17 of the MMIO generation are propagated to spte bits 54-62
  *
  * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in
  * the MMIO generation number, as doing so would require stealing a bit from

@@ -69,18 +69,29 @@
  * requires a full MMU zap). The flag is instead explicitly queried when
  * checking for MMIO spte cache hits.
  */
-#define MMIO_SPTE_GEN_MASK        GENMASK_ULL(17, 0)
 
 #define MMIO_SPTE_GEN_LOW_START   3
 #define MMIO_SPTE_GEN_LOW_END     11
-#define MMIO_SPTE_GEN_LOW_MASK    GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
-                                              MMIO_SPTE_GEN_LOW_START)
 
 #define MMIO_SPTE_GEN_HIGH_START  PT64_SECOND_AVAIL_BITS_SHIFT
 #define MMIO_SPTE_GEN_HIGH_END    62
+
+#define MMIO_SPTE_GEN_LOW_MASK    GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
+                                              MMIO_SPTE_GEN_LOW_START)
 #define MMIO_SPTE_GEN_HIGH_MASK   GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \
                                               MMIO_SPTE_GEN_HIGH_START)
+
+#define MMIO_SPTE_GEN_LOW_BITS    (MMIO_SPTE_GEN_LOW_END - MMIO_SPTE_GEN_LOW_START + 1)
+#define MMIO_SPTE_GEN_HIGH_BITS   (MMIO_SPTE_GEN_HIGH_END - MMIO_SPTE_GEN_HIGH_START + 1)
+
+/* remember to adjust the comment above as well if you change these */
+static_assert(MMIO_SPTE_GEN_LOW_BITS == 9 && MMIO_SPTE_GEN_HIGH_BITS == 9);
+
+#define MMIO_SPTE_GEN_LOW_SHIFT   (MMIO_SPTE_GEN_LOW_START - 0)
+#define MMIO_SPTE_GEN_HIGH_SHIFT  (MMIO_SPTE_GEN_HIGH_START - MMIO_SPTE_GEN_LOW_BITS)
+
+#define MMIO_SPTE_GEN_MASK        GENMASK_ULL(MMIO_SPTE_GEN_LOW_BITS + MMIO_SPTE_GEN_HIGH_BITS - 1, 0)
 
 extern u64 __read_mostly shadow_nx_mask;
 extern u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
 extern u64 __read_mostly shadow_user_mask;

@@ -228,8 +239,8 @@ static inline u64 get_mmio_spte_generation(u64 spte)
 {
         u64 gen;
 
-        gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START;
-        gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START;
+        gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_SHIFT;
+        gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_SHIFT;
         return gen;
 }
 

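These hunks shift the generation by the new LOW/HIGH_SHIFT values instead of the
raw bit positions. With the old code, shifting by MMIO_SPTE_GEN_HIGH_START put
generation bits 0-8 (again) into SPTE bits 54-62, so bits 9-17 of the generation
never survived a round trip through the SPTE. A standalone sketch of the fixed
packing; the constants and the local GENMASK_ULL() below only mirror the kernel
macros by value, this is not kernel code:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define GEN_LOW_START   3    /* first SPTE bit of the low half */
#define GEN_LOW_BITS    9
#define GEN_HIGH_START  54   /* first SPTE bit of the high half */
#define GEN_HIGH_BITS   9

#define GENMASK_ULL(h, l) (((~0ULL) >> (63 - (h))) & ((~0ULL) << (l)))

#define GEN_LOW_MASK   GENMASK_ULL(GEN_LOW_START + GEN_LOW_BITS - 1, GEN_LOW_START)
#define GEN_HIGH_MASK  GENMASK_ULL(GEN_HIGH_START + GEN_HIGH_BITS - 1, GEN_HIGH_START)

/* Shifts relative to the generation value, as in the fixed code. */
#define GEN_LOW_SHIFT   (GEN_LOW_START - 0)
#define GEN_HIGH_SHIFT  (GEN_HIGH_START - GEN_LOW_BITS)

static uint64_t encode_gen(uint64_t gen)
{
        uint64_t spte = (gen << GEN_LOW_SHIFT) & GEN_LOW_MASK;

        spte |= (gen << GEN_HIGH_SHIFT) & GEN_HIGH_MASK;
        return spte;
}

static uint64_t decode_gen(uint64_t spte)
{
        uint64_t gen = (spte & GEN_LOW_MASK) >> GEN_LOW_SHIFT;

        gen |= (spte & GEN_HIGH_MASK) >> GEN_HIGH_SHIFT;
        return gen;
}

int main(void)
{
        /* A value with bits set in the upper half (bits 9-17) of the generation. */
        uint64_t gen = 0x2abcdULL & ((1ULL << 18) - 1);

        assert(decode_gen(encode_gen(gen)) == gen);
        printf("gen 0x%llx round-trips through the SPTE encoding\n",
               (unsigned long long)gen);
        return 0;
}

Substituting the old shifts (3 and 54) into encode_gen() makes the assertion
fail for any generation above 511, which is the bug the fix addresses.
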
@@ -66,7 +66,7 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 
 void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
 {
-        gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);
+        gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
 
         lockdep_assert_held(&kvm->mmu_lock);
 

@@ -456,7 +456,7 @@ bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
 
 void kvm_tdp_mmu_zap_all(struct kvm *kvm)
 {
-        gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);
+        gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
         bool flush;
 
         flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn);

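Both hunks compute the highest GFN the TDP MMU may have mapped as
2^(phys_bits - PAGE_SHIFT), now using the physical-address width KVM itself
works with rather than boot_cpu_data.x86_phys_bits. A trivial standalone check
of that arithmetic; the 52-bit figure below is only an example value, not
something the diff establishes:

#include <stdio.h>

int main(void)
{
        const unsigned int page_shift = 12;         /* 4KiB pages */
        const unsigned int shadow_phys_bits = 52;   /* example MAXPHYADDR */

        unsigned long long max_gfn = 1ULL << (shadow_phys_bits - page_shift);

        printf("max_gfn = 2^%u = %llu\n", shadow_phys_bits - page_shift, max_gfn);
        return 0;
}

If the zap range is derived from a smaller bit width than the one used when the
mappings were created, GFNs above that range are never zapped, which is what the
switch to shadow_phys_bits avoids.
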
@@ -530,12 +530,12 @@ static int svm_hardware_enable(void)
 
 static void svm_cpu_uninit(int cpu)
 {
-        struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
+        struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
 
         if (!sd)
                 return;
 
-        per_cpu(svm_data, raw_smp_processor_id()) = NULL;
+        per_cpu(svm_data, cpu) = NULL;
         kfree(sd->sev_vmcbs);
         __free_page(sd->save_area);
         kfree(sd);

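The hunk matters because svm_cpu_uninit() is called once per CPU by the teardown
path, yet the old code always indexed the per-CPU data by the CPU running the
loop, freeing only that one slot. A toy userspace model of the bug; the names
cpu_data and cpu_uninit are hypothetical stand-ins for the per-CPU pointers, not
the kernel's API:

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 4

static int *cpu_data[NR_CPUS];   /* stand-in for the per-CPU svm_data pointers */

/*
 * Fixed behaviour: release the slot named by the argument. Indexing by
 * "the CPU we happen to be running on" (what raw_smp_processor_id()
 * amounted to) would free the same slot on every iteration.
 */
static void cpu_uninit(int cpu)
{
        int *sd = cpu_data[cpu];

        if (!sd)
                return;

        cpu_data[cpu] = NULL;
        free(sd);
}

int main(void)
{
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                cpu_data[cpu] = malloc(sizeof(int));

        /* Cleanup loop over every CPU, as the SVM teardown path does. */
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                cpu_uninit(cpu);

        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                printf("cpu %d: %s\n", cpu, cpu_data[cpu] ? "leaked" : "freed");
        return 0;
}
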
@@ -742,7 +742,11 @@ class DebugfsProvider(Provider):
         The fields are all available KVM debugfs files
 
         """
-        return self.walkdir(PATH_DEBUGFS_KVM)[2]
+        exempt_list = ['halt_poll_fail_ns', 'halt_poll_success_ns']
+        fields = [field for field in self.walkdir(PATH_DEBUGFS_KVM)[2]
+                  if field not in exempt_list]
+
+        return fields
 
     def update_fields(self, fields_filter):
         """Refresh fields, applying fields_filter"""
 

@@ -156,14 +156,23 @@ static void guest_code_move_memory_region(void)
         GUEST_SYNC(0);
 
         /*
-         * Spin until the memory region is moved to a misaligned address. This
-         * may or may not trigger MMIO, as the window where the memslot is
-         * invalid is quite small.
+         * Spin until the memory region starts getting moved to a
+         * misaligned address.
+         * Every region move may or may not trigger MMIO, as the
+         * window where the memslot is invalid is usually quite small.
          */
         val = guest_spin_on_val(0);
         GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
 
-        /* Spin until the memory region is realigned. */
+        /* Spin until the misaligning memory region move completes. */
+        val = guest_spin_on_val(MMIO_VAL);
+        GUEST_ASSERT_1(val == 1 || val == 0, val);
+
+        /* Spin until the memory region starts to get re-aligned. */
+        val = guest_spin_on_val(0);
+        GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+
+        /* Spin until the re-aligning memory region move completes. */
         val = guest_spin_on_val(MMIO_VAL);
         GUEST_ASSERT_1(val == 1, val);
 