KVM/arm updates for 4.21
- Large PUD support for HugeTLB - Single-stepping fixes - Improved tracing - Various timer and vgic fixups -----BEGIN PGP SIGNATURE----- iQJJBAABCgAzFiEEn9UcU+C1Yxj9lZw9I9DQutE9ekMFAlwahf0VHG1hcmMuenlu Z2llckBhcm0uY29tAAoJECPQ0LrRPXpDl3MQAKTJ2+vA1vCln2OiKJLZ0TzsSVFB EXiJfQ6QghD+BHeXw/XU/4X8sD8NjzIP833RvmAgQ/Gm2BpEY/Fj4CmTKaoA5wfJ UMEvLUFGWb19d0hbf7AllSXg3FvkpAMVof7zfKIyI7tHem6sWHmyXDiXzEfpX2un bS3x8OBbdVhHcjCvgc1U6Jbii0KUR8Ac5PJBJny1PWkKHFe8NYf/cX+Ii//FMdCm 7zihQAFOpksVOI7y9wYwpmMeI52vDwesergqBBJXkklsAFAda56a2NuoG6oim3BJ FH/cavGGfrwcdN6Dh5tkvubfxIL5sKF57ZW0Jpy7MPK7u2Zzr7ZvRBHdvYqE+kp3 +jieKr6t1MVnpYfOOZRZgnTqio3Cp++2GzZr283IH0WjDTnN7hhEWbU6/o8DHSge H/nDyxSycbUZtrGVAOm6oPoy4hNElvW8S71+rLqXVc46aKs3YheNg5MqkLawRA0q 5U9Lw5Um/IvcjfM8DESpmYnugZV8FkzEcMZ3SQjQRYafXdjq2V2NjSMtl2+dyeDh KthCujhK0F1KBgw7FocNOwh2M7q6mIjw93HrX30CcT6cu2q+0apty+tjXZapP+dc l7Tad8iFGzAGvW0i3yNWADXhMGk721YrGmptWZh4M9B8CZr2pPzuB4nUPDMeyMYl XlgIgVGv24MKDjnW =SiUI -----END PGP SIGNATURE----- Merge tag 'kvmarm-for-v4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD KVM/arm updates for 4.21 - Large PUD support for HugeTLB - Single-stepping fixes - Improved tracing - Various timer and vgic fixups
This commit is contained in:
commit
8c5e14f438
|
@ -23,6 +23,10 @@
|
|||
|
||||
#define ARM_EXIT_WITH_ABORT_BIT 31
|
||||
#define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_ABORT_BIT))
|
||||
#define ARM_EXCEPTION_IS_TRAP(x) \
|
||||
(ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_PREF_ABORT || \
|
||||
ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_DATA_ABORT || \
|
||||
ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_HVC)
|
||||
#define ARM_ABORT_PENDING(x) !!((x) & (1U << ARM_EXIT_WITH_ABORT_BIT))
|
||||
|
||||
#define ARM_EXCEPTION_RESET 0
|
||||
|
|
|
@ -296,11 +296,6 @@ static inline void kvm_arm_init_debug(void) {}
|
|||
static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
|
||||
static inline bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu,
|
||||
struct kvm_run *run)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
|
||||
struct kvm_device_attr *attr);
|
||||
|
|
|
@ -82,6 +82,67 @@ void kvm_clear_hyp_idmap(void);
|
|||
#define kvm_mk_pud(pmdp) __pud(__pa(pmdp) | PMD_TYPE_TABLE)
|
||||
#define kvm_mk_pgd(pudp) ({ BUILD_BUG(); 0; })
|
||||
|
||||
#define kvm_pfn_pte(pfn, prot) pfn_pte(pfn, prot)
|
||||
#define kvm_pfn_pmd(pfn, prot) pfn_pmd(pfn, prot)
|
||||
#define kvm_pfn_pud(pfn, prot) (__pud(0))
|
||||
|
||||
#define kvm_pud_pfn(pud) ({ WARN_ON(1); 0; })
|
||||
|
||||
|
||||
#define kvm_pmd_mkhuge(pmd) pmd_mkhuge(pmd)
|
||||
/* No support for pud hugepages */
|
||||
#define kvm_pud_mkhuge(pud) ( {WARN_ON(1); pud; })
|
||||
|
||||
/*
|
||||
* The following kvm_*pud*() functions are provided strictly to allow
|
||||
* sharing code with arm64. They should never be called in practice.
|
||||
*/
|
||||
static inline void kvm_set_s2pud_readonly(pud_t *pud)
|
||||
{
|
||||
WARN_ON(1);
|
||||
}
|
||||
|
||||
static inline bool kvm_s2pud_readonly(pud_t *pud)
|
||||
{
|
||||
WARN_ON(1);
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void kvm_set_pud(pud_t *pud, pud_t new_pud)
|
||||
{
|
||||
WARN_ON(1);
|
||||
}
|
||||
|
||||
static inline pud_t kvm_s2pud_mkwrite(pud_t pud)
|
||||
{
|
||||
WARN_ON(1);
|
||||
return pud;
|
||||
}
|
||||
|
||||
static inline pud_t kvm_s2pud_mkexec(pud_t pud)
|
||||
{
|
||||
WARN_ON(1);
|
||||
return pud;
|
||||
}
|
||||
|
||||
static inline bool kvm_s2pud_exec(pud_t *pud)
|
||||
{
|
||||
WARN_ON(1);
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline pud_t kvm_s2pud_mkyoung(pud_t pud)
|
||||
{
|
||||
BUG();
|
||||
return pud;
|
||||
}
|
||||
|
||||
static inline bool kvm_s2pud_young(pud_t pud)
|
||||
{
|
||||
WARN_ON(1);
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
|
||||
{
|
||||
pte_val(pte) |= L_PTE_S2_RDWR;
|
||||
|
|
|
@ -68,4 +68,12 @@ stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
|
|||
#define stage2_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
|
||||
#define stage2_pud_table_empty(kvm, pudp) false
|
||||
|
||||
static inline bool kvm_stage2_has_pud(struct kvm *kvm)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#define S2_PMD_MASK PMD_MASK
|
||||
#define S2_PMD_SIZE PMD_SIZE
|
||||
|
||||
#endif /* __ARM_S2_PGTABLE_H_ */
|
||||
|
|
|
@ -602,8 +602,8 @@ static int emulate_cp15(struct kvm_vcpu *vcpu,
|
|||
}
|
||||
} else {
|
||||
/* If access function fails, it should complain. */
|
||||
kvm_err("Unsupported guest CP15 access at: %08lx\n",
|
||||
*vcpu_pc(vcpu));
|
||||
kvm_err("Unsupported guest CP15 access at: %08lx [%08lx]\n",
|
||||
*vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
|
||||
print_cp_instr(params);
|
||||
kvm_inject_undefined(vcpu);
|
||||
}
|
||||
|
|
|
@ -104,7 +104,7 @@
|
|||
TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK)
|
||||
|
||||
/* VTCR_EL2 Registers bits */
|
||||
#define VTCR_EL2_RES1 (1 << 31)
|
||||
#define VTCR_EL2_RES1 (1U << 31)
|
||||
#define VTCR_EL2_HD (1 << 22)
|
||||
#define VTCR_EL2_HA (1 << 21)
|
||||
#define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT
|
||||
|
@ -320,10 +320,6 @@
|
|||
#define PAR_TO_HPFAR(par) \
|
||||
(((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8)
|
||||
|
||||
#define kvm_arm_exception_type \
|
||||
{0, "IRQ" }, \
|
||||
{1, "TRAP" }
|
||||
|
||||
#define ECN(x) { ESR_ELx_EC_##x, #x }
|
||||
|
||||
#define kvm_arm_exception_class \
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
#define ARM_EXIT_WITH_SERROR_BIT 31
|
||||
#define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT))
|
||||
#define ARM_EXCEPTION_IS_TRAP(x) (ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_TRAP)
|
||||
#define ARM_SERROR_PENDING(x) !!((x) & (1U << ARM_EXIT_WITH_SERROR_BIT))
|
||||
|
||||
#define ARM_EXCEPTION_IRQ 0
|
||||
|
@ -34,6 +35,12 @@
|
|||
/* The hyp-stub will return this for any kvm_call_hyp() call */
|
||||
#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR
|
||||
|
||||
#define kvm_arm_exception_type \
|
||||
{ARM_EXCEPTION_IRQ, "IRQ" }, \
|
||||
{ARM_EXCEPTION_EL1_SERROR, "SERROR" }, \
|
||||
{ARM_EXCEPTION_TRAP, "TRAP" }, \
|
||||
{ARM_EXCEPTION_HYP_GONE, "HYP_GONE" }
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <linux/mm.h>
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
#include <linux/kvm_host.h>
|
||||
|
||||
#include <asm/debug-monitors.h>
|
||||
#include <asm/esr.h>
|
||||
#include <asm/kvm_arm.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
|
@ -147,14 +148,6 @@ static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
|
|||
return true;
|
||||
}
|
||||
|
||||
static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
|
||||
{
|
||||
if (vcpu_mode_is_32bit(vcpu))
|
||||
kvm_skip_instr32(vcpu, is_wide_instr);
|
||||
else
|
||||
*vcpu_pc(vcpu) += 4;
|
||||
}
|
||||
|
||||
static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
*vcpu_cpsr(vcpu) |= PSR_AA32_T_BIT;
|
||||
|
@ -424,4 +417,30 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
|
|||
return data; /* Leave LE untouched */
|
||||
}
|
||||
|
||||
static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
|
||||
{
|
||||
if (vcpu_mode_is_32bit(vcpu))
|
||||
kvm_skip_instr32(vcpu, is_wide_instr);
|
||||
else
|
||||
*vcpu_pc(vcpu) += 4;
|
||||
|
||||
/* advance the singlestep state machine */
|
||||
*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Skip an instruction which has been emulated at hyp while most guest sysregs
|
||||
* are live.
|
||||
*/
|
||||
static inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
*vcpu_pc(vcpu) = read_sysreg_el2(elr);
|
||||
vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(spsr);
|
||||
|
||||
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
|
||||
|
||||
write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, spsr);
|
||||
write_sysreg_el2(*vcpu_pc(vcpu), elr);
|
||||
}
|
||||
|
||||
#endif /* __ARM64_KVM_EMULATE_H__ */
|
||||
|
|
|
@ -319,7 +319,7 @@ struct kvm_vcpu_arch {
|
|||
*/
|
||||
#define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)])
|
||||
|
||||
u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg);
|
||||
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg);
|
||||
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
|
||||
|
||||
/*
|
||||
|
@ -445,7 +445,6 @@ void kvm_arm_init_debug(void);
|
|||
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
|
||||
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
|
||||
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
|
||||
bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run);
|
||||
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
|
||||
struct kvm_device_attr *attr);
|
||||
int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
|
||||
|
|
|
@ -184,6 +184,17 @@ void kvm_clear_hyp_idmap(void);
|
|||
#define kvm_mk_pgd(pudp) \
|
||||
__pgd(__phys_to_pgd_val(__pa(pudp)) | PUD_TYPE_TABLE)
|
||||
|
||||
#define kvm_set_pud(pudp, pud) set_pud(pudp, pud)
|
||||
|
||||
#define kvm_pfn_pte(pfn, prot) pfn_pte(pfn, prot)
|
||||
#define kvm_pfn_pmd(pfn, prot) pfn_pmd(pfn, prot)
|
||||
#define kvm_pfn_pud(pfn, prot) pfn_pud(pfn, prot)
|
||||
|
||||
#define kvm_pud_pfn(pud) pud_pfn(pud)
|
||||
|
||||
#define kvm_pmd_mkhuge(pmd) pmd_mkhuge(pmd)
|
||||
#define kvm_pud_mkhuge(pud) pud_mkhuge(pud)
|
||||
|
||||
static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
|
||||
{
|
||||
pte_val(pte) |= PTE_S2_RDWR;
|
||||
|
@ -196,6 +207,12 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
|
|||
return pmd;
|
||||
}
|
||||
|
||||
static inline pud_t kvm_s2pud_mkwrite(pud_t pud)
|
||||
{
|
||||
pud_val(pud) |= PUD_S2_RDWR;
|
||||
return pud;
|
||||
}
|
||||
|
||||
static inline pte_t kvm_s2pte_mkexec(pte_t pte)
|
||||
{
|
||||
pte_val(pte) &= ~PTE_S2_XN;
|
||||
|
@ -208,6 +225,12 @@ static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
|
|||
return pmd;
|
||||
}
|
||||
|
||||
static inline pud_t kvm_s2pud_mkexec(pud_t pud)
|
||||
{
|
||||
pud_val(pud) &= ~PUD_S2_XN;
|
||||
return pud;
|
||||
}
|
||||
|
||||
static inline void kvm_set_s2pte_readonly(pte_t *ptep)
|
||||
{
|
||||
pteval_t old_pteval, pteval;
|
||||
|
@ -246,6 +269,31 @@ static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
|
|||
return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
|
||||
}
|
||||
|
||||
static inline void kvm_set_s2pud_readonly(pud_t *pudp)
|
||||
{
|
||||
kvm_set_s2pte_readonly((pte_t *)pudp);
|
||||
}
|
||||
|
||||
static inline bool kvm_s2pud_readonly(pud_t *pudp)
|
||||
{
|
||||
return kvm_s2pte_readonly((pte_t *)pudp);
|
||||
}
|
||||
|
||||
static inline bool kvm_s2pud_exec(pud_t *pudp)
|
||||
{
|
||||
return !(READ_ONCE(pud_val(*pudp)) & PUD_S2_XN);
|
||||
}
|
||||
|
||||
static inline pud_t kvm_s2pud_mkyoung(pud_t pud)
|
||||
{
|
||||
return pud_mkyoung(pud);
|
||||
}
|
||||
|
||||
static inline bool kvm_s2pud_young(pud_t pud)
|
||||
{
|
||||
return pud_young(pud);
|
||||
}
|
||||
|
||||
#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
|
||||
|
||||
#ifdef __PAGETABLE_PMD_FOLDED
|
||||
|
|
|
@ -193,6 +193,10 @@
|
|||
#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
|
||||
#define PMD_S2_XN (_AT(pmdval_t, 2) << 53) /* XN[1:0] */
|
||||
|
||||
#define PUD_S2_RDONLY (_AT(pudval_t, 1) << 6) /* HAP[2:1] */
|
||||
#define PUD_S2_RDWR (_AT(pudval_t, 3) << 6) /* HAP[2:1] */
|
||||
#define PUD_S2_XN (_AT(pudval_t, 2) << 53) /* XN[1:0] */
|
||||
|
||||
/*
|
||||
* Memory Attribute override for Stage-2 (MemAttr[3:0])
|
||||
*/
|
||||
|
|
|
@ -314,6 +314,11 @@ static inline pte_t pud_pte(pud_t pud)
|
|||
return __pte(pud_val(pud));
|
||||
}
|
||||
|
||||
static inline pud_t pte_pud(pte_t pte)
|
||||
{
|
||||
return __pud(pte_val(pte));
|
||||
}
|
||||
|
||||
static inline pmd_t pud_pmd(pud_t pud)
|
||||
{
|
||||
return __pmd(pud_val(pud));
|
||||
|
@ -381,8 +386,12 @@ static inline int pmd_protnone(pmd_t pmd)
|
|||
#define pfn_pmd(pfn,prot) __pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
|
||||
#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
|
||||
|
||||
#define pud_young(pud) pte_young(pud_pte(pud))
|
||||
#define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud)))
|
||||
#define pud_write(pud) pte_write(pud_pte(pud))
|
||||
|
||||
#define pud_mkhuge(pud) (__pud(pud_val(pud) & ~PUD_TABLE_BIT))
|
||||
|
||||
#define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud))
|
||||
#define __phys_to_pud_val(phys) __phys_to_pte_val(phys)
|
||||
#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
|
||||
|
|
|
@ -30,16 +30,14 @@
|
|||
#define pt_levels_pgdir_shift(lvls) ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - (lvls))
|
||||
|
||||
/*
|
||||
* The hardware supports concatenation of up to 16 tables at stage2 entry level
|
||||
* and we use the feature whenever possible.
|
||||
* The hardware supports concatenation of up to 16 tables at stage2 entry
|
||||
* level and we use the feature whenever possible, which means we resolve 4
|
||||
* additional bits of address at the entry level.
|
||||
*
|
||||
* Now, the minimum number of bits resolved at any level is (PAGE_SHIFT - 3).
|
||||
* On arm64, the smallest PAGE_SIZE supported is 4k, which means
|
||||
* (PAGE_SHIFT - 3) > 4 holds for all page sizes.
|
||||
* This implies, the total number of page table levels at stage2 expected
|
||||
* by the hardware is actually the number of levels required for (IPA_SHIFT - 4)
|
||||
* in normal translations(e.g, stage1), since we cannot have another level in
|
||||
* the range (IPA_SHIFT, IPA_SHIFT - 4).
|
||||
* This implies, the total number of page table levels required for
|
||||
* IPA_SHIFT at stage2 expected by the hardware can be calculated using
|
||||
* the same logic used for the (non-collapsable) stage1 page tables but for
|
||||
* (IPA_SHIFT - 4).
|
||||
*/
|
||||
#define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4)
|
||||
#define kvm_stage2_levels(kvm) VTCR_EL2_LVLS(kvm->arch.vtcr)
|
||||
|
|
|
@ -236,24 +236,3 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* After successfully emulating an instruction, we might want to
|
||||
* return to user space with a KVM_EXIT_DEBUG. We can only do this
|
||||
* once the emulation is complete, though, so for userspace emulations
|
||||
* we have to wait until we have re-entered KVM before calling this
|
||||
* helper.
|
||||
*
|
||||
* Return true (and set exit_reason) to return to userspace or false
|
||||
* if no further action is required.
|
||||
*/
|
||||
bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
{
|
||||
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
|
||||
run->exit_reason = KVM_EXIT_DEBUG;
|
||||
run->debug.arch.hsr = ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -229,13 +229,6 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
|||
handled = exit_handler(vcpu, run);
|
||||
}
|
||||
|
||||
/*
|
||||
* kvm_arm_handle_step_debug() sets the exit_reason on the kvm_run
|
||||
* structure if we need to return to userspace.
|
||||
*/
|
||||
if (handled > 0 && kvm_arm_handle_step_debug(vcpu, run))
|
||||
handled = 0;
|
||||
|
||||
return handled;
|
||||
}
|
||||
|
||||
|
@ -269,12 +262,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
|||
case ARM_EXCEPTION_IRQ:
|
||||
return 1;
|
||||
case ARM_EXCEPTION_EL1_SERROR:
|
||||
/* We may still need to return for single-step */
|
||||
if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS)
|
||||
&& kvm_arm_handle_step_debug(vcpu, run))
|
||||
return 0;
|
||||
else
|
||||
return 1;
|
||||
return 1;
|
||||
case ARM_EXCEPTION_TRAP:
|
||||
return handle_trap_exceptions(vcpu, run);
|
||||
case ARM_EXCEPTION_HYP_GONE:
|
||||
|
|
|
@ -305,33 +305,6 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Skip an instruction which has been emulated. Returns true if
|
||||
* execution can continue or false if we need to exit hyp mode because
|
||||
* single-step was in effect.
|
||||
*/
|
||||
static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
*vcpu_pc(vcpu) = read_sysreg_el2(elr);
|
||||
|
||||
if (vcpu_mode_is_32bit(vcpu)) {
|
||||
vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(spsr);
|
||||
kvm_skip_instr32(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
|
||||
write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, spsr);
|
||||
} else {
|
||||
*vcpu_pc(vcpu) += 4;
|
||||
}
|
||||
|
||||
write_sysreg_el2(*vcpu_pc(vcpu), elr);
|
||||
|
||||
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
|
||||
vcpu->arch.fault.esr_el2 =
|
||||
(ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT) | 0x22;
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct user_fpsimd_state *host_fpsimd = vcpu->arch.host_fpsimd_state;
|
||||
|
@ -420,20 +393,12 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
|
|||
if (valid) {
|
||||
int ret = __vgic_v2_perform_cpuif_access(vcpu);
|
||||
|
||||
if (ret == 1 && __skip_instr(vcpu))
|
||||
if (ret == 1)
|
||||
return true;
|
||||
|
||||
if (ret == -1) {
|
||||
/* Promote an illegal access to an
|
||||
* SError. If we would be returning
|
||||
* due to single-step clear the SS
|
||||
* bit so handle_exit knows what to
|
||||
* do after dealing with the error.
|
||||
*/
|
||||
if (!__skip_instr(vcpu))
|
||||
*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
|
||||
/* Promote an illegal access to an SError.*/
|
||||
if (ret == -1)
|
||||
*exit_code = ARM_EXCEPTION_EL1_SERROR;
|
||||
}
|
||||
|
||||
goto exit;
|
||||
}
|
||||
|
@ -444,7 +409,7 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
|
|||
kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
|
||||
int ret = __vgic_v3_perform_cpuif_access(vcpu);
|
||||
|
||||
if (ret == 1 && __skip_instr(vcpu))
|
||||
if (ret == 1)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -41,7 +41,7 @@ static bool __hyp_text __is_be(struct kvm_vcpu *vcpu)
|
|||
* Returns:
|
||||
* 1: GICV access successfully performed
|
||||
* 0: Not a GICV access
|
||||
* -1: Illegal GICV access
|
||||
* -1: Illegal GICV access successfully performed
|
||||
*/
|
||||
int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
|
@ -61,12 +61,16 @@ int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
|
|||
return 0;
|
||||
|
||||
/* Reject anything but a 32bit access */
|
||||
if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32))
|
||||
if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32)) {
|
||||
__kvm_skip_instr(vcpu);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Not aligned? Don't bother */
|
||||
if (fault_ipa & 3)
|
||||
if (fault_ipa & 3) {
|
||||
__kvm_skip_instr(vcpu);
|
||||
return -1;
|
||||
}
|
||||
|
||||
rd = kvm_vcpu_dabt_get_rd(vcpu);
|
||||
addr = hyp_symbol_addr(kvm_vgic_global_state)->vcpu_hyp_va;
|
||||
|
@ -88,5 +92,7 @@ int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
|
|||
vcpu_set_reg(vcpu, rd, data);
|
||||
}
|
||||
|
||||
__kvm_skip_instr(vcpu);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -76,7 +76,7 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
|
|||
return false;
|
||||
}
|
||||
|
||||
u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
|
||||
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
|
||||
{
|
||||
if (!vcpu->arch.sysregs_loaded_on_cpu)
|
||||
goto immediate_read;
|
||||
|
@ -1850,6 +1850,8 @@ static void perform_access(struct kvm_vcpu *vcpu,
|
|||
struct sys_reg_params *params,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
trace_kvm_sys_access(*vcpu_pc(vcpu), params, r);
|
||||
|
||||
/*
|
||||
* Not having an accessor means that we have configured a trap
|
||||
* that we don't know how to handle. This certainly qualifies
|
||||
|
@ -1912,8 +1914,8 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu,
|
|||
WARN_ON(1);
|
||||
}
|
||||
|
||||
kvm_err("Unsupported guest CP%d access at: %08lx\n",
|
||||
cp, *vcpu_pc(vcpu));
|
||||
kvm_err("Unsupported guest CP%d access at: %08lx [%08lx]\n",
|
||||
cp, *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
|
||||
print_sys_reg_instr(params);
|
||||
kvm_inject_undefined(vcpu);
|
||||
}
|
||||
|
@ -2063,8 +2065,8 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
|
|||
if (likely(r)) {
|
||||
perform_access(vcpu, params, r);
|
||||
} else {
|
||||
kvm_err("Unsupported guest sys_reg access at: %lx\n",
|
||||
*vcpu_pc(vcpu));
|
||||
kvm_err("Unsupported guest sys_reg access at: %lx [%08lx]\n",
|
||||
*vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
|
||||
print_sys_reg_instr(params);
|
||||
kvm_inject_undefined(vcpu);
|
||||
}
|
||||
|
|
|
@ -35,6 +35,9 @@ struct sys_reg_params {
|
|||
};
|
||||
|
||||
struct sys_reg_desc {
|
||||
/* Sysreg string for debug */
|
||||
const char *name;
|
||||
|
||||
/* MRS/MSR instruction which accesses it. */
|
||||
u8 Op0;
|
||||
u8 Op1;
|
||||
|
@ -130,6 +133,7 @@ const struct sys_reg_desc *find_reg_by_id(u64 id,
|
|||
#define Op2(_x) .Op2 = _x
|
||||
|
||||
#define SYS_DESC(reg) \
|
||||
.name = #reg, \
|
||||
Op0(sys_reg_Op0(reg)), Op1(sys_reg_Op1(reg)), \
|
||||
CRn(sys_reg_CRn(reg)), CRm(sys_reg_CRm(reg)), \
|
||||
Op2(sys_reg_Op2(reg))
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#define _TRACE_ARM64_KVM_H
|
||||
|
||||
#include <linux/tracepoint.h>
|
||||
#include "sys_regs.h"
|
||||
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM kvm
|
||||
|
@ -152,6 +153,40 @@ TRACE_EVENT(kvm_handle_sys_reg,
|
|||
TP_printk("HSR 0x%08lx", __entry->hsr)
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_sys_access,
|
||||
TP_PROTO(unsigned long vcpu_pc, struct sys_reg_params *params, const struct sys_reg_desc *reg),
|
||||
TP_ARGS(vcpu_pc, params, reg),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(unsigned long, vcpu_pc)
|
||||
__field(bool, is_write)
|
||||
__field(const char *, name)
|
||||
__field(u8, Op0)
|
||||
__field(u8, Op1)
|
||||
__field(u8, CRn)
|
||||
__field(u8, CRm)
|
||||
__field(u8, Op2)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->vcpu_pc = vcpu_pc;
|
||||
__entry->is_write = params->is_write;
|
||||
__entry->name = reg->name;
|
||||
__entry->Op0 = reg->Op0;
|
||||
__entry->Op0 = reg->Op0;
|
||||
__entry->Op1 = reg->Op1;
|
||||
__entry->CRn = reg->CRn;
|
||||
__entry->CRm = reg->CRm;
|
||||
__entry->Op2 = reg->Op2;
|
||||
),
|
||||
|
||||
TP_printk("PC: %lx %s (%d,%d,%d,%d,%d) %s",
|
||||
__entry->vcpu_pc, __entry->name ?: "UNKN",
|
||||
__entry->Op0, __entry->Op1, __entry->CRn,
|
||||
__entry->CRm, __entry->Op2,
|
||||
__entry->is_write ? "write" : "read")
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_set_guest_debug,
|
||||
TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
|
||||
TP_ARGS(vcpu, guest_debug),
|
||||
|
|
|
@ -21,7 +21,6 @@
|
|||
|
||||
#include <linux/clocksource.h>
|
||||
#include <linux/hrtimer.h>
|
||||
#include <linux/workqueue.h>
|
||||
|
||||
struct arch_timer_context {
|
||||
/* Registers: control register, timer value */
|
||||
|
@ -52,9 +51,6 @@ struct arch_timer_cpu {
|
|||
/* Background timer used when the guest is not running */
|
||||
struct hrtimer bg_timer;
|
||||
|
||||
/* Work queued with the above timer expires */
|
||||
struct work_struct expired;
|
||||
|
||||
/* Physical timer emulation */
|
||||
struct hrtimer phys_timer;
|
||||
|
||||
|
|
|
@ -70,11 +70,9 @@ static void soft_timer_start(struct hrtimer *hrt, u64 ns)
|
|||
HRTIMER_MODE_ABS);
|
||||
}
|
||||
|
||||
static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work)
|
||||
static void soft_timer_cancel(struct hrtimer *hrt)
|
||||
{
|
||||
hrtimer_cancel(hrt);
|
||||
if (work)
|
||||
cancel_work_sync(work);
|
||||
}
|
||||
|
||||
static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
|
||||
|
@ -102,23 +100,6 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
|
|||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Work function for handling the backup timer that we schedule when a vcpu is
|
||||
* no longer running, but had a timer programmed to fire in the future.
|
||||
*/
|
||||
static void kvm_timer_inject_irq_work(struct work_struct *work)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
|
||||
|
||||
/*
|
||||
* If the vcpu is blocked we want to wake it up so that it will see
|
||||
* the timer has expired when entering the guest.
|
||||
*/
|
||||
kvm_vcpu_wake_up(vcpu);
|
||||
}
|
||||
|
||||
static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
|
||||
{
|
||||
u64 cval, now;
|
||||
|
@ -188,7 +169,7 @@ static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
|
|||
return HRTIMER_RESTART;
|
||||
}
|
||||
|
||||
schedule_work(&timer->expired);
|
||||
kvm_vcpu_wake_up(vcpu);
|
||||
return HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
|
@ -300,7 +281,7 @@ static void phys_timer_emulate(struct kvm_vcpu *vcpu)
|
|||
* then we also don't need a soft timer.
|
||||
*/
|
||||
if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) {
|
||||
soft_timer_cancel(&timer->phys_timer, NULL);
|
||||
soft_timer_cancel(&timer->phys_timer);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -426,7 +407,7 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
|
|||
|
||||
vtimer_restore_state(vcpu);
|
||||
|
||||
soft_timer_cancel(&timer->bg_timer, &timer->expired);
|
||||
soft_timer_cancel(&timer->bg_timer);
|
||||
}
|
||||
|
||||
static void set_cntvoff(u64 cntvoff)
|
||||
|
@ -544,7 +525,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
|
|||
* In any case, we re-schedule the hrtimer for the physical timer when
|
||||
* coming back to the VCPU thread in kvm_timer_vcpu_load().
|
||||
*/
|
||||
soft_timer_cancel(&timer->phys_timer, NULL);
|
||||
soft_timer_cancel(&timer->phys_timer);
|
||||
|
||||
/*
|
||||
* The kernel may decide to run userspace after calling vcpu_put, so
|
||||
|
@ -637,7 +618,6 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
|
|||
update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
|
||||
vcpu_ptimer(vcpu)->cntvoff = 0;
|
||||
|
||||
INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
|
||||
hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
|
||||
timer->bg_timer.function = kvm_bg_timer_expire;
|
||||
|
||||
|
@ -792,11 +772,8 @@ out_free_irq:
|
|||
void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
|
||||
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
|
||||
|
||||
soft_timer_cancel(&timer->bg_timer, &timer->expired);
|
||||
soft_timer_cancel(&timer->phys_timer, NULL);
|
||||
kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq);
|
||||
soft_timer_cancel(&timer->bg_timer);
|
||||
}
|
||||
|
||||
static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
|
||||
|
|
|
@ -66,7 +66,7 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
|
|||
static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
|
||||
static u32 kvm_next_vmid;
|
||||
static unsigned int kvm_vmid_bits __read_mostly;
|
||||
static DEFINE_RWLOCK(kvm_vmid_lock);
|
||||
static DEFINE_SPINLOCK(kvm_vmid_lock);
|
||||
|
||||
static bool vgic_present;
|
||||
|
||||
|
@ -484,7 +484,9 @@ void force_vm_exit(const cpumask_t *mask)
|
|||
*/
|
||||
static bool need_new_vmid_gen(struct kvm *kvm)
|
||||
{
|
||||
return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
|
||||
u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
|
||||
smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
|
||||
return unlikely(READ_ONCE(kvm->arch.vmid_gen) != current_vmid_gen);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -499,16 +501,11 @@ static void update_vttbr(struct kvm *kvm)
|
|||
{
|
||||
phys_addr_t pgd_phys;
|
||||
u64 vmid, cnp = kvm_cpu_has_cnp() ? VTTBR_CNP_BIT : 0;
|
||||
bool new_gen;
|
||||
|
||||
read_lock(&kvm_vmid_lock);
|
||||
new_gen = need_new_vmid_gen(kvm);
|
||||
read_unlock(&kvm_vmid_lock);
|
||||
|
||||
if (!new_gen)
|
||||
if (!need_new_vmid_gen(kvm))
|
||||
return;
|
||||
|
||||
write_lock(&kvm_vmid_lock);
|
||||
spin_lock(&kvm_vmid_lock);
|
||||
|
||||
/*
|
||||
* We need to re-check the vmid_gen here to ensure that if another vcpu
|
||||
|
@ -516,7 +513,7 @@ static void update_vttbr(struct kvm *kvm)
|
|||
* use the same vmid.
|
||||
*/
|
||||
if (!need_new_vmid_gen(kvm)) {
|
||||
write_unlock(&kvm_vmid_lock);
|
||||
spin_unlock(&kvm_vmid_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -539,7 +536,6 @@ static void update_vttbr(struct kvm *kvm)
|
|||
kvm_call_hyp(__kvm_flush_vm_context);
|
||||
}
|
||||
|
||||
kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
|
||||
kvm->arch.vmid = kvm_next_vmid;
|
||||
kvm_next_vmid++;
|
||||
kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
|
||||
|
@ -550,7 +546,10 @@ static void update_vttbr(struct kvm *kvm)
|
|||
vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
|
||||
kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid | cnp;
|
||||
|
||||
write_unlock(&kvm_vmid_lock);
|
||||
smp_wmb();
|
||||
WRITE_ONCE(kvm->arch.vmid_gen, atomic64_read(&kvm_vmid_gen));
|
||||
|
||||
spin_unlock(&kvm_vmid_lock);
|
||||
}
|
||||
|
||||
static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
|
||||
|
@ -674,8 +673,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
|||
ret = kvm_handle_mmio_return(vcpu, vcpu->run);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (kvm_arm_handle_step_debug(vcpu, vcpu->run))
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (run->immediate_exit)
|
||||
|
|
|
@ -1012,8 +1012,10 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
|
|||
|
||||
esr = kvm_vcpu_get_hsr(vcpu);
|
||||
if (vcpu_mode_is_32bit(vcpu)) {
|
||||
if (!kvm_condition_valid(vcpu))
|
||||
if (!kvm_condition_valid(vcpu)) {
|
||||
__kvm_skip_instr(vcpu);
|
||||
return 1;
|
||||
}
|
||||
|
||||
sysreg = esr_cp15_to_sysreg(esr);
|
||||
} else {
|
||||
|
@ -1123,6 +1125,8 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
|
|||
rt = kvm_vcpu_sys_get_rt(vcpu);
|
||||
fn(vcpu, vmcr, rt);
|
||||
|
||||
__kvm_skip_instr(vcpu);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -117,6 +117,12 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
|||
vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
|
||||
}
|
||||
|
||||
/*
|
||||
* The MMIO instruction is emulated and should not be re-executed
|
||||
* in the guest.
|
||||
*/
|
||||
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -144,11 +150,6 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len)
|
|||
vcpu->arch.mmio_decode.sign_extend = sign_extend;
|
||||
vcpu->arch.mmio_decode.rt = rt;
|
||||
|
||||
/*
|
||||
* The MMIO instruction is emulated and should not be re-executed
|
||||
* in the guest.
|
||||
*/
|
||||
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -115,6 +115,25 @@ static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
|
|||
put_page(virt_to_page(pmd));
|
||||
}
|
||||
|
||||
/**
|
||||
* stage2_dissolve_pud() - clear and flush huge PUD entry
|
||||
* @kvm: pointer to kvm structure.
|
||||
* @addr: IPA
|
||||
* @pud: pud pointer for IPA
|
||||
*
|
||||
* Function clears a PUD entry, flushes addr 1st and 2nd stage TLBs. Marks all
|
||||
* pages in the range dirty.
|
||||
*/
|
||||
static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp)
|
||||
{
|
||||
if (!stage2_pud_huge(kvm, *pudp))
|
||||
return;
|
||||
|
||||
stage2_pud_clear(kvm, pudp);
|
||||
kvm_tlb_flush_vmid_ipa(kvm, addr);
|
||||
put_page(virt_to_page(pudp));
|
||||
}
|
||||
|
||||
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
|
||||
int min, int max)
|
||||
{
|
||||
|
@ -607,7 +626,7 @@ static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
|
|||
addr = start;
|
||||
do {
|
||||
pte = pte_offset_kernel(pmd, addr);
|
||||
kvm_set_pte(pte, pfn_pte(pfn, prot));
|
||||
kvm_set_pte(pte, kvm_pfn_pte(pfn, prot));
|
||||
get_page(virt_to_page(pte));
|
||||
pfn++;
|
||||
} while (addr += PAGE_SIZE, addr != end);
|
||||
|
@ -1022,7 +1041,7 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
|
|||
pmd_t *pmd;
|
||||
|
||||
pud = stage2_get_pud(kvm, cache, addr);
|
||||
if (!pud)
|
||||
if (!pud || stage2_pud_huge(kvm, *pud))
|
||||
return NULL;
|
||||
|
||||
if (stage2_pud_none(kvm, *pud)) {
|
||||
|
@ -1083,29 +1102,103 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
|
|||
return 0;
|
||||
}
|
||||
|
||||
static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr)
|
||||
static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
|
||||
phys_addr_t addr, const pud_t *new_pudp)
|
||||
{
|
||||
pud_t *pudp, old_pud;
|
||||
|
||||
pudp = stage2_get_pud(kvm, cache, addr);
|
||||
VM_BUG_ON(!pudp);
|
||||
|
||||
old_pud = *pudp;
|
||||
|
||||
/*
|
||||
* A large number of vcpus faulting on the same stage 2 entry,
|
||||
* can lead to a refault due to the
|
||||
* stage2_pud_clear()/tlb_flush(). Skip updating the page
|
||||
* tables if there is no change.
|
||||
*/
|
||||
if (pud_val(old_pud) == pud_val(*new_pudp))
|
||||
return 0;
|
||||
|
||||
if (stage2_pud_present(kvm, old_pud)) {
|
||||
stage2_pud_clear(kvm, pudp);
|
||||
kvm_tlb_flush_vmid_ipa(kvm, addr);
|
||||
} else {
|
||||
get_page(virt_to_page(pudp));
|
||||
}
|
||||
|
||||
kvm_set_pud(pudp, *new_pudp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* stage2_get_leaf_entry - walk the stage2 VM page tables and return
|
||||
* true if a valid and present leaf-entry is found. A pointer to the
|
||||
* leaf-entry is returned in the appropriate level variable - pudpp,
|
||||
* pmdpp, ptepp.
|
||||
*/
|
||||
static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr,
|
||||
pud_t **pudpp, pmd_t **pmdpp, pte_t **ptepp)
|
||||
{
|
||||
pud_t *pudp;
|
||||
pmd_t *pmdp;
|
||||
pte_t *ptep;
|
||||
|
||||
pmdp = stage2_get_pmd(kvm, NULL, addr);
|
||||
*pudpp = NULL;
|
||||
*pmdpp = NULL;
|
||||
*ptepp = NULL;
|
||||
|
||||
pudp = stage2_get_pud(kvm, NULL, addr);
|
||||
if (!pudp || stage2_pud_none(kvm, *pudp) || !stage2_pud_present(kvm, *pudp))
|
||||
return false;
|
||||
|
||||
if (stage2_pud_huge(kvm, *pudp)) {
|
||||
*pudpp = pudp;
|
||||
return true;
|
||||
}
|
||||
|
||||
pmdp = stage2_pmd_offset(kvm, pudp, addr);
|
||||
if (!pmdp || pmd_none(*pmdp) || !pmd_present(*pmdp))
|
||||
return false;
|
||||
|
||||
if (pmd_thp_or_huge(*pmdp))
|
||||
return kvm_s2pmd_exec(pmdp);
|
||||
if (pmd_thp_or_huge(*pmdp)) {
|
||||
*pmdpp = pmdp;
|
||||
return true;
|
||||
}
|
||||
|
||||
ptep = pte_offset_kernel(pmdp, addr);
|
||||
if (!ptep || pte_none(*ptep) || !pte_present(*ptep))
|
||||
return false;
|
||||
|
||||
return kvm_s2pte_exec(ptep);
|
||||
*ptepp = ptep;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr)
|
||||
{
|
||||
pud_t *pudp;
|
||||
pmd_t *pmdp;
|
||||
pte_t *ptep;
|
||||
bool found;
|
||||
|
||||
found = stage2_get_leaf_entry(kvm, addr, &pudp, &pmdp, &ptep);
|
||||
if (!found)
|
||||
return false;
|
||||
|
||||
if (pudp)
|
||||
return kvm_s2pud_exec(pudp);
|
||||
else if (pmdp)
|
||||
return kvm_s2pmd_exec(pmdp);
|
||||
else
|
||||
return kvm_s2pte_exec(ptep);
|
||||
}
|
||||
|
||||
static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
|
||||
phys_addr_t addr, const pte_t *new_pte,
|
||||
unsigned long flags)
|
||||
{
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte, old_pte;
|
||||
bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP;
|
||||
|
@ -1114,7 +1207,31 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
|
|||
VM_BUG_ON(logging_active && !cache);
|
||||
|
||||
/* Create stage-2 page table mapping - Levels 0 and 1 */
|
||||
pmd = stage2_get_pmd(kvm, cache, addr);
|
||||
pud = stage2_get_pud(kvm, cache, addr);
|
||||
if (!pud) {
|
||||
/*
|
||||
* Ignore calls from kvm_set_spte_hva for unallocated
|
||||
* address ranges.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* While dirty page logging - dissolve huge PUD, then continue
|
||||
* on to allocate page.
|
||||
*/
|
||||
if (logging_active)
|
||||
stage2_dissolve_pud(kvm, addr, pud);
|
||||
|
||||
if (stage2_pud_none(kvm, *pud)) {
|
||||
if (!cache)
|
||||
return 0; /* ignore calls from kvm_set_spte_hva */
|
||||
pmd = mmu_memory_cache_alloc(cache);
|
||||
stage2_pud_populate(kvm, pud, pmd);
|
||||
get_page(virt_to_page(pud));
|
||||
}
|
||||
|
||||
pmd = stage2_pmd_offset(kvm, pud, addr);
|
||||
if (!pmd) {
|
||||
/*
|
||||
* Ignore calls from kvm_set_spte_hva for unallocated
|
||||
|
@ -1182,6 +1299,11 @@ static int stage2_pmdp_test_and_clear_young(pmd_t *pmd)
|
|||
return stage2_ptep_test_and_clear_young((pte_t *)pmd);
|
||||
}
|
||||
|
||||
static int stage2_pudp_test_and_clear_young(pud_t *pud)
|
||||
{
|
||||
return stage2_ptep_test_and_clear_young((pte_t *)pud);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_phys_addr_ioremap - map a device range to guest IPA
|
||||
*
|
||||
|
@ -1202,7 +1324,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
|
|||
pfn = __phys_to_pfn(pa);
|
||||
|
||||
for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
|
||||
pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
|
||||
pte_t pte = kvm_pfn_pte(pfn, PAGE_S2_DEVICE);
|
||||
|
||||
if (writable)
|
||||
pte = kvm_s2pte_mkwrite(pte);
|
||||
|
@ -1234,7 +1356,7 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
|
|||
struct page *page = pfn_to_page(pfn);
|
||||
|
||||
/*
|
||||
* PageTransCompoungMap() returns true for THP and
|
||||
* PageTransCompoundMap() returns true for THP and
|
||||
* hugetlbfs. Make sure the adjustment is done only for THP
|
||||
* pages.
|
||||
*/
|
||||
|
@ -1347,9 +1469,12 @@ static void stage2_wp_puds(struct kvm *kvm, pgd_t *pgd,
|
|||
do {
|
||||
next = stage2_pud_addr_end(kvm, addr, end);
|
||||
if (!stage2_pud_none(kvm, *pud)) {
|
||||
/* TODO:PUD not supported, revisit later if supported */
|
||||
BUG_ON(stage2_pud_huge(kvm, *pud));
|
||||
stage2_wp_pmds(kvm, pud, addr, next);
|
||||
if (stage2_pud_huge(kvm, *pud)) {
|
||||
if (!kvm_s2pud_readonly(pud))
|
||||
kvm_set_s2pud_readonly(pud);
|
||||
} else {
|
||||
stage2_wp_pmds(kvm, pud, addr, next);
|
||||
}
|
||||
}
|
||||
} while (pud++, addr = next, addr != end);
|
||||
}
|
||||
|
@ -1392,7 +1517,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
|
|||
*
|
||||
* Called to start logging dirty pages after memory region
|
||||
* KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns
|
||||
* all present PMD and PTEs are write protected in the memory region.
|
||||
* all present PUD, PMD and PTEs are write protected in the memory region.
|
||||
* Afterwards read of dirty page log can be called.
|
||||
*
|
||||
* Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired,
|
||||
|
@ -1470,12 +1595,70 @@ static void kvm_send_hwpoison_signal(unsigned long address,
|
|||
send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
|
||||
}
|
||||
|
||||
static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot,
|
||||
unsigned long hva)
|
||||
{
|
||||
gpa_t gpa_start, gpa_end;
|
||||
hva_t uaddr_start, uaddr_end;
|
||||
size_t size;
|
||||
|
||||
size = memslot->npages * PAGE_SIZE;
|
||||
|
||||
gpa_start = memslot->base_gfn << PAGE_SHIFT;
|
||||
gpa_end = gpa_start + size;
|
||||
|
||||
uaddr_start = memslot->userspace_addr;
|
||||
uaddr_end = uaddr_start + size;
|
||||
|
||||
/*
|
||||
* Pages belonging to memslots that don't have the same alignment
|
||||
* within a PMD for userspace and IPA cannot be mapped with stage-2
|
||||
* PMD entries, because we'll end up mapping the wrong pages.
|
||||
*
|
||||
* Consider a layout like the following:
|
||||
*
|
||||
* memslot->userspace_addr:
|
||||
* +-----+--------------------+--------------------+---+
|
||||
* |abcde|fgh Stage-1 PMD | Stage-1 PMD tv|xyz|
|
||||
* +-----+--------------------+--------------------+---+
|
||||
*
|
||||
* memslot->base_gfn << PAGE_SIZE:
|
||||
* +---+--------------------+--------------------+-----+
|
||||
* |abc|def Stage-2 PMD | Stage-2 PMD |tvxyz|
|
||||
* +---+--------------------+--------------------+-----+
|
||||
*
|
||||
* If we create those stage-2 PMDs, we'll end up with this incorrect
|
||||
* mapping:
|
||||
* d -> f
|
||||
* e -> g
|
||||
* f -> h
|
||||
*/
|
||||
if ((gpa_start & ~S2_PMD_MASK) != (uaddr_start & ~S2_PMD_MASK))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Next, let's make sure we're not trying to map anything not covered
|
||||
* by the memslot. This means we have to prohibit PMD size mappings
|
||||
* for the beginning and end of a non-PMD aligned and non-PMD sized
|
||||
* memory slot (illustrated by the head and tail parts of the
|
||||
* userspace view above containing pages 'abcde' and 'xyz',
|
||||
* respectively).
|
||||
*
|
||||
* Note that it doesn't matter if we do the check using the
|
||||
* userspace_addr or the base_gfn, as both are equally aligned (per
|
||||
* the check above) and equally sized.
|
||||
*/
|
||||
return (hva & S2_PMD_MASK) >= uaddr_start &&
|
||||
(hva & S2_PMD_MASK) + S2_PMD_SIZE <= uaddr_end;
|
||||
}
|
||||
|
||||
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
struct kvm_memory_slot *memslot, unsigned long hva,
|
||||
unsigned long fault_status)
|
||||
{
|
||||
int ret;
|
||||
bool write_fault, exec_fault, writable, hugetlb = false, force_pte = false;
|
||||
bool write_fault, writable, force_pte = false;
|
||||
bool exec_fault, needs_exec;
|
||||
unsigned long mmu_seq;
|
||||
gfn_t gfn = fault_ipa >> PAGE_SHIFT;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
|
@ -1484,7 +1667,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||
kvm_pfn_t pfn;
|
||||
pgprot_t mem_type = PAGE_S2;
|
||||
bool logging_active = memslot_is_logging(memslot);
|
||||
unsigned long flags = 0;
|
||||
unsigned long vma_pagesize, flags = 0;
|
||||
|
||||
write_fault = kvm_is_write_fault(vcpu);
|
||||
exec_fault = kvm_vcpu_trap_is_iabt(vcpu);
|
||||
|
@ -1495,6 +1678,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (!fault_supports_stage2_pmd_mappings(memslot, hva))
|
||||
force_pte = true;
|
||||
|
||||
if (logging_active)
|
||||
force_pte = true;
|
||||
|
||||
/* Let's check if we will get back a huge page backed by hugetlbfs */
|
||||
down_read(¤t->mm->mmap_sem);
|
||||
vma = find_vma_intersection(current->mm, hva, hva + 1);
|
||||
|
@ -1504,22 +1693,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (vma_kernel_pagesize(vma) == PMD_SIZE && !logging_active) {
|
||||
hugetlb = true;
|
||||
gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
|
||||
} else {
|
||||
/*
|
||||
* Pages belonging to memslots that don't have the same
|
||||
* alignment for userspace and IPA cannot be mapped using
|
||||
* block descriptors even if the pages belong to a THP for
|
||||
* the process, because the stage-2 block descriptor will
|
||||
* cover more than a single THP and we loose atomicity for
|
||||
* unmapping, updates, and splits of the THP or other pages
|
||||
* in the stage-2 block range.
|
||||
*/
|
||||
if ((memslot->userspace_addr & ~PMD_MASK) !=
|
||||
((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK))
|
||||
force_pte = true;
|
||||
vma_pagesize = vma_kernel_pagesize(vma);
|
||||
/*
|
||||
* PUD level may not exist for a VM but PMD is guaranteed to
|
||||
* exist.
|
||||
*/
|
||||
if ((vma_pagesize == PMD_SIZE ||
|
||||
(vma_pagesize == PUD_SIZE && kvm_stage2_has_pud(kvm))) &&
|
||||
!force_pte) {
|
||||
gfn = (fault_ipa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT;
|
||||
}
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
|
||||
|
@ -1558,7 +1740,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||
* should not be mapped with huge pages (it introduces churn
|
||||
* and performance degradation), so force a pte mapping.
|
||||
*/
|
||||
force_pte = true;
|
||||
flags |= KVM_S2_FLAG_LOGGING_ACTIVE;
|
||||
|
||||
/*
|
||||
|
@ -1573,50 +1754,69 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||
if (mmu_notifier_retry(kvm, mmu_seq))
|
||||
goto out_unlock;
|
||||
|
||||
if (!hugetlb && !force_pte)
|
||||
hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
|
||||
if (vma_pagesize == PAGE_SIZE && !force_pte) {
|
||||
/*
|
||||
* Only PMD_SIZE transparent hugepages(THP) are
|
||||
* currently supported. This code will need to be
|
||||
* updated to support other THP sizes.
|
||||
*/
|
||||
if (transparent_hugepage_adjust(&pfn, &fault_ipa))
|
||||
vma_pagesize = PMD_SIZE;
|
||||
}
|
||||
|
||||
if (hugetlb) {
|
||||
pmd_t new_pmd = pfn_pmd(pfn, mem_type);
|
||||
new_pmd = pmd_mkhuge(new_pmd);
|
||||
if (writable) {
|
||||
if (writable)
|
||||
kvm_set_pfn_dirty(pfn);
|
||||
|
||||
if (fault_status != FSC_PERM)
|
||||
clean_dcache_guest_page(pfn, vma_pagesize);
|
||||
|
||||
if (exec_fault)
|
||||
invalidate_icache_guest_page(pfn, vma_pagesize);
|
||||
|
||||
/*
|
||||
* If we took an execution fault we have made the
|
||||
* icache/dcache coherent above and should now let the s2
|
||||
* mapping be executable.
|
||||
*
|
||||
* Write faults (!exec_fault && FSC_PERM) are orthogonal to
|
||||
* execute permissions, and we preserve whatever we have.
|
||||
*/
|
||||
needs_exec = exec_fault ||
|
||||
(fault_status == FSC_PERM && stage2_is_exec(kvm, fault_ipa));
|
||||
|
||||
if (vma_pagesize == PUD_SIZE) {
|
||||
pud_t new_pud = kvm_pfn_pud(pfn, mem_type);
|
||||
|
||||
new_pud = kvm_pud_mkhuge(new_pud);
|
||||
if (writable)
|
||||
new_pud = kvm_s2pud_mkwrite(new_pud);
|
||||
|
||||
if (needs_exec)
|
||||
new_pud = kvm_s2pud_mkexec(new_pud);
|
||||
|
||||
ret = stage2_set_pud_huge(kvm, memcache, fault_ipa, &new_pud);
|
||||
} else if (vma_pagesize == PMD_SIZE) {
|
||||
pmd_t new_pmd = kvm_pfn_pmd(pfn, mem_type);
|
||||
|
||||
new_pmd = kvm_pmd_mkhuge(new_pmd);
|
||||
|
||||
if (writable)
|
||||
new_pmd = kvm_s2pmd_mkwrite(new_pmd);
|
||||
kvm_set_pfn_dirty(pfn);
|
||||
}
|
||||
|
||||
if (fault_status != FSC_PERM)
|
||||
clean_dcache_guest_page(pfn, PMD_SIZE);
|
||||
|
||||
if (exec_fault) {
|
||||
if (needs_exec)
|
||||
new_pmd = kvm_s2pmd_mkexec(new_pmd);
|
||||
invalidate_icache_guest_page(pfn, PMD_SIZE);
|
||||
} else if (fault_status == FSC_PERM) {
|
||||
/* Preserve execute if XN was already cleared */
|
||||
if (stage2_is_exec(kvm, fault_ipa))
|
||||
new_pmd = kvm_s2pmd_mkexec(new_pmd);
|
||||
}
|
||||
|
||||
ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
|
||||
} else {
|
||||
pte_t new_pte = pfn_pte(pfn, mem_type);
|
||||
pte_t new_pte = kvm_pfn_pte(pfn, mem_type);
|
||||
|
||||
if (writable) {
|
||||
new_pte = kvm_s2pte_mkwrite(new_pte);
|
||||
kvm_set_pfn_dirty(pfn);
|
||||
mark_page_dirty(kvm, gfn);
|
||||
}
|
||||
|
||||
if (fault_status != FSC_PERM)
|
||||
clean_dcache_guest_page(pfn, PAGE_SIZE);
|
||||
|
||||
if (exec_fault) {
|
||||
if (needs_exec)
|
||||
new_pte = kvm_s2pte_mkexec(new_pte);
|
||||
invalidate_icache_guest_page(pfn, PAGE_SIZE);
|
||||
} else if (fault_status == FSC_PERM) {
|
||||
/* Preserve execute if XN was already cleared */
|
||||
if (stage2_is_exec(kvm, fault_ipa))
|
||||
new_pte = kvm_s2pte_mkexec(new_pte);
|
||||
}
|
||||
|
||||
ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
|
||||
}
|
||||
|
@ -1637,6 +1837,7 @@ out_unlock:
|
|||
*/
|
||||
static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
|
||||
{
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
kvm_pfn_t pfn;
|
||||
|
@ -1646,24 +1847,23 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
|
|||
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
|
||||
pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa);
|
||||
if (!pmd || pmd_none(*pmd)) /* Nothing there */
|
||||
if (!stage2_get_leaf_entry(vcpu->kvm, fault_ipa, &pud, &pmd, &pte))
|
||||
goto out;
|
||||
|
||||
if (pmd_thp_or_huge(*pmd)) { /* THP, HugeTLB */
|
||||
if (pud) { /* HugeTLB */
|
||||
*pud = kvm_s2pud_mkyoung(*pud);
|
||||
pfn = kvm_pud_pfn(*pud);
|
||||
pfn_valid = true;
|
||||
} else if (pmd) { /* THP, HugeTLB */
|
||||
*pmd = pmd_mkyoung(*pmd);
|
||||
pfn = pmd_pfn(*pmd);
|
||||
pfn_valid = true;
|
||||
goto out;
|
||||
} else {
|
||||
*pte = pte_mkyoung(*pte); /* Just a page... */
|
||||
pfn = pte_pfn(*pte);
|
||||
pfn_valid = true;
|
||||
}
|
||||
|
||||
pte = pte_offset_kernel(pmd, fault_ipa);
|
||||
if (pte_none(*pte)) /* Nothing there either */
|
||||
goto out;
|
||||
|
||||
*pte = pte_mkyoung(*pte); /* Just a page... */
|
||||
pfn = pte_pfn(*pte);
|
||||
pfn_valid = true;
|
||||
out:
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
if (pfn_valid)
|
||||
|
@ -1865,48 +2065,44 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
|
|||
* just like a translation fault and clean the cache to the PoC.
|
||||
*/
|
||||
clean_dcache_guest_page(pfn, PAGE_SIZE);
|
||||
stage2_pte = pfn_pte(pfn, PAGE_S2);
|
||||
stage2_pte = kvm_pfn_pte(pfn, PAGE_S2);
|
||||
handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
|
||||
}
|
||||
|
||||
static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
|
||||
{
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
||||
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
|
||||
pmd = stage2_get_pmd(kvm, NULL, gpa);
|
||||
if (!pmd || pmd_none(*pmd)) /* Nothing there */
|
||||
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
|
||||
if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte))
|
||||
return 0;
|
||||
|
||||
if (pmd_thp_or_huge(*pmd)) /* THP, HugeTLB */
|
||||
if (pud)
|
||||
return stage2_pudp_test_and_clear_young(pud);
|
||||
else if (pmd)
|
||||
return stage2_pmdp_test_and_clear_young(pmd);
|
||||
|
||||
pte = pte_offset_kernel(pmd, gpa);
|
||||
if (pte_none(*pte))
|
||||
return 0;
|
||||
|
||||
return stage2_ptep_test_and_clear_young(pte);
|
||||
else
|
||||
return stage2_ptep_test_and_clear_young(pte);
|
||||
}
|
||||
|
||||
static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
|
||||
{
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
||||
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
|
||||
pmd = stage2_get_pmd(kvm, NULL, gpa);
|
||||
if (!pmd || pmd_none(*pmd)) /* Nothing there */
|
||||
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
|
||||
if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte))
|
||||
return 0;
|
||||
|
||||
if (pmd_thp_or_huge(*pmd)) /* THP, HugeTLB */
|
||||
if (pud)
|
||||
return kvm_s2pud_young(*pud);
|
||||
else if (pmd)
|
||||
return pmd_young(*pmd);
|
||||
|
||||
pte = pte_offset_kernel(pmd, gpa);
|
||||
if (!pte_none(*pte)) /* Just a page... */
|
||||
else
|
||||
return pte_young(*pte);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
|
||||
|
|
|
@ -26,25 +26,25 @@ TRACE_EVENT(kvm_entry,
|
|||
);
|
||||
|
||||
TRACE_EVENT(kvm_exit,
|
||||
TP_PROTO(int idx, unsigned int exit_reason, unsigned long vcpu_pc),
|
||||
TP_ARGS(idx, exit_reason, vcpu_pc),
|
||||
TP_PROTO(int ret, unsigned int esr_ec, unsigned long vcpu_pc),
|
||||
TP_ARGS(ret, esr_ec, vcpu_pc),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( int, idx )
|
||||
__field( unsigned int, exit_reason )
|
||||
__field( int, ret )
|
||||
__field( unsigned int, esr_ec )
|
||||
__field( unsigned long, vcpu_pc )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->idx = idx;
|
||||
__entry->exit_reason = exit_reason;
|
||||
__entry->ret = ARM_EXCEPTION_CODE(ret);
|
||||
__entry->esr_ec = ARM_EXCEPTION_IS_TRAP(ret) ? esr_ec : 0;
|
||||
__entry->vcpu_pc = vcpu_pc;
|
||||
),
|
||||
|
||||
TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx",
|
||||
__print_symbolic(__entry->idx, kvm_arm_exception_type),
|
||||
__entry->exit_reason,
|
||||
__print_symbolic(__entry->exit_reason, kvm_arm_exception_class),
|
||||
__print_symbolic(__entry->ret, kvm_arm_exception_type),
|
||||
__entry->esr_ec,
|
||||
__print_symbolic(__entry->esr_ec, kvm_arm_exception_class),
|
||||
__entry->vcpu_pc)
|
||||
);
|
||||
|
||||
|
|
|
@ -313,36 +313,30 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
|
|||
|
||||
spin_lock_irqsave(&irq->irq_lock, flags);
|
||||
|
||||
/*
|
||||
* If this virtual IRQ was written into a list register, we
|
||||
* have to make sure the CPU that runs the VCPU thread has
|
||||
* synced back the LR state to the struct vgic_irq.
|
||||
*
|
||||
* As long as the conditions below are true, we know the VCPU thread
|
||||
* may be on its way back from the guest (we kicked the VCPU thread in
|
||||
* vgic_change_active_prepare) and still has to sync back this IRQ,
|
||||
* so we release and re-acquire the spin_lock to let the other thread
|
||||
* sync back the IRQ.
|
||||
*
|
||||
* When accessing VGIC state from user space, requester_vcpu is
|
||||
* NULL, which is fine, because we guarantee that no VCPUs are running
|
||||
* when accessing VGIC state from user space so irq->vcpu->cpu is
|
||||
* always -1.
|
||||
*/
|
||||
while (irq->vcpu && /* IRQ may have state in an LR somewhere */
|
||||
irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */
|
||||
irq->vcpu->cpu != -1) /* VCPU thread is running */
|
||||
cond_resched_lock(&irq->irq_lock);
|
||||
|
||||
if (irq->hw) {
|
||||
vgic_hw_irq_change_active(vcpu, irq, active, !requester_vcpu);
|
||||
} else {
|
||||
u32 model = vcpu->kvm->arch.vgic.vgic_model;
|
||||
u8 active_source;
|
||||
|
||||
irq->active = active;
|
||||
|
||||
/*
|
||||
* The GICv2 architecture indicates that the source CPUID for
|
||||
* an SGI should be provided during an EOI which implies that
|
||||
* the active state is stored somewhere, but at the same time
|
||||
* this state is not architecturally exposed anywhere and we
|
||||
* have no way of knowing the right source.
|
||||
*
|
||||
* This may lead to a VCPU not being able to receive
|
||||
* additional instances of a particular SGI after migration
|
||||
* for a GICv2 VM on some GIC implementations. Oh well.
|
||||
*/
|
||||
active_source = (requester_vcpu) ? requester_vcpu->vcpu_id : 0;
|
||||
|
||||
if (model == KVM_DEV_TYPE_ARM_VGIC_V2 &&
|
||||
active && vgic_irq_is_sgi(irq->intid))
|
||||
irq->active_source = requester_vcpu->vcpu_id;
|
||||
irq->active_source = active_source;
|
||||
}
|
||||
|
||||
if (irq->active)
|
||||
|
@ -368,14 +362,16 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
|
|||
*/
|
||||
static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid)
|
||||
{
|
||||
if (intid > VGIC_NR_PRIVATE_IRQS)
|
||||
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 ||
|
||||
intid > VGIC_NR_PRIVATE_IRQS)
|
||||
kvm_arm_halt_guest(vcpu->kvm);
|
||||
}
|
||||
|
||||
/* See vgic_change_active_prepare */
|
||||
static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid)
|
||||
{
|
||||
if (intid > VGIC_NR_PRIVATE_IRQS)
|
||||
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 ||
|
||||
intid > VGIC_NR_PRIVATE_IRQS)
|
||||
kvm_arm_resume_guest(vcpu->kvm);
|
||||
}
|
||||
|
||||
|
|
|
@ -103,13 +103,13 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
|
|||
{
|
||||
/* SGIs and PPIs */
|
||||
if (intid <= VGIC_MAX_PRIVATE) {
|
||||
intid = array_index_nospec(intid, VGIC_MAX_PRIVATE);
|
||||
intid = array_index_nospec(intid, VGIC_MAX_PRIVATE + 1);
|
||||
return &vcpu->arch.vgic_cpu.private_irqs[intid];
|
||||
}
|
||||
|
||||
/* SPIs */
|
||||
if (intid <= VGIC_MAX_SPI) {
|
||||
intid = array_index_nospec(intid, VGIC_MAX_SPI);
|
||||
if (intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) {
|
||||
intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS);
|
||||
return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
|
||||
}
|
||||
|
||||
|
@ -908,6 +908,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
|
|||
struct vgic_irq *irq;
|
||||
bool pending = false;
|
||||
unsigned long flags;
|
||||
struct vgic_vmcr vmcr;
|
||||
|
||||
if (!vcpu->kvm->arch.vgic.enabled)
|
||||
return false;
|
||||
|
@ -915,11 +916,15 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
|
|||
if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
|
||||
return true;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
|
||||
spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
|
||||
|
||||
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
|
||||
spin_lock(&irq->irq_lock);
|
||||
pending = irq_is_pending(irq) && irq->enabled;
|
||||
pending = irq_is_pending(irq) && irq->enabled &&
|
||||
!irq->active &&
|
||||
irq->priority < vmcr.pmr;
|
||||
spin_unlock(&irq->irq_lock);
|
||||
|
||||
if (pending)
|
||||
|
|
Loading…
Reference in New Issue