ARM:
- Page ownership tracking between host EL1 and EL2 - Rely on userspace page tables to create large stage-2 mappings - Fix incompatibility between pKVM and kmemleak - Fix the PMU reset state, and improve the performance of the virtual PMU - Move over to the generic KVM entry code - Address PSCI reset issues w.r.t. save/restore - Preliminary rework for the upcoming pKVM fixed feature - A bunch of MM cleanups - a vGIC fix for timer spurious interrupts - Various cleanups s390: - enable interpretation of specification exceptions - fix a vcpu_idx vs vcpu_id mixup x86: - fast (lockless) page fault support for the new MMU - new MMU now the default - increased maximum allowed VCPU count - allow inhibit IRQs on KVM_RUN while debugging guests - let Hyper-V-enabled guests run with virtualized LAPIC as long as they do not enable the Hyper-V "AutoEOI" feature - fixes and optimizations for the toggling of AMD AVIC (virtualized LAPIC) - tuning for the case when two-dimensional paging (EPT/NPT) is disabled - bugfixes and cleanups, especially with respect to 1) vCPU reset and 2) choosing a paging mode based on CR0/CR4/EFER - support for 5-level page table on AMD processors Generic: - MMU notifier invalidation callbacks do not take mmu_lock unless necessary - improved caching of LRU kvm_memory_slot - support for histogram statistics - add statistics for halt polling and remote TLB flush requests -----BEGIN PGP SIGNATURE----- iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmE2CIAUHHBib256aW5p QHJlZGhhdC5jb20ACgkQv/vSX3jHroMyqwf+Ky2WoThuQ9Ra0r/m8pUTAx5+gsAf MmG24rNLE+26X0xuBT9Q5+etYYRLrRTWJvo5cgHooz7muAYW6scR+ho5xzvLTAxi DAuoijkXsSdGoFCp0OMUHiwG3cgY5N7feTEwLPAb2i6xr/l6SZyCP4zcwiiQbJ2s UUD0i3rEoNQ02/hOEveud/ENxzUli9cmmgHKXR3kNgsJClSf1fcuLnhg+7EGMhK9 +c2V+hde5y0gmEairQWm22MLMRolNZ5NL4kjykiNh2M5q9YvbHe5+f/JmENlNZMT bsUQT6Ry1ukuJ0V59rZvUw71KknPFzZ3d6HgW4pwytMq6EJKiISHzRbVnQ== =FCAB -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull KVM updates from Paolo Bonzini: "ARM: - Page ownership tracking between host EL1 and EL2 - Rely on userspace page tables to create large stage-2 mappings - Fix incompatibility between pKVM and kmemleak - Fix the PMU reset state, and improve the performance of the virtual PMU - Move over to the generic KVM entry code - Address PSCI reset issues w.r.t. save/restore - Preliminary rework for the upcoming pKVM fixed feature - A bunch of MM cleanups - a vGIC fix for timer spurious interrupts - Various cleanups s390: - enable interpretation of specification exceptions - fix a vcpu_idx vs vcpu_id mixup x86: - fast (lockless) page fault support for the new MMU - new MMU now the default - increased maximum allowed VCPU count - allow inhibit IRQs on KVM_RUN while debugging guests - let Hyper-V-enabled guests run with virtualized LAPIC as long as they do not enable the Hyper-V "AutoEOI" feature - fixes and optimizations for the toggling of AMD AVIC (virtualized LAPIC) - tuning for the case when two-dimensional paging (EPT/NPT) is disabled - bugfixes and cleanups, especially with respect to vCPU reset and choosing a paging mode based on CR0/CR4/EFER - support for 5-level page table on AMD processors Generic: - MMU notifier invalidation callbacks do not take mmu_lock unless necessary - improved caching of LRU kvm_memory_slot - support for histogram statistics - add statistics for halt polling and remote TLB flush requests" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (210 commits) KVM: Drop unused kvm_dirty_gfn_invalid() KVM: x86: Update vCPU's hv_clock before back to guest when tsc_offset is adjusted KVM: MMU: mark role_regs and role accessors as maybe unused KVM: MIPS: Remove a "set but not used" variable x86/kvm: Don't enable IRQ when IRQ enabled in kvm_wait KVM: stats: Add VM stat for remote tlb flush requests KVM: Remove unnecessary export of kvm_{inc,dec}_notifier_count() KVM: x86/mmu: Move lpage_disallowed_link further "down" in kvm_mmu_page KVM: x86/mmu: Relocate kvm_mmu_page.tdp_mmu_page for better cache locality Revert "KVM: x86: mmu: Add guest physical address check in translate_gpa()" KVM: x86/mmu: Remove unused field mmio_cached in struct kvm_mmu_page kvm: x86: Increase KVM_SOFT_MAX_VCPUS to 710 kvm: x86: Increase MAX_VCPUS to 1024 kvm: x86: Set KVM_MAX_VCPU_ID to 4*KVM_MAX_VCPUS KVM: VMX: avoid running vmx_handle_exit_irqoff in case of emulation KVM: x86/mmu: Don't freak out if pml5_root is NULL on 4-level host KVM: s390: index kvm->arch.idle_mask by vcpu_idx KVM: s390: Enable specification exception interpretation KVM: arm64: Trim guest debug exception handling KVM: SVM: Add 5-level page table support for SVM ...
This commit is contained in:
commit
192ad3c27a
|
@ -3357,6 +3357,7 @@ flags which can include the following:
|
|||
- KVM_GUESTDBG_INJECT_DB: inject DB type exception [x86]
|
||||
- KVM_GUESTDBG_INJECT_BP: inject BP type exception [x86]
|
||||
- KVM_GUESTDBG_EXIT_PENDING: trigger an immediate guest exit [s390]
|
||||
- KVM_GUESTDBG_BLOCKIRQ: avoid injecting interrupts/NMI/SMI [x86]
|
||||
|
||||
For example KVM_GUESTDBG_USE_SW_BP indicates that software breakpoints
|
||||
are enabled in memory so we need to ensure breakpoint exceptions are
|
||||
|
@ -5208,6 +5209,9 @@ by a string of size ``name_size``.
|
|||
#define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT)
|
||||
#define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT)
|
||||
#define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT)
|
||||
#define KVM_STATS_TYPE_LINEAR_HIST (0x3 << KVM_STATS_TYPE_SHIFT)
|
||||
#define KVM_STATS_TYPE_LOG_HIST (0x4 << KVM_STATS_TYPE_SHIFT)
|
||||
#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_LOG_HIST
|
||||
|
||||
#define KVM_STATS_UNIT_SHIFT 4
|
||||
#define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT)
|
||||
|
@ -5215,18 +5219,20 @@ by a string of size ``name_size``.
|
|||
#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT)
|
||||
#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
|
||||
#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT)
|
||||
#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES
|
||||
|
||||
#define KVM_STATS_BASE_SHIFT 8
|
||||
#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT)
|
||||
#define KVM_STATS_BASE_POW10 (0x0 << KVM_STATS_BASE_SHIFT)
|
||||
#define KVM_STATS_BASE_POW2 (0x1 << KVM_STATS_BASE_SHIFT)
|
||||
#define KVM_STATS_BASE_MAX KVM_STATS_BASE_POW2
|
||||
|
||||
struct kvm_stats_desc {
|
||||
__u32 flags;
|
||||
__s16 exponent;
|
||||
__u16 size;
|
||||
__u32 offset;
|
||||
__u32 unused;
|
||||
__u32 bucket_size;
|
||||
char name[];
|
||||
};
|
||||
|
||||
|
@ -5237,21 +5243,35 @@ The following flags are supported:
|
|||
Bits 0-3 of ``flags`` encode the type:
|
||||
|
||||
* ``KVM_STATS_TYPE_CUMULATIVE``
|
||||
The statistics data is cumulative. The value of data can only be increased.
|
||||
The statistics reports a cumulative count. The value of data can only be increased.
|
||||
Most of the counters used in KVM are of this type.
|
||||
The corresponding ``size`` field for this type is always 1.
|
||||
All cumulative statistics data are read/write.
|
||||
* ``KVM_STATS_TYPE_INSTANT``
|
||||
The statistics data is instantaneous. Its value can be increased or
|
||||
The statistics reports an instantaneous value. Its value can be increased or
|
||||
decreased. This type is usually used as a measurement of some resources,
|
||||
like the number of dirty pages, the number of large pages, etc.
|
||||
All instant statistics are read only.
|
||||
The corresponding ``size`` field for this type is always 1.
|
||||
* ``KVM_STATS_TYPE_PEAK``
|
||||
The statistics data is peak. The value of data can only be increased, and
|
||||
represents a peak value for a measurement, for example the maximum number
|
||||
The statistics data reports a peak value, for example the maximum number
|
||||
of items in a hash table bucket, the longest time waited and so on.
|
||||
The value of data can only be increased.
|
||||
The corresponding ``size`` field for this type is always 1.
|
||||
* ``KVM_STATS_TYPE_LINEAR_HIST``
|
||||
The statistic is reported as a linear histogram. The number of
|
||||
buckets is specified by the ``size`` field. The size of buckets is specified
|
||||
by the ``hist_param`` field. The range of the Nth bucket (1 <= N < ``size``)
|
||||
is [``hist_param``*(N-1), ``hist_param``*N), while the range of the last
|
||||
bucket is [``hist_param``*(``size``-1), +INF). (+INF means positive infinity
|
||||
value.) The bucket value indicates how many samples fell in the bucket's range.
|
||||
* ``KVM_STATS_TYPE_LOG_HIST``
|
||||
The statistic is reported as a logarithmic histogram. The number of
|
||||
buckets is specified by the ``size`` field. The range of the first bucket is
|
||||
[0, 1), while the range of the last bucket is [pow(2, ``size``-2), +INF).
|
||||
Otherwise, The Nth bucket (1 < N < ``size``) covers
|
||||
[pow(2, N-2), pow(2, N-1)). The bucket value indicates how many samples fell
|
||||
in the bucket's range.
|
||||
|
||||
Bits 4-7 of ``flags`` encode the unit:
|
||||
|
||||
|
@ -5286,9 +5306,9 @@ unsigned 64bit data.
|
|||
The ``offset`` field is the offset from the start of Data Block to the start of
|
||||
the corresponding statistics data.
|
||||
|
||||
The ``unused`` field is reserved for future support for other types of
|
||||
statistics data, like log/linear histogram. Its value is always 0 for the types
|
||||
defined above.
|
||||
The ``bucket_size`` field is used as a parameter for histogram statistics data.
|
||||
It is only used by linear histogram statistics data, specifying the size of a
|
||||
bucket.
|
||||
|
||||
The ``name`` field is the name string of the statistics data. The name string
|
||||
starts at the end of ``struct kvm_stats_desc``. The maximum length including
|
||||
|
|
|
@ -21,6 +21,12 @@ The acquisition orders for mutexes are as follows:
|
|||
can be taken inside a kvm->srcu read-side critical section,
|
||||
while kvm->slots_lock cannot.
|
||||
|
||||
- kvm->mn_active_invalidate_count ensures that pairs of
|
||||
invalidate_range_start() and invalidate_range_end() callbacks
|
||||
use the same memslots array. kvm->slots_lock and kvm->slots_arch_lock
|
||||
are taken on the waiting side in install_new_memslots, so MMU notifiers
|
||||
must not take either kvm->slots_lock or kvm->slots_arch_lock.
|
||||
|
||||
On x86:
|
||||
|
||||
- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock
|
||||
|
|
|
@ -602,14 +602,14 @@ static inline bool id_aa64pfr0_32bit_el1(u64 pfr0)
|
|||
{
|
||||
u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SHIFT);
|
||||
|
||||
return val == ID_AA64PFR0_EL1_32BIT_64BIT;
|
||||
return val == ID_AA64PFR0_ELx_32BIT_64BIT;
|
||||
}
|
||||
|
||||
static inline bool id_aa64pfr0_32bit_el0(u64 pfr0)
|
||||
{
|
||||
u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT);
|
||||
|
||||
return val == ID_AA64PFR0_EL0_32BIT_64BIT;
|
||||
return val == ID_AA64PFR0_ELx_32BIT_64BIT;
|
||||
}
|
||||
|
||||
static inline bool id_aa64pfr0_sve(u64 pfr0)
|
||||
|
@ -784,13 +784,13 @@ extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
|
|||
static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
|
||||
{
|
||||
switch (parange) {
|
||||
case 0: return 32;
|
||||
case 1: return 36;
|
||||
case 2: return 40;
|
||||
case 3: return 42;
|
||||
case 4: return 44;
|
||||
case 5: return 48;
|
||||
case 6: return 52;
|
||||
case ID_AA64MMFR0_PARANGE_32: return 32;
|
||||
case ID_AA64MMFR0_PARANGE_36: return 36;
|
||||
case ID_AA64MMFR0_PARANGE_40: return 40;
|
||||
case ID_AA64MMFR0_PARANGE_42: return 42;
|
||||
case ID_AA64MMFR0_PARANGE_44: return 44;
|
||||
case ID_AA64MMFR0_PARANGE_48: return 48;
|
||||
case ID_AA64MMFR0_PARANGE_52: return 52;
|
||||
/*
|
||||
* A future PE could use a value unknown to the kernel.
|
||||
* However, by the "D10.1.4 Principles of the ID scheme
|
||||
|
|
|
@ -12,8 +12,13 @@
|
|||
#include <asm/types.h>
|
||||
|
||||
/* Hyp Configuration Register (HCR) bits */
|
||||
|
||||
#define HCR_TID5 (UL(1) << 58)
|
||||
#define HCR_DCT (UL(1) << 57)
|
||||
#define HCR_ATA_SHIFT 56
|
||||
#define HCR_ATA (UL(1) << HCR_ATA_SHIFT)
|
||||
#define HCR_AMVOFFEN (UL(1) << 51)
|
||||
#define HCR_FIEN (UL(1) << 47)
|
||||
#define HCR_FWB (UL(1) << 46)
|
||||
#define HCR_API (UL(1) << 41)
|
||||
#define HCR_APK (UL(1) << 40)
|
||||
|
@ -32,9 +37,9 @@
|
|||
#define HCR_TVM (UL(1) << 26)
|
||||
#define HCR_TTLB (UL(1) << 25)
|
||||
#define HCR_TPU (UL(1) << 24)
|
||||
#define HCR_TPC (UL(1) << 23)
|
||||
#define HCR_TPC (UL(1) << 23) /* HCR_TPCP if FEAT_DPB */
|
||||
#define HCR_TSW (UL(1) << 22)
|
||||
#define HCR_TAC (UL(1) << 21)
|
||||
#define HCR_TACR (UL(1) << 21)
|
||||
#define HCR_TIDCP (UL(1) << 20)
|
||||
#define HCR_TSC (UL(1) << 19)
|
||||
#define HCR_TID3 (UL(1) << 18)
|
||||
|
@ -56,12 +61,13 @@
|
|||
#define HCR_PTW (UL(1) << 2)
|
||||
#define HCR_SWIO (UL(1) << 1)
|
||||
#define HCR_VM (UL(1) << 0)
|
||||
#define HCR_RES0 ((UL(1) << 48) | (UL(1) << 39))
|
||||
|
||||
/*
|
||||
* The bits we set in HCR:
|
||||
* TLOR: Trap LORegion register accesses
|
||||
* RW: 64bit by default, can be overridden for 32bit VMs
|
||||
* TAC: Trap ACTLR
|
||||
* TACR: Trap ACTLR
|
||||
* TSC: Trap SMC
|
||||
* TSW: Trap cache operations by set/way
|
||||
* TWE: Trap WFE
|
||||
|
@ -76,7 +82,7 @@
|
|||
* PTW: Take a stage2 fault if a stage1 walk steps in device memory
|
||||
*/
|
||||
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
|
||||
HCR_BSU_IS | HCR_FB | HCR_TAC | \
|
||||
HCR_BSU_IS | HCR_FB | HCR_TACR | \
|
||||
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
|
||||
HCR_FMO | HCR_IMO | HCR_PTW )
|
||||
#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
|
||||
|
@ -275,24 +281,40 @@
|
|||
#define CPTR_EL2_TTA (1 << 20)
|
||||
#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
|
||||
#define CPTR_EL2_TZ (1 << 8)
|
||||
#define CPTR_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 */
|
||||
#define CPTR_EL2_DEFAULT CPTR_EL2_RES1
|
||||
#define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */
|
||||
#define CPTR_EL2_DEFAULT CPTR_NVHE_EL2_RES1
|
||||
#define CPTR_NVHE_EL2_RES0 (GENMASK(63, 32) | \
|
||||
GENMASK(29, 21) | \
|
||||
GENMASK(19, 14) | \
|
||||
BIT(11))
|
||||
|
||||
/* Hyp Debug Configuration Register bits */
|
||||
#define MDCR_EL2_E2TB_MASK (UL(0x3))
|
||||
#define MDCR_EL2_E2TB_SHIFT (UL(24))
|
||||
#define MDCR_EL2_TTRF (1 << 19)
|
||||
#define MDCR_EL2_TPMS (1 << 14)
|
||||
#define MDCR_EL2_HPMFZS (UL(1) << 36)
|
||||
#define MDCR_EL2_HPMFZO (UL(1) << 29)
|
||||
#define MDCR_EL2_MTPME (UL(1) << 28)
|
||||
#define MDCR_EL2_TDCC (UL(1) << 27)
|
||||
#define MDCR_EL2_HCCD (UL(1) << 23)
|
||||
#define MDCR_EL2_TTRF (UL(1) << 19)
|
||||
#define MDCR_EL2_HPMD (UL(1) << 17)
|
||||
#define MDCR_EL2_TPMS (UL(1) << 14)
|
||||
#define MDCR_EL2_E2PB_MASK (UL(0x3))
|
||||
#define MDCR_EL2_E2PB_SHIFT (UL(12))
|
||||
#define MDCR_EL2_TDRA (1 << 11)
|
||||
#define MDCR_EL2_TDOSA (1 << 10)
|
||||
#define MDCR_EL2_TDA (1 << 9)
|
||||
#define MDCR_EL2_TDE (1 << 8)
|
||||
#define MDCR_EL2_HPME (1 << 7)
|
||||
#define MDCR_EL2_TPM (1 << 6)
|
||||
#define MDCR_EL2_TPMCR (1 << 5)
|
||||
#define MDCR_EL2_HPMN_MASK (0x1F)
|
||||
#define MDCR_EL2_TDRA (UL(1) << 11)
|
||||
#define MDCR_EL2_TDOSA (UL(1) << 10)
|
||||
#define MDCR_EL2_TDA (UL(1) << 9)
|
||||
#define MDCR_EL2_TDE (UL(1) << 8)
|
||||
#define MDCR_EL2_HPME (UL(1) << 7)
|
||||
#define MDCR_EL2_TPM (UL(1) << 6)
|
||||
#define MDCR_EL2_TPMCR (UL(1) << 5)
|
||||
#define MDCR_EL2_HPMN_MASK (UL(0x1F))
|
||||
#define MDCR_EL2_RES0 (GENMASK(63, 37) | \
|
||||
GENMASK(35, 30) | \
|
||||
GENMASK(25, 24) | \
|
||||
GENMASK(22, 20) | \
|
||||
BIT(18) | \
|
||||
GENMASK(16, 15))
|
||||
|
||||
/* For compatibility with fault code shared with 32-bit */
|
||||
#define FSC_FAULT ESR_ELx_FSC_FAULT
|
||||
|
|
|
@ -59,12 +59,11 @@
|
|||
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs 13
|
||||
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs 14
|
||||
#define __KVM_HOST_SMCCC_FUNC___pkvm_init 15
|
||||
#define __KVM_HOST_SMCCC_FUNC___pkvm_create_mappings 16
|
||||
#define __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp 16
|
||||
#define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping 17
|
||||
#define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18
|
||||
#define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19
|
||||
#define __KVM_HOST_SMCCC_FUNC___pkvm_mark_hyp 20
|
||||
#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc 21
|
||||
#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc 20
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
|
@ -210,7 +209,7 @@ extern u64 __vgic_v3_read_vmcr(void);
|
|||
extern void __vgic_v3_write_vmcr(u32 vmcr);
|
||||
extern void __vgic_v3_init_lrs(void);
|
||||
|
||||
extern u32 __kvm_get_mdcr_el2(void);
|
||||
extern u64 __kvm_get_mdcr_el2(void);
|
||||
|
||||
#define __KVM_EXTABLE(from, to) \
|
||||
" .pushsection __kvm_ex_table, \"a\"\n" \
|
||||
|
|
|
@ -66,7 +66,7 @@ DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
|
|||
extern unsigned int kvm_sve_max_vl;
|
||||
int kvm_arm_init_sve(void);
|
||||
|
||||
int __attribute_const__ kvm_target_cpu(void);
|
||||
u32 __attribute_const__ kvm_target_cpu(void);
|
||||
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
|
||||
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
|
||||
|
||||
|
@ -185,7 +185,6 @@ enum vcpu_sysreg {
|
|||
PMCNTENSET_EL0, /* Count Enable Set Register */
|
||||
PMINTENSET_EL1, /* Interrupt Enable Set Register */
|
||||
PMOVSSET_EL0, /* Overflow Flag Status Set Register */
|
||||
PMSWINC_EL0, /* Software Increment Register */
|
||||
PMUSERENR_EL0, /* User Enable Register */
|
||||
|
||||
/* Pointer Authentication Registers in a strict increasing order. */
|
||||
|
@ -287,9 +286,13 @@ struct kvm_vcpu_arch {
|
|||
/* Stage 2 paging state used by the hardware on next switch */
|
||||
struct kvm_s2_mmu *hw_mmu;
|
||||
|
||||
/* HYP configuration */
|
||||
/* Values of trap registers for the guest. */
|
||||
u64 hcr_el2;
|
||||
u32 mdcr_el2;
|
||||
u64 mdcr_el2;
|
||||
u64 cptr_el2;
|
||||
|
||||
/* Values of trap registers for the host before guest entry. */
|
||||
u64 mdcr_el2_host;
|
||||
|
||||
/* Exception Information */
|
||||
struct kvm_vcpu_fault_info fault;
|
||||
|
@ -576,6 +579,7 @@ struct kvm_vcpu_stat {
|
|||
u64 wfi_exit_stat;
|
||||
u64 mmio_exit_user;
|
||||
u64 mmio_exit_kernel;
|
||||
u64 signal_exits;
|
||||
u64 exits;
|
||||
};
|
||||
|
||||
|
@ -771,6 +775,11 @@ void kvm_arch_free_vm(struct kvm *kvm);
|
|||
|
||||
int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
|
||||
|
||||
static inline bool kvm_vm_is_protected(struct kvm *kvm)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
|
||||
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
|
||||
|
||||
|
|
|
@ -95,7 +95,7 @@ void __sve_restore_state(void *sve_pffr, u32 *fpsr);
|
|||
|
||||
#ifndef __KVM_NVHE_HYPERVISOR__
|
||||
void activate_traps_vhe_load(struct kvm_vcpu *vcpu);
|
||||
void deactivate_traps_vhe_put(void);
|
||||
void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu);
|
||||
#endif
|
||||
|
||||
u64 __guest_enter(struct kvm_vcpu *vcpu);
|
||||
|
|
|
@ -252,6 +252,11 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
|
|||
|
||||
#define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr)
|
||||
|
||||
/*
|
||||
* When this is (directly or indirectly) used on the TLB invalidation
|
||||
* path, we rely on a previously issued DSB so that page table updates
|
||||
* and VMID reads are correctly ordered.
|
||||
*/
|
||||
static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
|
||||
{
|
||||
struct kvm_vmid *vmid = &mmu->vmid;
|
||||
|
@ -259,7 +264,7 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
|
|||
u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0;
|
||||
|
||||
baddr = mmu->pgd_phys;
|
||||
vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT;
|
||||
vmid_field = (u64)READ_ONCE(vmid->vmid) << VTTBR_VMID_SHIFT;
|
||||
return kvm_phys_to_vttbr(baddr) | vmid_field | cnp;
|
||||
}
|
||||
|
||||
|
@ -267,9 +272,10 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
|
|||
* Must be called from hyp code running at EL2 with an updated VTTBR
|
||||
* and interrupts disabled.
|
||||
*/
|
||||
static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned long vtcr)
|
||||
static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu,
|
||||
struct kvm_arch *arch)
|
||||
{
|
||||
write_sysreg(vtcr, vtcr_el2);
|
||||
write_sysreg(arch->vtcr, vtcr_el2);
|
||||
write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);
|
||||
|
||||
/*
|
||||
|
@ -280,11 +286,6 @@ static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned long
|
|||
asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
|
||||
}
|
||||
|
||||
static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu)
|
||||
{
|
||||
__load_stage2(mmu, kern_hyp_va(mmu->arch)->vtcr);
|
||||
}
|
||||
|
||||
static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
|
||||
{
|
||||
return container_of(mmu->arch, struct kvm, arch);
|
||||
|
|
|
@ -25,6 +25,46 @@ static inline u64 kvm_get_parange(u64 mmfr0)
|
|||
|
||||
typedef u64 kvm_pte_t;
|
||||
|
||||
#define KVM_PTE_VALID BIT(0)
|
||||
|
||||
#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT)
|
||||
#define KVM_PTE_ADDR_51_48 GENMASK(15, 12)
|
||||
|
||||
static inline bool kvm_pte_valid(kvm_pte_t pte)
|
||||
{
|
||||
return pte & KVM_PTE_VALID;
|
||||
}
|
||||
|
||||
static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
|
||||
{
|
||||
u64 pa = pte & KVM_PTE_ADDR_MASK;
|
||||
|
||||
if (PAGE_SHIFT == 16)
|
||||
pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
|
||||
|
||||
return pa;
|
||||
}
|
||||
|
||||
static inline u64 kvm_granule_shift(u32 level)
|
||||
{
|
||||
/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
|
||||
return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
|
||||
}
|
||||
|
||||
static inline u64 kvm_granule_size(u32 level)
|
||||
{
|
||||
return BIT(kvm_granule_shift(level));
|
||||
}
|
||||
|
||||
static inline bool kvm_level_supports_block_mapping(u32 level)
|
||||
{
|
||||
/*
|
||||
* Reject invalid block mappings and don't bother with 4TB mappings for
|
||||
* 52-bit PAs.
|
||||
*/
|
||||
return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* struct kvm_pgtable_mm_ops - Memory management callbacks.
|
||||
* @zalloc_page: Allocate a single zeroed memory page.
|
||||
|
@ -75,6 +115,44 @@ enum kvm_pgtable_stage2_flags {
|
|||
KVM_PGTABLE_S2_IDMAP = BIT(1),
|
||||
};
|
||||
|
||||
/**
|
||||
* enum kvm_pgtable_prot - Page-table permissions and attributes.
|
||||
* @KVM_PGTABLE_PROT_X: Execute permission.
|
||||
* @KVM_PGTABLE_PROT_W: Write permission.
|
||||
* @KVM_PGTABLE_PROT_R: Read permission.
|
||||
* @KVM_PGTABLE_PROT_DEVICE: Device attributes.
|
||||
* @KVM_PGTABLE_PROT_SW0: Software bit 0.
|
||||
* @KVM_PGTABLE_PROT_SW1: Software bit 1.
|
||||
* @KVM_PGTABLE_PROT_SW2: Software bit 2.
|
||||
* @KVM_PGTABLE_PROT_SW3: Software bit 3.
|
||||
*/
|
||||
enum kvm_pgtable_prot {
|
||||
KVM_PGTABLE_PROT_X = BIT(0),
|
||||
KVM_PGTABLE_PROT_W = BIT(1),
|
||||
KVM_PGTABLE_PROT_R = BIT(2),
|
||||
|
||||
KVM_PGTABLE_PROT_DEVICE = BIT(3),
|
||||
|
||||
KVM_PGTABLE_PROT_SW0 = BIT(55),
|
||||
KVM_PGTABLE_PROT_SW1 = BIT(56),
|
||||
KVM_PGTABLE_PROT_SW2 = BIT(57),
|
||||
KVM_PGTABLE_PROT_SW3 = BIT(58),
|
||||
};
|
||||
|
||||
#define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
|
||||
#define KVM_PGTABLE_PROT_RWX (KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X)
|
||||
|
||||
#define PKVM_HOST_MEM_PROT KVM_PGTABLE_PROT_RWX
|
||||
#define PKVM_HOST_MMIO_PROT KVM_PGTABLE_PROT_RW
|
||||
|
||||
#define PAGE_HYP KVM_PGTABLE_PROT_RW
|
||||
#define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X)
|
||||
#define PAGE_HYP_RO (KVM_PGTABLE_PROT_R)
|
||||
#define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)
|
||||
|
||||
typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
|
||||
enum kvm_pgtable_prot prot);
|
||||
|
||||
/**
|
||||
* struct kvm_pgtable - KVM page-table.
|
||||
* @ia_bits: Maximum input address size, in bits.
|
||||
|
@ -82,6 +160,9 @@ enum kvm_pgtable_stage2_flags {
|
|||
* @pgd: Pointer to the first top-level entry of the page-table.
|
||||
* @mm_ops: Memory management callbacks.
|
||||
* @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
|
||||
* @flags: Stage-2 page-table flags.
|
||||
* @force_pte_cb: Function that returns true if page level mappings must
|
||||
* be used instead of block mappings.
|
||||
*/
|
||||
struct kvm_pgtable {
|
||||
u32 ia_bits;
|
||||
|
@ -92,36 +173,7 @@ struct kvm_pgtable {
|
|||
/* Stage-2 only */
|
||||
struct kvm_s2_mmu *mmu;
|
||||
enum kvm_pgtable_stage2_flags flags;
|
||||
};
|
||||
|
||||
/**
|
||||
* enum kvm_pgtable_prot - Page-table permissions and attributes.
|
||||
* @KVM_PGTABLE_PROT_X: Execute permission.
|
||||
* @KVM_PGTABLE_PROT_W: Write permission.
|
||||
* @KVM_PGTABLE_PROT_R: Read permission.
|
||||
* @KVM_PGTABLE_PROT_DEVICE: Device attributes.
|
||||
*/
|
||||
enum kvm_pgtable_prot {
|
||||
KVM_PGTABLE_PROT_X = BIT(0),
|
||||
KVM_PGTABLE_PROT_W = BIT(1),
|
||||
KVM_PGTABLE_PROT_R = BIT(2),
|
||||
|
||||
KVM_PGTABLE_PROT_DEVICE = BIT(3),
|
||||
};
|
||||
|
||||
#define PAGE_HYP (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
|
||||
#define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X)
|
||||
#define PAGE_HYP_RO (KVM_PGTABLE_PROT_R)
|
||||
#define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)
|
||||
|
||||
/**
|
||||
* struct kvm_mem_range - Range of Intermediate Physical Addresses
|
||||
* @start: Start of the range.
|
||||
* @end: End of the range.
|
||||
*/
|
||||
struct kvm_mem_range {
|
||||
u64 start;
|
||||
u64 end;
|
||||
kvm_pgtable_force_pte_cb_t force_pte_cb;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -216,21 +268,24 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
|
|||
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_init_flags() - Initialise a guest stage-2 page-table.
|
||||
* __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
|
||||
* @pgt: Uninitialised page-table structure to initialise.
|
||||
* @arch: Arch-specific KVM structure representing the guest virtual
|
||||
* machine.
|
||||
* @mm_ops: Memory management callbacks.
|
||||
* @flags: Stage-2 configuration flags.
|
||||
* @force_pte_cb: Function that returns true if page level mappings must
|
||||
* be used instead of block mappings.
|
||||
*
|
||||
* Return: 0 on success, negative error code on failure.
|
||||
*/
|
||||
int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
|
||||
struct kvm_pgtable_mm_ops *mm_ops,
|
||||
enum kvm_pgtable_stage2_flags flags);
|
||||
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
|
||||
struct kvm_pgtable_mm_ops *mm_ops,
|
||||
enum kvm_pgtable_stage2_flags flags,
|
||||
kvm_pgtable_force_pte_cb_t force_pte_cb);
|
||||
|
||||
#define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \
|
||||
kvm_pgtable_stage2_init_flags(pgt, arch, mm_ops, 0)
|
||||
__kvm_pgtable_stage2_init(pgt, arch, mm_ops, 0, NULL)
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
|
||||
|
@ -374,7 +429,8 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr);
|
|||
* If there is a valid, leaf page-table entry used to translate @addr, then
|
||||
* relax the permissions in that entry according to the read, write and
|
||||
* execute permissions specified by @prot. No permissions are removed, and
|
||||
* TLB invalidation is performed after updating the entry.
|
||||
* TLB invalidation is performed after updating the entry. Software bits cannot
|
||||
* be set or cleared using kvm_pgtable_stage2_relax_perms().
|
||||
*
|
||||
* Return: 0 on success, negative error code on failure.
|
||||
*/
|
||||
|
@ -433,22 +489,42 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
|||
struct kvm_pgtable_walker *walker);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical
|
||||
* Addresses with compatible permission
|
||||
* attributes.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
|
||||
* @addr: Address that must be covered by the range.
|
||||
* @prot: Protection attributes that the range must be compatible with.
|
||||
* @range: Range structure used to limit the search space at call time and
|
||||
* that will hold the result.
|
||||
* kvm_pgtable_get_leaf() - Walk a page-table and retrieve the leaf entry
|
||||
* with its level.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_*_init()
|
||||
* or a similar initialiser.
|
||||
* @addr: Input address for the start of the walk.
|
||||
* @ptep: Pointer to storage for the retrieved PTE.
|
||||
* @level: Pointer to storage for the level of the retrieved PTE.
|
||||
*
|
||||
* The offset of @addr within a page is ignored. An IPA is compatible with @prot
|
||||
* iff its corresponding stage-2 page-table entry has default ownership and, if
|
||||
* valid, is mapped with protection attributes identical to @prot.
|
||||
* The offset of @addr within a page is ignored.
|
||||
*
|
||||
* The walker will walk the page-table entries corresponding to the input
|
||||
* address specified, retrieving the leaf corresponding to this address.
|
||||
* Invalid entries are treated as leaf entries.
|
||||
*
|
||||
* Return: 0 on success, negative error code on failure.
|
||||
*/
|
||||
int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr,
|
||||
enum kvm_pgtable_prot prot,
|
||||
struct kvm_mem_range *range);
|
||||
int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
|
||||
kvm_pte_t *ptep, u32 *level);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a
|
||||
* stage-2 Page-Table Entry.
|
||||
* @pte: Page-table entry
|
||||
*
|
||||
* Return: protection attributes of the page-table entry in the enum
|
||||
* kvm_pgtable_prot format.
|
||||
*/
|
||||
enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_hyp_pte_prot() - Retrieve the protection attributes of a stage-1
|
||||
* Page-Table Entry.
|
||||
* @pte: Page-table entry
|
||||
*
|
||||
* Return: protection attributes of the page-table entry in the enum
|
||||
* kvm_pgtable_prot format.
|
||||
*/
|
||||
enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
|
||||
#endif /* __ARM64_KVM_PGTABLE_H__ */
|
||||
|
|
|
@ -784,14 +784,13 @@
|
|||
#define ID_AA64PFR0_AMU 0x1
|
||||
#define ID_AA64PFR0_SVE 0x1
|
||||
#define ID_AA64PFR0_RAS_V1 0x1
|
||||
#define ID_AA64PFR0_RAS_V1P1 0x2
|
||||
#define ID_AA64PFR0_FP_NI 0xf
|
||||
#define ID_AA64PFR0_FP_SUPPORTED 0x0
|
||||
#define ID_AA64PFR0_ASIMD_NI 0xf
|
||||
#define ID_AA64PFR0_ASIMD_SUPPORTED 0x0
|
||||
#define ID_AA64PFR0_EL1_64BIT_ONLY 0x1
|
||||
#define ID_AA64PFR0_EL1_32BIT_64BIT 0x2
|
||||
#define ID_AA64PFR0_EL0_64BIT_ONLY 0x1
|
||||
#define ID_AA64PFR0_EL0_32BIT_64BIT 0x2
|
||||
#define ID_AA64PFR0_ELx_64BIT_ONLY 0x1
|
||||
#define ID_AA64PFR0_ELx_32BIT_64BIT 0x2
|
||||
|
||||
/* id_aa64pfr1 */
|
||||
#define ID_AA64PFR1_MPAMFRAC_SHIFT 16
|
||||
|
@ -847,6 +846,9 @@
|
|||
#define ID_AA64MMFR0_ASID_SHIFT 4
|
||||
#define ID_AA64MMFR0_PARANGE_SHIFT 0
|
||||
|
||||
#define ID_AA64MMFR0_ASID_8 0x0
|
||||
#define ID_AA64MMFR0_ASID_16 0x2
|
||||
|
||||
#define ID_AA64MMFR0_TGRAN4_NI 0xf
|
||||
#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN 0x0
|
||||
#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX 0x7
|
||||
|
@ -857,9 +859,16 @@
|
|||
#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN 0x1
|
||||
#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX 0xf
|
||||
|
||||
#define ID_AA64MMFR0_PARANGE_32 0x0
|
||||
#define ID_AA64MMFR0_PARANGE_36 0x1
|
||||
#define ID_AA64MMFR0_PARANGE_40 0x2
|
||||
#define ID_AA64MMFR0_PARANGE_42 0x3
|
||||
#define ID_AA64MMFR0_PARANGE_44 0x4
|
||||
#define ID_AA64MMFR0_PARANGE_48 0x5
|
||||
#define ID_AA64MMFR0_PARANGE_52 0x6
|
||||
|
||||
#define ARM64_MIN_PARANGE_BITS 32
|
||||
|
||||
#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT 0x0
|
||||
#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE 0x1
|
||||
#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN 0x2
|
||||
|
@ -904,6 +913,7 @@
|
|||
#define ID_AA64MMFR2_CNP_SHIFT 0
|
||||
|
||||
/* id_aa64dfr0 */
|
||||
#define ID_AA64DFR0_MTPMU_SHIFT 48
|
||||
#define ID_AA64DFR0_TRBE_SHIFT 44
|
||||
#define ID_AA64DFR0_TRACE_FILT_SHIFT 40
|
||||
#define ID_AA64DFR0_DOUBLELOCK_SHIFT 36
|
||||
|
@ -1034,14 +1044,17 @@
|
|||
#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT
|
||||
#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN
|
||||
#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX
|
||||
#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN4_2_SHIFT
|
||||
#elif defined(CONFIG_ARM64_16K_PAGES)
|
||||
#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT
|
||||
#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN
|
||||
#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX
|
||||
#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN16_2_SHIFT
|
||||
#elif defined(CONFIG_ARM64_64K_PAGES)
|
||||
#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT
|
||||
#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN
|
||||
#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX
|
||||
#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN64_2_SHIFT
|
||||
#endif
|
||||
|
||||
#define MVFR2_FPMISC_SHIFT 4
|
||||
|
@ -1172,6 +1185,11 @@
|
|||
#define ICH_VTR_A3V_SHIFT 21
|
||||
#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT)
|
||||
|
||||
#define ARM64_FEATURE_FIELD_BITS 4
|
||||
|
||||
/* Create a mask for the feature bits of the specified feature. */
|
||||
#define ARM64_FEATURE_MASK(x) (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
|
||||
|
||||
#ifdef __ASSEMBLY__
|
||||
|
||||
.irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
|
||||
|
|
|
@ -240,8 +240,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
|
|||
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL3_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY),
|
||||
ARM64_FTR_END,
|
||||
};
|
||||
|
||||
|
@ -1983,7 +1983,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
|
|||
.sys_reg = SYS_ID_AA64PFR0_EL1,
|
||||
.sign = FTR_UNSIGNED,
|
||||
.field_pos = ID_AA64PFR0_EL0_SHIFT,
|
||||
.min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT,
|
||||
.min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT,
|
||||
},
|
||||
#ifdef CONFIG_KVM
|
||||
{
|
||||
|
@ -1994,7 +1994,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
|
|||
.sys_reg = SYS_ID_AA64PFR0_EL1,
|
||||
.sign = FTR_UNSIGNED,
|
||||
.field_pos = ID_AA64PFR0_EL1_SHIFT,
|
||||
.min_field_value = ID_AA64PFR0_EL1_32BIT_64BIT,
|
||||
.min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT,
|
||||
},
|
||||
{
|
||||
.desc = "Protected KVM",
|
||||
|
|
|
@ -181,6 +181,8 @@ SECTIONS
|
|||
/* everything from this point to __init_begin will be marked RO NX */
|
||||
RO_DATA(PAGE_SIZE)
|
||||
|
||||
HYPERVISOR_DATA_SECTIONS
|
||||
|
||||
idmap_pg_dir = .;
|
||||
. += IDMAP_DIR_SIZE;
|
||||
idmap_pg_end = .;
|
||||
|
@ -260,8 +262,6 @@ SECTIONS
|
|||
_sdata = .;
|
||||
RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
|
||||
|
||||
HYPERVISOR_DATA_SECTIONS
|
||||
|
||||
/*
|
||||
* Data written with the MMU off but read with the MMU on requires
|
||||
* cache lines to be invalidated, discarding up to a Cache Writeback
|
||||
|
|
|
@ -26,6 +26,7 @@ menuconfig KVM
|
|||
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
|
||||
select KVM_MMIO
|
||||
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
select KVM_XFER_TO_GUEST_WORK
|
||||
select SRCU
|
||||
select KVM_VFIO
|
||||
select HAVE_KVM_EVENTFD
|
||||
|
@ -46,6 +47,15 @@ if KVM
|
|||
|
||||
source "virt/kvm/Kconfig"
|
||||
|
||||
config NVHE_EL2_DEBUG
|
||||
bool "Debug mode for non-VHE EL2 object"
|
||||
help
|
||||
Say Y here to enable the debug mode for the non-VHE KVM EL2 object.
|
||||
Failure reports will BUG() in the hypervisor. This is intended for
|
||||
local EL2 hypervisor development.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
endif # KVM
|
||||
|
||||
endif # VIRTUALIZATION
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include <linux/bug.h>
|
||||
#include <linux/cpu_pm.h>
|
||||
#include <linux/entry-kvm.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/kvm_host.h>
|
||||
|
@ -15,6 +16,7 @@
|
|||
#include <linux/fs.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/kmemleak.h>
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/kvm_irqfd.h>
|
||||
#include <linux/irqbypass.h>
|
||||
|
@ -42,10 +44,6 @@
|
|||
#include <kvm/arm_pmu.h>
|
||||
#include <kvm/arm_psci.h>
|
||||
|
||||
#ifdef REQUIRES_VIRT
|
||||
__asm__(".arch_extension virt");
|
||||
#endif
|
||||
|
||||
static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
|
||||
DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
|
||||
|
||||
|
@ -575,7 +573,7 @@ static void update_vmid(struct kvm_vmid *vmid)
|
|||
kvm_call_hyp(__kvm_flush_vm_context);
|
||||
}
|
||||
|
||||
vmid->vmid = kvm_next_vmid;
|
||||
WRITE_ONCE(vmid->vmid, kvm_next_vmid);
|
||||
kvm_next_vmid++;
|
||||
kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
|
||||
|
||||
|
@ -718,6 +716,45 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
|
|||
static_branch_unlikely(&arm64_mismatched_32bit_el0);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_vcpu_exit_request - returns true if the VCPU should *not* enter the guest
|
||||
* @vcpu: The VCPU pointer
|
||||
* @ret: Pointer to write optional return code
|
||||
*
|
||||
* Returns: true if the VCPU needs to return to a preemptible + interruptible
|
||||
* and skip guest entry.
|
||||
*
|
||||
* This function disambiguates between two different types of exits: exits to a
|
||||
* preemptible + interruptible kernel context and exits to userspace. For an
|
||||
* exit to userspace, this function will write the return code to ret and return
|
||||
* true. For an exit to preemptible + interruptible kernel context (i.e. check
|
||||
* for pending work and re-enter), return true without writing to ret.
|
||||
*/
|
||||
static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret)
|
||||
{
|
||||
struct kvm_run *run = vcpu->run;
|
||||
|
||||
/*
|
||||
* If we're using a userspace irqchip, then check if we need
|
||||
* to tell a userspace irqchip about timer or PMU level
|
||||
* changes and if so, exit to userspace (the actual level
|
||||
* state gets updated in kvm_timer_update_run and
|
||||
* kvm_pmu_update_run below).
|
||||
*/
|
||||
if (static_branch_unlikely(&userspace_irqchip_in_use)) {
|
||||
if (kvm_timer_should_notify_user(vcpu) ||
|
||||
kvm_pmu_should_notify_user(vcpu)) {
|
||||
*ret = -EINTR;
|
||||
run->exit_reason = KVM_EXIT_INTR;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return kvm_request_pending(vcpu) ||
|
||||
need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) ||
|
||||
xfer_to_guest_mode_work_pending();
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
|
||||
* @vcpu: The VCPU pointer
|
||||
|
@ -761,7 +798,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
|||
/*
|
||||
* Check conditions before entering the guest
|
||||
*/
|
||||
cond_resched();
|
||||
ret = xfer_to_guest_mode_handle_work(vcpu);
|
||||
if (!ret)
|
||||
ret = 1;
|
||||
|
||||
update_vmid(&vcpu->arch.hw_mmu->vmid);
|
||||
|
||||
|
@ -780,30 +819,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
|||
|
||||
kvm_vgic_flush_hwstate(vcpu);
|
||||
|
||||
/*
|
||||
* Exit if we have a signal pending so that we can deliver the
|
||||
* signal to user space.
|
||||
*/
|
||||
if (signal_pending(current)) {
|
||||
ret = -EINTR;
|
||||
run->exit_reason = KVM_EXIT_INTR;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're using a userspace irqchip, then check if we need
|
||||
* to tell a userspace irqchip about timer or PMU level
|
||||
* changes and if so, exit to userspace (the actual level
|
||||
* state gets updated in kvm_timer_update_run and
|
||||
* kvm_pmu_update_run below).
|
||||
*/
|
||||
if (static_branch_unlikely(&userspace_irqchip_in_use)) {
|
||||
if (kvm_timer_should_notify_user(vcpu) ||
|
||||
kvm_pmu_should_notify_user(vcpu)) {
|
||||
ret = -EINTR;
|
||||
run->exit_reason = KVM_EXIT_INTR;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure we set mode to IN_GUEST_MODE after we disable
|
||||
* interrupts and before the final VCPU requests check.
|
||||
|
@ -812,8 +827,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
|||
*/
|
||||
smp_store_mb(vcpu->mode, IN_GUEST_MODE);
|
||||
|
||||
if (ret <= 0 || need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) ||
|
||||
kvm_request_pending(vcpu)) {
|
||||
if (ret <= 0 || kvm_vcpu_exit_request(vcpu, &ret)) {
|
||||
vcpu->mode = OUTSIDE_GUEST_MODE;
|
||||
isb(); /* Ensure work in x_flush_hwstate is committed */
|
||||
kvm_pmu_sync_hwstate(vcpu);
|
||||
|
@ -1039,7 +1053,7 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
|
|||
const struct kvm_vcpu_init *init)
|
||||
{
|
||||
unsigned int i, ret;
|
||||
int phys_target = kvm_target_cpu();
|
||||
u32 phys_target = kvm_target_cpu();
|
||||
|
||||
if (init->target != phys_target)
|
||||
return -EINVAL;
|
||||
|
@ -1108,6 +1122,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
|
|||
}
|
||||
|
||||
vcpu_reset_hcr(vcpu);
|
||||
vcpu->arch.cptr_el2 = CPTR_EL2_DEFAULT;
|
||||
|
||||
/*
|
||||
* Handle the "start in power-off" case.
|
||||
|
@ -1219,6 +1234,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
|||
if (copy_from_user(®, argp, sizeof(reg)))
|
||||
break;
|
||||
|
||||
/*
|
||||
* We could owe a reset due to PSCI. Handle the pending reset
|
||||
* here to ensure userspace register accesses are ordered after
|
||||
* the reset.
|
||||
*/
|
||||
if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
|
||||
kvm_reset_vcpu(vcpu);
|
||||
|
||||
if (ioctl == KVM_SET_ONE_REG)
|
||||
r = kvm_arm_set_reg(vcpu, ®);
|
||||
else
|
||||
|
@ -1700,11 +1723,6 @@ static bool init_psci_relay(void)
|
|||
return true;
|
||||
}
|
||||
|
||||
static int init_common_resources(void)
|
||||
{
|
||||
return kvm_set_ipa_limit();
|
||||
}
|
||||
|
||||
static int init_subsystems(void)
|
||||
{
|
||||
int err = 0;
|
||||
|
@ -1958,56 +1976,17 @@ static void _kvm_host_prot_finalize(void *discard)
|
|||
WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize));
|
||||
}
|
||||
|
||||
static inline int pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
|
||||
{
|
||||
return kvm_call_hyp_nvhe(__pkvm_mark_hyp, start, end);
|
||||
}
|
||||
|
||||
#define pkvm_mark_hyp_section(__section) \
|
||||
pkvm_mark_hyp(__pa_symbol(__section##_start), \
|
||||
__pa_symbol(__section##_end))
|
||||
|
||||
static int finalize_hyp_mode(void)
|
||||
{
|
||||
int cpu, ret;
|
||||
|
||||
if (!is_protected_kvm_enabled())
|
||||
return 0;
|
||||
|
||||
ret = pkvm_mark_hyp_section(__hyp_idmap_text);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_mark_hyp_section(__hyp_text);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_mark_hyp_section(__hyp_rodata);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_mark_hyp_section(__hyp_bss);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_mark_hyp(hyp_mem_base, hyp_mem_base + hyp_mem_size);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
phys_addr_t start = virt_to_phys((void *)kvm_arm_hyp_percpu_base[cpu]);
|
||||
phys_addr_t end = start + (PAGE_SIZE << nvhe_percpu_order());
|
||||
|
||||
ret = pkvm_mark_hyp(start, end);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
start = virt_to_phys((void *)per_cpu(kvm_arm_hyp_stack_page, cpu));
|
||||
end = start + PAGE_SIZE;
|
||||
ret = pkvm_mark_hyp(start, end);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
/*
|
||||
* Exclude HYP BSS from kmemleak so that it doesn't get peeked
|
||||
* at, which would end badly once the section is inaccessible.
|
||||
* None of other sections should ever be introspected.
|
||||
*/
|
||||
kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
|
||||
|
||||
/*
|
||||
* Flip the static key upfront as that may no longer be possible
|
||||
|
@ -2019,11 +1998,6 @@ static int finalize_hyp_mode(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void check_kvm_target_cpu(void *ret)
|
||||
{
|
||||
*(int *)ret = kvm_target_cpu();
|
||||
}
|
||||
|
||||
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
@ -2083,7 +2057,6 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
|
|||
int kvm_arch_init(void *opaque)
|
||||
{
|
||||
int err;
|
||||
int ret, cpu;
|
||||
bool in_hyp_mode;
|
||||
|
||||
if (!is_hyp_mode_available()) {
|
||||
|
@ -2098,15 +2071,7 @@ int kvm_arch_init(void *opaque)
|
|||
kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \
|
||||
"Only trusted guests should be used on this system.\n");
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
|
||||
if (ret < 0) {
|
||||
kvm_err("Error, CPU %d not supported!\n", cpu);
|
||||
return -ENODEV;
|
||||
}
|
||||
}
|
||||
|
||||
err = init_common_resources();
|
||||
err = kvm_set_ipa_limit();
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
DBG_MDSCR_KDE | \
|
||||
DBG_MDSCR_MDE)
|
||||
|
||||
static DEFINE_PER_CPU(u32, mdcr_el2);
|
||||
static DEFINE_PER_CPU(u64, mdcr_el2);
|
||||
|
||||
/**
|
||||
* save/restore_guest_debug_regs
|
||||
|
|
|
@ -31,8 +31,6 @@
|
|||
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
|
||||
KVM_GENERIC_VM_STATS()
|
||||
};
|
||||
static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
|
||||
sizeof(struct kvm_vm_stat) / sizeof(u64));
|
||||
|
||||
const struct kvm_stats_header kvm_vm_stats_header = {
|
||||
.name_size = KVM_STATS_NAME_SIZE,
|
||||
|
@ -50,10 +48,9 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
|
|||
STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
|
||||
STATS_DESC_COUNTER(VCPU, mmio_exit_user),
|
||||
STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
|
||||
STATS_DESC_COUNTER(VCPU, signal_exits),
|
||||
STATS_DESC_COUNTER(VCPU, exits)
|
||||
};
|
||||
static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
|
||||
sizeof(struct kvm_vcpu_stat) / sizeof(u64));
|
||||
|
||||
const struct kvm_stats_header kvm_vcpu_stats_header = {
|
||||
.name_size = KVM_STATS_NAME_SIZE,
|
||||
|
@ -842,7 +839,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
|
|||
return 0;
|
||||
}
|
||||
|
||||
int __attribute_const__ kvm_target_cpu(void)
|
||||
u32 __attribute_const__ kvm_target_cpu(void)
|
||||
{
|
||||
unsigned long implementor = read_cpuid_implementor();
|
||||
unsigned long part_number = read_cpuid_part_number();
|
||||
|
@ -874,7 +871,7 @@ int __attribute_const__ kvm_target_cpu(void)
|
|||
|
||||
int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
|
||||
{
|
||||
int target = kvm_target_cpu();
|
||||
u32 target = kvm_target_cpu();
|
||||
|
||||
if (target < 0)
|
||||
return -ENODEV;
|
||||
|
|
|
@ -113,34 +113,20 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
|
|||
* guest and host are using the same debug facilities it will be up to
|
||||
* userspace to re-inject the correct exception for guest delivery.
|
||||
*
|
||||
* @return: 0 (while setting vcpu->run->exit_reason), -1 for error
|
||||
* @return: 0 (while setting vcpu->run->exit_reason)
|
||||
*/
|
||||
static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_run *run = vcpu->run;
|
||||
u32 esr = kvm_vcpu_get_esr(vcpu);
|
||||
int ret = 0;
|
||||
|
||||
run->exit_reason = KVM_EXIT_DEBUG;
|
||||
run->debug.arch.hsr = esr;
|
||||
|
||||
switch (ESR_ELx_EC(esr)) {
|
||||
case ESR_ELx_EC_WATCHPT_LOW:
|
||||
if (ESR_ELx_EC(esr) == ESR_ELx_EC_WATCHPT_LOW)
|
||||
run->debug.arch.far = vcpu->arch.fault.far_el2;
|
||||
fallthrough;
|
||||
case ESR_ELx_EC_SOFTSTP_LOW:
|
||||
case ESR_ELx_EC_BREAKPT_LOW:
|
||||
case ESR_ELx_EC_BKPT32:
|
||||
case ESR_ELx_EC_BRK64:
|
||||
break;
|
||||
default:
|
||||
kvm_err("%s: un-handled case esr: %#08x\n",
|
||||
__func__, (unsigned int) esr);
|
||||
ret = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
|
||||
|
@ -292,11 +278,12 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
|
|||
kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu));
|
||||
}
|
||||
|
||||
void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr,
|
||||
void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
|
||||
u64 elr_virt, u64 elr_phys,
|
||||
u64 par, uintptr_t vcpu,
|
||||
u64 far, u64 hpfar) {
|
||||
u64 elr_in_kimg = __phys_to_kimg(__hyp_pa(elr));
|
||||
u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr;
|
||||
u64 elr_in_kimg = __phys_to_kimg(elr_phys);
|
||||
u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr_virt;
|
||||
u64 mode = spsr & PSR_MODE_MASK;
|
||||
|
||||
/*
|
||||
|
@ -309,20 +296,24 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr,
|
|||
kvm_err("Invalid host exception to nVHE hyp!\n");
|
||||
} else if (ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 &&
|
||||
(esr & ESR_ELx_BRK64_ISS_COMMENT_MASK) == BUG_BRK_IMM) {
|
||||
struct bug_entry *bug = find_bug(elr_in_kimg);
|
||||
const char *file = NULL;
|
||||
unsigned int line = 0;
|
||||
|
||||
/* All hyp bugs, including warnings, are treated as fatal. */
|
||||
if (bug)
|
||||
bug_get_file_line(bug, &file, &line);
|
||||
if (!is_protected_kvm_enabled() ||
|
||||
IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
|
||||
struct bug_entry *bug = find_bug(elr_in_kimg);
|
||||
|
||||
if (bug)
|
||||
bug_get_file_line(bug, &file, &line);
|
||||
}
|
||||
|
||||
if (file)
|
||||
kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line);
|
||||
else
|
||||
kvm_err("nVHE hyp BUG at: %016llx!\n", elr + hyp_offset);
|
||||
kvm_err("nVHE hyp BUG at: %016llx!\n", elr_virt + hyp_offset);
|
||||
} else {
|
||||
kvm_err("nVHE hyp panic at: %016llx!\n", elr + hyp_offset);
|
||||
kvm_err("nVHE hyp panic at: %016llx!\n", elr_virt + hyp_offset);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -334,5 +325,5 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr,
|
|||
kvm_err("Hyp Offset: 0x%llx\n", hyp_offset);
|
||||
|
||||
panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n",
|
||||
spsr, elr, esr, far, hpfar, par, vcpu);
|
||||
spsr, elr_virt, esr, far, hpfar, par, vcpu);
|
||||
}
|
||||
|
|
|
@ -92,11 +92,15 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
|
|||
write_sysreg(0, pmselr_el0);
|
||||
write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
|
||||
}
|
||||
|
||||
vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
|
||||
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
|
||||
}
|
||||
|
||||
static inline void __deactivate_traps_common(void)
|
||||
static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2);
|
||||
|
||||
write_sysreg(0, hstr_el2);
|
||||
if (kvm_arm_support_pmu_v3())
|
||||
write_sysreg(0, pmuserenr_el0);
|
||||
|
|
|
@ -12,6 +12,32 @@
|
|||
#include <asm/virt.h>
|
||||
#include <nvhe/spinlock.h>
|
||||
|
||||
/*
|
||||
* SW bits 0-1 are reserved to track the memory ownership state of each page:
|
||||
* 00: The page is owned exclusively by the page-table owner.
|
||||
* 01: The page is owned by the page-table owner, but is shared
|
||||
* with another entity.
|
||||
* 10: The page is shared with, but not owned by the page-table owner.
|
||||
* 11: Reserved for future use (lending).
|
||||
*/
|
||||
enum pkvm_page_state {
|
||||
PKVM_PAGE_OWNED = 0ULL,
|
||||
PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0,
|
||||
PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1,
|
||||
};
|
||||
|
||||
#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
|
||||
static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
|
||||
enum pkvm_page_state state)
|
||||
{
|
||||
return (prot & ~PKVM_PAGE_STATE_PROT_MASK) | state;
|
||||
}
|
||||
|
||||
static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
|
||||
{
|
||||
return prot & PKVM_PAGE_STATE_PROT_MASK;
|
||||
}
|
||||
|
||||
struct host_kvm {
|
||||
struct kvm_arch arch;
|
||||
struct kvm_pgtable pgt;
|
||||
|
@ -20,16 +46,21 @@ struct host_kvm {
|
|||
};
|
||||
extern struct host_kvm host_kvm;
|
||||
|
||||
int __pkvm_prot_finalize(void);
|
||||
int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end);
|
||||
extern const u8 pkvm_hyp_id;
|
||||
|
||||
int __pkvm_prot_finalize(void);
|
||||
int __pkvm_host_share_hyp(u64 pfn);
|
||||
|
||||
bool addr_is_memory(phys_addr_t phys);
|
||||
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
|
||||
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id);
|
||||
int kvm_host_prepare_stage2(void *pgt_pool_base);
|
||||
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
|
||||
|
||||
static __always_inline void __load_host_stage2(void)
|
||||
{
|
||||
if (static_branch_likely(&kvm_protected_mode_initialized))
|
||||
__load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr);
|
||||
__load_stage2(&host_kvm.arch.mmu, &host_kvm.arch);
|
||||
else
|
||||
write_sysreg(0, vttbr_el2);
|
||||
}
|
||||
|
|
|
@ -23,8 +23,7 @@ int hyp_map_vectors(void);
|
|||
int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back);
|
||||
int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot);
|
||||
int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
|
||||
int __pkvm_create_mappings(unsigned long start, unsigned long size,
|
||||
unsigned long phys, enum kvm_pgtable_prot prot);
|
||||
int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot);
|
||||
unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
|
||||
enum kvm_pgtable_prot prot);
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/lse.h>
|
||||
#include <asm/rwonce.h>
|
||||
|
||||
typedef union hyp_spinlock {
|
||||
u32 __val;
|
||||
|
@ -89,4 +90,28 @@ static inline void hyp_spin_unlock(hyp_spinlock_t *lock)
|
|||
: "memory");
|
||||
}
|
||||
|
||||
static inline bool hyp_spin_is_locked(hyp_spinlock_t *lock)
|
||||
{
|
||||
hyp_spinlock_t lockval = READ_ONCE(*lock);
|
||||
|
||||
return lockval.owner != lockval.next;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVHE_EL2_DEBUG
|
||||
static inline void hyp_assert_lock_held(hyp_spinlock_t *lock)
|
||||
{
|
||||
/*
|
||||
* The __pkvm_init() path accesses protected data-structures without
|
||||
* holding locks as the other CPUs are guaranteed to not enter EL2
|
||||
* concurrently at this point in time. The point by which EL2 is
|
||||
* initialized on all CPUs is reflected in the pkvm static key, so
|
||||
* wait until it is set before checking the lock state.
|
||||
*/
|
||||
if (static_branch_likely(&kvm_protected_mode_initialized))
|
||||
BUG_ON(!hyp_spin_is_locked(lock));
|
||||
}
|
||||
#else
|
||||
static inline void hyp_assert_lock_held(hyp_spinlock_t *lock) { }
|
||||
#endif
|
||||
|
||||
#endif /* __ARM64_KVM_NVHE_SPINLOCK_H__ */
|
||||
|
|
|
@ -109,7 +109,7 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu)
|
|||
__debug_switch_to_host_common(vcpu);
|
||||
}
|
||||
|
||||
u32 __kvm_get_mdcr_el2(void)
|
||||
u64 __kvm_get_mdcr_el2(void)
|
||||
{
|
||||
return read_sysreg(mdcr_el2);
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include <linux/linkage.h>
|
||||
|
||||
#include <asm/assembler.h>
|
||||
#include <asm/kvm_arm.h>
|
||||
#include <asm/kvm_asm.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
|
||||
|
@ -85,12 +86,24 @@ SYM_FUNC_START(__hyp_do_panic)
|
|||
|
||||
mov x29, x0
|
||||
|
||||
#ifdef CONFIG_NVHE_EL2_DEBUG
|
||||
/* Ensure host stage-2 is disabled */
|
||||
mrs x0, hcr_el2
|
||||
bic x0, x0, #HCR_VM
|
||||
msr hcr_el2, x0
|
||||
isb
|
||||
tlbi vmalls12e1
|
||||
dsb nsh
|
||||
#endif
|
||||
|
||||
/* Load the panic arguments into x0-7 */
|
||||
mrs x0, esr_el2
|
||||
get_vcpu_ptr x4, x5
|
||||
mrs x5, far_el2
|
||||
mrs x6, hpfar_el2
|
||||
mov x7, xzr // Unused argument
|
||||
mov x4, x3
|
||||
mov x3, x2
|
||||
hyp_pa x3, x6
|
||||
get_vcpu_ptr x5, x6
|
||||
mrs x6, far_el2
|
||||
mrs x7, hpfar_el2
|
||||
|
||||
/* Enter the host, conditionally restoring the host context. */
|
||||
cbz x29, __host_enter_without_restoring
|
||||
|
|
|
@ -140,14 +140,11 @@ static void handle___pkvm_cpu_set_vector(struct kvm_cpu_context *host_ctxt)
|
|||
cpu_reg(host_ctxt, 1) = pkvm_cpu_set_vector(slot);
|
||||
}
|
||||
|
||||
static void handle___pkvm_create_mappings(struct kvm_cpu_context *host_ctxt)
|
||||
static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(unsigned long, start, host_ctxt, 1);
|
||||
DECLARE_REG(unsigned long, size, host_ctxt, 2);
|
||||
DECLARE_REG(unsigned long, phys, host_ctxt, 3);
|
||||
DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 4);
|
||||
DECLARE_REG(u64, pfn, host_ctxt, 1);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_create_mappings(start, size, phys, prot);
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn);
|
||||
}
|
||||
|
||||
static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt)
|
||||
|
@ -163,14 +160,6 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
|
|||
{
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize();
|
||||
}
|
||||
|
||||
static void handle___pkvm_mark_hyp(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(phys_addr_t, start, host_ctxt, 1);
|
||||
DECLARE_REG(phys_addr_t, end, host_ctxt, 2);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_mark_hyp(start, end);
|
||||
}
|
||||
typedef void (*hcall_t)(struct kvm_cpu_context *);
|
||||
|
||||
#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
|
||||
|
@ -193,10 +182,9 @@ static const hcall_t host_hcall[] = {
|
|||
HANDLE_FUNC(__vgic_v3_restore_aprs),
|
||||
HANDLE_FUNC(__pkvm_init),
|
||||
HANDLE_FUNC(__pkvm_cpu_set_vector),
|
||||
HANDLE_FUNC(__pkvm_create_mappings),
|
||||
HANDLE_FUNC(__pkvm_host_share_hyp),
|
||||
HANDLE_FUNC(__pkvm_create_private_mapping),
|
||||
HANDLE_FUNC(__pkvm_prot_finalize),
|
||||
HANDLE_FUNC(__pkvm_mark_hyp),
|
||||
};
|
||||
|
||||
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
|
||||
|
|
|
@ -31,7 +31,7 @@ static struct hyp_pool host_s2_pool;
|
|||
u64 id_aa64mmfr0_el1_sys_val;
|
||||
u64 id_aa64mmfr1_el1_sys_val;
|
||||
|
||||
static const u8 pkvm_hyp_id = 1;
|
||||
const u8 pkvm_hyp_id = 1;
|
||||
|
||||
static void *host_s2_zalloc_pages_exact(size_t size)
|
||||
{
|
||||
|
@ -89,6 +89,8 @@ static void prepare_host_vtcr(void)
|
|||
id_aa64mmfr1_el1_sys_val, phys_shift);
|
||||
}
|
||||
|
||||
static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);
|
||||
|
||||
int kvm_host_prepare_stage2(void *pgt_pool_base)
|
||||
{
|
||||
struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
|
||||
|
@ -101,16 +103,17 @@ int kvm_host_prepare_stage2(void *pgt_pool_base)
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = kvm_pgtable_stage2_init_flags(&host_kvm.pgt, &host_kvm.arch,
|
||||
&host_kvm.mm_ops, KVM_HOST_S2_FLAGS);
|
||||
ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, &host_kvm.arch,
|
||||
&host_kvm.mm_ops, KVM_HOST_S2_FLAGS,
|
||||
host_stage2_force_pte_cb);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
|
||||
mmu->arch = &host_kvm.arch;
|
||||
mmu->pgt = &host_kvm.pgt;
|
||||
mmu->vmid.vmid_gen = 0;
|
||||
mmu->vmid.vmid = 0;
|
||||
WRITE_ONCE(mmu->vmid.vmid_gen, 0);
|
||||
WRITE_ONCE(mmu->vmid.vmid, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -126,7 +129,7 @@ int __pkvm_prot_finalize(void)
|
|||
kvm_flush_dcache_to_poc(params, sizeof(*params));
|
||||
|
||||
write_sysreg(params->hcr_el2, hcr_el2);
|
||||
__load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr);
|
||||
__load_stage2(&host_kvm.arch.mmu, &host_kvm.arch);
|
||||
|
||||
/*
|
||||
* Make sure to have an ISB before the TLB maintenance below but only
|
||||
|
@ -159,6 +162,11 @@ static int host_stage2_unmap_dev_all(void)
|
|||
return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
|
||||
}
|
||||
|
||||
struct kvm_mem_range {
|
||||
u64 start;
|
||||
u64 end;
|
||||
};
|
||||
|
||||
static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
|
||||
{
|
||||
int cur, left = 0, right = hyp_memblock_nr;
|
||||
|
@ -189,16 +197,26 @@ static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
|
|||
return false;
|
||||
}
|
||||
|
||||
bool addr_is_memory(phys_addr_t phys)
|
||||
{
|
||||
struct kvm_mem_range range;
|
||||
|
||||
return find_mem_range(phys, &range);
|
||||
}
|
||||
|
||||
static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
|
||||
{
|
||||
return range->start <= addr && addr < range->end;
|
||||
}
|
||||
|
||||
static bool range_is_memory(u64 start, u64 end)
|
||||
{
|
||||
struct kvm_mem_range r1, r2;
|
||||
struct kvm_mem_range r;
|
||||
|
||||
if (!find_mem_range(start, &r1) || !find_mem_range(end - 1, &r2))
|
||||
return false;
|
||||
if (r1.start != r2.start)
|
||||
if (!find_mem_range(start, &r))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
return is_in_mem_range(end - 1, &r);
|
||||
}
|
||||
|
||||
static inline int __host_stage2_idmap(u64 start, u64 end,
|
||||
|
@ -208,60 +226,208 @@ static inline int __host_stage2_idmap(u64 start, u64 end,
|
|||
prot, &host_s2_pool);
|
||||
}
|
||||
|
||||
static int host_stage2_idmap(u64 addr)
|
||||
/*
|
||||
* The pool has been provided with enough pages to cover all of memory with
|
||||
* page granularity, but it is difficult to know how much of the MMIO range
|
||||
* we will need to cover upfront, so we may need to 'recycle' the pages if we
|
||||
* run out.
|
||||
*/
|
||||
#define host_stage2_try(fn, ...) \
|
||||
({ \
|
||||
int __ret; \
|
||||
hyp_assert_lock_held(&host_kvm.lock); \
|
||||
__ret = fn(__VA_ARGS__); \
|
||||
if (__ret == -ENOMEM) { \
|
||||
__ret = host_stage2_unmap_dev_all(); \
|
||||
if (!__ret) \
|
||||
__ret = fn(__VA_ARGS__); \
|
||||
} \
|
||||
__ret; \
|
||||
})
|
||||
|
||||
static inline bool range_included(struct kvm_mem_range *child,
|
||||
struct kvm_mem_range *parent)
|
||||
{
|
||||
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W;
|
||||
struct kvm_mem_range range;
|
||||
bool is_memory = find_mem_range(addr, &range);
|
||||
return parent->start <= child->start && child->end <= parent->end;
|
||||
}
|
||||
|
||||
static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
|
||||
{
|
||||
struct kvm_mem_range cur;
|
||||
kvm_pte_t pte;
|
||||
u32 level;
|
||||
int ret;
|
||||
|
||||
if (is_memory)
|
||||
prot |= KVM_PGTABLE_PROT_X;
|
||||
hyp_assert_lock_held(&host_kvm.lock);
|
||||
ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, &level);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (kvm_pte_valid(pte))
|
||||
return -EAGAIN;
|
||||
|
||||
if (pte)
|
||||
return -EPERM;
|
||||
|
||||
do {
|
||||
u64 granule = kvm_granule_size(level);
|
||||
cur.start = ALIGN_DOWN(addr, granule);
|
||||
cur.end = cur.start + granule;
|
||||
level++;
|
||||
} while ((level < KVM_PGTABLE_MAX_LEVELS) &&
|
||||
!(kvm_level_supports_block_mapping(level) &&
|
||||
range_included(&cur, range)));
|
||||
|
||||
*range = cur;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
|
||||
enum kvm_pgtable_prot prot)
|
||||
{
|
||||
hyp_assert_lock_held(&host_kvm.lock);
|
||||
|
||||
return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
|
||||
}
|
||||
|
||||
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
|
||||
{
|
||||
hyp_assert_lock_held(&host_kvm.lock);
|
||||
|
||||
return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt,
|
||||
addr, size, &host_s2_pool, owner_id);
|
||||
}
|
||||
|
||||
static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
|
||||
{
|
||||
/*
|
||||
* Block mappings must be used with care in the host stage-2 as a
|
||||
* kvm_pgtable_stage2_map() operation targeting a page in the range of
|
||||
* an existing block will delete the block under the assumption that
|
||||
* mappings in the rest of the block range can always be rebuilt lazily.
|
||||
* That assumption is correct for the host stage-2 with RWX mappings
|
||||
* targeting memory or RW mappings targeting MMIO ranges (see
|
||||
* host_stage2_idmap() below which implements some of the host memory
|
||||
* abort logic). However, this is not safe for any other mappings where
|
||||
* the host stage-2 page-table is in fact the only place where this
|
||||
* state is stored. In all those cases, it is safer to use page-level
|
||||
* mappings, hence avoiding to lose the state because of side-effects in
|
||||
* kvm_pgtable_stage2_map().
|
||||
*/
|
||||
if (range_is_memory(addr, end))
|
||||
return prot != PKVM_HOST_MEM_PROT;
|
||||
else
|
||||
return prot != PKVM_HOST_MMIO_PROT;
|
||||
}
|
||||
|
||||
static int host_stage2_idmap(u64 addr)
|
||||
{
|
||||
struct kvm_mem_range range;
|
||||
bool is_memory = find_mem_range(addr, &range);
|
||||
enum kvm_pgtable_prot prot;
|
||||
int ret;
|
||||
|
||||
prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;
|
||||
|
||||
hyp_spin_lock(&host_kvm.lock);
|
||||
ret = kvm_pgtable_stage2_find_range(&host_kvm.pgt, addr, prot, &range);
|
||||
ret = host_stage2_adjust_range(addr, &range);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
ret = __host_stage2_idmap(range.start, range.end, prot);
|
||||
if (ret != -ENOMEM)
|
||||
goto unlock;
|
||||
|
||||
/*
|
||||
* The pool has been provided with enough pages to cover all of memory
|
||||
* with page granularity, but it is difficult to know how much of the
|
||||
* MMIO range we will need to cover upfront, so we may need to 'recycle'
|
||||
* the pages if we run out.
|
||||
*/
|
||||
ret = host_stage2_unmap_dev_all();
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
ret = __host_stage2_idmap(range.start, range.end, prot);
|
||||
|
||||
ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
|
||||
unlock:
|
||||
hyp_spin_unlock(&host_kvm.lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
|
||||
static inline bool check_prot(enum kvm_pgtable_prot prot,
|
||||
enum kvm_pgtable_prot required,
|
||||
enum kvm_pgtable_prot denied)
|
||||
{
|
||||
return (prot & (required | denied)) == required;
|
||||
}
|
||||
|
||||
int __pkvm_host_share_hyp(u64 pfn)
|
||||
{
|
||||
phys_addr_t addr = hyp_pfn_to_phys(pfn);
|
||||
enum kvm_pgtable_prot prot, cur;
|
||||
void *virt = __hyp_va(addr);
|
||||
enum pkvm_page_state state;
|
||||
kvm_pte_t pte;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* host_stage2_unmap_dev_all() currently relies on MMIO mappings being
|
||||
* non-persistent, so don't allow changing page ownership in MMIO range.
|
||||
*/
|
||||
if (!range_is_memory(start, end))
|
||||
if (!addr_is_memory(addr))
|
||||
return -EINVAL;
|
||||
|
||||
hyp_spin_lock(&host_kvm.lock);
|
||||
ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start,
|
||||
&host_s2_pool, pkvm_hyp_id);
|
||||
hyp_spin_lock(&pkvm_pgd_lock);
|
||||
|
||||
ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
if (!pte)
|
||||
goto map_shared;
|
||||
|
||||
/*
|
||||
* Check attributes in the host stage-2 PTE. We need the page to be:
|
||||
* - mapped RWX as we're sharing memory;
|
||||
* - not borrowed, as that implies absence of ownership.
|
||||
* Otherwise, we can't let it got through
|
||||
*/
|
||||
cur = kvm_pgtable_stage2_pte_prot(pte);
|
||||
prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED);
|
||||
if (!check_prot(cur, PKVM_HOST_MEM_PROT, prot)) {
|
||||
ret = -EPERM;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
state = pkvm_getstate(cur);
|
||||
if (state == PKVM_PAGE_OWNED)
|
||||
goto map_shared;
|
||||
|
||||
/*
|
||||
* Tolerate double-sharing the same page, but this requires
|
||||
* cross-checking the hypervisor stage-1.
|
||||
*/
|
||||
if (state != PKVM_PAGE_SHARED_OWNED) {
|
||||
ret = -EPERM;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
/*
|
||||
* If the page has been shared with the hypervisor, it must be
|
||||
* already mapped as SHARED_BORROWED in its stage-1.
|
||||
*/
|
||||
cur = kvm_pgtable_hyp_pte_prot(pte);
|
||||
prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
|
||||
if (!check_prot(cur, prot, ~prot))
|
||||
ret = -EPERM;
|
||||
goto unlock;
|
||||
|
||||
map_shared:
|
||||
/*
|
||||
* If the page is not yet shared, adjust mappings in both page-tables
|
||||
* while both locks are held.
|
||||
*/
|
||||
prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
|
||||
ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot);
|
||||
BUG_ON(ret);
|
||||
|
||||
prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
|
||||
ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot);
|
||||
BUG_ON(ret);
|
||||
|
||||
unlock:
|
||||
hyp_spin_unlock(&pkvm_pgd_lock);
|
||||
hyp_spin_unlock(&host_kvm.lock);
|
||||
|
||||
return ret != -EAGAIN ? ret : 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
|
||||
|
|
|
@ -23,8 +23,8 @@ u64 __io_map_base;
|
|||
struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS];
|
||||
unsigned int hyp_memblock_nr;
|
||||
|
||||
int __pkvm_create_mappings(unsigned long start, unsigned long size,
|
||||
unsigned long phys, enum kvm_pgtable_prot prot)
|
||||
static int __pkvm_create_mappings(unsigned long start, unsigned long size,
|
||||
unsigned long phys, enum kvm_pgtable_prot prot)
|
||||
{
|
||||
int err;
|
||||
|
||||
|
@ -67,13 +67,15 @@ out:
|
|||
return addr;
|
||||
}
|
||||
|
||||
int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
|
||||
int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot)
|
||||
{
|
||||
unsigned long start = (unsigned long)from;
|
||||
unsigned long end = (unsigned long)to;
|
||||
unsigned long virt_addr;
|
||||
phys_addr_t phys;
|
||||
|
||||
hyp_assert_lock_held(&pkvm_pgd_lock);
|
||||
|
||||
start = start & PAGE_MASK;
|
||||
end = PAGE_ALIGN(end);
|
||||
|
||||
|
@ -81,7 +83,8 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
|
|||
int err;
|
||||
|
||||
phys = hyp_virt_to_phys((void *)virt_addr);
|
||||
err = __pkvm_create_mappings(virt_addr, PAGE_SIZE, phys, prot);
|
||||
err = kvm_pgtable_hyp_map(&pkvm_pgtable, virt_addr, PAGE_SIZE,
|
||||
phys, prot);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
@ -89,6 +92,17 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
|
||||
{
|
||||
int ret;
|
||||
|
||||
hyp_spin_lock(&pkvm_pgd_lock);
|
||||
ret = pkvm_create_mappings_locked(from, to, prot);
|
||||
hyp_spin_unlock(&pkvm_pgd_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back)
|
||||
{
|
||||
unsigned long start, end;
|
||||
|
|
|
@ -58,6 +58,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
|
|||
{
|
||||
void *start, *end, *virt = hyp_phys_to_virt(phys);
|
||||
unsigned long pgt_size = hyp_s1_pgtable_pages() << PAGE_SHIFT;
|
||||
enum kvm_pgtable_prot prot;
|
||||
int ret, i;
|
||||
|
||||
/* Recreate the hyp page-table using the early page allocator */
|
||||
|
@ -83,10 +84,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_create_mappings(__start_rodata, __end_rodata, PAGE_HYP_RO);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_create_mappings(__hyp_rodata_start, __hyp_rodata_end, PAGE_HYP_RO);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
@ -95,10 +92,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, PAGE_HYP_RO);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_create_mappings(virt, virt + size, PAGE_HYP);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
@ -117,6 +110,24 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Map the host's .bss and .rodata sections RO in the hypervisor, but
|
||||
* transfer the ownership from the host to the hypervisor itself to
|
||||
* make sure it can't be donated or shared with another entity.
|
||||
*
|
||||
* The ownership transition requires matching changes in the host
|
||||
* stage-2. This will be done later (see finalize_host_mappings()) once
|
||||
* the hyp_vmemmap is addressable.
|
||||
*/
|
||||
prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED);
|
||||
ret = pkvm_create_mappings(__start_rodata, __end_rodata, prot);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, prot);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -148,6 +159,57 @@ static void hpool_put_page(void *addr)
|
|||
hyp_put_page(&hpool, addr);
|
||||
}
|
||||
|
||||
static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
|
||||
kvm_pte_t *ptep,
|
||||
enum kvm_pgtable_walk_flags flag,
|
||||
void * const arg)
|
||||
{
|
||||
enum kvm_pgtable_prot prot;
|
||||
enum pkvm_page_state state;
|
||||
kvm_pte_t pte = *ptep;
|
||||
phys_addr_t phys;
|
||||
|
||||
if (!kvm_pte_valid(pte))
|
||||
return 0;
|
||||
|
||||
if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
|
||||
return -EINVAL;
|
||||
|
||||
phys = kvm_pte_to_phys(pte);
|
||||
if (!addr_is_memory(phys))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Adjust the host stage-2 mappings to match the ownership attributes
|
||||
* configured in the hypervisor stage-1.
|
||||
*/
|
||||
state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
|
||||
switch (state) {
|
||||
case PKVM_PAGE_OWNED:
|
||||
return host_stage2_set_owner_locked(phys, PAGE_SIZE, pkvm_hyp_id);
|
||||
case PKVM_PAGE_SHARED_OWNED:
|
||||
prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED);
|
||||
break;
|
||||
case PKVM_PAGE_SHARED_BORROWED:
|
||||
prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return host_stage2_idmap_locked(phys, PAGE_SIZE, prot);
|
||||
}
|
||||
|
||||
static int finalize_host_mappings(void)
|
||||
{
|
||||
struct kvm_pgtable_walker walker = {
|
||||
.cb = finalize_host_mappings_walker,
|
||||
.flags = KVM_PGTABLE_WALK_LEAF,
|
||||
};
|
||||
|
||||
return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), &walker);
|
||||
}
|
||||
|
||||
void __noreturn __pkvm_init_finalise(void)
|
||||
{
|
||||
struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
|
||||
|
@ -167,6 +229,10 @@ void __noreturn __pkvm_init_finalise(void)
|
|||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = finalize_host_mappings();
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) {
|
||||
.zalloc_page = hyp_zalloc_hyp_page,
|
||||
.phys_to_virt = hyp_phys_to_virt,
|
||||
|
|
|
@ -41,7 +41,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
|
|||
___activate_traps(vcpu);
|
||||
__activate_traps_common(vcpu);
|
||||
|
||||
val = CPTR_EL2_DEFAULT;
|
||||
val = vcpu->arch.cptr_el2;
|
||||
val |= CPTR_EL2_TTA | CPTR_EL2_TAM;
|
||||
if (!update_fp_enabled(vcpu)) {
|
||||
val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
|
||||
|
@ -69,12 +69,10 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
|
|||
static void __deactivate_traps(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
extern char __kvm_hyp_host_vector[];
|
||||
u64 mdcr_el2, cptr;
|
||||
u64 cptr;
|
||||
|
||||
___deactivate_traps(vcpu);
|
||||
|
||||
mdcr_el2 = read_sysreg(mdcr_el2);
|
||||
|
||||
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
|
||||
u64 val;
|
||||
|
||||
|
@ -92,13 +90,8 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
|
|||
isb();
|
||||
}
|
||||
|
||||
__deactivate_traps_common();
|
||||
__deactivate_traps_common(vcpu);
|
||||
|
||||
mdcr_el2 &= MDCR_EL2_HPMN_MASK;
|
||||
mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
|
||||
mdcr_el2 |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT;
|
||||
|
||||
write_sysreg(mdcr_el2, mdcr_el2);
|
||||
write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);
|
||||
|
||||
cptr = CPTR_EL2_DEFAULT;
|
||||
|
@ -170,6 +163,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
{
|
||||
struct kvm_cpu_context *host_ctxt;
|
||||
struct kvm_cpu_context *guest_ctxt;
|
||||
struct kvm_s2_mmu *mmu;
|
||||
bool pmu_switch_needed;
|
||||
u64 exit_code;
|
||||
|
||||
|
@ -213,7 +207,8 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
__sysreg32_restore_state(vcpu);
|
||||
__sysreg_restore_state_nvhe(guest_ctxt);
|
||||
|
||||
__load_guest_stage2(kern_hyp_va(vcpu->arch.hw_mmu));
|
||||
mmu = kern_hyp_va(vcpu->arch.hw_mmu);
|
||||
__load_stage2(mmu, kern_hyp_va(mmu->arch));
|
||||
__activate_traps(vcpu);
|
||||
|
||||
__hyp_vgic_restore_state(vcpu);
|
||||
|
|
|
@ -34,12 +34,12 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
|
|||
}
|
||||
|
||||
/*
|
||||
* __load_guest_stage2() includes an ISB only when the AT
|
||||
* __load_stage2() includes an ISB only when the AT
|
||||
* workaround is applied. Take care of the opposite condition,
|
||||
* ensuring that we always have an ISB, but not two ISBs back
|
||||
* to back.
|
||||
*/
|
||||
__load_guest_stage2(mmu);
|
||||
__load_stage2(mmu, kern_hyp_va(mmu->arch));
|
||||
asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
|
||||
}
|
||||
|
||||
|
|
|
@ -11,16 +11,12 @@
|
|||
#include <asm/kvm_pgtable.h>
|
||||
#include <asm/stage2_pgtable.h>
|
||||
|
||||
#define KVM_PTE_VALID BIT(0)
|
||||
|
||||
#define KVM_PTE_TYPE BIT(1)
|
||||
#define KVM_PTE_TYPE_BLOCK 0
|
||||
#define KVM_PTE_TYPE_PAGE 1
|
||||
#define KVM_PTE_TYPE_TABLE 1
|
||||
|
||||
#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT)
|
||||
#define KVM_PTE_ADDR_51_48 GENMASK(15, 12)
|
||||
|
||||
#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
|
||||
|
||||
#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
|
||||
|
@ -40,6 +36,8 @@
|
|||
|
||||
#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 51)
|
||||
|
||||
#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)
|
||||
|
||||
#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
|
||||
|
||||
#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
|
||||
|
@ -48,9 +46,7 @@
|
|||
KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
|
||||
KVM_PTE_LEAF_ATTR_HI_S2_XN)
|
||||
|
||||
#define KVM_PTE_LEAF_ATTR_S2_IGNORED GENMASK(58, 55)
|
||||
|
||||
#define KVM_INVALID_PTE_OWNER_MASK GENMASK(63, 56)
|
||||
#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2)
|
||||
#define KVM_MAX_OWNER_ID 1
|
||||
|
||||
struct kvm_pgtable_walk_data {
|
||||
|
@ -61,17 +57,6 @@ struct kvm_pgtable_walk_data {
|
|||
u64 end;
|
||||
};
|
||||
|
||||
static u64 kvm_granule_shift(u32 level)
|
||||
{
|
||||
/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
|
||||
return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
|
||||
}
|
||||
|
||||
static u64 kvm_granule_size(u32 level)
|
||||
{
|
||||
return BIT(kvm_granule_shift(level));
|
||||
}
|
||||
|
||||
#define KVM_PHYS_INVALID (-1ULL)
|
||||
|
||||
static bool kvm_phys_is_valid(u64 phys)
|
||||
|
@ -79,15 +64,6 @@ static bool kvm_phys_is_valid(u64 phys)
|
|||
return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_PARANGE_MAX));
|
||||
}
|
||||
|
||||
static bool kvm_level_supports_block_mapping(u32 level)
|
||||
{
|
||||
/*
|
||||
* Reject invalid block mappings and don't bother with 4TB mappings for
|
||||
* 52-bit PAs.
|
||||
*/
|
||||
return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1));
|
||||
}
|
||||
|
||||
static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
|
||||
{
|
||||
u64 granule = kvm_granule_size(level);
|
||||
|
@ -135,11 +111,6 @@ static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
|
|||
return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
|
||||
}
|
||||
|
||||
static bool kvm_pte_valid(kvm_pte_t pte)
|
||||
{
|
||||
return pte & KVM_PTE_VALID;
|
||||
}
|
||||
|
||||
static bool kvm_pte_table(kvm_pte_t pte, u32 level)
|
||||
{
|
||||
if (level == KVM_PGTABLE_MAX_LEVELS - 1)
|
||||
|
@ -151,16 +122,6 @@ static bool kvm_pte_table(kvm_pte_t pte, u32 level)
|
|||
return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE;
|
||||
}
|
||||
|
||||
static u64 kvm_pte_to_phys(kvm_pte_t pte)
|
||||
{
|
||||
u64 pa = pte & KVM_PTE_ADDR_MASK;
|
||||
|
||||
if (PAGE_SHIFT == 16)
|
||||
pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
|
||||
|
||||
return pa;
|
||||
}
|
||||
|
||||
static kvm_pte_t kvm_phys_to_pte(u64 pa)
|
||||
{
|
||||
kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;
|
||||
|
@ -326,6 +287,45 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
|||
return _kvm_pgtable_walk(&walk_data);
|
||||
}
|
||||
|
||||
struct leaf_walk_data {
|
||||
kvm_pte_t pte;
|
||||
u32 level;
|
||||
};
|
||||
|
||||
static int leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
|
||||
enum kvm_pgtable_walk_flags flag, void * const arg)
|
||||
{
|
||||
struct leaf_walk_data *data = arg;
|
||||
|
||||
data->pte = *ptep;
|
||||
data->level = level;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
|
||||
kvm_pte_t *ptep, u32 *level)
|
||||
{
|
||||
struct leaf_walk_data data;
|
||||
struct kvm_pgtable_walker walker = {
|
||||
.cb = leaf_walker,
|
||||
.flags = KVM_PGTABLE_WALK_LEAF,
|
||||
.arg = &data,
|
||||
};
|
||||
int ret;
|
||||
|
||||
ret = kvm_pgtable_walk(pgt, ALIGN_DOWN(addr, PAGE_SIZE),
|
||||
PAGE_SIZE, &walker);
|
||||
if (!ret) {
|
||||
if (ptep)
|
||||
*ptep = data.pte;
|
||||
if (level)
|
||||
*level = data.level;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct hyp_map_data {
|
||||
u64 phys;
|
||||
kvm_pte_t attr;
|
||||
|
@ -357,11 +357,47 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
|
|||
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
|
||||
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
|
||||
attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
|
||||
attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
|
||||
*ptep = attr;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
|
||||
{
|
||||
enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
|
||||
u32 ap;
|
||||
|
||||
if (!kvm_pte_valid(pte))
|
||||
return prot;
|
||||
|
||||
if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN))
|
||||
prot |= KVM_PGTABLE_PROT_X;
|
||||
|
||||
ap = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_AP, pte);
|
||||
if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RO)
|
||||
prot |= KVM_PGTABLE_PROT_R;
|
||||
else if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RW)
|
||||
prot |= KVM_PGTABLE_PROT_RW;
|
||||
|
||||
return prot;
|
||||
}
|
||||
|
||||
static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
|
||||
{
|
||||
/*
|
||||
* Tolerate KVM recreating the exact same mapping, or changing software
|
||||
* bits if the existing mapping was valid.
|
||||
*/
|
||||
if (old == new)
|
||||
return false;
|
||||
|
||||
if (!kvm_pte_valid(old))
|
||||
return true;
|
||||
|
||||
return !WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW);
|
||||
}
|
||||
|
||||
static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
|
||||
kvm_pte_t *ptep, struct hyp_map_data *data)
|
||||
{
|
||||
|
@ -371,9 +407,8 @@ static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
|
|||
if (!kvm_block_mapping_supported(addr, end, phys, level))
|
||||
return false;
|
||||
|
||||
/* Tolerate KVM recreating the exact same mapping */
|
||||
new = kvm_init_valid_leaf_pte(phys, data->attr, level);
|
||||
if (old != new && !WARN_ON(kvm_pte_valid(old)))
|
||||
if (hyp_pte_needs_update(old, new))
|
||||
smp_store_release(ptep, new);
|
||||
|
||||
data->phys += granule;
|
||||
|
@ -438,6 +473,8 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
|
|||
pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels;
|
||||
pgt->mm_ops = mm_ops;
|
||||
pgt->mmu = NULL;
|
||||
pgt->force_pte_cb = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -475,6 +512,9 @@ struct stage2_map_data {
|
|||
void *memcache;
|
||||
|
||||
struct kvm_pgtable_mm_ops *mm_ops;
|
||||
|
||||
/* Force mappings to page granularity */
|
||||
bool force_pte;
|
||||
};
|
||||
|
||||
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
|
||||
|
@ -539,11 +579,29 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
|
|||
|
||||
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
|
||||
attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
|
||||
attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
|
||||
*ptep = attr;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte)
|
||||
{
|
||||
enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
|
||||
|
||||
if (!kvm_pte_valid(pte))
|
||||
return prot;
|
||||
|
||||
if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R)
|
||||
prot |= KVM_PGTABLE_PROT_R;
|
||||
if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W)
|
||||
prot |= KVM_PGTABLE_PROT_W;
|
||||
if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN))
|
||||
prot |= KVM_PGTABLE_PROT_X;
|
||||
|
||||
return prot;
|
||||
}
|
||||
|
||||
static bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
|
||||
{
|
||||
if (!kvm_pte_valid(old) || !kvm_pte_valid(new))
|
||||
|
@ -588,6 +646,15 @@ static bool stage2_pte_executable(kvm_pte_t pte)
|
|||
return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
|
||||
}
|
||||
|
||||
static bool stage2_leaf_mapping_allowed(u64 addr, u64 end, u32 level,
|
||||
struct stage2_map_data *data)
|
||||
{
|
||||
if (data->force_pte && (level < (KVM_PGTABLE_MAX_LEVELS - 1)))
|
||||
return false;
|
||||
|
||||
return kvm_block_mapping_supported(addr, end, data->phys, level);
|
||||
}
|
||||
|
||||
static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
|
||||
kvm_pte_t *ptep,
|
||||
struct stage2_map_data *data)
|
||||
|
@ -597,7 +664,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
|
|||
struct kvm_pgtable *pgt = data->mmu->pgt;
|
||||
struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
|
||||
|
||||
if (!kvm_block_mapping_supported(addr, end, phys, level))
|
||||
if (!stage2_leaf_mapping_allowed(addr, end, level, data))
|
||||
return -E2BIG;
|
||||
|
||||
if (kvm_phys_is_valid(phys))
|
||||
|
@ -641,7 +708,7 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
|
|||
if (data->anchor)
|
||||
return 0;
|
||||
|
||||
if (!kvm_block_mapping_supported(addr, end, data->phys, level))
|
||||
if (!stage2_leaf_mapping_allowed(addr, end, level, data))
|
||||
return 0;
|
||||
|
||||
data->childp = kvm_pte_follow(*ptep, data->mm_ops);
|
||||
|
@ -771,6 +838,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
|||
.mmu = pgt->mmu,
|
||||
.memcache = mc,
|
||||
.mm_ops = pgt->mm_ops,
|
||||
.force_pte = pgt->force_pte_cb && pgt->force_pte_cb(addr, addr + size, prot),
|
||||
};
|
||||
struct kvm_pgtable_walker walker = {
|
||||
.cb = stage2_map_walker,
|
||||
|
@ -802,6 +870,7 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
|||
.memcache = mc,
|
||||
.mm_ops = pgt->mm_ops,
|
||||
.owner_id = owner_id,
|
||||
.force_pte = true,
|
||||
};
|
||||
struct kvm_pgtable_walker walker = {
|
||||
.cb = stage2_map_walker,
|
||||
|
@ -995,6 +1064,9 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
|
|||
u32 level;
|
||||
kvm_pte_t set = 0, clr = 0;
|
||||
|
||||
if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
|
||||
return -EINVAL;
|
||||
|
||||
if (prot & KVM_PGTABLE_PROT_R)
|
||||
set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
|
||||
|
||||
|
@ -1043,9 +1115,11 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
|
|||
return kvm_pgtable_walk(pgt, addr, size, &walker);
|
||||
}
|
||||
|
||||
int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
|
||||
struct kvm_pgtable_mm_ops *mm_ops,
|
||||
enum kvm_pgtable_stage2_flags flags)
|
||||
|
||||
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
|
||||
struct kvm_pgtable_mm_ops *mm_ops,
|
||||
enum kvm_pgtable_stage2_flags flags,
|
||||
kvm_pgtable_force_pte_cb_t force_pte_cb)
|
||||
{
|
||||
size_t pgd_sz;
|
||||
u64 vtcr = arch->vtcr;
|
||||
|
@ -1063,6 +1137,7 @@ int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch
|
|||
pgt->mm_ops = mm_ops;
|
||||
pgt->mmu = &arch->mmu;
|
||||
pgt->flags = flags;
|
||||
pgt->force_pte_cb = force_pte_cb;
|
||||
|
||||
/* Ensure zeroed PGD pages are visible to the hardware walker */
|
||||
dsb(ishst);
|
||||
|
@ -1102,77 +1177,3 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
|
|||
pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz);
|
||||
pgt->pgd = NULL;
|
||||
}
|
||||
|
||||
#define KVM_PTE_LEAF_S2_COMPAT_MASK (KVM_PTE_LEAF_ATTR_S2_PERMS | \
|
||||
KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR | \
|
||||
KVM_PTE_LEAF_ATTR_S2_IGNORED)
|
||||
|
||||
static int stage2_check_permission_walker(u64 addr, u64 end, u32 level,
|
||||
kvm_pte_t *ptep,
|
||||
enum kvm_pgtable_walk_flags flag,
|
||||
void * const arg)
|
||||
{
|
||||
kvm_pte_t old_attr, pte = *ptep, *new_attr = arg;
|
||||
|
||||
/*
|
||||
* Compatible mappings are either invalid and owned by the page-table
|
||||
* owner (whose id is 0), or valid with matching permission attributes.
|
||||
*/
|
||||
if (kvm_pte_valid(pte)) {
|
||||
old_attr = pte & KVM_PTE_LEAF_S2_COMPAT_MASK;
|
||||
if (old_attr != *new_attr)
|
||||
return -EEXIST;
|
||||
} else if (pte) {
|
||||
return -EEXIST;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr,
|
||||
enum kvm_pgtable_prot prot,
|
||||
struct kvm_mem_range *range)
|
||||
{
|
||||
kvm_pte_t attr;
|
||||
struct kvm_pgtable_walker check_perm_walker = {
|
||||
.cb = stage2_check_permission_walker,
|
||||
.flags = KVM_PGTABLE_WALK_LEAF,
|
||||
.arg = &attr,
|
||||
};
|
||||
u64 granule, start, end;
|
||||
u32 level;
|
||||
int ret;
|
||||
|
||||
ret = stage2_set_prot_attr(pgt, prot, &attr);
|
||||
if (ret)
|
||||
return ret;
|
||||
attr &= KVM_PTE_LEAF_S2_COMPAT_MASK;
|
||||
|
||||
for (level = pgt->start_level; level < KVM_PGTABLE_MAX_LEVELS; level++) {
|
||||
granule = kvm_granule_size(level);
|
||||
start = ALIGN_DOWN(addr, granule);
|
||||
end = start + granule;
|
||||
|
||||
if (!kvm_level_supports_block_mapping(level))
|
||||
continue;
|
||||
|
||||
if (start < range->start || range->end < end)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Check the presence of existing mappings with incompatible
|
||||
* permissions within the current block range, and try one level
|
||||
* deeper if one is found.
|
||||
*/
|
||||
ret = kvm_pgtable_walk(pgt, start, granule, &check_perm_walker);
|
||||
if (ret != -EEXIST)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!ret) {
|
||||
range->start = start;
|
||||
range->end = end;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu)
|
|||
__debug_switch_to_host_common(vcpu);
|
||||
}
|
||||
|
||||
u32 __kvm_get_mdcr_el2(void)
|
||||
u64 __kvm_get_mdcr_el2(void)
|
||||
{
|
||||
return read_sysreg(mdcr_el2);
|
||||
}
|
||||
|
|
|
@ -91,17 +91,9 @@ void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
|
|||
__activate_traps_common(vcpu);
|
||||
}
|
||||
|
||||
void deactivate_traps_vhe_put(void)
|
||||
void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 mdcr_el2 = read_sysreg(mdcr_el2);
|
||||
|
||||
mdcr_el2 &= MDCR_EL2_HPMN_MASK |
|
||||
MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
|
||||
MDCR_EL2_TPMS;
|
||||
|
||||
write_sysreg(mdcr_el2, mdcr_el2);
|
||||
|
||||
__deactivate_traps_common();
|
||||
__deactivate_traps_common(vcpu);
|
||||
}
|
||||
|
||||
/* Switch to the guest for VHE systems running in EL2 */
|
||||
|
@ -124,11 +116,11 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
|
|||
*
|
||||
* We have already configured the guest's stage 1 translation in
|
||||
* kvm_vcpu_load_sysregs_vhe above. We must now call
|
||||
* __load_guest_stage2 before __activate_traps, because
|
||||
* __load_guest_stage2 configures stage 2 translation, and
|
||||
* __load_stage2 before __activate_traps, because
|
||||
* __load_stage2 configures stage 2 translation, and
|
||||
* __activate_traps clear HCR_EL2.TGE (among other things).
|
||||
*/
|
||||
__load_guest_stage2(vcpu->arch.hw_mmu);
|
||||
__load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);
|
||||
__activate_traps(vcpu);
|
||||
|
||||
__kvm_adjust_pc(vcpu);
|
||||
|
|
|
@ -101,7 +101,7 @@ void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu)
|
|||
struct kvm_cpu_context *host_ctxt;
|
||||
|
||||
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
|
||||
deactivate_traps_vhe_put();
|
||||
deactivate_traps_vhe_put(vcpu);
|
||||
|
||||
__sysreg_save_el1_state(guest_ctxt);
|
||||
__sysreg_save_user_state(guest_ctxt);
|
||||
|
|
|
@ -50,10 +50,10 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
|
|||
*
|
||||
* ARM erratum 1165522 requires some special handling (again),
|
||||
* as we need to make sure both stages of translation are in
|
||||
* place before clearing TGE. __load_guest_stage2() already
|
||||
* place before clearing TGE. __load_stage2() already
|
||||
* has an ISB in order to deal with this.
|
||||
*/
|
||||
__load_guest_stage2(mmu);
|
||||
__load_stage2(mmu, mmu->arch);
|
||||
val = read_sysreg(hcr_el2);
|
||||
val &= ~HCR_TGE;
|
||||
write_sysreg(val, hcr_el2);
|
||||
|
|
|
@ -80,6 +80,7 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
|
|||
*/
|
||||
void kvm_flush_remote_tlbs(struct kvm *kvm)
|
||||
{
|
||||
++kvm->stat.generic.remote_tlb_flush_requests;
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
|
||||
}
|
||||
|
||||
|
@ -259,10 +260,8 @@ static int __create_hyp_mappings(unsigned long start, unsigned long size,
|
|||
{
|
||||
int err;
|
||||
|
||||
if (!kvm_host_owns_hyp_mappings()) {
|
||||
return kvm_call_hyp_nvhe(__pkvm_create_mappings,
|
||||
start, size, phys, prot);
|
||||
}
|
||||
if (WARN_ON(!kvm_host_owns_hyp_mappings()))
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&kvm_hyp_pgd_mutex);
|
||||
err = kvm_pgtable_hyp_map(hyp_pgtable, start, size, phys, prot);
|
||||
|
@ -282,6 +281,21 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
|
|||
}
|
||||
}
|
||||
|
||||
static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
|
||||
{
|
||||
phys_addr_t addr;
|
||||
int ret;
|
||||
|
||||
for (addr = ALIGN_DOWN(start, PAGE_SIZE); addr < end; addr += PAGE_SIZE) {
|
||||
ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
|
||||
__phys_to_pfn(addr));
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
|
||||
* @from: The virtual kernel start address of the range
|
||||
|
@ -302,6 +316,13 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
|
|||
if (is_kernel_in_hyp_mode())
|
||||
return 0;
|
||||
|
||||
if (!kvm_host_owns_hyp_mappings()) {
|
||||
if (WARN_ON(prot != PAGE_HYP))
|
||||
return -EPERM;
|
||||
return pkvm_share_hyp(kvm_kaddr_to_phys(from),
|
||||
kvm_kaddr_to_phys(to));
|
||||
}
|
||||
|
||||
start = start & PAGE_MASK;
|
||||
end = PAGE_ALIGN(end);
|
||||
|
||||
|
@ -433,6 +454,32 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static struct kvm_pgtable_mm_ops kvm_user_mm_ops = {
|
||||
/* We shouldn't need any other callback to walk the PT */
|
||||
.phys_to_virt = kvm_host_va,
|
||||
};
|
||||
|
||||
static int get_user_mapping_size(struct kvm *kvm, u64 addr)
|
||||
{
|
||||
struct kvm_pgtable pgt = {
|
||||
.pgd = (kvm_pte_t *)kvm->mm->pgd,
|
||||
.ia_bits = VA_BITS,
|
||||
.start_level = (KVM_PGTABLE_MAX_LEVELS -
|
||||
CONFIG_PGTABLE_LEVELS),
|
||||
.mm_ops = &kvm_user_mm_ops,
|
||||
};
|
||||
kvm_pte_t pte = 0; /* Keep GCC quiet... */
|
||||
u32 level = ~0;
|
||||
int ret;
|
||||
|
||||
ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level);
|
||||
VM_BUG_ON(ret);
|
||||
VM_BUG_ON(level >= KVM_PGTABLE_MAX_LEVELS);
|
||||
VM_BUG_ON(!(pte & PTE_VALID));
|
||||
|
||||
return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level));
|
||||
}
|
||||
|
||||
static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
|
||||
.zalloc_page = stage2_memcache_zalloc_page,
|
||||
.zalloc_pages_exact = kvm_host_zalloc_pages_exact,
|
||||
|
@ -485,7 +532,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
|
|||
mmu->arch = &kvm->arch;
|
||||
mmu->pgt = pgt;
|
||||
mmu->pgd_phys = __pa(pgt->pgd);
|
||||
mmu->vmid.vmid_gen = 0;
|
||||
WRITE_ONCE(mmu->vmid.vmid_gen, 0);
|
||||
return 0;
|
||||
|
||||
out_destroy_pgtable:
|
||||
|
@ -780,7 +827,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
|
|||
* Returns the size of the mapping.
|
||||
*/
|
||||
static unsigned long
|
||||
transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
|
||||
transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long hva, kvm_pfn_t *pfnp,
|
||||
phys_addr_t *ipap)
|
||||
{
|
||||
|
@ -791,8 +838,8 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
|
|||
* sure that the HVA and IPA are sufficiently aligned and that the
|
||||
* block map is contained within the memslot.
|
||||
*/
|
||||
if (kvm_is_transparent_hugepage(pfn) &&
|
||||
fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
|
||||
if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
|
||||
get_user_mapping_size(kvm, hva) >= PMD_SIZE) {
|
||||
/*
|
||||
* The address we faulted on is backed by a transparent huge
|
||||
* page. However, because we map the compound huge page and
|
||||
|
@ -814,7 +861,7 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
|
|||
*ipap &= PMD_MASK;
|
||||
kvm_release_pfn_clean(pfn);
|
||||
pfn &= ~(PTRS_PER_PMD - 1);
|
||||
kvm_get_pfn(pfn);
|
||||
get_page(pfn_to_page(pfn));
|
||||
*pfnp = pfn;
|
||||
|
||||
return PMD_SIZE;
|
||||
|
@ -1050,9 +1097,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||
* If we are not forced to use page mapping, check if we are
|
||||
* backed by a THP and thus use block mapping if possible.
|
||||
*/
|
||||
if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
|
||||
vma_pagesize = transparent_hugepage_adjust(memslot, hva,
|
||||
&pfn, &fault_ipa);
|
||||
if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
|
||||
if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE)
|
||||
vma_pagesize = fault_granule;
|
||||
else
|
||||
vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
|
||||
hva, &pfn,
|
||||
&fault_ipa);
|
||||
}
|
||||
|
||||
if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
|
||||
/* Check the VMM hasn't introduced a new VM_SHARED VMA */
|
||||
|
|
|
@ -50,7 +50,7 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
|
|||
|
||||
int kvm_perf_init(void)
|
||||
{
|
||||
if (kvm_pmu_probe_pmuver() != 0xf && !is_protected_kvm_enabled())
|
||||
if (kvm_pmu_probe_pmuver() != ID_AA64DFR0_PMUVER_IMP_DEF && !is_protected_kvm_enabled())
|
||||
static_branch_enable(&kvm_arm_pmu_available);
|
||||
|
||||
return perf_register_guest_info_callbacks(&kvm_guest_cbs);
|
||||
|
|
|
@ -373,7 +373,6 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
|
|||
reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
|
||||
reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
|
||||
reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
|
||||
reg &= kvm_pmu_valid_counter_mask(vcpu);
|
||||
}
|
||||
|
||||
return reg;
|
||||
|
@ -564,20 +563,21 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
|
|||
*/
|
||||
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
|
||||
{
|
||||
unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
|
||||
int i;
|
||||
|
||||
if (val & ARMV8_PMU_PMCR_E) {
|
||||
kvm_pmu_enable_counter_mask(vcpu,
|
||||
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
|
||||
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
|
||||
} else {
|
||||
kvm_pmu_disable_counter_mask(vcpu, mask);
|
||||
kvm_pmu_disable_counter_mask(vcpu,
|
||||
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
|
||||
}
|
||||
|
||||
if (val & ARMV8_PMU_PMCR_C)
|
||||
kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
|
||||
|
||||
if (val & ARMV8_PMU_PMCR_P) {
|
||||
unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
|
||||
mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
|
||||
for_each_set_bit(i, &mask, 32)
|
||||
kvm_pmu_set_counter_value(vcpu, i, 0);
|
||||
|
@ -745,7 +745,7 @@ int kvm_pmu_probe_pmuver(void)
|
|||
struct perf_event_attr attr = { };
|
||||
struct perf_event *event;
|
||||
struct arm_pmu *pmu;
|
||||
int pmuver = 0xf;
|
||||
int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF;
|
||||
|
||||
/*
|
||||
* Create a dummy event that only counts user cycles. As we'll never
|
||||
|
@ -770,7 +770,7 @@ int kvm_pmu_probe_pmuver(void)
|
|||
if (IS_ERR(event)) {
|
||||
pr_err_once("kvm: pmu event creation failed %ld\n",
|
||||
PTR_ERR(event));
|
||||
return 0xf;
|
||||
return ID_AA64DFR0_PMUVER_IMP_DEF;
|
||||
}
|
||||
|
||||
if (event->pmu) {
|
||||
|
@ -923,7 +923,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
|
|||
if (!vcpu->kvm->arch.pmuver)
|
||||
vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();
|
||||
|
||||
if (vcpu->kvm->arch.pmuver == 0xf)
|
||||
if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
|
||||
return -ENODEV;
|
||||
|
||||
switch (attr->attr) {
|
||||
|
|
|
@ -59,6 +59,12 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
|
|||
kvm_vcpu_kick(vcpu);
|
||||
}
|
||||
|
||||
static inline bool kvm_psci_valid_affinity(struct kvm_vcpu *vcpu,
|
||||
unsigned long affinity)
|
||||
{
|
||||
return !(affinity & ~MPIDR_HWID_BITMASK);
|
||||
}
|
||||
|
||||
static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
|
||||
{
|
||||
struct vcpu_reset_state *reset_state;
|
||||
|
@ -66,9 +72,9 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
|
|||
struct kvm_vcpu *vcpu = NULL;
|
||||
unsigned long cpu_id;
|
||||
|
||||
cpu_id = smccc_get_arg1(source_vcpu) & MPIDR_HWID_BITMASK;
|
||||
if (vcpu_mode_is_32bit(source_vcpu))
|
||||
cpu_id &= ~((u32) 0);
|
||||
cpu_id = smccc_get_arg1(source_vcpu);
|
||||
if (!kvm_psci_valid_affinity(source_vcpu, cpu_id))
|
||||
return PSCI_RET_INVALID_PARAMS;
|
||||
|
||||
vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id);
|
||||
|
||||
|
@ -126,6 +132,9 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
|
|||
target_affinity = smccc_get_arg1(vcpu);
|
||||
lowest_affinity_level = smccc_get_arg2(vcpu);
|
||||
|
||||
if (!kvm_psci_valid_affinity(vcpu, target_affinity))
|
||||
return PSCI_RET_INVALID_PARAMS;
|
||||
|
||||
/* Determine target affinity mask */
|
||||
target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
|
||||
if (!target_affinity_mask)
|
||||
|
|
|
@ -210,10 +210,16 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
|
|||
*/
|
||||
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_reset_state reset_state;
|
||||
int ret;
|
||||
bool loaded;
|
||||
u32 pstate;
|
||||
|
||||
mutex_lock(&vcpu->kvm->lock);
|
||||
reset_state = vcpu->arch.reset_state;
|
||||
WRITE_ONCE(vcpu->arch.reset_state.reset, false);
|
||||
mutex_unlock(&vcpu->kvm->lock);
|
||||
|
||||
/* Reset PMU outside of the non-preemptible section */
|
||||
kvm_pmu_vcpu_reset(vcpu);
|
||||
|
||||
|
@ -276,8 +282,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
|
|||
* Additional reset state handling that PSCI may have imposed on us.
|
||||
* Must be done after all the sys_reg reset.
|
||||
*/
|
||||
if (vcpu->arch.reset_state.reset) {
|
||||
unsigned long target_pc = vcpu->arch.reset_state.pc;
|
||||
if (reset_state.reset) {
|
||||
unsigned long target_pc = reset_state.pc;
|
||||
|
||||
/* Gracefully handle Thumb2 entry point */
|
||||
if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) {
|
||||
|
@ -286,13 +292,11 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
/* Propagate caller endianness */
|
||||
if (vcpu->arch.reset_state.be)
|
||||
if (reset_state.be)
|
||||
kvm_vcpu_set_be(vcpu);
|
||||
|
||||
*vcpu_pc(vcpu) = target_pc;
|
||||
vcpu_set_reg(vcpu, 0, vcpu->arch.reset_state.r0);
|
||||
|
||||
vcpu->arch.reset_state.reset = false;
|
||||
vcpu_set_reg(vcpu, 0, reset_state.r0);
|
||||
}
|
||||
|
||||
/* Reset timer */
|
||||
|
@ -311,31 +315,26 @@ u32 get_kvm_ipa_limit(void)
|
|||
|
||||
int kvm_set_ipa_limit(void)
|
||||
{
|
||||
unsigned int parange, tgran_2;
|
||||
unsigned int parange;
|
||||
u64 mmfr0;
|
||||
|
||||
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
|
||||
parange = cpuid_feature_extract_unsigned_field(mmfr0,
|
||||
ID_AA64MMFR0_PARANGE_SHIFT);
|
||||
/*
|
||||
* IPA size beyond 48 bits could not be supported
|
||||
* on either 4K or 16K page size. Hence let's cap
|
||||
* it to 48 bits, in case it's reported as larger
|
||||
* on the system.
|
||||
*/
|
||||
if (PAGE_SIZE != SZ_64K)
|
||||
parange = min(parange, (unsigned int)ID_AA64MMFR0_PARANGE_48);
|
||||
|
||||
/*
|
||||
* Check with ARMv8.5-GTG that our PAGE_SIZE is supported at
|
||||
* Stage-2. If not, things will stop very quickly.
|
||||
*/
|
||||
switch (PAGE_SIZE) {
|
||||
default:
|
||||
case SZ_4K:
|
||||
tgran_2 = ID_AA64MMFR0_TGRAN4_2_SHIFT;
|
||||
break;
|
||||
case SZ_16K:
|
||||
tgran_2 = ID_AA64MMFR0_TGRAN16_2_SHIFT;
|
||||
break;
|
||||
case SZ_64K:
|
||||
tgran_2 = ID_AA64MMFR0_TGRAN64_2_SHIFT;
|
||||
break;
|
||||
}
|
||||
|
||||
switch (cpuid_feature_extract_unsigned_field(mmfr0, tgran_2)) {
|
||||
switch (cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_TGRAN_2_SHIFT)) {
|
||||
case ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE:
|
||||
kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n");
|
||||
return -EINVAL;
|
||||
|
@ -369,7 +368,7 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
|
|||
phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
|
||||
if (phys_shift) {
|
||||
if (phys_shift > kvm_ipa_limit ||
|
||||
phys_shift < 32)
|
||||
phys_shift < ARM64_MIN_PARANGE_BITS)
|
||||
return -EINVAL;
|
||||
} else {
|
||||
phys_shift = KVM_PHYS_SHIFT;
|
||||
|
|
|
@ -44,10 +44,6 @@
|
|||
* 64bit interface.
|
||||
*/
|
||||
|
||||
#define reg_to_encoding(x) \
|
||||
sys_reg((u32)(x)->Op0, (u32)(x)->Op1, \
|
||||
(u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2)
|
||||
|
||||
static bool read_from_write_only(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *params,
|
||||
const struct sys_reg_desc *r)
|
||||
|
@ -318,14 +314,14 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
|
|||
/*
|
||||
* We want to avoid world-switching all the DBG registers all the
|
||||
* time:
|
||||
*
|
||||
*
|
||||
* - If we've touched any debug register, it is likely that we're
|
||||
* going to touch more of them. It then makes sense to disable the
|
||||
* traps and start doing the save/restore dance
|
||||
* - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is
|
||||
* then mandatory to save/restore the registers, as the guest
|
||||
* depends on them.
|
||||
*
|
||||
*
|
||||
* For this, we use a DIRTY bit, indicating the guest has modified the
|
||||
* debug registers, used as follow:
|
||||
*
|
||||
|
@ -603,6 +599,41 @@ static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu,
|
|||
return REG_HIDDEN;
|
||||
}
|
||||
|
||||
static void reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
|
||||
{
|
||||
u64 n, mask = BIT(ARMV8_PMU_CYCLE_IDX);
|
||||
|
||||
/* No PMU available, any PMU reg may UNDEF... */
|
||||
if (!kvm_arm_support_pmu_v3())
|
||||
return;
|
||||
|
||||
n = read_sysreg(pmcr_el0) >> ARMV8_PMU_PMCR_N_SHIFT;
|
||||
n &= ARMV8_PMU_PMCR_N_MASK;
|
||||
if (n)
|
||||
mask |= GENMASK(n - 1, 0);
|
||||
|
||||
reset_unknown(vcpu, r);
|
||||
__vcpu_sys_reg(vcpu, r->reg) &= mask;
|
||||
}
|
||||
|
||||
static void reset_pmevcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
|
||||
{
|
||||
reset_unknown(vcpu, r);
|
||||
__vcpu_sys_reg(vcpu, r->reg) &= GENMASK(31, 0);
|
||||
}
|
||||
|
||||
static void reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
|
||||
{
|
||||
reset_unknown(vcpu, r);
|
||||
__vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_EVTYPE_MASK;
|
||||
}
|
||||
|
||||
static void reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
|
||||
{
|
||||
reset_unknown(vcpu, r);
|
||||
__vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_COUNTER_MASK;
|
||||
}
|
||||
|
||||
static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
|
||||
{
|
||||
u64 pmcr, val;
|
||||
|
@ -845,7 +876,7 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
|||
kvm_pmu_disable_counter_mask(vcpu, val);
|
||||
}
|
||||
} else {
|
||||
p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
|
||||
p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -869,7 +900,7 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
|||
/* accessing PMINTENCLR_EL1 */
|
||||
__vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
|
||||
} else {
|
||||
p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask;
|
||||
p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -891,7 +922,7 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
|||
/* accessing PMOVSCLR_EL0 */
|
||||
__vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
|
||||
} else {
|
||||
p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask;
|
||||
p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -944,16 +975,18 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
|||
trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr }
|
||||
|
||||
#define PMU_SYS_REG(r) \
|
||||
SYS_DESC(r), .reset = reset_unknown, .visibility = pmu_visibility
|
||||
SYS_DESC(r), .reset = reset_pmu_reg, .visibility = pmu_visibility
|
||||
|
||||
/* Macro to expand the PMEVCNTRn_EL0 register */
|
||||
#define PMU_PMEVCNTR_EL0(n) \
|
||||
{ PMU_SYS_REG(SYS_PMEVCNTRn_EL0(n)), \
|
||||
.reset = reset_pmevcntr, \
|
||||
.access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), }
|
||||
|
||||
/* Macro to expand the PMEVTYPERn_EL0 register */
|
||||
#define PMU_PMEVTYPER_EL0(n) \
|
||||
{ PMU_SYS_REG(SYS_PMEVTYPERn_EL0(n)), \
|
||||
.reset = reset_pmevtyper, \
|
||||
.access = access_pmu_evtyper, .reg = (PMEVTYPER0_EL0 + n), }
|
||||
|
||||
static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
|
@ -1026,8 +1059,6 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
|
|||
return true;
|
||||
}
|
||||
|
||||
#define FEATURE(x) (GENMASK_ULL(x##_SHIFT + 3, x##_SHIFT))
|
||||
|
||||
/* Read a sanitised cpufeature ID register by sys_reg_desc */
|
||||
static u64 read_id_reg(const struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_desc const *r, bool raz)
|
||||
|
@ -1038,40 +1069,40 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
|
|||
switch (id) {
|
||||
case SYS_ID_AA64PFR0_EL1:
|
||||
if (!vcpu_has_sve(vcpu))
|
||||
val &= ~FEATURE(ID_AA64PFR0_SVE);
|
||||
val &= ~FEATURE(ID_AA64PFR0_AMU);
|
||||
val &= ~FEATURE(ID_AA64PFR0_CSV2);
|
||||
val |= FIELD_PREP(FEATURE(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
|
||||
val &= ~FEATURE(ID_AA64PFR0_CSV3);
|
||||
val |= FIELD_PREP(FEATURE(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE);
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_AMU);
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2);
|
||||
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3);
|
||||
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
|
||||
break;
|
||||
case SYS_ID_AA64PFR1_EL1:
|
||||
val &= ~FEATURE(ID_AA64PFR1_MTE);
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
|
||||
if (kvm_has_mte(vcpu->kvm)) {
|
||||
u64 pfr, mte;
|
||||
|
||||
pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
|
||||
mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT);
|
||||
val |= FIELD_PREP(FEATURE(ID_AA64PFR1_MTE), mte);
|
||||
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), mte);
|
||||
}
|
||||
break;
|
||||
case SYS_ID_AA64ISAR1_EL1:
|
||||
if (!vcpu_has_ptrauth(vcpu))
|
||||
val &= ~(FEATURE(ID_AA64ISAR1_APA) |
|
||||
FEATURE(ID_AA64ISAR1_API) |
|
||||
FEATURE(ID_AA64ISAR1_GPA) |
|
||||
FEATURE(ID_AA64ISAR1_GPI));
|
||||
val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) |
|
||||
ARM64_FEATURE_MASK(ID_AA64ISAR1_API) |
|
||||
ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) |
|
||||
ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI));
|
||||
break;
|
||||
case SYS_ID_AA64DFR0_EL1:
|
||||
/* Limit debug to ARMv8.0 */
|
||||
val &= ~FEATURE(ID_AA64DFR0_DEBUGVER);
|
||||
val |= FIELD_PREP(FEATURE(ID_AA64DFR0_DEBUGVER), 6);
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER);
|
||||
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), 6);
|
||||
/* Limit guests to PMUv3 for ARMv8.4 */
|
||||
val = cpuid_feature_cap_perfmon_field(val,
|
||||
ID_AA64DFR0_PMUVER_SHIFT,
|
||||
kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_PMUVER_8_4 : 0);
|
||||
/* Hide SPE from guests */
|
||||
val &= ~FEATURE(ID_AA64DFR0_PMSVER);
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER);
|
||||
break;
|
||||
case SYS_ID_DFR0_EL1:
|
||||
/* Limit guests to PMUv3 for ARMv8.4 */
|
||||
|
@ -1249,6 +1280,20 @@ static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
|||
return __set_id_reg(vcpu, rd, uaddr, true);
|
||||
}
|
||||
|
||||
static int set_wi_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
{
|
||||
int err;
|
||||
u64 val;
|
||||
|
||||
/* Perform the access even if we are going to ignore the value */
|
||||
err = reg_from_user(&val, uaddr, sys_reg_to_index(rd));
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
|
@ -1592,16 +1637,21 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
|||
.access = access_pmcnten, .reg = PMCNTENSET_EL0 },
|
||||
{ PMU_SYS_REG(SYS_PMOVSCLR_EL0),
|
||||
.access = access_pmovs, .reg = PMOVSSET_EL0 },
|
||||
/*
|
||||
* PM_SWINC_EL0 is exposed to userspace as RAZ/WI, as it was
|
||||
* previously (and pointlessly) advertised in the past...
|
||||
*/
|
||||
{ PMU_SYS_REG(SYS_PMSWINC_EL0),
|
||||
.access = access_pmswinc, .reg = PMSWINC_EL0 },
|
||||
.get_user = get_raz_id_reg, .set_user = set_wi_reg,
|
||||
.access = access_pmswinc, .reset = NULL },
|
||||
{ PMU_SYS_REG(SYS_PMSELR_EL0),
|
||||
.access = access_pmselr, .reg = PMSELR_EL0 },
|
||||
.access = access_pmselr, .reset = reset_pmselr, .reg = PMSELR_EL0 },
|
||||
{ PMU_SYS_REG(SYS_PMCEID0_EL0),
|
||||
.access = access_pmceid, .reset = NULL },
|
||||
{ PMU_SYS_REG(SYS_PMCEID1_EL0),
|
||||
.access = access_pmceid, .reset = NULL },
|
||||
{ PMU_SYS_REG(SYS_PMCCNTR_EL0),
|
||||
.access = access_pmu_evcntr, .reg = PMCCNTR_EL0 },
|
||||
.access = access_pmu_evcntr, .reset = reset_unknown, .reg = PMCCNTR_EL0 },
|
||||
{ PMU_SYS_REG(SYS_PMXEVTYPER_EL0),
|
||||
.access = access_pmu_evtyper, .reset = NULL },
|
||||
{ PMU_SYS_REG(SYS_PMXEVCNTR_EL0),
|
||||
|
@ -2106,23 +2156,6 @@ static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int match_sys_reg(const void *key, const void *elt)
|
||||
{
|
||||
const unsigned long pval = (unsigned long)key;
|
||||
const struct sys_reg_desc *r = elt;
|
||||
|
||||
return pval - reg_to_encoding(r);
|
||||
}
|
||||
|
||||
static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
|
||||
const struct sys_reg_desc table[],
|
||||
unsigned int num)
|
||||
{
|
||||
unsigned long pval = reg_to_encoding(params);
|
||||
|
||||
return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg);
|
||||
}
|
||||
|
||||
int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_inject_undefined(vcpu);
|
||||
|
@ -2365,13 +2398,8 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
|
|||
|
||||
trace_kvm_handle_sys_reg(esr);
|
||||
|
||||
params.Op0 = (esr >> 20) & 3;
|
||||
params.Op1 = (esr >> 14) & 0x7;
|
||||
params.CRn = (esr >> 10) & 0xf;
|
||||
params.CRm = (esr >> 1) & 0xf;
|
||||
params.Op2 = (esr >> 17) & 0x7;
|
||||
params = esr_sys64_to_params(esr);
|
||||
params.regval = vcpu_get_reg(vcpu, Rt);
|
||||
params.is_write = !(esr & 1);
|
||||
|
||||
ret = emulate_sys_reg(vcpu, ¶ms);
|
||||
|
||||
|
|
|
@ -11,6 +11,12 @@
|
|||
#ifndef __ARM64_KVM_SYS_REGS_LOCAL_H__
|
||||
#define __ARM64_KVM_SYS_REGS_LOCAL_H__
|
||||
|
||||
#include <linux/bsearch.h>
|
||||
|
||||
#define reg_to_encoding(x) \
|
||||
sys_reg((u32)(x)->Op0, (u32)(x)->Op1, \
|
||||
(u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2)
|
||||
|
||||
struct sys_reg_params {
|
||||
u8 Op0;
|
||||
u8 Op1;
|
||||
|
@ -21,6 +27,14 @@ struct sys_reg_params {
|
|||
bool is_write;
|
||||
};
|
||||
|
||||
#define esr_sys64_to_params(esr) \
|
||||
((struct sys_reg_params){ .Op0 = ((esr) >> 20) & 3, \
|
||||
.Op1 = ((esr) >> 14) & 0x7, \
|
||||
.CRn = ((esr) >> 10) & 0xf, \
|
||||
.CRm = ((esr) >> 1) & 0xf, \
|
||||
.Op2 = ((esr) >> 17) & 0x7, \
|
||||
.is_write = !((esr) & 1) })
|
||||
|
||||
struct sys_reg_desc {
|
||||
/* Sysreg string for debug */
|
||||
const char *name;
|
||||
|
@ -152,6 +166,23 @@ static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
|
|||
return i1->Op2 - i2->Op2;
|
||||
}
|
||||
|
||||
static inline int match_sys_reg(const void *key, const void *elt)
|
||||
{
|
||||
const unsigned long pval = (unsigned long)key;
|
||||
const struct sys_reg_desc *r = elt;
|
||||
|
||||
return pval - reg_to_encoding(r);
|
||||
}
|
||||
|
||||
static inline const struct sys_reg_desc *
|
||||
find_reg(const struct sys_reg_params *params, const struct sys_reg_desc table[],
|
||||
unsigned int num)
|
||||
{
|
||||
unsigned long pval = reg_to_encoding(params);
|
||||
|
||||
return __inline_bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg);
|
||||
}
|
||||
|
||||
const struct sys_reg_desc *find_reg_by_id(u64 id,
|
||||
struct sys_reg_params *params,
|
||||
const struct sys_reg_desc table[],
|
||||
|
|
|
@ -78,13 +78,17 @@ TRACE_EVENT(kvm_arm_clear_debug,
|
|||
TP_printk("flags: 0x%08x", __entry->guest_debug)
|
||||
);
|
||||
|
||||
/*
|
||||
* The dreg32 name is a leftover from a distant past. This will really
|
||||
* output a 64bit value...
|
||||
*/
|
||||
TRACE_EVENT(kvm_arm_set_dreg32,
|
||||
TP_PROTO(const char *name, __u32 value),
|
||||
TP_PROTO(const char *name, __u64 value),
|
||||
TP_ARGS(name, value),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(const char *, name)
|
||||
__field(__u32, value)
|
||||
__field(__u64, value)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
|
@ -92,7 +96,7 @@ TRACE_EVENT(kvm_arm_set_dreg32,
|
|||
__entry->value = value;
|
||||
),
|
||||
|
||||
TP_printk("%s: 0x%08x", __entry->name, __entry->value)
|
||||
TP_printk("%s: 0x%llx", __entry->name, __entry->value)
|
||||
);
|
||||
|
||||
TRACE_DEFINE_SIZEOF(__u64);
|
||||
|
|
|
@ -282,7 +282,7 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu,
|
|||
case GIC_CPU_PRIMASK:
|
||||
/*
|
||||
* Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
|
||||
* the PMR field as GICH_VMCR.VMPriMask rather than
|
||||
* PMR field as GICH_VMCR.VMPriMask rather than
|
||||
* GICC_PMR.Priority, so we expose the upper five bits of
|
||||
* priority mask to userspace using the lower bits in the
|
||||
* unsigned long.
|
||||
|
@ -329,7 +329,7 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
|
|||
case GIC_CPU_PRIMASK:
|
||||
/*
|
||||
* Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
|
||||
* the PMR field as GICH_VMCR.VMPriMask rather than
|
||||
* PMR field as GICH_VMCR.VMPriMask rather than
|
||||
* GICC_PMR.Priority, so we expose the upper five bits of
|
||||
* priority mask to userspace using the lower bits in the
|
||||
* unsigned long.
|
||||
|
|
|
@ -60,6 +60,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
|
|||
u32 val = cpuif->vgic_lr[lr];
|
||||
u32 cpuid, intid = val & GICH_LR_VIRTUALID;
|
||||
struct vgic_irq *irq;
|
||||
bool deactivated;
|
||||
|
||||
/* Extract the source vCPU id from the LR */
|
||||
cpuid = val & GICH_LR_PHYSID_CPUID;
|
||||
|
@ -75,7 +76,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
|
|||
|
||||
raw_spin_lock(&irq->irq_lock);
|
||||
|
||||
/* Always preserve the active bit */
|
||||
/* Always preserve the active bit, note deactivation */
|
||||
deactivated = irq->active && !(val & GICH_LR_ACTIVE_BIT);
|
||||
irq->active = !!(val & GICH_LR_ACTIVE_BIT);
|
||||
|
||||
if (irq->active && vgic_irq_is_sgi(intid))
|
||||
|
@ -96,36 +98,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
|
|||
if (irq->config == VGIC_CONFIG_LEVEL && !(val & GICH_LR_STATE))
|
||||
irq->pending_latch = false;
|
||||
|
||||
/*
|
||||
* Level-triggered mapped IRQs are special because we only
|
||||
* observe rising edges as input to the VGIC.
|
||||
*
|
||||
* If the guest never acked the interrupt we have to sample
|
||||
* the physical line and set the line level, because the
|
||||
* device state could have changed or we simply need to
|
||||
* process the still pending interrupt later.
|
||||
*
|
||||
* If this causes us to lower the level, we have to also clear
|
||||
* the physical active state, since we will otherwise never be
|
||||
* told when the interrupt becomes asserted again.
|
||||
*
|
||||
* Another case is when the interrupt requires a helping hand
|
||||
* on deactivation (no HW deactivation, for example).
|
||||
*/
|
||||
if (vgic_irq_is_mapped_level(irq)) {
|
||||
bool resample = false;
|
||||
|
||||
if (val & GICH_LR_PENDING_BIT) {
|
||||
irq->line_level = vgic_get_phys_line_level(irq);
|
||||
resample = !irq->line_level;
|
||||
} else if (vgic_irq_needs_resampling(irq) &&
|
||||
!(irq->active || irq->pending_latch)) {
|
||||
resample = true;
|
||||
}
|
||||
|
||||
if (resample)
|
||||
vgic_irq_set_phys_active(irq, false);
|
||||
}
|
||||
/* Handle resampling for mapped interrupts if required */
|
||||
vgic_irq_handle_resampling(irq, deactivated, val & GICH_LR_PENDING_BIT);
|
||||
|
||||
raw_spin_unlock(&irq->irq_lock);
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
|
|
|
@ -46,6 +46,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
|
|||
u32 intid, cpuid;
|
||||
struct vgic_irq *irq;
|
||||
bool is_v2_sgi = false;
|
||||
bool deactivated;
|
||||
|
||||
cpuid = val & GICH_LR_PHYSID_CPUID;
|
||||
cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
|
||||
|
@ -68,7 +69,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
|
|||
|
||||
raw_spin_lock(&irq->irq_lock);
|
||||
|
||||
/* Always preserve the active bit */
|
||||
/* Always preserve the active bit, note deactivation */
|
||||
deactivated = irq->active && !(val & ICH_LR_ACTIVE_BIT);
|
||||
irq->active = !!(val & ICH_LR_ACTIVE_BIT);
|
||||
|
||||
if (irq->active && is_v2_sgi)
|
||||
|
@ -89,36 +91,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
|
|||
if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE))
|
||||
irq->pending_latch = false;
|
||||
|
||||
/*
|
||||
* Level-triggered mapped IRQs are special because we only
|
||||
* observe rising edges as input to the VGIC.
|
||||
*
|
||||
* If the guest never acked the interrupt we have to sample
|
||||
* the physical line and set the line level, because the
|
||||
* device state could have changed or we simply need to
|
||||
* process the still pending interrupt later.
|
||||
*
|
||||
* If this causes us to lower the level, we have to also clear
|
||||
* the physical active state, since we will otherwise never be
|
||||
* told when the interrupt becomes asserted again.
|
||||
*
|
||||
* Another case is when the interrupt requires a helping hand
|
||||
* on deactivation (no HW deactivation, for example).
|
||||
*/
|
||||
if (vgic_irq_is_mapped_level(irq)) {
|
||||
bool resample = false;
|
||||
|
||||
if (val & ICH_LR_PENDING_BIT) {
|
||||
irq->line_level = vgic_get_phys_line_level(irq);
|
||||
resample = !irq->line_level;
|
||||
} else if (vgic_irq_needs_resampling(irq) &&
|
||||
!(irq->active || irq->pending_latch)) {
|
||||
resample = true;
|
||||
}
|
||||
|
||||
if (resample)
|
||||
vgic_irq_set_phys_active(irq, false);
|
||||
}
|
||||
/* Handle resampling for mapped interrupts if required */
|
||||
vgic_irq_handle_resampling(irq, deactivated, val & ICH_LR_PENDING_BIT);
|
||||
|
||||
raw_spin_unlock(&irq->irq_lock);
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
|
|
|
@ -106,7 +106,6 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
|
|||
if (intid >= VGIC_MIN_LPI)
|
||||
return vgic_get_lpi(kvm, intid);
|
||||
|
||||
WARN(1, "Looking up struct vgic_irq for reserved INTID");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -1022,3 +1021,41 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
|
|||
|
||||
return map_is_active;
|
||||
}
|
||||
|
||||
/*
|
||||
* Level-triggered mapped IRQs are special because we only observe rising
|
||||
* edges as input to the VGIC.
|
||||
*
|
||||
* If the guest never acked the interrupt we have to sample the physical
|
||||
* line and set the line level, because the device state could have changed
|
||||
* or we simply need to process the still pending interrupt later.
|
||||
*
|
||||
* We could also have entered the guest with the interrupt active+pending.
|
||||
* On the next exit, we need to re-evaluate the pending state, as it could
|
||||
* otherwise result in a spurious interrupt by injecting a now potentially
|
||||
* stale pending state.
|
||||
*
|
||||
* If this causes us to lower the level, we have to also clear the physical
|
||||
* active state, since we will otherwise never be told when the interrupt
|
||||
* becomes asserted again.
|
||||
*
|
||||
* Another case is when the interrupt requires a helping hand on
|
||||
* deactivation (no HW deactivation, for example).
|
||||
*/
|
||||
void vgic_irq_handle_resampling(struct vgic_irq *irq,
|
||||
bool lr_deactivated, bool lr_pending)
|
||||
{
|
||||
if (vgic_irq_is_mapped_level(irq)) {
|
||||
bool resample = false;
|
||||
|
||||
if (unlikely(vgic_irq_needs_resampling(irq))) {
|
||||
resample = !(irq->active || irq->pending_latch);
|
||||
} else if (lr_pending || (lr_deactivated && irq->line_level)) {
|
||||
irq->line_level = vgic_get_phys_line_level(irq);
|
||||
resample = !irq->line_level;
|
||||
}
|
||||
|
||||
if (resample)
|
||||
vgic_irq_set_phys_active(irq, false);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -169,6 +169,8 @@ void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active);
|
|||
bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
|
||||
unsigned long flags);
|
||||
void vgic_kick_vcpus(struct kvm *kvm);
|
||||
void vgic_irq_handle_resampling(struct vgic_irq *irq,
|
||||
bool lr_deactivated, bool lr_pending);
|
||||
|
||||
int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
|
||||
phys_addr_t addr, phys_addr_t alignment);
|
||||
|
|
|
@ -41,8 +41,6 @@
|
|||
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
|
||||
KVM_GENERIC_VM_STATS()
|
||||
};
|
||||
static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
|
||||
sizeof(struct kvm_vm_stat) / sizeof(u64));
|
||||
|
||||
const struct kvm_stats_header kvm_vm_stats_header = {
|
||||
.name_size = KVM_STATS_NAME_SIZE,
|
||||
|
@ -85,8 +83,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
|
|||
STATS_DESC_COUNTER(VCPU, vz_cpucfg_exits),
|
||||
#endif
|
||||
};
|
||||
static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
|
||||
sizeof(struct kvm_vcpu_stat) / sizeof(u64));
|
||||
|
||||
const struct kvm_stats_header kvm_vcpu_stats_header = {
|
||||
.name_size = KVM_STATS_NAME_SIZE,
|
||||
|
|
|
@ -388,7 +388,6 @@ static void _kvm_vz_restore_htimer(struct kvm_vcpu *vcpu,
|
|||
u32 compare, u32 cause)
|
||||
{
|
||||
u32 start_count, after_count;
|
||||
ktime_t freeze_time;
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
|
@ -396,7 +395,7 @@ static void _kvm_vz_restore_htimer(struct kvm_vcpu *vcpu,
|
|||
* this with interrupts disabled to avoid latency.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
freeze_time = kvm_mips_freeze_hrtimer(vcpu, &start_count);
|
||||
kvm_mips_freeze_hrtimer(vcpu, &start_count);
|
||||
write_c0_gtoffset(start_count - read_c0_count());
|
||||
local_irq_restore(flags);
|
||||
|
||||
|
|
|
@ -103,7 +103,6 @@ struct kvm_vcpu_stat {
|
|||
u64 emulated_inst_exits;
|
||||
u64 dec_exits;
|
||||
u64 ext_intr_exits;
|
||||
u64 halt_wait_ns;
|
||||
u64 halt_successful_wait;
|
||||
u64 dbell_exits;
|
||||
u64 gdbell_exits;
|
||||
|
|
|
@ -43,8 +43,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
|
|||
STATS_DESC_ICOUNTER(VM, num_2M_pages),
|
||||
STATS_DESC_ICOUNTER(VM, num_1G_pages)
|
||||
};
|
||||
static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
|
||||
sizeof(struct kvm_vm_stat) / sizeof(u64));
|
||||
|
||||
const struct kvm_stats_header kvm_vm_stats_header = {
|
||||
.name_size = KVM_STATS_NAME_SIZE,
|
||||
|
@ -71,7 +69,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
|
|||
STATS_DESC_COUNTER(VCPU, emulated_inst_exits),
|
||||
STATS_DESC_COUNTER(VCPU, dec_exits),
|
||||
STATS_DESC_COUNTER(VCPU, ext_intr_exits),
|
||||
STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns),
|
||||
STATS_DESC_COUNTER(VCPU, halt_successful_wait),
|
||||
STATS_DESC_COUNTER(VCPU, dbell_exits),
|
||||
STATS_DESC_COUNTER(VCPU, gdbell_exits),
|
||||
|
@ -88,8 +85,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
|
|||
STATS_DESC_COUNTER(VCPU, pthru_host),
|
||||
STATS_DESC_COUNTER(VCPU, pthru_bad_aff)
|
||||
};
|
||||
static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
|
||||
sizeof(struct kvm_vcpu_stat) / sizeof(u64));
|
||||
|
||||
const struct kvm_stats_header kvm_vcpu_stats_header = {
|
||||
.name_size = KVM_STATS_NAME_SIZE,
|
||||
|
|
|
@ -346,7 +346,7 @@ static long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
|
|||
unsigned long gfn = tce >> PAGE_SHIFT;
|
||||
struct kvm_memory_slot *memslot;
|
||||
|
||||
memslot = search_memslots(kvm_memslots(kvm), gfn);
|
||||
memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
|
||||
if (!memslot)
|
||||
return -EINVAL;
|
||||
|
||||
|
|
|
@ -80,7 +80,7 @@ static long kvmppc_rm_tce_to_ua(struct kvm *kvm,
|
|||
unsigned long gfn = tce >> PAGE_SHIFT;
|
||||
struct kvm_memory_slot *memslot;
|
||||
|
||||
memslot = search_memslots(kvm_memslots_raw(kvm), gfn);
|
||||
memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
|
||||
if (!memslot)
|
||||
return -EINVAL;
|
||||
|
||||
|
|
|
@ -4202,19 +4202,31 @@ out:
|
|||
|
||||
/* Attribute wait time */
|
||||
if (do_sleep) {
|
||||
vc->runner->stat.halt_wait_ns +=
|
||||
vc->runner->stat.generic.halt_wait_ns +=
|
||||
ktime_to_ns(cur) - ktime_to_ns(start_wait);
|
||||
KVM_STATS_LOG_HIST_UPDATE(
|
||||
vc->runner->stat.generic.halt_wait_hist,
|
||||
ktime_to_ns(cur) - ktime_to_ns(start_wait));
|
||||
/* Attribute failed poll time */
|
||||
if (vc->halt_poll_ns)
|
||||
if (vc->halt_poll_ns) {
|
||||
vc->runner->stat.generic.halt_poll_fail_ns +=
|
||||
ktime_to_ns(start_wait) -
|
||||
ktime_to_ns(start_poll);
|
||||
KVM_STATS_LOG_HIST_UPDATE(
|
||||
vc->runner->stat.generic.halt_poll_fail_hist,
|
||||
ktime_to_ns(start_wait) -
|
||||
ktime_to_ns(start_poll));
|
||||
}
|
||||
} else {
|
||||
/* Attribute successful poll time */
|
||||
if (vc->halt_poll_ns)
|
||||
if (vc->halt_poll_ns) {
|
||||
vc->runner->stat.generic.halt_poll_success_ns +=
|
||||
ktime_to_ns(cur) -
|
||||
ktime_to_ns(start_poll);
|
||||
KVM_STATS_LOG_HIST_UPDATE(
|
||||
vc->runner->stat.generic.halt_poll_success_hist,
|
||||
ktime_to_ns(cur) - ktime_to_ns(start_poll));
|
||||
}
|
||||
}
|
||||
|
||||
/* Adjust poll time */
|
||||
|
|
|
@ -41,8 +41,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
|
|||
STATS_DESC_ICOUNTER(VM, num_2M_pages),
|
||||
STATS_DESC_ICOUNTER(VM, num_1G_pages)
|
||||
};
|
||||
static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
|
||||
sizeof(struct kvm_vm_stat) / sizeof(u64));
|
||||
|
||||
const struct kvm_stats_header kvm_vm_stats_header = {
|
||||
.name_size = KVM_STATS_NAME_SIZE,
|
||||
|
@ -69,7 +67,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
|
|||
STATS_DESC_COUNTER(VCPU, emulated_inst_exits),
|
||||
STATS_DESC_COUNTER(VCPU, dec_exits),
|
||||
STATS_DESC_COUNTER(VCPU, ext_intr_exits),
|
||||
STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns),
|
||||
STATS_DESC_COUNTER(VCPU, halt_successful_wait),
|
||||
STATS_DESC_COUNTER(VCPU, dbell_exits),
|
||||
STATS_DESC_COUNTER(VCPU, gdbell_exits),
|
||||
|
@ -79,8 +76,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
|
|||
STATS_DESC_COUNTER(VCPU, pthru_host),
|
||||
STATS_DESC_COUNTER(VCPU, pthru_bad_aff)
|
||||
};
|
||||
static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
|
||||
sizeof(struct kvm_vcpu_stat) / sizeof(u64));
|
||||
|
||||
const struct kvm_stats_header kvm_vcpu_stats_header = {
|
||||
.name_size = KVM_STATS_NAME_SIZE,
|
||||
|
|
|
@ -244,6 +244,7 @@ struct kvm_s390_sie_block {
|
|||
__u8 fpf; /* 0x0060 */
|
||||
#define ECB_GS 0x40
|
||||
#define ECB_TE 0x10
|
||||
#define ECB_SPECI 0x08
|
||||
#define ECB_SRSI 0x04
|
||||
#define ECB_HOSTPROTINT 0x02
|
||||
__u8 ecb; /* 0x0061 */
|
||||
|
@ -955,6 +956,7 @@ struct kvm_arch{
|
|||
atomic64_t cmma_dirty_pages;
|
||||
/* subset of available cpu features enabled by user space */
|
||||
DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
|
||||
/* indexed by vcpu_idx */
|
||||
DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
|
||||
struct kvm_s390_gisa_interrupt gisa_int;
|
||||
struct kvm_s390_pv pv;
|
||||
|
|
|
@ -419,13 +419,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
|
|||
static void __set_cpu_idle(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
|
||||
set_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask);
|
||||
set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
|
||||
}
|
||||
|
||||
static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
|
||||
clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask);
|
||||
clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
|
||||
}
|
||||
|
||||
static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
|
||||
|
@ -3050,18 +3050,18 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
|
|||
|
||||
static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask)
|
||||
{
|
||||
int vcpu_id, online_vcpus = atomic_read(&kvm->online_vcpus);
|
||||
int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus);
|
||||
struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) {
|
||||
vcpu = kvm_get_vcpu(kvm, vcpu_id);
|
||||
for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) {
|
||||
vcpu = kvm_get_vcpu(kvm, vcpu_idx);
|
||||
if (psw_ioint_disabled(vcpu))
|
||||
continue;
|
||||
deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
|
||||
if (deliverable_mask) {
|
||||
/* lately kicked but not yet running */
|
||||
if (test_and_set_bit(vcpu_id, gi->kicked_mask))
|
||||
if (test_and_set_bit(vcpu_idx, gi->kicked_mask))
|
||||
return;
|
||||
kvm_s390_vcpu_wakeup(vcpu);
|
||||
return;
|
||||
|
|
|
@ -66,8 +66,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
|
|||
STATS_DESC_COUNTER(VM, inject_service_signal),
|
||||
STATS_DESC_COUNTER(VM, inject_virtio)
|
||||
};
|
||||
static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
|
||||
sizeof(struct kvm_vm_stat) / sizeof(u64));
|
||||
|
||||
const struct kvm_stats_header kvm_vm_stats_header = {
|
||||
.name_size = KVM_STATS_NAME_SIZE,
|
||||
|
@ -174,8 +172,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
|
|||
STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
|
||||
STATS_DESC_COUNTER(VCPU, pfault_sync)
|
||||
};
|
||||
static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
|
||||
sizeof(struct kvm_vcpu_stat) / sizeof(u64));
|
||||
|
||||
const struct kvm_stats_header kvm_vcpu_stats_header = {
|
||||
.name_size = KVM_STATS_NAME_SIZE,
|
||||
|
@ -1953,7 +1949,7 @@ out:
|
|||
static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
|
||||
{
|
||||
int start = 0, end = slots->used_slots;
|
||||
int slot = atomic_read(&slots->lru_slot);
|
||||
int slot = atomic_read(&slots->last_used_slot);
|
||||
struct kvm_memory_slot *memslots = slots->memslots;
|
||||
|
||||
if (gfn >= memslots[slot].base_gfn &&
|
||||
|
@ -1974,7 +1970,7 @@ static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
|
|||
|
||||
if (gfn >= memslots[start].base_gfn &&
|
||||
gfn < memslots[start].base_gfn + memslots[start].npages) {
|
||||
atomic_set(&slots->lru_slot, start);
|
||||
atomic_set(&slots->last_used_slot, start);
|
||||
}
|
||||
|
||||
return start;
|
||||
|
@ -3224,6 +3220,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
|
|||
vcpu->arch.sie_block->ecb |= ECB_SRSI;
|
||||
if (test_kvm_facility(vcpu->kvm, 73))
|
||||
vcpu->arch.sie_block->ecb |= ECB_TE;
|
||||
if (!kvm_is_ucontrol(vcpu->kvm))
|
||||
vcpu->arch.sie_block->ecb |= ECB_SPECI;
|
||||
|
||||
if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
|
||||
vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
|
||||
|
@ -4068,7 +4066,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
|
|||
kvm_s390_patch_guest_per_regs(vcpu);
|
||||
}
|
||||
|
||||
clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
|
||||
clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask);
|
||||
|
||||
vcpu->arch.sie_block->icptcode = 0;
|
||||
cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
|
||||
|
|
|
@ -79,7 +79,7 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
|
|||
|
||||
static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask);
|
||||
return test_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
|
||||
}
|
||||
|
||||
static inline int kvm_is_ucontrol(struct kvm *kvm)
|
||||
|
|
|
@ -510,6 +510,8 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
|||
prefix_unmapped(vsie_page);
|
||||
scb_s->ecb |= ECB_TE;
|
||||
}
|
||||
/* specification exception interpretation */
|
||||
scb_s->ecb |= scb_o->ecb & ECB_SPECI;
|
||||
/* branch prediction */
|
||||
if (test_kvm_facility(vcpu->kvm, 82))
|
||||
scb_s->fpf |= scb_o->fpf & FPF_BPBC;
|
||||
|
|
|
@ -72,7 +72,6 @@ KVM_X86_OP(enable_nmi_window)
|
|||
KVM_X86_OP(enable_irq_window)
|
||||
KVM_X86_OP(update_cr8_intercept)
|
||||
KVM_X86_OP(check_apicv_inhibit_reasons)
|
||||
KVM_X86_OP_NULL(pre_update_apicv_exec_ctrl)
|
||||
KVM_X86_OP(refresh_apicv_exec_ctrl)
|
||||
KVM_X86_OP(hwapic_irr_update)
|
||||
KVM_X86_OP(hwapic_isr_update)
|
||||
|
|
|
@ -37,9 +37,21 @@
|
|||
|
||||
#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
|
||||
|
||||
#define KVM_MAX_VCPUS 288
|
||||
#define KVM_SOFT_MAX_VCPUS 240
|
||||
#define KVM_MAX_VCPU_ID 1023
|
||||
#define KVM_MAX_VCPUS 1024
|
||||
#define KVM_SOFT_MAX_VCPUS 710
|
||||
|
||||
/*
|
||||
* In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs
|
||||
* might be larger than the actual number of VCPUs because the
|
||||
* APIC ID encodes CPU topology information.
|
||||
*
|
||||
* In the worst case, we'll need less than one extra bit for the
|
||||
* Core ID, and less than one extra bit for the Package (Die) ID,
|
||||
* so ratio of 4 should be enough.
|
||||
*/
|
||||
#define KVM_VCPU_ID_RATIO 4
|
||||
#define KVM_MAX_VCPU_ID (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO)
|
||||
|
||||
/* memory slots that are not exposed to userspace */
|
||||
#define KVM_PRIVATE_MEM_SLOTS 3
|
||||
|
||||
|
@ -124,13 +136,6 @@
|
|||
#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
|
||||
#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
|
||||
|
||||
static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
|
||||
{
|
||||
/* KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K) must be 0. */
|
||||
return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
|
||||
(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
|
||||
}
|
||||
|
||||
#define KVM_PERMILLE_MMU_PAGES 20
|
||||
#define KVM_MIN_ALLOC_MMU_PAGES 64UL
|
||||
#define KVM_MMU_HASH_SHIFT 12
|
||||
|
@ -229,7 +234,8 @@ enum x86_intercept_stage;
|
|||
KVM_GUESTDBG_USE_HW_BP | \
|
||||
KVM_GUESTDBG_USE_SW_BP | \
|
||||
KVM_GUESTDBG_INJECT_BP | \
|
||||
KVM_GUESTDBG_INJECT_DB)
|
||||
KVM_GUESTDBG_INJECT_DB | \
|
||||
KVM_GUESTDBG_BLOCKIRQ)
|
||||
|
||||
|
||||
#define PFERR_PRESENT_BIT 0
|
||||
|
@ -447,6 +453,7 @@ struct kvm_mmu {
|
|||
|
||||
u64 *pae_root;
|
||||
u64 *pml4_root;
|
||||
u64 *pml5_root;
|
||||
|
||||
/*
|
||||
* check zero bits on shadow page table entries, these
|
||||
|
@ -482,6 +489,7 @@ struct kvm_pmc {
|
|||
* ctrl value for fixed counters.
|
||||
*/
|
||||
u64 current_config;
|
||||
bool is_paused;
|
||||
};
|
||||
|
||||
struct kvm_pmu {
|
||||
|
@ -522,7 +530,6 @@ struct kvm_pmu_ops;
|
|||
enum {
|
||||
KVM_DEBUGREG_BP_ENABLED = 1,
|
||||
KVM_DEBUGREG_WONT_EXIT = 2,
|
||||
KVM_DEBUGREG_RELOAD = 4,
|
||||
};
|
||||
|
||||
struct kvm_mtrr_range {
|
||||
|
@ -723,7 +730,6 @@ struct kvm_vcpu_arch {
|
|||
|
||||
u64 reserved_gpa_bits;
|
||||
int maxphyaddr;
|
||||
int max_tdp_level;
|
||||
|
||||
/* emulate context */
|
||||
|
||||
|
@ -988,6 +994,12 @@ struct kvm_hv {
|
|||
/* How many vCPUs have VP index != vCPU index */
|
||||
atomic_t num_mismatched_vp_indexes;
|
||||
|
||||
/*
|
||||
* How many SynICs use 'AutoEOI' feature
|
||||
* (protected by arch.apicv_update_lock)
|
||||
*/
|
||||
unsigned int synic_auto_eoi_used;
|
||||
|
||||
struct hv_partition_assist_pg *hv_pa_pg;
|
||||
struct kvm_hv_syndbg hv_syndbg;
|
||||
};
|
||||
|
@ -1002,9 +1014,8 @@ struct msr_bitmap_range {
|
|||
/* Xen emulation context */
|
||||
struct kvm_xen {
|
||||
bool long_mode;
|
||||
bool shinfo_set;
|
||||
u8 upcall_vector;
|
||||
struct gfn_to_hva_cache shinfo_cache;
|
||||
gfn_t shinfo_gfn;
|
||||
};
|
||||
|
||||
enum kvm_irqchip_mode {
|
||||
|
@ -1061,6 +1072,9 @@ struct kvm_arch {
|
|||
struct kvm_apic_map __rcu *apic_map;
|
||||
atomic_t apic_map_dirty;
|
||||
|
||||
/* Protects apic_access_memslot_enabled and apicv_inhibit_reasons */
|
||||
struct mutex apicv_update_lock;
|
||||
|
||||
bool apic_access_memslot_enabled;
|
||||
unsigned long apicv_inhibit_reasons;
|
||||
|
||||
|
@ -1213,9 +1227,17 @@ struct kvm_vm_stat {
|
|||
u64 mmu_recycled;
|
||||
u64 mmu_cache_miss;
|
||||
u64 mmu_unsync;
|
||||
u64 lpages;
|
||||
union {
|
||||
struct {
|
||||
atomic64_t pages_4k;
|
||||
atomic64_t pages_2m;
|
||||
atomic64_t pages_1g;
|
||||
};
|
||||
atomic64_t pages[KVM_NR_PAGE_SIZES];
|
||||
};
|
||||
u64 nx_lpage_splits;
|
||||
u64 max_mmu_page_hash_collisions;
|
||||
u64 max_mmu_rmap_size;
|
||||
};
|
||||
|
||||
struct kvm_vcpu_stat {
|
||||
|
@ -1359,7 +1381,6 @@ struct kvm_x86_ops {
|
|||
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
|
||||
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
|
||||
bool (*check_apicv_inhibit_reasons)(ulong bit);
|
||||
void (*pre_update_apicv_exec_ctrl)(struct kvm *kvm, bool activate);
|
||||
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
|
||||
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
|
||||
void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
|
||||
|
@ -1543,12 +1564,12 @@ void kvm_mmu_uninit_vm(struct kvm *kvm);
|
|||
void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
const struct kvm_memory_slot *memslot,
|
||||
int start_level);
|
||||
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *memslot);
|
||||
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot);
|
||||
const struct kvm_memory_slot *memslot);
|
||||
void kvm_mmu_zap_all(struct kvm *kvm);
|
||||
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
|
||||
unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
|
||||
|
@ -1744,6 +1765,9 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
|
|||
void kvm_request_apicv_update(struct kvm *kvm, bool activate,
|
||||
unsigned long bit);
|
||||
|
||||
void __kvm_request_apicv_update(struct kvm *kvm, bool activate,
|
||||
unsigned long bit);
|
||||
|
||||
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
|
||||
|
||||
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
|
||||
|
@ -1754,8 +1778,8 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
|||
void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
|
||||
void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
|
||||
|
||||
void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
|
||||
int tdp_huge_page_level);
|
||||
void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
|
||||
int tdp_max_root_level, int tdp_huge_page_level);
|
||||
|
||||
static inline u16 kvm_read_ldt(void)
|
||||
{
|
||||
|
@ -1779,11 +1803,6 @@ static inline unsigned long read_msr(unsigned long msr)
|
|||
}
|
||||
#endif
|
||||
|
||||
static inline u32 get_rdx_init_val(void)
|
||||
{
|
||||
return 0x600; /* P6 family */
|
||||
}
|
||||
|
||||
static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
|
||||
{
|
||||
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
|
||||
|
@ -1816,31 +1835,6 @@ enum {
|
|||
#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
|
||||
#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
|
||||
|
||||
asmlinkage void kvm_spurious_fault(void);
|
||||
|
||||
/*
|
||||
* Hardware virtualization extension instructions may fault if a
|
||||
* reboot turns off virtualization while processes are running.
|
||||
* Usually after catching the fault we just panic; during reboot
|
||||
* instead the instruction is ignored.
|
||||
*/
|
||||
#define __kvm_handle_fault_on_reboot(insn) \
|
||||
"666: \n\t" \
|
||||
insn "\n\t" \
|
||||
"jmp 668f \n\t" \
|
||||
"667: \n\t" \
|
||||
"1: \n\t" \
|
||||
".pushsection .discard.instr_begin \n\t" \
|
||||
".long 1b - . \n\t" \
|
||||
".popsection \n\t" \
|
||||
"call kvm_spurious_fault \n\t" \
|
||||
"1: \n\t" \
|
||||
".pushsection .discard.instr_end \n\t" \
|
||||
".long 1b - . \n\t" \
|
||||
".popsection \n\t" \
|
||||
"668: \n\t" \
|
||||
_ASM_EXTABLE(666b, 667b)
|
||||
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
|
||||
int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
|
||||
|
|
|
@ -295,6 +295,7 @@ struct kvm_debug_exit_arch {
|
|||
#define KVM_GUESTDBG_USE_HW_BP 0x00020000
|
||||
#define KVM_GUESTDBG_INJECT_DB 0x00040000
|
||||
#define KVM_GUESTDBG_INJECT_BP 0x00080000
|
||||
#define KVM_GUESTDBG_BLOCKIRQ 0x00100000
|
||||
|
||||
/* for KVM_SET_GUEST_DEBUG */
|
||||
struct kvm_guest_debug_arch {
|
||||
|
|
|
@ -884,10 +884,11 @@ static void kvm_wait(u8 *ptr, u8 val)
|
|||
} else {
|
||||
local_irq_disable();
|
||||
|
||||
/* safe_halt() will enable IRQ */
|
||||
if (READ_ONCE(*ptr) == val)
|
||||
safe_halt();
|
||||
|
||||
local_irq_enable();
|
||||
else
|
||||
local_irq_enable();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
#include <linux/kvm_host.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include "lapic.h"
|
||||
#include "mmu.h"
|
||||
#include "mmu/mmu_internal.h"
|
||||
|
||||
static int vcpu_get_timer_advance_ns(void *data, u64 *val)
|
||||
{
|
||||
|
@ -73,3 +75,112 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_
|
|||
&vcpu_tsc_scaling_frac_fops);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This covers statistics <1024 (11=log(1024)+1), which should be enough to
|
||||
* cover RMAP_RECYCLE_THRESHOLD.
|
||||
*/
|
||||
#define RMAP_LOG_SIZE 11
|
||||
|
||||
static const char *kvm_lpage_str[KVM_NR_PAGE_SIZES] = { "4K", "2M", "1G" };
|
||||
|
||||
static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v)
|
||||
{
|
||||
struct kvm_rmap_head *rmap;
|
||||
struct kvm *kvm = m->private;
|
||||
struct kvm_memory_slot *slot;
|
||||
struct kvm_memslots *slots;
|
||||
unsigned int lpage_size, index;
|
||||
/* Still small enough to be on the stack */
|
||||
unsigned int *log[KVM_NR_PAGE_SIZES], *cur;
|
||||
int i, j, k, l, ret;
|
||||
|
||||
ret = -ENOMEM;
|
||||
memset(log, 0, sizeof(log));
|
||||
for (i = 0; i < KVM_NR_PAGE_SIZES; i++) {
|
||||
log[i] = kcalloc(RMAP_LOG_SIZE, sizeof(unsigned int), GFP_KERNEL);
|
||||
if (!log[i])
|
||||
goto out;
|
||||
}
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
write_lock(&kvm->mmu_lock);
|
||||
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
|
||||
slots = __kvm_memslots(kvm, i);
|
||||
for (j = 0; j < slots->used_slots; j++) {
|
||||
slot = &slots->memslots[j];
|
||||
for (k = 0; k < KVM_NR_PAGE_SIZES; k++) {
|
||||
rmap = slot->arch.rmap[k];
|
||||
lpage_size = kvm_mmu_slot_lpages(slot, k + 1);
|
||||
cur = log[k];
|
||||
for (l = 0; l < lpage_size; l++) {
|
||||
index = ffs(pte_list_count(&rmap[l]));
|
||||
if (WARN_ON_ONCE(index >= RMAP_LOG_SIZE))
|
||||
index = RMAP_LOG_SIZE - 1;
|
||||
cur[index]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
|
||||
/* index=0 counts no rmap; index=1 counts 1 rmap */
|
||||
seq_printf(m, "Rmap_Count:\t0\t1\t");
|
||||
for (i = 2; i < RMAP_LOG_SIZE; i++) {
|
||||
j = 1 << (i - 1);
|
||||
k = (1 << i) - 1;
|
||||
seq_printf(m, "%d-%d\t", j, k);
|
||||
}
|
||||
seq_printf(m, "\n");
|
||||
|
||||
for (i = 0; i < KVM_NR_PAGE_SIZES; i++) {
|
||||
seq_printf(m, "Level=%s:\t", kvm_lpage_str[i]);
|
||||
cur = log[i];
|
||||
for (j = 0; j < RMAP_LOG_SIZE; j++)
|
||||
seq_printf(m, "%d\t", cur[j]);
|
||||
seq_printf(m, "\n");
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
for (i = 0; i < KVM_NR_PAGE_SIZES; i++)
|
||||
kfree(log[i]);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_mmu_rmaps_stat_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct kvm *kvm = inode->i_private;
|
||||
|
||||
if (!kvm_get_kvm_safe(kvm))
|
||||
return -ENOENT;
|
||||
|
||||
return single_open(file, kvm_mmu_rmaps_stat_show, kvm);
|
||||
}
|
||||
|
||||
static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct kvm *kvm = inode->i_private;
|
||||
|
||||
kvm_put_kvm(kvm);
|
||||
|
||||
return single_release(inode, file);
|
||||
}
|
||||
|
||||
static const struct file_operations mmu_rmaps_stat_fops = {
|
||||
.open = kvm_mmu_rmaps_stat_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = kvm_mmu_rmaps_stat_release,
|
||||
};
|
||||
|
||||
int kvm_arch_create_vm_debugfs(struct kvm *kvm)
|
||||
{
|
||||
debugfs_create_file("mmu_rmaps_stat", 0644, kvm->debugfs_dentry, kvm,
|
||||
&mmu_rmaps_stat_fops);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -88,6 +88,10 @@ static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
|
|||
static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
|
||||
int vector)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
|
||||
struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
|
||||
int auto_eoi_old, auto_eoi_new;
|
||||
|
||||
if (vector < HV_SYNIC_FIRST_VALID_VECTOR)
|
||||
return;
|
||||
|
||||
|
@ -96,10 +100,30 @@ static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
|
|||
else
|
||||
__clear_bit(vector, synic->vec_bitmap);
|
||||
|
||||
auto_eoi_old = bitmap_weight(synic->auto_eoi_bitmap, 256);
|
||||
|
||||
if (synic_has_vector_auto_eoi(synic, vector))
|
||||
__set_bit(vector, synic->auto_eoi_bitmap);
|
||||
else
|
||||
__clear_bit(vector, synic->auto_eoi_bitmap);
|
||||
|
||||
auto_eoi_new = bitmap_weight(synic->auto_eoi_bitmap, 256);
|
||||
|
||||
if (!!auto_eoi_old == !!auto_eoi_new)
|
||||
return;
|
||||
|
||||
mutex_lock(&vcpu->kvm->arch.apicv_update_lock);
|
||||
|
||||
if (auto_eoi_new)
|
||||
hv->synic_auto_eoi_used++;
|
||||
else
|
||||
hv->synic_auto_eoi_used--;
|
||||
|
||||
__kvm_request_apicv_update(vcpu->kvm,
|
||||
!hv->synic_auto_eoi_used,
|
||||
APICV_INHIBIT_REASON_HYPERV);
|
||||
|
||||
mutex_unlock(&vcpu->kvm->arch.apicv_update_lock);
|
||||
}
|
||||
|
||||
static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
|
||||
|
@ -933,12 +957,6 @@ int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
|
|||
|
||||
synic = to_hv_synic(vcpu);
|
||||
|
||||
/*
|
||||
* Hyper-V SynIC auto EOI SINT's are
|
||||
* not compatible with APICV, so request
|
||||
* to deactivate APICV permanently.
|
||||
*/
|
||||
kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_HYPERV);
|
||||
synic->active = true;
|
||||
synic->dont_zero_synic_pages = dont_zero_synic_pages;
|
||||
synic->control = HV_SYNIC_CONTROL_ENABLE;
|
||||
|
@ -2476,6 +2494,8 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
|
|||
ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
|
||||
if (!cpu_smt_possible())
|
||||
ent->eax |= HV_X64_NO_NONARCH_CORESHARING;
|
||||
|
||||
ent->eax |= HV_DEPRECATING_AEOI_RECOMMENDED;
|
||||
/*
|
||||
* Default number of spinlock retry attempts, matches
|
||||
* HyperV 2016.
|
||||
|
|
|
@ -220,7 +220,8 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
|
|||
struct kvm_pit *pit = vcpu->kvm->arch.vpit;
|
||||
struct hrtimer *timer;
|
||||
|
||||
if (!kvm_vcpu_is_bsp(vcpu) || !pit)
|
||||
/* Somewhat arbitrarily make vcpu0 the owner of the PIT. */
|
||||
if (vcpu->vcpu_id || !pit)
|
||||
return;
|
||||
|
||||
timer = &pit->pit_state.timer;
|
||||
|
|
|
@ -35,11 +35,7 @@ struct kvm_vcpu;
|
|||
#define IOAPIC_INIT 0x5
|
||||
#define IOAPIC_EXTINT 0x7
|
||||
|
||||
#ifdef CONFIG_X86
|
||||
#define RTC_GSI 8
|
||||
#else
|
||||
#define RTC_GSI -1U
|
||||
#endif
|
||||
|
||||
struct dest_map {
|
||||
/* vcpu bitmap where IRQ has been sent */
|
||||
|
|
|
@ -192,6 +192,9 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
|
|||
if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
|
||||
return;
|
||||
|
||||
WARN_ONCE(!irqchip_in_kernel(kvm),
|
||||
"Dirty APIC map without an in-kernel local APIC");
|
||||
|
||||
mutex_lock(&kvm->arch.apic_map_lock);
|
||||
/*
|
||||
* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
|
||||
|
@ -2265,9 +2268,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
|
|||
u64 old_value = vcpu->arch.apic_base;
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
|
||||
if (!apic)
|
||||
value |= MSR_IA32_APICBASE_BSP;
|
||||
|
||||
vcpu->arch.apic_base = value;
|
||||
|
||||
if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
|
||||
|
@ -2323,6 +2323,13 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
|
|||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
int i;
|
||||
|
||||
if (!init_event) {
|
||||
vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE |
|
||||
MSR_IA32_APICBASE_ENABLE;
|
||||
if (kvm_vcpu_is_reset_bsp(vcpu))
|
||||
vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
|
||||
}
|
||||
|
||||
if (!apic)
|
||||
return;
|
||||
|
||||
|
@ -2330,8 +2337,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
|
|||
hrtimer_cancel(&apic->lapic_timer.timer);
|
||||
|
||||
if (!init_event) {
|
||||
kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
|
||||
MSR_IA32_APICBASE_ENABLE);
|
||||
apic->base_address = APIC_DEFAULT_PHYS_BASE;
|
||||
|
||||
kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
|
||||
}
|
||||
kvm_apic_set_version(apic->vcpu);
|
||||
|
@ -2364,9 +2371,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
|
|||
apic->highest_isr_cache = -1;
|
||||
update_divide_count(apic);
|
||||
atomic_set(&apic->lapic_timer.pending, 0);
|
||||
if (kvm_vcpu_is_bsp(vcpu))
|
||||
kvm_lapic_set_base(vcpu,
|
||||
vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
|
||||
|
||||
vcpu->arch.pv_eoi.msr_val = 0;
|
||||
apic_update_ppr(apic);
|
||||
if (vcpu->arch.apicv_active) {
|
||||
|
@ -2476,11 +2481,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
|
|||
lapic_timer_advance_dynamic = false;
|
||||
}
|
||||
|
||||
/*
|
||||
* APIC is created enabled. This will prevent kvm_lapic_set_base from
|
||||
* thinking that APIC state has changed.
|
||||
*/
|
||||
vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
|
||||
static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
|
||||
kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
|
||||
|
||||
|
|
|
@ -240,4 +240,29 @@ static inline bool kvm_memslots_have_rmaps(struct kvm *kvm)
|
|||
return smp_load_acquire(&kvm->arch.memslots_have_rmaps);
|
||||
}
|
||||
|
||||
static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
|
||||
{
|
||||
/* KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K) must be 0. */
|
||||
return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
|
||||
(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
|
||||
}
|
||||
|
||||
static inline unsigned long
|
||||
__kvm_mmu_slot_lpages(struct kvm_memory_slot *slot, unsigned long npages,
|
||||
int level)
|
||||
{
|
||||
return gfn_to_index(slot->base_gfn + npages - 1,
|
||||
slot->base_gfn, level) + 1;
|
||||
}
|
||||
|
||||
static inline unsigned long
|
||||
kvm_mmu_slot_lpages(struct kvm_memory_slot *slot, int level)
|
||||
{
|
||||
return __kvm_mmu_slot_lpages(slot, slot->npages, level);
|
||||
}
|
||||
|
||||
static inline void kvm_update_page_stats(struct kvm *kvm, int level, int count)
|
||||
{
|
||||
atomic64_add(count, &kvm->stat.pages[level - 1]);
|
||||
}
|
||||
#endif
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -147,7 +147,7 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
|
|||
return;
|
||||
}
|
||||
|
||||
rmap_head = __gfn_to_rmap(gfn, rev_sp->role.level, slot);
|
||||
rmap_head = gfn_to_rmap(gfn, rev_sp->role.level, slot);
|
||||
if (!rmap_head->val) {
|
||||
if (!__ratelimit(&ratelimit_state))
|
||||
return;
|
||||
|
@ -200,7 +200,7 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
|
|||
|
||||
slots = kvm_memslots_for_spte_role(kvm, sp->role);
|
||||
slot = __gfn_to_memslot(slots, sp->gfn);
|
||||
rmap_head = __gfn_to_rmap(sp->gfn, PG_LEVEL_4K, slot);
|
||||
rmap_head = gfn_to_rmap(sp->gfn, PG_LEVEL_4K, slot);
|
||||
|
||||
for_each_rmap_spte(rmap_head, &iter, sptep) {
|
||||
if (is_writable_pte(*sptep))
|
||||
|
|
|
@ -31,13 +31,16 @@ extern bool dbg;
|
|||
#define IS_VALID_PAE_ROOT(x) (!!(x))
|
||||
|
||||
struct kvm_mmu_page {
|
||||
/*
|
||||
* Note, "link" through "spt" fit in a single 64 byte cache line on
|
||||
* 64-bit kernels, keep it that way unless there's a reason not to.
|
||||
*/
|
||||
struct list_head link;
|
||||
struct hlist_node hash_link;
|
||||
struct list_head lpage_disallowed_link;
|
||||
|
||||
bool tdp_mmu_page;
|
||||
bool unsync;
|
||||
u8 mmu_valid_gen;
|
||||
bool mmio_cached;
|
||||
bool lpage_disallowed; /* Can't be replaced by an equiv large page */
|
||||
|
||||
/*
|
||||
|
@ -59,6 +62,7 @@ struct kvm_mmu_page {
|
|||
struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
|
||||
DECLARE_BITMAP(unsync_child_bitmap, 512);
|
||||
|
||||
struct list_head lpage_disallowed_link;
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* Used out of the mmu-lock to avoid reading spte values while an
|
||||
|
@ -71,8 +75,6 @@ struct kvm_mmu_page {
|
|||
atomic_t write_flooding_count;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
bool tdp_mmu_page;
|
||||
|
||||
/* Used for freeing the page asynchronously if it is a TDP MMU page. */
|
||||
struct rcu_head rcu_head;
|
||||
#endif
|
||||
|
@ -124,13 +126,14 @@ static inline bool is_nx_huge_page_enabled(void)
|
|||
|
||||
int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync);
|
||||
|
||||
void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
|
||||
void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
|
||||
void kvm_mmu_gfn_disallow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn);
|
||||
void kvm_mmu_gfn_allow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn);
|
||||
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot, u64 gfn,
|
||||
int min_level);
|
||||
void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
|
||||
u64 start_gfn, u64 pages);
|
||||
unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
|
||||
|
||||
/*
|
||||
* Return values of handle_mmio_page_fault, mmu.page_fault, and fast_page_fault().
|
||||
|
@ -140,6 +143,9 @@ void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
|
|||
* RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
|
||||
* RET_PF_FIXED: The faulting entry has been fixed.
|
||||
* RET_PF_SPURIOUS: The faulting entry was already fixed, e.g. by another vCPU.
|
||||
*
|
||||
* Any names added to this enum should be exported to userspace for use in
|
||||
* tracepoints via TRACE_DEFINE_ENUM() in mmutrace.h
|
||||
*/
|
||||
enum {
|
||||
RET_PF_RETRY = 0,
|
||||
|
|
|
@ -54,6 +54,12 @@
|
|||
{ PFERR_RSVD_MASK, "RSVD" }, \
|
||||
{ PFERR_FETCH_MASK, "F" }
|
||||
|
||||
TRACE_DEFINE_ENUM(RET_PF_RETRY);
|
||||
TRACE_DEFINE_ENUM(RET_PF_EMULATE);
|
||||
TRACE_DEFINE_ENUM(RET_PF_INVALID);
|
||||
TRACE_DEFINE_ENUM(RET_PF_FIXED);
|
||||
TRACE_DEFINE_ENUM(RET_PF_SPURIOUS);
|
||||
|
||||
/*
|
||||
* A pagetable walk has started
|
||||
*/
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include <asm/kvm_page_track.h>
|
||||
|
||||
#include "mmu.h"
|
||||
#include "mmu_internal.h"
|
||||
|
||||
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
|
||||
|
|
|
@ -881,9 +881,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
|
|||
mmu_seq = vcpu->kvm->mmu_notifier_seq;
|
||||
smp_rmb();
|
||||
|
||||
if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, &hva,
|
||||
write_fault, &map_writable))
|
||||
return RET_PF_RETRY;
|
||||
if (kvm_faultin_pfn(vcpu, prefault, walker.gfn, addr, &pfn, &hva,
|
||||
write_fault, &map_writable, &r))
|
||||
return r;
|
||||
|
||||
if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r))
|
||||
return r;
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
#include <asm/cmpxchg.h>
|
||||
#include <trace/events/kvm.h>
|
||||
|
||||
static bool __read_mostly tdp_mmu_enabled = false;
|
||||
static bool __read_mostly tdp_mmu_enabled = true;
|
||||
module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644);
|
||||
|
||||
/* Initializes the TDP MMU for the VM, if enabled. */
|
||||
|
@ -255,26 +255,17 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
|
|||
*
|
||||
* @kvm: kvm instance
|
||||
* @sp: the new page
|
||||
* @shared: This operation may not be running under the exclusive use of
|
||||
* the MMU lock and the operation must synchronize with other
|
||||
* threads that might be adding or removing pages.
|
||||
* @account_nx: This page replaces a NX large page and should be marked for
|
||||
* eventual reclaim.
|
||||
*/
|
||||
static void tdp_mmu_link_page(struct kvm *kvm, struct kvm_mmu_page *sp,
|
||||
bool shared, bool account_nx)
|
||||
bool account_nx)
|
||||
{
|
||||
if (shared)
|
||||
spin_lock(&kvm->arch.tdp_mmu_pages_lock);
|
||||
else
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
|
||||
spin_lock(&kvm->arch.tdp_mmu_pages_lock);
|
||||
list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
|
||||
if (account_nx)
|
||||
account_huge_nx_page(kvm, sp);
|
||||
|
||||
if (shared)
|
||||
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
|
||||
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -445,13 +436,6 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
|
|||
|
||||
trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte);
|
||||
|
||||
if (is_large_pte(old_spte) != is_large_pte(new_spte)) {
|
||||
if (is_large_pte(old_spte))
|
||||
atomic64_sub(1, (atomic64_t*)&kvm->stat.lpages);
|
||||
else
|
||||
atomic64_add(1, (atomic64_t*)&kvm->stat.lpages);
|
||||
}
|
||||
|
||||
/*
|
||||
* The only times a SPTE should be changed from a non-present to
|
||||
* non-present state is when an MMIO entry is installed/modified/
|
||||
|
@ -477,6 +461,8 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
|
|||
return;
|
||||
}
|
||||
|
||||
if (is_leaf != was_leaf)
|
||||
kvm_update_page_stats(kvm, level, is_leaf ? 1 : -1);
|
||||
|
||||
if (was_leaf && is_dirty_spte(old_spte) &&
|
||||
(!is_present || !is_dirty_spte(new_spte) || pfn_changed))
|
||||
|
@ -526,6 +512,10 @@ static inline bool tdp_mmu_set_spte_atomic_no_dirty_log(struct kvm *kvm,
|
|||
if (is_removed_spte(iter->old_spte))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
|
||||
* does not hold the mmu_lock.
|
||||
*/
|
||||
if (cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte,
|
||||
new_spte) != iter->old_spte)
|
||||
return false;
|
||||
|
@ -537,15 +527,40 @@ static inline bool tdp_mmu_set_spte_atomic_no_dirty_log(struct kvm *kvm,
|
|||
return true;
|
||||
}
|
||||
|
||||
static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
|
||||
struct tdp_iter *iter,
|
||||
u64 new_spte)
|
||||
/*
|
||||
* tdp_mmu_map_set_spte_atomic - Set a leaf TDP MMU SPTE atomically to resolve a
|
||||
* TDP page fault.
|
||||
*
|
||||
* @vcpu: The vcpu instance that took the TDP page fault.
|
||||
* @iter: a tdp_iter instance currently on the SPTE that should be set
|
||||
* @new_spte: The value the SPTE should be set to
|
||||
*
|
||||
* Returns: true if the SPTE was set, false if it was not. If false is returned,
|
||||
* this function will have no side-effects.
|
||||
*/
|
||||
static inline bool tdp_mmu_map_set_spte_atomic(struct kvm_vcpu *vcpu,
|
||||
struct tdp_iter *iter,
|
||||
u64 new_spte)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
|
||||
if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, iter, new_spte))
|
||||
return false;
|
||||
|
||||
handle_changed_spte_dirty_log(kvm, iter->as_id, iter->gfn,
|
||||
iter->old_spte, new_spte, iter->level);
|
||||
/*
|
||||
* Use kvm_vcpu_gfn_to_memslot() instead of going through
|
||||
* handle_changed_spte_dirty_log() to leverage vcpu->last_used_slot.
|
||||
*/
|
||||
if (is_writable_pte(new_spte)) {
|
||||
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, iter->gfn);
|
||||
|
||||
if (slot && kvm_slot_dirty_track_enabled(slot)) {
|
||||
/* Enforced by kvm_mmu_hugepage_adjust. */
|
||||
WARN_ON_ONCE(iter->level > PG_LEVEL_4K);
|
||||
mark_page_dirty_in_slot(kvm, slot, iter->gfn);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -558,7 +573,7 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
|
|||
* immediately installing a present entry in its place
|
||||
* before the TLBs are flushed.
|
||||
*/
|
||||
if (!tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE))
|
||||
if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, iter, REMOVED_SPTE))
|
||||
return false;
|
||||
|
||||
kvm_flush_remote_tlbs_with_address(kvm, iter->gfn,
|
||||
|
@ -789,21 +804,15 @@ retry:
|
|||
* non-root pages mapping GFNs strictly within that range. Returns true if
|
||||
* SPTEs have been cleared and a TLB flush is needed before releasing the
|
||||
* MMU lock.
|
||||
*
|
||||
* If shared is true, this thread holds the MMU lock in read mode and must
|
||||
* account for the possibility that other threads are modifying the paging
|
||||
* structures concurrently. If shared is false, this thread should hold the
|
||||
* MMU in write mode.
|
||||
*/
|
||||
bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start,
|
||||
gfn_t end, bool can_yield, bool flush,
|
||||
bool shared)
|
||||
gfn_t end, bool can_yield, bool flush)
|
||||
{
|
||||
struct kvm_mmu_page *root;
|
||||
|
||||
for_each_tdp_mmu_root_yield_safe(kvm, root, as_id, shared)
|
||||
for_each_tdp_mmu_root_yield_safe(kvm, root, as_id, false)
|
||||
flush = zap_gfn_range(kvm, root, start, end, can_yield, flush,
|
||||
shared);
|
||||
false);
|
||||
|
||||
return flush;
|
||||
}
|
||||
|
@ -814,8 +823,7 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
|
|||
int i;
|
||||
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
|
||||
flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, -1ull,
|
||||
flush, false);
|
||||
flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, -1ull, flush);
|
||||
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
|
@ -940,7 +948,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
|
|||
|
||||
if (new_spte == iter->old_spte)
|
||||
ret = RET_PF_SPURIOUS;
|
||||
else if (!tdp_mmu_set_spte_atomic(vcpu->kvm, iter, new_spte))
|
||||
else if (!tdp_mmu_map_set_spte_atomic(vcpu, iter, new_spte))
|
||||
return RET_PF_RETRY;
|
||||
|
||||
/*
|
||||
|
@ -1044,9 +1052,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
|
|||
new_spte = make_nonleaf_spte(child_pt,
|
||||
!shadow_accessed_mask);
|
||||
|
||||
if (tdp_mmu_set_spte_atomic(vcpu->kvm, &iter,
|
||||
new_spte)) {
|
||||
tdp_mmu_link_page(vcpu->kvm, sp, true,
|
||||
if (tdp_mmu_set_spte_atomic_no_dirty_log(vcpu->kvm, &iter, new_spte)) {
|
||||
tdp_mmu_link_page(vcpu->kvm, sp,
|
||||
huge_page_disallowed &&
|
||||
req_level >= iter.level);
|
||||
|
||||
|
@ -1255,8 +1262,8 @@ retry:
|
|||
* only affect leaf SPTEs down to min_level.
|
||||
* Returns true if an SPTE has been changed and the TLBs need to be flushed.
|
||||
*/
|
||||
bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
int min_level)
|
||||
bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *slot, int min_level)
|
||||
{
|
||||
struct kvm_mmu_page *root;
|
||||
bool spte_set = false;
|
||||
|
@ -1326,7 +1333,8 @@ retry:
|
|||
* each SPTE. Returns true if an SPTE has been changed and the TLBs need to
|
||||
* be flushed.
|
||||
*/
|
||||
bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
|
||||
bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *slot)
|
||||
{
|
||||
struct kvm_mmu_page *root;
|
||||
bool spte_set = false;
|
||||
|
@ -1529,6 +1537,8 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
|
|||
/*
|
||||
* Return the level of the lowest level SPTE added to sptes.
|
||||
* That SPTE may be non-present.
|
||||
*
|
||||
* Must be called between kvm_tdp_mmu_walk_lockless_{begin,end}.
|
||||
*/
|
||||
int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
|
||||
int *root_level)
|
||||
|
@ -1540,14 +1550,47 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
|
|||
|
||||
*root_level = vcpu->arch.mmu->shadow_root_level;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
|
||||
leaf = iter.level;
|
||||
sptes[leaf] = iter.old_spte;
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
return leaf;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the last level spte pointer of the shadow page walk for the given
|
||||
* gpa, and sets *spte to the spte value. This spte may be non-preset. If no
|
||||
* walk could be performed, returns NULL and *spte does not contain valid data.
|
||||
*
|
||||
* Contract:
|
||||
* - Must be called between kvm_tdp_mmu_walk_lockless_{begin,end}.
|
||||
* - The returned sptep must not be used after kvm_tdp_mmu_walk_lockless_end.
|
||||
*
|
||||
* WARNING: This function is only intended to be called during fast_page_fault.
|
||||
*/
|
||||
u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr,
|
||||
u64 *spte)
|
||||
{
|
||||
struct tdp_iter iter;
|
||||
struct kvm_mmu *mmu = vcpu->arch.mmu;
|
||||
gfn_t gfn = addr >> PAGE_SHIFT;
|
||||
tdp_ptep_t sptep = NULL;
|
||||
|
||||
tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
|
||||
*spte = iter.old_spte;
|
||||
sptep = iter.sptep;
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform the rcu_dereference to get the raw spte pointer value since
|
||||
* we are passing it up to fast_page_fault, which is shared with the
|
||||
* legacy MMU and thus does not retain the TDP MMU-specific __rcu
|
||||
* annotation.
|
||||
*
|
||||
* This is safe since fast_page_fault obeys the contracts of this
|
||||
* function as well as all TDP MMU contracts around modifying SPTEs
|
||||
* outside of mmu_lock.
|
||||
*/
|
||||
return rcu_dereference(sptep);
|
||||
}
|
||||
|
|
|
@ -20,14 +20,11 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
|
|||
bool shared);
|
||||
|
||||
bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start,
|
||||
gfn_t end, bool can_yield, bool flush,
|
||||
bool shared);
|
||||
gfn_t end, bool can_yield, bool flush);
|
||||
static inline bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id,
|
||||
gfn_t start, gfn_t end, bool flush,
|
||||
bool shared)
|
||||
gfn_t start, gfn_t end, bool flush)
|
||||
{
|
||||
return __kvm_tdp_mmu_zap_gfn_range(kvm, as_id, start, end, true, flush,
|
||||
shared);
|
||||
return __kvm_tdp_mmu_zap_gfn_range(kvm, as_id, start, end, true, flush);
|
||||
}
|
||||
static inline bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
{
|
||||
|
@ -44,7 +41,7 @@ static inline bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
|
|||
*/
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
return __kvm_tdp_mmu_zap_gfn_range(kvm, kvm_mmu_page_as_id(sp),
|
||||
sp->gfn, end, false, false, false);
|
||||
sp->gfn, end, false, false);
|
||||
}
|
||||
|
||||
void kvm_tdp_mmu_zap_all(struct kvm *kvm);
|
||||
|
@ -61,10 +58,10 @@ bool kvm_tdp_mmu_age_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
|
|||
bool kvm_tdp_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
|
||||
bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
|
||||
|
||||
bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
int min_level);
|
||||
bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *slot, int min_level);
|
||||
bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot);
|
||||
const struct kvm_memory_slot *slot);
|
||||
void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
gfn_t gfn, unsigned long mask,
|
||||
|
@ -77,8 +74,20 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
|
|||
struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
int min_level);
|
||||
|
||||
static inline void kvm_tdp_mmu_walk_lockless_begin(void)
|
||||
{
|
||||
rcu_read_lock();
|
||||
}
|
||||
|
||||
static inline void kvm_tdp_mmu_walk_lockless_end(void)
|
||||
{
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
|
||||
int *root_level);
|
||||
u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr,
|
||||
u64 *spte);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
bool kvm_mmu_init_tdp_mmu(struct kvm *kvm);
|
||||
|
|
|
@ -137,18 +137,20 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
|
|||
pmc->perf_event = event;
|
||||
pmc_to_pmu(pmc)->event_count++;
|
||||
clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
|
||||
pmc->is_paused = false;
|
||||
}
|
||||
|
||||
static void pmc_pause_counter(struct kvm_pmc *pmc)
|
||||
{
|
||||
u64 counter = pmc->counter;
|
||||
|
||||
if (!pmc->perf_event)
|
||||
if (!pmc->perf_event || pmc->is_paused)
|
||||
return;
|
||||
|
||||
/* update counter, reset event value to avoid redundant accumulation */
|
||||
counter += perf_event_pause(pmc->perf_event, true);
|
||||
pmc->counter = counter & pmc_bitmask(pmc);
|
||||
pmc->is_paused = true;
|
||||
}
|
||||
|
||||
static bool pmc_resume_counter(struct kvm_pmc *pmc)
|
||||
|
@ -163,6 +165,7 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
|
|||
|
||||
/* reuse perf_event to serve as pmc_reprogram_counter() does*/
|
||||
perf_event_enable(pmc->perf_event);
|
||||
pmc->is_paused = false;
|
||||
|
||||
clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
|
||||
return true;
|
||||
|
|
|
@ -55,7 +55,7 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
|
|||
u64 counter, enabled, running;
|
||||
|
||||
counter = pmc->counter;
|
||||
if (pmc->perf_event)
|
||||
if (pmc->perf_event && !pmc->is_paused)
|
||||
counter += perf_event_read_value(pmc->perf_event,
|
||||
&enabled, &running);
|
||||
/* FIXME: Scaling needed? */
|
||||
|
|
|
@ -197,6 +197,8 @@ void avic_init_vmcb(struct vcpu_svm *svm)
|
|||
vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
|
||||
vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
|
||||
vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
|
||||
vmcb->control.avic_vapic_bar = APIC_DEFAULT_PHYS_BASE & VMCB_AVIC_APIC_BAR_MASK;
|
||||
|
||||
if (kvm_apicv_activated(svm->vcpu.kvm))
|
||||
vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
|
||||
else
|
||||
|
@ -225,31 +227,26 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
|
|||
* field of the VMCB. Therefore, we set up the
|
||||
* APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
|
||||
*/
|
||||
static int avic_update_access_page(struct kvm *kvm, bool activate)
|
||||
static int avic_alloc_access_page(struct kvm *kvm)
|
||||
{
|
||||
void __user *ret;
|
||||
int r = 0;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
/*
|
||||
* During kvm_destroy_vm(), kvm_pit_set_reinject() could trigger
|
||||
* APICv mode change, which update APIC_ACCESS_PAGE_PRIVATE_MEMSLOT
|
||||
* memory region. So, we need to ensure that kvm->mm == current->mm.
|
||||
*/
|
||||
if ((kvm->arch.apic_access_memslot_enabled == activate) ||
|
||||
(kvm->mm != current->mm))
|
||||
|
||||
if (kvm->arch.apic_access_memslot_enabled)
|
||||
goto out;
|
||||
|
||||
ret = __x86_set_memory_region(kvm,
|
||||
APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
|
||||
APIC_DEFAULT_PHYS_BASE,
|
||||
activate ? PAGE_SIZE : 0);
|
||||
PAGE_SIZE);
|
||||
if (IS_ERR(ret)) {
|
||||
r = PTR_ERR(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
kvm->arch.apic_access_memslot_enabled = activate;
|
||||
kvm->arch.apic_access_memslot_enabled = true;
|
||||
out:
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
return r;
|
||||
|
@ -270,7 +267,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
|
|||
if (kvm_apicv_activated(vcpu->kvm)) {
|
||||
int ret;
|
||||
|
||||
ret = avic_update_access_page(vcpu->kvm, true);
|
||||
ret = avic_alloc_access_page(vcpu->kvm);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
@ -587,17 +584,6 @@ void avic_post_state_restore(struct kvm_vcpu *vcpu)
|
|||
avic_handle_ldr_update(vcpu);
|
||||
}
|
||||
|
||||
void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate)
|
||||
{
|
||||
if (!enable_apicv || !lapic_in_kernel(vcpu))
|
||||
return;
|
||||
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_request_apicv_update(vcpu->kvm, activate,
|
||||
APICV_INHIBIT_REASON_IRQWIN);
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
}
|
||||
|
||||
void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return;
|
||||
|
@ -667,6 +653,11 @@ void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
vmcb_mark_dirty(vmcb, VMCB_AVIC);
|
||||
|
||||
if (activated)
|
||||
avic_vcpu_load(vcpu, vcpu->cpu);
|
||||
else
|
||||
avic_vcpu_put(vcpu);
|
||||
|
||||
svm_set_pi_irte_mode(vcpu, activated);
|
||||
}
|
||||
|
||||
|
@ -918,10 +909,6 @@ bool svm_check_apicv_inhibit_reasons(ulong bit)
|
|||
return supported & BIT(bit);
|
||||
}
|
||||
|
||||
void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate)
|
||||
{
|
||||
avic_update_access_page(kvm, activate);
|
||||
}
|
||||
|
||||
static inline int
|
||||
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
|
||||
|
@ -960,9 +947,6 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
|||
int h_physical_id = kvm_cpu_get_apicid(cpu);
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
if (!kvm_vcpu_apicv_active(vcpu))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Since the host physical APIC id is 8 bits,
|
||||
* we can support host APIC ID upto 255.
|
||||
|
@ -990,9 +974,6 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
|
|||
u64 entry;
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
if (!kvm_vcpu_apicv_active(vcpu))
|
||||
return;
|
||||
|
||||
entry = READ_ONCE(*(svm->avic_physical_id_cache));
|
||||
if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
|
||||
avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
|
||||
|
@ -1009,6 +990,10 @@ static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
|
|||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
svm->avic_is_running = is_run;
|
||||
|
||||
if (!kvm_vcpu_apicv_active(vcpu))
|
||||
return;
|
||||
|
||||
if (is_run)
|
||||
avic_vcpu_load(vcpu, vcpu->cpu);
|
||||
else
|
||||
|
|
|
@ -666,11 +666,6 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
|
|||
goto out;
|
||||
}
|
||||
|
||||
|
||||
/* Clear internal status */
|
||||
kvm_clear_exception_queue(vcpu);
|
||||
kvm_clear_interrupt_queue(vcpu);
|
||||
|
||||
/*
|
||||
* Since vmcb01 is not in use, we can use it to store some of the L1
|
||||
* state.
|
||||
|
|
|
@ -28,8 +28,6 @@
|
|||
#include "cpuid.h"
|
||||
#include "trace.h"
|
||||
|
||||
#define __ex(x) __kvm_handle_fault_on_reboot(x)
|
||||
|
||||
#ifndef CONFIG_KVM_AMD_SEV
|
||||
/*
|
||||
* When this config is not defined, SEV feature is not supported and APIs in
|
||||
|
@ -584,6 +582,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
|
|||
save->xcr0 = svm->vcpu.arch.xcr0;
|
||||
save->pkru = svm->vcpu.arch.pkru;
|
||||
save->xss = svm->vcpu.arch.ia32_xss;
|
||||
save->dr6 = svm->vcpu.arch.dr6;
|
||||
|
||||
/*
|
||||
* SEV-ES will use a VMSA that is pointed to by the VMCB, not
|
||||
|
|
|
@ -46,8 +46,6 @@
|
|||
#include "kvm_onhyperv.h"
|
||||
#include "svm_onhyperv.h"
|
||||
|
||||
#define __ex(x) __kvm_handle_fault_on_reboot(x)
|
||||
|
||||
MODULE_AUTHOR("Qumranet");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
||||
|
@ -261,7 +259,7 @@ u32 svm_msrpm_offset(u32 msr)
|
|||
static int get_max_npt_level(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
return PT64_ROOT_4LEVEL;
|
||||
return pgtable_l5_enabled() ? PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
|
||||
#else
|
||||
return PT32E_ROOT_LEVEL;
|
||||
#endif
|
||||
|
@ -462,11 +460,6 @@ static int has_svm(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
if (pgtable_l5_enabled()) {
|
||||
pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -1015,7 +1008,9 @@ static __init int svm_hardware_setup(void)
|
|||
if (!boot_cpu_has(X86_FEATURE_NPT))
|
||||
npt_enabled = false;
|
||||
|
||||
kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G);
|
||||
/* Force VM NPT level equal to the host's max NPT level */
|
||||
kvm_configure_mmu(npt_enabled, get_max_npt_level(),
|
||||
get_max_npt_level(), PG_LEVEL_1G);
|
||||
pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
|
||||
|
||||
/* Note, SEV setup consumes npt_enabled. */
|
||||
|
@ -1161,8 +1156,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
|
|||
struct vmcb_control_area *control = &svm->vmcb->control;
|
||||
struct vmcb_save_area *save = &svm->vmcb->save;
|
||||
|
||||
vcpu->arch.hflags = 0;
|
||||
|
||||
svm_set_intercept(svm, INTERCEPT_CR0_READ);
|
||||
svm_set_intercept(svm, INTERCEPT_CR3_READ);
|
||||
svm_set_intercept(svm, INTERCEPT_CR4_READ);
|
||||
|
@ -1241,29 +1234,14 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
|
|||
SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
|
||||
save->cs.limit = 0xffff;
|
||||
|
||||
save->gdtr.base = 0;
|
||||
save->gdtr.limit = 0xffff;
|
||||
save->idtr.base = 0;
|
||||
save->idtr.limit = 0xffff;
|
||||
|
||||
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
|
||||
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
|
||||
|
||||
svm_set_cr4(vcpu, 0);
|
||||
svm_set_efer(vcpu, 0);
|
||||
save->dr6 = 0xffff0ff0;
|
||||
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
|
||||
save->rip = 0x0000fff0;
|
||||
vcpu->arch.regs[VCPU_REGS_RIP] = save->rip;
|
||||
|
||||
/*
|
||||
* svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
|
||||
* It also updates the guest-visible cr0 value.
|
||||
*/
|
||||
svm_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
|
||||
kvm_mmu_reset_context(vcpu);
|
||||
|
||||
save->cr4 = X86_CR4_PAE;
|
||||
/* rdx = ?? */
|
||||
|
||||
if (npt_enabled) {
|
||||
/* Setup VMCB for Nested Paging */
|
||||
control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
|
||||
|
@ -1273,14 +1251,12 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
|
|||
svm_clr_intercept(svm, INTERCEPT_CR3_WRITE);
|
||||
save->g_pat = vcpu->arch.pat;
|
||||
save->cr3 = 0;
|
||||
save->cr4 = 0;
|
||||
}
|
||||
svm->current_vmcb->asid_generation = 0;
|
||||
svm->asid = 0;
|
||||
|
||||
svm->nested.vmcb12_gpa = INVALID_GPA;
|
||||
svm->nested.last_vmcb12_gpa = INVALID_GPA;
|
||||
vcpu->arch.hflags = 0;
|
||||
|
||||
if (!kvm_pause_in_guest(vcpu->kvm)) {
|
||||
control->pause_filter_count = pause_filter_count;
|
||||
|
@ -1330,25 +1306,11 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
|
|||
static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
u32 dummy;
|
||||
u32 eax = 1;
|
||||
|
||||
svm->spec_ctrl = 0;
|
||||
svm->virt_spec_ctrl = 0;
|
||||
|
||||
if (!init_event) {
|
||||
vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE |
|
||||
MSR_IA32_APICBASE_ENABLE;
|
||||
if (kvm_vcpu_is_reset_bsp(vcpu))
|
||||
vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
|
||||
}
|
||||
init_vmcb(vcpu);
|
||||
|
||||
kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, false);
|
||||
kvm_rdx_write(vcpu, eax);
|
||||
|
||||
if (kvm_vcpu_apicv_active(vcpu) && !init_event)
|
||||
avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
|
||||
}
|
||||
|
||||
void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb)
|
||||
|
@ -1513,12 +1475,15 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
|||
sd->current_vmcb = svm->vmcb;
|
||||
indirect_branch_prediction_barrier();
|
||||
}
|
||||
avic_vcpu_load(vcpu, cpu);
|
||||
if (kvm_vcpu_apicv_active(vcpu))
|
||||
avic_vcpu_load(vcpu, cpu);
|
||||
}
|
||||
|
||||
static void svm_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
avic_vcpu_put(vcpu);
|
||||
if (kvm_vcpu_apicv_active(vcpu))
|
||||
avic_vcpu_put(vcpu);
|
||||
|
||||
svm_prepare_host_switch(vcpu);
|
||||
|
||||
++vcpu->stat.host_state_reload;
|
||||
|
@ -1560,7 +1525,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
|
|||
load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
KVM_BUG_ON(1, vcpu->kvm);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2078,11 +2043,15 @@ static int shutdown_interception(struct kvm_vcpu *vcpu)
|
|||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* VMCB is undefined after a SHUTDOWN intercept
|
||||
* so reinitialize it.
|
||||
* VMCB is undefined after a SHUTDOWN intercept. INIT the vCPU to put
|
||||
* the VMCB in a known good state. Unfortuately, KVM doesn't have
|
||||
* KVM_MP_STATE_SHUTDOWN and can't add it without potentially breaking
|
||||
* userspace. At a platform view, INIT is acceptable behavior as
|
||||
* there exist bare metal platforms that automatically INIT the CPU
|
||||
* in response to shutdown.
|
||||
*/
|
||||
clear_page(svm->vmcb);
|
||||
init_vmcb(vcpu);
|
||||
kvm_vcpu_reset(vcpu, true);
|
||||
|
||||
kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
|
||||
return 0;
|
||||
|
@ -2993,10 +2962,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
|||
svm->msr_decfg = data;
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_APICBASE:
|
||||
if (kvm_vcpu_apicv_active(vcpu))
|
||||
avic_update_vapic_bar(to_svm(vcpu), data);
|
||||
fallthrough;
|
||||
default:
|
||||
return kvm_set_msr_common(vcpu, msr);
|
||||
}
|
||||
|
@ -3021,7 +2986,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu)
|
|||
* In this case AVIC was temporarily disabled for
|
||||
* requesting the IRQ window and we have to re-enable it.
|
||||
*/
|
||||
svm_toggle_avic_for_irq_window(vcpu, true);
|
||||
kvm_request_apicv_update(vcpu->kvm, true, APICV_INHIBIT_REASON_IRQWIN);
|
||||
|
||||
++vcpu->stat.irq_window_exits;
|
||||
return 1;
|
||||
|
@ -3269,12 +3234,14 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
|
|||
"excp_to:", save->last_excp_to);
|
||||
}
|
||||
|
||||
static bool svm_check_exit_valid(struct kvm_vcpu *vcpu, u64 exit_code)
|
||||
{
|
||||
return (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
|
||||
svm_exit_handlers[exit_code]);
|
||||
}
|
||||
|
||||
static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
|
||||
{
|
||||
if (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
|
||||
svm_exit_handlers[exit_code])
|
||||
return 0;
|
||||
|
||||
vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code);
|
||||
dump_vmcb(vcpu);
|
||||
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
|
@ -3282,14 +3249,13 @@ static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
|
|||
vcpu->run->internal.ndata = 2;
|
||||
vcpu->run->internal.data[0] = exit_code;
|
||||
vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
|
||||
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code)
|
||||
{
|
||||
if (svm_handle_invalid_exit(vcpu, exit_code))
|
||||
return 0;
|
||||
if (!svm_check_exit_valid(vcpu, exit_code))
|
||||
return svm_handle_invalid_exit(vcpu, exit_code);
|
||||
|
||||
#ifdef CONFIG_RETPOLINE
|
||||
if (exit_code == SVM_EXIT_MSR)
|
||||
|
@ -3573,7 +3539,7 @@ static void svm_enable_irq_window(struct kvm_vcpu *vcpu)
|
|||
* via AVIC. In such case, we need to temporarily disable AVIC,
|
||||
* and fallback to injecting IRQ via V_IRQ.
|
||||
*/
|
||||
svm_toggle_avic_for_irq_window(vcpu, false);
|
||||
kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_IRQWIN);
|
||||
svm_set_vintr(svm);
|
||||
}
|
||||
}
|
||||
|
@ -3808,6 +3774,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
|
||||
pre_svm_run(vcpu);
|
||||
|
||||
WARN_ON_ONCE(kvm_apicv_activated(vcpu->kvm) != kvm_vcpu_apicv_active(vcpu));
|
||||
|
||||
sync_lapic_to_cr8(vcpu);
|
||||
|
||||
if (unlikely(svm->asid != svm->vmcb->control.asid)) {
|
||||
|
@ -4610,7 +4578,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
|
|||
.set_virtual_apic_mode = svm_set_virtual_apic_mode,
|
||||
.refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
|
||||
.check_apicv_inhibit_reasons = svm_check_apicv_inhibit_reasons,
|
||||
.pre_update_apicv_exec_ctrl = svm_pre_update_apicv_exec_ctrl,
|
||||
.load_eoi_exitmap = svm_load_eoi_exitmap,
|
||||
.hwapic_irr_update = svm_hwapic_irr_update,
|
||||
.hwapic_isr_update = svm_hwapic_isr_update,
|
||||
|
|
|
@ -503,12 +503,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
|
|||
|
||||
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
|
||||
|
||||
static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
|
||||
{
|
||||
svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
|
||||
vmcb_mark_dirty(svm->vmcb, VMCB_AVIC);
|
||||
}
|
||||
|
||||
static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
@ -524,7 +518,6 @@ int avic_ga_log_notifier(u32 ga_tag);
|
|||
void avic_vm_destroy(struct kvm *kvm);
|
||||
int avic_vm_init(struct kvm *kvm);
|
||||
void avic_init_vmcb(struct vcpu_svm *svm);
|
||||
void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate);
|
||||
int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu);
|
||||
int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu);
|
||||
int avic_init_vcpu(struct vcpu_svm *svm);
|
||||
|
@ -534,7 +527,6 @@ void avic_post_state_restore(struct kvm_vcpu *vcpu);
|
|||
void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
|
||||
void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
|
||||
bool svm_check_apicv_inhibit_reasons(ulong bit);
|
||||
void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate);
|
||||
void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
|
||||
void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr);
|
||||
void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr);
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
#include <linux/compiler_types.h>
|
||||
|
||||
#include <asm/kvm_host.h>
|
||||
#include "x86.h"
|
||||
|
||||
#define svm_asm(insn, clobber...) \
|
||||
do { \
|
||||
|
|
|
@ -14,7 +14,6 @@ DEFINE_STATIC_KEY_FALSE(enable_evmcs);
|
|||
|
||||
#if IS_ENABLED(CONFIG_HYPERV)
|
||||
|
||||
#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
|
||||
#define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x)
|
||||
#define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \
|
||||
{EVMCS1_OFFSET(name), clean_field}
|
||||
|
|
|
@ -73,8 +73,6 @@ struct evmcs_field {
|
|||
extern const struct evmcs_field vmcs_field_to_evmcs_1[];
|
||||
extern const unsigned int nr_evmcs_1_fields;
|
||||
|
||||
#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
|
||||
|
||||
static __always_inline int get_evmcs_offset(unsigned long field,
|
||||
u16 *clean_field)
|
||||
{
|
||||
|
@ -95,8 +93,6 @@ static __always_inline int get_evmcs_offset(unsigned long field,
|
|||
return evmcs_field->offset;
|
||||
}
|
||||
|
||||
#undef ROL16
|
||||
|
||||
static inline void evmcs_write64(unsigned long field, u64 value)
|
||||
{
|
||||
u16 clean_field;
|
||||
|
|
|
@ -2207,7 +2207,8 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
|
|||
}
|
||||
}
|
||||
|
||||
static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
|
||||
static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs01,
|
||||
struct vmcs12 *vmcs12)
|
||||
{
|
||||
u32 exec_control;
|
||||
u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
|
||||
|
@ -2218,23 +2219,22 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
|
|||
/*
|
||||
* PIN CONTROLS
|
||||
*/
|
||||
exec_control = vmx_pin_based_exec_ctrl(vmx);
|
||||
exec_control = __pin_controls_get(vmcs01);
|
||||
exec_control |= (vmcs12->pin_based_vm_exec_control &
|
||||
~PIN_BASED_VMX_PREEMPTION_TIMER);
|
||||
|
||||
/* Posted interrupts setting is only taken from vmcs12. */
|
||||
if (nested_cpu_has_posted_intr(vmcs12)) {
|
||||
vmx->nested.pi_pending = false;
|
||||
if (nested_cpu_has_posted_intr(vmcs12))
|
||||
vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
|
||||
vmx->nested.pi_pending = false;
|
||||
} else {
|
||||
else
|
||||
exec_control &= ~PIN_BASED_POSTED_INTR;
|
||||
}
|
||||
pin_controls_set(vmx, exec_control);
|
||||
|
||||
/*
|
||||
* EXEC CONTROLS
|
||||
*/
|
||||
exec_control = vmx_exec_control(vmx); /* L0's desires */
|
||||
exec_control = __exec_controls_get(vmcs01); /* L0's desires */
|
||||
exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING;
|
||||
exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING;
|
||||
exec_control &= ~CPU_BASED_TPR_SHADOW;
|
||||
|
@ -2271,10 +2271,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
|
|||
* SECONDARY EXEC CONTROLS
|
||||
*/
|
||||
if (cpu_has_secondary_exec_ctrls()) {
|
||||
exec_control = vmx->secondary_exec_control;
|
||||
exec_control = __secondary_exec_controls_get(vmcs01);
|
||||
|
||||
/* Take the following fields only from vmcs12 */
|
||||
exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
|
||||
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
|
||||
SECONDARY_EXEC_ENABLE_INVPCID |
|
||||
SECONDARY_EXEC_ENABLE_RDTSCP |
|
||||
SECONDARY_EXEC_XSAVES |
|
||||
|
@ -2282,7 +2283,9 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
|
|||
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
|
||||
SECONDARY_EXEC_APIC_REGISTER_VIRT |
|
||||
SECONDARY_EXEC_ENABLE_VMFUNC |
|
||||
SECONDARY_EXEC_TSC_SCALING);
|
||||
SECONDARY_EXEC_TSC_SCALING |
|
||||
SECONDARY_EXEC_DESC);
|
||||
|
||||
if (nested_cpu_has(vmcs12,
|
||||
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
|
||||
exec_control |= vmcs12->secondary_vm_exec_control;
|
||||
|
@ -2322,8 +2325,9 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
|
|||
* on the related bits (if supported by the CPU) in the hope that
|
||||
* we can avoid VMWrites during vmx_set_efer().
|
||||
*/
|
||||
exec_control = (vmcs12->vm_entry_controls | vmx_vmentry_ctrl()) &
|
||||
~VM_ENTRY_IA32E_MODE & ~VM_ENTRY_LOAD_IA32_EFER;
|
||||
exec_control = __vm_entry_controls_get(vmcs01);
|
||||
exec_control |= vmcs12->vm_entry_controls;
|
||||
exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER);
|
||||
if (cpu_has_load_ia32_efer()) {
|
||||
if (guest_efer & EFER_LMA)
|
||||
exec_control |= VM_ENTRY_IA32E_MODE;
|
||||
|
@ -2339,9 +2343,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
|
|||
* we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
|
||||
* bits may be modified by vmx_set_efer() in prepare_vmcs02().
|
||||
*/
|
||||
exec_control = vmx_vmexit_ctrl();
|
||||
exec_control = __vm_exit_controls_get(vmcs01);
|
||||
if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
|
||||
exec_control |= VM_EXIT_LOAD_IA32_EFER;
|
||||
else
|
||||
exec_control &= ~VM_EXIT_LOAD_IA32_EFER;
|
||||
vm_exit_controls_set(vmx, exec_control);
|
||||
|
||||
/*
|
||||
|
@ -3384,7 +3390,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
|
|||
|
||||
vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
|
||||
|
||||
prepare_vmcs02_early(vmx, vmcs12);
|
||||
prepare_vmcs02_early(vmx, &vmx->vmcs01, vmcs12);
|
||||
|
||||
if (from_vmentry) {
|
||||
if (unlikely(!nested_get_vmcs12_pages(vcpu))) {
|
||||
|
@ -4304,7 +4310,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
|
|||
seg.l = 1;
|
||||
else
|
||||
seg.db = 1;
|
||||
vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
|
||||
__vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
|
||||
seg = (struct kvm_segment) {
|
||||
.base = 0,
|
||||
.limit = 0xFFFFFFFF,
|
||||
|
@ -4315,17 +4321,17 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
|
|||
.g = 1
|
||||
};
|
||||
seg.selector = vmcs12->host_ds_selector;
|
||||
vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
|
||||
__vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
|
||||
seg.selector = vmcs12->host_es_selector;
|
||||
vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
|
||||
__vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
|
||||
seg.selector = vmcs12->host_ss_selector;
|
||||
vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
|
||||
__vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
|
||||
seg.selector = vmcs12->host_fs_selector;
|
||||
seg.base = vmcs12->host_fs_base;
|
||||
vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
|
||||
__vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
|
||||
seg.selector = vmcs12->host_gs_selector;
|
||||
seg.base = vmcs12->host_gs_base;
|
||||
vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
|
||||
__vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
|
||||
seg = (struct kvm_segment) {
|
||||
.base = vmcs12->host_tr_base,
|
||||
.limit = 0x67,
|
||||
|
@ -4333,14 +4339,15 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
|
|||
.type = 11,
|
||||
.present = 1
|
||||
};
|
||||
vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
|
||||
__vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
|
||||
|
||||
memset(&seg, 0, sizeof(seg));
|
||||
seg.unusable = 1;
|
||||
__vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR);
|
||||
|
||||
kvm_set_dr(vcpu, 7, 0x400);
|
||||
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
|
||||
|
||||
if (cpu_has_vmx_msr_bitmap())
|
||||
vmx_update_msr_bitmap(vcpu);
|
||||
|
||||
if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
|
||||
vmcs12->vm_exit_msr_load_count))
|
||||
nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
|
||||
|
@ -4419,9 +4426,6 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
|
|||
|
||||
kvm_mmu_reset_context(vcpu);
|
||||
|
||||
if (cpu_has_vmx_msr_bitmap())
|
||||
vmx_update_msr_bitmap(vcpu);
|
||||
|
||||
/*
|
||||
* This nasty bit of open coding is a compromise between blindly
|
||||
* loading L1's MSRs using the exit load lists (incorrect emulation
|
||||
|
|
|
@ -437,13 +437,13 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
|||
!(msr & MSR_PMC_FULL_WIDTH_BIT))
|
||||
data = (s64)(s32)data;
|
||||
pmc->counter += data - pmc_read_counter(pmc);
|
||||
if (pmc->perf_event)
|
||||
if (pmc->perf_event && !pmc->is_paused)
|
||||
perf_event_period(pmc->perf_event,
|
||||
get_sample_period(pmc, data));
|
||||
return 0;
|
||||
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
|
||||
pmc->counter += data - pmc_read_counter(pmc);
|
||||
if (pmc->perf_event)
|
||||
if (pmc->perf_event && !pmc->is_paused)
|
||||
perf_event_period(pmc->perf_event,
|
||||
get_sample_period(pmc, data));
|
||||
return 0;
|
||||
|
|
|
@ -11,6 +11,8 @@
|
|||
|
||||
#include "capabilities.h"
|
||||
|
||||
#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
|
||||
|
||||
struct vmcs_hdr {
|
||||
u32 revision_id:31;
|
||||
u32 shadow_vmcs:1;
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
#include "vmcs12.h"
|
||||
|
||||
#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
|
||||
#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
|
||||
#define FIELD(number, name) [ROL16(number, 6)] = VMCS12_OFFSET(name)
|
||||
#define FIELD64(number, name) \
|
||||
|
|
|
@ -364,8 +364,6 @@ static inline void vmx_check_vmcs12_offsets(void)
|
|||
extern const unsigned short vmcs_field_to_offset_table[];
|
||||
extern const unsigned int nr_vmcs12_fields;
|
||||
|
||||
#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
|
||||
|
||||
static inline short vmcs_field_to_offset(unsigned long field)
|
||||
{
|
||||
unsigned short offset;
|
||||
|
@ -385,8 +383,6 @@ static inline short vmcs_field_to_offset(unsigned long field)
|
|||
return offset;
|
||||
}
|
||||
|
||||
#undef ROL16
|
||||
|
||||
static inline u64 vmcs12_read_any(struct vmcs12 *vmcs12, unsigned long field,
|
||||
u16 offset)
|
||||
{
|
||||
|
|
|
@ -136,8 +136,7 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
|
|||
#define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
|
||||
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
|
||||
#define KVM_VM_CR0_ALWAYS_ON \
|
||||
(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \
|
||||
X86_CR0_WP | X86_CR0_PG | X86_CR0_PE)
|
||||
(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
|
||||
|
||||
#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE
|
||||
#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
|
||||
|
@ -1648,11 +1647,12 @@ static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr,
|
|||
}
|
||||
|
||||
/*
|
||||
* Set up the vmcs to automatically save and restore system
|
||||
* msrs. Don't touch the 64-bit msrs if the guest is in legacy
|
||||
* mode, as fiddling with msrs is very expensive.
|
||||
* Configuring user return MSRs to automatically save, load, and restore MSRs
|
||||
* that need to be shoved into hardware when running the guest. Note, omitting
|
||||
* an MSR here does _NOT_ mean it's not emulated, only that it will not be
|
||||
* loaded into hardware when running the guest.
|
||||
*/
|
||||
static void setup_msrs(struct vcpu_vmx *vmx)
|
||||
static void vmx_setup_uret_msrs(struct vcpu_vmx *vmx)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
bool load_syscall_msrs;
|
||||
|
@ -1682,9 +1682,6 @@ static void setup_msrs(struct vcpu_vmx *vmx)
|
|||
*/
|
||||
vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, boot_cpu_has(X86_FEATURE_RTM));
|
||||
|
||||
if (cpu_has_vmx_msr_bitmap())
|
||||
vmx_update_msr_bitmap(&vmx->vcpu);
|
||||
|
||||
/*
|
||||
* The set of MSRs to load may have changed, reload MSRs before the
|
||||
* next VM-Enter.
|
||||
|
@ -2263,8 +2260,11 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
|
|||
vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & guest_owned_bits;
|
||||
break;
|
||||
case VCPU_EXREG_CR3:
|
||||
if (is_unrestricted_guest(vcpu) ||
|
||||
(enable_ept && is_paging(vcpu)))
|
||||
/*
|
||||
* When intercepting CR3 loads, e.g. for shadowing paging, KVM's
|
||||
* CR3 is loaded into hardware, not the guest's CR3.
|
||||
*/
|
||||
if (!(exec_controls_get(to_vmx(vcpu)) & CPU_BASED_CR3_LOAD_EXITING))
|
||||
vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
|
||||
break;
|
||||
case VCPU_EXREG_CR4:
|
||||
|
@ -2274,7 +2274,7 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
|
|||
vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & guest_owned_bits;
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
KVM_BUG_ON(1, vcpu->kvm);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -2733,7 +2733,7 @@ static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
|
|||
save->dpl = save->selector & SEGMENT_RPL_MASK;
|
||||
save->s = 1;
|
||||
}
|
||||
vmx_set_segment(vcpu, save, seg);
|
||||
__vmx_set_segment(vcpu, save, seg);
|
||||
}
|
||||
|
||||
static void enter_pmode(struct kvm_vcpu *vcpu)
|
||||
|
@ -2754,7 +2754,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
|
|||
|
||||
vmx->rmode.vm86_active = 0;
|
||||
|
||||
vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
|
||||
__vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
|
||||
|
||||
flags = vmcs_readl(GUEST_RFLAGS);
|
||||
flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
|
||||
|
@ -2852,8 +2852,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
|
|||
fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
|
||||
fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
|
||||
fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
|
||||
|
||||
kvm_mmu_reset_context(vcpu);
|
||||
}
|
||||
|
||||
int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
|
||||
|
@ -2874,7 +2872,7 @@ int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
|
|||
|
||||
msr->data = efer & ~EFER_LME;
|
||||
}
|
||||
setup_msrs(vmx);
|
||||
vmx_setup_uret_msrs(vmx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -2997,42 +2995,24 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
|
|||
kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
|
||||
}
|
||||
|
||||
static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
|
||||
unsigned long cr0,
|
||||
struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
|
||||
if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
|
||||
vmx_cache_reg(vcpu, VCPU_EXREG_CR3);
|
||||
if (!(cr0 & X86_CR0_PG)) {
|
||||
/* From paging/starting to nonpaging */
|
||||
exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
|
||||
CPU_BASED_CR3_STORE_EXITING);
|
||||
vcpu->arch.cr0 = cr0;
|
||||
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
|
||||
} else if (!is_paging(vcpu)) {
|
||||
/* From nonpaging to paging */
|
||||
exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
|
||||
CPU_BASED_CR3_STORE_EXITING);
|
||||
vcpu->arch.cr0 = cr0;
|
||||
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
|
||||
}
|
||||
|
||||
if (!(cr0 & X86_CR0_WP))
|
||||
*hw_cr0 &= ~X86_CR0_WP;
|
||||
}
|
||||
#define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \
|
||||
CPU_BASED_CR3_STORE_EXITING)
|
||||
|
||||
void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
unsigned long hw_cr0;
|
||||
unsigned long hw_cr0, old_cr0_pg;
|
||||
u32 tmp;
|
||||
|
||||
old_cr0_pg = kvm_read_cr0_bits(vcpu, X86_CR0_PG);
|
||||
|
||||
hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
|
||||
if (is_unrestricted_guest(vcpu))
|
||||
hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
|
||||
else {
|
||||
hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
|
||||
if (!enable_ept)
|
||||
hw_cr0 |= X86_CR0_WP;
|
||||
|
||||
if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
|
||||
enter_pmode(vcpu);
|
||||
|
@ -3041,23 +3021,61 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
|
|||
enter_rmode(vcpu);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (vcpu->arch.efer & EFER_LME) {
|
||||
if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
|
||||
enter_lmode(vcpu);
|
||||
if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
|
||||
exit_lmode(vcpu);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (enable_ept && !is_unrestricted_guest(vcpu))
|
||||
ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
|
||||
|
||||
vmcs_writel(CR0_READ_SHADOW, cr0);
|
||||
vmcs_writel(GUEST_CR0, hw_cr0);
|
||||
vcpu->arch.cr0 = cr0;
|
||||
kvm_register_mark_available(vcpu, VCPU_EXREG_CR0);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (vcpu->arch.efer & EFER_LME) {
|
||||
if (!old_cr0_pg && (cr0 & X86_CR0_PG))
|
||||
enter_lmode(vcpu);
|
||||
else if (old_cr0_pg && !(cr0 & X86_CR0_PG))
|
||||
exit_lmode(vcpu);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (enable_ept && !is_unrestricted_guest(vcpu)) {
|
||||
/*
|
||||
* Ensure KVM has an up-to-date snapshot of the guest's CR3. If
|
||||
* the below code _enables_ CR3 exiting, vmx_cache_reg() will
|
||||
* (correctly) stop reading vmcs.GUEST_CR3 because it thinks
|
||||
* KVM's CR3 is installed.
|
||||
*/
|
||||
if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
|
||||
vmx_cache_reg(vcpu, VCPU_EXREG_CR3);
|
||||
|
||||
/*
|
||||
* When running with EPT but not unrestricted guest, KVM must
|
||||
* intercept CR3 accesses when paging is _disabled_. This is
|
||||
* necessary because restricted guests can't actually run with
|
||||
* paging disabled, and so KVM stuffs its own CR3 in order to
|
||||
* run the guest when identity mapped page tables.
|
||||
*
|
||||
* Do _NOT_ check the old CR0.PG, e.g. to optimize away the
|
||||
* update, it may be stale with respect to CR3 interception,
|
||||
* e.g. after nested VM-Enter.
|
||||
*
|
||||
* Lastly, honor L1's desires, i.e. intercept CR3 loads and/or
|
||||
* stores to forward them to L1, even if KVM does not need to
|
||||
* intercept them to preserve its identity mapped page tables.
|
||||
*/
|
||||
if (!(cr0 & X86_CR0_PG)) {
|
||||
exec_controls_setbit(vmx, CR3_EXITING_BITS);
|
||||
} else if (!is_guest_mode(vcpu)) {
|
||||
exec_controls_clearbit(vmx, CR3_EXITING_BITS);
|
||||
} else {
|
||||
tmp = exec_controls_get(vmx);
|
||||
tmp &= ~CR3_EXITING_BITS;
|
||||
tmp |= get_vmcs12(vcpu)->cpu_based_vm_exec_control & CR3_EXITING_BITS;
|
||||
exec_controls_set(vmx, tmp);
|
||||
}
|
||||
|
||||
/* Note, vmx_set_cr4() consumes the new vcpu->arch.cr0. */
|
||||
if ((old_cr0_pg ^ cr0) & X86_CR0_PG)
|
||||
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
|
||||
}
|
||||
|
||||
/* depends on vcpu->arch.cr0 to be set to a new value */
|
||||
vmx->emulation_required = emulation_required(vcpu);
|
||||
}
|
||||
|
@ -3271,7 +3289,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var)
|
|||
return ar;
|
||||
}
|
||||
|
||||
void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
|
||||
void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
|
||||
|
@ -3284,7 +3302,7 @@ void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
|
|||
vmcs_write16(sf->selector, var->selector);
|
||||
else if (var->s)
|
||||
fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
|
||||
goto out;
|
||||
return;
|
||||
}
|
||||
|
||||
vmcs_writel(sf->base, var->base);
|
||||
|
@ -3306,9 +3324,13 @@ void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
|
|||
var->type |= 0x1; /* Accessed */
|
||||
|
||||
vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
|
||||
}
|
||||
|
||||
out:
|
||||
vmx->emulation_required = emulation_required(vcpu);
|
||||
static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
|
||||
{
|
||||
__vmx_set_segment(vcpu, var, seg);
|
||||
|
||||
to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
|
||||
}
|
||||
|
||||
static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
|
||||
|
@ -3790,21 +3812,6 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
|
|||
vmx_set_msr_bitmap_write(msr_bitmap, msr);
|
||||
}
|
||||
|
||||
static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u8 mode = 0;
|
||||
|
||||
if (cpu_has_secondary_exec_ctrls() &&
|
||||
(secondary_exec_controls_get(to_vmx(vcpu)) &
|
||||
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
|
||||
mode |= MSR_BITMAP_MODE_X2APIC;
|
||||
if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
|
||||
mode |= MSR_BITMAP_MODE_X2APIC_APICV;
|
||||
}
|
||||
|
||||
return mode;
|
||||
}
|
||||
|
||||
static void vmx_reset_x2apic_msrs(struct kvm_vcpu *vcpu, u8 mode)
|
||||
{
|
||||
unsigned long *msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
|
||||
|
@ -3822,11 +3829,29 @@ static void vmx_reset_x2apic_msrs(struct kvm_vcpu *vcpu, u8 mode)
|
|||
}
|
||||
}
|
||||
|
||||
static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu, u8 mode)
|
||||
static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
u8 mode;
|
||||
|
||||
if (!cpu_has_vmx_msr_bitmap())
|
||||
return;
|
||||
|
||||
if (cpu_has_secondary_exec_ctrls() &&
|
||||
(secondary_exec_controls_get(vmx) &
|
||||
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
|
||||
mode = MSR_BITMAP_MODE_X2APIC;
|
||||
if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
|
||||
mode |= MSR_BITMAP_MODE_X2APIC_APICV;
|
||||
} else {
|
||||
mode = 0;
|
||||
}
|
||||
|
||||
if (mode == vmx->x2apic_msr_bitmap_mode)
|
||||
return;
|
||||
|
||||
vmx->x2apic_msr_bitmap_mode = mode;
|
||||
|
||||
vmx_reset_x2apic_msrs(vcpu, mode);
|
||||
|
||||
/*
|
||||
|
@ -3843,21 +3868,6 @@ static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu, u8 mode)
|
|||
}
|
||||
}
|
||||
|
||||
void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
u8 mode = vmx_msr_bitmap_mode(vcpu);
|
||||
u8 changed = mode ^ vmx->msr_bitmap_mode;
|
||||
|
||||
if (!changed)
|
||||
return;
|
||||
|
||||
if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
|
||||
vmx_update_msr_bitmap_x2apic(vcpu, mode);
|
||||
|
||||
vmx->msr_bitmap_mode = mode;
|
||||
}
|
||||
|
||||
void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
|
@ -3914,7 +3924,6 @@ static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
pt_update_intercept_for_msr(vcpu);
|
||||
vmx_update_msr_bitmap_x2apic(vcpu, vmx_msr_bitmap_mode(vcpu));
|
||||
}
|
||||
|
||||
static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
|
||||
|
@ -4086,7 +4095,7 @@ void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
|
|||
vmcs_writel(CR4_GUEST_HOST_MASK, ~vcpu->arch.cr4_guest_owned_bits);
|
||||
}
|
||||
|
||||
u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
|
||||
static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
|
||||
{
|
||||
u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
|
||||
|
||||
|
@ -4102,6 +4111,30 @@ u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
|
|||
return pin_based_exec_ctrl;
|
||||
}
|
||||
|
||||
static u32 vmx_vmentry_ctrl(void)
|
||||
{
|
||||
u32 vmentry_ctrl = vmcs_config.vmentry_ctrl;
|
||||
|
||||
if (vmx_pt_mode_is_system())
|
||||
vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP |
|
||||
VM_ENTRY_LOAD_IA32_RTIT_CTL);
|
||||
/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
|
||||
return vmentry_ctrl &
|
||||
~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER);
|
||||
}
|
||||
|
||||
static u32 vmx_vmexit_ctrl(void)
|
||||
{
|
||||
u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
|
||||
|
||||
if (vmx_pt_mode_is_system())
|
||||
vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP |
|
||||
VM_EXIT_CLEAR_IA32_RTIT_CTL);
|
||||
/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
|
||||
return vmexit_ctrl &
|
||||
~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
|
||||
}
|
||||
|
||||
static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
|
@ -4118,11 +4151,10 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
|
|||
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
|
||||
}
|
||||
|
||||
if (cpu_has_vmx_msr_bitmap())
|
||||
vmx_update_msr_bitmap(vcpu);
|
||||
vmx_update_msr_bitmap_x2apic(vcpu);
|
||||
}
|
||||
|
||||
u32 vmx_exec_control(struct vcpu_vmx *vmx)
|
||||
static u32 vmx_exec_control(struct vcpu_vmx *vmx)
|
||||
{
|
||||
u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
|
||||
|
||||
|
@ -4204,7 +4236,7 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
|
|||
#define vmx_adjust_sec_exec_exiting(vmx, exec_control, lname, uname) \
|
||||
vmx_adjust_sec_exec_control(vmx, exec_control, lname, uname, uname##_EXITING, true)
|
||||
|
||||
static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
|
||||
static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = &vmx->vcpu;
|
||||
|
||||
|
@ -4290,7 +4322,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
|
|||
if (!vcpu->kvm->arch.bus_lock_detection_enabled)
|
||||
exec_control &= ~SECONDARY_EXEC_BUS_LOCK_DETECTION;
|
||||
|
||||
vmx->secondary_exec_control = exec_control;
|
||||
return exec_control;
|
||||
}
|
||||
|
||||
#define VMX_XSS_EXIT_BITMAP 0
|
||||
|
@ -4314,10 +4346,8 @@ static void init_vmcs(struct vcpu_vmx *vmx)
|
|||
|
||||
exec_controls_set(vmx, vmx_exec_control(vmx));
|
||||
|
||||
if (cpu_has_secondary_exec_ctrls()) {
|
||||
vmx_compute_secondary_exec_control(vmx);
|
||||
secondary_exec_controls_set(vmx, vmx->secondary_exec_control);
|
||||
}
|
||||
if (cpu_has_secondary_exec_ctrls())
|
||||
secondary_exec_controls_set(vmx, vmx_secondary_exec_control(vmx));
|
||||
|
||||
if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
|
||||
vmcs_write64(EOI_EXIT_BITMAP0, 0);
|
||||
|
@ -4388,32 +4418,35 @@ static void init_vmcs(struct vcpu_vmx *vmx)
|
|||
vmx->pt_desc.guest.output_mask = 0x7F;
|
||||
vmcs_write64(GUEST_IA32_RTIT_CTL, 0);
|
||||
}
|
||||
|
||||
vmcs_write32(GUEST_SYSENTER_CS, 0);
|
||||
vmcs_writel(GUEST_SYSENTER_ESP, 0);
|
||||
vmcs_writel(GUEST_SYSENTER_EIP, 0);
|
||||
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
|
||||
|
||||
if (cpu_has_vmx_tpr_shadow()) {
|
||||
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
|
||||
if (cpu_need_tpr_shadow(&vmx->vcpu))
|
||||
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
|
||||
__pa(vmx->vcpu.arch.apic->regs));
|
||||
vmcs_write32(TPR_THRESHOLD, 0);
|
||||
}
|
||||
|
||||
vmx_setup_uret_msrs(vmx);
|
||||
}
|
||||
|
||||
static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
struct msr_data apic_base_msr;
|
||||
u64 cr0;
|
||||
|
||||
vmx->rmode.vm86_active = 0;
|
||||
vmx->spec_ctrl = 0;
|
||||
|
||||
vmx->msr_ia32_umwait_control = 0;
|
||||
|
||||
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
|
||||
vmx->hv_deadline_tsc = -1;
|
||||
kvm_set_cr8(vcpu, 0);
|
||||
|
||||
if (!init_event) {
|
||||
apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
|
||||
MSR_IA32_APICBASE_ENABLE;
|
||||
if (kvm_vcpu_is_reset_bsp(vcpu))
|
||||
apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
|
||||
apic_base_msr.host_initiated = true;
|
||||
kvm_set_apic_base(vcpu, &apic_base_msr);
|
||||
}
|
||||
|
||||
vmx_segment_cache_clear(vmx);
|
||||
|
||||
seg_setup(VCPU_SREG_CS);
|
||||
|
@ -4436,16 +4469,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
|||
vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
|
||||
vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
|
||||
|
||||
if (!init_event) {
|
||||
vmcs_write32(GUEST_SYSENTER_CS, 0);
|
||||
vmcs_writel(GUEST_SYSENTER_ESP, 0);
|
||||
vmcs_writel(GUEST_SYSENTER_EIP, 0);
|
||||
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
|
||||
}
|
||||
|
||||
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
|
||||
kvm_rip_write(vcpu, 0xfff0);
|
||||
|
||||
vmcs_writel(GUEST_GDTR_BASE, 0);
|
||||
vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
|
||||
|
||||
|
@ -4458,31 +4481,11 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
|||
if (kvm_mpx_supported())
|
||||
vmcs_write64(GUEST_BNDCFGS, 0);
|
||||
|
||||
setup_msrs(vmx);
|
||||
|
||||
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
|
||||
|
||||
if (cpu_has_vmx_tpr_shadow() && !init_event) {
|
||||
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
|
||||
if (cpu_need_tpr_shadow(vcpu))
|
||||
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
|
||||
__pa(vcpu->arch.apic->regs));
|
||||
vmcs_write32(TPR_THRESHOLD, 0);
|
||||
}
|
||||
|
||||
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
|
||||
|
||||
cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
|
||||
vmx->vcpu.arch.cr0 = cr0;
|
||||
vmx_set_cr0(vcpu, cr0); /* enter rmode */
|
||||
vmx_set_cr4(vcpu, 0);
|
||||
vmx_set_efer(vcpu, 0);
|
||||
|
||||
vmx_update_exception_bitmap(vcpu);
|
||||
|
||||
vpid_sync_context(vmx->vpid);
|
||||
if (init_event)
|
||||
vmx_clear_hlt(vcpu);
|
||||
}
|
||||
|
||||
static void vmx_enable_irq_window(struct kvm_vcpu *vcpu)
|
||||
|
@ -4996,6 +4999,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
|
|||
return kvm_complete_insn_gp(vcpu, err);
|
||||
case 3:
|
||||
WARN_ON_ONCE(enable_unrestricted_guest);
|
||||
|
||||
err = kvm_set_cr3(vcpu, val);
|
||||
return kvm_complete_insn_gp(vcpu, err);
|
||||
case 4:
|
||||
|
@ -5021,14 +5025,13 @@ static int handle_cr(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
break;
|
||||
case 2: /* clts */
|
||||
WARN_ONCE(1, "Guest should always own CR0.TS");
|
||||
vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
|
||||
trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
|
||||
return kvm_skip_emulated_instruction(vcpu);
|
||||
KVM_BUG(1, vcpu->kvm, "Guest always owns CR0.TS");
|
||||
return -EIO;
|
||||
case 1: /*mov from cr*/
|
||||
switch (cr) {
|
||||
case 3:
|
||||
WARN_ON_ONCE(enable_unrestricted_guest);
|
||||
|
||||
val = kvm_read_cr3(vcpu);
|
||||
kvm_register_write(vcpu, reg, val);
|
||||
trace_kvm_cr_read(cr, val);
|
||||
|
@ -5129,6 +5132,12 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
|
|||
|
||||
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
|
||||
exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
|
||||
|
||||
/*
|
||||
* exc_debug expects dr6 to be cleared after it runs, avoid that it sees
|
||||
* a stale dr6 from the guest.
|
||||
*/
|
||||
set_debugreg(DR6_RESERVED, 6);
|
||||
}
|
||||
|
||||
static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
|
||||
|
@ -5338,7 +5347,9 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
|
|||
|
||||
static int handle_nmi_window(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
WARN_ON_ONCE(!enable_vnmi);
|
||||
if (KVM_BUG_ON(!enable_vnmi, vcpu->kvm))
|
||||
return -EIO;
|
||||
|
||||
exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
|
||||
++vcpu->stat.nmi_window_exits;
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
|
@ -5896,7 +5907,8 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
|
|||
* below) should never happen as that means we incorrectly allowed a
|
||||
* nested VM-Enter with an invalid vmcs12.
|
||||
*/
|
||||
WARN_ON_ONCE(vmx->nested.nested_run_pending);
|
||||
if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm))
|
||||
return -EIO;
|
||||
|
||||
/* If guest state is invalid, start emulating */
|
||||
if (vmx->emulation_required)
|
||||
|
@ -6189,7 +6201,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
secondary_exec_controls_set(vmx, sec_exec_control);
|
||||
|
||||
vmx_update_msr_bitmap(vcpu);
|
||||
vmx_update_msr_bitmap_x2apic(vcpu);
|
||||
}
|
||||
|
||||
static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
|
||||
|
@ -6274,7 +6286,9 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
|
|||
int max_irr;
|
||||
bool max_irr_updated;
|
||||
|
||||
WARN_ON(!vcpu->arch.apicv_active);
|
||||
if (KVM_BUG_ON(!vcpu->arch.apicv_active, vcpu->kvm))
|
||||
return -EIO;
|
||||
|
||||
if (pi_test_on(&vmx->pi_desc)) {
|
||||
pi_clear_on(&vmx->pi_desc);
|
||||
/*
|
||||
|
@ -6357,7 +6371,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
|
|||
unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
|
||||
gate_desc *desc = (gate_desc *)host_idt_base + vector;
|
||||
|
||||
if (WARN_ONCE(!is_external_intr(intr_info),
|
||||
if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
|
||||
"KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
|
||||
return;
|
||||
|
||||
|
@ -6368,6 +6382,9 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
|
|||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
|
||||
if (vmx->emulation_required)
|
||||
return;
|
||||
|
||||
if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT)
|
||||
handle_external_interrupt_irqoff(vcpu);
|
||||
else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI)
|
||||
|
@ -6639,6 +6656,10 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
vmx->loaded_vmcs->host_state.cr4 = cr4;
|
||||
}
|
||||
|
||||
/* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */
|
||||
if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
|
||||
set_debugreg(vcpu->arch.dr6, 6);
|
||||
|
||||
/* When single-stepping over STI and MOV SS, we must clear the
|
||||
* corresponding interruptibility bits in the guest state. Otherwise
|
||||
* vmentry fails as it then expects bit 14 (BS) in pending debug
|
||||
|
@ -6838,7 +6859,6 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
|
|||
vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
|
||||
vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
|
||||
}
|
||||
vmx->msr_bitmap_mode = 0;
|
||||
|
||||
vmx->loaded_vmcs = &vmx->vmcs01;
|
||||
cpu = get_cpu();
|
||||
|
@ -6997,7 +7017,7 @@ exit:
|
|||
return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat;
|
||||
}
|
||||
|
||||
static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx)
|
||||
static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx, u32 new_ctl)
|
||||
{
|
||||
/*
|
||||
* These bits in the secondary execution controls field
|
||||
|
@ -7011,7 +7031,6 @@ static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx)
|
|||
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
|
||||
SECONDARY_EXEC_DESC;
|
||||
|
||||
u32 new_ctl = vmx->secondary_exec_control;
|
||||
u32 cur_ctl = secondary_exec_controls_get(vmx);
|
||||
|
||||
secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask));
|
||||
|
@ -7154,10 +7173,11 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
|
|||
/* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */
|
||||
vcpu->arch.xsaves_enabled = false;
|
||||
|
||||
if (cpu_has_secondary_exec_ctrls()) {
|
||||
vmx_compute_secondary_exec_control(vmx);
|
||||
vmcs_set_secondary_exec_control(vmx);
|
||||
}
|
||||
vmx_setup_uret_msrs(vmx);
|
||||
|
||||
if (cpu_has_secondary_exec_ctrls())
|
||||
vmcs_set_secondary_exec_control(vmx,
|
||||
vmx_secondary_exec_control(vmx));
|
||||
|
||||
if (nested_vmx_allowed(vcpu))
|
||||
to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
|
||||
|
@ -7803,7 +7823,8 @@ static __init int hardware_setup(void)
|
|||
ept_lpage_level = PG_LEVEL_2M;
|
||||
else
|
||||
ept_lpage_level = PG_LEVEL_4K;
|
||||
kvm_configure_mmu(enable_ept, vmx_get_max_tdp_level(), ept_lpage_level);
|
||||
kvm_configure_mmu(enable_ept, 0, vmx_get_max_tdp_level(),
|
||||
ept_lpage_level);
|
||||
|
||||
/*
|
||||
* Only enable PML when hardware supports PML feature, and both EPT
|
||||
|
|
|
@ -227,7 +227,7 @@ struct nested_vmx {
|
|||
struct vcpu_vmx {
|
||||
struct kvm_vcpu vcpu;
|
||||
u8 fail;
|
||||
u8 msr_bitmap_mode;
|
||||
u8 x2apic_msr_bitmap_mode;
|
||||
|
||||
/*
|
||||
* If true, host state has been stored in vmx->loaded_vmcs for
|
||||
|
@ -263,8 +263,6 @@ struct vcpu_vmx {
|
|||
u64 spec_ctrl;
|
||||
u32 msr_ia32_umwait_control;
|
||||
|
||||
u32 secondary_exec_control;
|
||||
|
||||
/*
|
||||
* loaded_vmcs points to the VMCS currently used in this vcpu. For a
|
||||
* non-nested (L1) guest, it always points to vmcs01. For a nested
|
||||
|
@ -371,12 +369,11 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
|
|||
void set_cr4_guest_host_mask(struct vcpu_vmx *vmx);
|
||||
void ept_save_pdptrs(struct kvm_vcpu *vcpu);
|
||||
void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
|
||||
void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
|
||||
void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
|
||||
u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level);
|
||||
|
||||
bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu);
|
||||
void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu);
|
||||
void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
|
||||
bool vmx_nmi_blocked(struct kvm_vcpu *vcpu);
|
||||
bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu);
|
||||
bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
|
||||
|
@ -419,9 +416,13 @@ static inline void lname##_controls_set(struct vcpu_vmx *vmx, u32 val) \
|
|||
vmx->loaded_vmcs->controls_shadow.lname = val; \
|
||||
} \
|
||||
} \
|
||||
static inline u32 __##lname##_controls_get(struct loaded_vmcs *vmcs) \
|
||||
{ \
|
||||
return vmcs->controls_shadow.lname; \
|
||||
} \
|
||||
static inline u32 lname##_controls_get(struct vcpu_vmx *vmx) \
|
||||
{ \
|
||||
return vmx->loaded_vmcs->controls_shadow.lname; \
|
||||
return __##lname##_controls_get(vmx->loaded_vmcs); \
|
||||
} \
|
||||
static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u32 val) \
|
||||
{ \
|
||||
|
@ -451,31 +452,6 @@ static inline void vmx_register_cache_reset(struct kvm_vcpu *vcpu)
|
|||
vcpu->arch.regs_dirty = 0;
|
||||
}
|
||||
|
||||
static inline u32 vmx_vmentry_ctrl(void)
|
||||
{
|
||||
u32 vmentry_ctrl = vmcs_config.vmentry_ctrl;
|
||||
if (vmx_pt_mode_is_system())
|
||||
vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP |
|
||||
VM_ENTRY_LOAD_IA32_RTIT_CTL);
|
||||
/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
|
||||
return vmentry_ctrl &
|
||||
~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER);
|
||||
}
|
||||
|
||||
static inline u32 vmx_vmexit_ctrl(void)
|
||||
{
|
||||
u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
|
||||
if (vmx_pt_mode_is_system())
|
||||
vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP |
|
||||
VM_EXIT_CLEAR_IA32_RTIT_CTL);
|
||||
/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
|
||||
return vmexit_ctrl &
|
||||
~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
|
||||
}
|
||||
|
||||
u32 vmx_exec_control(struct vcpu_vmx *vmx);
|
||||
u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx);
|
||||
|
||||
static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
|
||||
{
|
||||
return container_of(kvm, struct kvm_vmx, kvm);
|
||||
|
|
|
@ -4,13 +4,11 @@
|
|||
|
||||
#include <linux/nospec.h>
|
||||
|
||||
#include <asm/kvm_host.h>
|
||||
#include <asm/vmx.h>
|
||||
|
||||
#include "evmcs.h"
|
||||
#include "vmcs.h"
|
||||
|
||||
#define __ex(x) __kvm_handle_fault_on_reboot(x)
|
||||
#include "x86.h"
|
||||
|
||||
asmlinkage void vmread_error(unsigned long field, bool fault);
|
||||
__attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field,
|
||||
|
|
|
@ -233,12 +233,13 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
|
|||
STATS_DESC_COUNTER(VM, mmu_recycled),
|
||||
STATS_DESC_COUNTER(VM, mmu_cache_miss),
|
||||
STATS_DESC_ICOUNTER(VM, mmu_unsync),
|
||||
STATS_DESC_ICOUNTER(VM, lpages),
|
||||
STATS_DESC_ICOUNTER(VM, pages_4k),
|
||||
STATS_DESC_ICOUNTER(VM, pages_2m),
|
||||
STATS_DESC_ICOUNTER(VM, pages_1g),
|
||||
STATS_DESC_ICOUNTER(VM, nx_lpage_splits),
|
||||
STATS_DESC_PCOUNTER(VM, max_mmu_rmap_size),
|
||||
STATS_DESC_PCOUNTER(VM, max_mmu_page_hash_collisions)
|
||||
};
|
||||
static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
|
||||
sizeof(struct kvm_vm_stat) / sizeof(u64));
|
||||
|
||||
const struct kvm_stats_header kvm_vm_stats_header = {
|
||||
.name_size = KVM_STATS_NAME_SIZE,
|
||||
|
@ -278,8 +279,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
|
|||
STATS_DESC_COUNTER(VCPU, directed_yield_successful),
|
||||
STATS_DESC_ICOUNTER(VCPU, guest_mode)
|
||||
};
|
||||
static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
|
||||
sizeof(struct kvm_vcpu_stat) / sizeof(u64));
|
||||
|
||||
const struct kvm_stats_header kvm_vcpu_stats_header = {
|
||||
.name_size = KVM_STATS_NAME_SIZE,
|
||||
|
@ -485,7 +484,14 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_set_apic_base);
|
||||
|
||||
asmlinkage __visible noinstr void kvm_spurious_fault(void)
|
||||
/*
|
||||
* Handle a fault on a hardware virtualization (VMX or SVM) instruction.
|
||||
*
|
||||
* Hardware virtualization extension instructions may fault if a reboot turns
|
||||
* off virtualization while processes are running. Usually after catching the
|
||||
* fault we just panic; during reboot instead the instruction is ignored.
|
||||
*/
|
||||
noinstr void kvm_spurious_fault(void)
|
||||
{
|
||||
/* Fault while not rebooting. We want the trace. */
|
||||
BUG_ON(!kvm_rebooting);
|
||||
|
@ -1180,7 +1186,6 @@ static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
|
|||
if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
|
||||
for (i = 0; i < KVM_NR_DB_REGS; i++)
|
||||
vcpu->arch.eff_db[i] = vcpu->arch.db[i];
|
||||
vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3316,6 +3321,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
|||
if (!msr_info->host_initiated) {
|
||||
s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
|
||||
adjust_tsc_offset_guest(vcpu, adj);
|
||||
/* Before back to guest, tsc_timestamp must be adjusted
|
||||
* as well, otherwise guest's percpu pvclock time could jump.
|
||||
*/
|
||||
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
|
||||
}
|
||||
vcpu->arch.ia32_tsc_adjust_msr = data;
|
||||
}
|
||||
|
@ -4310,12 +4319,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
|||
|
||||
static_call(kvm_x86_vcpu_put)(vcpu);
|
||||
vcpu->arch.last_host_tsc = rdtsc();
|
||||
/*
|
||||
* If userspace has set any breakpoints or watchpoints, dr6 is restored
|
||||
* on every vmexit, but if not, we might have a stale dr6 from the
|
||||
* guest. do_debug expects dr6 to be cleared after it runs, do the same.
|
||||
*/
|
||||
set_debugreg(0, 6);
|
||||
}
|
||||
|
||||
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
|
||||
|
@ -6567,9 +6570,9 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
|
|||
* there is no pkey in EPT page table for L1 guest or EPT
|
||||
* shadow page table for L2 guest.
|
||||
*/
|
||||
if (vcpu_match_mmio_gva(vcpu, gva)
|
||||
&& !permission_fault(vcpu, vcpu->arch.walk_mmu,
|
||||
vcpu->arch.mmio_access, 0, access)) {
|
||||
if (vcpu_match_mmio_gva(vcpu, gva) && (!is_paging(vcpu) ||
|
||||
!permission_fault(vcpu, vcpu->arch.walk_mmu,
|
||||
vcpu->arch.mmio_access, 0, access))) {
|
||||
*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
|
||||
(gva & (PAGE_SIZE - 1));
|
||||
trace_vcpu_match_mmio(gva, *gpa, write, false);
|
||||
|
@ -8578,6 +8581,8 @@ EXPORT_SYMBOL_GPL(kvm_apicv_activated);
|
|||
|
||||
static void kvm_apicv_init(struct kvm *kvm)
|
||||
{
|
||||
mutex_init(&kvm->arch.apicv_update_lock);
|
||||
|
||||
if (enable_apicv)
|
||||
clear_bit(APICV_INHIBIT_REASON_DISABLE,
|
||||
&kvm->arch.apicv_inhibit_reasons);
|
||||
|
@ -8891,6 +8896,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
|
|||
can_inject = false;
|
||||
}
|
||||
|
||||
/* Don't inject interrupts if the user asked to avoid doing so */
|
||||
if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Finally, inject interrupt events. If an event cannot be injected
|
||||
* due to architectural conditions (e.g. IF=0) a window-open exit
|
||||
|
@ -9236,10 +9245,18 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm)
|
|||
|
||||
void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
bool activate;
|
||||
|
||||
if (!lapic_in_kernel(vcpu))
|
||||
return;
|
||||
|
||||
vcpu->arch.apicv_active = kvm_apicv_activated(vcpu->kvm);
|
||||
mutex_lock(&vcpu->kvm->arch.apicv_update_lock);
|
||||
|
||||
activate = kvm_apicv_activated(vcpu->kvm);
|
||||
if (vcpu->arch.apicv_active == activate)
|
||||
goto out;
|
||||
|
||||
vcpu->arch.apicv_active = activate;
|
||||
kvm_apic_update_apicv(vcpu);
|
||||
static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu);
|
||||
|
||||
|
@ -9251,54 +9268,45 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
|
|||
*/
|
||||
if (!vcpu->arch.apicv_active)
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
|
||||
out:
|
||||
mutex_unlock(&vcpu->kvm->arch.apicv_update_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
|
||||
|
||||
/*
|
||||
* NOTE: Do not hold any lock prior to calling this.
|
||||
*
|
||||
* In particular, kvm_request_apicv_update() expects kvm->srcu not to be
|
||||
* locked, because it calls __x86_set_memory_region() which does
|
||||
* synchronize_srcu(&kvm->srcu).
|
||||
*/
|
||||
void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
|
||||
void __kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
|
||||
{
|
||||
struct kvm_vcpu *except;
|
||||
unsigned long old, new, expected;
|
||||
unsigned long old, new;
|
||||
|
||||
if (!kvm_x86_ops.check_apicv_inhibit_reasons ||
|
||||
!static_call(kvm_x86_check_apicv_inhibit_reasons)(bit))
|
||||
return;
|
||||
|
||||
old = READ_ONCE(kvm->arch.apicv_inhibit_reasons);
|
||||
do {
|
||||
expected = new = old;
|
||||
if (activate)
|
||||
__clear_bit(bit, &new);
|
||||
else
|
||||
__set_bit(bit, &new);
|
||||
if (new == old)
|
||||
break;
|
||||
old = cmpxchg(&kvm->arch.apicv_inhibit_reasons, expected, new);
|
||||
} while (old != expected);
|
||||
old = new = kvm->arch.apicv_inhibit_reasons;
|
||||
|
||||
if (!!old == !!new)
|
||||
return;
|
||||
if (activate)
|
||||
__clear_bit(bit, &new);
|
||||
else
|
||||
__set_bit(bit, &new);
|
||||
|
||||
trace_kvm_apicv_update_request(activate, bit);
|
||||
if (kvm_x86_ops.pre_update_apicv_exec_ctrl)
|
||||
static_call(kvm_x86_pre_update_apicv_exec_ctrl)(kvm, activate);
|
||||
if (!!old != !!new) {
|
||||
trace_kvm_apicv_update_request(activate, bit);
|
||||
kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE);
|
||||
kvm->arch.apicv_inhibit_reasons = new;
|
||||
if (new) {
|
||||
unsigned long gfn = gpa_to_gfn(APIC_DEFAULT_PHYS_BASE);
|
||||
kvm_zap_gfn_range(kvm, gfn, gfn+1);
|
||||
}
|
||||
} else
|
||||
kvm->arch.apicv_inhibit_reasons = new;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__kvm_request_apicv_update);
|
||||
|
||||
/*
|
||||
* Sending request to update APICV for all other vcpus,
|
||||
* while update the calling vcpu immediately instead of
|
||||
* waiting for another #VMEXIT to handle the request.
|
||||
*/
|
||||
except = kvm_get_running_vcpu();
|
||||
kvm_make_all_cpus_request_except(kvm, KVM_REQ_APICV_UPDATE,
|
||||
except);
|
||||
if (except)
|
||||
kvm_vcpu_update_apicv(except);
|
||||
void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
|
||||
{
|
||||
mutex_lock(&kvm->arch.apicv_update_lock);
|
||||
__kvm_request_apicv_update(kvm, activate, bit);
|
||||
mutex_unlock(&kvm->arch.apicv_update_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_request_apicv_update);
|
||||
|
||||
|
@ -9395,6 +9403,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
if (kvm_request_pending(vcpu)) {
|
||||
if (kvm_check_request(KVM_REQ_VM_BUGGED, vcpu)) {
|
||||
r = -EIO;
|
||||
goto out;
|
||||
}
|
||||
if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
|
||||
if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
|
||||
r = 0;
|
||||
|
@ -9608,8 +9620,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
|||
set_debugreg(vcpu->arch.eff_db[1], 1);
|
||||
set_debugreg(vcpu->arch.eff_db[2], 2);
|
||||
set_debugreg(vcpu->arch.eff_db[3], 3);
|
||||
set_debugreg(vcpu->arch.dr6, 6);
|
||||
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
|
||||
} else if (unlikely(hw_breakpoint_active())) {
|
||||
set_debugreg(0, 7);
|
||||
}
|
||||
|
@ -9639,7 +9649,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
|||
static_call(kvm_x86_sync_dirty_debug_regs)(vcpu);
|
||||
kvm_update_dr0123(vcpu);
|
||||
kvm_update_dr7(vcpu);
|
||||
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -9976,7 +9985,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
|||
goto out;
|
||||
}
|
||||
|
||||
if (kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
|
||||
if ((kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) ||
|
||||
(kvm_run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)) {
|
||||
r = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
@ -10581,9 +10591,6 @@ static void store_regs(struct kvm_vcpu *vcpu)
|
|||
|
||||
static int sync_regs(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)
|
||||
return -EINVAL;
|
||||
|
||||
if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
|
||||
__set_regs(vcpu, &vcpu->run->s.regs.regs);
|
||||
vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
|
||||
|
@ -10799,6 +10806,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
|||
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
{
|
||||
unsigned long old_cr0 = kvm_read_cr0(vcpu);
|
||||
unsigned long new_cr0;
|
||||
u32 eax, dummy;
|
||||
|
||||
kvm_lapic_reset(vcpu, init_event);
|
||||
|
||||
|
@ -10865,10 +10874,41 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
|||
vcpu->arch.regs_avail = ~0;
|
||||
vcpu->arch.regs_dirty = ~0;
|
||||
|
||||
/*
|
||||
* Fall back to KVM's default Family/Model/Stepping of 0x600 (P6/Athlon)
|
||||
* if no CPUID match is found. Note, it's impossible to get a match at
|
||||
* RESET since KVM emulates RESET before exposing the vCPU to userspace,
|
||||
* i.e. it'simpossible for kvm_cpuid() to find a valid entry on RESET.
|
||||
* But, go through the motions in case that's ever remedied.
|
||||
*/
|
||||
eax = 1;
|
||||
if (!kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true))
|
||||
eax = 0x600;
|
||||
kvm_rdx_write(vcpu, eax);
|
||||
|
||||
vcpu->arch.ia32_xss = 0;
|
||||
|
||||
static_call(kvm_x86_vcpu_reset)(vcpu, init_event);
|
||||
|
||||
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
|
||||
kvm_rip_write(vcpu, 0xfff0);
|
||||
|
||||
/*
|
||||
* CR0.CD/NW are set on RESET, preserved on INIT. Note, some versions
|
||||
* of Intel's SDM list CD/NW as being set on INIT, but they contradict
|
||||
* (or qualify) that with a footnote stating that CD/NW are preserved.
|
||||
*/
|
||||
new_cr0 = X86_CR0_ET;
|
||||
if (init_event)
|
||||
new_cr0 |= (old_cr0 & (X86_CR0_NW | X86_CR0_CD));
|
||||
else
|
||||
new_cr0 |= X86_CR0_NW | X86_CR0_CD;
|
||||
|
||||
static_call(kvm_x86_set_cr0)(vcpu, new_cr0);
|
||||
static_call(kvm_x86_set_cr4)(vcpu, 0);
|
||||
static_call(kvm_x86_set_efer)(vcpu, 0);
|
||||
static_call(kvm_x86_update_exception_bitmap)(vcpu);
|
||||
|
||||
/*
|
||||
* Reset the MMU context if paging was enabled prior to INIT (which is
|
||||
* implied if CR0.PG=1 as CR0 will be '0' prior to RESET). Unlike the
|
||||
|
@ -10879,7 +10919,20 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
|||
*/
|
||||
if (old_cr0 & X86_CR0_PG)
|
||||
kvm_mmu_reset_context(vcpu);
|
||||
|
||||
/*
|
||||
* Intel's SDM states that all TLB entries are flushed on INIT. AMD's
|
||||
* APM states the TLBs are untouched by INIT, but it also states that
|
||||
* the TLBs are flushed on "External initialization of the processor."
|
||||
* Flush the guest TLB regardless of vendor, there is no meaningful
|
||||
* benefit in relying on the guest to flush the TLB immediately after
|
||||
* INIT. A spurious TLB flush is benign and likely negligible from a
|
||||
* performance perspective.
|
||||
*/
|
||||
if (init_event)
|
||||
kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_reset);
|
||||
|
||||
void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
|
||||
{
|
||||
|
@ -11123,6 +11176,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
|||
kvm_hv_init_vm(kvm);
|
||||
kvm_page_track_init(kvm);
|
||||
kvm_mmu_init_vm(kvm);
|
||||
kvm_xen_init_vm(kvm);
|
||||
|
||||
return static_call(kvm_x86_vm_init)(kvm);
|
||||
}
|
||||
|
@ -11312,8 +11366,7 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot,
|
|||
|
||||
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
|
||||
int level = i + 1;
|
||||
int lpages = gfn_to_index(slot->base_gfn + npages - 1,
|
||||
slot->base_gfn, level) + 1;
|
||||
int lpages = __kvm_mmu_slot_lpages(slot, npages, level);
|
||||
|
||||
WARN_ON(slot->arch.rmap[i]);
|
||||
|
||||
|
@ -11396,8 +11449,7 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm,
|
|||
int lpages;
|
||||
int level = i + 1;
|
||||
|
||||
lpages = gfn_to_index(slot->base_gfn + npages - 1,
|
||||
slot->base_gfn, level) + 1;
|
||||
lpages = __kvm_mmu_slot_lpages(slot, npages, level);
|
||||
|
||||
linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
|
||||
if (!linfo)
|
||||
|
@ -11481,7 +11533,7 @@ static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable)
|
|||
|
||||
static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
|
||||
struct kvm_memory_slot *old,
|
||||
struct kvm_memory_slot *new,
|
||||
const struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
bool log_dirty_pages = new->flags & KVM_MEM_LOG_DIRTY_PAGES;
|
||||
|
@ -11561,10 +11613,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
|
|||
kvm_mmu_change_mmu_pages(kvm,
|
||||
kvm_mmu_calculate_default_mmu_pages(kvm));
|
||||
|
||||
/*
|
||||
* FIXME: const-ify all uses of struct kvm_memory_slot.
|
||||
*/
|
||||
kvm_mmu_slot_apply_flags(kvm, old, (struct kvm_memory_slot *) new, change);
|
||||
kvm_mmu_slot_apply_flags(kvm, old, new, change);
|
||||
|
||||
/* Free the arrays associated with the old memslot. */
|
||||
if (change == KVM_MR_MOVE)
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
#include "kvm_cache_regs.h"
|
||||
#include "kvm_emulate.h"
|
||||
|
||||
void kvm_spurious_fault(void);
|
||||
|
||||
static __always_inline void kvm_guest_enter_irqoff(void)
|
||||
{
|
||||
/*
|
||||
|
|
|
@ -25,15 +25,14 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
|
|||
{
|
||||
gpa_t gpa = gfn_to_gpa(gfn);
|
||||
int wc_ofs, sec_hi_ofs;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
int idx = srcu_read_lock(&kvm->srcu);
|
||||
|
||||
ret = kvm_gfn_to_hva_cache_init(kvm, &kvm->arch.xen.shinfo_cache,
|
||||
gpa, PAGE_SIZE);
|
||||
if (ret)
|
||||
if (kvm_is_error_hva(gfn_to_hva(kvm, gfn))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
|
||||
kvm->arch.xen.shinfo_set = true;
|
||||
}
|
||||
kvm->arch.xen.shinfo_gfn = gfn;
|
||||
|
||||
/* Paranoia checks on the 32-bit struct layout */
|
||||
BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900);
|
||||
|
@ -245,7 +244,7 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
|
|||
|
||||
case KVM_XEN_ATTR_TYPE_SHARED_INFO:
|
||||
if (data->u.shared_info.gfn == GPA_INVALID) {
|
||||
kvm->arch.xen.shinfo_set = false;
|
||||
kvm->arch.xen.shinfo_gfn = GPA_INVALID;
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
|
@ -283,10 +282,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
|
|||
break;
|
||||
|
||||
case KVM_XEN_ATTR_TYPE_SHARED_INFO:
|
||||
if (kvm->arch.xen.shinfo_set)
|
||||
data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa);
|
||||
else
|
||||
data->u.shared_info.gfn = GPA_INVALID;
|
||||
data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_gfn);
|
||||
r = 0;
|
||||
break;
|
||||
|
||||
|
@ -646,6 +642,11 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
|
|||
return 0;
|
||||
}
|
||||
|
||||
void kvm_xen_init_vm(struct kvm *kvm)
|
||||
{
|
||||
kvm->arch.xen.shinfo_gfn = GPA_INVALID;
|
||||
}
|
||||
|
||||
void kvm_xen_destroy_vm(struct kvm *kvm)
|
||||
{
|
||||
if (kvm->arch.xen_hvm_config.msr)
|
||||
|
|
|
@ -21,6 +21,7 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
|
|||
int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
|
||||
int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data);
|
||||
int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc);
|
||||
void kvm_xen_init_vm(struct kvm *kvm);
|
||||
void kvm_xen_destroy_vm(struct kvm *kvm);
|
||||
|
||||
static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
|
||||
|
@ -50,6 +51,10 @@ static inline int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
|
|||
return 1;
|
||||
}
|
||||
|
||||
static inline void kvm_xen_init_vm(struct kvm *kvm)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void kvm_xen_destroy_vm(struct kvm *kvm)
|
||||
{
|
||||
}
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue