Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "ARM:
   - support for chained PMU counters in guests
   - improved SError handling
   - handle Neoverse N1 erratum #1349291
   - allow side-channel mitigation status to be migrated
   - standardise most AArch64 system register accesses to msr_s/mrs_s
   - fix host MPIDR corruption on 32bit
   - selftests cleanups

  x86:
   - PMU event {white,black}listing
   - ability for the guest to disable host-side interrupt polling
   - fixes for enlightened VMCS (Hyper-V pv nested virtualization)
   - new hypercall to yield to IPI target
   - support for passing cstate MSRs through to the guest
   - lots of cleanups and optimizations

  Generic:
   - some txt->rST conversions for the documentation"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (128 commits)
  Documentation: virtual: Add toctree hooks
  Documentation: kvm: Convert cpuid.txt to .rst
  Documentation: virtual: Convert paravirt_ops.txt to .rst
  KVM: x86: Unconditionally enable irqs in guest context
  KVM: x86: PMU Event Filter
  kvm: x86: Fix -Wmissing-prototypes warnings
  KVM: Properly check if "page" is valid in kvm_vcpu_unmap
  KVM: arm/arm64: Initialise host's MPIDRs by reading the actual register
  KVM: LAPIC: Retry tune per-vCPU timer_advance_ns if adaptive tuning goes insane
  kvm: LAPIC: write down valid APIC registers
  KVM: arm64: Migrate _elx sysreg accessors to msr_s/mrs_s
  KVM: doc: Add API documentation on the KVM_REG_ARM_WORKAROUNDS register
  KVM: arm/arm64: Add save/restore support for firmware workaround state
  arm64: KVM: Propagate full Spectre v2 workaround state to KVM guests
  KVM: arm/arm64: Support chained PMU counters
  KVM: arm/arm64: Remove pmc->bitmask
  KVM: arm/arm64: Re-create event when setting counter value
  KVM: arm/arm64: Extract duplicated code to own function
  KVM: arm/arm64: Rename kvm_pmu_{enable/disable}_counter functions
  KVM: LAPIC: ARBPRI is a reserved register for x2APIC
  ...
commit 39d7530d74

@@ -86,6 +86,8 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Neoverse-N1     | #1188873,1418040| ARM64_ERRATUM_1418040       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Neoverse-N1     | #1349291        | N/A                         |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | MMU-500         | #841119,826419  | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+

@@ -0,0 +1,18 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
+Linux Virtualization Support
+============================
+
+.. toctree::
+   :maxdepth: 2
+
+   kvm/index
+   paravirt_ops
+
+.. only:: html and subproject
+
+   Indices
+   =======
+
+   * :ref:`genindex`

@@ -4081,6 +4081,32 @@ KVM_ARM_VCPU_FINALIZE call.
 See KVM_ARM_VCPU_INIT for details of vcpu features that require finalization
 using this ioctl.
 
+4.120 KVM_SET_PMU_EVENT_FILTER
+
+Capability: KVM_CAP_PMU_EVENT_FILTER
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_pmu_event_filter (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_pmu_event_filter {
+        __u32 action;
+        __u32 nevents;
+        __u64 events[0];
+};
+
+This ioctl restricts the set of PMU events that the guest can program.
+The argument holds a list of events which will be allowed or denied.
+The eventsel+umask of each event the guest attempts to program is compared
+against the events field to determine whether the guest should have access.
+This only affects general purpose counters; fixed purpose counters can
+be disabled by changing the perfmon CPUID leaf.
+
+Valid values for 'action':
+#define KVM_PMU_EVENT_ALLOW 0
+#define KVM_PMU_EVENT_DENY 1
+
+
 5. The kvm_run structure
 ------------------------
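
A minimal userspace sketch of driving this ioctl (illustrative only: the VM
fd handling and the choice of filtered event are assumptions, not part of
the patch):

    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Deny a single eventsel+umask on a VM fd obtained from KVM_CREATE_VM. */
    static int deny_pmu_event(int vm_fd, __u64 event)
    {
            struct kvm_pmu_event_filter *f;
            int ret;

            f = calloc(1, sizeof(*f) + sizeof(__u64)); /* header + one event */
            if (!f)
                    return -1;
            f->action = KVM_PMU_EVENT_DENY;
            f->nevents = 1;
            f->events[0] = event;
            ret = ioctl(vm_fd, KVM_SET_PMU_EVENT_FILTER, f);
            free(f);
            return ret;
    }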

@@ -4909,6 +4935,8 @@ Valid bits in args[0] are
 
 #define KVM_X86_DISABLE_EXITS_MWAIT            (1 << 0)
 #define KVM_X86_DISABLE_EXITS_HLT              (1 << 1)
 #define KVM_X86_DISABLE_EXITS_PAUSE            (1 << 2)
+#define KVM_X86_DISABLE_EXITS_CSTATE           (1 << 3)
 
 Enabling this capability on a VM provides userspace with a way to no
 longer intercept some instructions for improved latency in some
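
For reference, a hedged sketch of enabling these exits from userspace via
KVM_ENABLE_CAP (the vm_fd and the chosen bits are assumptions):

    struct kvm_enable_cap cap = {
            .cap = KVM_CAP_X86_DISABLE_EXITS,
            .args[0] = KVM_X86_DISABLE_EXITS_HLT |
                       KVM_X86_DISABLE_EXITS_CSTATE,
    };

    /* Typically done on the VM fd before vCPUs are created. */
    if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
            perror("KVM_ENABLE_CAP");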

@@ -28,3 +28,34 @@ The following register is defined:
   - Allows any PSCI version implemented by KVM and compatible with
     v0.2 to be set with SET_ONE_REG
   - Affects the whole VM (even if the register view is per-vcpu)
+
+* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+  Holds the state of the firmware support to mitigate CVE-2017-5715, as
+  offered by KVM to the guest via a HVC call. The workaround is described
+  under SMCCC_ARCH_WORKAROUND_1 in [1].
+  Accepted values are:
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL: KVM does not offer
+      firmware support for the workaround. The mitigation status for the
+      guest is unknown.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL: The workaround HVC call is
+      available to the guest and required for the mitigation.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED: The workaround HVC call
+      is available to the guest, but it is not needed on this VCPU.
+
+* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+  Holds the state of the firmware support to mitigate CVE-2018-3639, as
+  offered by KVM to the guest via a HVC call. The workaround is described
+  under SMCCC_ARCH_WORKAROUND_2 in [1].
+  Accepted values are:
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: A workaround is not
+      available. KVM does not offer firmware support for the workaround.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: The workaround state is
+      unknown. KVM does not offer firmware support for the workaround.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: The workaround is available,
+      and can be disabled by a vCPU. If
+      KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
+      this vCPU.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: The workaround is
+      always active on this vCPU or it is not needed.
+
+[1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
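
As a hedged illustration of how a VMM might save one of these registers for
migration with the existing KVM_GET_ONE_REG/KVM_SET_ONE_REG interface (the
vcpu_fd handling and the save helper are hypothetical):

    __u64 val = 0;
    struct kvm_one_reg reg = {
            .id   = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1,
            .addr = (__u64)(unsigned long)&val,
    };

    if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) == 0)
            /* val holds NOT_AVAIL/AVAIL/NOT_REQUIRED; restore it on the
             * destination with KVM_SET_ONE_REG before resuming the guest. */
            save_for_migration(val);    /* hypothetical helper */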

@@ -0,0 +1,107 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+KVM CPUID bits
+==============
+
+:Author: Glauber Costa <glommer@gmail.com>
+
+A guest running on a kvm host, can check some of its features using
+cpuid. This is not always guaranteed to work, since userspace can
+mask-out some, or even all KVM-related cpuid features before launching
+a guest.
+
+KVM cpuid functions are:
+
+function: KVM_CPUID_SIGNATURE (0x40000000)
+
+returns::
+
+   eax = 0x40000001
+   ebx = 0x4b4d564b
+   ecx = 0x564b4d56
+   edx = 0x4d
+
+Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM".
+The value in eax corresponds to the maximum cpuid function present in this leaf,
+and will be updated if more functions are added in the future.
+Note also that old hosts set eax value to 0x0. This should
+be interpreted as if the value was 0x40000001.
+This function queries the presence of KVM cpuid leafs.
+
+function: define KVM_CPUID_FEATURES (0x40000001)
+
+returns::
+
+   ebx, ecx
+   eax = an OR'ed group of (1 << flag)
+
+where ``flag`` is defined as below:
+
+================================== =========== ================================
+flag                               value       meaning
+================================== =========== ================================
+KVM_FEATURE_CLOCKSOURCE            0           kvmclock available at msrs
+                                               0x11 and 0x12
+
+KVM_FEATURE_NOP_IO_DELAY           1           not necessary to perform delays
+                                               on PIO operations
+
+KVM_FEATURE_MMU_OP                 2           deprecated
+
+KVM_FEATURE_CLOCKSOURCE2           3           kvmclock available at msrs
+                                               0x4b564d00 and 0x4b564d01
+
+KVM_FEATURE_ASYNC_PF               4           async pf can be enabled by
+                                               writing to msr 0x4b564d02
+
+KVM_FEATURE_STEAL_TIME             5           steal time can be enabled by
+                                               writing to msr 0x4b564d03
+
+KVM_FEATURE_PV_EOI                 6           paravirtualized end of interrupt
+                                               handler can be enabled by
+                                               writing to msr 0x4b564d04
+
+KVM_FEATURE_PV_UNHALT              7           guest checks this feature bit
+                                               before enabling paravirtualized
+                                               spinlock support
+
+KVM_FEATURE_PV_TLB_FLUSH           9           guest checks this feature bit
+                                               before enabling paravirtualized
+                                               tlb flush
+
+KVM_FEATURE_ASYNC_PF_VMEXIT        10          paravirtualized async PF VM EXIT
+                                               can be enabled by setting bit 2
+                                               when writing to msr 0x4b564d02
+
+KVM_FEATURE_PV_SEND_IPI            11          guest checks this feature bit
+                                               before enabling paravirtualized
+                                               send IPIs
+
+KVM_FEATURE_PV_POLL_CONTROL        12          host-side polling on HLT can
+                                               be disabled by writing
+                                               to msr 0x4b564d05.
+
+KVM_FEATURE_PV_SCHED_YIELD         13          guest checks this feature bit
+                                               before using paravirtualized
+                                               sched yield.
+
+KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24          host will warn if no guest-side
+                                               per-cpu warps are expected in
+                                               kvmclock
+================================== =========== ================================
+
+::
+
+   edx = an OR'ed group of (1 << flag)
+
+Where ``flag`` here is defined as below:
+
+================== ============ =================================
+flag               value        meaning
+================== ============ =================================
+KVM_HINTS_REALTIME 0            guest checks this feature bit to
+                                determine that vCPUs are never
+                                preempted for an unlimited time
+                                allowing optimizations
+================== ============ =================================
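
A hedged guest-side sketch of the signature check described above (using the
GCC/clang __cpuid() helper from cpuid.h; assumes the hypervisor exposes the
0x40000000 leaf at all):

    #include <cpuid.h>
    #include <string.h>

    static int running_on_kvm(void)
    {
            unsigned int eax, ebx, ecx, edx;
            char sig[13];

            __cpuid(0x40000000, eax, ebx, ecx, edx);
            memcpy(sig + 0, &ebx, 4);
            memcpy(sig + 4, &ecx, 4);
            memcpy(sig + 8, &edx, 4);
            sig[12] = '\0';
            return strcmp(sig, "KVMKVMKVM") == 0;
    }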

@@ -1,83 +0,0 @@
-KVM CPUID bits
-Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
-=====================================================
-
-A guest running on a kvm host, can check some of its features using
-cpuid. This is not always guaranteed to work, since userspace can
-mask-out some, or even all KVM-related cpuid features before launching
-a guest.
-
-KVM cpuid functions are:
-
-function: KVM_CPUID_SIGNATURE (0x40000000)
-returns : eax = 0x40000001,
-          ebx = 0x4b4d564b,
-          ecx = 0x564b4d56,
-          edx = 0x4d.
-Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM".
-The value in eax corresponds to the maximum cpuid function present in this leaf,
-and will be updated if more functions are added in the future.
-Note also that old hosts set eax value to 0x0. This should
-be interpreted as if the value was 0x40000001.
-This function queries the presence of KVM cpuid leafs.
-
-
-function: define KVM_CPUID_FEATURES (0x40000001)
-returns : ebx, ecx
-          eax = an OR'ed group of (1 << flag), where each flags is:
-
-
-flag                               || value || meaning
-==============================================================================
-KVM_FEATURE_CLOCKSOURCE            ||     0 || kvmclock available at msrs
-                                   ||       || 0x11 and 0x12.
-------------------------------------------------------------------------------
-KVM_FEATURE_NOP_IO_DELAY           ||     1 || not necessary to perform delays
-                                   ||       || on PIO operations.
-------------------------------------------------------------------------------
-KVM_FEATURE_MMU_OP                 ||     2 || deprecated.
-------------------------------------------------------------------------------
-KVM_FEATURE_CLOCKSOURCE2           ||     3 || kvmclock available at msrs
-                                   ||       || 0x4b564d00 and 0x4b564d01
-------------------------------------------------------------------------------
-KVM_FEATURE_ASYNC_PF               ||     4 || async pf can be enabled by
-                                   ||       || writing to msr 0x4b564d02
-------------------------------------------------------------------------------
-KVM_FEATURE_STEAL_TIME             ||     5 || steal time can be enabled by
-                                   ||       || writing to msr 0x4b564d03.
-------------------------------------------------------------------------------
-KVM_FEATURE_PV_EOI                 ||     6 || paravirtualized end of interrupt
-                                   ||       || handler can be enabled by writing
-                                   ||       || to msr 0x4b564d04.
-------------------------------------------------------------------------------
-KVM_FEATURE_PV_UNHALT              ||     7 || guest checks this feature bit
-                                   ||       || before enabling paravirtualized
-                                   ||       || spinlock support.
-------------------------------------------------------------------------------
-KVM_FEATURE_PV_TLB_FLUSH           ||     9 || guest checks this feature bit
-                                   ||       || before enabling paravirtualized
-                                   ||       || tlb flush.
-------------------------------------------------------------------------------
-KVM_FEATURE_ASYNC_PF_VMEXIT        ||    10 || paravirtualized async PF VM exit
-                                   ||       || can be enabled by setting bit 2
-                                   ||       || when writing to msr 0x4b564d02
-------------------------------------------------------------------------------
-KVM_FEATURE_PV_SEND_IPI            ||    11 || guest checks this feature bit
-                                   ||       || before using paravirtualized
-                                   ||       || send IPIs.
-------------------------------------------------------------------------------
-KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
-                                   ||       || per-cpu warps are expected in
-                                   ||       || kvmclock.
-------------------------------------------------------------------------------
-
-edx = an OR'ed group of (1 << flag), where each flags is:
-
-
-flag                               || value || meaning
-==================================================================================
-KVM_HINTS_REALTIME                 ||     0 || guest checks this feature bit to
-                                   ||       || determine that vCPUs are never
-                                   ||       || preempted for an unlimited time,
-                                   ||       || allowing optimizations
-----------------------------------------------------------------------------------

@@ -141,3 +141,14 @@ a0 corresponds to the APIC ID in the third argument (a2), bit 1
 corresponds to the APIC ID a2+1, and so on.
 
 Returns the number of CPUs to which the IPIs were delivered successfully.
+
+7. KVM_HC_SCHED_YIELD
+---------------------
+Architecture: x86
+Status: active
+Purpose: Hypercall used to yield if the IPI target vCPU is preempted
+
+a0: destination APIC ID
+
+Usage example: When sending a call-function IPI-many to vCPUs, yield if
+any of the IPI target vCPUs was preempted.
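
A hedged sketch of how a guest could issue this hypercall (Intel "vmcall"
shown; AMD uses "vmmcall", and in-tree guests go through the kvm_hypercall1()
helper instead):

    #include <linux/kvm_para.h>     /* KVM_HC_SCHED_YIELD */

    static inline long kvm_sched_yield(unsigned long dest_apicid)
    {
            long ret;

            asm volatile("vmcall"
                         : "=a" (ret)
                         : "a" (KVM_HC_SCHED_YIELD), "b" (dest_apicid)
                         : "memory");
            return ret;
    }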

@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===
+KVM
+===
+
+.. toctree::
+   :maxdepth: 2
+
+   amd-memory-encryption
+   cpuid

@@ -15,8 +15,6 @@ The acquisition orders for mutexes are as follows:
 
 On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
 
-For spinlocks, kvm_lock is taken outside kvm->mmu_lock.
-
 Everything else is a leaf: no other lock is taken inside the critical
 sections.
 

@@ -169,7 +167,7 @@ which time it will be set using the Dirty tracking mechanism described above.
 ------------
 
 Name:           kvm_lock
-Type:           spinlock_t
+Type:           mutex
 Arch:           any
 Protects:       - vm_list
 
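
With kvm_lock now a mutex, VM-list walks may sleep rather than spin; a hedged
sketch of the resulting pattern (kvm and vm_list as in virt/kvm/kvm_main.c,
the per-VM work is hypothetical):

    mutex_lock(&kvm_lock);
    list_for_each_entry(kvm, &vm_list, vm_list)
            do_something_per_vm(kvm);   /* hypothetical helper */
    mutex_unlock(&kvm_lock);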

@@ -273,3 +273,12 @@ MSR_KVM_EOI_EN: 0x4b564d04
         guest must both read the least significant bit in the memory area and
         clear it using a single CPU instruction, such as test and clear, or
         compare and exchange.
+
+MSR_KVM_POLL_CONTROL: 0x4b564d05
+        Control host-side polling.
+
+        data: Bit 0 enables (1) or disables (0) host-side HLT polling logic.
+
+        KVM guests can request the host not to poll on HLT, for example if
+        they are performing polling themselves.
+
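
A hedged guest-side sketch of opting out of host polling (assumes the
KVM_FEATURE_PV_POLL_CONTROL CPUID bit was checked first; the wrmsr wrapper is
illustrative, not a kernel API):

    #define MSR_KVM_POLL_CONTROL 0x4b564d05

    static inline void wrmsr64(unsigned int msr, unsigned long long val)
    {
            asm volatile("wrmsr" : : "c" (msr),
                         "a" ((unsigned int)val),
                         "d" ((unsigned int)(val >> 32)));
    }

    wrmsr64(MSR_KVM_POLL_CONTROL, 0);   /* bit 0 clear: don't poll on HLT */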

@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
 Paravirt_ops
 ============
 

@@ -18,15 +21,15 @@ at boot time.
 pv_ops operations are classified into three categories:
 
 - simple indirect call
- These operations correspond to high level functionality where it is
- known that the overhead of indirect call isn't very important.
+  These operations correspond to high level functionality where it is
+  known that the overhead of indirect call isn't very important.
 
 - indirect call which allows optimization with binary patch
- Usually these operations correspond to low level critical instructions. They
- are called frequently and are performance critical. The overhead is
- very important.
+  Usually these operations correspond to low level critical instructions. They
+  are called frequently and are performance critical. The overhead is
+  very important.
 
 - a set of macros for hand written assembly code
- Hand written assembly codes (.S files) also need paravirtualization
- because they include sensitive instructions or some of code paths in
- them are very performance critical.
+  Hand written assembly codes (.S files) also need paravirtualization
+  because they include sensitive instructions or some of code paths in
+  them are very performance critical.

@@ -271,6 +271,16 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
         return vcpu_cp15(vcpu, c0_MPIDR) & MPIDR_HWID_BITMASK;
 }
 
+static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
+{
+        return false;
+}
+
+static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
+                                                      bool flag)
+{
+}
+
 static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
 {
         *vcpu_cpsr(vcpu) |= PSR_E_BIT;

@@ -15,7 +15,6 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 #include <asm/fpstate.h>
-#include <asm/smp_plat.h>
 #include <kvm/arm_arch_timer.h>
 
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED

@@ -147,11 +146,10 @@ struct kvm_host_data {
 
 typedef struct kvm_host_data kvm_host_data_t;
 
-static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt,
-                                             int cpu)
+static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
 {
         /* The host's MPIDR is immutable, so let's set it up at boot time */
-        cpu_ctxt->cp15[c0_MPIDR] = cpu_logical_map(cpu);
+        cpu_ctxt->cp15[c0_MPIDR] = read_cpuid_mpidr();
 }
 
 struct vcpu_reset_state {

@@ -362,7 +360,11 @@ static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arm_vhe_guest_enter(void) {}
 static inline void kvm_arm_vhe_guest_exit(void) {}
 
-static inline bool kvm_arm_harden_branch_predictor(void)
+#define KVM_BP_HARDEN_UNKNOWN           -1
+#define KVM_BP_HARDEN_WA_NEEDED         0
+#define KVM_BP_HARDEN_NOT_REQUIRED      1
+
+static inline int kvm_arm_harden_branch_predictor(void)
 {
         switch(read_cpuid_part()) {
 #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR

@@ -370,10 +372,12 @@ static inline bool kvm_arm_harden_branch_predictor(void)
         case ARM_CPU_PART_CORTEX_A12:
         case ARM_CPU_PART_CORTEX_A15:
         case ARM_CPU_PART_CORTEX_A17:
-                return true;
+                return KVM_BP_HARDEN_WA_NEEDED;
 #endif
+        case ARM_CPU_PART_CORTEX_A7:
+                return KVM_BP_HARDEN_NOT_REQUIRED;
         default:
-                return false;
+                return KVM_BP_HARDEN_UNKNOWN;
         }
 }
 

@@ -82,13 +82,14 @@
 #define VFP_FPEXC       __ACCESS_VFP(FPEXC)
 
 /* AArch64 compatibility macros, only for the timer so far */
-#define read_sysreg_el0(r)              read_sysreg(r##_el0)
-#define write_sysreg_el0(v, r)          write_sysreg(v, r##_el0)
+#define read_sysreg_el0(r)              read_sysreg(r##_EL0)
+#define write_sysreg_el0(v, r)          write_sysreg(v, r##_EL0)
 
+#define SYS_CNTP_CTL_EL0                CNTP_CTL
+#define SYS_CNTP_CVAL_EL0               CNTP_CVAL
+#define SYS_CNTV_CTL_EL0                CNTV_CTL
+#define SYS_CNTV_CVAL_EL0               CNTV_CVAL
+
-#define cntp_ctl_el0                    CNTP_CTL
-#define cntp_cval_el0                   CNTP_CVAL
-#define cntv_ctl_el0                    CNTV_CTL
-#define cntv_cval_el0                   CNTV_CVAL
 #define cntvoff_el2                     CNTVOFF
 #define cnthctl_el2                     CNTHCTL
 

@@ -214,6 +214,18 @@ struct kvm_vcpu_events {
 #define KVM_REG_ARM_FW_REG(r)           (KVM_REG_ARM | KVM_REG_SIZE_U64 | \
                                          KVM_REG_ARM_FW | ((r) & 0xffff))
 #define KVM_REG_ARM_PSCI_VERSION        KVM_REG_ARM_FW_REG(0)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1     KVM_REG_ARM_FW_REG(1)
+        /* Higher values mean better protection. */
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL           0
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL               1
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED        2
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2     KVM_REG_ARM_FW_REG(2)
+        /* Higher values mean better protection. */
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL           0
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN             1
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL               2
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED        3
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED     (1U << 4)
 
 /* Device Control API: ARM VGIC */
 #define KVM_DEV_ARM_VGIC_GRP_ADDR       0

@@ -96,7 +96,11 @@
  * RAS Error Synchronization barrier
  */
 .macro esb
+#ifdef CONFIG_ARM64_RAS_EXTN
         hint    #16
+#else
+        nop
+#endif
 .endm
 
 /*

@@ -620,6 +620,12 @@ static inline bool system_has_prio_mask_debugging(void)
                system_uses_irq_prio_masking();
 }
 
+#define ARM64_BP_HARDEN_UNKNOWN         -1
+#define ARM64_BP_HARDEN_WA_NEEDED       0
+#define ARM64_BP_HARDEN_NOT_REQUIRED    1
+
+int get_spectre_v2_workaround_state(void);
+
 #define ARM64_SSBD_UNKNOWN              -1
 #define ARM64_SSBD_FORCE_DISABLE        0
 #define ARM64_SSBD_KERNEL               1

@@ -30,6 +30,12 @@
         {ARM_EXCEPTION_TRAP,            "TRAP"          },      \
         {ARM_EXCEPTION_HYP_GONE,        "HYP_GONE"      }
 
+/*
+ * Size of the HYP vectors preamble. kvm_patch_vector_branch() generates code
+ * that jumps over this.
+ */
+#define KVM_VECTOR_PREAMBLE     (2 * AARCH64_INSN_SIZE)
+
 #ifndef __ASSEMBLY__
 
 #include <linux/mm.h>

@@ -126,7 +126,7 @@ static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu)
 static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu)
 {
         if (vcpu->arch.sysregs_loaded_on_cpu)
-                return read_sysreg_el1(elr);
+                return read_sysreg_el1(SYS_ELR);
         else
                 return *__vcpu_elr_el1(vcpu);
 }

@@ -134,7 +134,7 @@ static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu)
 static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v)
 {
         if (vcpu->arch.sysregs_loaded_on_cpu)
-                write_sysreg_el1(v, elr);
+                write_sysreg_el1(v, SYS_ELR);
         else
                 *__vcpu_elr_el1(vcpu) = v;
 }

@@ -186,7 +186,7 @@ static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu)
                 return vcpu_read_spsr32(vcpu);
 
         if (vcpu->arch.sysregs_loaded_on_cpu)
-                return read_sysreg_el1(spsr);
+                return read_sysreg_el1(SYS_SPSR);
         else
                 return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
 }

@@ -199,7 +199,7 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
         }
 
         if (vcpu->arch.sysregs_loaded_on_cpu)
-                write_sysreg_el1(v, spsr);
+                write_sysreg_el1(v, SYS_SPSR);
         else
                 vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v;
 }

@@ -353,6 +353,20 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
         return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
 }
 
+static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
+{
+        return vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG;
+}
+
+static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
+                                                      bool flag)
+{
+        if (flag)
+                vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
+        else
+                vcpu->arch.workaround_flags &= ~VCPU_WORKAROUND_2_FLAG;
+}
+
 static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
 {
         if (vcpu_mode_is_32bit(vcpu)) {

@@ -451,13 +465,13 @@ static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
  */
 static inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu)
 {
-        *vcpu_pc(vcpu) = read_sysreg_el2(elr);
-        vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(spsr);
+        *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
+        vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR);
 
         kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
 
-        write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, spsr);
-        write_sysreg_el2(*vcpu_pc(vcpu), elr);
+        write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, SYS_SPSR);
+        write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
 }
 
 #endif /* __ARM64_KVM_EMULATE_H__ */

@@ -19,12 +19,12 @@
 #include <asm/arch_gicv3.h>
 #include <asm/barrier.h>
 #include <asm/cpufeature.h>
+#include <asm/cputype.h>
 #include <asm/daifflags.h>
 #include <asm/fpsimd.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
-#include <asm/smp_plat.h>
 #include <asm/thread_info.h>
 
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED

@@ -484,11 +484,10 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 
 DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data);
 
-static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt,
-                                             int cpu)
+static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
 {
         /* The host's MPIDR is immutable, so let's set it up at boot time */
-        cpu_ctxt->sys_regs[MPIDR_EL1] = cpu_logical_map(cpu);
+        cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr();
 }
 
 void __kvm_enable_ssbs(void);

@@ -621,9 +620,21 @@ static inline void kvm_arm_vhe_guest_exit(void)
         isb();
 }
 
-static inline bool kvm_arm_harden_branch_predictor(void)
+#define KVM_BP_HARDEN_UNKNOWN           -1
+#define KVM_BP_HARDEN_WA_NEEDED         0
+#define KVM_BP_HARDEN_NOT_REQUIRED      1
+
+static inline int kvm_arm_harden_branch_predictor(void)
 {
-        return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR);
+        switch (get_spectre_v2_workaround_state()) {
+        case ARM64_BP_HARDEN_WA_NEEDED:
+                return KVM_BP_HARDEN_WA_NEEDED;
+        case ARM64_BP_HARDEN_NOT_REQUIRED:
+                return KVM_BP_HARDEN_NOT_REQUIRED;
+        case ARM64_BP_HARDEN_UNKNOWN:
+        default:
+                return KVM_BP_HARDEN_UNKNOWN;
+        }
 }
 
 #define KVM_SSBD_UNKNOWN                -1

@@ -18,7 +18,7 @@
 #define read_sysreg_elx(r,nvh,vh)                                       \
         ({                                                              \
                 u64 reg;                                                \
-                asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##nvh),\
+                asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh),         \
                                          __mrs_s("%0", r##vh),          \
                                          ARM64_HAS_VIRT_HOST_EXTN)      \
                              : "=r" (reg));                             \

@@ -28,7 +28,7 @@
 #define write_sysreg_elx(v,r,nvh,vh)                                    \
         do {                                                            \
                 u64 __val = (u64)(v);                                   \
-                asm volatile(ALTERNATIVE("msr " __stringify(r##nvh) ", %x0",\
+                asm volatile(ALTERNATIVE(__msr_s(r##nvh, "%x0"),        \
                                          __msr_s(r##vh, "%x0"),         \
                                          ARM64_HAS_VIRT_HOST_EXTN)      \
                              : : "rZ" (__val));                         \

@@ -37,55 +37,15 @@
 /*
  * Unified accessors for registers that have a different encoding
  * between VHE and non-VHE. They must be specified without their "ELx"
- * encoding.
+ * encoding, but with the SYS_ prefix, as defined in asm/sysreg.h.
  */
-#define read_sysreg_el2(r)                                              \
-        ({                                                              \
-                u64 reg;                                                \
-                asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##_EL2),\
-                                         "mrs %0, " __stringify(r##_EL1),\
-                                         ARM64_HAS_VIRT_HOST_EXTN)      \
-                             : "=r" (reg));                             \
-                reg;                                                    \
-        })
-
-#define write_sysreg_el2(v,r)                                           \
-        do {                                                            \
-                u64 __val = (u64)(v);                                   \
-                asm volatile(ALTERNATIVE("msr " __stringify(r##_EL2) ", %x0",\
-                                         "msr " __stringify(r##_EL1) ", %x0",\
-                                         ARM64_HAS_VIRT_HOST_EXTN)      \
-                             : : "rZ" (__val));                         \
-        } while (0)
-
 #define read_sysreg_el0(r)      read_sysreg_elx(r, _EL0, _EL02)
 #define write_sysreg_el0(v,r)   write_sysreg_elx(v, r, _EL0, _EL02)
 #define read_sysreg_el1(r)      read_sysreg_elx(r, _EL1, _EL12)
 #define write_sysreg_el1(v,r)   write_sysreg_elx(v, r, _EL1, _EL12)
-
-/* The VHE specific system registers and their encoding */
-#define sctlr_EL12              sys_reg(3, 5, 1, 0, 0)
-#define cpacr_EL12              sys_reg(3, 5, 1, 0, 2)
-#define ttbr0_EL12              sys_reg(3, 5, 2, 0, 0)
-#define ttbr1_EL12              sys_reg(3, 5, 2, 0, 1)
-#define tcr_EL12                sys_reg(3, 5, 2, 0, 2)
-#define afsr0_EL12              sys_reg(3, 5, 5, 1, 0)
-#define afsr1_EL12              sys_reg(3, 5, 5, 1, 1)
-#define esr_EL12                sys_reg(3, 5, 5, 2, 0)
-#define far_EL12                sys_reg(3, 5, 6, 0, 0)
-#define mair_EL12               sys_reg(3, 5, 10, 2, 0)
-#define amair_EL12              sys_reg(3, 5, 10, 3, 0)
-#define vbar_EL12               sys_reg(3, 5, 12, 0, 0)
-#define contextidr_EL12         sys_reg(3, 5, 13, 0, 1)
-#define cntkctl_EL12            sys_reg(3, 5, 14, 1, 0)
-#define cntp_tval_EL02          sys_reg(3, 5, 14, 2, 0)
-#define cntp_ctl_EL02           sys_reg(3, 5, 14, 2, 1)
-#define cntp_cval_EL02          sys_reg(3, 5, 14, 2, 2)
-#define cntv_tval_EL02          sys_reg(3, 5, 14, 3, 0)
-#define cntv_ctl_EL02           sys_reg(3, 5, 14, 3, 1)
-#define cntv_cval_EL02          sys_reg(3, 5, 14, 3, 2)
-#define spsr_EL12               sys_reg(3, 5, 4, 0, 0)
-#define elr_EL12                sys_reg(3, 5, 4, 0, 1)
+#define read_sysreg_el2(r)      read_sysreg_elx(r, _EL2, _EL1)
+#define write_sysreg_el2(v,r)   write_sysreg_elx(v, r, _EL2, _EL1)
 
 /**
  * hyp_alternate_select - Generates patchable code sequences that are
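
For reference, a hedged sketch of what a caller looks like after this rework:
the register name is now the SYS_ definition from asm/sysreg.h, and the macro
picks the _EL1 or _EL12 encoding via the VHE alternative (mirroring the tlb.c
hunk further below):

    u64 tcr = read_sysreg_el1(SYS_TCR);     /* TCR_EL1, or TCR_EL12 on VHE */
    write_sysreg_el1(tcr | TCR_EPD1_MASK, SYS_TCR);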

@@ -191,6 +191,9 @@
 #define SYS_APGAKEYLO_EL1               sys_reg(3, 0, 2, 3, 0)
 #define SYS_APGAKEYHI_EL1               sys_reg(3, 0, 2, 3, 1)
 
+#define SYS_SPSR_EL1                    sys_reg(3, 0, 4, 0, 0)
+#define SYS_ELR_EL1                     sys_reg(3, 0, 4, 0, 1)
+
 #define SYS_ICC_PMR_EL1                 sys_reg(3, 0, 4, 6, 0)
 
 #define SYS_AFSR0_EL1                   sys_reg(3, 0, 5, 1, 0)

@@ -382,6 +385,9 @@
 #define SYS_CNTP_CTL_EL0                sys_reg(3, 3, 14, 2, 1)
 #define SYS_CNTP_CVAL_EL0               sys_reg(3, 3, 14, 2, 2)
 
+#define SYS_CNTV_CTL_EL0                sys_reg(3, 3, 14, 3, 1)
+#define SYS_CNTV_CVAL_EL0               sys_reg(3, 3, 14, 3, 2)
+
 #define SYS_AARCH32_CNTP_TVAL           sys_reg(0, 0, 14, 2, 0)
 #define SYS_AARCH32_CNTP_CTL            sys_reg(0, 0, 14, 2, 1)
 #define SYS_AARCH32_CNTP_CVAL           sys_reg(0, 2, 0, 14, 0)

@@ -392,14 +398,17 @@
 #define __TYPER_CRm(n)                  (0xc | (((n) >> 3) & 0x3))
 #define SYS_PMEVTYPERn_EL0(n)           sys_reg(3, 3, 14, __TYPER_CRm(n), __PMEV_op2(n))
 
-#define SYS_PMCCFILTR_EL0               sys_reg (3, 3, 14, 15, 7)
+#define SYS_PMCCFILTR_EL0               sys_reg(3, 3, 14, 15, 7)
 
 #define SYS_ZCR_EL2                     sys_reg(3, 4, 1, 2, 0)
 
 #define SYS_DACR32_EL2                  sys_reg(3, 4, 3, 0, 0)
+#define SYS_SPSR_EL2                    sys_reg(3, 4, 4, 0, 0)
+#define SYS_ELR_EL2                     sys_reg(3, 4, 4, 0, 1)
 #define SYS_IFSR32_EL2                  sys_reg(3, 4, 5, 0, 1)
 #define SYS_ESR_EL2                     sys_reg(3, 4, 5, 2, 0)
 #define SYS_VSESR_EL2                   sys_reg(3, 4, 5, 2, 3)
 #define SYS_FPEXC32_EL2                 sys_reg(3, 4, 5, 3, 0)
+#define SYS_FAR_EL2                     sys_reg(3, 4, 6, 0, 0)
 
 #define SYS_VDISR_EL2                   sys_reg(3, 4, 12, 1, 1)
 #define __SYS__AP0Rx_EL2(x)             sys_reg(3, 4, 12, 8, x)

@@ -444,7 +453,29 @@
 #define SYS_ICH_LR15_EL2                __SYS__LR8_EL2(7)
 
+/* VHE encodings for architectural EL0/1 system registers */
+#define SYS_SCTLR_EL12                  sys_reg(3, 5, 1, 0, 0)
+#define SYS_CPACR_EL12                  sys_reg(3, 5, 1, 0, 2)
+#define SYS_ZCR_EL12                    sys_reg(3, 5, 1, 2, 0)
+#define SYS_TTBR0_EL12                  sys_reg(3, 5, 2, 0, 0)
+#define SYS_TTBR1_EL12                  sys_reg(3, 5, 2, 0, 1)
+#define SYS_TCR_EL12                    sys_reg(3, 5, 2, 0, 2)
+#define SYS_SPSR_EL12                   sys_reg(3, 5, 4, 0, 0)
+#define SYS_ELR_EL12                    sys_reg(3, 5, 4, 0, 1)
+#define SYS_AFSR0_EL12                  sys_reg(3, 5, 5, 1, 0)
+#define SYS_AFSR1_EL12                  sys_reg(3, 5, 5, 1, 1)
+#define SYS_ESR_EL12                    sys_reg(3, 5, 5, 2, 0)
+#define SYS_FAR_EL12                    sys_reg(3, 5, 6, 0, 0)
+#define SYS_MAIR_EL12                   sys_reg(3, 5, 10, 2, 0)
+#define SYS_AMAIR_EL12                  sys_reg(3, 5, 10, 3, 0)
+#define SYS_VBAR_EL12                   sys_reg(3, 5, 12, 0, 0)
+#define SYS_CONTEXTIDR_EL12             sys_reg(3, 5, 13, 0, 1)
+#define SYS_CNTKCTL_EL12                sys_reg(3, 5, 14, 1, 0)
+#define SYS_CNTP_TVAL_EL02              sys_reg(3, 5, 14, 2, 0)
+#define SYS_CNTP_CTL_EL02               sys_reg(3, 5, 14, 2, 1)
+#define SYS_CNTP_CVAL_EL02              sys_reg(3, 5, 14, 2, 2)
+#define SYS_CNTV_TVAL_EL02              sys_reg(3, 5, 14, 3, 0)
+#define SYS_CNTV_CTL_EL02               sys_reg(3, 5, 14, 3, 1)
+#define SYS_CNTV_CVAL_EL02              sys_reg(3, 5, 14, 3, 2)
+
 /* Common SCTLR_ELx flags. */
 #define SCTLR_ELx_DSSBS (_BITUL(44))

@@ -229,6 +229,16 @@ struct kvm_vcpu_events {
 #define KVM_REG_ARM_FW_REG(r)           (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
                                          KVM_REG_ARM_FW | ((r) & 0xffff))
 #define KVM_REG_ARM_PSCI_VERSION        KVM_REG_ARM_FW_REG(0)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1     KVM_REG_ARM_FW_REG(1)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL           0
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL               1
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED        2
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2     KVM_REG_ARM_FW_REG(2)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL           0
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN             1
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL               2
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED        3
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED     (1U << 4)
 
 /* SVE registers */
 #define KVM_REG_ARM64_SVE               (0x15 << KVM_REG_ARM_COPROC_SHIFT)

@@ -554,6 +554,17 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused)
 static bool __hardenbp_enab = true;
 static bool __spectrev2_safe = true;
 
+int get_spectre_v2_workaround_state(void)
+{
+        if (__spectrev2_safe)
+                return ARM64_BP_HARDEN_NOT_REQUIRED;
+
+        if (!__hardenbp_enab)
+                return ARM64_BP_HARDEN_UNKNOWN;
+
+        return ARM64_BP_HARDEN_WA_NEEDED;
+}
+
 /*
  * List of CPUs that do not need any Spectre-v2 mitigation at all.
  */

@@ -854,13 +865,15 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr,
 ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr,
                 char *buf)
 {
-        if (__spectrev2_safe)
+        switch (get_spectre_v2_workaround_state()) {
+        case ARM64_BP_HARDEN_NOT_REQUIRED:
                 return sprintf(buf, "Not affected\n");
-
-        if (__hardenbp_enab)
+        case ARM64_BP_HARDEN_WA_NEEDED:
                 return sprintf(buf, "Mitigation: Branch predictor hardening\n");
-
-        return sprintf(buf, "Vulnerable\n");
+        case ARM64_BP_HARDEN_UNKNOWN:
+        default:
+                return sprintf(buf, "Vulnerable\n");
+        }
 }
 
 ssize_t cpu_show_spec_store_bypass(struct device *dev,

@@ -871,6 +871,10 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
                 /*
                  * The CPU can't make progress. The exception may have
                  * been imprecise.
+                 *
+                 * Neoverse-N1 #1349291 means a non-KVM SError reported as
+                 * Unrecoverable should be treated as Uncontainable. We
+                 * call arm64_serror_panic() in both cases.
                  */
                 return true;

@@ -6,6 +6,7 @@
 
 #include <linux/linkage.h>
 
+#include <asm/alternative.h>
 #include <asm/asm-offsets.h>
 #include <asm/assembler.h>
 #include <asm/fpsimdmacros.h>

@@ -52,6 +53,20 @@ ENTRY(__guest_enter)
         // Store the host regs
         save_callee_saved_regs x1
 
+        // Now the host state is stored if we have a pending RAS SError it must
+        // affect the host. If any asynchronous exception is pending we defer
+        // the guest entry. The DSB isn't necessary before v8.2 as any SError
+        // would be fatal.
+alternative_if ARM64_HAS_RAS_EXTN
+        dsb     nshst
+        isb
+alternative_else_nop_endif
+        mrs     x1, isr_el1
+        cbz     x1,  1f
+        mov     x0, #ARM_EXCEPTION_IRQ
+        ret
+
+1:
         add     x18, x0, #VCPU_CONTEXT
 
         // Macro ptrauth_switch_to_guest format:

@@ -127,8 +142,8 @@ ENTRY(__guest_exit)
 
 alternative_if ARM64_HAS_RAS_EXTN
         // If we have the RAS extensions we can consume a pending error
-        // without an unmask-SError and isb.
-        esb
+        // without an unmask-SError and isb. The ESB-instruction consumed any
+        // pending guest error when we took the exception from the guest.
         mrs_s   x2, SYS_DISR_EL1
         str     x2, [x1, #(VCPU_FAULT_DISR - VCPU_CONTEXT)]
         cbz     x2, 1f

@@ -136,8 +151,16 @@ alternative_if ARM64_HAS_RAS_EXTN
         orr     x0, x0, #(1<<ARM_EXIT_WITH_SERROR_BIT)
 1:      ret
 alternative_else
-        // If we have a pending asynchronous abort, now is the
-        // time to find out. From your VAXorcist book, page 666:
+        dsb     sy              // Synchronize against in-flight ld/st
+        isb                     // Prevent an early read of side-effect free ISR
+        mrs     x2, isr_el1
+        tbnz    x2, #8, 2f      // ISR_EL1.A
+        ret
+        nop
+2:
+alternative_endif
+        // We know we have a pending asynchronous abort, now is the
+        // time to flush it out. From your VAXorcist book, page 666:
         // "Threaten me not, oh Evil one! For I speak with
         // the power of DEC, and I command thee to show thyself!"
         mrs     x2, elr_el2

@@ -145,10 +168,7 @@ alternative_else
         mrs     x4, spsr_el2
         mov     x5, x0
 
-        dsb     sy              // Synchronize against in-flight ld/st
+        nop
         msr     daifclr, #4     // Unmask aborts
 alternative_endif
 
         // This is our single instruction exception window. A pending
         // SError is guaranteed to occur at the earliest when we unmask

@@ -161,6 +181,8 @@ abort_guest_exit_start:
         .global abort_guest_exit_end
 abort_guest_exit_end:
 
+        msr     daifset, #4     // Mask aborts
+
         // If the exception took place, restore the EL1 exception
         // context so that we can report some information.
         // Merge the exception code with the SError pending bit.

@@ -216,17 +216,34 @@ ENDPROC(\label)
 
         .align 11
 
+.macro check_preamble_length start, end
+/* kvm_patch_vector_branch() generates code that jumps over the preamble. */
+.if ((\end-\start) != KVM_VECTOR_PREAMBLE)
+        .error "KVM vector preamble length mismatch"
+.endif
+.endm
+
 .macro valid_vect target
         .align 7
+661:
+        esb
         stp     x0, x1, [sp, #-16]!
+662:
         b       \target
+
+check_preamble_length 661b, 662b
 .endm
 
 .macro invalid_vect target
         .align 7
+661:
         b       \target
+        nop
+662:
         ldp     x0, x1, [sp], #16
         b       \target
+
+check_preamble_length 661b, 662b
 .endm
 
 ENTRY(__kvm_hyp_vector)

@@ -254,13 +271,14 @@ ENDPROC(__kvm_hyp_vector)
 #ifdef CONFIG_KVM_INDIRECT_VECTORS
 .macro hyp_ventry
         .align 7
-1:      .rept 27
+1:      esb
+        .rept 26
         nop
         .endr
 /*
  * The default sequence is to directly branch to the KVM vectors,
  * using the computed offset. This applies for VHE as well as
- * !ARM64_HARDEN_EL2_VECTORS.
+ * !ARM64_HARDEN_EL2_VECTORS. The first vector must always run the preamble.
  *
  * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced
  * with:

@@ -271,12 +289,13 @@ ENDPROC(__kvm_hyp_vector)
  * movk x0, #((addr >> 32) & 0xffff), lsl #32
  * br   x0
  *
- * Where addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + 4.
+ * Where:
+ * addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + KVM_VECTOR_PREAMBLE.
+ * See kvm_patch_vector_branch for details.
  */
 alternative_cb  kvm_patch_vector_branch
-        b       __kvm_hyp_vector + (1b - 0b)
-        nop
+        stp     x0, x1, [sp, #-16]!
+        b       __kvm_hyp_vector + (1b - 0b + KVM_VECTOR_PREAMBLE)
         nop
         nop
         nop

@@ -301,6 +320,7 @@ ENTRY(__bp_harden_hyp_vecs_end)
         .popsection
 
 ENTRY(__smccc_workaround_1_smc_start)
+        esb
         sub     sp, sp, #(8 * 4)
         stp     x2, x3, [sp, #(8 * 0)]
         stp     x0, x1, [sp, #(8 * 2)]

@@ -284,7 +284,7 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
         if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
                 return true;
 
-        far = read_sysreg_el2(far);
+        far = read_sysreg_el2(SYS_FAR);
 
         /*
          * The HPFAR can be invalid if the stage 2 fault did not

@@ -401,7 +401,7 @@ static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
 static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
         if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
-                vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr);
+                vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
 
         /*
          * We're using the raw exception code in order to only process

@@ -697,8 +697,8 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
         asm volatile("ldr %0, =__hyp_panic_string" : "=r" (str_va));
 
         __hyp_do_panic(str_va,
-                       spsr,  elr,
-                       read_sysreg(esr_el2),   read_sysreg_el2(far),
+                       spsr, elr,
+                       read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR),
                        read_sysreg(hpfar_el2), par, vcpu);
 }
 

@@ -713,15 +713,15 @@ static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
 
         panic(__hyp_panic_string,
               spsr,  elr,
-              read_sysreg_el2(esr),   read_sysreg_el2(far),
+              read_sysreg_el2(SYS_ESR),   read_sysreg_el2(SYS_FAR),
               read_sysreg(hpfar_el2), par, vcpu);
 }
 NOKPROBE_SYMBOL(__hyp_call_panic_vhe);
 
 void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
 {
-        u64 spsr = read_sysreg_el2(spsr);
-        u64 elr = read_sysreg_el2(elr);
+        u64 spsr = read_sysreg_el2(SYS_SPSR);
+        u64 elr = read_sysreg_el2(SYS_ELR);
         u64 par = read_sysreg(par_el1);
 
         if (!has_vhe())

@@ -43,33 +43,33 @@ static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
 static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 {
         ctxt->sys_regs[CSSELR_EL1]      = read_sysreg(csselr_el1);
-        ctxt->sys_regs[SCTLR_EL1]       = read_sysreg_el1(sctlr);
+        ctxt->sys_regs[SCTLR_EL1]       = read_sysreg_el1(SYS_SCTLR);
         ctxt->sys_regs[ACTLR_EL1]       = read_sysreg(actlr_el1);
-        ctxt->sys_regs[CPACR_EL1]       = read_sysreg_el1(cpacr);
-        ctxt->sys_regs[TTBR0_EL1]       = read_sysreg_el1(ttbr0);
-        ctxt->sys_regs[TTBR1_EL1]       = read_sysreg_el1(ttbr1);
-        ctxt->sys_regs[TCR_EL1]         = read_sysreg_el1(tcr);
-        ctxt->sys_regs[ESR_EL1]         = read_sysreg_el1(esr);
-        ctxt->sys_regs[AFSR0_EL1]       = read_sysreg_el1(afsr0);
-        ctxt->sys_regs[AFSR1_EL1]       = read_sysreg_el1(afsr1);
-        ctxt->sys_regs[FAR_EL1]         = read_sysreg_el1(far);
-        ctxt->sys_regs[MAIR_EL1]        = read_sysreg_el1(mair);
-        ctxt->sys_regs[VBAR_EL1]        = read_sysreg_el1(vbar);
-        ctxt->sys_regs[CONTEXTIDR_EL1]  = read_sysreg_el1(contextidr);
-        ctxt->sys_regs[AMAIR_EL1]       = read_sysreg_el1(amair);
-        ctxt->sys_regs[CNTKCTL_EL1]     = read_sysreg_el1(cntkctl);
+        ctxt->sys_regs[CPACR_EL1]       = read_sysreg_el1(SYS_CPACR);
+        ctxt->sys_regs[TTBR0_EL1]       = read_sysreg_el1(SYS_TTBR0);
+        ctxt->sys_regs[TTBR1_EL1]       = read_sysreg_el1(SYS_TTBR1);
+        ctxt->sys_regs[TCR_EL1]         = read_sysreg_el1(SYS_TCR);
+        ctxt->sys_regs[ESR_EL1]         = read_sysreg_el1(SYS_ESR);
+        ctxt->sys_regs[AFSR0_EL1]       = read_sysreg_el1(SYS_AFSR0);
+        ctxt->sys_regs[AFSR1_EL1]       = read_sysreg_el1(SYS_AFSR1);
+        ctxt->sys_regs[FAR_EL1]         = read_sysreg_el1(SYS_FAR);
+        ctxt->sys_regs[MAIR_EL1]        = read_sysreg_el1(SYS_MAIR);
+        ctxt->sys_regs[VBAR_EL1]        = read_sysreg_el1(SYS_VBAR);
+        ctxt->sys_regs[CONTEXTIDR_EL1]  = read_sysreg_el1(SYS_CONTEXTIDR);
+        ctxt->sys_regs[AMAIR_EL1]       = read_sysreg_el1(SYS_AMAIR);
+        ctxt->sys_regs[CNTKCTL_EL1]     = read_sysreg_el1(SYS_CNTKCTL);
         ctxt->sys_regs[PAR_EL1]         = read_sysreg(par_el1);
         ctxt->sys_regs[TPIDR_EL1]       = read_sysreg(tpidr_el1);
 
         ctxt->gp_regs.sp_el1            = read_sysreg(sp_el1);
-        ctxt->gp_regs.elr_el1           = read_sysreg_el1(elr);
-        ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr);
+        ctxt->gp_regs.elr_el1           = read_sysreg_el1(SYS_ELR);
+        ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(SYS_SPSR);
 }
 
 static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
 {
-        ctxt->gp_regs.regs.pc           = read_sysreg_el2(elr);
-        ctxt->gp_regs.regs.pstate       = read_sysreg_el2(spsr);
+        ctxt->gp_regs.regs.pc           = read_sysreg_el2(SYS_ELR);
+        ctxt->gp_regs.regs.pstate       = read_sysreg_el2(SYS_SPSR);
 
         if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
                 ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2);

@@ -109,35 +109,35 @@ static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctx
 
 static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
 {
-        write_sysreg(ctxt->sys_regs[TPIDR_EL0],         tpidr_el0);
-        write_sysreg(ctxt->sys_regs[TPIDRRO_EL0],       tpidrro_el0);
+        write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
+        write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
 }
 
 static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
 {
         write_sysreg(ctxt->sys_regs[MPIDR_EL1],         vmpidr_el2);
         write_sysreg(ctxt->sys_regs[CSSELR_EL1],        csselr_el1);
-        write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1],     sctlr);
-        write_sysreg(ctxt->sys_regs[ACTLR_EL1],         actlr_el1);
-        write_sysreg_el1(ctxt->sys_regs[CPACR_EL1],     cpacr);
-        write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1],     ttbr0);
-        write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1],     ttbr1);
-        write_sysreg_el1(ctxt->sys_regs[TCR_EL1],       tcr);
-        write_sysreg_el1(ctxt->sys_regs[ESR_EL1],       esr);
-        write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1],     afsr0);
-        write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1],     afsr1);
-        write_sysreg_el1(ctxt->sys_regs[FAR_EL1],       far);
-        write_sysreg_el1(ctxt->sys_regs[MAIR_EL1],      mair);
-        write_sysreg_el1(ctxt->sys_regs[VBAR_EL1],      vbar);
-        write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],contextidr);
-        write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1],     amair);
-        write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1],   cntkctl);
+        write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1],     SYS_SCTLR);
+        write_sysreg(ctxt->sys_regs[ACTLR_EL1],         actlr_el1);
+        write_sysreg_el1(ctxt->sys_regs[CPACR_EL1],     SYS_CPACR);
+        write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1],     SYS_TTBR0);
+        write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1],     SYS_TTBR1);
+        write_sysreg_el1(ctxt->sys_regs[TCR_EL1],       SYS_TCR);
+        write_sysreg_el1(ctxt->sys_regs[ESR_EL1],       SYS_ESR);
+        write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1],     SYS_AFSR0);
+        write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1],     SYS_AFSR1);
+        write_sysreg_el1(ctxt->sys_regs[FAR_EL1],       SYS_FAR);
+        write_sysreg_el1(ctxt->sys_regs[MAIR_EL1],      SYS_MAIR);
+        write_sysreg_el1(ctxt->sys_regs[VBAR_EL1],      SYS_VBAR);
+        write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],SYS_CONTEXTIDR);
+        write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1],     SYS_AMAIR);
+        write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1],   SYS_CNTKCTL);
         write_sysreg(ctxt->sys_regs[PAR_EL1],           par_el1);
         write_sysreg(ctxt->sys_regs[TPIDR_EL1],         tpidr_el1);
 
         write_sysreg(ctxt->gp_regs.sp_el1,              sp_el1);
-        write_sysreg_el1(ctxt->gp_regs.elr_el1,         elr);
-        write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr);
+        write_sysreg_el1(ctxt->gp_regs.elr_el1,         SYS_ELR);
+        write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR);
 }
 
 static void __hyp_text

@@ -160,8 +160,8 @@ __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
         if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t)
                 pstate = PSR_MODE_EL2h | PSR_IL_BIT;
 
-        write_sysreg_el2(ctxt->gp_regs.regs.pc,         elr);
-        write_sysreg_el2(pstate,                        spsr);
+        write_sysreg_el2(ctxt->gp_regs.regs.pc,         SYS_ELR);
+        write_sysreg_el2(pstate,                        SYS_SPSR);
 
         if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
                 write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);

@@ -33,12 +33,12 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm,
          * in the TCR_EL1 register. We also need to prevent it to
          * allocate IPA->PA walks, so we enable the S1 MMU...
          */
-        val = cxt->tcr = read_sysreg_el1(tcr);
+        val = cxt->tcr = read_sysreg_el1(SYS_TCR);
         val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
-        write_sysreg_el1(val, tcr);
-        val = cxt->sctlr = read_sysreg_el1(sctlr);
+        write_sysreg_el1(val, SYS_TCR);
+        val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR);
         val |= SCTLR_ELx_M;
-        write_sysreg_el1(val, sctlr);
+        write_sysreg_el1(val, SYS_SCTLR);
 }
 
 /*

@@ -85,8 +85,8 @@ static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm,
 
         if (cpus_have_const_cap(ARM64_WORKAROUND_1165522)) {
                 /* Restore the registers to what they were */
-                write_sysreg_el1(cxt->tcr, tcr);
-                write_sysreg_el1(cxt->sctlr, sctlr);
+                write_sysreg_el1(cxt->tcr, SYS_TCR);
+                write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
         }
 
         local_irq_restore(cxt->flags);

@@ -16,7 +16,7 @@
 static bool __hyp_text __is_be(struct kvm_vcpu *vcpu)
 {
         if (vcpu_mode_is_32bit(vcpu))
-                return !!(read_sysreg_el2(spsr) & PSR_AA32_E_BIT);
+                return !!(read_sysreg_el2(SYS_SPSR) & PSR_AA32_E_BIT);
 
         return !!(read_sysreg(SCTLR_EL1) & SCTLR_ELx_EE);
 }

@@ -152,7 +152,7 @@ unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu)
 
         switch (spsr_idx) {
         case KVM_SPSR_SVC:
-                return read_sysreg_el1(spsr);
+                return read_sysreg_el1(SYS_SPSR);
         case KVM_SPSR_ABT:
                 return read_sysreg(spsr_abt);
         case KVM_SPSR_UND:

@@ -177,7 +177,7 @@ void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v)
 
         switch (spsr_idx) {
         case KVM_SPSR_SVC:
-                write_sysreg_el1(v, spsr);
+                write_sysreg_el1(v, SYS_SPSR);
         case KVM_SPSR_ABT:
                 write_sysreg(v, spsr_abt);
         case KVM_SPSR_UND:

@@ -81,24 +81,24 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
          */
         switch (reg) {
         case CSSELR_EL1:        return read_sysreg_s(SYS_CSSELR_EL1);
-        case SCTLR_EL1:         return read_sysreg_s(sctlr_EL12);
+        case SCTLR_EL1:         return read_sysreg_s(SYS_SCTLR_EL12);
         case ACTLR_EL1:         return read_sysreg_s(SYS_ACTLR_EL1);
-        case CPACR_EL1:         return read_sysreg_s(cpacr_EL12);
-        case TTBR0_EL1:         return read_sysreg_s(ttbr0_EL12);
-        case TTBR1_EL1:         return read_sysreg_s(ttbr1_EL12);
-        case TCR_EL1:           return read_sysreg_s(tcr_EL12);
-        case ESR_EL1:           return read_sysreg_s(esr_EL12);
-        case AFSR0_EL1:         return read_sysreg_s(afsr0_EL12);
-        case AFSR1_EL1:         return read_sysreg_s(afsr1_EL12);
-        case FAR_EL1:           return read_sysreg_s(far_EL12);
-        case MAIR_EL1:          return read_sysreg_s(mair_EL12);
-        case VBAR_EL1:          return read_sysreg_s(vbar_EL12);
-        case CONTEXTIDR_EL1:    return read_sysreg_s(contextidr_EL12);
+        case CPACR_EL1:         return read_sysreg_s(SYS_CPACR_EL12);
+        case TTBR0_EL1:         return read_sysreg_s(SYS_TTBR0_EL12);
+        case TTBR1_EL1:         return read_sysreg_s(SYS_TTBR1_EL12);
+        case TCR_EL1:           return read_sysreg_s(SYS_TCR_EL12);
+        case ESR_EL1:           return read_sysreg_s(SYS_ESR_EL12);
+        case AFSR0_EL1:         return read_sysreg_s(SYS_AFSR0_EL12);
+        case AFSR1_EL1:         return read_sysreg_s(SYS_AFSR1_EL12);
+        case FAR_EL1:           return read_sysreg_s(SYS_FAR_EL12);
+        case MAIR_EL1:          return read_sysreg_s(SYS_MAIR_EL12);
+        case VBAR_EL1:          return read_sysreg_s(SYS_VBAR_EL12);
+        case CONTEXTIDR_EL1:    return read_sysreg_s(SYS_CONTEXTIDR_EL12);
         case TPIDR_EL0:         return read_sysreg_s(SYS_TPIDR_EL0);
         case TPIDRRO_EL0:       return read_sysreg_s(SYS_TPIDRRO_EL0);
         case TPIDR_EL1:         return read_sysreg_s(SYS_TPIDR_EL1);
-        case AMAIR_EL1:         return read_sysreg_s(amair_EL12);
-        case CNTKCTL_EL1:       return read_sysreg_s(cntkctl_EL12);
+        case AMAIR_EL1:         return read_sysreg_s(SYS_AMAIR_EL12);
+        case CNTKCTL_EL1:       return read_sysreg_s(SYS_CNTKCTL_EL12);
         case PAR_EL1:           return read_sysreg_s(SYS_PAR_EL1);
         case DACR32_EL2:        return read_sysreg_s(SYS_DACR32_EL2);
         case IFSR32_EL2:        return read_sysreg_s(SYS_IFSR32_EL2);

@@ -124,24 +124,24 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
          */
         switch (reg) {
         case CSSELR_EL1:        write_sysreg_s(val, SYS_CSSELR_EL1);    return;
-        case SCTLR_EL1:         write_sysreg_s(val, sctlr_EL12);        return;
+        case SCTLR_EL1:         write_sysreg_s(val, SYS_SCTLR_EL12);    return;
         case ACTLR_EL1:         write_sysreg_s(val, SYS_ACTLR_EL1);     return;
-        case CPACR_EL1:         write_sysreg_s(val, cpacr_EL12);        return;
-        case TTBR0_EL1:         write_sysreg_s(val, ttbr0_EL12);        return;
-        case TTBR1_EL1:         write_sysreg_s(val, ttbr1_EL12);        return;
-        case TCR_EL1:           write_sysreg_s(val, tcr_EL12);          return;
-        case ESR_EL1:           write_sysreg_s(val, esr_EL12);          return;
-        case AFSR0_EL1:         write_sysreg_s(val, afsr0_EL12);        return;
-        case AFSR1_EL1:         write_sysreg_s(val, afsr1_EL12);        return;
-        case FAR_EL1:           write_sysreg_s(val, far_EL12);          return;
-        case MAIR_EL1:          write_sysreg_s(val, mair_EL12);         return;
-        case VBAR_EL1:          write_sysreg_s(val, vbar_EL12);         return;
-        case CONTEXTIDR_EL1:    write_sysreg_s(val, contextidr_EL12);   return;
+        case CPACR_EL1:         write_sysreg_s(val, SYS_CPACR_EL12);    return;
+        case TTBR0_EL1:         write_sysreg_s(val, SYS_TTBR0_EL12);    return;
+        case TTBR1_EL1:         write_sysreg_s(val, SYS_TTBR1_EL12);    return;
+        case TCR_EL1:           write_sysreg_s(val, SYS_TCR_EL12);      return;
+        case ESR_EL1:           write_sysreg_s(val, SYS_ESR_EL12);      return;
+        case AFSR0_EL1:         write_sysreg_s(val, SYS_AFSR0_EL12);    return;
+        case AFSR1_EL1:         write_sysreg_s(val, SYS_AFSR1_EL12);    return;
+        case FAR_EL1:           write_sysreg_s(val, SYS_FAR_EL12);      return;
+        case MAIR_EL1:          write_sysreg_s(val, SYS_MAIR_EL12);     return;
+        case VBAR_EL1:          write_sysreg_s(val, SYS_VBAR_EL12);     return;
+        case CONTEXTIDR_EL1:    write_sysreg_s(val, SYS_CONTEXTIDR_EL12); return;
         case TPIDR_EL0:         write_sysreg_s(val, SYS_TPIDR_EL0);     return;
        case TPIDRRO_EL0:       write_sysreg_s(val, SYS_TPIDRRO_EL0);   return;
         case TPIDR_EL1:         write_sysreg_s(val, SYS_TPIDR_EL1);     return;
-        case AMAIR_EL1:         write_sysreg_s(val, amair_EL12);        return;
-        case CNTKCTL_EL1:       write_sysreg_s(val, cntkctl_EL12);      return;
+        case AMAIR_EL1:         write_sysreg_s(val, SYS_AMAIR_EL12);    return;
+        case CNTKCTL_EL1:       write_sysreg_s(val, SYS_CNTKCTL_EL12);  return;
         case PAR_EL1:           write_sysreg_s(val, SYS_PAR_EL1);       return;
         case DACR32_EL2:        write_sysreg_s(val, SYS_DACR32_EL2);    return;
         case IFSR32_EL2:        write_sysreg_s(val, SYS_IFSR32_EL2);    return;
@ -865,12 +865,12 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
|||
if (r->Op2 & 0x1) {
|
||||
/* accessing PMCNTENSET_EL0 */
|
||||
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
|
||||
kvm_pmu_enable_counter(vcpu, val);
|
||||
kvm_pmu_enable_counter_mask(vcpu, val);
|
||||
kvm_vcpu_pmu_restore_guest(vcpu);
|
||||
} else {
|
||||
/* accessing PMCNTENCLR_EL0 */
|
||||
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
|
||||
kvm_pmu_disable_counter(vcpu, val);
|
||||
kvm_pmu_disable_counter_mask(vcpu, val);
|
||||
}
|
||||
} else {
|
||||
p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
|
||||
|
|
|
@@ -170,11 +170,10 @@ void kvm_patch_vector_branch(struct alt_instr *alt,
 	addr |= ((u64)origptr & GENMASK_ULL(10, 7));

 	/*
-	 * Branch to the second instruction in the vectors in order to
-	 * avoid the initial store on the stack (which we already
-	 * perform in the hardening vectors).
+	 * Branch over the preamble in order to avoid the initial store on
+	 * the stack (which we already perform in the hardening vectors).
 	 */
-	addr += AARCH64_INSN_SIZE;
+	addr += KVM_VECTOR_PREAMBLE;

 	/* stp x0, x1, [sp, #-16]! */
 	insn = aarch64_insn_gen_load_store_pair(AARCH64_INSN_REG_0,

@@ -123,9 +123,9 @@ int kvm_arch_hardware_setup(void)
 	return 0;
 }

-void kvm_arch_check_processor_compat(void *rtn)
+int kvm_arch_check_processor_compat(void)
 {
-	*(int *)rtn = 0;
+	return 0;
 }

 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)

@@ -414,9 +414,9 @@ int kvm_arch_hardware_setup(void)
 	return 0;
 }

-void kvm_arch_check_processor_compat(void *rtn)
+int kvm_arch_check_processor_compat(void)
 {
-	*(int *)rtn = kvmppc_core_check_processor_compat();
+	return kvmppc_core_check_processor_compat();
 }

 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)

@@ -912,7 +912,6 @@ extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
 extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);

 static inline void kvm_arch_hardware_disable(void) {}
-static inline void kvm_arch_check_processor_compat(void *rtn) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}

@@ -227,6 +227,11 @@ int kvm_arch_hardware_enable(void)
 	return 0;
 }

+int kvm_arch_check_processor_compat(void)
+{
+	return 0;
+}
+
 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 			      unsigned long end);
@@ -2418,13 +2423,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 		kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
 		if (!kvm->arch.sca)
 			goto out_err;
-		spin_lock(&kvm_lock);
+		mutex_lock(&kvm_lock);
 		sca_offset += 16;
 		if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
 			sca_offset = 0;
 		kvm->arch.sca = (struct bsca_block *)
 			((char *) kvm->arch.sca + sca_offset);
-		spin_unlock(&kvm_lock);
+		mutex_unlock(&kvm_lock);

 	sprintf(debug_name, "kvm-%u", current->pid);

@@ -686,6 +686,7 @@ struct kvm_vcpu_arch {
 	u32 virtual_tsc_mult;
 	u32 virtual_tsc_khz;
 	s64 ia32_tsc_adjust_msr;
+	u64 msr_ia32_power_ctl;
 	u64 tsc_scaling_ratio;

 	atomic_t nmi_queued;  /* unprocessed asynchronous NMIs */
@@ -752,6 +753,8 @@ struct kvm_vcpu_arch {
 		struct gfn_to_hva_cache data;
 	} pv_eoi;

+	u64 msr_kvm_poll_control;
+
 	/*
 	 * Indicate whether the access faults on its page table in guest
 	 * which is set when fix page fault and used to detect unhandeable
@@ -879,6 +882,7 @@ struct kvm_arch {
 	bool mwait_in_guest;
 	bool hlt_in_guest;
 	bool pause_in_guest;
+	bool cstate_in_guest;

 	unsigned long irq_sources_bitmap;
 	s64 kvmclock_offset;
@@ -926,6 +930,8 @@ struct kvm_arch {

 	bool guest_can_read_msr_platform_info;
 	bool exception_payload_enabled;
+
+	struct kvm_pmu_event_filter *pmu_event_filter;
 };

 struct kvm_vm_stat {
@@ -996,7 +1002,7 @@ struct kvm_x86_ops {
 	int (*disabled_by_bios)(void);             /* __init */
 	int (*hardware_enable)(void);
 	void (*hardware_disable)(void);
-	void (*check_processor_compatibility)(void *rtn);
+	int (*check_processor_compatibility)(void);/* __init */
 	int (*hardware_setup)(void);               /* __init */
 	void (*hardware_unsetup)(void);            /* __exit */
 	bool (*cpu_has_accelerated_tpr)(void);
@@ -1110,7 +1116,7 @@ struct kvm_x86_ops {
 	int (*check_intercept)(struct kvm_vcpu *vcpu,
 			       struct x86_instruction_info *info,
 			       enum x86_intercept_stage stage);
-	void (*handle_external_intr)(struct kvm_vcpu *vcpu);
+	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
 	bool (*mpx_supported)(void);
 	bool (*xsaves_supported)(void);
 	bool (*umip_emulated)(void);
@@ -1529,7 +1535,6 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 		    unsigned long ipi_bitmap_high, u32 min,
 		    unsigned long icr, int op_64_bit);

 u64 kvm_get_arch_capabilities(void);
 void kvm_define_shared_msr(unsigned index, u32 msr);
 int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);

@@ -378,10 +378,11 @@ struct kvm_sync_regs {
 	struct kvm_vcpu_events events;
 };

-#define KVM_X86_QUIRK_LINT0_REENABLED	(1 << 0)
-#define KVM_X86_QUIRK_CD_NW_CLEARED	(1 << 1)
-#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE	(1 << 2)
-#define KVM_X86_QUIRK_OUT_7E_INC_RIP	(1 << 3)
+#define KVM_X86_QUIRK_LINT0_REENABLED	   (1 << 0)
+#define KVM_X86_QUIRK_CD_NW_CLEARED	   (1 << 1)
+#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE	   (1 << 2)
+#define KVM_X86_QUIRK_OUT_7E_INC_RIP	   (1 << 3)
+#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)

 #define KVM_STATE_NESTED_FORMAT_VMX	0
 #define KVM_STATE_NESTED_FORMAT_SVM	1	/* unused */
@@ -432,4 +433,14 @@ struct kvm_nested_state {
 	} data;
 };

+/* for KVM_CAP_PMU_EVENT_FILTER */
+struct kvm_pmu_event_filter {
+	__u32 action;
+	__u32 nevents;
+	__u64 events[0];
+};
+
+#define KVM_PMU_EVENT_ALLOW 0
+#define KVM_PMU_EVENT_DENY 1
+
 #endif /* _ASM_X86_KVM_H */

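As a usage sketch of this new uapi (not part of the patch itself): a VMM builds the variable-size filter and hands it to the VM fd via the KVM_SET_PMU_EVENT_FILTER ioctl that accompanies this series. The helper name and the idea of passing raw event codes are illustrative assumptions.

/* Hypothetical userspace sketch for KVM_CAP_PMU_EVENT_FILTER: allow only
 * the listed raw PMU events on this VM and deny everything else.
 * Assumes <linux/kvm.h> from a tree with this series and an open VM fd. */
#include <linux/kvm.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

static int set_pmu_allow_list(int vm_fd, const __u64 *events, __u32 nevents)
{
	struct kvm_pmu_event_filter *f;
	size_t sz = sizeof(*f) + nevents * sizeof(*events);
	int r;

	f = calloc(1, sz);
	if (!f)
		return -1;
	f->action = KVM_PMU_EVENT_ALLOW;	/* anything not listed is denied */
	f->nevents = nevents;
	memcpy(f->events, events, nevents * sizeof(*events));
	r = ioctl(vm_fd, KVM_SET_PMU_EVENT_FILTER, f);
	free(f);
	return r;
}
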
@@ -29,6 +29,8 @@
 #define KVM_FEATURE_PV_TLB_FLUSH	9
 #define KVM_FEATURE_ASYNC_PF_VMEXIT	10
 #define KVM_FEATURE_PV_SEND_IPI	11
+#define KVM_FEATURE_POLL_CONTROL	12
+#define KVM_FEATURE_PV_SCHED_YIELD	13

 #define KVM_HINTS_REALTIME      0
@@ -47,6 +49,7 @@
 #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
 #define MSR_KVM_STEAL_TIME  0x4b564d03
 #define MSR_KVM_PV_EOI_EN      0x4b564d04
+#define MSR_KVM_POLL_CONTROL	0x4b564d05

 struct kvm_steal_time {
 	__u64 steal;

@@ -146,7 +146,6 @@

 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
-#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL       2
 #define VMX_ABORT_VMCS_CORRUPTED             3
 #define VMX_ABORT_LOAD_HOST_MSR_FAIL         4

 #endif /* _UAPIVMX_H */

@@ -527,6 +527,21 @@ static void kvm_setup_pv_ipi(void)
 	pr_info("KVM setup pv IPIs\n");
 }

+static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
+{
+	int cpu;
+
+	native_send_call_func_ipi(mask);
+
+	/* Make sure other vCPUs get a chance to run if they need to. */
+	for_each_cpu(cpu, mask) {
+		if (vcpu_is_preempted(cpu)) {
+			kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu));
+			break;
+		}
+	}
+}
+
 static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
 {
 	native_smp_prepare_cpus(max_cpus);
@@ -638,6 +653,12 @@ static void __init kvm_guest_init(void)
 #ifdef CONFIG_SMP
 	smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
 	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
+	if (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
+	    !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
+	    kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
+		smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
+		pr_info("KVM setup pv sched yield\n");
+	}
 	if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
 				      kvm_cpu_online, kvm_cpu_down_prepare) < 0)
 		pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n");

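Taken together, the two hunks above wire up the guest half of the new PV sched-yield feature: when a function-call IPI targets a vCPU that vcpu_is_preempted() reports as scheduled out, the guest issues the KVM_HC_SCHED_YIELD hypercall with that vCPU's APIC ID so the host can boost the preempted target instead of letting the sender spin. The override is only installed when steal time is available and the realtime hint is off, as the gating conditions in kvm_guest_init() show.
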
@@ -41,6 +41,7 @@ config KVM
	select PERF_EVENTS
	select HAVE_KVM_MSI
	select HAVE_KVM_CPU_RELAX_INTERCEPT
+	select HAVE_KVM_NO_POLL
	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
	select KVM_VFIO
	select SRCU

@@ -134,6 +134,16 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 	    (best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
 		best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);

+	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
+		best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+		if (best) {
+			if (vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT)
+				best->ecx |= F(MWAIT);
+			else
+				best->ecx &= ~F(MWAIT);
+		}
+	}
+
 	/* Update physical-address width */
 	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
 	kvm_mmu_reset_context(vcpu);
@@ -276,19 +286,38 @@ static void cpuid_mask(u32 *word, int wordnum)
 	*word &= boot_cpu_data.x86_capability[wordnum];
 }

-static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+static void do_host_cpuid(struct kvm_cpuid_entry2 *entry, u32 function,
 			  u32 index)
 {
 	entry->function = function;
 	entry->index = index;
+	entry->flags = 0;
+
 	cpuid_count(entry->function, entry->index,
 		    &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
-	entry->flags = 0;
+
+	switch (function) {
+	case 2:
+		entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
+		break;
+	case 4:
+	case 7:
+	case 0xb:
+	case 0xd:
+	case 0x14:
+	case 0x8000001d:
+		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+		break;
+	}
 }

-static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
-				   u32 func, u32 index, int *nent, int maxnent)
+static int __do_cpuid_func_emulated(struct kvm_cpuid_entry2 *entry,
+				    u32 func, int *nent, int maxnent)
 {
+	entry->function = func;
+	entry->index = 0;
+	entry->flags = 0;
+
 	switch (func) {
 	case 0:
 		entry->eax = 7;
@@ -300,21 +329,83 @@ static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
 		break;
 	case 7:
-		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-		if (index == 0)
-			entry->ecx = F(RDPID);
+		entry->eax = 0;
+		entry->ecx = F(RDPID);
 		++*nent;
 	default:
 		break;
 	}

-	entry->function = func;
-	entry->index = index;
-
 	return 0;
 }

-static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
-				 u32 index, int *nent, int maxnent)
+static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
+{
+	unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
+	unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
+	unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
+	unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
+	unsigned f_la57;
+
+	/* cpuid 7.0.ebx */
+	const u32 kvm_cpuid_7_0_ebx_x86_features =
+		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
+		F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
+		F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
+		F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
+		F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | f_intel_pt;
+
+	/* cpuid 7.0.ecx*/
+	const u32 kvm_cpuid_7_0_ecx_x86_features =
+		F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
+		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
+		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
+		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
+
+	/* cpuid 7.0.edx*/
+	const u32 kvm_cpuid_7_0_edx_x86_features =
+		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
+		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
+		F(MD_CLEAR);
+
+	switch (index) {
+	case 0:
+		entry->eax = 0;
+		entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
+		cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
+		/* TSC_ADJUST is emulated */
+		entry->ebx |= F(TSC_ADJUST);
+
+		entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
+		f_la57 = entry->ecx & F(LA57);
+		cpuid_mask(&entry->ecx, CPUID_7_ECX);
+		/* Set LA57 based on hardware capability. */
+		entry->ecx |= f_la57;
+		entry->ecx |= f_umip;
+		/* PKU is not yet implemented for shadow paging. */
+		if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
+			entry->ecx &= ~F(PKU);
+
+		entry->edx &= kvm_cpuid_7_0_edx_x86_features;
+		cpuid_mask(&entry->edx, CPUID_7_EDX);
+		/*
+		 * We emulate ARCH_CAPABILITIES in software even
+		 * if the host doesn't support it.
+		 */
+		entry->edx |= F(ARCH_CAPABILITIES);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		entry->eax = 0;
+		entry->ebx = 0;
+		entry->ecx = 0;
+		entry->edx = 0;
+		break;
+	}
+}
+
+static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
+				  int *nent, int maxnent)
 {
 	int r;
 	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
@@ -327,12 +418,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	unsigned f_lm = 0;
 #endif
 	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
-	unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
-	unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
 	unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
-	unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
-	unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
-	unsigned f_la57 = 0;

 	/* cpuid 1.edx */
 	const u32 kvm_cpuid_1_edx_x86_features =
@@ -377,7 +464,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	/* cpuid 0x80000008.ebx */
 	const u32 kvm_cpuid_8000_0008_ebx_x86_features =
 		F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
-		F(AMD_SSB_NO) | F(AMD_STIBP);
+		F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON);

 	/* cpuid 0xC0000001.edx */
 	const u32 kvm_cpuid_C000_0001_edx_x86_features =
@@ -385,31 +472,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
 		F(PMM) | F(PMM_EN);

-	/* cpuid 7.0.ebx */
-	const u32 kvm_cpuid_7_0_ebx_x86_features =
-		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
-		F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
-		F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
-		F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
-		F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | f_intel_pt;
-
 	/* cpuid 0xD.1.eax */
 	const u32 kvm_cpuid_D_1_eax_x86_features =
 		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;

-	/* cpuid 7.0.ecx*/
-	const u32 kvm_cpuid_7_0_ecx_x86_features =
-		F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
-		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
-		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
-		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
-
-	/* cpuid 7.0.edx*/
-	const u32 kvm_cpuid_7_0_edx_x86_features =
-		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
-		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
-		F(MD_CLEAR);
-
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
@@ -418,12 +484,13 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	if (*nent >= maxnent)
 		goto out;

-	do_cpuid_1_ent(entry, function, index);
+	do_host_cpuid(entry, function, 0);
 	++*nent;

 	switch (function) {
 	case 0:
-		entry->eax = min(entry->eax, (u32)(f_intel_pt ? 0x14 : 0xd));
+		/* Limited to the highest leaf implemented in KVM. */
+		entry->eax = min(entry->eax, 0x1fU);
 		break;
 	case 1:
 		entry->edx &= kvm_cpuid_1_edx_x86_features;
@@ -441,14 +508,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	case 2: {
 		int t, times = entry->eax & 0xff;

-		entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
 		entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
 		for (t = 1; t < times; ++t) {
 			if (*nent >= maxnent)
 				goto out;

-			do_cpuid_1_ent(&entry[t], function, 0);
-			entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
+			do_host_cpuid(&entry[t], function, 0);
 			++*nent;
 		}
 		break;
@@ -458,7 +523,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	case 0x8000001d: {
 		int i, cache_type;

-		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 		/* read more entries until cache_type is zero */
 		for (i = 1; ; ++i) {
 			if (*nent >= maxnent)
@@ -467,9 +531,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			cache_type = entry[i - 1].eax & 0x1f;
 			if (!cache_type)
 				break;
-			do_cpuid_1_ent(&entry[i], function, i);
-			entry[i].flags |=
-			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+			do_host_cpuid(&entry[i], function, i);
 			++*nent;
 		}
 		break;
@@ -480,36 +542,21 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		entry->ecx = 0;
 		entry->edx = 0;
 		break;
-	/* function 7 has additional index. */
 	case 7: {
-		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-		/* Mask ebx against host capability word 9 */
-		if (index == 0) {
-			entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
-			cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
-			// TSC_ADJUST is emulated
-			entry->ebx |= F(TSC_ADJUST);
-			entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
-			f_la57 = entry->ecx & F(LA57);
-			cpuid_mask(&entry->ecx, CPUID_7_ECX);
-			/* Set LA57 based on hardware capability. */
-			entry->ecx |= f_la57;
-			entry->ecx |= f_umip;
-			/* PKU is not yet implemented for shadow paging. */
-			if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
-				entry->ecx &= ~F(PKU);
-			entry->edx &= kvm_cpuid_7_0_edx_x86_features;
-			cpuid_mask(&entry->edx, CPUID_7_EDX);
-			/*
-			 * We emulate ARCH_CAPABILITIES in software even
-			 * if the host doesn't support it.
-			 */
-			entry->edx |= F(ARCH_CAPABILITIES);
-		} else {
-			entry->ebx = 0;
-			entry->ecx = 0;
-			entry->edx = 0;
+		int i;
+
+		for (i = 0; ; ) {
+			do_cpuid_7_mask(&entry[i], i);
+			if (i == entry->eax)
+				break;
+			if (*nent >= maxnent)
+				goto out;
+
+			++i;
+			do_host_cpuid(&entry[i], function, i);
+			++*nent;
 		}
-		entry->eax = 0;
 		break;
 	}
 	case 9:
@@ -543,11 +590,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		entry->edx = edx.full;
 		break;
 	}
-	/* function 0xb has additional index. */
+	/*
+	 * Per Intel's SDM, the 0x1f is a superset of 0xb,
+	 * thus they can be handled by common code.
+	 */
+	case 0x1f:
 	case 0xb: {
 		int i, level_type;

-		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 		/* read more entries until level_type is zero */
 		for (i = 1; ; ++i) {
 			if (*nent >= maxnent)
@@ -556,9 +606,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			level_type = entry[i - 1].ecx & 0xff00;
 			if (!level_type)
 				break;
-			do_cpuid_1_ent(&entry[i], function, i);
-			entry[i].flags |=
-			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+			do_host_cpuid(&entry[i], function, i);
 			++*nent;
 		}
 		break;
@@ -571,7 +619,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		entry->ebx = xstate_required_size(supported, false);
 		entry->ecx = entry->ebx;
 		entry->edx &= supported >> 32;
-		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 		if (!supported)
 			break;
@@ -580,7 +627,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			if (*nent >= maxnent)
 				goto out;

-			do_cpuid_1_ent(&entry[i], function, idx);
+			do_host_cpuid(&entry[i], function, idx);
 			if (idx == 1) {
 				entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
 				cpuid_mask(&entry[i].eax, CPUID_D_1_EAX);
@@ -597,8 +644,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			}
 			entry[i].ecx = 0;
 			entry[i].edx = 0;
-			entry[i].flags |=
-			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 			++*nent;
 			++i;
 		}
@@ -611,12 +656,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		if (!f_intel_pt)
 			break;

-		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 		for (t = 1; t <= times; ++t) {
 			if (*nent >= maxnent)
 				goto out;
-			do_cpuid_1_ent(&entry[t], function, t);
-			entry[t].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+			do_host_cpuid(&entry[t], function, t);
 			++*nent;
 		}
 		break;
@@ -640,7 +683,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			     (1 << KVM_FEATURE_PV_UNHALT) |
 			     (1 << KVM_FEATURE_PV_TLB_FLUSH) |
 			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
-			     (1 << KVM_FEATURE_PV_SEND_IPI);
+			     (1 << KVM_FEATURE_PV_SEND_IPI) |
+			     (1 << KVM_FEATURE_POLL_CONTROL) |
+			     (1 << KVM_FEATURE_PV_SCHED_YIELD);

 		if (sched_info_on())
 			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
@@ -730,21 +775,19 @@ out:
 	return r;
 }

-static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func,
-			u32 idx, int *nent, int maxnent, unsigned int type)
+static int do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 func,
+			 int *nent, int maxnent, unsigned int type)
 {
 	if (type == KVM_GET_EMULATED_CPUID)
-		return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent);
+		return __do_cpuid_func_emulated(entry, func, nent, maxnent);

-	return __do_cpuid_ent(entry, func, idx, nent, maxnent);
+	return __do_cpuid_func(entry, func, nent, maxnent);
 }

 #undef F

 struct kvm_cpuid_param {
 	u32 func;
-	u32 idx;
-	bool has_leaf_count;
 	bool (*qualifier)(const struct kvm_cpuid_param *param);
 };

@@ -788,11 +831,10 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
 	int limit, nent = 0, r = -E2BIG, i;
 	u32 func;
 	static const struct kvm_cpuid_param param[] = {
-		{ .func = 0, .has_leaf_count = true },
-		{ .func = 0x80000000, .has_leaf_count = true },
-		{ .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true },
+		{ .func = 0 },
+		{ .func = 0x80000000 },
+		{ .func = 0xC0000000, .qualifier = is_centaur_cpu },
 		{ .func = KVM_CPUID_SIGNATURE },
 		{ .func = KVM_CPUID_FEATURES },
 	};

 	if (cpuid->nent < 1)
@@ -816,19 +858,16 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
 		if (ent->qualifier && !ent->qualifier(ent))
 			continue;

-		r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx,
-				 &nent, cpuid->nent, type);
+		r = do_cpuid_func(&cpuid_entries[nent], ent->func,
+				  &nent, cpuid->nent, type);

 		if (r)
 			goto out_free;

-		if (!ent->has_leaf_count)
-			continue;
-
 		limit = cpuid_entries[nent - 1].eax;
 		for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func)
-			r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx,
-					 &nent, cpuid->nent, type);
+			r = do_cpuid_func(&cpuid_entries[nent], func,
+					  &nent, cpuid->nent, type);

 		if (r)
 			goto out_free;

@@ -4258,7 +4258,7 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt)
 		ulong dr6;

 		ctxt->ops->get_dr(ctxt, 6, &dr6);
-		dr6 &= ~15;
+		dr6 &= ~DR_TRAP_BITS;
 		dr6 |= DR6_BD | DR6_RTM;
 		ctxt->ops->set_dr(ctxt, 6, dr6);
 		return emulate_db(ctxt);

@@ -102,7 +102,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
 	return mode != KVM_IRQCHIP_NONE;
 }

 bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);

@@ -75,7 +75,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 			if (r < 0)
 				r = 0;
 			r += kvm_apic_set_irq(vcpu, irq, dest_map);
-		} else if (kvm_lapic_enabled(vcpu)) {
+		} else if (kvm_apic_sw_enabled(vcpu->arch.apic)) {
 			if (!kvm_vector_hashing_enabled()) {
 				if (!lowest)
 					lowest = vcpu;

@@ -69,6 +69,7 @@
 #define X2APIC_BROADCAST	0xFFFFFFFFul

 #define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
+#define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000
 /* step-by-step approximation to mitigate fluctuation */
 #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
@@ -85,11 +86,6 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
 		apic_test_vector(vector, apic->regs + APIC_IRR);
 }

-static inline void apic_clear_vector(int vec, void *bitmap)
-{
-	clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
-}
-
 static inline int __apic_test_and_set_vector(int vec, void *bitmap)
 {
 	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -443,12 +439,12 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)

 	if (unlikely(vcpu->arch.apicv_active)) {
 		/* need to update RVI */
-		apic_clear_vector(vec, apic->regs + APIC_IRR);
+		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
 		kvm_x86_ops->hwapic_irr_update(vcpu,
 				apic_find_highest_irr(apic));
 	} else {
 		apic->irr_pending = false;
-		apic_clear_vector(vec, apic->regs + APIC_IRR);
+		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
 		if (apic_search_irr(apic) != -1)
 			apic->irr_pending = true;
 	}
@@ -1053,9 +1049,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,

 		if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
 			if (trig_mode)
-				kvm_lapic_set_vector(vector, apic->regs + APIC_TMR);
+				kvm_lapic_set_vector(vector,
+						     apic->regs + APIC_TMR);
 			else
-				apic_clear_vector(vector, apic->regs + APIC_TMR);
+				kvm_lapic_clear_vector(vector,
+						       apic->regs + APIC_TMR);
 		}

 		if (vcpu->arch.apicv_active)
@@ -1313,21 +1311,45 @@ static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
 	return container_of(dev, struct kvm_lapic, dev);
 }

+#define APIC_REG_MASK(reg)	(1ull << ((reg) >> 4))
+#define APIC_REGS_MASK(first, count) \
+	(APIC_REG_MASK(first) * ((1ull << (count)) - 1))
+
 int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
 		       void *data)
 {
 	unsigned char alignment = offset & 0xf;
 	u32 result;
-	/* this bitmask has a bit cleared for each reserved register */
-	static const u64 rmask = 0x43ff01ffffffe70cULL;
+	u64 valid_reg_mask =
+		APIC_REG_MASK(APIC_ID) |
+		APIC_REG_MASK(APIC_LVR) |
+		APIC_REG_MASK(APIC_TASKPRI) |
+		APIC_REG_MASK(APIC_PROCPRI) |
+		APIC_REG_MASK(APIC_LDR) |
+		APIC_REG_MASK(APIC_DFR) |
+		APIC_REG_MASK(APIC_SPIV) |
+		APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
+		APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
+		APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
+		APIC_REG_MASK(APIC_ESR) |
+		APIC_REG_MASK(APIC_ICR) |
+		APIC_REG_MASK(APIC_ICR2) |
+		APIC_REG_MASK(APIC_LVTT) |
+		APIC_REG_MASK(APIC_LVTTHMR) |
+		APIC_REG_MASK(APIC_LVTPC) |
+		APIC_REG_MASK(APIC_LVT0) |
+		APIC_REG_MASK(APIC_LVT1) |
+		APIC_REG_MASK(APIC_LVTERR) |
+		APIC_REG_MASK(APIC_TMICT) |
+		APIC_REG_MASK(APIC_TMCCT) |
+		APIC_REG_MASK(APIC_TDCR);

 	if ((alignment + len) > 4) {
 		apic_debug("KVM_APIC_READ: alignment error %x %d\n",
 			   offset, len);
 		return 1;
 	}

+	/* ARBPRI is not valid on x2APIC */
+	if (!apic_x2apic_mode(apic))
+		valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
+
-	if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
+	if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) {
 		apic_debug("KVM_APIC_READ: read reserved register %x\n",
 			   offset);
 		return 1;
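To make the new mask scheme concrete, here is a small self-contained check (a sketch, not from the patch; it re-defines the two macros locally and uses the standard APIC register offsets): every 16-byte APIC register maps to one bit of the 64-bit mask, and a bank of registers maps to a contiguous run of bits.

/* Standalone illustration of the register-mask encoding used above. */
#define APIC_REG_MASK(reg)	(1ull << ((reg) >> 4))
#define APIC_REGS_MASK(first, count) \
	(APIC_REG_MASK(first) * ((1ull << (count)) - 1))

/* The task-priority register lives at offset 0x80, so it occupies bit 8. */
_Static_assert(APIC_REG_MASK(0x80) == (1ull << 8), "TPR -> bit 8");
/* The eight ISR registers start at offset 0x100, covering bits 16..23. */
_Static_assert(APIC_REGS_MASK(0x100, 8) == (0xffull << 16), "ISR bank");
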
@@ -1499,11 +1521,40 @@ static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
 	}
 }

-void wait_lapic_expire(struct kvm_vcpu *vcpu)
+static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
+					      s64 advance_expire_delta)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
-	u64 guest_tsc, tsc_deadline, ns;
+	u64 ns;
+
+	/* too early */
+	if (advance_expire_delta < 0) {
+		ns = -advance_expire_delta * 1000000ULL;
+		do_div(ns, vcpu->arch.virtual_tsc_khz);
+		timer_advance_ns -= min((u32)ns,
+			timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+	} else {
+		/* too late */
+		ns = advance_expire_delta * 1000000ULL;
+		do_div(ns, vcpu->arch.virtual_tsc_khz);
+		timer_advance_ns += min((u32)ns,
+			timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+	}
+
+	if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
+		apic->lapic_timer.timer_advance_adjust_done = true;
+	if (unlikely(timer_advance_ns > 5000)) {
+		timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
+		apic->lapic_timer.timer_advance_adjust_done = false;
+	}
+	apic->lapic_timer.timer_advance_ns = timer_advance_ns;
+}
+
+void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	u64 guest_tsc, tsc_deadline;

 	if (apic->lapic_timer.expired_tscdeadline == 0)
 		return;
@@ -1514,34 +1565,15 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
 	tsc_deadline = apic->lapic_timer.expired_tscdeadline;
 	apic->lapic_timer.expired_tscdeadline = 0;
 	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
-	trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
+	apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;

 	if (guest_tsc < tsc_deadline)
 		__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);

-	if (!apic->lapic_timer.timer_advance_adjust_done) {
-		/* too early */
-		if (guest_tsc < tsc_deadline) {
-			ns = (tsc_deadline - guest_tsc) * 1000000ULL;
-			do_div(ns, vcpu->arch.virtual_tsc_khz);
-			timer_advance_ns -= min((u32)ns,
-				timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
-		} else {
-			/* too late */
-			ns = (guest_tsc - tsc_deadline) * 1000000ULL;
-			do_div(ns, vcpu->arch.virtual_tsc_khz);
-			timer_advance_ns += min((u32)ns,
-				timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
-		}
-		if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
-			apic->lapic_timer.timer_advance_adjust_done = true;
-		if (unlikely(timer_advance_ns > 5000)) {
-			timer_advance_ns = 0;
-			apic->lapic_timer.timer_advance_adjust_done = true;
-		}
-		apic->lapic_timer.timer_advance_ns = timer_advance_ns;
-	}
+	if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
+		adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
 }
+EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);

 static void start_sw_tscdeadline(struct kvm_lapic *apic)
 {
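For intuition on the adjustment math above (a worked example, not from the patch): advance_expire_delta is measured in guest TSC cycles, so the conversion is ns = |delta| * 1,000,000 / virtual_tsc_khz. With a 2 GHz guest TSC (virtual_tsc_khz = 2,000,000), a timer observed 3,000 cycles late converts to 1,500 ns, and the min() against timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP caps each correction at one eighth of the current advance, which is the damping the step constant's comment refers to.
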
@@ -2014,7 +2046,7 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
 	apic_debug("%s: offset 0x%x with length 0x%x, and value is "
 		   "0x%x\n", __func__, offset, len, val);

-	kvm_lapic_reg_write(apic, offset & 0xff0, val);
+	kvm_lapic_reg_write(apic, offset, val);

 	return 0;
 }
@@ -2311,7 +2343,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 		     HRTIMER_MODE_ABS_PINNED);
 	apic->lapic_timer.timer.function = apic_timer_fn;
 	if (timer_advance_ns == -1) {
-		apic->lapic_timer.timer_advance_ns = 1000;
+		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
 		apic->lapic_timer.timer_advance_adjust_done = false;
 	} else {
 		apic->lapic_timer.timer_advance_ns = timer_advance_ns;
@@ -2321,7 +2353,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)

 	/*
 	 * APIC is created enabled. This will prevent kvm_lapic_set_base from
-	 * thinking that APIC satet has changed.
+	 * thinking that APIC state has changed.
 	 */
 	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
 	static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
@@ -2330,6 +2362,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 	return 0;
 nomem_free_apic:
 	kfree(apic);
+	vcpu->arch.apic = NULL;
 nomem:
 	return -ENOMEM;
 }

@@ -32,6 +32,7 @@ struct kvm_timer {
 	u64 tscdeadline;
 	u64 expired_tscdeadline;
 	u32 timer_advance_ns;
+	s64 advance_expire_delta;
 	atomic_t pending;	/* accumulated triggered timers */
 	bool hv_timer_in_use;
 	bool timer_advance_adjust_done;
@@ -129,6 +130,11 @@ void kvm_lapic_exit(void);
 #define VEC_POS(v) ((v) & (32 - 1))
 #define REG_POS(v) (((v) >> 5) << 4)

+static inline void kvm_lapic_clear_vector(int vec, void *bitmap)
+{
+	clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
 static inline void kvm_lapic_set_vector(int vec, void *bitmap)
 {
 	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -219,7 +225,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)

 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);

-void wait_lapic_expire(struct kvm_vcpu *vcpu);
+void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);

 bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 			struct kvm_vcpu **dest_vcpu);

@@ -140,9 +140,6 @@ module_param(dbg, bool, 0644);

 #include <trace/events/kvm.h>

-#define CREATE_TRACE_POINTS
-#include "mmutrace.h"
-
 #define SPTE_HOST_WRITEABLE	(1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
 #define SPTE_MMU_WRITEABLE	(1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
@@ -259,11 +256,20 @@ static const u64 shadow_nonpresent_or_rsvd_mask_len = 5;
 */
 static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;

+/*
+ * The number of non-reserved physical address bits irrespective of features
+ * that repurpose legal bits, e.g. MKTME.
+ */
+static u8 __read_mostly shadow_phys_bits;
+
 static void mmu_spte_set(u64 *sptep, u64 spte);
+static bool is_executable_pte(u64 spte);
 static union kvm_mmu_page_role
 kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);

+#define CREATE_TRACE_POINTS
+#include "mmutrace.h"
+
 static inline bool kvm_available_flush_tlb_with_range(void)
 {
@@ -468,6 +474,21 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);

+static u8 kvm_get_shadow_phys_bits(void)
+{
+	/*
+	 * boot_cpu_data.x86_phys_bits is reduced when MKTME is detected
+	 * in CPU detection code, but MKTME treats those reduced bits as
+	 * 'keyID' thus they are not reserved bits. Therefore for MKTME
+	 * we should still return physical address bits reported by CPUID.
+	 */
+	if (!boot_cpu_has(X86_FEATURE_TME) ||
+	    WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008))
+		return boot_cpu_data.x86_phys_bits;
+
+	return cpuid_eax(0x80000008) & 0xff;
+}
+
 static void kvm_mmu_reset_all_pte_masks(void)
 {
 	u8 low_phys_bits;
@@ -481,6 +502,8 @@ static void kvm_mmu_reset_all_pte_masks(void)
 	shadow_present_mask = 0;
 	shadow_acc_track_mask = 0;

+	shadow_phys_bits = kvm_get_shadow_phys_bits();
+
 	/*
 	 * If the CPU has 46 or less physical address bits, then set an
 	 * appropriate mask to guard against L1TF attacks. Otherwise, it is
@@ -1073,10 +1096,16 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)

 static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
 {
-	if (sp->role.direct)
-		BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index));
-	else
+	if (!sp->role.direct) {
 		sp->gfns[index] = gfn;
+		return;
+	}
+
+	if (WARN_ON(gfn != kvm_mmu_page_get_gfn(sp, index)))
+		pr_err_ratelimited("gfn mismatch under direct page %llx "
+				   "(expected %llx, got %llx)\n",
+				   sp->gfn,
+				   kvm_mmu_page_get_gfn(sp, index), gfn);
 }

 /*
@@ -3055,10 +3084,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
 		ret = RET_PF_EMULATE;

 	pgprintk("%s: setting spte %llx\n", __func__, *sptep);
-	pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
-		 is_large_pte(*sptep)? "2MB" : "4kB",
-		 *sptep & PT_WRITABLE_MASK ? "RW" : "R", gfn,
-		 *sptep, sptep);
+	trace_kvm_mmu_set_spte(level, gfn, sptep);
 	if (!was_rmapped && is_large_pte(*sptep))
 		++vcpu->kvm->stat.lpages;
@@ -3070,8 +3096,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
 		}
 	}

-	kvm_release_pfn_clean(pfn);
-
 	return ret;
 }
@@ -3106,9 +3130,11 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
 	if (ret <= 0)
 		return -1;

-	for (i = 0; i < ret; i++, gfn++, start++)
+	for (i = 0; i < ret; i++, gfn++, start++) {
 		mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn,
 			     page_to_pfn(pages[i]), true, true);
+		put_page(pages[i]);
+	}

 	return 0;
 }
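The put_page() added here pairs with the reference the prefetch path took on each page: as the earlier mmu_set_spte() hunk shows, that function no longer calls kvm_release_pfn_clean() itself, so every caller is now responsible for dropping its own page reference once the SPTE has been installed.
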
@@ -3156,40 +3182,40 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
 	__direct_pte_prefetch(vcpu, sp, sptep);
 }

-static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
-			int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault)
+static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
+			int map_writable, int level, kvm_pfn_t pfn,
+			bool prefault)
 {
-	struct kvm_shadow_walk_iterator iterator;
+	struct kvm_shadow_walk_iterator it;
 	struct kvm_mmu_page *sp;
-	int emulate = 0;
-	gfn_t pseudo_gfn;
+	int ret;
+	gfn_t gfn = gpa >> PAGE_SHIFT;
+	gfn_t base_gfn = gfn;

 	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
-		return 0;
+		return RET_PF_RETRY;

-	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
-		if (iterator.level == level) {
-			emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
-					       write, level, gfn, pfn, prefault,
-					       map_writable);
-			direct_pte_prefetch(vcpu, iterator.sptep);
-			++vcpu->stat.pf_fixed;
+	trace_kvm_mmu_spte_requested(gpa, level, pfn);
+	for_each_shadow_entry(vcpu, gpa, it) {
+		base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+		if (it.level == level)
 			break;
-		}

-		drop_large_spte(vcpu, iterator.sptep);
-		if (!is_shadow_present_pte(*iterator.sptep)) {
-			u64 base_addr = iterator.addr;
+		drop_large_spte(vcpu, it.sptep);
+		if (!is_shadow_present_pte(*it.sptep)) {
+			sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr,
+					      it.level - 1, true, ACC_ALL);

-			base_addr &= PT64_LVL_ADDR_MASK(iterator.level);
-			pseudo_gfn = base_addr >> PAGE_SHIFT;
-			sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
-					      iterator.level - 1, 1, ACC_ALL);
-
-			link_shadow_page(vcpu, iterator.sptep, sp);
+			link_shadow_page(vcpu, it.sptep, sp);
 		}
 	}
-	return emulate;
+
+	ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL,
+			   write, level, base_gfn, pfn, prefault,
+			   map_writable);
+	direct_pte_prefetch(vcpu, it.sptep);
+	++vcpu->stat.pf_fixed;
+	return ret;
 }

 static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk)
@@ -3216,11 +3242,10 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
 }

 static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
-					gfn_t *gfnp, kvm_pfn_t *pfnp,
+					gfn_t gfn, kvm_pfn_t *pfnp,
 					int *levelp)
 {
 	kvm_pfn_t pfn = *pfnp;
-	gfn_t gfn = *gfnp;
 	int level = *levelp;

 	/*
@@ -3247,8 +3272,6 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
 		mask = KVM_PAGES_PER_HPAGE(level) - 1;
 		VM_BUG_ON((gfn & mask) != (pfn & mask));
 		if (pfn & mask) {
-			gfn &= ~mask;
-			*gfnp = gfn;
 			kvm_release_pfn_clean(pfn);
 			pfn &= ~mask;
 			kvm_get_pfn(pfn);
@@ -3505,22 +3528,19 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 	if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
 		return r;

+	r = RET_PF_RETRY;
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
 		goto out_unlock;
 	if (make_mmu_pages_available(vcpu) < 0)
 		goto out_unlock;
 	if (likely(!force_pt_level))
-		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
-	r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
-	spin_unlock(&vcpu->kvm->mmu_lock);
-
-	return r;
-
+		transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
+	r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault);
 out_unlock:
 	spin_unlock(&vcpu->kvm->mmu_lock);
 	kvm_release_pfn_clean(pfn);
-	return RET_PF_RETRY;
+	return r;
 }

 static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
@@ -4015,19 +4035,6 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
 	return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
 }

-bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
-{
-	if (unlikely(!lapic_in_kernel(vcpu) ||
-		     kvm_event_needs_reinjection(vcpu) ||
-		     vcpu->arch.exception.pending))
-		return false;
-
-	if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
-		return false;
-
-	return kvm_x86_ops->interrupt_allowed(vcpu);
-}
-
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 			 gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable)
 {
@@ -4147,22 +4154,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
 		return r;

+	r = RET_PF_RETRY;
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
 		goto out_unlock;
 	if (make_mmu_pages_available(vcpu) < 0)
 		goto out_unlock;
 	if (likely(!force_pt_level))
-		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
-	r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
-	spin_unlock(&vcpu->kvm->mmu_lock);
-
-	return r;
-
+		transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
+	r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault);
 out_unlock:
 	spin_unlock(&vcpu->kvm->mmu_lock);
 	kvm_release_pfn_clean(pfn);
-	return RET_PF_RETRY;
+	return r;
 }

 static void nonpaging_init_context(struct kvm_vcpu *vcpu,
@@ -4494,7 +4498,7 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 	 */
 	shadow_zero_check = &context->shadow_zero_check;
 	__reset_rsvds_bits_mask(vcpu, shadow_zero_check,
-				boot_cpu_data.x86_phys_bits,
+				shadow_phys_bits,
 				context->shadow_root_level, uses_nx,
 				guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
 				is_pse(vcpu), true);
@@ -4531,13 +4535,13 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,

 	if (boot_cpu_is_amd())
 		__reset_rsvds_bits_mask(vcpu, shadow_zero_check,
-					boot_cpu_data.x86_phys_bits,
+					shadow_phys_bits,
 					context->shadow_root_level, false,
 					boot_cpu_has(X86_FEATURE_GBPAGES),
 					true, true);
 	else
 		__reset_rsvds_bits_mask_ept(shadow_zero_check,
-					    boot_cpu_data.x86_phys_bits,
+					    shadow_phys_bits,
 					    false);

 	if (!shadow_me_mask)
@@ -4558,7 +4562,7 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
 				struct kvm_mmu *context, bool execonly)
 {
 	__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
-				    boot_cpu_data.x86_phys_bits, execonly);
+				    shadow_phys_bits, execonly);
 }

 #define BYTE_MASK(access) \
@@ -5935,7 +5939,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 	int nr_to_scan = sc->nr_to_scan;
 	unsigned long freed = 0;

-	spin_lock(&kvm_lock);
+	mutex_lock(&kvm_lock);

 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		int idx;
@@ -5977,7 +5981,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 		break;
 	}

-	spin_unlock(&kvm_lock);
+	mutex_unlock(&kvm_lock);
 	return freed;
 }
@@ -5999,6 +6003,34 @@ static void mmu_destroy_caches(void)
 	kmem_cache_destroy(mmu_page_header_cache);
 }

+static void kvm_set_mmio_spte_mask(void)
+{
+	u64 mask;
+
+	/*
+	 * Set the reserved bits and the present bit of an paging-structure
+	 * entry to generate page fault with PFER.RSV = 1.
+	 */
+
+	/*
+	 * Mask the uppermost physical address bit, which would be reserved as
+	 * long as the supported physical address width is less than 52.
+	 */
+	mask = 1ull << 51;
+
+	/* Set the present bit. */
+	mask |= 1ull;
+
+	/*
+	 * If reserved bit is not supported, clear the present bit to disable
+	 * mmio page fault.
+	 */
+	if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52)
+		mask &= ~1ull;
+
+	kvm_mmu_set_mmio_spte_mask(mask, mask);
+}
+
 int kvm_mmu_module_init(void)
 {
 	int ret = -ENOMEM;
@@ -6015,6 +6047,8 @@ int kvm_mmu_module_init(void)

 	kvm_mmu_reset_all_pte_masks();

+	kvm_set_mmio_spte_mask();
+
 	pte_list_desc_cache = kmem_cache_create("pte_list_desc",
 					    sizeof(struct pte_list_desc),
 					    0, SLAB_ACCOUNT, NULL);

@@ -301,6 +301,65 @@ TRACE_EVENT(
 		__entry->kvm_gen == __entry->spte_gen
 	)
 );

+TRACE_EVENT(
+	kvm_mmu_set_spte,
+	TP_PROTO(int level, gfn_t gfn, u64 *sptep),
+	TP_ARGS(level, gfn, sptep),
+
+	TP_STRUCT__entry(
+		__field(u64, gfn)
+		__field(u64, spte)
+		__field(u64, sptep)
+		__field(u8, level)
+		/* These depend on page entry type, so compute them now. */
+		__field(bool, r)
+		__field(bool, x)
+		__field(u8, u)
+	),
+
+	TP_fast_assign(
+		__entry->gfn = gfn;
+		__entry->spte = *sptep;
+		__entry->sptep = virt_to_phys(sptep);
+		__entry->level = level;
+		__entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK);
+		__entry->x = is_executable_pte(__entry->spte);
+		__entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1;
+	),
+
+	TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx",
+		  __entry->gfn, __entry->spte,
+		  __entry->r ? "r" : "-",
+		  __entry->spte & PT_WRITABLE_MASK ? "w" : "-",
+		  __entry->x ? "x" : "-",
+		  __entry->u == -1 ? "" : (__entry->u ? "u" : "-"),
+		  __entry->level, __entry->sptep
+	)
+);
+
+TRACE_EVENT(
+	kvm_mmu_spte_requested,
+	TP_PROTO(gpa_t addr, int level, kvm_pfn_t pfn),
+	TP_ARGS(addr, level, pfn),
+
+	TP_STRUCT__entry(
+		__field(u64, gfn)
+		__field(u64, pfn)
+		__field(u8, level)
+	),
+
+	TP_fast_assign(
+		__entry->gfn = addr >> PAGE_SHIFT;
+		__entry->pfn = pfn | (__entry->gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
+		__entry->level = level;
+	),
+
+	TP_printk("gfn %llx pfn %llx level %d",
+		  __entry->gfn, __entry->pfn, __entry->level
+	)
+);
+
 #endif /* _TRACE_KVMMMU_H */

 #undef TRACE_INCLUDE_PATH

@@ -540,6 +540,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn,
 		     true, true);

+	kvm_release_pfn_clean(pfn);
 	return true;
 }
@@ -619,6 +620,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	struct kvm_shadow_walk_iterator it;
 	unsigned direct_access, access = gw->pt_access;
 	int top_level, ret;
+	gfn_t base_gfn;

 	direct_access = gw->pte_access;
@@ -663,35 +665,34 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		link_shadow_page(vcpu, it.sptep, sp);
 	}

-	for (;
-	     shadow_walk_okay(&it) && it.level > hlevel;
-	     shadow_walk_next(&it)) {
-		gfn_t direct_gfn;
+	base_gfn = gw->gfn;

+	trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
+
+	for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
 		clear_sp_write_flooding_count(it.sptep);
+		base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+		if (it.level == hlevel)
+			break;
+
 		validate_direct_spte(vcpu, it.sptep, direct_access);

 		drop_large_spte(vcpu, it.sptep);

-		if (is_shadow_present_pte(*it.sptep))
-			continue;
-
-		direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
-
-		sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1,
-				      true, direct_access);
-		link_shadow_page(vcpu, it.sptep, sp);
+		if (!is_shadow_present_pte(*it.sptep)) {
+			sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
+					      it.level - 1, true, direct_access);
+			link_shadow_page(vcpu, it.sptep, sp);
+		}
 	}

-	clear_sp_write_flooding_count(it.sptep);
 	ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault,
-			   it.level, gw->gfn, pfn, prefault, map_writable);
+			   it.level, base_gfn, pfn, prefault, map_writable);
 	FNAME(pte_prefetch)(vcpu, gw, it.sptep);
-
 	++vcpu->stat.pf_fixed;
 	return ret;

 out_gpte_changed:
-	kvm_release_pfn_clean(pfn);
 	return RET_PF_RETRY;
 }
@@ -839,6 +840,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 			walker.pte_access &= ~ACC_EXEC_MASK;
 	}

+	r = RET_PF_RETRY;
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
 		goto out_unlock;
@@ -847,19 +849,15 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	if (make_mmu_pages_available(vcpu) < 0)
 		goto out_unlock;
 	if (!force_pt_level)
-		transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
+		transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level);
 	r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
 			 level, pfn, map_writable, prefault);
-	++vcpu->stat.pf_fixed;
 	kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
-	spin_unlock(&vcpu->kvm->mmu_lock);
-
-	return r;

 out_unlock:
 	spin_unlock(&vcpu->kvm->mmu_lock);
 	kvm_release_pfn_clean(pfn);
-	return RET_PF_RETRY;
+	return r;
 }

 static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)

@@ -19,6 +19,9 @@
#include "lapic.h"
#include "pmu.h"

/* This keeps the total size of the filter under 4k. */
#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 63

/* NOTE:
 * - Each perf counter is defined as "struct kvm_pmc";
 * - There are two types of perf counters: general purpose (gp) and fixed.

@@ -141,6 +144,10 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
	unsigned config, type = PERF_TYPE_RAW;
	u8 event_select, unit_mask;
	struct kvm *kvm = pmc->vcpu->kvm;
	struct kvm_pmu_event_filter *filter;
	int i;
	bool allow_event = true;

	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
		printk_once("kvm pmu: pin control bit is ignored\n");

@@ -152,6 +159,22 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
		return;

	filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
	if (filter) {
		for (i = 0; i < filter->nevents; i++)
			if (filter->events[i] ==
			    (eventsel & AMD64_RAW_EVENT_MASK_NB))
				break;
		if (filter->action == KVM_PMU_EVENT_ALLOW &&
		    i == filter->nevents)
			allow_event = false;
		if (filter->action == KVM_PMU_EVENT_DENY &&
		    i < filter->nevents)
			allow_event = false;
	}
	if (!allow_event)
		return;

	event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
	unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;

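The filter check above is a linear scan followed by an action test: under KVM_PMU_EVENT_ALLOW a miss blocks the event, under KVM_PMU_EVENT_DENY a hit blocks it. A self-contained sketch of the same decision, with stand-in types for the uapi structures:

#include <stdbool.h>
#include <stdint.h>

enum { EVENT_ALLOW, EVENT_DENY };

struct event_filter {
	uint32_t action;
	uint32_t nevents;
	uint64_t events[];	/* sorted or not; scanned linearly */
};

static bool event_is_allowed(const struct event_filter *f, uint64_t key)
{
	uint32_t i;

	if (!f)
		return true;	/* no filter installed: everything passes */

	for (i = 0; i < f->nevents; i++)
		if (f->events[i] == key)
			break;

	if (f->action == EVENT_ALLOW)
		return i < f->nevents;	/* allowlist: must be present */
	else
		return i == f->nevents;	/* denylist: must be absent */
}
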
@@ -348,3 +371,43 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_pmu_reset(vcpu);
}

int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
{
	struct kvm_pmu_event_filter tmp, *filter;
	size_t size;
	int r;

	if (copy_from_user(&tmp, argp, sizeof(tmp)))
		return -EFAULT;

	if (tmp.action != KVM_PMU_EVENT_ALLOW &&
	    tmp.action != KVM_PMU_EVENT_DENY)
		return -EINVAL;

	if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
		return -E2BIG;

	size = struct_size(filter, events, tmp.nevents);
	filter = kmalloc(size, GFP_KERNEL_ACCOUNT);
	if (!filter)
		return -ENOMEM;

	r = -EFAULT;
	if (copy_from_user(filter, argp, size))
		goto cleanup;

	/* Ensure nevents can't be changed between the user copies. */
	*filter = tmp;

	mutex_lock(&kvm->lock);
	rcu_swap_protected(kvm->arch.pmu_event_filter, filter,
			   mutex_is_locked(&kvm->lock));
	mutex_unlock(&kvm->lock);

	synchronize_srcu_expedited(&kvm->srcu);
	r = 0;
cleanup:
	kfree(filter);
	return r;
}

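The "*filter = tmp;" assignment above closes a double-fetch window: the header is read from user memory twice, and a racing writer could grow nevents between the copies, so the validated first snapshot is written back over the second. A userspace-style sketch of the pattern, with generic stand-ins for copy_from_user() and the filter layout:

#include <stdlib.h>
#include <string.h>
#include <stdint.h>

struct filter_hdr {
	uint32_t action;
	uint32_t nevents;
	uint64_t events[];	/* flexible array after the header */
};

/* stand-in for copy_from_user(): 0 on success */
static int copy_in(void *dst, const void *user_src, size_t n)
{
	memcpy(dst, user_src, n);
	return 0;
}

static struct filter_hdr *fetch_filter(const void *user_ptr, uint32_t max)
{
	struct filter_hdr tmp, *f;
	size_t size;

	if (copy_in(&tmp, user_ptr, sizeof(tmp)))
		return NULL;
	if (tmp.nevents > max)
		return NULL;

	size = sizeof(tmp) + (size_t)tmp.nevents * sizeof(uint64_t);
	f = malloc(size);
	if (!f)
		return NULL;
	if (copy_in(f, user_ptr, size)) {
		free(f);
		return NULL;
	}
	/* struct assignment copies only the fixed header part, clamping
	 * the second (untrusted) fetch to the validated first snapshot */
	*f = tmp;
	return f;
}
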
@@ -118,6 +118,7 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu);
void kvm_pmu_reset(struct kvm_vcpu *vcpu);
void kvm_pmu_init(struct kvm_vcpu *vcpu);
void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp);

bool is_vmware_backdoor_pmc(u32 pmc_idx);

@@ -364,6 +364,10 @@ static int avic;
module_param(avic, int, S_IRUGO);
#endif

/* enable/disable Next RIP Save */
static int nrips = true;
module_param(nrips, int, 0444);

/* enable/disable Virtual VMLOAD VMSAVE */
static int vls = true;
module_param(vls, int, 0444);

@@ -770,7 +774,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (svm->vmcb->control.next_rip != 0) {
	if (nrips && svm->vmcb->control.next_rip != 0) {
		WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
		svm->next_rip = svm->vmcb->control.next_rip;
	}

@@ -807,7 +811,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)

	kvm_deliver_exception_payload(&svm->vcpu);

	if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
	if (nr == BP_VECTOR && !nrips) {
		unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);

		/*

@@ -1364,6 +1368,11 @@ static __init int svm_hardware_setup(void)
	} else
		kvm_disable_tdp();

	if (nrips) {
		if (!boot_cpu_has(X86_FEATURE_NRIPS))
			nrips = false;
	}

	if (avic) {
		if (!npt_enabled ||
		    !boot_cpu_has(X86_FEATURE_AVIC) ||

@@ -3290,7 +3299,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
			       vmcb->control.exit_int_info_err,
			       KVM_ISA_SVM);

	rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(svm->nested.vmcb), &map);
	rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map);
	if (rc) {
		if (rc == -EINVAL)
			kvm_inject_gp(&svm->vcpu, 0);

@@ -3580,7 +3589,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)

	vmcb_gpa = svm->vmcb->save.rax;

	rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(vmcb_gpa), &map);
	rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
	if (rc) {
		if (rc == -EINVAL)
			kvm_inject_gp(&svm->vcpu, 0);

@@ -3935,7 +3944,7 @@ static int rdpmc_interception(struct vcpu_svm *svm)
{
	int err;

	if (!static_cpu_has(X86_FEATURE_NRIPS))
	if (!nrips)
		return emulate_on_interception(svm);

	err = kvm_rdpmc(&svm->vcpu);

@@ -5160,10 +5169,13 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
	kvm_lapic_set_irr(vec, vcpu->arch.apic);
	smp_mb__after_atomic();

	if (avic_vcpu_is_running(vcpu))
		wrmsrl(SVM_AVIC_DOORBELL,
		       kvm_cpu_get_apicid(vcpu->cpu));
	else
	if (avic_vcpu_is_running(vcpu)) {
		int cpuid = vcpu->cpu;

		if (cpuid != get_cpu())
			wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpuid));
		put_cpu();
	} else
		kvm_vcpu_wake_up(vcpu);
}

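The AVIC change above pins the sender with get_cpu() and skips the doorbell MSR write when the target vCPU runs on the sender's own CPU, since a pending interrupt is noticed on the next VM entry anyway. A sketch of the pattern with stand-in primitives (the no-op stubs below only exist so the sketch compiles; they are not the kernel's helpers):

#include <stdbool.h>

static int current_cpu;

static int get_cpu(void)  { return current_cpu; }  /* pins preemption */
static void put_cpu(void) { }                      /* unpins */
static void ring_doorbell(int cpu) { (void)cpu; }  /* cross-CPU notify */
static void wake_up_target(void)   { }             /* target is sleeping */

static void notify_vcpu(bool target_running, int target_cpu)
{
	if (target_running) {
		/*
		 * Pin ourselves so "our" CPU cannot change under us;
		 * the doorbell is only needed for a remote CPU.
		 */
		if (target_cpu != get_cpu())
			ring_doorbell(target_cpu);
		put_cpu();
	} else {
		wake_up_target();
	}
}
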
@@ -5640,6 +5652,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
	clgi();
	kvm_load_guest_xcr0(vcpu);

	if (lapic_in_kernel(vcpu) &&
	    vcpu->arch.apic->lapic_timer.timer_advance_ns)
		kvm_wait_lapic_expire(vcpu);

	/*
	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
	 * it's non-zero. Since vmentry is serialising on affected CPUs, there

@@ -5861,9 +5877,9 @@ svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
	hypercall[2] = 0xd9;
}

static void svm_check_processor_compat(void *rtn)
static int __init svm_check_processor_compat(void)
{
	*(int *)rtn = 0;
	return 0;
}

static bool svm_cpu_has_accelerated_tpr(void)

@@ -5875,6 +5891,7 @@ static bool svm_has_emulated_msr(int index)
{
	switch (index) {
	case MSR_IA32_MCG_EXT_CTL:
	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
		return false;
	default:
		break;

@@ -6162,15 +6179,9 @@ out:
	return ret;
}

static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
{
	local_irq_enable();
	/*
	 * We must have an instruction with interrupts enabled, so
	 * the timer interrupt isn't delayed by the interrupt shadow.
	 */
	asm("nop");
	local_irq_disable();

}

static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)

@@ -7256,7 +7267,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
	.set_tdp_cr3 = set_tdp_cr3,

	.check_intercept = svm_check_intercept,
	.handle_external_intr = svm_handle_external_intr,
	.handle_exit_irqoff = svm_handle_exit_irqoff,

	.request_immediate_exit = __kvm_request_immediate_exit,

@@ -1365,7 +1365,7 @@ TRACE_EVENT(kvm_hv_timer_state,
		__entry->vcpu_id = vcpu_id;
		__entry->hv_timer_in_use = hv_timer_in_use;
	),
	TP_printk("vcpu_id %x hv_timer %x\n",
	TP_printk("vcpu_id %x hv_timer %x",
		  __entry->vcpu_id,
		  __entry->hv_timer_in_use)
);

@@ -3,6 +3,7 @@
#include <linux/errno.h>
#include <linux/smp.h>

#include "../hyperv.h"
#include "evmcs.h"
#include "vmcs.h"
#include "vmx.h"

@@ -313,6 +314,23 @@ void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
}
#endif

bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa)
{
	struct hv_vp_assist_page assist_page;

	*evmcs_gpa = -1ull;

	if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page)))
		return false;

	if (unlikely(!assist_page.enlighten_vmentry))
		return false;

	*evmcs_gpa = assist_page.current_nested_vmcs;

	return true;
}

uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

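nested_enlightened_vmentry() above treats the Hyper-V VP assist page as the source of truth: the out-parameter is poisoned first, and only a readable page with the enlightenment flag set yields a real eVMCS GPA. A sketch of the same contract with invented types:

#include <stdbool.h>
#include <stdint.h>

struct vp_assist {
	uint8_t enlighten_vmentry;
	uint64_t current_nested_vmcs;
};

/* stand-in for reading the guest's assist page; 0 on success */
static int read_assist_page(struct vp_assist *out)
{
	out->enlighten_vmentry = 1;
	out->current_nested_vmcs = 0x1000;
	return 0;
}

static bool enlightened_vmentry(uint64_t *evmcs_gpa)
{
	struct vp_assist page;

	*evmcs_gpa = ~0ULL;	/* poison: "no eVMCS" */

	if (read_assist_page(&page))
		return false;
	if (!page.enlighten_vmentry)
		return false;

	*evmcs_gpa = page.current_nested_vmcs;
	return true;
}
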
@@ -195,6 +195,7 @@ static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
static inline void evmcs_touch_msr_bitmap(void) {}
#endif /* IS_ENABLED(CONFIG_HYPERV) */

bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa);
uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu);
int nested_enable_evmcs(struct kvm_vcpu *vcpu,
			uint16_t *vmcs_version);

[diff for one file suppressed by the web interface: too large to display]

@@ -17,11 +17,11 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry);
bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason);
void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
		       u32 exit_intr_info, unsigned long exit_qualification);
void nested_sync_from_vmcs12(struct kvm_vcpu *vcpu);
void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu);
int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
			u32 vmx_instruction_info, bool wr, gva_t *ret);
			u32 vmx_instruction_info, bool wr, int len, gva_t *ret);

static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
{

@@ -146,7 +146,6 @@ static __always_inline void vmcs_write64(unsigned long field, u64 value)

	__vmcs_writel(field, value);
#ifndef CONFIG_X86_64
	asm volatile ("");
	__vmcs_writel(field+1, value >> 32);
#endif
}

@@ -42,6 +42,14 @@ struct vmcs_host_state {
#endif
};

struct vmcs_controls_shadow {
	u32 vm_entry;
	u32 vm_exit;
	u32 pin;
	u32 exec;
	u32 secondary_exec;
};

/*
 * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
 * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs

@@ -53,7 +61,7 @@ struct loaded_vmcs {
	int cpu;
	bool launched;
	bool nmi_known_unmasked;
	bool hv_timer_armed;
	bool hv_timer_soft_disabled;
	/* Support for vnmi-less CPUs */
	int soft_vnmi_blocked;
	ktime_t entry_time;

@@ -61,6 +69,7 @@ struct loaded_vmcs {
	unsigned long *msr_bitmap;
	struct list_head loaded_vmcss_on_cpu_link;
	struct vmcs_host_state host_state;
	struct vmcs_controls_shadow controls_shadow;
};

static inline bool is_exception_n(u32 intr_info, u8 vector)

@@ -115,6 +124,12 @@ static inline bool is_nmi(u32 intr_info)
		== (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
}

static inline bool is_external_intr(u32 intr_info)
{
	return (intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
		== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR);
}

enum vmcs_field_width {
	VMCS_FIELD_WIDTH_U16 = 0,
	VMCS_FIELD_WIDTH_U64 = 1,

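The new vmcs_controls_shadow caches the five control fields in memory so the hot paths can replace VMREAD/VMWRITE round-trips with plain loads, touching hardware only when a value actually changes; the vmx.c hunks below consume accessors in this style (pin_controls_set(), exec_controls_setbit(), and so on). A sketch of how such accessors can be generated; the BUILD_SHADOW macro and the fake VMCS backing store are illustrative, not the kernel's exact generator:

#include <stdint.h>

static uint32_t fake_vmcs[8];		/* stand-in VMCS storage */
static void vmcs_write32(int f, uint32_t v) { fake_vmcs[f] = v; }

struct controls_shadow { uint32_t pin, exec; };

#define BUILD_SHADOW(name, field)					\
static inline void name##_set(struct controls_shadow *s, uint32_t v)	\
{									\
	if (s->name != v) {						\
		vmcs_write32(field, v);	/* only on change */		\
		s->name = v;						\
	}								\
}									\
static inline uint32_t name##_get(struct controls_shadow *s)		\
{									\
	return s->name;		/* no VMREAD on the hot path */		\
}									\
static inline void name##_setbit(struct controls_shadow *s, uint32_t v) \
{									\
	name##_set(s, name##_get(s) | v);				\
}									\
static inline void name##_clearbit(struct controls_shadow *s, uint32_t v) \
{									\
	name##_set(s, name##_get(s) & ~v);				\
}

BUILD_SHADOW(pin, 0)
BUILD_SHADOW(exec, 1)
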
@@ -395,69 +395,48 @@ static inline short vmcs_field_to_offset(unsigned long field)

#undef ROL16

/*
 * Read a vmcs12 field. Since these can have varying lengths and we return
 * one type, we chose the biggest type (u64) and zero-extend the return value
 * to that size. Note that the caller, handle_vmread, might need to use only
 * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
 * 64-bit fields are to be returned).
 */
static inline int vmcs12_read_any(struct vmcs12 *vmcs12,
				  unsigned long field, u64 *ret)
static inline u64 vmcs12_read_any(struct vmcs12 *vmcs12, unsigned long field,
				  u16 offset)
{
	short offset = vmcs_field_to_offset(field);
	char *p;

	if (offset < 0)
		return offset;

	p = (char *)vmcs12 + offset;
	char *p = (char *)vmcs12 + offset;

	switch (vmcs_field_width(field)) {
	case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
		*ret = *((natural_width *)p);
		return 0;
		return *((natural_width *)p);
	case VMCS_FIELD_WIDTH_U16:
		*ret = *((u16 *)p);
		return 0;
		return *((u16 *)p);
	case VMCS_FIELD_WIDTH_U32:
		*ret = *((u32 *)p);
		return 0;
		return *((u32 *)p);
	case VMCS_FIELD_WIDTH_U64:
		*ret = *((u64 *)p);
		return 0;
		return *((u64 *)p);
	default:
		WARN_ON(1);
		return -ENOENT;
		WARN_ON_ONCE(1);
		return -1;
	}
}

static inline int vmcs12_write_any(struct vmcs12 *vmcs12,
				   unsigned long field, u64 field_value){
	short offset = vmcs_field_to_offset(field);
static inline void vmcs12_write_any(struct vmcs12 *vmcs12, unsigned long field,
				    u16 offset, u64 field_value)
{
	char *p = (char *)vmcs12 + offset;

	if (offset < 0)
		return offset;

	switch (vmcs_field_width(field)) {
	case VMCS_FIELD_WIDTH_U16:
		*(u16 *)p = field_value;
		return 0;
		break;
	case VMCS_FIELD_WIDTH_U32:
		*(u32 *)p = field_value;
		return 0;
		break;
	case VMCS_FIELD_WIDTH_U64:
		*(u64 *)p = field_value;
		return 0;
		break;
	case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
		*(natural_width *)p = field_value;
		return 0;
		break;
	default:
		WARN_ON(1);
		return -ENOENT;
		WARN_ON_ONCE(1);
		break;
	}

}

#endif /* __KVM_X86_VMX_VMCS12_H */

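Moving the offset lookup out of vmcs12_read_any()/vmcs12_write_any() lets callers validate a field once and reuse the offset, with the field width deciding how many bytes to load or store. A compact standalone sketch of that width dispatch; the widths and blob layout are illustrative:

#include <stdint.h>
#include <string.h>

enum width { W16, W32, W64 };

static uint64_t read_field(const void *blob, uint16_t offset, enum width w)
{
	const char *p = (const char *)blob + offset;
	uint16_t v16; uint32_t v32; uint64_t v64;

	switch (w) {
	case W16: memcpy(&v16, p, sizeof(v16)); return v16;
	case W32: memcpy(&v32, p, sizeof(v32)); return v32;
	case W64: memcpy(&v64, p, sizeof(v64)); return v64;
	}
	return (uint64_t)-1;	/* unreachable for valid widths */
}
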
@@ -1,8 +1,12 @@
#if !defined(SHADOW_FIELD_RO) && !defined(SHADOW_FIELD_RW)
BUILD_BUG_ON(1)
#endif

#ifndef SHADOW_FIELD_RO
#define SHADOW_FIELD_RO(x)
#define SHADOW_FIELD_RO(x, y)
#endif
#ifndef SHADOW_FIELD_RW
#define SHADOW_FIELD_RW(x)
#define SHADOW_FIELD_RW(x, y)
#endif

/*

@@ -28,47 +32,48 @@
 */

/* 16-bits */
SHADOW_FIELD_RW(GUEST_INTR_STATUS)
SHADOW_FIELD_RW(GUEST_PML_INDEX)
SHADOW_FIELD_RW(HOST_FS_SELECTOR)
SHADOW_FIELD_RW(HOST_GS_SELECTOR)
SHADOW_FIELD_RW(GUEST_INTR_STATUS, guest_intr_status)
SHADOW_FIELD_RW(GUEST_PML_INDEX, guest_pml_index)
SHADOW_FIELD_RW(HOST_FS_SELECTOR, host_fs_selector)
SHADOW_FIELD_RW(HOST_GS_SELECTOR, host_gs_selector)

/* 32-bits */
SHADOW_FIELD_RO(VM_EXIT_REASON)
SHADOW_FIELD_RO(VM_EXIT_INTR_INFO)
SHADOW_FIELD_RO(VM_EXIT_INSTRUCTION_LEN)
SHADOW_FIELD_RO(IDT_VECTORING_INFO_FIELD)
SHADOW_FIELD_RO(IDT_VECTORING_ERROR_CODE)
SHADOW_FIELD_RO(VM_EXIT_INTR_ERROR_CODE)
SHADOW_FIELD_RW(CPU_BASED_VM_EXEC_CONTROL)
SHADOW_FIELD_RW(EXCEPTION_BITMAP)
SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE)
SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD)
SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN)
SHADOW_FIELD_RW(TPR_THRESHOLD)
SHADOW_FIELD_RW(GUEST_CS_AR_BYTES)
SHADOW_FIELD_RW(GUEST_SS_AR_BYTES)
SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO)
SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE)
SHADOW_FIELD_RO(VM_EXIT_REASON, vm_exit_reason)
SHADOW_FIELD_RO(VM_EXIT_INTR_INFO, vm_exit_intr_info)
SHADOW_FIELD_RO(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len)
SHADOW_FIELD_RO(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field)
SHADOW_FIELD_RO(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code)
SHADOW_FIELD_RO(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code)
SHADOW_FIELD_RO(GUEST_CS_AR_BYTES, guest_cs_ar_bytes)
SHADOW_FIELD_RO(GUEST_SS_AR_BYTES, guest_ss_ar_bytes)
SHADOW_FIELD_RW(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control)
SHADOW_FIELD_RW(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control)
SHADOW_FIELD_RW(EXCEPTION_BITMAP, exception_bitmap)
SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE, vm_entry_exception_error_code)
SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field)
SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len)
SHADOW_FIELD_RW(TPR_THRESHOLD, tpr_threshold)
SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info)
SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value)

/* Natural width */
SHADOW_FIELD_RO(EXIT_QUALIFICATION)
SHADOW_FIELD_RO(GUEST_LINEAR_ADDRESS)
SHADOW_FIELD_RW(GUEST_RIP)
SHADOW_FIELD_RW(GUEST_RSP)
SHADOW_FIELD_RW(GUEST_CR0)
SHADOW_FIELD_RW(GUEST_CR3)
SHADOW_FIELD_RW(GUEST_CR4)
SHADOW_FIELD_RW(GUEST_RFLAGS)
SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK)
SHADOW_FIELD_RW(CR0_READ_SHADOW)
SHADOW_FIELD_RW(CR4_READ_SHADOW)
SHADOW_FIELD_RW(HOST_FS_BASE)
SHADOW_FIELD_RW(HOST_GS_BASE)
SHADOW_FIELD_RO(EXIT_QUALIFICATION, exit_qualification)
SHADOW_FIELD_RO(GUEST_LINEAR_ADDRESS, guest_linear_address)
SHADOW_FIELD_RW(GUEST_RIP, guest_rip)
SHADOW_FIELD_RW(GUEST_RSP, guest_rsp)
SHADOW_FIELD_RW(GUEST_CR0, guest_cr0)
SHADOW_FIELD_RW(GUEST_CR3, guest_cr3)
SHADOW_FIELD_RW(GUEST_CR4, guest_cr4)
SHADOW_FIELD_RW(GUEST_RFLAGS, guest_rflags)
SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK, cr0_guest_host_mask)
SHADOW_FIELD_RW(CR0_READ_SHADOW, cr0_read_shadow)
SHADOW_FIELD_RW(CR4_READ_SHADOW, cr4_read_shadow)
SHADOW_FIELD_RW(HOST_FS_BASE, host_fs_base)
SHADOW_FIELD_RW(HOST_GS_BASE, host_gs_base)

/* 64-bit */
SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS)
SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH)
SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS, guest_physical_address)
SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH, guest_physical_address)

#undef SHADOW_FIELD_RO
#undef SHADOW_FIELD_RW

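The second macro argument added above carries the vmcs12 struct-member name, so includers can generate both the VMCS field constant and the member access from one table. This is the classic X-macro pattern; a toy demonstration in the same spirit, with invented names:

#include <stdio.h>

#define FIELD_TABLE(F)			\
	F(GUEST_RIP, guest_rip)		\
	F(GUEST_RSP, guest_rsp)

/* expansion 1: a struct with one member per table row */
#define AS_MEMBER(id, name) unsigned long name;
struct toy_vmcs12 { FIELD_TABLE(AS_MEMBER) };
#undef AS_MEMBER

/* expansion 2: code that touches every member generically */
#define AS_PRINT(id, name) printf(#id " -> %lu\n", v->name);
static void dump(const struct toy_vmcs12 *v)
{
	FIELD_TABLE(AS_PRINT)
}
#undef AS_PRINT

int main(void)
{
	struct toy_vmcs12 v = { .guest_rip = 1, .guest_rsp = 2 };

	dump(&v);
	return 0;
}
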
@@ -389,6 +389,7 @@ static const struct kvm_vmx_segment_field {
};

u64 host_efer;
static unsigned long host_idt_base;

/*
 * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm

@@ -1035,6 +1036,33 @@ static void pt_guest_exit(struct vcpu_vmx *vmx)
	wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
}

void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
			unsigned long fs_base, unsigned long gs_base)
{
	if (unlikely(fs_sel != host->fs_sel)) {
		if (!(fs_sel & 7))
			vmcs_write16(HOST_FS_SELECTOR, fs_sel);
		else
			vmcs_write16(HOST_FS_SELECTOR, 0);
		host->fs_sel = fs_sel;
	}
	if (unlikely(gs_sel != host->gs_sel)) {
		if (!(gs_sel & 7))
			vmcs_write16(HOST_GS_SELECTOR, gs_sel);
		else
			vmcs_write16(HOST_GS_SELECTOR, 0);
		host->gs_sel = gs_sel;
	}
	if (unlikely(fs_base != host->fs_base)) {
		vmcs_writel(HOST_FS_BASE, fs_base);
		host->fs_base = fs_base;
	}
	if (unlikely(gs_base != host->gs_base)) {
		vmcs_writel(HOST_GS_BASE, gs_base);
		host->gs_base = gs_base;
	}
}

void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

@@ -1053,20 +1081,18 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
	 * when guest state is loaded. This happens when guest transitions
	 * to/from long-mode by setting MSR_EFER.LMA.
	 */
	if (!vmx->loaded_cpu_state || vmx->guest_msrs_dirty) {
		vmx->guest_msrs_dirty = false;
	if (!vmx->guest_msrs_ready) {
		vmx->guest_msrs_ready = true;
		for (i = 0; i < vmx->save_nmsrs; ++i)
			kvm_set_shared_msr(vmx->guest_msrs[i].index,
					   vmx->guest_msrs[i].data,
					   vmx->guest_msrs[i].mask);

	}

	if (vmx->loaded_cpu_state)
	if (vmx->guest_state_loaded)
		return;

	vmx->loaded_cpu_state = vmx->loaded_vmcs;
	host_state = &vmx->loaded_cpu_state->host_state;
	host_state = &vmx->loaded_vmcs->host_state;

	/*
	 * Set host fs and gs selectors. Unfortunately, 22.2.3 does not

@@ -1100,42 +1126,20 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
	gs_base = segment_base(gs_sel);
#endif

	if (unlikely(fs_sel != host_state->fs_sel)) {
		if (!(fs_sel & 7))
			vmcs_write16(HOST_FS_SELECTOR, fs_sel);
		else
			vmcs_write16(HOST_FS_SELECTOR, 0);
		host_state->fs_sel = fs_sel;
	}
	if (unlikely(gs_sel != host_state->gs_sel)) {
		if (!(gs_sel & 7))
			vmcs_write16(HOST_GS_SELECTOR, gs_sel);
		else
			vmcs_write16(HOST_GS_SELECTOR, 0);
		host_state->gs_sel = gs_sel;
	}
	if (unlikely(fs_base != host_state->fs_base)) {
		vmcs_writel(HOST_FS_BASE, fs_base);
		host_state->fs_base = fs_base;
	}
	if (unlikely(gs_base != host_state->gs_base)) {
		vmcs_writel(HOST_GS_BASE, gs_base);
		host_state->gs_base = gs_base;
	}
	vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
	vmx->guest_state_loaded = true;
}

static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
{
	struct vmcs_host_state *host_state;

	if (!vmx->loaded_cpu_state)
	if (!vmx->guest_state_loaded)
		return;

	WARN_ON_ONCE(vmx->loaded_cpu_state != vmx->loaded_vmcs);
	host_state = &vmx->loaded_cpu_state->host_state;
	host_state = &vmx->loaded_vmcs->host_state;

	++vmx->vcpu.stat.host_state_reload;
	vmx->loaded_cpu_state = NULL;

#ifdef CONFIG_X86_64
	rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);

@@ -1161,13 +1165,15 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif
	load_fixmap_gdt(raw_smp_processor_id());
	vmx->guest_state_loaded = false;
	vmx->guest_msrs_ready = false;
}

#ifdef CONFIG_X86_64
static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
{
	preempt_disable();
	if (vmx->loaded_cpu_state)
	if (vmx->guest_state_loaded)
		rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
	preempt_enable();
	return vmx->msr_guest_kernel_gs_base;

@@ -1176,7 +1182,7 @@ static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
{
	preempt_disable();
	if (vmx->loaded_cpu_state)
	if (vmx->guest_state_loaded)
		wrmsrl(MSR_KERNEL_GS_BASE, data);
	preempt_enable();
	vmx->msr_guest_kernel_gs_base = data;

@@ -1225,11 +1231,7 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
	pi_set_on(pi_desc);
}

/*
 * Switches to specified vcpu, until a matching vcpu_put(), but assumes
 * vcpu mutex is already taken.
 */
void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	bool already_loaded = vmx->loaded_vmcs->cpu == cpu;

@@ -1290,8 +1292,20 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
	if (kvm_has_tsc_control &&
	    vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
		decache_tsc_multiplier(vmx);
}

/*
 * Switches to specified vcpu, until a matching vcpu_put(), but assumes
 * vcpu mutex is already taken.
 */
void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	vmx_vcpu_load_vmcs(vcpu, cpu);

	vmx_vcpu_pi_load(vcpu, cpu);

	vmx->host_pkru = read_pkru();
	vmx->host_debugctlmsr = get_debugctlmsr();
}

@@ -1310,7 +1324,7 @@ static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
	pi_set_sn(pi_desc);
}

void vmx_vcpu_put(struct kvm_vcpu *vcpu)
static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
{
	vmx_vcpu_pi_put(vcpu);

@@ -1579,7 +1593,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
		move_msr_up(vmx, index, save_nmsrs++);

	vmx->save_nmsrs = save_nmsrs;
	vmx->guest_msrs_dirty = true;
	vmx->guest_msrs_ready = false;

	if (cpu_has_vmx_msr_bitmap())
		vmx_update_msr_bitmap(&vmx->vcpu);

@@ -1692,9 +1706,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
	case MSR_IA32_SYSENTER_ESP:
		msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
		break;
	case MSR_IA32_POWER_CTL:
		msr_info->data = vmx->msr_ia32_power_ctl;
		break;
	case MSR_IA32_BNDCFGS:
		if (!kvm_mpx_supported() ||
		    (!msr_info->host_initiated &&

@@ -1718,7 +1729,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
		return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
				       &msr_info->data);
	case MSR_IA32_XSS:
		if (!vmx_xsaves_supported())
		if (!vmx_xsaves_supported() ||
		    (!msr_info->host_initiated &&
		     !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
		       guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
			return 1;
		msr_info->data = vcpu->arch.ia32_xss;
		break;

@@ -1817,17 +1831,28 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
		break;
#endif
	case MSR_IA32_SYSENTER_CS:
		if (is_guest_mode(vcpu))
			get_vmcs12(vcpu)->guest_sysenter_cs = data;
		vmcs_write32(GUEST_SYSENTER_CS, data);
		break;
	case MSR_IA32_SYSENTER_EIP:
		if (is_guest_mode(vcpu))
			get_vmcs12(vcpu)->guest_sysenter_eip = data;
		vmcs_writel(GUEST_SYSENTER_EIP, data);
		break;
	case MSR_IA32_SYSENTER_ESP:
		if (is_guest_mode(vcpu))
			get_vmcs12(vcpu)->guest_sysenter_esp = data;
		vmcs_writel(GUEST_SYSENTER_ESP, data);
		break;
	case MSR_IA32_POWER_CTL:
		vmx->msr_ia32_power_ctl = data;
	case MSR_IA32_DEBUGCTLMSR:
		if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls &
		    VM_EXIT_SAVE_DEBUG_CONTROLS)
			get_vmcs12(vcpu)->guest_ia32_debugctl = data;

		ret = kvm_set_msr_common(vcpu, msr_info);
		break;

	case MSR_IA32_BNDCFGS:
		if (!kvm_mpx_supported() ||
		    (!msr_info->host_initiated &&

@@ -1896,9 +1921,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
					      MSR_TYPE_W);
		break;
	case MSR_IA32_CR_PAT:
		if (!kvm_pat_valid(data))
			return 1;

		if (is_guest_mode(vcpu) &&
		    get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
			get_vmcs12(vcpu)->guest_ia32_pat = data;

		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
			if (!kvm_pat_valid(data))
				return 1;
			vmcs_write64(GUEST_IA32_PAT, data);
			vcpu->arch.pat = data;
			break;

@@ -1932,7 +1962,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
			return 1;
		return vmx_set_vmx_msr(vcpu, msr_index, data);
	case MSR_IA32_XSS:
		if (!vmx_xsaves_supported())
		if (!vmx_xsaves_supported() ||
		    (!msr_info->host_initiated &&
		     !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
		       guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
			return 1;
		/*
		 * The only supported bit as of Skylake is bit 8, but

@@ -2435,6 +2468,7 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
		return -ENOMEM;

	loaded_vmcs->shadow_vmcs = NULL;
	loaded_vmcs->hv_timer_soft_disabled = false;
	loaded_vmcs_init(loaded_vmcs);

	if (cpu_has_vmx_msr_bitmap()) {

@@ -2455,6 +2489,8 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
	}

	memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
	memset(&loaded_vmcs->controls_shadow, 0,
	       sizeof(struct vmcs_controls_shadow));

	return 0;

@@ -2737,7 +2773,7 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
		      (unsigned long *)&vcpu->arch.regs_dirty))
		return;

	if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
	if (is_pae_paging(vcpu)) {
		vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
		vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
		vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);

@@ -2749,7 +2785,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu *mmu = vcpu->arch.walk_mmu;

	if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
	if (is_pae_paging(vcpu)) {
		mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
		mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
		mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);

@@ -2766,22 +2802,20 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
				       unsigned long cr0,
				       struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
		vmx_decache_cr3(vcpu);
	if (!(cr0 & X86_CR0_PG)) {
		/* From paging/starting to nonpaging */
		vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
			     vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) |
			     (CPU_BASED_CR3_LOAD_EXITING |
			      CPU_BASED_CR3_STORE_EXITING));
		exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
					  CPU_BASED_CR3_STORE_EXITING);
		vcpu->arch.cr0 = cr0;
		vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
	} else if (!is_paging(vcpu)) {
		/* From nonpaging to paging */
		vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
			     vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
			     ~(CPU_BASED_CR3_LOAD_EXITING |
			       CPU_BASED_CR3_STORE_EXITING));
		exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
					    CPU_BASED_CR3_STORE_EXITING);
		vcpu->arch.cr0 = cr0;
		vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
	}

@@ -2881,6 +2915,7 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)

int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	/*
	 * Pass through host's Machine Check Enable value to hw_cr4, which
	 * is in force while we are in guest mode. Do not let guests control

@@ -2891,20 +2926,19 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
	hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
	if (enable_unrestricted_guest)
		hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
	else if (to_vmx(vcpu)->rmode.vm86_active)
	else if (vmx->rmode.vm86_active)
		hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
	else
		hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;

	if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
		if (cr4 & X86_CR4_UMIP) {
			vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
				      SECONDARY_EXEC_DESC);
			secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC);
			hw_cr4 &= ~X86_CR4_UMIP;
		} else if (!is_guest_mode(vcpu) ||
			   !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
			vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
					SECONDARY_EXEC_DESC);
			   !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) {
			secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC);
		}
	}

	if (cr4 & X86_CR4_VMXE) {

@@ -2919,7 +2953,7 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
		return 1;
	}

	if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
	if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
		return 1;

	vcpu->arch.cr4 = cr4;

@@ -3537,7 +3571,7 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
	u8 mode = 0;

	if (cpu_has_secondary_exec_ctrls() &&
	    (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
	    (secondary_exec_controls_get(to_vmx(vcpu)) &
	     SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
		mode |= MSR_BITMAP_MODE_X2APIC;
		if (enable_apicv && kvm_vcpu_apicv_active(vcpu))

@@ -3731,7 +3765,6 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
{
	u32 low32, high32;
	unsigned long tmpl;
	struct desc_ptr dt;
	unsigned long cr0, cr3, cr4;

	cr0 = read_cr0();

@@ -3767,9 +3800,7 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
	vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
	vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);  /* 22.2.4 */

	store_idt(&dt);
	vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
	vmx->host_idt_base = dt.address;
	vmcs_writel(HOST_IDTR_BASE, host_idt_base);   /* 22.2.4 */

	vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit); /* 22.2.5 */

@@ -3798,7 +3829,7 @@ void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
	vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
}

static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
{
	u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;

@@ -3808,8 +3839,9 @@ u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
	if (!enable_vnmi)
		pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;

	/* Enable the preemption timer dynamically */
	pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
	if (!enable_preemption_timer)
		pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;

	return pin_based_exec_ctrl;
}

@@ -3817,14 +3849,14 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
	pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
	if (cpu_has_secondary_exec_ctrls()) {
		if (kvm_vcpu_apicv_active(vcpu))
			vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
			secondary_exec_controls_setbit(vmx,
				      SECONDARY_EXEC_APIC_REGISTER_VIRT |
				      SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
		else
			vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
			secondary_exec_controls_clearbit(vmx,
					SECONDARY_EXEC_APIC_REGISTER_VIRT |
					SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
	}

@@ -4015,15 +4047,14 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */

	/* Control */
	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
	pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
	vmx->hv_deadline_tsc = -1;

	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
	exec_controls_set(vmx, vmx_exec_control(vmx));

	if (cpu_has_secondary_exec_ctrls()) {
		vmx_compute_secondary_exec_control(vmx);
		vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
			     vmx->secondary_exec_control);
		secondary_exec_controls_set(vmx, vmx->secondary_exec_control);
	}

	if (kvm_vcpu_apicv_active(&vmx->vcpu)) {

@@ -4081,10 +4112,10 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
		++vmx->nmsrs;
	}

	vm_exit_controls_init(vmx, vmx_vmexit_ctrl());
	vm_exit_controls_set(vmx, vmx_vmexit_ctrl());

	/* 22.2.1, 20.8.1 */
	vm_entry_controls_init(vmx, vmx_vmentry_ctrl());
	vm_entry_controls_set(vmx, vmx_vmentry_ctrl());

	vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS;
	vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS);

@@ -4208,8 +4239,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)

static void enable_irq_window(struct kvm_vcpu *vcpu)
{
	vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
		      CPU_BASED_VIRTUAL_INTR_PENDING);
	exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);
}

static void enable_nmi_window(struct kvm_vcpu *vcpu)

@@ -4220,8 +4250,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
		return;
	}

	vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
		      CPU_BASED_VIRTUAL_NMI_PENDING);
	exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING);
}

static void vmx_inject_irq(struct kvm_vcpu *vcpu)

@@ -4442,11 +4471,11 @@ static void kvm_machine_check(void)

static int handle_machine_check(struct kvm_vcpu *vcpu)
{
	/* already handled by vcpu_run */
	/* handled by vmx_vcpu_run() */
	return 1;
}

static int handle_exception(struct kvm_vcpu *vcpu)
static int handle_exception_nmi(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct kvm_run *kvm_run = vcpu->run;

@@ -4458,11 +4487,8 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
	vect_info = vmx->idt_vectoring_info;
	intr_info = vmx->exit_intr_info;

	if (is_machine_check(intr_info))
		return handle_machine_check(vcpu);

	if (is_nmi(intr_info))
		return 1;  /* already handled by vmx_vcpu_run() */
	if (is_machine_check(intr_info) || is_nmi(intr_info))
		return 1; /* handled by handle_exception_nmi_irqoff() */

	if (is_invalid_opcode(intr_info))
		return handle_ud(vcpu);

@@ -4518,7 +4544,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
		dr6 = vmcs_readl(EXIT_QUALIFICATION);
		if (!(vcpu->guest_debug &
		      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
			vcpu->arch.dr6 &= ~15;
			vcpu->arch.dr6 &= ~DR_TRAP_BITS;
			vcpu->arch.dr6 |= dr6 | DR6_RTM;
			if (is_icebp(intr_info))
				skip_emulated_instruction(vcpu);

@@ -4763,7 +4789,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
			vcpu->run->exit_reason = KVM_EXIT_DEBUG;
			return 0;
		} else {
			vcpu->arch.dr6 &= ~15;
			vcpu->arch.dr6 &= ~DR_TRAP_BITS;
			vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
			kvm_queue_exception(vcpu, DB_VECTOR);
			return 1;

@@ -4771,8 +4797,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
	}

	if (vcpu->guest_debug == 0) {
		vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
				CPU_BASED_MOV_DR_EXITING);
		exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);

		/*
		 * No more DR vmexits; force a reload of the debug registers

@@ -4816,7 +4841,7 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
	vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);

	vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
	vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_MOV_DR_EXITING);
	exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
}

static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)

@@ -4876,8 +4901,7 @@ static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)

static int handle_interrupt_window(struct kvm_vcpu *vcpu)
{
	vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
			CPU_BASED_VIRTUAL_INTR_PENDING);
	exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);

	kvm_make_request(KVM_REQ_EVENT, vcpu);

@@ -5131,8 +5155,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
static int handle_nmi_window(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!enable_vnmi);
	vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
			CPU_BASED_VIRTUAL_NMI_PENDING);
	exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING);
	++vcpu->stat.nmi_window_exits;
	kvm_make_request(KVM_REQ_EVENT, vcpu);

@@ -5144,7 +5167,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	enum emulation_result err = EMULATE_DONE;
	int ret = 1;
	u32 cpu_exec_ctrl;
	bool intr_window_requested;
	unsigned count = 130;

@@ -5155,8 +5177,8 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
	 */
	WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);

	cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
	intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
	intr_window_requested = exec_controls_get(vmx) &
				CPU_BASED_VIRTUAL_INTR_PENDING;

	while (vmx->emulation_required && count-- != 0) {
		if (intr_window_requested && vmx_interrupt_allowed(vcpu))

@@ -5342,7 +5364,8 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
	 * is read even if it isn't needed (e.g., for type==all)
	 */
	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
				vmx_instruction_info, false, &gva))
				vmx_instruction_info, false,
				sizeof(operand), &gva))
		return 1;

	if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {

@@ -5437,8 +5460,12 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)

static int handle_preemption_timer(struct kvm_vcpu *vcpu)
{
	if (!to_vmx(vcpu)->req_immediate_exit)
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (!vmx->req_immediate_exit &&
	    !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled))
		kvm_lapic_expired_hv_timer(vcpu);

	return 1;
}

@@ -5469,7 +5496,7 @@ static int handle_encls(struct kvm_vcpu *vcpu)
 * to be done to userspace and return 0.
 */
static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
	[EXIT_REASON_EXCEPTION_NMI]           = handle_exception,
	[EXIT_REASON_EXCEPTION_NMI]           = handle_exception_nmi,
	[EXIT_REASON_EXTERNAL_INTERRUPT]      = handle_external_interrupt,
	[EXIT_REASON_TRIPLE_FAULT]            = handle_triple_fault,
	[EXIT_REASON_NMI_WINDOW]              = handle_nmi_window,

@@ -5952,6 +5979,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)

void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u32 sec_exec_control;

	if (!lapic_in_kernel(vcpu))

@@ -5963,11 +5991,11 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)

	/* Postpone execution until vmcs01 is the current VMCS. */
	if (is_guest_mode(vcpu)) {
		to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true;
		vmx->nested.change_vmcs01_virtual_apic_mode = true;
		return;
	}

	sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
	sec_exec_control = secondary_exec_controls_get(vmx);
	sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
			      SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);

@@ -5989,7 +6017,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
			SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
		break;
	}
	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
	secondary_exec_controls_set(vmx, sec_exec_control);

	vmx_update_msr_bitmap(vcpu);
}

@@ -6107,76 +6135,81 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
	memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
}

static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
{
	u32 exit_intr_info = 0;
	u16 basic_exit_reason = (u16)vmx->exit_reason;

	if (!(basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
	      || basic_exit_reason == EXIT_REASON_EXCEPTION_NMI))
		return;

	if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
		exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
	vmx->exit_intr_info = exit_intr_info;
	vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);

	/* if exit due to PF check for async PF */
	if (is_page_fault(exit_intr_info))
	if (is_page_fault(vmx->exit_intr_info))
		vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();

	/* Handle machine checks before interrupts are enabled */
	if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY ||
	    is_machine_check(exit_intr_info))
	if (is_machine_check(vmx->exit_intr_info))
		kvm_machine_check();

	/* We need to handle NMIs before interrupts are enabled */
	if (is_nmi(exit_intr_info)) {
	if (is_nmi(vmx->exit_intr_info)) {
		kvm_before_interrupt(&vmx->vcpu);
		asm("int $2");
		kvm_after_interrupt(&vmx->vcpu);
	}
}

static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
{
	u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
	unsigned int vector;
	unsigned long entry;
#ifdef CONFIG_X86_64
	unsigned long tmp;
#endif
	gate_desc *desc;
	u32 intr_info;

	if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
	    == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
		unsigned int vector;
		unsigned long entry;
		gate_desc *desc;
		struct vcpu_vmx *vmx = to_vmx(vcpu);
#ifdef CONFIG_X86_64
		unsigned long tmp;
#endif
	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
	if (WARN_ONCE(!is_external_intr(intr_info),
	    "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
		return;

		vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
		desc = (gate_desc *)vmx->host_idt_base + vector;
		entry = gate_offset(desc);
		asm volatile(
	vector = intr_info & INTR_INFO_VECTOR_MASK;
	desc = (gate_desc *)host_idt_base + vector;
	entry = gate_offset(desc);

	kvm_before_interrupt(vcpu);

	asm volatile(
#ifdef CONFIG_X86_64
			"mov %%" _ASM_SP ", %[sp]\n\t"
			"and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
			"push $%c[ss]\n\t"
			"push %[sp]\n\t"
		"mov %%" _ASM_SP ", %[sp]\n\t"
		"and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
		"push $%c[ss]\n\t"
		"push %[sp]\n\t"
#endif
			"pushf\n\t"
			__ASM_SIZE(push) " $%c[cs]\n\t"
			CALL_NOSPEC
			:
		"pushf\n\t"
		__ASM_SIZE(push) " $%c[cs]\n\t"
		CALL_NOSPEC
		:
#ifdef CONFIG_X86_64
			[sp]"=&r"(tmp),
		[sp]"=&r"(tmp),
#endif
			ASM_CALL_CONSTRAINT
			:
			THUNK_TARGET(entry),
			[ss]"i"(__KERNEL_DS),
			[cs]"i"(__KERNEL_CS)
			);
	}
		ASM_CALL_CONSTRAINT
		:
		THUNK_TARGET(entry),
		[ss]"i"(__KERNEL_DS),
		[cs]"i"(__KERNEL_CS)
	);

	kvm_after_interrupt(vcpu);
}
STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);

static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
		handle_external_interrupt_irqoff(vcpu);
	else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
		handle_exception_nmi_irqoff(vmx);
}
STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);

static bool vmx_has_emulated_msr(int index)
{

@@ -6187,6 +6220,8 @@ static bool vmx_has_emulated_msr(int index)
	 * real mode.
	 */
	return enable_unrestricted_guest || emulate_invalid_guest_state;
	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
		return nested;
	case MSR_AMD64_VIRT_SPEC_CTRL:
		/* This is AMD only. */
		return false;

@@ -6332,15 +6367,6 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
					msrs[i].host, false);
}

static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val)
{
	vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val);
	if (!vmx->loaded_vmcs->hv_timer_armed)
		vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
			      PIN_BASED_VMX_PREEMPTION_TIMER);
	vmx->loaded_vmcs->hv_timer_armed = true;
}

static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

@@ -6348,11 +6374,9 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
	u32 delta_tsc;

	if (vmx->req_immediate_exit) {
		vmx_arm_hv_timer(vmx, 0);
		return;
	}

	if (vmx->hv_deadline_tsc != -1) {
		vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
		vmx->loaded_vmcs->hv_timer_soft_disabled = false;
	} else if (vmx->hv_deadline_tsc != -1) {
		tscl = rdtsc();
		if (vmx->hv_deadline_tsc > tscl)
			/* set_hv_timer ensures the delta fits in 32-bits */

@@ -6361,14 +6385,12 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
		else
			delta_tsc = 0;

		vmx_arm_hv_timer(vmx, delta_tsc);
		return;
		vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
		vmx->loaded_vmcs->hv_timer_soft_disabled = false;
	} else if (!vmx->loaded_vmcs->hv_timer_soft_disabled) {
		vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, -1);
		vmx->loaded_vmcs->hv_timer_soft_disabled = true;
	}

	if (vmx->loaded_vmcs->hv_timer_armed)
		vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
				PIN_BASED_VMX_PREEMPTION_TIMER);
	vmx->loaded_vmcs->hv_timer_armed = false;
}

void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)

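The rework above stops toggling PIN_BASED_VMX_PREEMPTION_TIMER on every entry: the control bit stays set, and "off" becomes the maximum count, written only when the cached hv_timer_soft_disabled flag says it is not already parked there. A sketch of that three-way update with a stand-in VMCS write:

#include <stdbool.h>
#include <stdint.h>

static uint32_t timer_value;
static void vmcs_write_timer(uint32_t v) { timer_value = v; }

struct timer_state { bool soft_disabled; };

static void update_timer(struct timer_state *t, bool immediate_exit,
			 bool deadline_armed, uint32_t delta)
{
	if (immediate_exit) {
		vmcs_write_timer(0);		/* fire at once */
		t->soft_disabled = false;
	} else if (deadline_armed) {
		vmcs_write_timer(delta);	/* real deadline */
		t->soft_disabled = false;
	} else if (!t->soft_disabled) {
		vmcs_write_timer(~0u);		/* park at max count */
		t->soft_disabled = true;
	}
	/* else: already parked, skip the redundant VMWRITE */
}
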
@ -6401,8 +6423,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
vmcs_write32(PLE_WINDOW, vmx->ple_window);
|
||||
}
|
||||
|
||||
if (vmx->nested.need_vmcs12_sync)
|
||||
nested_sync_from_vmcs12(vcpu);
|
||||
if (vmx->nested.need_vmcs12_to_shadow_sync)
|
||||
nested_sync_vmcs12_to_shadow(vcpu);
|
||||
|
||||
if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
|
||||
vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
|
||||
|
@ -6440,7 +6462,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
|
||||
atomic_switch_perf_msrs(vmx);
|
||||
|
||||
vmx_update_hv_timer(vcpu);
|
||||
if (enable_preemption_timer)
|
||||
vmx_update_hv_timer(vcpu);
|
||||
|
||||
if (lapic_in_kernel(vcpu) &&
|
||||
vcpu->arch.apic->lapic_timer.timer_advance_ns)
|
||||
kvm_wait_lapic_expire(vcpu);
|
||||
|
||||
/*
|
||||
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
|
||||
|
@ -6533,13 +6560,15 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
vmx->idt_vectoring_info = 0;
|
||||
|
||||
vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
|
||||
if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
|
||||
kvm_machine_check();
|
||||
|
||||
if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
|
||||
return;
|
||||
|
||||
vmx->loaded_vmcs->launched = 1;
|
||||
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
|
||||
|
||||
vmx_complete_atomic_exit(vmx);
|
||||
vmx_recover_nmi_blocking(vmx);
|
||||
vmx_complete_interrupts(vmx);
|
||||
}
|
||||
|
@ -6630,6 +6659,12 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
|
|||
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
|
||||
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
|
||||
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
|
||||
if (kvm_cstate_in_guest(kvm)) {
|
||||
vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
|
||||
vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
|
||||
vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
|
||||
vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
|
||||
}
|
||||
vmx->msr_bitmap_mode = 0;
|
||||
|
||||
vmx->loaded_vmcs = &vmx->vmcs01;
|
||||
|
@ -6726,22 +6761,22 @@ static int vmx_vm_init(struct kvm *kvm)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void __init vmx_check_processor_compat(void *rtn)
|
||||
static int __init vmx_check_processor_compat(void)
|
||||
{
|
||||
struct vmcs_config vmcs_conf;
|
||||
struct vmx_capability vmx_cap;
|
||||
|
||||
*(int *)rtn = 0;
|
||||
if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
|
||||
*(int *)rtn = -EIO;
|
||||
return -EIO;
|
||||
if (nested)
|
||||
nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept,
|
||||
enable_apicv);
|
||||
if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
|
||||
printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
|
||||
smp_processor_id());
|
||||
*(int *)rtn = -EIO;
|
||||
return -EIO;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
|
||||
|
@ -6795,7 +6830,7 @@ static int vmx_get_lpage_level(void)
|
|||
return PT_PDPE_LEVEL;
|
||||
}
|
||||
|
||||
static void vmcs_set_secondary_exec_control(u32 new_ctl)
|
||||
static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx)
|
||||
{
|
||||
/*
|
||||
* These bits in the secondary execution controls field
|
||||
|
@ -6809,10 +6844,10 @@ static void vmcs_set_secondary_exec_control(u32 new_ctl)
|
|||
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
|
||||
SECONDARY_EXEC_DESC;
|
||||
|
||||
u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
|
||||
u32 new_ctl = vmx->secondary_exec_control;
|
||||
u32 cur_ctl = secondary_exec_controls_get(vmx);
|
||||
|
||||
vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
|
||||
(new_ctl & ~mask) | (cur_ctl & mask));
|
||||
secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -6950,7 +6985,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
|
|||
|
||||
if (cpu_has_secondary_exec_ctrls()) {
|
||||
vmx_compute_secondary_exec_control(vmx);
|
||||
vmcs_set_secondary_exec_control(vmx->secondary_exec_control);
|
||||
vmcs_set_secondary_exec_control(vmx);
|
||||
}
|
||||
|
||||
if (nested_vmx_allowed(vcpu))
|
||||
|
@@ -7424,10 +7459,14 @@ static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
static __init int hardware_setup(void)
{
	unsigned long host_bndcfgs;
	struct desc_ptr dt;
	int r, i;

	rdmsrl_safe(MSR_EFER, &host_efer);

	store_idt(&dt);
	host_idt_base = dt.address;

	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
		kvm_define_shared_msr(i, vmx_msr_index[i]);

@@ -7531,17 +7570,33 @@ static __init int hardware_setup(void)
	}

	if (!cpu_has_vmx_preemption_timer())
		kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
		enable_preemption_timer = false;

	if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
	if (enable_preemption_timer) {
		u64 use_timer_freq = 5000ULL * 1000 * 1000;
		u64 vmx_msr;

		rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
		cpu_preemption_timer_multi =
			vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
	} else {

		if (tsc_khz)
			use_timer_freq = (u64)tsc_khz * 1000;
		use_timer_freq >>= cpu_preemption_timer_multi;

		/*
		 * KVM "disables" the preemption timer by setting it to its max
		 * value.  Don't use the timer if it might cause spurious exits
		 * at a rate faster than 0.1 Hz (of uninterrupted guest time).
		 */
		if (use_timer_freq > 0xffffffffu / 10)
			enable_preemption_timer = false;
	}

	if (!enable_preemption_timer) {
		kvm_x86_ops->set_hv_timer = NULL;
		kvm_x86_ops->cancel_hv_timer = NULL;
		kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
	}

	kvm_set_posted_intr_wakeup_handler(wakeup_handler);
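The 0.1 Hz cutoff above is easier to see with concrete numbers; a sketch with hypothetical host values, not taken from the patch:

	/*
	 * Example: a 2.5 GHz TSC with a preemption-timer rate shift of 5.
	 *
	 *	use_timer_freq = 2,500,000,000 >> 5 = ~78.1 MHz
	 *
	 * The timer is "disabled" by programming its maximum value,
	 * 0xffffffff ticks, which at 78.1 MHz fires after ~55 s of guest
	 * time -- slower than 0.1 Hz, so the timer stays usable.  Only if
	 * the shifted rate exceeded 0xffffffff / 10 (~429 MHz) would a
	 * maxed-out timer fire more often than every 10 s, and KVM would
	 * fall back to hrtimers.
	 */
	static bool preemption_timer_usable(u64 tsc_hz, unsigned int rate_shift)
	{
		u64 use_timer_freq = tsc_hz >> rate_shift;

		return use_timer_freq <= 0xffffffffu / 10;
	}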
@@ -7683,7 +7738,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
	.set_tdp_cr3 = vmx_set_cr3,

	.check_intercept = vmx_check_intercept,
	.handle_external_intr = vmx_handle_external_intr,
	.handle_exit_irqoff = vmx_handle_exit_irqoff,
	.mpx_supported = vmx_mpx_supported,
	.xsaves_supported = vmx_xsaves_supported,
	.umip_emulated = vmx_umip_emulated,
@@ -109,13 +109,20 @@ struct nested_vmx {
	 * to guest memory during VM exit.
	 */
	struct vmcs12 *cached_shadow_vmcs12;

	/*
	 * Indicates if the shadow vmcs or enlightened vmcs must be updated
	 * with the data held by struct vmcs12.
	 */
	bool need_vmcs12_sync;
	bool need_vmcs12_to_shadow_sync;
	bool dirty_vmcs12;

	/*
	 * Indicates lazily loaded guest state has not yet been decached from
	 * vmcs02.
	 */
	bool need_sync_vmcs02_to_vmcs12_rare;

	/*
	 * vmcs02 has been initialized, i.e. state that is constant for
	 * vmcs02 has been written to the backing VMCS.  Initialization

@@ -180,14 +187,24 @@ struct vcpu_vmx {
	struct kvm_vcpu vcpu;
	u8 fail;
	u8 msr_bitmap_mode;

	/*
	 * If true, host state has been stored in vmx->loaded_vmcs for
	 * the CPU registers that only need to be switched when transitioning
	 * to/from the kernel, and the registers have been loaded with guest
	 * values.  If false, host state is loaded in the CPU registers
	 * and vmx->loaded_vmcs->host_state is invalid.
	 */
	bool guest_state_loaded;

	u32 exit_intr_info;
	u32 idt_vectoring_info;
	ulong rflags;

	struct shared_msr_entry *guest_msrs;
	int nmsrs;
	int save_nmsrs;
	bool guest_msrs_dirty;
	unsigned long host_idt_base;
	bool guest_msrs_ready;
#ifdef CONFIG_X86_64
	u64 msr_host_kernel_gs_base;
	u64 msr_guest_kernel_gs_base;

@@ -195,21 +212,15 @@ struct vcpu_vmx {

	u64 spec_ctrl;

	u32 vm_entry_controls_shadow;
	u32 vm_exit_controls_shadow;
	u32 secondary_exec_control;

	/*
	 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
	 * non-nested (L1) guest, it always points to vmcs01. For a nested
	 * guest (L2), it points to a different VMCS.  loaded_cpu_state points
	 * to the VMCS whose state is loaded into the CPU registers that only
	 * need to be switched when transitioning to/from the kernel; a NULL
	 * value indicates that host state is loaded.
	 * guest (L2), it points to a different VMCS.
	 */
	struct loaded_vmcs vmcs01;
	struct loaded_vmcs *loaded_vmcs;
	struct loaded_vmcs *loaded_cpu_state;

	struct msr_autoload {
		struct vmx_msrs guest;

@@ -260,8 +271,6 @@ struct vcpu_vmx {

	unsigned long host_debugctlmsr;

	u64 msr_ia32_power_ctl;

	/*
	 * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
	 * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included

@@ -292,12 +301,14 @@ struct kvm_vmx {
};

bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu);
void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void vmx_vcpu_put(struct kvm_vcpu *vcpu);
int allocate_vpid(void);
void free_vpid(int vpid);
void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
			unsigned long fs_base, unsigned long gs_base);
int vmx_get_cpl(struct kvm_vcpu *vcpu);
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);

@@ -376,69 +387,31 @@ static inline u8 vmx_get_rvi(void)
	return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
}
static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx)
{
	vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS);
}

static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
{
	vmcs_write32(VM_ENTRY_CONTROLS, val);
	vmx->vm_entry_controls_shadow = val;
}

static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
{
	if (vmx->vm_entry_controls_shadow != val)
		vm_entry_controls_init(vmx, val);
}

static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
{
	return vmx->vm_entry_controls_shadow;
}

static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val)
{
	vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val);
}

static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
{
	vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
}

static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx)
{
	vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS);
}

static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
{
	vmcs_write32(VM_EXIT_CONTROLS, val);
	vmx->vm_exit_controls_shadow = val;
}

static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val)
{
	if (vmx->vm_exit_controls_shadow != val)
		vm_exit_controls_init(vmx, val);
}

static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx)
{
	return vmx->vm_exit_controls_shadow;
}

static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
{
	vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
}

static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
{
	vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val);
#define BUILD_CONTROLS_SHADOW(lname, uname)				\
static inline void lname##_controls_set(struct vcpu_vmx *vmx, u32 val)	\
{									\
	if (vmx->loaded_vmcs->controls_shadow.lname != val) {		\
		vmcs_write32(uname, val);				\
		vmx->loaded_vmcs->controls_shadow.lname = val;		\
	}								\
}									\
static inline u32 lname##_controls_get(struct vcpu_vmx *vmx)		\
{									\
	return vmx->loaded_vmcs->controls_shadow.lname;			\
}									\
static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u32 val) \
{									\
	lname##_controls_set(vmx, lname##_controls_get(vmx) | val);	\
}									\
static inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u32 val) \
{									\
	lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val);	\
}
BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS)
BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS)
BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL)
BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL)
BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL)
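For readers tracking the replacement of the hand-written shadow helpers, expanding the macro for one instance makes the new scheme concrete. This is a mechanical by-hand expansion of BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS), written out here only for illustration:

	/* What BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS) expands to: */
	static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
	{
		/* The shadow now lives in the loaded VMCS, so it tracks vmcs01/vmcs02. */
		if (vmx->loaded_vmcs->controls_shadow.vm_entry != val) {
			vmcs_write32(VM_ENTRY_CONTROLS, val);
			vmx->loaded_vmcs->controls_shadow.vm_entry = val;
		}
	}

	static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
	{
		return vmx->loaded_vmcs->controls_shadow.vm_entry;
	}

Moving the shadow into struct loaded_vmcs is what lets the reset_shadow/init helpers disappear: each VMCS carries its own cached value, so switching between vmcs01 and vmcs02 no longer invalidates the cache.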
static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
{

@@ -468,6 +441,7 @@ static inline u32 vmx_vmexit_ctrl(void)
}

u32 vmx_exec_control(struct vcpu_vmx *vmx);
u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx);

static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
{
@@ -717,7 +717,7 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu)
	gfn_t gfn;
	int r;

	if (is_long_mode(vcpu) || !is_pae(vcpu) || !is_paging(vcpu))
	if (!is_pae_paging(vcpu))
		return false;

	if (!test_bit(VCPU_EXREG_PDPTR,

@@ -960,8 +960,8 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
	if (is_long_mode(vcpu) &&
	    (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63)))
		return 1;
	else if (is_pae(vcpu) && is_paging(vcpu) &&
		 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
	else if (is_pae_paging(vcpu) &&
		 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
		return 1;

	kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);

@@ -1174,7 +1174,28 @@ static u32 emulated_msrs[] = {
	MSR_AMD64_VIRT_SPEC_CTRL,
	MSR_IA32_POWER_CTL,

	/*
	 * The following list leaves out MSRs whose values are determined
	 * by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs.
	 * We always support the "true" VMX control MSRs, even if the host
	 * processor does not, so I am putting these registers here rather
	 * than in msrs_to_save.
	 */
	MSR_IA32_VMX_BASIC,
	MSR_IA32_VMX_TRUE_PINBASED_CTLS,
	MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
	MSR_IA32_VMX_TRUE_EXIT_CTLS,
	MSR_IA32_VMX_TRUE_ENTRY_CTLS,
	MSR_IA32_VMX_MISC,
	MSR_IA32_VMX_CR0_FIXED0,
	MSR_IA32_VMX_CR4_FIXED0,
	MSR_IA32_VMX_VMCS_ENUM,
	MSR_IA32_VMX_PROCBASED_CTLS2,
	MSR_IA32_VMX_EPT_VPID_CAP,
	MSR_IA32_VMX_VMFUNC,

	MSR_K7_HWCR,
	MSR_KVM_POLL_CONTROL,
};

static unsigned num_emulated_msrs;

@@ -1210,11 +1231,12 @@ static u32 msr_based_features[] = {

static unsigned int num_msr_based_features;

u64 kvm_get_arch_capabilities(void)
static u64 kvm_get_arch_capabilities(void)
{
	u64 data;
	u64 data = 0;

	rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data);
	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);

	/*
	 * If we're doing cache flushes (either "always" or "cond")

@@ -1230,7 +1252,6 @@ u64 kvm_get_arch_capabilities(void)

	return data;
}
EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);

static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
{
@@ -2545,13 +2566,24 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
		}
		break;
	case MSR_IA32_MISC_ENABLE:
		vcpu->arch.ia32_misc_enable_msr = data;
		if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) &&
		    ((vcpu->arch.ia32_misc_enable_msr ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
			if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
				return 1;
			vcpu->arch.ia32_misc_enable_msr = data;
			kvm_update_cpuid(vcpu);
		} else {
			vcpu->arch.ia32_misc_enable_msr = data;
		}
		break;
	case MSR_IA32_SMBASE:
		if (!msr_info->host_initiated)
			return 1;
		vcpu->arch.smbase = data;
		break;
	case MSR_IA32_POWER_CTL:
		vcpu->arch.msr_ia32_power_ctl = data;
		break;
	case MSR_IA32_TSC:
		kvm_write_tsc(vcpu, msr_info);
		break;

@@ -2626,6 +2658,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
			return 1;
		break;

	case MSR_KVM_POLL_CONTROL:
		/* only enable bit supported */
		if (data & (-1ULL << 1))
			return 1;

		vcpu->arch.msr_kvm_poll_control = data;
		break;

	case MSR_IA32_MCG_CTL:
	case MSR_IA32_MCG_STATUS:
	case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:

@@ -2803,6 +2843,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
			return 1;
		msr_info->data = vcpu->arch.arch_capabilities;
		break;
	case MSR_IA32_POWER_CTL:
		msr_info->data = vcpu->arch.msr_ia32_power_ctl;
		break;
	case MSR_IA32_TSC:
		msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset;
		break;

@@ -2875,6 +2918,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
	case MSR_KVM_PV_EOI_EN:
		msr_info->data = vcpu->arch.pv_eoi.msr_val;
		break;
	case MSR_KVM_POLL_CONTROL:
		msr_info->data = vcpu->arch.msr_kvm_poll_control;
		break;
	case MSR_IA32_P5_MC_ADDR:
	case MSR_IA32_P5_MC_TYPE:
	case MSR_IA32_MCG_CAP:

@@ -3084,6 +3130,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
	case KVM_CAP_SET_BOOT_CPU_ID:
	case KVM_CAP_SPLIT_IRQCHIP:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_PMU_EVENT_FILTER:
	case KVM_CAP_GET_MSR_FEATURES:
	case KVM_CAP_MSR_PLATFORM_INFO:
	case KVM_CAP_EXCEPTION_PAYLOAD:

@@ -3096,7 +3143,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
		r = KVM_CLOCK_TSC_STABLE;
		break;
	case KVM_CAP_X86_DISABLE_EXITS:
		r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE;
		r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
		     KVM_X86_DISABLE_EXITS_CSTATE;
		if (kvm_can_mwait_in_guest())
			r |= KVM_X86_DISABLE_EXITS_MWAIT;
		break;

@@ -4613,6 +4661,8 @@ split_irqchip_unlock:
			kvm->arch.hlt_in_guest = true;
		if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
			kvm->arch.pause_in_guest = true;
		if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
			kvm->arch.cstate_in_guest = true;
		r = 0;
		break;
	case KVM_CAP_MSR_PLATFORM_INFO:

@@ -4927,6 +4977,9 @@ set_identity_unlock:
		r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
		break;
	}
	case KVM_SET_PMU_EVENT_FILTER:
		r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
		break;
	default:
		r = -ENOTTY;
	}
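Userspace opts into the new C-state behaviour with KVM_ENABLE_CAP on the VM fd. A minimal sketch, assuming a VM fd already obtained via /dev/kvm (error handling elided):

	#include <linux/kvm.h>
	#include <string.h>
	#include <sys/ioctl.h>

	/* Let the guest read C-state residency MSRs without exiting. */
	static int disable_cstate_exits(int vm_fd)
	{
		struct kvm_enable_cap cap;

		memset(&cap, 0, sizeof(cap));
		cap.cap = KVM_CAP_X86_DISABLE_EXITS;
		cap.args[0] = KVM_X86_DISABLE_EXITS_CSTATE;

		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}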
@@ -6379,7 +6432,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
					   vcpu->arch.db);

		if (dr6 != 0) {
			vcpu->arch.dr6 &= ~15;
			vcpu->arch.dr6 &= ~DR_TRAP_BITS;
			vcpu->arch.dr6 |= dr6 | DR6_RTM;
			kvm_queue_exception(vcpu, DB_VECTOR);
			*r = EMULATE_DONE;

@@ -6706,7 +6759,7 @@ static void kvm_hyperv_tsc_notifier(void)
	struct kvm_vcpu *vcpu;
	int cpu;

	spin_lock(&kvm_lock);
	mutex_lock(&kvm_lock);
	list_for_each_entry(kvm, &vm_list, vm_list)
		kvm_make_mclock_inprogress_request(kvm);

@@ -6732,7 +6785,7 @@ static void kvm_hyperv_tsc_notifier(void)

		spin_unlock(&ka->pvclock_gtod_sync_lock);
	}
	spin_unlock(&kvm_lock);
	mutex_unlock(&kvm_lock);
}
#endif

@@ -6783,17 +6836,17 @@ static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu)

	smp_call_function_single(cpu, tsc_khz_changed, freq, 1);

	spin_lock(&kvm_lock);
	mutex_lock(&kvm_lock);
	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			if (vcpu->cpu != cpu)
				continue;
			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
			if (vcpu->cpu != smp_processor_id())
			if (vcpu->cpu != raw_smp_processor_id())
				send_ipi = 1;
		}
	}
	spin_unlock(&kvm_lock);
	mutex_unlock(&kvm_lock);

	if (freq->old < freq->new && send_ipi) {
		/*

@@ -6908,35 +6961,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
	.handle_intel_pt_intr = kvm_handle_intel_pt_intr,
};
static void kvm_set_mmio_spte_mask(void)
{
	u64 mask;
	int maxphyaddr = boot_cpu_data.x86_phys_bits;

	/*
	 * Set the reserved bits and the present bit of a paging-structure
	 * entry to generate page fault with PFER.RSV = 1.
	 */

	/*
	 * Mask the uppermost physical address bit, which would be reserved as
	 * long as the supported physical address width is less than 52.
	 */
	mask = 1ull << 51;

	/* Set the present bit. */
	mask |= 1ull;

	/*
	 * If reserved bit is not supported, clear the present bit to disable
	 * mmio page fault.
	 */
	if (IS_ENABLED(CONFIG_X86_64) && maxphyaddr == 52)
		mask &= ~1ull;

	kvm_mmu_set_mmio_spte_mask(mask, mask);
}

#ifdef CONFIG_X86_64
static void pvclock_gtod_update_fn(struct work_struct *work)
{

@@ -6945,12 +6969,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
	struct kvm_vcpu *vcpu;
	int i;

	spin_lock(&kvm_lock);
	mutex_lock(&kvm_lock);
	list_for_each_entry(kvm, &vm_list, vm_list)
		kvm_for_each_vcpu(i, vcpu, kvm)
			kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
	atomic_set(&kvm_guest_has_master_clock, 0);
	spin_unlock(&kvm_lock);
	mutex_unlock(&kvm_lock);
}

static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);

@@ -7033,8 +7057,6 @@ int kvm_arch_init(void *opaque)
	if (r)
		goto out_free_percpu;

	kvm_set_mmio_spte_mask();

	kvm_x86_ops = ops;

	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,

@@ -7173,6 +7195,23 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
	kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
}

static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
{
	struct kvm_vcpu *target = NULL;
	struct kvm_apic_map *map;

	rcu_read_lock();
	map = rcu_dereference(kvm->arch.apic_map);

	if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
		target = map->phys_map[dest_id]->vcpu;

	rcu_read_unlock();

	if (target)
		kvm_vcpu_yield_to(target);
}
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
	unsigned long nr, a0, a1, a2, a3, ret;

@@ -7219,6 +7258,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
	case KVM_HC_SEND_IPI:
		ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
		break;
	case KVM_HC_SCHED_YIELD:
		kvm_sched_yield(vcpu->kvm, a0);
		ret = 0;
		break;
	default:
		ret = -KVM_ENOSYS;
		break;
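On the guest side the new hypercall is a one-liner: pass the APIC ID of the vCPU holding whatever you are spinning on. A sketch of how a paravirtualized guest might use it; the boost_holder() wrapper and its name are hypothetical, while kvm_hypercall1() is the standard x86 guest helper:

	#include <asm/kvm_para.h>

	/* Donate our time slice to the (possibly preempted) vCPU with this APIC ID. */
	static void boost_holder(unsigned long holder_apic_id)
	{
		kvm_hypercall1(KVM_HC_SCHED_YIELD, holder_apic_id);
	}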
@@ -7951,9 +7994,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
	}

	trace_kvm_entry(vcpu->vcpu_id);
	if (lapic_in_kernel(vcpu) &&
	    vcpu->arch.apic->lapic_timer.timer_advance_ns)
		wait_lapic_expire(vcpu);
	guest_enter_irqoff();

	fpregs_assert_state_consistent();

@@ -8002,13 +8042,29 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
	vcpu->mode = OUTSIDE_GUEST_MODE;
	smp_wmb();

	kvm_x86_ops->handle_exit_irqoff(vcpu);

	/*
	 * Consume any pending interrupts, including the possible source of
	 * VM-Exit on SVM and any ticks that occur between VM-Exit and now.
	 * An instruction is required after local_irq_enable() to fully unblock
	 * interrupts on processors that implement an interrupt shadow, the
	 * stat.exits increment will do nicely.
	 */
	kvm_before_interrupt(vcpu);
	kvm_x86_ops->handle_external_intr(vcpu);
	local_irq_enable();
	++vcpu->stat.exits;
	local_irq_disable();
	kvm_after_interrupt(vcpu);

	++vcpu->stat.exits;

	guest_exit_irqoff();
	if (lapic_in_kernel(vcpu)) {
		s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
		if (delta != S64_MIN) {
			trace_kvm_wait_lapic_expire(vcpu->vcpu_id, delta);
			vcpu->arch.apic->lapic_timer.advance_expire_delta = S64_MIN;
		}
	}

	local_irq_enable();
	preempt_enable();

@@ -8594,7 +8650,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
	kvm_update_cpuid(vcpu);

	idx = srcu_read_lock(&vcpu->kvm->srcu);
	if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu)) {
	if (is_pae_paging(vcpu)) {
		load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
		mmu_reset_needed = 1;
	}
@@ -8875,6 +8931,10 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
	msr.host_initiated = true;
	kvm_write_tsc(vcpu, &msr);
	vcpu_put(vcpu);

	/* poll control enabled by default */
	vcpu->arch.msr_kvm_poll_control = 1;

	mutex_unlock(&vcpu->mutex);

	if (!kvmclock_periodic_sync)

@@ -9107,9 +9167,9 @@ void kvm_arch_hardware_unsetup(void)
	kvm_x86_ops->hardware_unsetup();
}

void kvm_arch_check_processor_compat(void *rtn)
int kvm_arch_check_processor_compat(void)
{
	kvm_x86_ops->check_processor_compatibility(rtn);
	return kvm_x86_ops->check_processor_compatibility();
}
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)

@@ -9381,6 +9441,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
	kvm_ioapic_destroy(kvm);
	kvm_free_vcpus(kvm);
	kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
	kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
	kvm_mmu_uninit_vm(kvm);
	kvm_page_track_cleanup(kvm);
	kvm_hv_destroy_vm(kvm);

@@ -9789,6 +9850,36 @@ static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val)
				      sizeof(u32));
}

static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
{
	if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
		return false;

	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
	    (vcpu->arch.apf.send_user_only &&
	     kvm_x86_ops->get_cpl(vcpu) == 0))
		return false;

	return true;
}

bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
{
	if (unlikely(!lapic_in_kernel(vcpu) ||
		     kvm_event_needs_reinjection(vcpu) ||
		     vcpu->arch.exception.pending))
		return false;

	if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu))
		return false;

	/*
	 * If interrupts are off we cannot even use an artificial
	 * halt state.
	 */
	return kvm_x86_ops->interrupt_allowed(vcpu);
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{

@@ -9797,11 +9888,8 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
	trace_kvm_async_pf_not_present(work->arch.token, work->gva);
	kvm_add_async_pf_gfn(vcpu, work->arch.gfn);

	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
	    (vcpu->arch.apf.send_user_only &&
	     kvm_x86_ops->get_cpl(vcpu) == 0))
		kvm_make_request(KVM_REQ_APF_HALT, vcpu);
	else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
	if (kvm_can_deliver_async_pf(vcpu) &&
	    !apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
		fault.vector = PF_VECTOR;
		fault.error_code_valid = true;
		fault.error_code = 0;

@@ -9809,6 +9897,16 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
		fault.address = work->arch.token;
		fault.async_page_fault = true;
		kvm_inject_page_fault(vcpu, &fault);
	} else {
		/*
		 * It is not possible to deliver a paravirtualized asynchronous
		 * page fault, but putting the guest in an artificial halt state
		 * can be beneficial nevertheless: if an interrupt arrives, we
		 * can deliver it timely and perhaps the guest will schedule
		 * another process.  When the instruction that triggered a page
		 * fault is retried, hopefully the page will be ready in the host.
		 */
		kvm_make_request(KVM_REQ_APF_HALT, vcpu);
	}
}
@@ -9949,6 +10047,13 @@ bool kvm_vector_hashing_enabled(void)
}
EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);

bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
{
	return (vcpu->arch.msr_kvm_poll_control & 1) == 0;
}
EXPORT_SYMBOL_GPL(kvm_arch_no_poll);

EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
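The guest controls host-side halt polling through MSR_KVM_POLL_CONTROL: bit 0 set (the default established above) permits polling, bit 0 clear disables it. A guest that would rather have the host yield the physical CPU immediately on halt might do roughly the following (a sketch; wrmsrl() and the MSR symbol come from the guest's msr/kvm_para headers):

	#include <asm/msr.h>
	#include <asm/kvm_para.h>

	/* Tell the host not to poll on our behalf when this vCPU halts. */
	static void opt_out_of_host_polling(void)
	{
		if (kvm_para_available())
			wrmsrl(MSR_KVM_POLL_CONTROL, 0);
	}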
@@ -139,6 +139,11 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
	return likely(kvm_read_cr0_bits(vcpu, X86_CR0_PG));
}

static inline bool is_pae_paging(struct kvm_vcpu *vcpu)
{
	return !is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu);
}

static inline u32 bit(int bitno)
{
	return 1 << (bitno & 31);

@@ -333,6 +338,11 @@ static inline bool kvm_pause_in_guest(struct kvm *kvm)
	return kvm->arch.pause_in_guest;
}

static inline bool kvm_cstate_in_guest(struct kvm *kvm)
{
	return kvm->arch.cstate_in_guest;
}

DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu);

static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)
@@ -11,18 +11,19 @@
#include <asm/perf_event.h>

#define ARMV8_PMU_CYCLE_IDX		(ARMV8_PMU_MAX_COUNTERS - 1)
#define ARMV8_PMU_MAX_COUNTER_PAIRS	((ARMV8_PMU_MAX_COUNTERS + 1) >> 1)

#ifdef CONFIG_KVM_ARM_PMU

struct kvm_pmc {
	u8 idx;	/* index into the pmu->pmc array */
	struct perf_event *perf_event;
	u64 bitmask;
};

struct kvm_pmu {
	int irq_num;
	struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS];
	DECLARE_BITMAP(chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
	bool ready;
	bool created;
	bool irq_level;

@@ -35,8 +36,8 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val);
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu);
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu);
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu);
void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val);
void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val);
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val);
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val);
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu);
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu);
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu);

@@ -72,8 +73,8 @@ static inline u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
}
static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {}
static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {}
static inline void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val) {}
static inline void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) {}
static inline void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) {}
static inline void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) {}
static inline void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) {}
static inline void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) {}
static inline bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
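The chained bitmap has one bit per counter pair, which is why its size is ((N + 1) >> 1): counters 2k and 2k+1 share bit k, and the even (low) counter of a pair carries the backing perf event. A small illustration of the index arithmetic the PMU code relies on; the helper names here are made up for the example:

	#include <linux/types.h>

	/* Counters 2k and 2k+1 form pair k. */
	static inline unsigned int pmc_pair_index(u8 idx)
	{
		return idx >> 1;
	}

	/* The low/even counter of a chained pair is the canonical one. */
	static inline u8 pmc_canonical_index(u8 idx)
	{
		return idx & ~1;
	}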
@@ -159,7 +159,7 @@ static inline bool is_error_page(struct page *page)

extern struct kmem_cache *kvm_vcpu_cache;

extern spinlock_t kvm_lock;
extern struct mutex kvm_lock;
extern struct list_head vm_list;

struct kvm_io_range {

@@ -867,7 +867,7 @@ int kvm_arch_hardware_enable(void);
void kvm_arch_hardware_disable(void);
int kvm_arch_hardware_setup(void);
void kvm_arch_hardware_unsetup(void);
void kvm_arch_check_processor_compat(void *rtn);
int kvm_arch_check_processor_compat(void);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);

@@ -990,6 +990,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
				     struct kvm_irq_ack_notifier *kian);
int kvm_request_irq_source_id(struct kvm *kvm);
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);

/*
 * search_memslots() and __gfn_to_memslot() are here because they are
@@ -696,9 +696,11 @@ struct kvm_ioeventfd {
#define KVM_X86_DISABLE_EXITS_MWAIT          (1 << 0)
#define KVM_X86_DISABLE_EXITS_HLT            (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE          (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE         (1 << 3)
#define KVM_X86_DISABLE_VALID_EXITS          (KVM_X86_DISABLE_EXITS_MWAIT | \
                                              KVM_X86_DISABLE_EXITS_HLT | \
                                              KVM_X86_DISABLE_EXITS_PAUSE)
                                              KVM_X86_DISABLE_EXITS_PAUSE | \
                                              KVM_X86_DISABLE_EXITS_CSTATE)

/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {

@@ -993,6 +995,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_ARM_SVE 170
#define KVM_CAP_ARM_PTRAUTH_ADDRESS 171
#define KVM_CAP_ARM_PTRAUTH_GENERIC 172
#define KVM_CAP_PMU_EVENT_FILTER 173

#ifdef KVM_CAP_IRQ_ROUTING

@@ -1327,6 +1330,8 @@ struct kvm_s390_ucas_mapping {
#define KVM_PPC_GET_RMMU_INFO	  _IOW(KVMIO,  0xb0, struct kvm_ppc_rmmu_info)
/* Available with KVM_CAP_PPC_GET_CPU_CHAR */
#define KVM_PPC_GET_CPU_CHAR	  _IOR(KVMIO,  0xb1, struct kvm_ppc_cpu_char)
/* Available with KVM_CAP_PMU_EVENT_FILTER */
#define KVM_SET_PMU_EVENT_FILTER  _IOW(KVMIO,  0xb2, struct kvm_pmu_event_filter)

/* ioctl for vm fd */
#define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct kvm_create_device)
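Userspace programs the new filter with a single ioctl on the VM fd. A sketch of deny-listing one raw event; the struct layout (an action, a count, and a flexible array of raw event codes) and the KVM_PMU_EVENT_DENY constant are assumed from this series' x86 uapi header, and the event value is a placeholder:

	#include <linux/kvm.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/ioctl.h>

	/* Forbid one raw PMU event code for every vCPU in the VM. */
	static int deny_one_event(int vm_fd, __u64 raw_event)
	{
		struct kvm_pmu_event_filter *f;
		int r;

		f = calloc(1, sizeof(*f) + sizeof(__u64));
		if (!f)
			return -1;

		f->action = KVM_PMU_EVENT_DENY;	/* count everything except... */
		f->nevents = 1;
		f->events[0] = raw_event;

		r = ioctl(vm_fd, KVM_SET_PMU_EVENT_FILTER, f);
		free(f);
		return r;
	}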
@@ -28,6 +28,7 @@
#define KVM_HC_MIPS_CONSOLE_OUTPUT	8
#define KVM_HC_CLOCK_PAIRING		9
#define KVM_HC_SEND_IPI			10
#define KVM_HC_SCHED_YIELD		11

/*
 * hypercalls use architecture specific
@@ -696,9 +696,11 @@ struct kvm_ioeventfd {
#define KVM_X86_DISABLE_EXITS_MWAIT          (1 << 0)
#define KVM_X86_DISABLE_EXITS_HLT            (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE          (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE         (1 << 3)
#define KVM_X86_DISABLE_VALID_EXITS          (KVM_X86_DISABLE_EXITS_MWAIT | \
                                              KVM_X86_DISABLE_EXITS_HLT | \
                                              KVM_X86_DISABLE_EXITS_PAUSE)
                                              KVM_X86_DISABLE_EXITS_PAUSE | \
                                              KVM_X86_DISABLE_EXITS_CSTATE)

/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {
@@ -121,7 +121,6 @@ static void *vcpu_worker(void *data)
	uint64_t *guest_array;
	uint64_t pages_count = 0;
	struct kvm_run *run;
	struct ucall uc;

	run = vcpu_state(vm, VCPU_ID);

@@ -132,7 +131,7 @@ static void *vcpu_worker(void *data)
		/* Let the guest dirty the random pages */
		ret = _vcpu_run(vm, VCPU_ID);
		TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
		if (get_ucall(vm, VCPU_ID, &uc) == UCALL_SYNC) {
		if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) {
			pages_count += TEST_PAGES_PER_LOOP;
			generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
		} else {
@@ -52,4 +52,8 @@ static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint
	vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, &reg);
}

void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init);
void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
			      struct kvm_vcpu_init *init, void *guest_code);

#endif /* SELFTEST_KVM_PROCESSOR_H */
@@ -86,8 +86,7 @@ int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
		void *arg);
void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot,
		 int gdt_memslot);
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
			  uint32_t data_memslot, uint32_t pgd_memslot);
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
@@ -235,28 +235,21 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
	return vm;
}

void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init)
{
	size_t stack_size = vm->page_size == 4096 ?
					DEFAULT_STACK_PGS * vm->page_size :
					vm->page_size;
	uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
					DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, 0, 0);

	vm_vcpu_add(vm, vcpuid, 0, 0);

	set_reg(vm, vcpuid, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
	set_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
}

void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
{
	struct kvm_vcpu_init init;
	struct kvm_vcpu_init default_init = { .target = -1, };
	uint64_t sctlr_el1, tcr_el1;

	memset(&init, 0, sizeof(init));
	init.target = KVM_ARM_TARGET_GENERIC_V8;
	vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_INIT, &init);
	if (!init)
		init = &default_init;

	if (init->target == -1) {
		struct kvm_vcpu_init preferred;
		vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred);
		init->target = preferred.target;
	}

	vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_INIT, init);

	/*
	 * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15

@@ -316,3 +309,24 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
	fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n",
		indent, "", pstate, pc);
}

void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
			      struct kvm_vcpu_init *init, void *guest_code)
{
	size_t stack_size = vm->page_size == 4096 ?
					DEFAULT_STACK_PGS * vm->page_size :
					vm->page_size;
	uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
					DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, 0, 0);

	vm_vcpu_add(vm, vcpuid);
	aarch64_vcpu_setup(vm, vcpuid, init);

	set_reg(vm, vcpuid, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
	set_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
}

void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
	aarch64_vcpu_add_default(vm, vcpuid, NULL, guest_code);
}
@@ -763,11 +763,10 @@ static int vcpu_mmap_sz(void)
 *
 * Return: None
 *
 * Creates and adds to the VM specified by vm and virtual CPU with
 * the ID given by vcpuid.
 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpuid.
 * No additional VCPU setup is done.
 */
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot,
		 int gdt_memslot)
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu;

@@ -801,8 +800,6 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot,
	vm->vcpu_head->prev = vcpu;
	vcpu->next = vm->vcpu_head;
	vm->vcpu_head = vcpu;

	vcpu_setup(vm, vcpuid, pgd_memslot, gdt_memslot);
}

/*
@@ -64,8 +64,6 @@ struct kvm_vm {
};

struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot,
		int gdt_memslot);
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent);
void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent);
@@ -125,16 +125,16 @@ void ucall(uint64_t cmd, int nargs, ...)
uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
{
	struct kvm_run *run = vcpu_state(vm, vcpu_id);

	memset(uc, 0, sizeof(*uc));
	struct ucall ucall = {};
	bool got_ucall = false;

#ifdef __x86_64__
	if (ucall_type == UCALL_PIO && run->exit_reason == KVM_EXIT_IO &&
	    run->io.port == UCALL_PIO_PORT) {
		struct kvm_regs regs;
		vcpu_regs_get(vm, vcpu_id, &regs);
		memcpy(uc, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi), sizeof(*uc));
		return uc->cmd;
		memcpy(&ucall, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi), sizeof(ucall));
		got_ucall = true;
	}
#endif
	if (ucall_type == UCALL_MMIO && run->exit_reason == KVM_EXIT_MMIO &&

@@ -143,8 +143,15 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
		TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
			    "Unexpected ucall exit mmio address access");
		memcpy(&gva, run->mmio.data, sizeof(gva));
		memcpy(uc, addr_gva2hva(vm, gva), sizeof(*uc));
		memcpy(&ucall, addr_gva2hva(vm, gva), sizeof(ucall));
		got_ucall = true;
	}

	return uc->cmd;
	if (got_ucall) {
		vcpu_run_complete_io(vm, vcpu_id);
		if (uc)
			memcpy(uc, &ucall, sizeof(ucall));
	}

	return ucall.cmd;
}
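With get_ucall() now tolerating a NULL output pointer, callers that only care about the command no longer need to declare a struct ucall at all. A typical selftest loop, sketched (helper name invented for the example):

	#include "kvm_util.h"
	#include "test_util.h"

	/* Run until the guest signals completion; ignore sync payloads. */
	static void run_until_done(struct kvm_vm *vm, uint32_t vcpu_id)
	{
		for (;;) {
			vcpu_run(vm, vcpu_id);

			switch (get_ucall(vm, vcpu_id, NULL)) {	/* NULL: drop payload */
			case UCALL_SYNC:
				continue;
			case UCALL_DONE:
				return;
			default:
				TEST_ASSERT(false, "unexpected ucall");
			}
		}
	}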
@@ -609,7 +609,7 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
	kvm_seg_fill_gdt_64bit(vm, segp);
}

void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
{
	struct kvm_sregs sregs;

@@ -655,7 +655,8 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
				      DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);

	/* Create VCPU */
	vm_vcpu_add(vm, vcpuid, 0, 0);
	vm_vcpu_add(vm, vcpuid);
	vcpu_setup(vm, vcpuid, 0, 0);

	/* Setup guest general purpose registers */
	vcpu_regs_get(vm, vcpuid, &regs);
@@ -144,7 +144,7 @@ int main(int argc, char *argv[])

	/* Restore state in a new VM.  */
	kvm_vm_restart(vm, O_RDWR);
	vm_vcpu_add(vm, VCPU_ID, 0, 0);
	vm_vcpu_add(vm, VCPU_ID);
	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
	vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap);
	vcpu_load_state(vm, VCPU_ID, state);

@@ -33,7 +33,7 @@ void test_vcpu_creation(int first_vcpu_id, int num_vcpus)
		int vcpu_id = first_vcpu_id + i;

		/* This asserts that the vCPU was created. */
		vm_vcpu_add(vm, vcpu_id, 0, 0);
		vm_vcpu_add(vm, vcpu_id);
	}

	kvm_vm_free(vm);

@@ -144,7 +144,7 @@ int main(int argc, char *argv[])
	state = vcpu_save_state(vm, VCPU_ID);
	kvm_vm_release(vm);
	kvm_vm_restart(vm, O_RDWR);
	vm_vcpu_add(vm, VCPU_ID, 0, 0);
	vm_vcpu_add(vm, VCPU_ID);
	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
	vcpu_load_state(vm, VCPU_ID, state);
	run = vcpu_state(vm, VCPU_ID);

@@ -176,7 +176,7 @@ int main(int argc, char *argv[])

	/* Restore state in a new VM.  */
	kvm_vm_restart(vm, O_RDWR);
	vm_vcpu_add(vm, VCPU_ID, 0, 0);
	vm_vcpu_add(vm, VCPU_ID);
	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
	vcpu_load_state(vm, VCPU_ID, state);
	run = vcpu_state(vm, VCPU_ID);
@@ -237,10 +237,10 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)

	switch (index) {
	case TIMER_VTIMER:
		cnt_ctl = read_sysreg_el0(cntv_ctl);
		cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
		break;
	case TIMER_PTIMER:
		cnt_ctl = read_sysreg_el0(cntp_ctl);
		cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
		break;
	case NR_KVM_TIMERS:
		/* GCC is braindead */

@@ -350,20 +350,20 @@ static void timer_save_state(struct arch_timer_context *ctx)

	switch (index) {
	case TIMER_VTIMER:
		ctx->cnt_ctl = read_sysreg_el0(cntv_ctl);
		ctx->cnt_cval = read_sysreg_el0(cntv_cval);
		ctx->cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
		ctx->cnt_cval = read_sysreg_el0(SYS_CNTV_CVAL);

		/* Disable the timer */
		write_sysreg_el0(0, cntv_ctl);
		write_sysreg_el0(0, SYS_CNTV_CTL);
		isb();

		break;
	case TIMER_PTIMER:
		ctx->cnt_ctl = read_sysreg_el0(cntp_ctl);
		ctx->cnt_cval = read_sysreg_el0(cntp_cval);
		ctx->cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
		ctx->cnt_cval = read_sysreg_el0(SYS_CNTP_CVAL);

		/* Disable the timer */
		write_sysreg_el0(0, cntp_ctl);
		write_sysreg_el0(0, SYS_CNTP_CTL);
		isb();

		break;

@@ -429,14 +429,14 @@ static void timer_restore_state(struct arch_timer_context *ctx)

	switch (index) {
	case TIMER_VTIMER:
		write_sysreg_el0(ctx->cnt_cval, cntv_cval);
		write_sysreg_el0(ctx->cnt_cval, SYS_CNTV_CVAL);
		isb();
		write_sysreg_el0(ctx->cnt_ctl, cntv_ctl);
		write_sysreg_el0(ctx->cnt_ctl, SYS_CNTV_CTL);
		break;
	case TIMER_PTIMER:
		write_sysreg_el0(ctx->cnt_cval, cntp_cval);
		write_sysreg_el0(ctx->cnt_cval, SYS_CNTP_CVAL);
		isb();
		write_sysreg_el0(ctx->cnt_ctl, cntp_ctl);
		write_sysreg_el0(ctx->cnt_ctl, SYS_CNTP_CTL);
		break;
	case NR_KVM_TIMERS:
		BUG();
@@ -93,9 +93,9 @@ int kvm_arch_hardware_setup(void)
	return 0;
}

void kvm_arch_check_processor_compat(void *rtn)
int kvm_arch_check_processor_compat(void)
{
	*(int *)rtn = 0;
	return 0;
}

@@ -1332,6 +1332,8 @@ static void cpu_hyp_reset(void)

static void cpu_hyp_reinit(void)
{
	kvm_init_host_cpu_context(&this_cpu_ptr(&kvm_host_data)->host_ctxt);

	cpu_hyp_reset();

	if (is_kernel_in_hyp_mode())

@@ -1569,7 +1571,6 @@ static int init_hyp_mode(void)
		kvm_host_data_t *cpu_data;

		cpu_data = per_cpu_ptr(&kvm_host_data, cpu);
		kvm_init_host_cpu_context(&cpu_data->host_ctxt, cpu);
		err = create_hyp_mappings(cpu_data, cpu_data + 1, PAGE_HYP);

		if (err) {
@@ -13,6 +13,123 @@
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>

static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);

#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1

/**
 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (select_idx == ARMV8_PMU_CYCLE_IDX &&
		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
}

static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu;
	struct kvm_vcpu_arch *vcpu_arch;

	pmc -= pmc->idx;
	pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
	vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
	return container_of(vcpu_arch, struct kvm_vcpu, arch);
}

/**
 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
 * @pmc: The PMU counter pointer
 */
static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
{
	return select_idx & 0x1;
}

/**
 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
 * @pmc: The PMU counter pointer
 *
 * When a pair of PMCs are chained together we use the low counter (canonical)
 * to hold the underlying perf event.
 */
static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;

	return pmc;
}

/**
 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 eventsel, reg;

	select_idx |= 0x1;

	if (select_idx == ARMV8_PMU_CYCLE_IDX)
		return false;

	reg = PMEVTYPER0_EL0 + select_idx;
	eventsel = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_EVENT;

	return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
}

/**
 * kvm_pmu_get_pair_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 */
static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
					  struct kvm_pmc *pmc)
{
	u64 counter, counter_high, reg, enabled, running;

	if (kvm_pmu_pmc_is_chained(pmc)) {
		pmc = kvm_pmu_get_canonical_pmc(pmc);
		reg = PMEVCNTR0_EL0 + pmc->idx;

		counter = __vcpu_sys_reg(vcpu, reg);
		counter_high = __vcpu_sys_reg(vcpu, reg + 1);

		counter = lower_32_bits(counter) | (counter_high << 32);
	} else {
		reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
		counter = __vcpu_sys_reg(vcpu, reg);
	}

	/*
	 * The real counter value is equal to the value of counter register plus
	 * the value perf event counts.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

	return counter;
}
/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer

@@ -20,22 +137,20 @@
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 counter, reg, enabled, running;
	u64 counter;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx];

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
	counter = __vcpu_sys_reg(vcpu, reg);
	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	/* The real counter value is equal to the value of counter register plus
	 * the value perf event counts.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);
	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(select_idx))
		counter = upper_32_bits(counter);

	return counter & pmc->bitmask;
	else if (!kvm_pmu_idx_is_64bit(vcpu, select_idx))
		counter = lower_32_bits(counter);

	return counter;
}

/**

@@ -51,6 +166,23 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
	__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

/**

@@ -63,15 +195,23 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
	u64 counter, reg;

	if (pmc->perf_event) {
		counter = kvm_pmu_get_counter_value(vcpu, pmc->idx);
	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (!pmc->perf_event)
		return;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc)) {
		reg = PMEVCNTR0_EL0 + pmc->idx;
		__vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter);
		__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
	} else {
		reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
		__vcpu_sys_reg(vcpu, reg) = counter;
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
		__vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter);
	}

	kvm_pmu_release_perf_event(pmc);
}

/**

@@ -87,8 +227,9 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
		pmu->pmc[i].idx = i;
		pmu->pmc[i].bitmask = 0xffffffffUL;
	}

	bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
}

/**

@@ -101,15 +242,8 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		struct kvm_pmc *pmc = &pmu->pmc[i];

		if (pmc->perf_event) {
			perf_event_disable(pmc->perf_event);
			perf_event_release_kernel(pmc->perf_event);
			pmc->perf_event = NULL;
		}
	}
	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(&pmu->pmc[i]);
}

u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)

@@ -124,13 +258,13 @@ u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
}

/**
 * kvm_pmu_enable_counter - enable selected PMU counter
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val)
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
@@ -144,6 +278,18 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
			continue;

		pmc = &pmu->pmc[i];

		/*
		 * For high counters of chained events we must recreate the
		 * perf event with the long (64bit) attribute set.
		 */
		if (kvm_pmu_pmc_is_chained(pmc) &&
		    kvm_pmu_idx_is_high_counter(i)) {
			kvm_pmu_create_perf_event(vcpu, i);
			continue;
		}

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event) {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)

@@ -153,13 +299,13 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
}

/**
 * kvm_pmu_disable_counter - disable selected PMU counter
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val)
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

@@ -173,6 +319,18 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
			continue;

		pmc = &pmu->pmc[i];

		/*
		 * For high counters of chained events we must recreate the
		 * perf event with the long (64bit) attribute unset.
		 */
		if (kvm_pmu_pmc_is_chained(pmc) &&
		    kvm_pmu_idx_is_high_counter(i)) {
			kvm_pmu_create_perf_event(vcpu, i);
			continue;
		}

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}

@@ -262,17 +420,6 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
	kvm_pmu_update_state(vcpu);
}

static inline struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu;
	struct kvm_vcpu_arch *vcpu_arch;

	pmc -= pmc->idx;
	pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
	vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
	return container_of(vcpu_arch, struct kvm_vcpu, arch);
}

/**
 * When the perf event overflows, set the overflow status and inform the vcpu.
 */

@@ -329,17 +476,15 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;
	u64 mask;
	int i;

	mask = kvm_pmu_valid_counter_mask(vcpu);
	if (val & ARMV8_PMU_PMCR_E) {
		kvm_pmu_enable_counter(vcpu,
		kvm_pmu_enable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
	} else {
		kvm_pmu_disable_counter(vcpu, mask);
		kvm_pmu_disable_counter_mask(vcpu, mask);
	}

	if (val & ARMV8_PMU_PMCR_C)

@@ -349,11 +494,6 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
		for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++)
			kvm_pmu_set_counter_value(vcpu, i, 0);
	}

	if (val & ARMV8_PMU_PMCR_LC) {
		pmc = &pmu->pmc[ARMV8_PMU_CYCLE_IDX];
		pmc->bitmask = 0xffffffffffffffffUL;
	}
}

static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)

@@ -362,6 +502,112 @@ static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
}
/**
|
||||
* kvm_pmu_create_perf_event - create a perf event for a counter
|
||||
* @vcpu: The vcpu pointer
|
||||
* @select_idx: The number of selected counter
|
||||
*/
|
||||
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
|
||||
{
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
struct kvm_pmc *pmc;
|
||||
struct perf_event *event;
|
||||
struct perf_event_attr attr;
|
||||
u64 eventsel, counter, reg, data;
|
||||
|
||||
/*
|
||||
* For chained counters the event type and filtering attributes are
|
||||
* obtained from the low/even counter. We also use this counter to
|
||||
* determine if the event is enabled/disabled.
|
||||
*/
|
||||
pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);
|
||||
|
||||
reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
|
||||
? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
|
||||
data = __vcpu_sys_reg(vcpu, reg);
|
||||
|
||||
kvm_pmu_stop_counter(vcpu, pmc);
|
||||
eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
|
||||
|
||||
/* Software increment event does't need to be backed by a perf event */
|
||||
if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
|
||||
pmc->idx != ARMV8_PMU_CYCLE_IDX)
|
||||
return;
|
||||
|
||||
memset(&attr, 0, sizeof(struct perf_event_attr));
|
||||
attr.type = PERF_TYPE_RAW;
|
||||
attr.size = sizeof(attr);
|
||||
attr.pinned = 1;
|
||||
attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
|
||||
attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
|
||||
attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
|
||||
attr.exclude_hv = 1; /* Don't count EL2 events */
|
||||
attr.exclude_host = 1; /* Don't count host events */
|
||||
attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ?
|
||||
ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
|
||||
|
||||
counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
|
||||
|
||||
if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
|
||||
/**
|
||||
* The initial sample period (overflow count) of an event. For
|
||||
* chained counters we only support overflow interrupts on the
|
||||
* high counter.
|
||||
*/
|
||||
attr.sample_period = (-counter) & GENMASK(63, 0);
|
||||
event = perf_event_create_kernel_counter(&attr, -1, current,
|
||||
kvm_pmu_perf_overflow,
|
||||
pmc + 1);
|
||||
|
||||
if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1))
|
||||
attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
|
||||
} else {
|
||||
/* The initial sample period (overflow count) of an event. */
|
||||
if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
|
||||
attr.sample_period = (-counter) & GENMASK(63, 0);
|
||||
else
|
||||
attr.sample_period = (-counter) & GENMASK(31, 0);
|
||||
|
||||
event = perf_event_create_kernel_counter(&attr, -1, current,
|
||||
kvm_pmu_perf_overflow, pmc);
|
||||
}
|
||||
|
||||
if (IS_ERR(event)) {
|
||||
pr_err_once("kvm: pmu event creation failed %ld\n",
|
||||
PTR_ERR(event));
|
||||
return;
|
||||
}
|
||||
|
||||
pmc->perf_event = event;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_pmu_update_pmc_chained - update chained bitmap
|
||||
* @vcpu: The vcpu pointer
|
||||
* @select_idx: The number of selected counter
|
||||
*
|
||||
* Update the chained bitmap based on the event type written in the
|
||||
* typer register.
|
||||
*/
|
||||
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
|
||||
{
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
struct kvm_pmc *pmc = &pmu->pmc[select_idx];
|
||||
|
||||
if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
|
||||
/*
|
||||
* During promotion from !chained to chained we must ensure
|
||||
* the adjacent counter is stopped and its event destroyed
|
||||
*/
|
||||
if (!kvm_pmu_pmc_is_chained(pmc))
|
||||
kvm_pmu_stop_counter(vcpu, pmc);
|
||||
|
||||
set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
|
||||
} else {
|
||||
clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_pmu_set_counter_event_type - set selected counter to monitor some event
|
||||
* @vcpu: The vcpu pointer
|
||||
|
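
A note on the sample_period lines above: perf fires the overflow callback after sample_period events, so a guest counter value C is programmed as (-C) masked to the counter width. A quick standalone check of that arithmetic:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t counter = 0xfffffff0;			/* 32-bit guest counter */
	uint64_t period = (-counter) & 0xffffffff;	/* GENMASK(31, 0) */

	/* 0x100000000 - 0xfffffff0 == 16 events until overflow */
	printf("overflow after %llu events\n", (unsigned long long)period);
	return 0;
}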
@@ -375,45 +621,15 @@ static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
 				    u64 select_idx)
 {
-	struct kvm_pmu *pmu = &vcpu->arch.pmu;
-	struct kvm_pmc *pmc = &pmu->pmc[select_idx];
-	struct perf_event *event;
-	struct perf_event_attr attr;
-	u64 eventsel, counter;
 	u64 reg, event_type = data & ARMV8_PMU_EVTYPE_MASK;

-	kvm_pmu_stop_counter(vcpu, pmc);
-	eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
 	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
 	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;

-	/* Software increment event doesn't need to be backed by a perf event */
-	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
-	    select_idx != ARMV8_PMU_CYCLE_IDX)
-		return;
 	__vcpu_sys_reg(vcpu, reg) = event_type;

-	memset(&attr, 0, sizeof(struct perf_event_attr));
-	attr.type = PERF_TYPE_RAW;
-	attr.size = sizeof(attr);
-	attr.pinned = 1;
-	attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, select_idx);
-	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
-	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
-	attr.exclude_hv = 1; /* Don't count EL2 events */
-	attr.exclude_host = 1; /* Don't count host events */
-	attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ?
-		ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
-
-	counter = kvm_pmu_get_counter_value(vcpu, select_idx);
-	/* The initial sample period (overflow count) of an event. */
-	attr.sample_period = (-counter) & pmc->bitmask;
-
-	event = perf_event_create_kernel_counter(&attr, -1, current,
-						 kvm_pmu_perf_overflow, pmc);
-	if (IS_ERR(event)) {
-		pr_err_once("kvm: pmu event creation failed %ld\n",
-			    PTR_ERR(event));
-		return;
-	}
-
-	pmc->perf_event = event;
+	kvm_pmu_update_pmc_chained(vcpu, select_idx);
+	kvm_pmu_create_perf_event(vcpu, select_idx);
 }

 bool kvm_arm_support_pmu_v3(void)
@@ -401,8 +401,16 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
 		feature = smccc_get_arg1(vcpu);
 		switch(feature) {
 		case ARM_SMCCC_ARCH_WORKAROUND_1:
-			if (kvm_arm_harden_branch_predictor())
+			switch (kvm_arm_harden_branch_predictor()) {
+			case KVM_BP_HARDEN_UNKNOWN:
+				break;
+			case KVM_BP_HARDEN_WA_NEEDED:
 				val = SMCCC_RET_SUCCESS;
+				break;
+			case KVM_BP_HARDEN_NOT_REQUIRED:
+				val = SMCCC_RET_NOT_REQUIRED;
+				break;
+			}
 			break;
 		case ARM_SMCCC_ARCH_WORKAROUND_2:
 			switch (kvm_arm_have_ssbd()) {
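
For context, the tri-state above is what a guest sees when it probes ARM_SMCCC_ARCH_WORKAROUND_1 through SMCCC_ARCH_FEATURES. A sketch of the guest-side interpretation, using the SMCCC v1.1 return codes:

#include <stdio.h>

#define SMCCC_RET_SUCCESS		0
#define SMCCC_RET_NOT_SUPPORTED		(-1)
#define SMCCC_RET_NOT_REQUIRED		(-2)

static const char *wa1_status(long ret)
{
	switch (ret) {
	case SMCCC_RET_SUCCESS:		return "workaround implemented";
	case SMCCC_RET_NOT_REQUIRED:	return "CPU not affected";
	default:			return "not supported (assume vulnerable)";
	}
}

int main(void)
{
	printf("%s\n", wa1_status(SMCCC_RET_NOT_REQUIRED));
	return 0;
}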
@@ -430,42 +438,103 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)

 int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
 {
-	return 1;		/* PSCI version */
+	return 3;		/* PSCI version and two workaround registers */
 }

 int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 {
-	if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices))
+	if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++))
+		return -EFAULT;
+
+	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++))
+		return -EFAULT;
+
+	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++))
 		return -EFAULT;

 	return 0;
 }

-int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+#define KVM_REG_FEATURE_LEVEL_WIDTH	4
+#define KVM_REG_FEATURE_LEVEL_MASK	(BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1)
+
+/*
+ * Convert the workaround level into an easy-to-compare number, where higher
+ * values mean better protection.
+ */
+static int get_kernel_wa_level(u64 regid)
 {
-	if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
-		void __user *uaddr = (void __user *)(long)reg->addr;
-		u64 val;
-
-		val = kvm_psci_version(vcpu, vcpu->kvm);
-		if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
-			return -EFAULT;
-
-		return 0;
+	switch (regid) {
+	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+		switch (kvm_arm_harden_branch_predictor()) {
+		case KVM_BP_HARDEN_UNKNOWN:
+			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
+		case KVM_BP_HARDEN_WA_NEEDED:
+			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL;
+		case KVM_BP_HARDEN_NOT_REQUIRED:
+			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED;
+		}
+		return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
+	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+		switch (kvm_arm_have_ssbd()) {
+		case KVM_SSBD_FORCE_DISABLE:
+			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
+		case KVM_SSBD_KERNEL:
+			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL;
+		case KVM_SSBD_FORCE_ENABLE:
+		case KVM_SSBD_MITIGATED:
+			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
+		case KVM_SSBD_UNKNOWN:
+		default:
+			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN;
+		}
 	}

 	return -EINVAL;
 }

+int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+
+	switch (reg->id) {
+	case KVM_REG_ARM_PSCI_VERSION:
+		val = kvm_psci_version(vcpu, vcpu->kvm);
+		break;
+	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
+		break;
+	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
+
+		if (val == KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL &&
+		    kvm_arm_get_vcpu_workaround_2_flag(vcpu))
+			val |= KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED;
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
+		return -EFAULT;
+
+	return 0;
+}
+
 int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
-	if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
-		void __user *uaddr = (void __user *)(long)reg->addr;
-		bool wants_02;
-		u64 val;
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+	int wa_level;

-		if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
-			return -EFAULT;
+	if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
+		return -EFAULT;

+	switch (reg->id) {
+	case KVM_REG_ARM_PSCI_VERSION:
+	{
+		bool wants_02;
+
 		wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);
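
With kvm_arm_get_fw_reg() in place, userspace saves the new firmware registers through the usual ONE_REG interface. A minimal sketch (error handling elided; vcpu_fd is assumed to be an open vCPU file descriptor):

#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

/* Read one firmware register into *val; returns 0 on success. */
static int get_fw_reg(int vcpu_fd, uint64_t id, uint64_t *val)
{
	struct kvm_one_reg reg = {
		.id   = id,
		.addr = (uint64_t)(uintptr_t)val,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}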
@@ -482,6 +551,54 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 			vcpu->kvm->arch.psci_version = val;
 			return 0;
 		}
+		break;
+	}
+
+	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+		if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
+			return -EINVAL;
+
+		if (get_kernel_wa_level(reg->id) < val)
+			return -EINVAL;
+
+		return 0;
+
+	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+		if (val & ~(KVM_REG_FEATURE_LEVEL_MASK |
+			    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED))
+			return -EINVAL;
+
+		wa_level = val & KVM_REG_FEATURE_LEVEL_MASK;
+
+		if (get_kernel_wa_level(reg->id) < wa_level)
+			return -EINVAL;
+
+		/* The enabled bit must not be set unless the level is AVAIL. */
+		if (wa_level != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL &&
+		    wa_level != val)
+			return -EINVAL;
+
+		/* Are we finished or do we need to check the enable bit ? */
+		if (kvm_arm_have_ssbd() != KVM_SSBD_KERNEL)
+			return 0;
+
+		/*
+		 * If this kernel supports the workaround to be switched on
+		 * or off, make sure it matches the requested setting.
+		 */
+		switch (wa_level) {
+		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
+			kvm_arm_set_vcpu_workaround_2_flag(vcpu,
+			    val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED);
+			break;
+		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
+			kvm_arm_set_vcpu_workaround_2_flag(vcpu, true);
+			break;
+		}
+
+		return 0;
+	default:
+		return -ENOENT;
+	}

 	return -EINVAL;
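
The get_kernel_wa_level(reg->id) < val checks above encode the migration rule: a saved mitigation level may only be restored on a host that offers at least that level of protection. A sketch with illustrative level values (the real encodings live in the KVM UAPI headers):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative ordering only: higher value = better protection. */
enum wa_level {
	WA_NOT_AVAIL = 0, WA_UNKNOWN = 1, WA_AVAIL = 2, WA_NOT_REQUIRED = 3
};

static bool restore_ok(enum wa_level host, enum wa_level saved)
{
	return host >= saved;	/* mirrors: get_kernel_wa_level(id) >= val */
}

int main(void)
{
	printf("%d\n", restore_ok(WA_AVAIL, WA_NOT_REQUIRED));	/* 0: refuse */
	printf("%d\n", restore_ok(WA_NOT_REQUIRED, WA_AVAIL));	/* 1: allow  */
	return 0;
}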
@@ -184,9 +184,7 @@ int kvm_set_irq_routing(struct kvm *kvm,

 	nr_rt_entries += 1;

-	new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)),
-		      GFP_KERNEL_ACCOUNT);
-
+	new = kzalloc(struct_size(new, map, nr_rt_entries), GFP_KERNEL_ACCOUNT);
 	if (!new)
 		return -ENOMEM;

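
struct_size(new, map, nr_rt_entries) computes sizeof(*new) plus nr_rt_entries elements of the flexible array member map, saturating rather than wrapping on integer overflow. An open-coded userspace equivalent, minus the overflow protection:

#include <stdlib.h>

struct hlist_head_sk { void *first; };	/* stand-in for struct hlist_head */

struct routing_table {			/* sketch of the kzalloc'd object */
	int nr_rt_entries;
	struct hlist_head_sk map[];	/* flexible array member */
};

static struct routing_table *table_alloc(size_t n)
{
	return calloc(1, sizeof(struct routing_table) +
			 n * sizeof(struct hlist_head_sk));
}

int main(void)
{
	struct routing_table *t = table_alloc(16);

	free(t);
	return 0;
}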
@@ -95,7 +95,7 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
  *	kvm->lock --> kvm->slots_lock --> kvm->irq_lock
  */

-DEFINE_SPINLOCK(kvm_lock);
+DEFINE_MUTEX(kvm_lock);
 static DEFINE_RAW_SPINLOCK(kvm_count_lock);
 LIST_HEAD(vm_list);

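
kvm_lock only guards vm_list walks and the VM-count bookkeeping, all in sleepable context, which is why it can become a mutex; contended waiters then sleep instead of spinning. A userspace analogue of the pattern repeated in the hunks that follow:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t vm_list_lock = PTHREAD_MUTEX_INITIALIZER;
static int nr_vms;

static void vm_created(void)
{
	pthread_mutex_lock(&vm_list_lock);	/* may sleep, unlike a spinlock */
	nr_vms++;				/* stand-in for the list_add() */
	pthread_mutex_unlock(&vm_list_lock);
}

int main(void)
{
	vm_created();
	printf("%d VM(s)\n", nr_vms);
	return 0;
}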
@@ -680,9 +680,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	if (r)
 		goto out_err;

-	spin_lock(&kvm_lock);
+	mutex_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
-	spin_unlock(&kvm_lock);
+	mutex_unlock(&kvm_lock);

 	preempt_notifier_inc();

@@ -728,9 +728,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
 	kvm_destroy_vm_debugfs(kvm);
 	kvm_arch_sync_events(kvm);
-	spin_lock(&kvm_lock);
+	mutex_lock(&kvm_lock);
 	list_del(&kvm->vm_list);
-	spin_unlock(&kvm_lock);
+	mutex_unlock(&kvm_lock);
 	kvm_free_irq_routing(kvm);
 	for (i = 0; i < KVM_NR_BUSES; i++) {
 		struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
@@ -1790,7 +1790,7 @@ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
 	if (!map->hva)
 		return;

-	if (map->page)
+	if (map->page != KVM_UNMAPPED_PAGE)
 		kunmap(map->page);
 #ifdef CONFIG_HAS_IOMEM
 	else
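
map->page is initialised to the non-NULL sentinel KVM_UNMAPPED_PAGE when the memory was mapped via memremap rather than kmap, so the old truthiness test wrongly took the kunmap() path for such mappings. A standalone sketch of the corrected check:

#include <stdio.h>

#define UNMAPPED_PAGE ((void *)-1)	/* non-NULL "no struct page" sentinel */

int main(void)
{
	void *page = UNMAPPED_PAGE;	/* e.g. an IOMEM mapping */

	if (page)
		puts("old check: would kunmap() a sentinel");
	if (page != UNMAPPED_PAGE)
		puts("new check: kunmap path");
	else
		puts("new check: memunmap path");
	return 0;
}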
@@ -4031,13 +4031,13 @@ static int vm_stat_get(void *_offset, u64 *val)
 	u64 tmp_val;

 	*val = 0;
-	spin_lock(&kvm_lock);
+	mutex_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		stat_tmp.kvm = kvm;
 		vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
 		*val += tmp_val;
 	}
-	spin_unlock(&kvm_lock);
+	mutex_unlock(&kvm_lock);
 	return 0;
 }

@@ -4050,12 +4050,12 @@ static int vm_stat_clear(void *_offset, u64 val)
 	if (val)
 		return -EINVAL;

-	spin_lock(&kvm_lock);
+	mutex_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		stat_tmp.kvm = kvm;
 		vm_stat_clear_per_vm((void *)&stat_tmp, 0);
 	}
-	spin_unlock(&kvm_lock);
+	mutex_unlock(&kvm_lock);

 	return 0;
 }
@@ -4070,13 +4070,13 @@ static int vcpu_stat_get(void *_offset, u64 *val)
 	u64 tmp_val;

 	*val = 0;
-	spin_lock(&kvm_lock);
+	mutex_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		stat_tmp.kvm = kvm;
 		vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
 		*val += tmp_val;
 	}
-	spin_unlock(&kvm_lock);
+	mutex_unlock(&kvm_lock);
 	return 0;
 }

@@ -4089,12 +4089,12 @@ static int vcpu_stat_clear(void *_offset, u64 val)
 	if (val)
 		return -EINVAL;

-	spin_lock(&kvm_lock);
+	mutex_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		stat_tmp.kvm = kvm;
 		vcpu_stat_clear_per_vm((void *)&stat_tmp, 0);
 	}
-	spin_unlock(&kvm_lock);
+	mutex_unlock(&kvm_lock);

 	return 0;
 }
@@ -4115,7 +4115,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
 	if (!kvm_dev.this_device || !kvm)
 		return;

-	spin_lock(&kvm_lock);
+	mutex_lock(&kvm_lock);
 	if (type == KVM_EVENT_CREATE_VM) {
 		kvm_createvm_count++;
 		kvm_active_vms++;
@@ -4124,7 +4124,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
 	}
 	created = kvm_createvm_count;
 	active = kvm_active_vms;
-	spin_unlock(&kvm_lock);
+	mutex_unlock(&kvm_lock);

 	env = kzalloc(sizeof(*env), GFP_KERNEL_ACCOUNT);
 	if (!env)
@@ -4221,6 +4221,11 @@ static void kvm_sched_out(struct preempt_notifier *pn,
 	kvm_arch_vcpu_put(vcpu);
 }

+static void check_processor_compat(void *rtn)
+{
+	*(int *)rtn = kvm_arch_check_processor_compat();
+}
+
 int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 	     struct module *module)
 {
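
smp_call_function_single() wants a void (*)(void *) callback, while kvm_arch_check_processor_compat() now returns its result directly, hence the small adapter above. The shape of the pattern, standalone:

#include <stdio.h>

static int arch_check_stub(void)	/* stand-in for the arch hook */
{
	return 0;
}

/* Adapter: bridge an int-returning hook to the void (*)(void *) shape. */
static void check_compat(void *ret)
{
	*(int *)ret = arch_check_stub();
}

int main(void)
{
	int r = -1;

	check_compat(&r);		/* in the kernel this runs on each CPU */
	printf("compat: %d\n", r);
	return r;
}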
@@ -4252,9 +4257,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 		goto out_free_0a;

 	for_each_online_cpu(cpu) {
-		smp_call_function_single(cpu,
-					 kvm_arch_check_processor_compat,
-					 &r, 1);
+		smp_call_function_single(cpu, check_processor_compat, &r, 1);
 		if (r < 0)
 			goto out_free_1;
 	}