Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM update from Paolo Bonzini:
 "Fairly small update, but there are some interesting new features.

  Common:
     Optional support for adding a small amount of polling on each HLT
     instruction executed in the guest (or equivalent for other
     architectures).  This can improve latency up to 50% on some
     scenarios (e.g. O_DSYNC writes or TCP_RR netperf tests).  This
     also has to be enabled manually for now, but the plan is to
     auto-tune this in the future.

  ARM/ARM64:
     The highlights are support for GICv3 emulation and dirty page
     tracking.

  s390:
     Several optimizations and bugfixes.  Also a first: a feature
     exposed by KVM (UUID and long guest name in /proc/sysinfo) before
     it is available in IBM's hypervisor! :)

  MIPS:
     Bugfixes.

  x86:
     Support for PML (page modification logging, a new feature in
     Broadwell Xeons that speeds up dirty page tracking), nested
     virtualization improvements (nested APICv - a nice optimization),
     usual round of emulation fixes.  There is also a new option to
     reduce latency of the TSC deadline timer in the guest; this needs
     to be tuned manually.

  Some commits are common between this pull and Catalin's; I see you
  have already included his tree.  ARM has other conflicts where
  functions are added in the same place by 3.19-rc and 3.20 patches.
  These are not large though, and entirely within KVM.

  Powerpc:
     Nothing yet.  The KVM/PPC changes will come in through the PPC
     maintainers, because I haven't received them yet and I might end
     up being offline for some part of next week"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (130 commits)
  KVM: ia64: drop kvm.h from installed user headers
  KVM: x86: fix build with !CONFIG_SMP
  KVM: x86: emulate: correct page fault error code for NoWrite instructions
  KVM: Disable compat ioctl for s390
  KVM: s390: add cpu model support
  KVM: s390: use facilities and cpu_id per KVM
  KVM: s390/CPACF: Choose crypto control block format
  s390/kernel: Update /proc/sysinfo file with Extended Name and UUID
  KVM: s390: reenable LPP facility
  KVM: s390: floating irqs: fix user triggerable endless loop
  kvm: add halt_poll_ns module parameter
  kvm: remove KVM_MMIO_SIZE
  KVM: MIPS: Don't leak FPU/DSP to guest
  KVM: MIPS: Disable HTW while in guest
  KVM: nVMX: Enable nested posted interrupt processing
  KVM: nVMX: Enable nested virtual interrupt delivery
  KVM: nVMX: Enable nested apic register virtualization
  KVM: nVMX: Make nested control MSRs per-cpu
  KVM: nVMX: Enable nested virtualize x2apic mode
  KVM: nVMX: Prepare for using hardware MSR bitmap
  ...
commit b9085bcbf5
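The halt polling described in the pull message ("a small amount of polling on each HLT instruction") is switched on through the halt_poll_ns module parameter added by the "kvm: add halt_poll_ns module parameter" commit in the shortlog. The following is a minimal sketch of enabling it manually; the sysfs path and the example value are assumptions rather than anything stated in this merge:

    #include <stdio.h>

    /*
     * Sketch only: assumes the kvm module exposes halt_poll_ns under
     * /sys/module/kvm/parameters (x86 naming); adjust for other arches.
     */
    static int set_halt_poll_ns(unsigned long ns)
    {
            FILE *f = fopen("/sys/module/kvm/parameters/halt_poll_ns", "w");

            if (!f)
                    return -1;
            fprintf(f, "%lu\n", ns);        /* e.g. 500000 for 0.5 ms of polling */
            return fclose(f);
    }

Writing 0 disables polling again; as the pull message says, auto-tuning is planned for later.
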
@@ -612,11 +612,14 @@ Type: vm ioctl
Parameters: none
Returns: 0 on success, -1 on error

Creates an interrupt controller model in the kernel. On x86, creates a virtual
ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
only go to the IOAPIC. On ARM/arm64, a GIC is
created. On s390, a dummy irq routing table is created.
Creates an interrupt controller model in the kernel.
On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up
future vcpus to have a local APIC. IRQ routing for GSIs 0-15 is set to both
PIC and IOAPIC; GSI 16-23 only go to the IOAPIC.
On ARM/arm64, a GICv2 is created. Any other GIC versions require the usage of
KVM_CREATE_DEVICE, which also supports creating a GICv2. Using
KVM_CREATE_DEVICE is preferred over KVM_CREATE_IRQCHIP for GICv2.
On s390, a dummy irq routing table is created.

Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled
before KVM_CREATE_IRQCHIP can be used.
@@ -2312,7 +2315,7 @@ struct kvm_s390_interrupt {

type can be one of the following:

KVM_S390_SIGP_STOP (vcpu) - sigp restart
KVM_S390_SIGP_STOP (vcpu) - sigp stop; optional flags in parm
KVM_S390_PROGRAM_INT (vcpu) - program check; code in parm
KVM_S390_SIGP_SET_PREFIX (vcpu) - sigp set prefix; prefix address in parm
KVM_S390_RESTART (vcpu) - restart
@@ -3225,3 +3228,23 @@ userspace from doing that.
If the hcall number specified is not one that has an in-kernel
implementation, the KVM_ENABLE_CAP ioctl will fail with an EINVAL
error.

7.2 KVM_CAP_S390_USER_SIGP

Architectures: s390
Parameters: none

This capability controls which SIGP orders will be handled completely in user
space. With this capability enabled, all fast orders will be handled completely
in the kernel:
- SENSE
- SENSE RUNNING
- EXTERNAL CALL
- EMERGENCY SIGNAL
- CONDITIONAL EMERGENCY SIGNAL

All other orders will be handled completely in user space.

Only privileged operation exceptions will be checked for in the kernel (or even
in the hardware prior to interception). If this capability is not enabled, the
old way of handling SIGP orders is used (partially in kernel and user space).
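The new KVM_CAP_S390_USER_SIGP capability documented above is turned on with the ordinary KVM_ENABLE_CAP vm ioctl. A minimal userspace sketch follows; it is illustration only, not part of this merge, and vm_fd is assumed to come from KVM_CREATE_VM on an s390 host:

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Hand the slow SIGP orders to userspace; fast orders stay in the kernel. */
    static int enable_user_sigp(int vm_fd)
    {
            struct kvm_enable_cap cap;

            memset(&cap, 0, sizeof(cap));
            cap.cap = KVM_CAP_S390_USER_SIGP;       /* no flags or args needed */

            /* Returns 0 on success, -1 with errno set otherwise. */
            return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
    }
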
@@ -3,22 +3,42 @@ ARM Virtual Generic Interrupt Controller (VGIC)

Device types supported:
  KVM_DEV_TYPE_ARM_VGIC_V2     ARM Generic Interrupt Controller v2.0
  KVM_DEV_TYPE_ARM_VGIC_V3     ARM Generic Interrupt Controller v3.0

Only one VGIC instance may be instantiated through either this API or the
legacy KVM_CREATE_IRQCHIP api. The created VGIC will act as the VM interrupt
controller, requiring emulated user-space devices to inject interrupts to the
VGIC instead of directly to CPUs.

Creating a guest GICv3 device requires a host GICv3 as well.
GICv3 implementations with hardware compatibility support allow a guest GICv2
as well.

Groups:
  KVM_DEV_ARM_VGIC_GRP_ADDR
  Attributes:
    KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
      Base address in the guest physical address space of the GIC distributor
      register mappings.
      register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
      This address needs to be 4K aligned and the region covers 4 KByte.

    KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit)
      Base address in the guest physical address space of the GIC virtual cpu
      interface register mappings.
      interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
      This address needs to be 4K aligned and the region covers 4 KByte.

    KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit)
      Base address in the guest physical address space of the GICv3 distributor
      register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
      This address needs to be 64K aligned and the region covers 64 KByte.

    KVM_VGIC_V3_ADDR_TYPE_REDIST (rw, 64-bit)
      Base address in the guest physical address space of the GICv3
      redistributor register mappings. There are two 64K pages for each
      VCPU and all of the redistributor pages are contiguous.
      Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
      This address needs to be 64K aligned.


  KVM_DEV_ARM_VGIC_GRP_DIST_REGS
  Attributes:
@@ -36,6 +56,7 @@ Groups:
    the register.
  Limitations:
    - Priorities are not implemented, and registers are RAZ/WI
    - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
  Errors:
    -ENODEV: Getting or setting this register is not yet supported
    -EBUSY: One or more VCPUs are running
@@ -68,6 +89,7 @@ Groups:

  Limitations:
    - Priorities are not implemented, and registers are RAZ/WI
    - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
  Errors:
    -ENODEV: Getting or setting this register is not yet supported
    -EBUSY: One or more VCPUs are running
@@ -81,3 +103,14 @@ Groups:
    -EINVAL: Value set is out of the expected range
    -EBUSY: Value has already been set, or GIC has already been initialized
            with default values.

  KVM_DEV_ARM_VGIC_GRP_CTRL
  Attributes:
    KVM_DEV_ARM_VGIC_CTRL_INIT
      request the initialization of the VGIC, no additional parameter in
      kvm_device_attr.addr.
  Errors:
    -ENXIO: VGIC not properly configured as required prior to calling
            this attribute
    -ENODEV: no online VCPU
    -ENOMEM: memory shortage when allocating vgic internal data
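As the updated text above says, a guest GICv3 is created with KVM_CREATE_DEVICE rather than KVM_CREATE_IRQCHIP and then configured through the address and control groups. A rough userspace sketch follows (not part of the patch; the VM fd and the guest physical addresses are assumptions chosen for illustration):

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int create_and_init_vgic_v3(int vm_fd)
    {
            struct kvm_create_device cd = { .type = KVM_DEV_TYPE_ARM_VGIC_V3 };
            __u64 dist = 0x08000000, redist = 0x080a0000; /* example 64K-aligned GPAs */
            struct kvm_device_attr attr;
            int ret;

            ret = ioctl(vm_fd, KVM_CREATE_DEVICE, &cd);   /* fills in cd.fd */
            if (ret)
                    return ret;

            memset(&attr, 0, sizeof(attr));
            attr.group = KVM_DEV_ARM_VGIC_GRP_ADDR;
            attr.attr = KVM_VGIC_V3_ADDR_TYPE_DIST;
            attr.addr = (__u64)(unsigned long)&dist;      /* pointer to a 64-bit GPA */
            ret = ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
            if (ret)
                    return ret;

            attr.attr = KVM_VGIC_V3_ADDR_TYPE_REDIST;
            attr.addr = (__u64)(unsigned long)&redist;
            ret = ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
            if (ret)
                    return ret;

            /* Request initialization once all VCPUs have been created. */
            memset(&attr, 0, sizeof(attr));
            attr.group = KVM_DEV_ARM_VGIC_GRP_CTRL;
            attr.attr = KVM_DEV_ARM_VGIC_CTRL_INIT;
            return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
    }

Any failure maps onto the error codes listed for the respective attribute groups above.
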
@@ -24,3 +24,62 @@ Returns: 0

Clear the CMMA status for all guest pages, so any pages the guest marked
as unused are again used and may not be reclaimed by the host.

1.3. ATTRIBUTE KVM_S390_VM_MEM_LIMIT_SIZE
Parameters: in attr->addr the address for the new limit of guest memory
Returns: -EFAULT if the given address is not accessible
         -EINVAL if the virtual machine is of type UCONTROL
         -E2BIG if the given guest memory is too big for that machine
         -EBUSY if a vcpu is already defined
         -ENOMEM if not enough memory is available for a new shadow guest mapping
         0 otherwise

Allows userspace to query the actual limit and set a new limit for
the maximum guest memory size. The limit will be rounded up to
2048 MB, 4096 GB, 8192 TB respectively, as this limit is governed by
the number of page table levels.

2. GROUP: KVM_S390_VM_CPU_MODEL
Architectures: s390

2.1. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE (r/o)

Allows user space to retrieve machine and kvm specific cpu related information:

struct kvm_s390_vm_cpu_machine {
       __u64 cpuid;           # CPUID of host
       __u32 ibc;             # IBC level range offered by host
       __u8  pad[4];
       __u64 fac_mask[256];   # set of cpu facilities enabled by KVM
       __u64 fac_list[256];   # set of cpu facilities offered by host
}

Parameters: address of buffer to store the machine related cpu data
            of type struct kvm_s390_vm_cpu_machine*
Returns:    -EFAULT if the given address is not accessible from kernel space
            -ENOMEM if not enough memory is available to process the ioctl
            0 in case of success

2.2. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR (r/w)

Allows user space to retrieve or request to change cpu related information for a vcpu:

struct kvm_s390_vm_cpu_processor {
       __u64 cpuid;           # CPUID currently (to be) used by this vcpu
       __u16 ibc;             # IBC level currently (to be) used by this vcpu
       __u8  pad[6];
       __u64 fac_list[256];   # set of cpu facilities currently (to be) used
                              # by this vcpu
}

KVM does not enforce or limit the cpu model data in any form. Take the
information retrieved by means of KVM_S390_VM_CPU_MACHINE as a hint for
reasonable configuration setups. Instruction interceptions triggered by
additionally set facility bits that are not handled by KVM need to be
implemented in the VM driver code.

Parameters: address of buffer to store/set the processor related cpu
            data of type struct kvm_s390_vm_cpu_processor*.
Returns:    -EBUSY in case 1 or more vcpus are already activated (only in write case)
            -EFAULT if the given address is not accessible from kernel space
            -ENOMEM if not enough memory is available to process the ioctl
            0 in case of success
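For orientation only (this wrapper is not part of the patch): the VM attributes above are reached with the generic KVM_GET_DEVICE_ATTR/KVM_SET_DEVICE_ATTR ioctls issued on the VM file descriptor, roughly as sketched below for the read-only machine attribute. The vm_fd and the caller-supplied buffer are assumptions.

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>   /* on s390 this pulls in the CPU model attribute definitions */

    static int query_host_cpu_model(int vm_fd, struct kvm_s390_vm_cpu_machine *mach)
    {
            struct kvm_device_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.group = KVM_S390_VM_CPU_MODEL;
            attr.attr = KVM_S390_VM_CPU_MACHINE;
            attr.addr = (__u64)(unsigned long)mach;

            /* KVM fills in cpuid, ibc and the two facility bitmaps. */
            return ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
    }

The r/w KVM_S390_VM_CPU_PROCESSOR attribute is written the same way with KVM_SET_DEVICE_ATTR, before any VCPU is created (hence the -EBUSY case documented above).
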
@ -96,6 +96,7 @@ extern char __kvm_hyp_code_end[];
|
|||
|
||||
extern void __kvm_flush_vm_context(void);
|
||||
extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
|
||||
extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
|
||||
|
||||
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
|
||||
#endif
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include <asm/kvm_asm.h>
|
||||
#include <asm/kvm_mmio.h>
|
||||
#include <asm/kvm_arm.h>
|
||||
#include <asm/cputype.h>
|
||||
|
||||
unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num);
|
||||
unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu);
|
||||
|
@ -177,9 +178,9 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
|
|||
return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
|
||||
}
|
||||
|
||||
static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
|
||||
static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.cp15[c0_MPIDR];
|
||||
return vcpu->arch.cp15[c0_MPIDR] & MPIDR_HWID_BITMASK;
|
||||
}
|
||||
|
||||
static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
|
||||
|
|
|
@ -68,6 +68,7 @@ struct kvm_arch {
|
|||
|
||||
/* Interrupt controller */
|
||||
struct vgic_dist vgic;
|
||||
int max_vcpus;
|
||||
};
|
||||
|
||||
#define KVM_NR_MEM_OBJS 40
|
||||
|
@ -144,6 +145,7 @@ struct kvm_vm_stat {
|
|||
};
|
||||
|
||||
struct kvm_vcpu_stat {
|
||||
u32 halt_successful_poll;
|
||||
u32 halt_wakeup;
|
||||
};
|
||||
|
||||
|
@ -231,6 +233,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
|
|||
int kvm_perf_init(void);
|
||||
int kvm_perf_teardown(void);
|
||||
|
||||
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
|
||||
|
||||
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
|
||||
|
||||
static inline void kvm_arch_hardware_disable(void) {}
|
||||
static inline void kvm_arch_hardware_unsetup(void) {}
|
||||
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
|
||||
|
|
|
@ -37,6 +37,7 @@ struct kvm_exit_mmio {
|
|||
u8 data[8];
|
||||
u32 len;
|
||||
bool is_write;
|
||||
void *private;
|
||||
};
|
||||
|
||||
static inline void kvm_prepare_mmio(struct kvm_run *run,
|
||||
|
|
|
@ -115,6 +115,27 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
|
|||
pmd_val(*pmd) |= L_PMD_S2_RDWR;
|
||||
}
|
||||
|
||||
static inline void kvm_set_s2pte_readonly(pte_t *pte)
|
||||
{
|
||||
pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY;
|
||||
}
|
||||
|
||||
static inline bool kvm_s2pte_readonly(pte_t *pte)
|
||||
{
|
||||
return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY;
|
||||
}
|
||||
|
||||
static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
|
||||
{
|
||||
pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY;
|
||||
}
|
||||
|
||||
static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
|
||||
{
|
||||
return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY;
|
||||
}
|
||||
|
||||
|
||||
/* Open coded p*d_addr_end that can deal with 64bit addresses */
|
||||
#define kvm_pgd_addr_end(addr, end) \
|
||||
({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \
|
||||
|
|
|
@ -129,6 +129,7 @@
|
|||
#define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */
|
||||
#define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */
|
||||
|
||||
#define L_PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[1] */
|
||||
#define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
|
||||
|
||||
/*
|
||||
|
|
|
@ -175,6 +175,8 @@ struct kvm_arch_memory_slot {
|
|||
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
|
||||
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
|
||||
#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
|
||||
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
|
||||
|
||||
/* KVM_IRQ_LINE irq field index values */
|
||||
#define KVM_ARM_IRQ_TYPE_SHIFT 24
|
||||
|
|
|
@ -21,8 +21,10 @@ config KVM
|
|||
select PREEMPT_NOTIFIERS
|
||||
select ANON_INODES
|
||||
select HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
|
||||
select KVM_MMIO
|
||||
select KVM_ARM_HOST
|
||||
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
select SRCU
|
||||
depends on ARM_VIRT_EXT && ARM_LPAE
|
||||
---help---
|
||||
|
|
|
@ -22,4 +22,5 @@ obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
|
|||
obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
|
||||
obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
|
||||
obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
|
||||
obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
|
||||
obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
|
||||
|
|
|
@ -132,6 +132,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
|||
/* Mark the initial VMID generation invalid */
|
||||
kvm->arch.vmid_gen = 0;
|
||||
|
||||
/* The maximum number of VCPUs is limited by the host's GIC model */
|
||||
kvm->arch.max_vcpus = kvm_vgic_get_max_vcpus();
|
||||
|
||||
return ret;
|
||||
out_free_stage2_pgd:
|
||||
kvm_free_stage2_pgd(kvm);
|
||||
|
@ -218,6 +221,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
|
|||
goto out;
|
||||
}
|
||||
|
||||
if (id >= kvm->arch.max_vcpus) {
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
|
||||
if (!vcpu) {
|
||||
err = -ENOMEM;
|
||||
|
@ -241,9 +249,8 @@ out:
|
|||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
||||
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
|
||||
|
@ -777,9 +784,39 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
|
||||
* @kvm: kvm instance
|
||||
* @log: slot id and address to which we copy the log
|
||||
*
|
||||
* Steps 1-4 below provide general overview of dirty page logging. See
|
||||
* kvm_get_dirty_log_protect() function description for additional details.
|
||||
*
|
||||
* We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
|
||||
* always flush the TLB (step 4) even if previous step failed and the dirty
|
||||
* bitmap may be corrupt. Regardless of previous outcome the KVM logging API
|
||||
* does not preclude subsequent dirty log reads by user space. Flushing the TLB ensures
|
||||
* writes will be marked dirty for next log read.
|
||||
*
|
||||
* 1. Take a snapshot of the bit and clear it if needed.
|
||||
* 2. Write protect the corresponding page.
|
||||
* 3. Copy the snapshot to the userspace.
|
||||
* 4. Flush TLB's if needed.
|
||||
*/
|
||||
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
|
||||
{
|
||||
return -EINVAL;
|
||||
bool is_dirty = false;
|
||||
int r;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
|
||||
r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);
|
||||
|
||||
if (is_dirty)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
|
||||
|
@ -811,7 +848,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|||
switch (ioctl) {
|
||||
case KVM_CREATE_IRQCHIP: {
|
||||
if (vgic_present)
|
||||
return kvm_vgic_create(kvm);
|
||||
return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
|
||||
else
|
||||
return -ENXIO;
|
||||
}
|
||||
|
@ -1035,6 +1072,19 @@ static void check_kvm_target_cpu(void *ret)
|
|||
*(int *)ret = kvm_target_cpu();
|
||||
}
|
||||
|
||||
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
int i;
|
||||
|
||||
mpidr &= MPIDR_HWID_BITMASK;
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
|
||||
return vcpu;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize Hyp-mode and memory mappings on all CPUs.
|
||||
*/
|
||||
|
|
|
@ -87,11 +87,13 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
|||
*/
|
||||
static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
{
|
||||
trace_kvm_wfi(*vcpu_pc(vcpu));
|
||||
if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE)
|
||||
if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) {
|
||||
trace_kvm_wfx(*vcpu_pc(vcpu), true);
|
||||
kvm_vcpu_on_spin(vcpu);
|
||||
else
|
||||
} else {
|
||||
trace_kvm_wfx(*vcpu_pc(vcpu), false);
|
||||
kvm_vcpu_block(vcpu);
|
||||
}
|
||||
|
||||
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
|
||||
|
||||
|
|
|
@ -66,6 +66,17 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
|
|||
bx lr
|
||||
ENDPROC(__kvm_tlb_flush_vmid_ipa)
|
||||
|
||||
/**
|
||||
* void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs
|
||||
*
|
||||
* Reuses __kvm_tlb_flush_vmid_ipa() for ARMv7, without passing address
|
||||
* parameter
|
||||
*/
|
||||
|
||||
ENTRY(__kvm_tlb_flush_vmid)
|
||||
b __kvm_tlb_flush_vmid_ipa
|
||||
ENDPROC(__kvm_tlb_flush_vmid)
|
||||
|
||||
/********************************************************************
|
||||
* Flush TLBs and instruction caches of all CPUs inside the inner-shareable
|
||||
* domain, for all VMIDs
|
||||
|
|
|
@ -45,6 +45,26 @@ static phys_addr_t hyp_idmap_vector;
|
|||
#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
|
||||
|
||||
#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x))
|
||||
#define kvm_pud_huge(_x) pud_huge(_x)
|
||||
|
||||
#define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0)
|
||||
#define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1)
|
||||
|
||||
static bool memslot_is_logging(struct kvm_memory_slot *memslot)
|
||||
{
|
||||
return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
|
||||
* @kvm: pointer to kvm structure.
|
||||
*
|
||||
* Interface to HYP function to flush all VM TLB entries
|
||||
*/
|
||||
void kvm_flush_remote_tlbs(struct kvm *kvm)
|
||||
{
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
|
||||
}
|
||||
|
||||
static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
|
||||
{
|
||||
|
@ -78,6 +98,25 @@ static void kvm_flush_dcache_pud(pud_t pud)
|
|||
__kvm_flush_dcache_pud(pud);
|
||||
}
|
||||
|
||||
/**
|
||||
* stage2_dissolve_pmd() - clear and flush huge PMD entry
|
||||
* @kvm: pointer to kvm structure.
|
||||
* @addr: IPA
|
||||
* @pmd: pmd pointer for IPA
|
||||
*
|
||||
* Clears a huge PMD entry and flushes the 1st and 2nd stage TLBs for addr. Marks all
|
||||
* pages in the range dirty.
|
||||
*/
|
||||
static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
|
||||
{
|
||||
if (!kvm_pmd_huge(*pmd))
|
||||
return;
|
||||
|
||||
pmd_clear(pmd);
|
||||
kvm_tlb_flush_vmid_ipa(kvm, addr);
|
||||
put_page(virt_to_page(pmd));
|
||||
}
|
||||
|
||||
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
|
||||
int min, int max)
|
||||
{
|
||||
|
@ -819,10 +858,15 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
|
|||
}
|
||||
|
||||
static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
|
||||
phys_addr_t addr, const pte_t *new_pte, bool iomap)
|
||||
phys_addr_t addr, const pte_t *new_pte,
|
||||
unsigned long flags)
|
||||
{
|
||||
pmd_t *pmd;
|
||||
pte_t *pte, old_pte;
|
||||
bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP;
|
||||
bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE;
|
||||
|
||||
VM_BUG_ON(logging_active && !cache);
|
||||
|
||||
/* Create stage-2 page table mapping - Levels 0 and 1 */
|
||||
pmd = stage2_get_pmd(kvm, cache, addr);
|
||||
|
@ -834,6 +878,13 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* While dirty page logging - dissolve huge PMD, then continue on to
|
||||
* allocate page.
|
||||
*/
|
||||
if (logging_active)
|
||||
stage2_dissolve_pmd(kvm, addr, pmd);
|
||||
|
||||
/* Create stage-2 page mappings - Level 2 */
|
||||
if (pmd_none(*pmd)) {
|
||||
if (!cache)
|
||||
|
@ -890,7 +941,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
|
|||
if (ret)
|
||||
goto out;
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
ret = stage2_set_pte(kvm, &cache, addr, &pte, true);
|
||||
ret = stage2_set_pte(kvm, &cache, addr, &pte,
|
||||
KVM_S2PTE_FLAG_IS_IOMAP);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
@ -957,6 +1009,165 @@ static bool kvm_is_device_pfn(unsigned long pfn)
|
|||
return !pfn_valid(pfn);
|
||||
}
|
||||
|
||||
/**
|
||||
* stage2_wp_ptes - write protect PMD range
|
||||
* @pmd: pointer to pmd entry
|
||||
* @addr: range start address
|
||||
* @end: range end address
|
||||
*/
|
||||
static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
|
||||
{
|
||||
pte_t *pte;
|
||||
|
||||
pte = pte_offset_kernel(pmd, addr);
|
||||
do {
|
||||
if (!pte_none(*pte)) {
|
||||
if (!kvm_s2pte_readonly(pte))
|
||||
kvm_set_s2pte_readonly(pte);
|
||||
}
|
||||
} while (pte++, addr += PAGE_SIZE, addr != end);
|
||||
}
|
||||
|
||||
/**
|
||||
* stage2_wp_pmds - write protect PUD range
|
||||
* @pud: pointer to pud entry
|
||||
* @addr: range start address
|
||||
* @end: range end address
|
||||
*/
|
||||
static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
|
||||
{
|
||||
pmd_t *pmd;
|
||||
phys_addr_t next;
|
||||
|
||||
pmd = pmd_offset(pud, addr);
|
||||
|
||||
do {
|
||||
next = kvm_pmd_addr_end(addr, end);
|
||||
if (!pmd_none(*pmd)) {
|
||||
if (kvm_pmd_huge(*pmd)) {
|
||||
if (!kvm_s2pmd_readonly(pmd))
|
||||
kvm_set_s2pmd_readonly(pmd);
|
||||
} else {
|
||||
stage2_wp_ptes(pmd, addr, next);
|
||||
}
|
||||
}
|
||||
} while (pmd++, addr = next, addr != end);
|
||||
}
|
||||
|
||||
/**
|
||||
* stage2_wp_puds - write protect PGD range
|
||||
* @pgd: pointer to pgd entry
|
||||
* @addr: range start address
|
||||
* @end: range end address
|
||||
*
|
||||
* Process PUD entries; for a huge PUD we cause a panic.
|
||||
*/
|
||||
static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
|
||||
{
|
||||
pud_t *pud;
|
||||
phys_addr_t next;
|
||||
|
||||
pud = pud_offset(pgd, addr);
|
||||
do {
|
||||
next = kvm_pud_addr_end(addr, end);
|
||||
if (!pud_none(*pud)) {
|
||||
/* TODO:PUD not supported, revisit later if supported */
|
||||
BUG_ON(kvm_pud_huge(*pud));
|
||||
stage2_wp_pmds(pud, addr, next);
|
||||
}
|
||||
} while (pud++, addr = next, addr != end);
|
||||
}
|
||||
|
||||
/**
|
||||
* stage2_wp_range() - write protect stage2 memory region range
|
||||
* @kvm: The KVM pointer
|
||||
* @addr: Start address of range
|
||||
* @end: End address of range
|
||||
*/
|
||||
static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
phys_addr_t next;
|
||||
|
||||
pgd = kvm->arch.pgd + pgd_index(addr);
|
||||
do {
|
||||
/*
|
||||
* Release kvm_mmu_lock periodically if the memory region is
|
||||
* large. Otherwise, we may see kernel panics with
|
||||
* CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,
|
||||
* CONFIG_LOCKDEP. Additionally, holding the lock too long
|
||||
* will also starve other vCPUs.
|
||||
*/
|
||||
if (need_resched() || spin_needbreak(&kvm->mmu_lock))
|
||||
cond_resched_lock(&kvm->mmu_lock);
|
||||
|
||||
next = kvm_pgd_addr_end(addr, end);
|
||||
if (pgd_present(*pgd))
|
||||
stage2_wp_puds(pgd, addr, next);
|
||||
} while (pgd++, addr = next, addr != end);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot
|
||||
* @kvm: The KVM pointer
|
||||
* @slot: The memory slot to write protect
|
||||
*
|
||||
* Called to start logging dirty pages after memory region
|
||||
* KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns
|
||||
* all present PMD and PTEs are write protected in the memory region.
|
||||
* Afterwards the dirty page log can be read.
|
||||
*
|
||||
* Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired,
|
||||
* serializing operations for VM memory regions.
|
||||
*/
|
||||
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
|
||||
{
|
||||
struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot);
|
||||
phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
|
||||
phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
stage2_wp_range(kvm, start, end);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_mmu_write_protect_pt_masked() - write protect dirty pages
|
||||
* @kvm: The KVM pointer
|
||||
* @slot: The memory slot associated with mask
|
||||
* @gfn_offset: The gfn offset in memory slot
|
||||
* @mask: The mask of dirty pages at offset 'gfn_offset' in this memory
|
||||
* slot to be write protected
|
||||
*
|
||||
* Walks the bits set in mask and write protects the associated pte's. Caller must
|
||||
* acquire kvm_mmu_lock.
|
||||
*/
|
||||
static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
gfn_t gfn_offset, unsigned long mask)
|
||||
{
|
||||
phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
|
||||
phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
|
||||
phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
|
||||
|
||||
stage2_wp_range(kvm, start, end);
|
||||
}
|
||||
|
||||
/*
|
||||
* kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
|
||||
* dirty pages.
|
||||
*
|
||||
* It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
|
||||
* enable dirty logging for them.
|
||||
*/
|
||||
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
gfn_t gfn_offset, unsigned long mask)
|
||||
{
|
||||
kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
|
||||
}
|
||||
|
||||
static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
|
||||
unsigned long size, bool uncached)
|
||||
{
|
||||
|
@ -977,6 +1188,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||
pfn_t pfn;
|
||||
pgprot_t mem_type = PAGE_S2;
|
||||
bool fault_ipa_uncached;
|
||||
bool logging_active = memslot_is_logging(memslot);
|
||||
unsigned long flags = 0;
|
||||
|
||||
write_fault = kvm_is_write_fault(vcpu);
|
||||
if (fault_status == FSC_PERM && !write_fault) {
|
||||
|
@ -993,7 +1206,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (is_vm_hugetlb_page(vma)) {
|
||||
if (is_vm_hugetlb_page(vma) && !logging_active) {
|
||||
hugetlb = true;
|
||||
gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
|
||||
} else {
|
||||
|
@ -1034,12 +1247,30 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||
if (is_error_pfn(pfn))
|
||||
return -EFAULT;
|
||||
|
||||
if (kvm_is_device_pfn(pfn))
|
||||
if (kvm_is_device_pfn(pfn)) {
|
||||
mem_type = PAGE_S2_DEVICE;
|
||||
flags |= KVM_S2PTE_FLAG_IS_IOMAP;
|
||||
} else if (logging_active) {
|
||||
/*
|
||||
* Faults on pages in a memslot with logging enabled
|
||||
* should not be mapped with huge pages (it introduces churn
|
||||
* and performance degradation), so force a pte mapping.
|
||||
*/
|
||||
force_pte = true;
|
||||
flags |= KVM_S2_FLAG_LOGGING_ACTIVE;
|
||||
|
||||
/*
|
||||
* Only actually map the page as writable if this was a write
|
||||
* fault.
|
||||
*/
|
||||
if (!write_fault)
|
||||
writable = false;
|
||||
}
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
if (mmu_notifier_retry(kvm, mmu_seq))
|
||||
goto out_unlock;
|
||||
|
||||
if (!hugetlb && !force_pte)
|
||||
hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
|
||||
|
||||
|
@ -1056,16 +1287,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||
ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
|
||||
} else {
|
||||
pte_t new_pte = pfn_pte(pfn, mem_type);
|
||||
|
||||
if (writable) {
|
||||
kvm_set_s2pte_writable(&new_pte);
|
||||
kvm_set_pfn_dirty(pfn);
|
||||
mark_page_dirty(kvm, gfn);
|
||||
}
|
||||
coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
|
||||
ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
|
||||
pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
|
||||
ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
|
||||
}
|
||||
|
||||
|
||||
out_unlock:
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
kvm_release_pfn_clean(pfn);
|
||||
|
@ -1215,7 +1446,14 @@ static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
|
|||
{
|
||||
pte_t *pte = (pte_t *)data;
|
||||
|
||||
stage2_set_pte(kvm, NULL, gpa, pte, false);
|
||||
/*
|
||||
* We can always call stage2_set_pte with KVM_S2PTE_FLAG_LOGGING_ACTIVE
|
||||
* flag clear because MMU notifiers will have unmapped a huge PMD before
|
||||
* calling ->change_pte() (which in turn calls kvm_set_spte_hva()) and
|
||||
* therefore stage2_set_pte() never needs to clear out a huge PMD
|
||||
* through this calling path.
|
||||
*/
|
||||
stage2_set_pte(kvm, NULL, gpa, pte, 0);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1348,6 +1586,13 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
|
|||
const struct kvm_memory_slot *old,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
/*
|
||||
* At this point memslot has been committed and there is an
|
||||
* allocated dirty_bitmap[], dirty pages will be tracked while the
|
||||
* memory slot is write protected.
|
||||
*/
|
||||
if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
|
||||
kvm_mmu_wp_memory_region(kvm, mem->slot);
|
||||
}
|
||||
|
||||
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
|
@ -1360,7 +1605,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
|||
bool writable = !(mem->flags & KVM_MEM_READONLY);
|
||||
int ret = 0;
|
||||
|
||||
if (change != KVM_MR_CREATE && change != KVM_MR_MOVE)
|
||||
if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
|
||||
change != KVM_MR_FLAGS_ONLY)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
|
@ -1411,6 +1657,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
|||
phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) +
|
||||
vm_start - vma->vm_start;
|
||||
|
||||
/* IO region dirty page logging not allowed */
|
||||
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
|
||||
return -EINVAL;
|
||||
|
||||
ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
|
||||
vm_end - vm_start,
|
||||
writable);
|
||||
|
@ -1420,6 +1670,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
|||
hva = vm_end;
|
||||
} while (hva < reg_end);
|
||||
|
||||
if (change == KVM_MR_FLAGS_ONLY)
|
||||
return ret;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
if (ret)
|
||||
unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include <asm/cputype.h>
|
||||
#include <asm/kvm_emulate.h>
|
||||
#include <asm/kvm_psci.h>
|
||||
#include <asm/kvm_host.h>
|
||||
|
||||
/*
|
||||
* This is an implementation of the Power State Coordination Interface
|
||||
|
@ -66,25 +67,17 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
|
|||
static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
|
||||
{
|
||||
struct kvm *kvm = source_vcpu->kvm;
|
||||
struct kvm_vcpu *vcpu = NULL, *tmp;
|
||||
struct kvm_vcpu *vcpu = NULL;
|
||||
wait_queue_head_t *wq;
|
||||
unsigned long cpu_id;
|
||||
unsigned long context_id;
|
||||
unsigned long mpidr;
|
||||
phys_addr_t target_pc;
|
||||
int i;
|
||||
|
||||
cpu_id = *vcpu_reg(source_vcpu, 1);
|
||||
cpu_id = *vcpu_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK;
|
||||
if (vcpu_mode_is_32bit(source_vcpu))
|
||||
cpu_id &= ~((u32) 0);
|
||||
|
||||
kvm_for_each_vcpu(i, tmp, kvm) {
|
||||
mpidr = kvm_vcpu_get_mpidr(tmp);
|
||||
if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) {
|
||||
vcpu = tmp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id);
|
||||
|
||||
/*
|
||||
* Make sure the caller requested a valid CPU and that the CPU is
|
||||
|
@ -155,7 +148,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
|
|||
* then ON else OFF
|
||||
*/
|
||||
kvm_for_each_vcpu(i, tmp, kvm) {
|
||||
mpidr = kvm_vcpu_get_mpidr(tmp);
|
||||
mpidr = kvm_vcpu_get_mpidr_aff(tmp);
|
||||
if (((mpidr & target_affinity_mask) == target_affinity) &&
|
||||
!tmp->arch.pause) {
|
||||
return PSCI_0_2_AFFINITY_LEVEL_ON;
|
||||
|
|
|
@ -140,19 +140,22 @@ TRACE_EVENT(kvm_emulate_cp15_imp,
|
|||
__entry->CRm, __entry->Op2)
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_wfi,
|
||||
TP_PROTO(unsigned long vcpu_pc),
|
||||
TP_ARGS(vcpu_pc),
|
||||
TRACE_EVENT(kvm_wfx,
|
||||
TP_PROTO(unsigned long vcpu_pc, bool is_wfe),
|
||||
TP_ARGS(vcpu_pc, is_wfe),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned long, vcpu_pc )
|
||||
__field( bool, is_wfe )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->vcpu_pc = vcpu_pc;
|
||||
__entry->is_wfe = is_wfe;
|
||||
),
|
||||
|
||||
TP_printk("guest executed wfi at: 0x%08lx", __entry->vcpu_pc)
|
||||
TP_printk("guest executed wf%c at: 0x%08lx",
|
||||
__entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_unmap_hva,
|
||||
|
|
|
@ -96,6 +96,7 @@
|
|||
#define ESR_ELx_COND_SHIFT (20)
|
||||
#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
|
||||
#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0)
|
||||
#define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1)
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
#include <asm/types.h>
|
||||
|
|
|
@ -126,6 +126,7 @@ extern char __kvm_hyp_vector[];
|
|||
|
||||
extern void __kvm_flush_vm_context(void);
|
||||
extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
|
||||
extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
|
||||
|
||||
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include <asm/kvm_asm.h>
|
||||
#include <asm/kvm_mmio.h>
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/cputype.h>
|
||||
|
||||
unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
|
||||
unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
|
||||
|
@ -140,6 +141,11 @@ static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
|
|||
return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
|
||||
}
|
||||
|
||||
static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK;
|
||||
}
|
||||
|
||||
static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV);
|
||||
|
@ -201,9 +207,9 @@ static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
|
|||
return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE;
|
||||
}
|
||||
|
||||
static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
|
||||
static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu_sys_reg(vcpu, MPIDR_EL1);
|
||||
return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
|
||||
}
|
||||
|
||||
static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
|
||||
|
|
|
@ -59,6 +59,9 @@ struct kvm_arch {
|
|||
/* VTTBR value associated with above pgd and vmid */
|
||||
u64 vttbr;
|
||||
|
||||
/* The maximum number of vCPUs depends on the used GIC model */
|
||||
int max_vcpus;
|
||||
|
||||
/* Interrupt controller */
|
||||
struct vgic_dist vgic;
|
||||
|
||||
|
@ -159,6 +162,7 @@ struct kvm_vm_stat {
|
|||
};
|
||||
|
||||
struct kvm_vcpu_stat {
|
||||
u32 halt_successful_poll;
|
||||
u32 halt_wakeup;
|
||||
};
|
||||
|
||||
|
@ -196,6 +200,7 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
|
|||
|
||||
u64 kvm_call_hyp(void *hypfn, ...);
|
||||
void force_vm_exit(const cpumask_t *mask);
|
||||
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
|
||||
|
||||
int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
int exception_index);
|
||||
|
@ -203,6 +208,8 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
|||
int kvm_perf_init(void);
|
||||
int kvm_perf_teardown(void);
|
||||
|
||||
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
|
||||
|
||||
static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
|
||||
phys_addr_t pgd_ptr,
|
||||
unsigned long hyp_stack_ptr,
|
||||
|
|
|
@ -40,6 +40,7 @@ struct kvm_exit_mmio {
|
|||
u8 data[8];
|
||||
u32 len;
|
||||
bool is_write;
|
||||
void *private;
|
||||
};
|
||||
|
||||
static inline void kvm_prepare_mmio(struct kvm_run *run,
|
||||
|
|
|
@ -118,6 +118,27 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
|
|||
pmd_val(*pmd) |= PMD_S2_RDWR;
|
||||
}
|
||||
|
||||
static inline void kvm_set_s2pte_readonly(pte_t *pte)
|
||||
{
|
||||
pte_val(*pte) = (pte_val(*pte) & ~PTE_S2_RDWR) | PTE_S2_RDONLY;
|
||||
}
|
||||
|
||||
static inline bool kvm_s2pte_readonly(pte_t *pte)
|
||||
{
|
||||
return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY;
|
||||
}
|
||||
|
||||
static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
|
||||
{
|
||||
pmd_val(*pmd) = (pmd_val(*pmd) & ~PMD_S2_RDWR) | PMD_S2_RDONLY;
|
||||
}
|
||||
|
||||
static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
|
||||
{
|
||||
return (pmd_val(*pmd) & PMD_S2_RDWR) == PMD_S2_RDONLY;
|
||||
}
|
||||
|
||||
|
||||
#define kvm_pgd_addr_end(addr, end) pgd_addr_end(addr, end)
|
||||
#define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end)
|
||||
#define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end)
|
||||
|
|
|
@ -119,6 +119,7 @@
|
|||
#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */
|
||||
#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */
|
||||
|
||||
#define PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[2:1] */
|
||||
#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
|
||||
|
||||
/*
|
||||
|
|
|
@ -78,6 +78,13 @@ struct kvm_regs {
|
|||
#define KVM_VGIC_V2_DIST_SIZE 0x1000
|
||||
#define KVM_VGIC_V2_CPU_SIZE 0x2000
|
||||
|
||||
/* Supported VGICv3 address types */
|
||||
#define KVM_VGIC_V3_ADDR_TYPE_DIST 2
|
||||
#define KVM_VGIC_V3_ADDR_TYPE_REDIST 3
|
||||
|
||||
#define KVM_VGIC_V3_DIST_SIZE SZ_64K
|
||||
#define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K)
|
||||
|
||||
#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
|
||||
#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */
|
||||
#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */
|
||||
|
@ -161,6 +168,8 @@ struct kvm_arch_memory_slot {
|
|||
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
|
||||
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
|
||||
#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
|
||||
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
|
||||
|
||||
/* KVM_IRQ_LINE irq field index values */
|
||||
#define KVM_ARM_IRQ_TYPE_SHIFT 24
|
||||
|
|
|
@ -140,6 +140,7 @@ int main(void)
|
|||
DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
|
||||
DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
|
||||
DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
|
||||
DEFINE(VGIC_V3_CPU_SRE, offsetof(struct vgic_cpu, vgic_v3.vgic_sre));
|
||||
DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr));
|
||||
DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr));
|
||||
DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr));
|
||||
|
|
|
@ -22,10 +22,12 @@ config KVM
|
|||
select PREEMPT_NOTIFIERS
|
||||
select ANON_INODES
|
||||
select HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
|
||||
select KVM_MMIO
|
||||
select KVM_ARM_HOST
|
||||
select KVM_ARM_VGIC
|
||||
select KVM_ARM_TIMER
|
||||
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
select SRCU
|
||||
---help---
|
||||
Support hosting virtualized guest machines.
|
||||
|
|
|
@ -21,7 +21,9 @@ kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
|
|||
|
||||
kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
|
||||
kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
|
||||
kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
|
||||
kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
|
||||
kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o
|
||||
kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3-emul.o
|
||||
kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o
|
||||
kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
|
||||
|
|
|
@ -28,12 +28,18 @@
|
|||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/kvm_psci.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "trace.h"
|
||||
|
||||
typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
|
||||
|
||||
static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
{
|
||||
int ret;
|
||||
|
||||
trace_kvm_hvc_arm64(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
|
||||
kvm_vcpu_hvc_get_imm(vcpu));
|
||||
|
||||
ret = kvm_psci_call(vcpu);
|
||||
if (ret < 0) {
|
||||
kvm_inject_undefined(vcpu);
|
||||
|
@ -63,10 +69,13 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
|||
*/
|
||||
static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
{
|
||||
if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE)
|
||||
if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
|
||||
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
|
||||
kvm_vcpu_on_spin(vcpu);
|
||||
else
|
||||
} else {
|
||||
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
|
||||
kvm_vcpu_block(vcpu);
|
||||
}
|
||||
|
||||
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
|
||||
|
||||
|
|
|
@ -1032,6 +1032,28 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
|
|||
ret
|
||||
ENDPROC(__kvm_tlb_flush_vmid_ipa)
|
||||
|
||||
/**
|
||||
* void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs
|
||||
* @struct kvm *kvm - pointer to kvm structure
|
||||
*
|
||||
* Invalidates all Stage 1 and 2 TLB entries for current VMID.
|
||||
*/
|
||||
ENTRY(__kvm_tlb_flush_vmid)
|
||||
dsb ishst
|
||||
|
||||
kern_hyp_va x0
|
||||
ldr x2, [x0, #KVM_VTTBR]
|
||||
msr vttbr_el2, x2
|
||||
isb
|
||||
|
||||
tlbi vmalls12e1is
|
||||
dsb ish
|
||||
isb
|
||||
|
||||
msr vttbr_el2, xzr
|
||||
ret
|
||||
ENDPROC(__kvm_tlb_flush_vmid)
|
||||
|
||||
ENTRY(__kvm_flush_vm_context)
|
||||
dsb ishst
|
||||
tlbi alle1is
|
||||
|
|
|
@ -113,6 +113,27 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Trap handler for the GICv3 SGI generation system register.
|
||||
* Forward the request to the VGIC emulation.
|
||||
* The cp15_64 code makes sure this automatically works
|
||||
* for both AArch64 and AArch32 accesses.
|
||||
*/
|
||||
static bool access_gic_sgi(struct kvm_vcpu *vcpu,
|
||||
const struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
u64 val;
|
||||
|
||||
if (!p->is_write)
|
||||
return read_from_write_only(vcpu, p);
|
||||
|
||||
val = *vcpu_reg(vcpu, p->Rt);
|
||||
vgic_v3_dispatch_sgi(vcpu, val);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool trap_raz_wi(struct kvm_vcpu *vcpu,
|
||||
const struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
|
@ -200,10 +221,19 @@ static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
|
|||
|
||||
static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
|
||||
{
|
||||
u64 mpidr;
|
||||
|
||||
/*
|
||||
* Simply map the vcpu_id into the Aff0 field of the MPIDR.
|
||||
* Map the vcpu_id into the first three affinity level fields of
|
||||
* the MPIDR. We limit the number of VCPUs in level 0 due to a
|
||||
* limitation to 16 CPUs in that level in the ICC_SGIxR registers
|
||||
* of the GICv3 to be able to address each CPU directly when
|
||||
* sending IPIs.
|
||||
*/
|
||||
vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff);
|
||||
mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0);
|
||||
mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1);
|
||||
mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2);
|
||||
vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr;
|
||||
}
|
||||
|
||||
/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
|
||||
|
@ -373,6 +403,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
|||
{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000),
|
||||
NULL, reset_val, VBAR_EL1, 0 },
|
||||
|
||||
/* ICC_SGI1R_EL1 */
|
||||
{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1011), Op2(0b101),
|
||||
access_gic_sgi },
|
||||
/* ICC_SRE_EL1 */
|
||||
{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1100), Op2(0b101),
|
||||
trap_raz_wi },
|
||||
|
@ -605,6 +638,8 @@ static const struct sys_reg_desc cp14_64_regs[] = {
|
|||
* register).
|
||||
*/
|
||||
static const struct sys_reg_desc cp15_regs[] = {
|
||||
{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
|
||||
|
||||
{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
|
||||
{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
|
||||
{ Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
|
||||
|
@ -652,6 +687,7 @@ static const struct sys_reg_desc cp15_regs[] = {
|
|||
|
||||
static const struct sys_reg_desc cp15_64_regs[] = {
|
||||
{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
|
||||
{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
|
||||
{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
|
||||
};
|
||||
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
#if !defined(_TRACE_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||
#define _TRACE_ARM64_KVM_H
|
||||
|
||||
#include <linux/tracepoint.h>
|
||||
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM kvm
|
||||
|
||||
TRACE_EVENT(kvm_wfx_arm64,
|
||||
TP_PROTO(unsigned long vcpu_pc, bool is_wfe),
|
||||
TP_ARGS(vcpu_pc, is_wfe),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(unsigned long, vcpu_pc)
|
||||
__field(bool, is_wfe)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->vcpu_pc = vcpu_pc;
|
||||
__entry->is_wfe = is_wfe;
|
||||
),
|
||||
|
||||
TP_printk("guest executed wf%c at: 0x%08lx",
|
||||
__entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_hvc_arm64,
|
||||
TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
|
||||
TP_ARGS(vcpu_pc, r0, imm),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(unsigned long, vcpu_pc)
|
||||
__field(unsigned long, r0)
|
||||
__field(unsigned long, imm)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->vcpu_pc = vcpu_pc;
|
||||
__entry->r0 = r0;
|
||||
__entry->imm = imm;
|
||||
),
|
||||
|
||||
TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)",
|
||||
__entry->vcpu_pc, __entry->r0, __entry->imm)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_ARM64_KVM_H */
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
#define TRACE_INCLUDE_PATH .
|
||||
#undef TRACE_INCLUDE_FILE
|
||||
#define TRACE_INCLUDE_FILE trace
|
||||
|
||||
/* This part must be outside protection */
|
||||
#include <trace/define_trace.h>
|
|
@ -148,17 +148,18 @@
|
|||
* x0: Register pointing to VCPU struct
|
||||
*/
|
||||
.macro restore_vgic_v3_state
|
||||
// Disable SRE_EL1 access. Necessary, otherwise
|
||||
// ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens...
|
||||
msr_s ICC_SRE_EL1, xzr
|
||||
isb
|
||||
|
||||
// Compute the address of struct vgic_cpu
|
||||
add x3, x0, #VCPU_VGIC_CPU
|
||||
|
||||
// Restore all interesting registers
|
||||
ldr w4, [x3, #VGIC_V3_CPU_HCR]
|
||||
ldr w5, [x3, #VGIC_V3_CPU_VMCR]
|
||||
ldr w25, [x3, #VGIC_V3_CPU_SRE]
|
||||
|
||||
msr_s ICC_SRE_EL1, x25
|
||||
|
||||
// make sure SRE is valid before writing the other registers
|
||||
isb
|
||||
|
||||
msr_s ICH_HCR_EL2, x4
|
||||
msr_s ICH_VMCR_EL2, x5
|
||||
|
@ -244,9 +245,12 @@
|
|||
dsb sy
|
||||
|
||||
// Prevent the guest from touching the GIC system registers
|
||||
// if SRE isn't enabled for GICv3 emulation
|
||||
cbnz x25, 1f
|
||||
mrs_s x5, ICC_SRE_EL2
|
||||
and x5, x5, #~ICC_SRE_EL2_ENABLE
|
||||
msr_s ICC_SRE_EL2, x5
|
||||
1:
|
||||
.endm
|
||||
|
||||
ENTRY(__save_vgic_v3_state)
|
||||
|
|
|
@ -18,7 +18,6 @@ header-y += intrinsics.h
|
|||
header-y += ioctl.h
|
||||
header-y += ioctls.h
|
||||
header-y += ipcbuf.h
|
||||
header-y += kvm.h
|
||||
header-y += kvm_para.h
|
||||
header-y += mman.h
|
||||
header-y += msgbuf.h
|
||||
|
|
|
@ -120,6 +120,7 @@ struct kvm_vcpu_stat {
|
|||
u32 resvd_inst_exits;
|
||||
u32 break_inst_exits;
|
||||
u32 flush_dcache_exits;
|
||||
u32 halt_successful_poll;
|
||||
u32 halt_wakeup;
|
||||
};
|
||||
|
||||
|
|
|
@ -434,7 +434,7 @@ __kvm_mips_return_to_guest:
|
|||
/* Setup status register for running guest in UM */
|
||||
.set at
|
||||
or v1, v1, (ST0_EXL | KSU_USER | ST0_IE)
|
||||
and v1, v1, ~ST0_CU0
|
||||
and v1, v1, ~(ST0_CU0 | ST0_MX)
|
||||
.set noat
|
||||
mtc0 v1, CP0_STATUS
|
||||
ehb
|
||||
|
|
|
@ -15,9 +15,11 @@
|
|||
#include <linux/vmalloc.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <asm/fpu.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/pgtable.h>
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
|
||||
|
@ -47,6 +49,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
|||
{ "resvd_inst", VCPU_STAT(resvd_inst_exits), KVM_STAT_VCPU },
|
||||
{ "break_inst", VCPU_STAT(break_inst_exits), KVM_STAT_VCPU },
|
||||
{ "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU },
|
||||
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU },
|
||||
{ "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU },
|
||||
{NULL}
|
||||
};
|
||||
|
@ -378,6 +381,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
|||
vcpu->mmio_needed = 0;
|
||||
}
|
||||
|
||||
lose_fpu(1);
|
||||
|
||||
local_irq_disable();
|
||||
/* Check if we have any exceptions/interrupts pending */
|
||||
kvm_mips_deliver_interrupts(vcpu,
|
||||
|
@ -385,8 +390,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
|||
|
||||
kvm_guest_enter();
|
||||
|
||||
/* Disable hardware page table walking while in guest */
|
||||
htw_stop();
|
||||
|
||||
r = __kvm_mips_vcpu_run(run, vcpu);
|
||||
|
||||
/* Re-enable HTW before enabling interrupts */
|
||||
htw_start();
|
||||
|
||||
kvm_guest_exit();
|
||||
local_irq_enable();
|
||||
|
||||
|
@ -832,9 +843,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
|||
return -ENOIOCTLCMD;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
||||
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
|
||||
|
@ -980,9 +990,6 @@ static void kvm_mips_set_c0_status(void)
|
|||
{
|
||||
uint32_t status = read_c0_status();
|
||||
|
||||
if (cpu_has_fpu)
|
||||
status |= (ST0_CU1);
|
||||
|
||||
if (cpu_has_dsp)
|
||||
status |= (ST0_MX);
|
||||
|
||||
|
@@ -1002,6 +1009,9 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
	enum emulation_result er = EMULATE_DONE;
	int ret = RESUME_GUEST;

	/* re-enable HTW before enabling interrupts */
	htw_start();

	/* Set a default exit reason */
	run->exit_reason = KVM_EXIT_UNKNOWN;
	run->ready_for_interrupt_injection = 1;

@@ -1136,6 +1146,9 @@ skip_emul:
		}
	}

	/* Disable HTW before returning to guest or host */
	htw_stop();

	return ret;
}

@@ -107,6 +107,7 @@ struct kvm_vcpu_stat {
	u32 emulated_inst_exits;
	u32 dec_exits;
	u32 ext_intr_exits;
	u32 halt_successful_poll;
	u32 halt_wakeup;
	u32 dbell_exits;
	u32 gdbell_exits;

@@ -52,6 +52,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "dec", VCPU_STAT(dec_exits) },
	{ "ext_intr", VCPU_STAT(ext_intr_exits) },
	{ "queue_intr", VCPU_STAT(queue_intr) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll), },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "pf_storage", VCPU_STAT(pf_storage) },
	{ "sp_storage", VCPU_STAT(sp_storage) },

@@ -62,6 +62,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "inst_emu", VCPU_STAT(emulated_inst_exits) },
	{ "dec", VCPU_STAT(dec_exits) },
	{ "ext_intr", VCPU_STAT(ext_intr_exits) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "doorbell", VCPU_STAT(dbell_exits) },
	{ "guest doorbell", VCPU_STAT(gdbell_exits) },

@@ -623,9 +623,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
	return vcpu;
}

int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	return 0;
}

void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)

@@ -35,11 +35,13 @@
#define KVM_NR_IRQCHIPS 1
#define KVM_IRQCHIP_NUM_PINS 4096

#define SIGP_CTRL_C 0x00800000
#define SIGP_CTRL_C 0x80
#define SIGP_CTRL_SCN_MASK 0x3f

struct sca_entry {
	atomic_t ctrl;
	__u32 reserved;
	__u8 reserved0;
	__u8 sigp_ctrl;
	__u16 reserved[3];
	__u64 sda;
	__u64 reserved2[2];
} __attribute__((packed));

@@ -87,7 +89,8 @@ struct kvm_s390_sie_block {
	atomic_t cpuflags; /* 0x0000 */
	__u32 : 1; /* 0x0004 */
	__u32 prefix : 18;
	__u32 : 13;
	__u32 : 1;
	__u32 ibc : 12;
	__u8 reserved08[4]; /* 0x0008 */
#define PROG_IN_SIE (1<<0)
	__u32 prog0c; /* 0x000c */

@@ -132,7 +135,9 @@ struct kvm_s390_sie_block {
	__u8 reserved60; /* 0x0060 */
	__u8 ecb; /* 0x0061 */
	__u8 ecb2; /* 0x0062 */
	__u8 reserved63[1]; /* 0x0063 */
#define ECB3_AES 0x04
#define ECB3_DEA 0x08
	__u8 ecb3; /* 0x0063 */
	__u32 scaol; /* 0x0064 */
	__u8 reserved68[4]; /* 0x0068 */
	__u32 todpr; /* 0x006c */

@@ -159,6 +164,7 @@ struct kvm_s390_sie_block {
	__u64 tecmc; /* 0x00e8 */
	__u8 reservedf0[12]; /* 0x00f0 */
#define CRYCB_FORMAT1 0x00000001
#define CRYCB_FORMAT2 0x00000003
	__u32 crycbd; /* 0x00fc */
	__u64 gcr[16]; /* 0x0100 */
	__u64 gbea; /* 0x0180 */

@@ -192,6 +198,7 @@ struct kvm_vcpu_stat {
	u32 exit_stop_request;
	u32 exit_validity;
	u32 exit_instruction;
	u32 halt_successful_poll;
	u32 halt_wakeup;
	u32 instruction_lctl;
	u32 instruction_lctlg;

@@ -378,14 +385,11 @@ struct kvm_s390_interrupt_info {
		struct kvm_s390_emerg_info emerg;
		struct kvm_s390_extcall_info extcall;
		struct kvm_s390_prefix_info prefix;
		struct kvm_s390_stop_info stop;
		struct kvm_s390_mchk_info mchk;
	};
};

/* for local_interrupt.action_flags */
#define ACTION_STORE_ON_STOP (1<<0)
#define ACTION_STOP_ON_STOP (1<<1)

struct kvm_s390_irq_payload {
	struct kvm_s390_io_info io;
	struct kvm_s390_ext_info ext;

@@ -393,6 +397,7 @@ struct kvm_s390_irq_payload {
	struct kvm_s390_emerg_info emerg;
	struct kvm_s390_extcall_info extcall;
	struct kvm_s390_prefix_info prefix;
	struct kvm_s390_stop_info stop;
	struct kvm_s390_mchk_info mchk;
};

@@ -401,7 +406,6 @@ struct kvm_s390_local_interrupt {
	struct kvm_s390_float_interrupt *float_int;
	wait_queue_head_t *wq;
	atomic_t *cpuflags;
	unsigned int action_bits;
	DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
	struct kvm_s390_irq_payload irq;
	unsigned long pending_irqs;

@@ -470,7 +474,6 @@ struct kvm_vcpu_arch {
	};
	struct gmap *gmap;
	struct kvm_guestdbg_info_arch guestdbg;
#define KVM_S390_PFAULT_TOKEN_INVALID (-1UL)
	unsigned long pfault_token;
	unsigned long pfault_select;
	unsigned long pfault_compare;

@@ -504,13 +507,39 @@ struct s390_io_adapter {
#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
#define MAX_S390_ADAPTER_MAPS 256

/* maximum size of facilities and facility mask is 2k bytes */
#define S390_ARCH_FAC_LIST_SIZE_BYTE (1<<11)
#define S390_ARCH_FAC_LIST_SIZE_U64 \
	(S390_ARCH_FAC_LIST_SIZE_BYTE / sizeof(u64))
#define S390_ARCH_FAC_MASK_SIZE_BYTE S390_ARCH_FAC_LIST_SIZE_BYTE
#define S390_ARCH_FAC_MASK_SIZE_U64 \
	(S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64))

struct s390_model_fac {
	/* facilities used in SIE context */
	__u64 sie[S390_ARCH_FAC_LIST_SIZE_U64];
	/* subset enabled by kvm */
	__u64 kvm[S390_ARCH_FAC_LIST_SIZE_U64];
};

struct kvm_s390_cpu_model {
	struct s390_model_fac *fac;
	struct cpuid cpu_id;
	unsigned short ibc;
};

struct kvm_s390_crypto {
	struct kvm_s390_crypto_cb *crycb;
	__u32 crycbd;
	__u8 aes_kw;
	__u8 dea_kw;
};

struct kvm_s390_crypto_cb {
	__u8 reserved00[128]; /* 0x0000 */
	__u8 reserved00[72]; /* 0x0000 */
	__u8 dea_wrapping_key_mask[24]; /* 0x0048 */
	__u8 aes_wrapping_key_mask[32]; /* 0x0060 */
	__u8 reserved80[128]; /* 0x0080 */
};

struct kvm_arch{

@ -523,12 +552,15 @@ struct kvm_arch{
|
|||
int use_irqchip;
|
||||
int use_cmma;
|
||||
int user_cpu_state_ctrl;
|
||||
int user_sigp;
|
||||
struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
|
||||
wait_queue_head_t ipte_wq;
|
||||
int ipte_lock_count;
|
||||
struct mutex ipte_mutex;
|
||||
spinlock_t start_stop_lock;
|
||||
struct kvm_s390_cpu_model model;
|
||||
struct kvm_s390_crypto crypto;
|
||||
u64 epoch;
|
||||
};
|
||||
|
||||
#define KVM_HVA_ERR_BAD (-1UL)
|
||||
|
|
|
@ -31,7 +31,8 @@ struct sclp_cpu_entry {
|
|||
u8 reserved0[2];
|
||||
u8 : 3;
|
||||
u8 siif : 1;
|
||||
u8 : 4;
|
||||
u8 sigpif : 1;
|
||||
u8 : 3;
|
||||
u8 reserved2[10];
|
||||
u8 type;
|
||||
u8 reserved1;
|
||||
|
@ -69,6 +70,7 @@ int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
|
|||
unsigned long sclp_get_hsa_size(void);
|
||||
void sclp_early_detect(void);
|
||||
int sclp_has_siif(void);
|
||||
int sclp_has_sigpif(void);
|
||||
unsigned int sclp_get_ibc(void);
|
||||
|
||||
long _sclp_print_early(const char *);
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#define __ASM_S390_SYSINFO_H
|
||||
|
||||
#include <asm/bitsperlong.h>
|
||||
#include <linux/uuid.h>
|
||||
|
||||
struct sysinfo_1_1_1 {
|
||||
unsigned char p:1;
|
||||
|
@ -116,10 +117,13 @@ struct sysinfo_3_2_2 {
|
|||
char name[8];
|
||||
unsigned int caf;
|
||||
char cpi[16];
|
||||
char reserved_1[24];
|
||||
|
||||
char reserved_1[3];
|
||||
char ext_name_encoding;
|
||||
unsigned int reserved_2;
|
||||
uuid_be uuid;
|
||||
} vm[8];
|
||||
char reserved_544[3552];
|
||||
char reserved_3[1504];
|
||||
char ext_names[8][256];
|
||||
};
|
||||
|
||||
extern int topology_max_mnest;
|
||||
|
|
|
@ -57,10 +57,44 @@ struct kvm_s390_io_adapter_req {
|
|||
|
||||
/* kvm attr_group on vm fd */
|
||||
#define KVM_S390_VM_MEM_CTRL 0
|
||||
#define KVM_S390_VM_TOD 1
|
||||
#define KVM_S390_VM_CRYPTO 2
|
||||
#define KVM_S390_VM_CPU_MODEL 3
|
||||
|
||||
/* kvm attributes for mem_ctrl */
|
||||
#define KVM_S390_VM_MEM_ENABLE_CMMA 0
|
||||
#define KVM_S390_VM_MEM_CLR_CMMA 1
|
||||
#define KVM_S390_VM_MEM_LIMIT_SIZE 2
|
||||
|
||||
/* kvm attributes for KVM_S390_VM_TOD */
|
||||
#define KVM_S390_VM_TOD_LOW 0
|
||||
#define KVM_S390_VM_TOD_HIGH 1
|
||||
|
||||
/* kvm attributes for KVM_S390_VM_CPU_MODEL */
|
||||
/* processor related attributes are r/w */
|
||||
#define KVM_S390_VM_CPU_PROCESSOR 0
|
||||
struct kvm_s390_vm_cpu_processor {
|
||||
__u64 cpuid;
|
||||
__u16 ibc;
|
||||
__u8 pad[6];
|
||||
__u64 fac_list[256];
|
||||
};
|
||||
|
||||
/* machine related attributes are r/o */
|
||||
#define KVM_S390_VM_CPU_MACHINE 1
|
||||
struct kvm_s390_vm_cpu_machine {
|
||||
__u64 cpuid;
|
||||
__u32 ibc;
|
||||
__u8 pad[4];
|
||||
__u64 fac_mask[256];
|
||||
__u64 fac_list[256];
|
||||
};
|
||||
|
||||
/* kvm attributes for crypto */
|
||||
#define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0
|
||||
#define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1
|
||||
#define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2
|
||||
#define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3
|
||||
|
||||
/* for KVM_GET_REGS and KVM_SET_REGS */
|
||||
struct kvm_regs {
|
||||
|
@ -107,6 +141,9 @@ struct kvm_guest_debug_arch {
|
|||
struct kvm_hw_breakpoint __user *hw_bp;
|
||||
};
|
||||
|
||||
/* for KVM_SYNC_PFAULT and KVM_REG_S390_PFTOKEN */
|
||||
#define KVM_S390_PFAULT_TOKEN_INVALID 0xffffffffffffffffULL
|
||||
|
||||
#define KVM_SYNC_PREFIX (1UL << 0)
|
||||
#define KVM_SYNC_GPRS (1UL << 1)
|
||||
#define KVM_SYNC_ACRS (1UL << 2)
|
||||
|
|
|
@ -204,6 +204,33 @@ static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info)
|
|||
}
|
||||
}
|
||||
|
||||
static void print_ext_name(struct seq_file *m, int lvl,
|
||||
struct sysinfo_3_2_2 *info)
|
||||
{
|
||||
if (info->vm[lvl].ext_name_encoding == 0)
|
||||
return;
|
||||
if (info->ext_names[lvl][0] == 0)
|
||||
return;
|
||||
switch (info->vm[lvl].ext_name_encoding) {
|
||||
case 1: /* EBCDIC */
|
||||
EBCASC(info->ext_names[lvl], sizeof(info->ext_names[lvl]));
|
||||
break;
|
||||
case 2: /* UTF-8 */
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
seq_printf(m, "VM%02d Extended Name: %-.256s\n", lvl,
|
||||
info->ext_names[lvl]);
|
||||
}
|
||||
|
||||
static void print_uuid(struct seq_file *m, int i, struct sysinfo_3_2_2 *info)
|
||||
{
|
||||
if (!memcmp(&info->vm[i].uuid, &NULL_UUID_BE, sizeof(uuid_be)))
|
||||
return;
|
||||
seq_printf(m, "VM%02d UUID: %pUb\n", i, &info->vm[i].uuid);
|
||||
}
|
||||
|
||||
static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info)
|
||||
{
|
||||
int i;
|
||||
|
@ -221,6 +248,8 @@ static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info)
|
|||
seq_printf(m, "VM%02d CPUs Configured: %d\n", i, info->vm[i].cpus_configured);
|
||||
seq_printf(m, "VM%02d CPUs Standby: %d\n", i, info->vm[i].cpus_standby);
|
||||
seq_printf(m, "VM%02d CPUs Reserved: %d\n", i, info->vm[i].cpus_reserved);
|
||||
print_ext_name(m, i, info);
|
||||
print_uuid(m, i, info);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -357,8 +357,8 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
|
|||
union asce asce;
|
||||
|
||||
ctlreg0.val = vcpu->arch.sie_block->gcr[0];
|
||||
edat1 = ctlreg0.edat && test_vfacility(8);
|
||||
edat2 = edat1 && test_vfacility(78);
|
||||
edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
|
||||
edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
|
||||
asce.val = get_vcpu_asce(vcpu);
|
||||
if (asce.r)
|
||||
goto real_address;
|
||||
|
|
|
@ -68,18 +68,27 @@ static int handle_noop(struct kvm_vcpu *vcpu)
|
|||
|
||||
static int handle_stop(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
|
||||
int rc = 0;
|
||||
unsigned int action_bits;
|
||||
uint8_t flags, stop_pending;
|
||||
|
||||
vcpu->stat.exit_stop_request++;
|
||||
trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits);
|
||||
|
||||
action_bits = vcpu->arch.local_int.action_bits;
|
||||
|
||||
if (!(action_bits & ACTION_STOP_ON_STOP))
|
||||
/* delay the stop if any non-stop irq is pending */
|
||||
if (kvm_s390_vcpu_has_irq(vcpu, 1))
|
||||
return 0;
|
||||
|
||||
if (action_bits & ACTION_STORE_ON_STOP) {
|
||||
/* avoid races with the injection/SIGP STOP code */
|
||||
spin_lock(&li->lock);
|
||||
flags = li->irq.stop.flags;
|
||||
stop_pending = kvm_s390_is_stop_irq_pending(vcpu);
|
||||
spin_unlock(&li->lock);
|
||||
|
||||
trace_kvm_s390_stop_request(stop_pending, flags);
|
||||
if (!stop_pending)
|
||||
return 0;
|
||||
|
||||
if (flags & KVM_S390_STOP_FLAG_STORE_STATUS) {
|
||||
rc = kvm_s390_vcpu_store_status(vcpu,
|
||||
KVM_S390_STORE_STATUS_NOADDR);
|
||||
if (rc)
|
||||
|
@ -279,11 +288,13 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
|
|||
irq.type = KVM_S390_INT_CPU_TIMER;
|
||||
break;
|
||||
case EXT_IRQ_EXTERNAL_CALL:
|
||||
if (kvm_s390_si_ext_call_pending(vcpu))
|
||||
return 0;
|
||||
irq.type = KVM_S390_INT_EXTERNAL_CALL;
|
||||
irq.u.extcall.code = vcpu->arch.sie_block->extcpuaddr;
|
||||
break;
|
||||
rc = kvm_s390_inject_vcpu(vcpu, &irq);
|
||||
/* ignore if another external call is already pending */
|
||||
if (rc == -EBUSY)
|
||||
return 0;
|
||||
return rc;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
@ -307,17 +318,19 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
|
|||
kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
|
||||
|
||||
/* Make sure that the source is paged-in */
|
||||
srcaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg2]);
|
||||
if (kvm_is_error_gpa(vcpu->kvm, srcaddr))
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2],
|
||||
&srcaddr, 0);
|
||||
if (rc)
|
||||
return kvm_s390_inject_prog_cond(vcpu, rc);
|
||||
rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
|
||||
if (rc != 0)
|
||||
return rc;
|
||||
|
||||
/* Make sure that the destination is paged-in */
|
||||
dstaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg1]);
|
||||
if (kvm_is_error_gpa(vcpu->kvm, dstaddr))
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1],
|
||||
&dstaddr, 1);
|
||||
if (rc)
|
||||
return kvm_s390_inject_prog_cond(vcpu, rc);
|
||||
rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
|
||||
if (rc != 0)
|
||||
return rc;
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <linux/bitmap.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/sclp.h>
|
||||
#include "kvm-s390.h"
|
||||
#include "gaccess.h"
|
||||
#include "trace-s390.h"
|
||||
|
@ -159,6 +160,12 @@ static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu)
|
|||
if (psw_mchk_disabled(vcpu))
|
||||
active_mask &= ~IRQ_PEND_MCHK_MASK;
|
||||
|
||||
/*
|
||||
* STOP irqs will never be actively delivered. They are triggered via
|
||||
* intercept requests and cleared when the stop intercept is performed.
|
||||
*/
|
||||
__clear_bit(IRQ_PEND_SIGP_STOP, &active_mask);
|
||||
|
||||
return active_mask;
|
||||
}
|
||||
|
||||
|
@ -186,9 +193,6 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
|
|||
LCTL_CR10 | LCTL_CR11);
|
||||
vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT);
|
||||
}
|
||||
|
||||
if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP)
|
||||
atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
|
||||
}
|
||||
|
||||
static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
|
||||
|
@ -216,11 +220,18 @@ static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
|
|||
vcpu->arch.sie_block->lctl |= LCTL_CR14;
|
||||
}
|
||||
|
||||
static void set_intercept_indicators_stop(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (kvm_s390_is_stop_irq_pending(vcpu))
|
||||
__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
|
||||
}
|
||||
|
||||
/* Set interception request for non-deliverable local interrupts */
|
||||
static void set_intercept_indicators_local(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
set_intercept_indicators_ext(vcpu);
|
||||
set_intercept_indicators_mchk(vcpu);
|
||||
set_intercept_indicators_stop(vcpu);
|
||||
}
|
||||
|
||||
static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
|
||||
|
@ -392,18 +403,6 @@ static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
|
|||
return rc ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static int __must_check __deliver_stop(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
|
||||
vcpu->stat.deliver_stop_signal++;
|
||||
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_SIGP_STOP,
|
||||
0, 0);
|
||||
|
||||
__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
|
||||
clear_bit(IRQ_PEND_SIGP_STOP, &vcpu->arch.local_int.pending_irqs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
|
||||
|
@ -705,7 +704,6 @@ static const deliver_irq_t deliver_irq_funcs[] = {
|
|||
[IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc,
|
||||
[IRQ_PEND_EXT_CPU_TIMER] = __deliver_cpu_timer,
|
||||
[IRQ_PEND_RESTART] = __deliver_restart,
|
||||
[IRQ_PEND_SIGP_STOP] = __deliver_stop,
|
||||
[IRQ_PEND_SET_PREFIX] = __deliver_set_prefix,
|
||||
[IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init,
|
||||
};
|
||||
|
@ -738,21 +736,20 @@ static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu,
|
|||
return rc;
|
||||
}
|
||||
|
||||
/* Check whether SIGP interpretation facility has an external call pending */
|
||||
int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu)
|
||||
/* Check whether an external call is pending (deliverable or not) */
|
||||
int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
atomic_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl;
|
||||
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
|
||||
uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
|
||||
|
||||
if (!psw_extint_disabled(vcpu) &&
|
||||
(vcpu->arch.sie_block->gcr[0] & 0x2000ul) &&
|
||||
(atomic_read(sigp_ctrl) & SIGP_CTRL_C) &&
|
||||
(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND))
|
||||
return 1;
|
||||
if (!sclp_has_sigpif())
|
||||
return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
|
||||
|
||||
return 0;
|
||||
return (sigp_ctrl & SIGP_CTRL_C) &&
|
||||
(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND);
|
||||
}
|
||||
|
||||
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
|
||||
int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
|
||||
{
|
||||
struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
|
||||
struct kvm_s390_interrupt_info *inti;
|
||||
|
@ -773,7 +770,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
|
|||
if (!rc && kvm_cpu_has_pending_timer(vcpu))
|
||||
rc = 1;
|
||||
|
||||
if (!rc && kvm_s390_si_ext_call_pending(vcpu))
|
||||
/* external call pending and deliverable */
|
||||
if (!rc && kvm_s390_ext_call_pending(vcpu) &&
|
||||
!psw_extint_disabled(vcpu) &&
|
||||
(vcpu->arch.sie_block->gcr[0] & 0x2000ul))
|
||||
rc = 1;
|
||||
|
||||
if (!rc && !exclude_stop && kvm_s390_is_stop_irq_pending(vcpu))
|
||||
rc = 1;
|
||||
|
||||
return rc;
|
||||
|
@ -804,14 +807,20 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
|
|||
return -EOPNOTSUPP; /* disabled wait */
|
||||
}
|
||||
|
||||
__set_cpu_idle(vcpu);
|
||||
if (!ckc_interrupts_enabled(vcpu)) {
|
||||
VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
|
||||
__set_cpu_idle(vcpu);
|
||||
goto no_timer;
|
||||
}
|
||||
|
||||
now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
|
||||
sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
|
||||
|
||||
/* underflow */
|
||||
if (vcpu->arch.sie_block->ckc < now)
|
||||
return 0;
|
||||
|
||||
__set_cpu_idle(vcpu);
|
||||
hrtimer_start(&vcpu->arch.ckc_timer, ktime_set(0, sltime), HRTIMER_MODE_REL);
|
||||
VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
|
||||
no_timer:
|
||||
|
@ -820,7 +829,7 @@ no_timer:
|
|||
__unset_cpu_idle(vcpu);
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
|
||||
hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
|
||||
hrtimer_cancel(&vcpu->arch.ckc_timer);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -840,10 +849,20 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
|
|||
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
u64 now, sltime;
|
||||
|
||||
vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
|
||||
kvm_s390_vcpu_wakeup(vcpu);
|
||||
now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
|
||||
sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
|
||||
|
||||
/*
|
||||
* If the monotonic clock runs faster than the tod clock we might be
|
||||
* woken up too early and have to go back to sleep to avoid deadlocks.
|
||||
*/
|
||||
if (vcpu->arch.sie_block->ckc > now &&
|
||||
hrtimer_forward_now(timer, ns_to_ktime(sltime)))
|
||||
return HRTIMER_RESTART;
|
||||
kvm_s390_vcpu_wakeup(vcpu);
|
||||
return HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
|
@ -859,8 +878,7 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
|
|||
|
||||
/* clear pending external calls set by sigp interpretation facility */
|
||||
atomic_clear_mask(CPUSTAT_ECALL_PEND, li->cpuflags);
|
||||
atomic_clear_mask(SIGP_CTRL_C,
|
||||
&vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl);
|
||||
vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0;
|
||||
}
|
||||
|
||||
int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
|
||||
|
@ -984,18 +1002,43 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
|
||||
static int __inject_extcall_sigpif(struct kvm_vcpu *vcpu, uint16_t src_id)
|
||||
{
|
||||
unsigned char new_val, old_val;
|
||||
uint8_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
|
||||
|
||||
new_val = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK);
|
||||
old_val = *sigp_ctrl & ~SIGP_CTRL_C;
|
||||
if (cmpxchg(sigp_ctrl, old_val, new_val) != old_val) {
|
||||
/* another external call is pending */
|
||||
return -EBUSY;
|
||||
}
|
||||
atomic_set_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
|
||||
{
|
||||
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
|
||||
struct kvm_s390_extcall_info *extcall = &li->irq.extcall;
|
||||
uint16_t src_id = irq->u.extcall.code;
|
||||
|
||||
VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
|
||||
irq->u.extcall.code);
|
||||
src_id);
|
||||
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL,
|
||||
irq->u.extcall.code, 0, 2);
|
||||
src_id, 0, 2);
|
||||
|
||||
/* sending vcpu invalid */
|
||||
if (src_id >= KVM_MAX_VCPUS ||
|
||||
kvm_get_vcpu(vcpu->kvm, src_id) == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
if (sclp_has_sigpif())
|
||||
return __inject_extcall_sigpif(vcpu, src_id);
|
||||
|
||||
if (!test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
|
||||
return -EBUSY;
|
||||
*extcall = irq->u.extcall;
|
||||
set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
|
||||
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
|
||||
return 0;
|
||||
}
|
||||
|
@ -1006,23 +1049,41 @@ static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
|
|||
struct kvm_s390_prefix_info *prefix = &li->irq.prefix;
|
||||
|
||||
VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
|
||||
prefix->address);
|
||||
irq->u.prefix.address);
|
||||
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX,
|
||||
prefix->address, 0, 2);
|
||||
irq->u.prefix.address, 0, 2);
|
||||
|
||||
if (!is_vcpu_stopped(vcpu))
|
||||
return -EBUSY;
|
||||
|
||||
*prefix = irq->u.prefix;
|
||||
set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define KVM_S390_STOP_SUPP_FLAGS (KVM_S390_STOP_FLAG_STORE_STATUS)
|
||||
static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
|
||||
{
|
||||
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
|
||||
struct kvm_s390_stop_info *stop = &li->irq.stop;
|
||||
int rc = 0;
|
||||
|
||||
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2);
|
||||
|
||||
li->action_bits |= ACTION_STOP_ON_STOP;
|
||||
set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
|
||||
if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS)
|
||||
return -EINVAL;
|
||||
|
||||
if (is_vcpu_stopped(vcpu)) {
|
||||
if (irq->u.stop.flags & KVM_S390_STOP_FLAG_STORE_STATUS)
|
||||
rc = kvm_s390_store_status_unloaded(vcpu,
|
||||
KVM_S390_STORE_STATUS_NOADDR);
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (test_and_set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs))
|
||||
return -EBUSY;
|
||||
stop->flags = irq->u.stop.flags;
|
||||
__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1042,14 +1103,13 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
|
|||
struct kvm_s390_irq *irq)
|
||||
{
|
||||
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
|
||||
struct kvm_s390_emerg_info *emerg = &li->irq.emerg;
|
||||
|
||||
VCPU_EVENT(vcpu, 3, "inject: emergency %u\n",
|
||||
irq->u.emerg.code);
|
||||
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
|
||||
emerg->code, 0, 2);
|
||||
irq->u.emerg.code, 0, 2);
|
||||
|
||||
set_bit(emerg->code, li->sigp_emerg_pending);
|
||||
set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
|
||||
set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
|
||||
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
|
||||
return 0;
|
||||
|
@ -1061,9 +1121,9 @@ static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
|
|||
struct kvm_s390_mchk_info *mchk = &li->irq.mchk;
|
||||
|
||||
VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
|
||||
mchk->mcic);
|
||||
irq->u.mchk.mcic);
|
||||
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0,
|
||||
mchk->mcic, 2);
|
||||
irq->u.mchk.mcic, 2);
|
||||
|
||||
/*
|
||||
* Because repressible machine checks can be indicated along with
|
||||
|
@ -1121,7 +1181,6 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
|
|||
|
||||
if ((!schid && !cr6) || (schid && cr6))
|
||||
return NULL;
|
||||
mutex_lock(&kvm->lock);
|
||||
fi = &kvm->arch.float_int;
|
||||
spin_lock(&fi->lock);
|
||||
inti = NULL;
|
||||
|
@ -1149,7 +1208,6 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
|
|||
if (list_empty(&fi->list))
|
||||
atomic_set(&fi->active, 0);
|
||||
spin_unlock(&fi->lock);
|
||||
mutex_unlock(&kvm->lock);
|
||||
return inti;
|
||||
}
|
||||
|
||||
|
@ -1162,7 +1220,6 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
|
|||
int sigcpu;
|
||||
int rc = 0;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
fi = &kvm->arch.float_int;
|
||||
spin_lock(&fi->lock);
|
||||
if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) {
|
||||
|
@ -1187,6 +1244,8 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
|
|||
list_add_tail(&inti->list, &iter->list);
|
||||
}
|
||||
atomic_set(&fi->active, 1);
|
||||
if (atomic_read(&kvm->online_vcpus) == 0)
|
||||
goto unlock_fi;
|
||||
sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
|
||||
if (sigcpu == KVM_MAX_VCPUS) {
|
||||
do {
|
||||
|
@ -1213,7 +1272,6 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
|
|||
kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
|
||||
unlock_fi:
|
||||
spin_unlock(&fi->lock);
|
||||
mutex_unlock(&kvm->lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -1221,6 +1279,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
|
|||
struct kvm_s390_interrupt *s390int)
|
||||
{
|
||||
struct kvm_s390_interrupt_info *inti;
|
||||
int rc;
|
||||
|
||||
inti = kzalloc(sizeof(*inti), GFP_KERNEL);
|
||||
if (!inti)
|
||||
|
@ -1239,7 +1298,6 @@ int kvm_s390_inject_vm(struct kvm *kvm,
|
|||
inti->ext.ext_params = s390int->parm;
|
||||
break;
|
||||
case KVM_S390_INT_PFAULT_DONE:
|
||||
inti->type = s390int->type;
|
||||
inti->ext.ext_params2 = s390int->parm64;
|
||||
break;
|
||||
case KVM_S390_MCHK:
|
||||
|
@ -1268,7 +1326,10 @@ int kvm_s390_inject_vm(struct kvm *kvm,
|
|||
trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64,
|
||||
2);
|
||||
|
||||
return __inject_vm(kvm, inti);
|
||||
rc = __inject_vm(kvm, inti);
|
||||
if (rc)
|
||||
kfree(inti);
|
||||
return rc;
|
||||
}
|
||||
|
||||
void kvm_s390_reinject_io_int(struct kvm *kvm,
|
||||
|
@ -1290,13 +1351,16 @@ int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
|
|||
case KVM_S390_SIGP_SET_PREFIX:
|
||||
irq->u.prefix.address = s390int->parm;
|
||||
break;
|
||||
case KVM_S390_SIGP_STOP:
|
||||
irq->u.stop.flags = s390int->parm;
|
||||
break;
|
||||
case KVM_S390_INT_EXTERNAL_CALL:
|
||||
if (irq->u.extcall.code & 0xffff0000)
|
||||
if (s390int->parm & 0xffff0000)
|
||||
return -EINVAL;
|
||||
irq->u.extcall.code = s390int->parm;
|
||||
break;
|
||||
case KVM_S390_INT_EMERGENCY:
|
||||
if (irq->u.emerg.code & 0xffff0000)
|
||||
if (s390int->parm & 0xffff0000)
|
||||
return -EINVAL;
|
||||
irq->u.emerg.code = s390int->parm;
|
||||
break;
|
||||
|
@ -1307,6 +1371,23 @@ int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
|
|||
return 0;
|
||||
}
|
||||
|
||||
int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
|
||||
|
||||
return test_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
|
||||
}
|
||||
|
||||
void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
|
||||
|
||||
spin_lock(&li->lock);
|
||||
li->irq.stop.flags = 0;
|
||||
clear_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
|
||||
spin_unlock(&li->lock);
|
||||
}
|
||||
|
||||
int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
|
||||
{
|
||||
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
|
||||
|
@ -1363,7 +1444,6 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm)
|
|||
struct kvm_s390_float_interrupt *fi;
|
||||
struct kvm_s390_interrupt_info *n, *inti = NULL;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
fi = &kvm->arch.float_int;
|
||||
spin_lock(&fi->lock);
|
||||
list_for_each_entry_safe(inti, n, &fi->list, list) {
|
||||
|
@ -1373,7 +1453,6 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm)
|
|||
fi->irq_count = 0;
|
||||
atomic_set(&fi->active, 0);
|
||||
spin_unlock(&fi->lock);
|
||||
mutex_unlock(&kvm->lock);
|
||||
}
|
||||
|
||||
static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
|
||||
|
@ -1413,7 +1492,6 @@ static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len)
|
|||
int ret = 0;
|
||||
int n = 0;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
fi = &kvm->arch.float_int;
|
||||
spin_lock(&fi->lock);
|
||||
|
||||
|
@ -1432,7 +1510,6 @@ static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len)
|
|||
}
|
||||
|
||||
spin_unlock(&fi->lock);
|
||||
mutex_unlock(&kvm->lock);
|
||||
|
||||
return ret < 0 ? ret : n;
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include <linux/kvm.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/timer.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
@ -29,7 +30,6 @@
|
|||
#include <asm/pgtable.h>
|
||||
#include <asm/nmi.h>
|
||||
#include <asm/switch_to.h>
|
||||
#include <asm/facility.h>
|
||||
#include <asm/sclp.h>
|
||||
#include "kvm-s390.h"
|
||||
#include "gaccess.h"
|
||||
|
@ -50,6 +50,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
|||
{ "exit_instruction", VCPU_STAT(exit_instruction) },
|
||||
{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
|
||||
{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
|
||||
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
|
||||
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
|
||||
{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
|
||||
{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
|
||||
|
@ -98,15 +99,20 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
|||
{ NULL }
|
||||
};
|
||||
|
||||
unsigned long *vfacilities;
|
||||
static struct gmap_notifier gmap_notifier;
|
||||
/* upper facilities limit for kvm */
|
||||
unsigned long kvm_s390_fac_list_mask[] = {
|
||||
0xff82fffbf4fc2000UL,
|
||||
0x005c000000000000UL,
|
||||
};
|
||||
|
||||
/* test availability of vfacility */
|
||||
int test_vfacility(unsigned long nr)
|
||||
unsigned long kvm_s390_fac_list_mask_size(void)
|
||||
{
|
||||
return __test_facility(nr, (void *) vfacilities);
|
||||
BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
|
||||
return ARRAY_SIZE(kvm_s390_fac_list_mask);
|
||||
}
|
||||
|
||||
static struct gmap_notifier gmap_notifier;
|
||||
|
||||
/* Section: not file related */
|
||||
int kvm_arch_hardware_enable(void)
|
||||
{
|
||||
|
@ -166,6 +172,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
case KVM_CAP_S390_IRQCHIP:
|
||||
case KVM_CAP_VM_ATTRIBUTES:
|
||||
case KVM_CAP_MP_STATE:
|
||||
case KVM_CAP_S390_USER_SIGP:
|
||||
r = 1;
|
||||
break;
|
||||
case KVM_CAP_NR_VCPUS:
|
||||
|
@ -254,6 +261,10 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
|
|||
kvm->arch.use_irqchip = 1;
|
||||
r = 0;
|
||||
break;
|
||||
case KVM_CAP_S390_USER_SIGP:
|
||||
kvm->arch.user_sigp = 1;
|
||||
r = 0;
|
||||
break;
|
||||
default:
|
||||
r = -EINVAL;
|
||||
break;
|
||||
|
@ -261,7 +272,24 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
|
|||
return r;
|
||||
}
|
||||
|
||||
static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret;
|
||||
|
||||
switch (attr->attr) {
|
||||
case KVM_S390_VM_MEM_LIMIT_SIZE:
|
||||
ret = 0;
|
||||
if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
|
||||
ret = -EFAULT;
|
||||
break;
|
||||
default:
|
||||
ret = -ENXIO;
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret;
|
||||
unsigned int idx;
|
||||
|
@ -283,6 +311,36 @@ static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
|
|||
mutex_unlock(&kvm->lock);
|
||||
ret = 0;
|
||||
break;
|
||||
case KVM_S390_VM_MEM_LIMIT_SIZE: {
|
||||
unsigned long new_limit;
|
||||
|
||||
if (kvm_is_ucontrol(kvm))
|
||||
return -EINVAL;
|
||||
|
||||
if (get_user(new_limit, (u64 __user *)attr->addr))
|
||||
return -EFAULT;
|
||||
|
||||
if (new_limit > kvm->arch.gmap->asce_end)
|
||||
return -E2BIG;
|
||||
|
||||
ret = -EBUSY;
|
||||
mutex_lock(&kvm->lock);
|
||||
if (atomic_read(&kvm->online_vcpus) == 0) {
|
||||
/* gmap_alloc will round the limit up */
|
||||
struct gmap *new = gmap_alloc(current->mm, new_limit);
|
||||
|
||||
if (!new) {
|
||||
ret = -ENOMEM;
|
||||
} else {
|
||||
gmap_free(kvm->arch.gmap);
|
||||
new->private = kvm;
|
||||
kvm->arch.gmap = new;
|
||||
ret = 0;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&kvm->lock);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ret = -ENXIO;
|
||||
break;
|
||||
|
@ -290,13 +348,276 @@ static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
|
||||
|
||||
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
int i;
|
||||
|
||||
if (!test_kvm_facility(kvm, 76))
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
switch (attr->attr) {
|
||||
case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
|
||||
get_random_bytes(
|
||||
kvm->arch.crypto.crycb->aes_wrapping_key_mask,
|
||||
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
|
||||
kvm->arch.crypto.aes_kw = 1;
|
||||
break;
|
||||
case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
|
||||
get_random_bytes(
|
||||
kvm->arch.crypto.crycb->dea_wrapping_key_mask,
|
||||
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
|
||||
kvm->arch.crypto.dea_kw = 1;
|
||||
break;
|
||||
case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
|
||||
kvm->arch.crypto.aes_kw = 0;
|
||||
memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
|
||||
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
|
||||
break;
|
||||
case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
|
||||
kvm->arch.crypto.dea_kw = 0;
|
||||
memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
|
||||
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
|
||||
break;
|
||||
default:
|
||||
mutex_unlock(&kvm->lock);
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
kvm_s390_vcpu_crypto_setup(vcpu);
|
||||
exit_sie(vcpu);
|
||||
}
|
||||
mutex_unlock(&kvm->lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
u8 gtod_high;
|
||||
|
||||
if (copy_from_user(&gtod_high, (void __user *)attr->addr,
|
||||
sizeof(gtod_high)))
|
||||
return -EFAULT;
|
||||
|
||||
if (gtod_high != 0)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
struct kvm_vcpu *cur_vcpu;
|
||||
unsigned int vcpu_idx;
|
||||
u64 host_tod, gtod;
|
||||
int r;
|
||||
|
||||
if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
|
||||
return -EFAULT;
|
||||
|
||||
r = store_tod_clock(&host_tod);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
kvm->arch.epoch = gtod - host_tod;
|
||||
kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) {
|
||||
cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
|
||||
exit_sie(cur_vcpu);
|
||||
}
|
||||
mutex_unlock(&kvm->lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (attr->flags)
|
||||
return -EINVAL;
|
||||
|
||||
switch (attr->attr) {
|
||||
case KVM_S390_VM_TOD_HIGH:
|
||||
ret = kvm_s390_set_tod_high(kvm, attr);
|
||||
break;
|
||||
case KVM_S390_VM_TOD_LOW:
|
||||
ret = kvm_s390_set_tod_low(kvm, attr);
|
||||
break;
|
||||
default:
|
||||
ret = -ENXIO;
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
u8 gtod_high = 0;
|
||||
|
||||
if (copy_to_user((void __user *)attr->addr, &gtod_high,
|
||||
sizeof(gtod_high)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
u64 host_tod, gtod;
|
||||
int r;
|
||||
|
||||
r = store_tod_clock(&host_tod);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
gtod = host_tod + kvm->arch.epoch;
|
||||
if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (attr->flags)
|
||||
return -EINVAL;
|
||||
|
||||
switch (attr->attr) {
|
||||
case KVM_S390_VM_TOD_HIGH:
|
||||
ret = kvm_s390_get_tod_high(kvm, attr);
|
||||
break;
|
||||
case KVM_S390_VM_TOD_LOW:
|
||||
ret = kvm_s390_get_tod_low(kvm, attr);
|
||||
break;
|
||||
default:
|
||||
ret = -ENXIO;
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
struct kvm_s390_vm_cpu_processor *proc;
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
if (atomic_read(&kvm->online_vcpus)) {
|
||||
ret = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
proc = kzalloc(sizeof(*proc), GFP_KERNEL);
|
||||
if (!proc) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
if (!copy_from_user(proc, (void __user *)attr->addr,
|
||||
sizeof(*proc))) {
|
||||
memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
|
||||
sizeof(struct cpuid));
|
||||
kvm->arch.model.ibc = proc->ibc;
|
||||
memcpy(kvm->arch.model.fac->kvm, proc->fac_list,
|
||||
S390_ARCH_FAC_LIST_SIZE_BYTE);
|
||||
} else
|
||||
ret = -EFAULT;
|
||||
kfree(proc);
|
||||
out:
|
||||
mutex_unlock(&kvm->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret = -ENXIO;
|
||||
|
||||
switch (attr->attr) {
|
||||
case KVM_S390_VM_CPU_PROCESSOR:
|
||||
ret = kvm_s390_set_processor(kvm, attr);
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
struct kvm_s390_vm_cpu_processor *proc;
|
||||
int ret = 0;
|
||||
|
||||
proc = kzalloc(sizeof(*proc), GFP_KERNEL);
|
||||
if (!proc) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
|
||||
proc->ibc = kvm->arch.model.ibc;
|
||||
memcpy(&proc->fac_list, kvm->arch.model.fac->kvm, S390_ARCH_FAC_LIST_SIZE_BYTE);
|
||||
if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
|
||||
ret = -EFAULT;
|
||||
kfree(proc);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
struct kvm_s390_vm_cpu_machine *mach;
|
||||
int ret = 0;
|
||||
|
||||
mach = kzalloc(sizeof(*mach), GFP_KERNEL);
|
||||
if (!mach) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
get_cpu_id((struct cpuid *) &mach->cpuid);
|
||||
mach->ibc = sclp_get_ibc();
|
||||
memcpy(&mach->fac_mask, kvm_s390_fac_list_mask,
|
||||
kvm_s390_fac_list_mask_size() * sizeof(u64));
|
||||
memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
|
||||
S390_ARCH_FAC_LIST_SIZE_U64);
|
||||
if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
|
||||
ret = -EFAULT;
|
||||
kfree(mach);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret = -ENXIO;
|
||||
|
||||
switch (attr->attr) {
|
||||
case KVM_S390_VM_CPU_PROCESSOR:
|
||||
ret = kvm_s390_get_processor(kvm, attr);
|
||||
break;
|
||||
case KVM_S390_VM_CPU_MACHINE:
|
||||
ret = kvm_s390_get_machine(kvm, attr);
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret;
|
||||
|
||||
switch (attr->group) {
|
||||
case KVM_S390_VM_MEM_CTRL:
|
||||
ret = kvm_s390_mem_control(kvm, attr);
|
||||
ret = kvm_s390_set_mem_control(kvm, attr);
|
||||
break;
|
||||
case KVM_S390_VM_TOD:
|
||||
ret = kvm_s390_set_tod(kvm, attr);
|
||||
break;
|
||||
case KVM_S390_VM_CPU_MODEL:
|
||||
ret = kvm_s390_set_cpu_model(kvm, attr);
|
||||
break;
|
||||
case KVM_S390_VM_CRYPTO:
|
||||
ret = kvm_s390_vm_set_crypto(kvm, attr);
|
||||
break;
|
||||
default:
|
||||
ret = -ENXIO;
|
||||
|
@ -308,7 +629,24 @@ static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
|
|||
|
||||
static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
return -ENXIO;
|
||||
int ret;
|
||||
|
||||
switch (attr->group) {
|
||||
case KVM_S390_VM_MEM_CTRL:
|
||||
ret = kvm_s390_get_mem_control(kvm, attr);
|
||||
break;
|
||||
case KVM_S390_VM_TOD:
|
||||
ret = kvm_s390_get_tod(kvm, attr);
|
||||
break;
|
||||
case KVM_S390_VM_CPU_MODEL:
|
||||
ret = kvm_s390_get_cpu_model(kvm, attr);
|
||||
break;
|
||||
default:
|
||||
ret = -ENXIO;
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
|
@ -320,6 +658,42 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
|
|||
switch (attr->attr) {
|
||||
case KVM_S390_VM_MEM_ENABLE_CMMA:
|
||||
case KVM_S390_VM_MEM_CLR_CMMA:
|
||||
case KVM_S390_VM_MEM_LIMIT_SIZE:
|
||||
ret = 0;
|
||||
break;
|
||||
default:
|
||||
ret = -ENXIO;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case KVM_S390_VM_TOD:
|
||||
switch (attr->attr) {
|
||||
case KVM_S390_VM_TOD_LOW:
|
||||
case KVM_S390_VM_TOD_HIGH:
|
||||
ret = 0;
|
||||
break;
|
||||
default:
|
||||
ret = -ENXIO;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case KVM_S390_VM_CPU_MODEL:
|
||||
switch (attr->attr) {
|
||||
case KVM_S390_VM_CPU_PROCESSOR:
|
||||
case KVM_S390_VM_CPU_MACHINE:
|
||||
ret = 0;
|
||||
break;
|
||||
default:
|
||||
ret = -ENXIO;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case KVM_S390_VM_CRYPTO:
|
||||
switch (attr->attr) {
|
||||
case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
|
||||
case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
|
||||
case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
|
||||
case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
|
||||
ret = 0;
|
||||
break;
|
||||
default:
|
||||
|
@ -401,9 +775,61 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|||
return r;
|
||||
}
|
||||
|
||||
static int kvm_s390_query_ap_config(u8 *config)
|
||||
{
|
||||
u32 fcn_code = 0x04000000UL;
|
||||
u32 cc;
|
||||
|
||||
asm volatile(
|
||||
"lgr 0,%1\n"
|
||||
"lgr 2,%2\n"
|
||||
".long 0xb2af0000\n" /* PQAP(QCI) */
|
||||
"ipm %0\n"
|
||||
"srl %0,28\n"
|
||||
: "=r" (cc)
|
||||
: "r" (fcn_code), "r" (config)
|
||||
: "cc", "0", "2", "memory"
|
||||
);
|
||||
|
||||
return cc;
|
||||
}
|
||||
|
||||
static int kvm_s390_apxa_installed(void)
|
||||
{
|
||||
u8 config[128];
|
||||
int cc;
|
||||
|
||||
if (test_facility(2) && test_facility(12)) {
|
||||
cc = kvm_s390_query_ap_config(config);
|
||||
|
||||
if (cc)
|
||||
pr_err("PQAP(QCI) failed with cc=%d", cc);
|
||||
else
|
||||
return config[0] & 0x40;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_s390_set_crycb_format(struct kvm *kvm)
|
||||
{
|
||||
kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
|
||||
|
||||
if (kvm_s390_apxa_installed())
|
||||
kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
|
||||
else
|
||||
kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
|
||||
}
|
||||
|
||||
static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
|
||||
{
|
||||
get_cpu_id(cpu_id);
|
||||
cpu_id->version = 0xff;
|
||||
}
|
||||
|
||||
static int kvm_s390_crypto_init(struct kvm *kvm)
|
||||
{
|
||||
if (!test_vfacility(76))
|
||||
if (!test_kvm_facility(kvm, 76))
|
||||
return 0;
|
||||
|
||||
kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
|
||||
|
@ -411,15 +837,18 @@ static int kvm_s390_crypto_init(struct kvm *kvm)
|
|||
if (!kvm->arch.crypto.crycb)
|
||||
return -ENOMEM;
|
||||
|
||||
kvm->arch.crypto.crycbd = (__u32) (unsigned long) kvm->arch.crypto.crycb |
|
||||
CRYCB_FORMAT1;
|
||||
kvm_s390_set_crycb_format(kvm);
|
||||
|
||||
/* Disable AES/DEA protected key functions by default */
|
||||
kvm->arch.crypto.aes_kw = 0;
|
||||
kvm->arch.crypto.dea_kw = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
{
|
||||
int rc;
|
||||
int i, rc;
|
||||
char debug_name[16];
|
||||
static unsigned long sca_offset;
|
||||
|
||||
|
@ -454,6 +883,46 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
|||
if (!kvm->arch.dbf)
|
||||
goto out_nodbf;
|
||||
|
||||
/*
|
||||
* The architectural maximum amount of facilities is 16 kbit. To store
|
||||
* this amount, 2 kbyte of memory is required. Thus we need a full
|
||||
* page to hold the active copy (arch.model.fac->sie) and the current
|
||||
* facilities set (arch.model.fac->kvm). Its address size has to be
|
||||
* 31 bits and word aligned.
|
||||
*/
|
||||
kvm->arch.model.fac =
|
||||
(struct s390_model_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
|
||||
if (!kvm->arch.model.fac)
|
||||
goto out_nofac;
|
||||
|
||||
memcpy(kvm->arch.model.fac->kvm, S390_lowcore.stfle_fac_list,
|
||||
S390_ARCH_FAC_LIST_SIZE_U64);
|
||||
|
||||
/*
|
||||
* If this KVM host runs *not* in a LPAR, relax the facility bits
|
||||
* of the kvm facility mask by all missing facilities. This will allow
|
||||
* to determine the right CPU model by means of the remaining facilities.
|
||||
* Live guest migration must prohibit the migration of KVMs running in
|
||||
* a LPAR to non LPAR hosts.
|
||||
*/
|
||||
if (!MACHINE_IS_LPAR)
|
||||
for (i = 0; i < kvm_s390_fac_list_mask_size(); i++)
|
||||
kvm_s390_fac_list_mask[i] &= kvm->arch.model.fac->kvm[i];
|
||||
|
||||
/*
|
||||
* Apply the kvm facility mask to limit the kvm supported/tolerated
|
||||
* facility list.
|
||||
*/
|
||||
for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
|
||||
if (i < kvm_s390_fac_list_mask_size())
|
||||
kvm->arch.model.fac->kvm[i] &= kvm_s390_fac_list_mask[i];
|
||||
else
|
||||
kvm->arch.model.fac->kvm[i] = 0UL;
|
||||
}
|
||||
|
||||
kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
|
||||
kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff;
|
||||
|
||||
if (kvm_s390_crypto_init(kvm) < 0)
|
||||
goto out_crypto;
|
||||
|
||||
|
@ -477,6 +946,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
|||
|
||||
kvm->arch.css_support = 0;
|
||||
kvm->arch.use_irqchip = 0;
|
||||
kvm->arch.epoch = 0;
|
||||
|
||||
spin_lock_init(&kvm->arch.start_stop_lock);
|
||||
|
||||
|
@ -484,6 +954,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
|||
out_nogmap:
|
||||
kfree(kvm->arch.crypto.crycb);
|
||||
out_crypto:
|
||||
free_page((unsigned long)kvm->arch.model.fac);
|
||||
out_nofac:
|
||||
debug_unregister(kvm->arch.dbf);
|
||||
out_nodbf:
|
||||
free_page((unsigned long)(kvm->arch.sca));
|
||||
|
@ -536,6 +1008,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
|
|||
void kvm_arch_destroy_vm(struct kvm *kvm)
|
||||
{
|
||||
kvm_free_vcpus(kvm);
|
||||
free_page((unsigned long)kvm->arch.model.fac);
|
||||
free_page((unsigned long)(kvm->arch.sca));
|
||||
debug_unregister(kvm->arch.dbf);
|
||||
kfree(kvm->arch.crypto.crycb);
|
||||
|
@ -546,25 +1019,30 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
|
|||
}
|
||||
|
||||
/* Section: vcpu related */
|
||||
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
|
||||
kvm_clear_async_pf_completion_queue(vcpu);
|
||||
if (kvm_is_ucontrol(vcpu->kvm)) {
|
||||
vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
|
||||
if (!vcpu->arch.gmap)
|
||||
return -ENOMEM;
|
||||
vcpu->arch.gmap->private = vcpu->kvm;
|
||||
return 0;
|
||||
}
|
||||
|
||||
vcpu->arch.gmap = vcpu->kvm->arch.gmap;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
|
||||
kvm_clear_async_pf_completion_queue(vcpu);
|
||||
vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
|
||||
KVM_SYNC_GPRS |
|
||||
KVM_SYNC_ACRS |
|
||||
KVM_SYNC_CRS |
|
||||
KVM_SYNC_ARCH0 |
|
||||
KVM_SYNC_PFAULT;
|
||||
|
||||
if (kvm_is_ucontrol(vcpu->kvm))
|
||||
return __kvm_ucontrol_vcpu_init(vcpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -615,16 +1093,27 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
|
|||
kvm_s390_clear_local_irqs(vcpu);
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
||||
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return 0;
|
||||
mutex_lock(&vcpu->kvm->lock);
|
||||
vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
|
||||
mutex_unlock(&vcpu->kvm->lock);
|
||||
if (!kvm_is_ucontrol(vcpu->kvm))
|
||||
vcpu->arch.gmap = vcpu->kvm->arch.gmap;
|
||||
}
|
||||
|
||||
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!test_vfacility(76))
|
||||
if (!test_kvm_facility(vcpu->kvm, 76))
|
||||
return;
|
||||
|
||||
vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
|
||||
|
||||
if (vcpu->kvm->arch.crypto.aes_kw)
|
||||
vcpu->arch.sie_block->ecb3 |= ECB3_AES;
|
||||
if (vcpu->kvm->arch.crypto.dea_kw)
|
||||
vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
|
||||
|
||||
vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
|
||||
}
|
||||
|
||||
|
@ -654,14 +1143,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
|
|||
CPUSTAT_STOPPED |
|
||||
CPUSTAT_GED);
|
||||
vcpu->arch.sie_block->ecb = 6;
|
||||
if (test_vfacility(50) && test_vfacility(73))
|
||||
if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
|
||||
vcpu->arch.sie_block->ecb |= 0x10;
|
||||
|
||||
vcpu->arch.sie_block->ecb2 = 8;
|
||||
vcpu->arch.sie_block->eca = 0xD1002000U;
|
||||
vcpu->arch.sie_block->eca = 0xC1002000U;
|
||||
if (sclp_has_siif())
|
||||
vcpu->arch.sie_block->eca |= 1;
vcpu->arch.sie_block->fac = (int) (long) vfacilities;
if (sclp_has_sigpif())
vcpu->arch.sie_block->eca |= 0x10000000U;
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE |
ICTL_TPROT;

@@ -670,10 +1160,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
if (rc)
return rc;
}
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
get_cpu_id(&vcpu->arch.cpu_id);
vcpu->arch.cpu_id.version = 0xff;

mutex_lock(&vcpu->kvm->lock);
vcpu->arch.cpu_id = vcpu->kvm->arch.model.cpu_id;
memcpy(vcpu->kvm->arch.model.fac->sie, vcpu->kvm->arch.model.fac->kvm,
S390_ARCH_FAC_LIST_SIZE_BYTE);
vcpu->arch.sie_block->ibc = vcpu->kvm->arch.model.ibc;
mutex_unlock(&vcpu->kvm->lock);

kvm_s390_vcpu_crypto_setup(vcpu);

@@ -717,6 +1212,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
}
vcpu->arch.sie_block->fac = (int) (long) kvm->arch.model.fac->sie;

spin_lock_init(&vcpu->arch.local_int.lock);
vcpu->arch.local_int.float_int = &kvm->arch.float_int;

@@ -741,7 +1237,7 @@ out:

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return kvm_cpu_has_interrupt(vcpu);
return kvm_s390_vcpu_has_irq(vcpu, 0);
}

void s390_vcpu_block(struct kvm_vcpu *vcpu)

@@ -869,6 +1365,8 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
case KVM_REG_S390_PFTOKEN:
r = get_user(vcpu->arch.pfault_token,
(u64 __user *)reg->addr);
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
kvm_clear_async_pf_completion_queue(vcpu);
break;
case KVM_REG_S390_PFCOMPARE:
r = get_user(vcpu->arch.pfault_compare,

@@ -1176,7 +1674,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
return 0;
if (psw_extint_disabled(vcpu))
return 0;
if (kvm_cpu_has_interrupt(vcpu))
if (kvm_s390_vcpu_has_irq(vcpu, 0))
return 0;
if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
return 0;

@@ -1341,6 +1839,8 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu->arch.pfault_token = kvm_run->s.regs.pft;
vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
kvm_clear_async_pf_completion_queue(vcpu);
}
kvm_run->kvm_dirty_regs = 0;
}

@@ -1559,15 +2059,10 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

/* Need to lock access to action_bits to avoid a SIGP race condition */
spin_lock(&vcpu->arch.local_int.lock);
atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);

/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
vcpu->arch.local_int.action_bits &=
~(ACTION_STOP_ON_STOP | ACTION_STORE_ON_STOP);
spin_unlock(&vcpu->arch.local_int.lock);
kvm_s390_clear_stop_irq(vcpu);

atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
__disable_ibs_on_vcpu(vcpu);

for (i = 0; i < online_vcpus; i++) {

@@ -1783,30 +2278,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,

static int __init kvm_s390_init(void)
{
int ret;
ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
if (ret)
return ret;

/*
* guests can ask for up to 255+1 double words, we need a full page
* to hold the maximum amount of facilities. On the other hand, we
* only set facilities that are known to work in KVM.
*/
vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
if (!vfacilities) {
kvm_exit();
return -ENOMEM;
}
memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
vfacilities[0] &= 0xff82fffbf47c2000UL;
vfacilities[1] &= 0x005c000000000000UL;
return 0;
return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
free_page((unsigned long) vfacilities);
kvm_exit();
}

@@ -18,12 +18,10 @@
#include <linux/hrtimer.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/facility.h>

typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);

/* declare vfacilities extern */
extern unsigned long *vfacilities;

/* Transactional Memory Execution related macros */
#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10))
#define TDB_FORMAT1 1

@@ -127,6 +125,12 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
vcpu->arch.sie_block->gpsw.mask |= cc << 44;
}

/* test availability of facility in a kvm intance */
static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr)
{
return __test_facility(nr, kvm->arch.model.fac->kvm);
}

/* are cpu states controlled by user space */
static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
{

@@ -183,7 +187,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
/* is cmma enabled */
bool kvm_s390_cmma_enabled(struct kvm *kvm);
int test_vfacility(unsigned long nr);
unsigned long kvm_s390_fac_list_mask_size(void);
extern unsigned long kvm_s390_fac_list_mask[];

/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);

@@ -228,11 +233,13 @@ int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
struct kvm_s390_irq *s390irq);

/* implemented in interrupt.c */
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop);
int psw_extint_disabled(struct kvm_vcpu *vcpu);
void kvm_s390_destroy_adapters(struct kvm *kvm);
int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu);
int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu);
extern struct kvm_device_ops kvm_flic_ops;
int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu);
void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu);

/* implemented in guestdbg.c */
void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);

@@ -337,19 +337,24 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
static int handle_stfl(struct kvm_vcpu *vcpu)
{
int rc;
unsigned int fac;

vcpu->stat.instruction_stfl++;

if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);

/*
* We need to shift the lower 32 facility bits (bit 0-31) from a u64
* into a u32 memory representation. They will remain bits 0-31.
*/
fac = *vcpu->kvm->arch.model.fac->sie >> 32;
rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list),
vfacilities, 4);
&fac, sizeof(fac));
if (rc)
return rc;
VCPU_EVENT(vcpu, 5, "store facility list value %x",
*(unsigned int *) vfacilities);
trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities);
VCPU_EVENT(vcpu, 5, "store facility list value %x", fac);
trace_kvm_s390_handle_stfl(vcpu, fac);
return 0;
}

@@ -26,15 +26,17 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
struct kvm_s390_local_interrupt *li;
int cpuflags;
int rc;
int ext_call_pending;

li = &dst_vcpu->arch.local_int;

cpuflags = atomic_read(li->cpuflags);
if (!(cpuflags & (CPUSTAT_ECALL_PEND | CPUSTAT_STOPPED)))
ext_call_pending = kvm_s390_ext_call_pending(dst_vcpu);
if (!(cpuflags & CPUSTAT_STOPPED) && !ext_call_pending)
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
else {
*reg &= 0xffffffff00000000UL;
if (cpuflags & CPUSTAT_ECALL_PEND)
if (ext_call_pending)
*reg |= SIGP_STATUS_EXT_CALL_PENDING;
if (cpuflags & CPUSTAT_STOPPED)
*reg |= SIGP_STATUS_STOPPED;
@ -96,7 +98,7 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu,
|
|||
}
|
||||
|
||||
static int __sigp_external_call(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu *dst_vcpu)
|
||||
struct kvm_vcpu *dst_vcpu, u64 *reg)
|
||||
{
|
||||
struct kvm_s390_irq irq = {
|
||||
.type = KVM_S390_INT_EXTERNAL_CALL,
|
||||
|
@ -105,45 +107,31 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu,
|
|||
int rc;
|
||||
|
||||
rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
|
||||
if (!rc)
|
||||
if (rc == -EBUSY) {
|
||||
*reg &= 0xffffffff00000000UL;
|
||||
*reg |= SIGP_STATUS_EXT_CALL_PENDING;
|
||||
return SIGP_CC_STATUS_STORED;
|
||||
} else if (rc == 0) {
|
||||
VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x",
|
||||
dst_vcpu->vcpu_id);
|
||||
}
|
||||
|
||||
return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
|
||||
}
|
||||
|
||||
static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action)
|
||||
{
|
||||
struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
|
||||
int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
|
||||
|
||||
spin_lock(&li->lock);
|
||||
if (li->action_bits & ACTION_STOP_ON_STOP) {
|
||||
/* another SIGP STOP is pending */
|
||||
rc = SIGP_CC_BUSY;
|
||||
goto out;
|
||||
}
|
||||
if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
|
||||
if ((action & ACTION_STORE_ON_STOP) != 0)
|
||||
rc = -ESHUTDOWN;
|
||||
goto out;
|
||||
}
|
||||
set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
|
||||
li->action_bits |= action;
|
||||
atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
|
||||
kvm_s390_vcpu_wakeup(dst_vcpu);
|
||||
out:
|
||||
spin_unlock(&li->lock);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
|
||||
{
|
||||
struct kvm_s390_irq irq = {
|
||||
.type = KVM_S390_SIGP_STOP,
|
||||
};
|
||||
int rc;
|
||||
|
||||
rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP);
|
||||
VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", dst_vcpu->vcpu_id);
|
||||
rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
|
||||
if (rc == -EBUSY)
|
||||
rc = SIGP_CC_BUSY;
|
||||
else if (rc == 0)
|
||||
VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x",
|
||||
dst_vcpu->vcpu_id);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
@ -151,21 +139,19 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
|
|||
static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu *dst_vcpu, u64 *reg)
|
||||
{
|
||||
struct kvm_s390_irq irq = {
|
||||
.type = KVM_S390_SIGP_STOP,
|
||||
.u.stop.flags = KVM_S390_STOP_FLAG_STORE_STATUS,
|
||||
};
|
||||
int rc;
|
||||
|
||||
rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP |
|
||||
ACTION_STORE_ON_STOP);
|
||||
rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
|
||||
if (rc == -EBUSY)
|
||||
rc = SIGP_CC_BUSY;
|
||||
else if (rc == 0)
|
||||
VCPU_EVENT(vcpu, 4, "sent sigp stop and store status to cpu %x",
|
||||
dst_vcpu->vcpu_id);
|
||||
|
||||
if (rc == -ESHUTDOWN) {
|
||||
/* If the CPU has already been stopped, we still have
|
||||
* to save the status when doing stop-and-store. This
|
||||
* has to be done after unlocking all spinlocks. */
|
||||
rc = kvm_s390_store_status_unloaded(dst_vcpu,
|
||||
KVM_S390_STORE_STATUS_NOADDR);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -197,41 +183,33 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
|
|||
static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
|
||||
u32 address, u64 *reg)
|
||||
{
|
||||
struct kvm_s390_local_interrupt *li;
|
||||
struct kvm_s390_irq irq = {
|
||||
.type = KVM_S390_SIGP_SET_PREFIX,
|
||||
.u.prefix.address = address & 0x7fffe000u,
|
||||
};
|
||||
int rc;
|
||||
|
||||
li = &dst_vcpu->arch.local_int;
|
||||
|
||||
/*
|
||||
* Make sure the new value is valid memory. We only need to check the
|
||||
* first page, since address is 8k aligned and memory pieces are always
|
||||
* at least 1MB aligned and have at least a size of 1MB.
|
||||
*/
|
||||
address &= 0x7fffe000u;
|
||||
if (kvm_is_error_gpa(vcpu->kvm, address)) {
|
||||
if (kvm_is_error_gpa(vcpu->kvm, irq.u.prefix.address)) {
|
||||
*reg &= 0xffffffff00000000UL;
|
||||
*reg |= SIGP_STATUS_INVALID_PARAMETER;
|
||||
return SIGP_CC_STATUS_STORED;
|
||||
}
|
||||
|
||||
spin_lock(&li->lock);
|
||||
/* cpu must be in stopped state */
|
||||
if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
|
||||
rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
|
||||
if (rc == -EBUSY) {
|
||||
*reg &= 0xffffffff00000000UL;
|
||||
*reg |= SIGP_STATUS_INCORRECT_STATE;
|
||||
rc = SIGP_CC_STATUS_STORED;
|
||||
goto out_li;
|
||||
return SIGP_CC_STATUS_STORED;
|
||||
} else if (rc == 0) {
|
||||
VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x",
|
||||
dst_vcpu->vcpu_id, irq.u.prefix.address);
|
||||
}
|
||||
|
||||
li->irq.prefix.address = address;
|
||||
set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
|
||||
kvm_s390_vcpu_wakeup(dst_vcpu);
|
||||
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
|
||||
|
||||
VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", dst_vcpu->vcpu_id,
|
||||
address);
|
||||
out_li:
|
||||
spin_unlock(&li->lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -242,9 +220,7 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu,
|
|||
int flags;
|
||||
int rc;
|
||||
|
||||
spin_lock(&dst_vcpu->arch.local_int.lock);
|
||||
flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
|
||||
spin_unlock(&dst_vcpu->arch.local_int.lock);
|
||||
if (!(flags & CPUSTAT_STOPPED)) {
|
||||
*reg &= 0xffffffff00000000UL;
|
||||
*reg |= SIGP_STATUS_INCORRECT_STATE;
|
||||
|
@ -291,8 +267,9 @@ static int __prepare_sigp_re_start(struct kvm_vcpu *vcpu,
|
|||
/* handle (RE)START in user space */
|
||||
int rc = -EOPNOTSUPP;
|
||||
|
||||
/* make sure we don't race with STOP irq injection */
|
||||
spin_lock(&li->lock);
|
||||
if (li->action_bits & ACTION_STOP_ON_STOP)
|
||||
if (kvm_s390_is_stop_irq_pending(dst_vcpu))
|
||||
rc = SIGP_CC_BUSY;
|
||||
spin_unlock(&li->lock);
|
||||
|
||||
|
@ -333,7 +310,7 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
|
|||
break;
|
||||
case SIGP_EXTERNAL_CALL:
|
||||
vcpu->stat.instruction_sigp_external_call++;
|
||||
rc = __sigp_external_call(vcpu, dst_vcpu);
|
||||
rc = __sigp_external_call(vcpu, dst_vcpu, status_reg);
|
||||
break;
|
||||
case SIGP_EMERGENCY_SIGNAL:
|
||||
vcpu->stat.instruction_sigp_emergency++;
|
||||
|
@ -394,6 +371,53 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
|
|||
return rc;
|
||||
}
|
||||
|
||||
static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code)
|
||||
{
|
||||
if (!vcpu->kvm->arch.user_sigp)
|
||||
return 0;
|
||||
|
||||
switch (order_code) {
|
||||
case SIGP_SENSE:
|
||||
case SIGP_EXTERNAL_CALL:
|
||||
case SIGP_EMERGENCY_SIGNAL:
|
||||
case SIGP_COND_EMERGENCY_SIGNAL:
|
||||
case SIGP_SENSE_RUNNING:
|
||||
return 0;
|
||||
/* update counters as we're directly dropping to user space */
|
||||
case SIGP_STOP:
|
||||
vcpu->stat.instruction_sigp_stop++;
|
||||
break;
|
||||
case SIGP_STOP_AND_STORE_STATUS:
|
||||
vcpu->stat.instruction_sigp_stop_store_status++;
|
||||
break;
|
||||
case SIGP_STORE_STATUS_AT_ADDRESS:
|
||||
vcpu->stat.instruction_sigp_store_status++;
|
||||
break;
|
||||
case SIGP_SET_PREFIX:
|
||||
vcpu->stat.instruction_sigp_prefix++;
|
||||
break;
|
||||
case SIGP_START:
|
||||
vcpu->stat.instruction_sigp_start++;
|
||||
break;
|
||||
case SIGP_RESTART:
|
||||
vcpu->stat.instruction_sigp_restart++;
|
||||
break;
|
||||
case SIGP_INITIAL_CPU_RESET:
|
||||
vcpu->stat.instruction_sigp_init_cpu_reset++;
|
||||
break;
|
||||
case SIGP_CPU_RESET:
|
||||
vcpu->stat.instruction_sigp_cpu_reset++;
|
||||
break;
|
||||
default:
|
||||
vcpu->stat.instruction_sigp_unknown++;
|
||||
}
|
||||
|
||||
VCPU_EVENT(vcpu, 4, "sigp order %u: completely handled in user space",
|
||||
order_code);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
|
||||
|
@ -408,6 +432,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
|
|||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
order_code = kvm_s390_get_base_disp_rs(vcpu);
|
||||
if (handle_sigp_order_in_user_space(vcpu, order_code))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (r1 % 2)
|
||||
parameter = vcpu->run->s.regs.gprs[r1];
|
||||
|
|
|
@@ -209,19 +209,21 @@ TRACE_EVENT(kvm_s390_request_resets,
* Trace point for a vcpu's stop requests.
*/
TRACE_EVENT(kvm_s390_stop_request,
TP_PROTO(unsigned int action_bits),
TP_ARGS(action_bits),
TP_PROTO(unsigned char stop_irq, unsigned char flags),
TP_ARGS(stop_irq, flags),

TP_STRUCT__entry(
__field(unsigned int, action_bits)
__field(unsigned char, stop_irq)
__field(unsigned char, flags)
),

TP_fast_assign(
__entry->action_bits = action_bits;
__entry->stop_irq = stop_irq;
__entry->flags = flags;
),

TP_printk("stop request, action_bits = %08x",
__entry->action_bits)
TP_printk("stop request, stop irq = %u, flags = %08x",
__entry->stop_irq, __entry->flags)
);

@@ -208,6 +208,7 @@ struct x86_emulate_ops {

void (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
};

typedef u32 __attribute__((vector_size(16))) sse128_t;

@@ -38,8 +38,6 @@
#define KVM_PRIVATE_MEM_SLOTS 3
#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)

#define KVM_MMIO_SIZE 16

#define KVM_PIO_PAGE_OFFSET 1
#define KVM_COALESCED_MMIO_PAGE_OFFSET 2

@@ -51,7 +49,7 @@
| X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))

#define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL
#define CR3_PCID_INVD (1UL << 63)
#define CR3_PCID_INVD BIT_64(63)
#define CR4_RESERVED_BITS \
(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \

@@ -160,6 +158,18 @@ enum {
#define DR7_FIXED_1 0x00000400
#define DR7_VOLATILE 0xffff2bff

#define PFERR_PRESENT_BIT 0
#define PFERR_WRITE_BIT 1
#define PFERR_USER_BIT 2
#define PFERR_RSVD_BIT 3
#define PFERR_FETCH_BIT 4

#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)

/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC 0
/*

@@ -615,6 +625,8 @@ struct kvm_arch {
#ifdef CONFIG_KVM_MMU_AUDIT
int audit_point;
#endif

bool boot_vcpu_runs_old_kvmclock;
};

struct kvm_vm_stat {

@@ -643,6 +655,7 @@ struct kvm_vcpu_stat {
u32 irq_window_exits;
u32 nmi_window_exits;
u32 halt_exits;
u32 halt_successful_poll;
u32 halt_wakeup;
u32 request_irq_exits;
u32 irq_exits;

@@ -787,6 +800,31 @@ struct kvm_x86_ops {
int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);

void (*sched_in)(struct kvm_vcpu *kvm, int cpu);

/*
* Arch-specific dirty logging hooks. These hooks are only supposed to
* be valid if the specific arch has hardware-accelerated dirty logging
* mechanism. Currently only for PML on VMX.
*
* - slot_enable_log_dirty:
* called when enabling log dirty mode for the slot.
* - slot_disable_log_dirty:
* called when disabling log dirty mode for the slot.
* also called when slot is created with log dirty disabled.
* - flush_log_dirty:
* called before reporting dirty_bitmap to userspace.
* - enable_log_dirty_pt_masked:
* called when reenabling log dirty for the GFNs in the mask after
* corresponding bits are cleared in slot->dirty_bitmap.
*/
void (*slot_enable_log_dirty)(struct kvm *kvm,
struct kvm_memory_slot *slot);
void (*slot_disable_log_dirty)(struct kvm *kvm,
struct kvm_memory_slot *slot);
void (*flush_log_dirty)(struct kvm *kvm);
void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t offset, unsigned long mask);
};

struct kvm_arch_async_pf {

@@ -819,8 +857,15 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask);

void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
struct kvm_memory_slot *memslot);
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
struct kvm_memory_slot *memslot);
void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
struct kvm_memory_slot *memslot);
void kvm_mmu_slot_set_dirty(struct kvm *kvm,
struct kvm_memory_slot *memslot);
void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t gfn_offset, unsigned long mask);
void kvm_mmu_zap_all(struct kvm *kvm);

@@ -69,6 +69,7 @@
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
#define SECONDARY_EXEC_XSAVES 0x00100000

@@ -121,6 +122,7 @@ enum vmcs_field {
GUEST_LDTR_SELECTOR = 0x0000080c,
GUEST_TR_SELECTOR = 0x0000080e,
GUEST_INTR_STATUS = 0x00000810,
GUEST_PML_INDEX = 0x00000812,
HOST_ES_SELECTOR = 0x00000c00,
HOST_CS_SELECTOR = 0x00000c02,
HOST_SS_SELECTOR = 0x00000c04,

@@ -140,6 +142,8 @@ enum vmcs_field {
VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009,
VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a,
VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b,
PML_ADDRESS = 0x0000200e,
PML_ADDRESS_HIGH = 0x0000200f,
TSC_OFFSET = 0x00002010,
TSC_OFFSET_HIGH = 0x00002011,
VIRTUAL_APIC_PAGE_ADDR = 0x00002012,

@@ -364,6 +364,9 @@
#define MSR_IA32_UCODE_WRITE 0x00000079
#define MSR_IA32_UCODE_REV 0x0000008b

#define MSR_IA32_SMM_MONITOR_CTL 0x0000009b
#define MSR_IA32_SMBASE 0x0000009e

#define MSR_IA32_PERF_STATUS 0x00000198
#define MSR_IA32_PERF_CTL 0x00000199
#define INTEL_PERF_CTL_MASK 0xffff

@@ -56,6 +56,7 @@
#define EXIT_REASON_MSR_READ 31
#define EXIT_REASON_MSR_WRITE 32
#define EXIT_REASON_INVALID_STATE 33
#define EXIT_REASON_MSR_LOAD_FAIL 34
#define EXIT_REASON_MWAIT_INSTRUCTION 36
#define EXIT_REASON_MONITOR_INSTRUCTION 39
#define EXIT_REASON_PAUSE_INSTRUCTION 40

@@ -72,6 +73,7 @@
#define EXIT_REASON_XSETBV 55
#define EXIT_REASON_APIC_WRITE 56
#define EXIT_REASON_INVPCID 58
#define EXIT_REASON_PML_FULL 62
#define EXIT_REASON_XSAVES 63
#define EXIT_REASON_XRSTORS 64

@@ -116,10 +118,14 @@
{ EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \
{ EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
{ EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
{ EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \
{ EXIT_REASON_INVD, "INVD" }, \
{ EXIT_REASON_INVVPID, "INVVPID" }, \
{ EXIT_REASON_INVPCID, "INVPCID" }, \
{ EXIT_REASON_XSAVES, "XSAVES" }, \
{ EXIT_REASON_XRSTORS, "XRSTORS" }

#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4

#endif /* _UAPIVMX_H */

@@ -39,6 +39,7 @@ config KVM
select PERF_EVENTS
select HAVE_KVM_MSI
select HAVE_KVM_CPU_RELAX_INTERCEPT
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_VFIO
select SRCU
---help---

@ -86,6 +86,7 @@
|
|||
#define DstAcc (OpAcc << DstShift)
|
||||
#define DstDI (OpDI << DstShift)
|
||||
#define DstMem64 (OpMem64 << DstShift)
|
||||
#define DstMem16 (OpMem16 << DstShift)
|
||||
#define DstImmUByte (OpImmUByte << DstShift)
|
||||
#define DstDX (OpDX << DstShift)
|
||||
#define DstAccLo (OpAccLo << DstShift)
|
||||
|
@ -124,6 +125,7 @@
|
|||
#define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
|
||||
#define Escape (5<<15) /* Escape to coprocessor instruction */
|
||||
#define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */
|
||||
#define ModeDual (7<<15) /* Different instruction for 32/64 bit */
|
||||
#define Sse (1<<18) /* SSE Vector instruction */
|
||||
/* Generic ModRM decode. */
|
||||
#define ModRM (1<<19)
|
||||
|
@ -165,10 +167,10 @@
|
|||
#define NoMod ((u64)1 << 47) /* Mod field is ignored */
|
||||
#define Intercept ((u64)1 << 48) /* Has valid intercept field */
|
||||
#define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
|
||||
#define NoBigReal ((u64)1 << 50) /* No big real mode */
|
||||
#define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
|
||||
#define NearBranch ((u64)1 << 52) /* Near branches */
|
||||
#define No16 ((u64)1 << 53) /* No 16 bit operand */
|
||||
#define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */
|
||||
|
||||
#define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
|
||||
|
||||
|
@ -213,6 +215,7 @@ struct opcode {
|
|||
const struct gprefix *gprefix;
|
||||
const struct escape *esc;
|
||||
const struct instr_dual *idual;
|
||||
const struct mode_dual *mdual;
|
||||
void (*fastop)(struct fastop *fake);
|
||||
} u;
|
||||
int (*check_perm)(struct x86_emulate_ctxt *ctxt);
|
||||
|
@ -240,6 +243,11 @@ struct instr_dual {
|
|||
struct opcode mod3;
|
||||
};
|
||||
|
||||
struct mode_dual {
|
||||
struct opcode mode32;
|
||||
struct opcode mode64;
|
||||
};
|
||||
|
||||
/* EFLAGS bit definitions. */
|
||||
#define EFLG_ID (1<<21)
|
||||
#define EFLG_VIP (1<<20)
|
||||
|
@ -262,6 +270,13 @@ struct instr_dual {
|
|||
#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
|
||||
#define EFLG_RESERVED_ONE_MASK 2
|
||||
|
||||
enum x86_transfer_type {
|
||||
X86_TRANSFER_NONE,
|
||||
X86_TRANSFER_CALL_JMP,
|
||||
X86_TRANSFER_RET,
|
||||
X86_TRANSFER_TASK_SWITCH,
|
||||
};
|
||||
|
||||
static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
|
||||
{
|
||||
if (!(ctxt->regs_valid & (1 << nr))) {
|
||||
|
@ -669,9 +684,13 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
|
|||
}
|
||||
if (addr.ea > lim)
|
||||
goto bad;
|
||||
*max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
|
||||
if (lim == 0xffffffff)
|
||||
*max_size = ~0u;
|
||||
else {
|
||||
*max_size = (u64)lim + 1 - addr.ea;
|
||||
if (size > *max_size)
|
||||
goto bad;
|
||||
}
|
||||
la &= (u32)-1;
|
||||
break;
|
||||
}
|
||||
|
@ -722,19 +741,26 @@ static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
|
|||
const struct desc_struct *cs_desc)
|
||||
{
|
||||
enum x86emul_mode mode = ctxt->mode;
|
||||
int rc;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (ctxt->mode >= X86EMUL_MODE_PROT32 && cs_desc->l) {
|
||||
if (ctxt->mode >= X86EMUL_MODE_PROT16) {
|
||||
if (cs_desc->l) {
|
||||
u64 efer = 0;
|
||||
|
||||
ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
|
||||
if (efer & EFER_LMA)
|
||||
mode = X86EMUL_MODE_PROT64;
|
||||
} else
|
||||
mode = X86EMUL_MODE_PROT32; /* temporary value */
|
||||
}
|
||||
#endif
|
||||
if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
|
||||
mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
|
||||
return assign_eip(ctxt, dst, mode);
|
||||
rc = assign_eip(ctxt, dst, mode);
|
||||
if (rc == X86EMUL_CONTINUE)
|
||||
ctxt->mode = mode;
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
|
||||
|
@ -1057,8 +1083,6 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
|
|||
asm volatile("fnstcw %0": "+m"(fcw));
|
||||
ctxt->ops->put_fpu(ctxt);
|
||||
|
||||
/* force 2 byte destination */
|
||||
ctxt->dst.bytes = 2;
|
||||
ctxt->dst.val = fcw;
|
||||
|
||||
return X86EMUL_CONTINUE;
|
||||
|
@ -1075,8 +1099,6 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
|
|||
asm volatile("fnstsw %0": "+m"(fsw));
|
||||
ctxt->ops->put_fpu(ctxt);
|
||||
|
||||
/* force 2 byte destination */
|
||||
ctxt->dst.bytes = 2;
|
||||
ctxt->dst.val = fsw;
|
||||
|
||||
return X86EMUL_CONTINUE;
|
||||
|
@ -1223,6 +1245,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
|
|||
else {
|
||||
modrm_ea += reg_read(ctxt, base_reg);
|
||||
adjust_modrm_seg(ctxt, base_reg);
|
||||
/* Increment ESP on POP [ESP] */
|
||||
if ((ctxt->d & IncSP) &&
|
||||
base_reg == VCPU_REGS_RSP)
|
||||
modrm_ea += ctxt->op_bytes;
|
||||
}
|
||||
if (index_reg != 4)
|
||||
modrm_ea += reg_read(ctxt, index_reg) << scale;
|
||||
|
@ -1435,28 +1461,8 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
|
|||
ops->get_gdt(ctxt, dt);
|
||||
}
|
||||
|
||||
/* allowed just for 8 bytes segments */
|
||||
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
|
||||
u16 selector, struct desc_struct *desc,
|
||||
ulong *desc_addr_p)
|
||||
{
|
||||
struct desc_ptr dt;
|
||||
u16 index = selector >> 3;
|
||||
ulong addr;
|
||||
|
||||
get_descriptor_table_ptr(ctxt, selector, &dt);
|
||||
|
||||
if (dt.size < index * 8 + 7)
|
||||
return emulate_gp(ctxt, selector & 0xfffc);
|
||||
|
||||
*desc_addr_p = addr = dt.address + index * 8;
|
||||
return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
|
||||
&ctxt->exception);
|
||||
}
|
||||
|
||||
/* allowed just for 8 bytes segments */
|
||||
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
|
||||
u16 selector, struct desc_struct *desc)
|
||||
static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
|
||||
u16 selector, ulong *desc_addr_p)
|
||||
{
|
||||
struct desc_ptr dt;
|
||||
u16 index = selector >> 3;
|
||||
|
@ -1468,6 +1474,47 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
|
|||
return emulate_gp(ctxt, selector & 0xfffc);
|
||||
|
||||
addr = dt.address + index * 8;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (addr >> 32 != 0) {
|
||||
u64 efer = 0;
|
||||
|
||||
ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
|
||||
if (!(efer & EFER_LMA))
|
||||
addr &= (u32)-1;
|
||||
}
|
||||
#endif
|
||||
|
||||
*desc_addr_p = addr;
|
||||
return X86EMUL_CONTINUE;
|
||||
}
|
||||
|
||||
/* allowed just for 8 bytes segments */
|
||||
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
|
||||
u16 selector, struct desc_struct *desc,
|
||||
ulong *desc_addr_p)
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
return rc;
|
||||
|
||||
return ctxt->ops->read_std(ctxt, *desc_addr_p, desc, sizeof(*desc),
|
||||
&ctxt->exception);
|
||||
}
|
||||
|
||||
/* allowed just for 8 bytes segments */
|
||||
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
|
||||
u16 selector, struct desc_struct *desc)
|
||||
{
|
||||
int rc;
|
||||
ulong addr;
|
||||
|
||||
rc = get_descriptor_ptr(ctxt, selector, &addr);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
return rc;
|
||||
|
||||
return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc,
|
||||
&ctxt->exception);
|
||||
}
|
||||
|
@ -1475,7 +1522,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
|
|||
/* Does not support long mode */
|
||||
static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
|
||||
u16 selector, int seg, u8 cpl,
|
||||
bool in_task_switch,
|
||||
enum x86_transfer_type transfer,
|
||||
struct desc_struct *desc)
|
||||
{
|
||||
struct desc_struct seg_desc, old_desc;
|
||||
|
@ -1529,11 +1576,15 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
|
|||
return ret;
|
||||
|
||||
err_code = selector & 0xfffc;
|
||||
err_vec = in_task_switch ? TS_VECTOR : GP_VECTOR;
|
||||
err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
|
||||
GP_VECTOR;
|
||||
|
||||
/* can't load system descriptor into segment selector */
|
||||
if (seg <= VCPU_SREG_GS && !seg_desc.s)
|
||||
if (seg <= VCPU_SREG_GS && !seg_desc.s) {
|
||||
if (transfer == X86_TRANSFER_CALL_JMP)
|
||||
return X86EMUL_UNHANDLEABLE;
|
||||
goto exception;
|
||||
}
|
||||
|
||||
if (!seg_desc.p) {
|
||||
err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
|
||||
|
@ -1605,10 +1656,13 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
|
|||
|
||||
if (seg_desc.s) {
|
||||
/* mark segment as accessed */
|
||||
if (!(seg_desc.type & 1)) {
|
||||
seg_desc.type |= 1;
|
||||
ret = write_segment_descriptor(ctxt, selector, &seg_desc);
|
||||
ret = write_segment_descriptor(ctxt, selector,
|
||||
&seg_desc);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
return ret;
|
||||
}
|
||||
} else if (ctxt->mode == X86EMUL_MODE_PROT64) {
|
||||
ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3,
|
||||
sizeof(base3), &ctxt->exception);
|
||||
|
@ -1631,7 +1685,8 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
|
|||
u16 selector, int seg)
|
||||
{
|
||||
u8 cpl = ctxt->ops->cpl(ctxt);
|
||||
return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
|
||||
return __load_segment_descriptor(ctxt, selector, seg, cpl,
|
||||
X86_TRANSFER_NONE, NULL);
|
||||
}
|
||||
|
||||
static void write_register_operand(struct operand *op)
|
||||
|
@ -1828,12 +1883,14 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
|
|||
unsigned long selector;
|
||||
int rc;
|
||||
|
||||
rc = emulate_pop(ctxt, &selector, ctxt->op_bytes);
|
||||
rc = emulate_pop(ctxt, &selector, 2);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
return rc;
|
||||
|
||||
if (ctxt->modrm_reg == VCPU_SREG_SS)
|
||||
ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
|
||||
if (ctxt->op_bytes > 2)
|
||||
rsp_increment(ctxt, ctxt->op_bytes - 2);
|
||||
|
||||
rc = load_segment_descriptor(ctxt, (u16)selector, seg);
|
||||
return rc;
|
||||
|
@ -2007,6 +2064,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
|
|||
|
||||
ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
|
||||
ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
|
||||
ctxt->ops->set_nmi_mask(ctxt, false);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
@ -2041,7 +2099,8 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
|
|||
|
||||
memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
|
||||
|
||||
rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
|
||||
rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
|
||||
X86_TRANSFER_CALL_JMP,
|
||||
&new_desc);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
return rc;
|
||||
|
@ -2130,7 +2189,8 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
|
|||
/* Outer-privilege level return is not implemented */
|
||||
if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
|
||||
return X86EMUL_UNHANDLEABLE;
|
||||
rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl, false,
|
||||
rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
|
||||
X86_TRANSFER_RET,
|
||||
&new_desc);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
return rc;
|
||||
|
@ -2163,12 +2223,15 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
|
|||
fastop(ctxt, em_cmp);
|
||||
|
||||
if (ctxt->eflags & EFLG_ZF) {
|
||||
/* Success: write back to memory. */
|
||||
/* Success: write back to memory; no update of EAX */
|
||||
ctxt->src.type = OP_NONE;
|
||||
ctxt->dst.val = ctxt->src.orig_val;
|
||||
} else {
|
||||
/* Failure: write the value we saw to EAX. */
|
||||
ctxt->dst.type = OP_REG;
|
||||
ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
|
||||
ctxt->src.type = OP_REG;
|
||||
ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
|
||||
ctxt->src.val = ctxt->dst.orig_val;
|
||||
/* Create write-cycle to dest by writing the same value */
|
||||
ctxt->dst.val = ctxt->dst.orig_val;
|
||||
}
|
||||
return X86EMUL_CONTINUE;
|
||||
|
@ -2556,23 +2619,23 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
|
|||
* it is handled in a context of new task
|
||||
*/
|
||||
ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
|
||||
true, NULL);
|
||||
X86_TRANSFER_TASK_SWITCH, NULL);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
return ret;
|
||||
ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
|
||||
true, NULL);
|
||||
X86_TRANSFER_TASK_SWITCH, NULL);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
return ret;
|
||||
ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
|
||||
true, NULL);
|
||||
X86_TRANSFER_TASK_SWITCH, NULL);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
return ret;
|
||||
ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
|
||||
true, NULL);
|
||||
X86_TRANSFER_TASK_SWITCH, NULL);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
return ret;
|
||||
ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
|
||||
true, NULL);
|
||||
X86_TRANSFER_TASK_SWITCH, NULL);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
return ret;
|
||||
|
||||
|
@ -2694,31 +2757,31 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
|
|||
* it is handled in a context of new task
|
||||
*/
|
||||
ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
|
||||
cpl, true, NULL);
|
||||
cpl, X86_TRANSFER_TASK_SWITCH, NULL);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
return ret;
|
||||
ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
|
||||
true, NULL);
|
||||
X86_TRANSFER_TASK_SWITCH, NULL);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
return ret;
|
||||
ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
|
||||
true, NULL);
|
||||
X86_TRANSFER_TASK_SWITCH, NULL);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
return ret;
|
||||
ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
|
||||
true, NULL);
|
||||
X86_TRANSFER_TASK_SWITCH, NULL);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
return ret;
|
||||
ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
|
||||
true, NULL);
|
||||
X86_TRANSFER_TASK_SWITCH, NULL);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
return ret;
|
||||
ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
|
||||
true, NULL);
|
||||
X86_TRANSFER_TASK_SWITCH, NULL);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
return ret;
|
||||
ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
|
||||
true, NULL);
|
||||
X86_TRANSFER_TASK_SWITCH, NULL);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
return ret;
|
||||
|
||||
|
@ -2739,7 +2802,6 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
|
|||
ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
|
||||
&ctxt->exception);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
/* FIXME: need to provide precise fault address */
|
||||
return ret;
|
||||
|
||||
save_state_to_tss32(ctxt, &tss_seg);
|
||||
|
@ -2748,13 +2810,11 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
|
|||
ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
|
||||
ldt_sel_offset - eip_offset, &ctxt->exception);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
/* FIXME: need to provide precise fault address */
|
||||
return ret;
|
||||
|
||||
ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
|
||||
&ctxt->exception);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
/* FIXME: need to provide precise fault address */
|
||||
return ret;
|
||||
|
||||
if (old_tss_sel != 0xffff) {
|
||||
|
@ -2765,7 +2825,6 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
|
|||
sizeof tss_seg.prev_task_link,
|
||||
&ctxt->exception);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
/* FIXME: need to provide precise fault address */
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -2999,15 +3058,16 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
|
|||
struct desc_struct old_desc, new_desc;
|
||||
const struct x86_emulate_ops *ops = ctxt->ops;
|
||||
int cpl = ctxt->ops->cpl(ctxt);
|
||||
enum x86emul_mode prev_mode = ctxt->mode;
|
||||
|
||||
old_eip = ctxt->_eip;
|
||||
ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
|
||||
|
||||
memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
|
||||
rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
|
||||
&new_desc);
|
||||
rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
|
||||
X86_TRANSFER_CALL_JMP, &new_desc);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
return X86EMUL_CONTINUE;
|
||||
return rc;
|
||||
|
||||
rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
|
@ -3022,11 +3082,14 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
|
|||
rc = em_push(ctxt);
|
||||
/* If we failed, we tainted the memory, but the very least we should
|
||||
restore cs */
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
if (rc != X86EMUL_CONTINUE) {
|
||||
pr_warn_once("faulting far call emulation tainted memory\n");
|
||||
goto fail;
|
||||
}
|
||||
return rc;
|
||||
fail:
|
||||
ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
|
||||
ctxt->mode = prev_mode;
|
||||
return rc;
|
||||
|
||||
}
|
||||
|
@ -3477,6 +3540,12 @@ static int em_clflush(struct x86_emulate_ctxt *ctxt)
|
|||
return X86EMUL_CONTINUE;
|
||||
}
|
||||
|
||||
static int em_movsxd(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
ctxt->dst.val = (s32) ctxt->src.val;
|
||||
return X86EMUL_CONTINUE;
|
||||
}
|
||||
|
||||
static bool valid_cr(int nr)
|
||||
{
|
||||
switch (nr) {
|
||||
|
@ -3676,6 +3745,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
|
|||
#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
|
||||
#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
|
||||
#define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
|
||||
#define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
|
||||
#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
|
||||
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
|
||||
#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
|
||||
|
@ -3738,7 +3808,7 @@ static const struct opcode group1[] = {
|
|||
};
|
||||
|
||||
static const struct opcode group1A[] = {
|
||||
I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N,
|
||||
I(DstMem | SrcNone | Mov | Stack | IncSP, em_pop), N, N, N, N, N, N, N,
|
||||
};
|
||||
|
||||
static const struct opcode group2[] = {
|
||||
|
@ -3854,7 +3924,7 @@ static const struct gprefix pfx_0f_e7 = {
|
|||
};
|
||||
|
||||
static const struct escape escape_d9 = { {
|
||||
N, N, N, N, N, N, N, I(DstMem, em_fnstcw),
|
||||
N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
|
||||
}, {
|
||||
/* 0xC0 - 0xC7 */
|
||||
N, N, N, N, N, N, N, N,
|
||||
|
@ -3896,7 +3966,7 @@ static const struct escape escape_db = { {
|
|||
} };
|
||||
|
||||
static const struct escape escape_dd = { {
|
||||
N, N, N, N, N, N, N, I(DstMem, em_fnstsw),
|
||||
N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
|
||||
}, {
|
||||
/* 0xC0 - 0xC7 */
|
||||
N, N, N, N, N, N, N, N,
|
||||
|
@ -3920,6 +3990,10 @@ static const struct instr_dual instr_dual_0f_c3 = {
|
|||
I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
|
||||
};
|
||||
|
||||
static const struct mode_dual mode_dual_63 = {
|
||||
N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
|
||||
};
|
||||
|
||||
static const struct opcode opcode_table[256] = {
|
||||
/* 0x00 - 0x07 */
|
||||
F6ALU(Lock, em_add),
|
||||
|
@ -3954,7 +4028,7 @@ static const struct opcode opcode_table[256] = {
|
|||
/* 0x60 - 0x67 */
|
||||
I(ImplicitOps | Stack | No64, em_pusha),
|
||||
I(ImplicitOps | Stack | No64, em_popa),
|
||||
N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ ,
|
||||
N, MD(ModRM, &mode_dual_63),
|
||||
N, N, N, N,
|
||||
/* 0x68 - 0x6F */
|
||||
I(SrcImm | Mov | Stack, em_push),
|
||||
|
@ -4010,8 +4084,8 @@ static const struct opcode opcode_table[256] = {
|
|||
G(ByteOp, group11), G(0, group11),
|
||||
/* 0xC8 - 0xCF */
|
||||
I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
|
||||
I(ImplicitOps | Stack | SrcImmU16, em_ret_far_imm),
|
||||
I(ImplicitOps | Stack, em_ret_far),
|
||||
I(ImplicitOps | SrcImmU16, em_ret_far_imm),
|
||||
I(ImplicitOps, em_ret_far),
|
||||
D(ImplicitOps), DI(SrcImmByte, intn),
|
||||
D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
|
||||
/* 0xD0 - 0xD7 */
|
||||
|
@ -4108,7 +4182,7 @@ static const struct opcode twobyte_table[256] = {
|
|||
F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
|
||||
GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
|
||||
/* 0xB0 - 0xB7 */
|
||||
I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
|
||||
I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
|
||||
I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
|
||||
F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
|
||||
I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
|
||||
|
@ -4174,6 +4248,8 @@ static const struct opcode opcode_map_0f_38[256] = {
|
|||
#undef I
|
||||
#undef GP
|
||||
#undef EXT
|
||||
#undef MD
|
||||
#undef ID
|
||||
|
||||
#undef D2bv
|
||||
#undef D2bvIP
|
||||
|
@ -4563,6 +4639,12 @@ done_prefixes:
|
|||
else
|
||||
opcode = opcode.u.idual->mod012;
|
||||
break;
|
||||
case ModeDual:
|
||||
if (ctxt->mode == X86EMUL_MODE_PROT64)
|
||||
opcode = opcode.u.mdual->mode64;
|
||||
else
|
||||
opcode = opcode.u.mdual->mode32;
|
||||
break;
|
||||
default:
|
||||
return EMULATION_FAILED;
|
||||
}
|
||||
|
@ -4860,9 +4942,14 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
|
|||
/* optimisation - avoid slow emulated read if Mov */
|
||||
rc = segmented_read(ctxt, ctxt->dst.addr.mem,
|
||||
&ctxt->dst.val, ctxt->dst.bytes);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
if (rc != X86EMUL_CONTINUE) {
|
||||
if (!(ctxt->d & NoWrite) &&
|
||||
rc == X86EMUL_PROPAGATE_FAULT &&
|
||||
ctxt->exception.vector == PF_VECTOR)
|
||||
ctxt->exception.error_code |= PFERR_WRITE_MASK;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
ctxt->dst.orig_val = ctxt->dst.val;
|
||||
|
||||
special_insn:
|
||||
|
@ -4899,11 +4986,6 @@ special_insn:
|
|||
goto threebyte_insn;
|
||||
|
||||
switch (ctxt->b) {
|
||||
case 0x63: /* movsxd */
|
||||
if (ctxt->mode != X86EMUL_MODE_PROT64)
|
||||
goto cannot_emulate;
|
||||
ctxt->dst.val = (s32) ctxt->src.val;
|
||||
break;
|
||||
case 0x70 ... 0x7f: /* jcc (short) */
|
||||
if (test_cc(ctxt->b, ctxt->eflags))
|
||||
rc = jmp_rel(ctxt, ctxt->src.val);
|
||||
|
|
|
@ -98,7 +98,7 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
|
|||
}
|
||||
|
||||
void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
|
||||
int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
|
||||
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
|
||||
int short_hand, unsigned int dest, int dest_mode);
|
||||
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
|
||||
void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
|
||||
|
|
|
@ -138,7 +138,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
|
|||
|
||||
gfn += page_size >> PAGE_SHIFT;
|
||||
|
||||
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -306,6 +306,8 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
|
|||
kvm_unpin_pages(kvm, pfn, unmap_pages);
|
||||
|
||||
gfn += unmap_pages;
|
||||
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
#include <asm/page.h>
|
||||
#include <asm/current.h>
|
||||
#include <asm/apicdef.h>
|
||||
#include <asm/delay.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/jump_label.h>
|
||||
#include "kvm_cache_regs.h"
|
||||
|
@ -327,17 +328,24 @@ static u8 count_vectors(void *bitmap)
|
|||
return count;
|
||||
}
|
||||
|
||||
void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
|
||||
void __kvm_apic_update_irr(u32 *pir, void *regs)
|
||||
{
|
||||
u32 i, pir_val;
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
|
||||
for (i = 0; i <= 7; i++) {
|
||||
pir_val = xchg(&pir[i], 0);
|
||||
if (pir_val)
|
||||
*((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val;
|
||||
*((u32 *)(regs + APIC_IRR + i * 0x10)) |= pir_val;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
|
||||
|
||||
void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
|
||||
{
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
|
||||
__kvm_apic_update_irr(pir, apic->regs);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
|
||||
|
||||
static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
|
||||
|
@ -405,7 +413,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
|
|||
* because the processor can modify ISR under the hood. Instead
|
||||
* just set SVI.
|
||||
*/
|
||||
if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
|
||||
if (unlikely(kvm_x86_ops->hwapic_isr_update))
|
||||
kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec);
|
||||
else {
|
||||
++apic->isr_count;
|
||||
|
@ -453,7 +461,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
|
|||
* on the other hand isr_count and highest_isr_cache are unused
|
||||
* and must be left alone.
|
||||
*/
|
||||
if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
|
||||
if (unlikely(kvm_x86_ops->hwapic_isr_update))
|
||||
kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
|
||||
apic_find_highest_isr(apic));
|
||||
else {
|
||||
|
@ -580,55 +588,48 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
|
|||
apic_update_ppr(apic);
|
||||
}
|
||||
|
||||
static int kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest)
|
||||
static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest)
|
||||
{
|
||||
return dest == (apic_x2apic_mode(apic) ?
|
||||
X2APIC_BROADCAST : APIC_BROADCAST);
|
||||
}
|
||||
|
||||
int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest)
|
||||
static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest)
|
||||
{
|
||||
return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest);
|
||||
}
|
||||
|
||||
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
|
||||
static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
|
||||
{
|
||||
int result = 0;
|
||||
u32 logical_id;
|
||||
|
||||
if (kvm_apic_broadcast(apic, mda))
|
||||
return 1;
|
||||
return true;
|
||||
|
||||
if (apic_x2apic_mode(apic)) {
|
||||
logical_id = kvm_apic_get_reg(apic, APIC_LDR);
|
||||
return logical_id & mda;
|
||||
}
|
||||
|
||||
logical_id = GET_APIC_LOGICAL_ID(kvm_apic_get_reg(apic, APIC_LDR));
|
||||
if (apic_x2apic_mode(apic))
|
||||
return ((logical_id >> 16) == (mda >> 16))
|
||||
&& (logical_id & mda & 0xffff) != 0;
|
||||
|
||||
logical_id = GET_APIC_LOGICAL_ID(logical_id);
|
||||
|
||||
switch (kvm_apic_get_reg(apic, APIC_DFR)) {
|
||||
case APIC_DFR_FLAT:
|
||||
if (logical_id & mda)
|
||||
result = 1;
|
||||
break;
|
||||
return (logical_id & mda) != 0;
|
||||
case APIC_DFR_CLUSTER:
|
||||
if (((logical_id >> 4) == (mda >> 0x4))
|
||||
&& (logical_id & mda & 0xf))
|
||||
result = 1;
|
||||
break;
|
||||
return ((logical_id >> 4) == (mda >> 4))
|
||||
&& (logical_id & mda & 0xf) != 0;
|
||||
default:
|
||||
apic_debug("Bad DFR vcpu %d: %08x\n",
|
||||
apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR));
|
||||
break;
|
||||
return false;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
|
||||
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
|
||||
int short_hand, unsigned int dest, int dest_mode)
|
||||
{
|
||||
int result = 0;
|
||||
struct kvm_lapic *target = vcpu->arch.apic;
|
||||
|
||||
apic_debug("target %p, source %p, dest 0x%x, "
|
||||
|
@ -638,29 +639,21 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
|
|||
ASSERT(target);
|
||||
switch (short_hand) {
|
||||
case APIC_DEST_NOSHORT:
|
||||
if (dest_mode == 0)
|
||||
/* Physical mode. */
|
||||
result = kvm_apic_match_physical_addr(target, dest);
|
||||
if (dest_mode == APIC_DEST_PHYSICAL)
|
||||
return kvm_apic_match_physical_addr(target, dest);
|
||||
else
|
||||
/* Logical mode. */
|
||||
result = kvm_apic_match_logical_addr(target, dest);
|
||||
break;
|
||||
return kvm_apic_match_logical_addr(target, dest);
|
||||
case APIC_DEST_SELF:
|
||||
result = (target == source);
|
||||
break;
|
||||
return target == source;
|
||||
case APIC_DEST_ALLINC:
|
||||
result = 1;
|
||||
break;
|
||||
return true;
|
||||
case APIC_DEST_ALLBUT:
|
||||
result = (target != source);
|
||||
break;
|
||||
return target != source;
|
||||
default:
|
||||
apic_debug("kvm: apic: Bad dest shorthand value %x\n",
|
||||
short_hand);
|
||||
break;
|
||||
return false;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
|
||||
|
@ -693,7 +686,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
|
|||
|
||||
ret = true;
|
||||
|
||||
if (irq->dest_mode == 0) { /* physical mode */
|
||||
if (irq->dest_mode == APIC_DEST_PHYSICAL) {
|
||||
if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
|
||||
goto out;
|
||||
|
||||
|
@ -1076,25 +1069,72 @@ static void apic_timer_expired(struct kvm_lapic *apic)
|
|||
{
|
||||
struct kvm_vcpu *vcpu = apic->vcpu;
|
||||
wait_queue_head_t *q = &vcpu->wq;
|
||||
struct kvm_timer *ktimer = &apic->lapic_timer;
|
||||
|
||||
/*
|
||||
* Note: KVM_REQ_PENDING_TIMER is implicitly checked in
|
||||
* vcpu_enter_guest.
|
||||
*/
|
||||
if (atomic_read(&apic->lapic_timer.pending))
|
||||
return;
|
||||
|
||||
atomic_inc(&apic->lapic_timer.pending);
|
||||
/* FIXME: this code should not know anything about vcpus */
|
||||
kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
|
||||
kvm_set_pending_timer(vcpu);
|
||||
|
||||
if (waitqueue_active(q))
|
||||
wake_up_interruptible(q);
|
||||
|
||||
if (apic_lvtt_tscdeadline(apic))
|
||||
ktimer->expired_tscdeadline = ktimer->tscdeadline;
|
||||
}
|
||||
|
||||
/*
|
||||
* On APICv, this test will cause a busy wait
|
||||
* during a higher-priority task.
|
||||
*/
|
||||
|
||||
static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
u32 reg = kvm_apic_get_reg(apic, APIC_LVTT);
|
||||
|
||||
if (kvm_apic_hw_enabled(apic)) {
|
||||
int vec = reg & APIC_VECTOR_MASK;
|
||||
void *bitmap = apic->regs + APIC_ISR;
|
||||
|
||||
if (kvm_x86_ops->deliver_posted_interrupt)
|
||||
bitmap = apic->regs + APIC_IRR;
|
||||
|
||||
if (apic_test_vector(vec, bitmap))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void wait_lapic_expire(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
u64 guest_tsc, tsc_deadline;
|
||||
|
||||
if (!kvm_vcpu_has_lapic(vcpu))
|
||||
return;
|
||||
|
||||
if (apic->lapic_timer.expired_tscdeadline == 0)
|
||||
return;
|
||||
|
||||
if (!lapic_timer_int_injected(vcpu))
|
||||
return;
|
||||
|
||||
tsc_deadline = apic->lapic_timer.expired_tscdeadline;
|
||||
apic->lapic_timer.expired_tscdeadline = 0;
|
||||
guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc());
|
||||
trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
|
||||
|
||||
/* __delay is delay_tsc whenever the hardware has TSC, thus always. */
|
||||
if (guest_tsc < tsc_deadline)
|
||||
__delay(tsc_deadline - guest_tsc);
|
||||
}
|
||||
|
||||
static void start_apic_timer(struct kvm_lapic *apic)
|
||||
{
|
||||
ktime_t now;
|
||||
|
||||
atomic_set(&apic->lapic_timer.pending, 0);
|
||||
|
||||
if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
|
||||
|
@ -1140,6 +1180,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
|
|||
/* lapic timer in tsc deadline mode */
|
||||
u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
|
||||
u64 ns = 0;
|
||||
ktime_t expire;
|
||||
struct kvm_vcpu *vcpu = apic->vcpu;
|
||||
unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
|
||||
unsigned long flags;
|
||||
|
@ -1154,8 +1195,10 @@ static void start_apic_timer(struct kvm_lapic *apic)
|
|||
if (likely(tscdeadline > guest_tsc)) {
|
||||
ns = (tscdeadline - guest_tsc) * 1000000ULL;
|
||||
do_div(ns, this_tsc_khz);
|
||||
expire = ktime_add_ns(now, ns);
|
||||
expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
|
||||
hrtimer_start(&apic->lapic_timer.timer,
|
||||
ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
|
||||
expire, HRTIMER_MODE_ABS);
|
||||
} else
|
||||
apic_timer_expired(apic);
|
||||
|
||||
|
@ -1745,7 +1788,9 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
|
|||
if (kvm_x86_ops->hwapic_irr_update)
|
||||
kvm_x86_ops->hwapic_irr_update(vcpu,
|
||||
apic_find_highest_irr(apic));
|
||||
kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
|
||||
if (unlikely(kvm_x86_ops->hwapic_isr_update))
|
||||
kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
|
||||
apic_find_highest_isr(apic));
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
kvm_rtc_eoi_tracking_restore_one(vcpu);
|
||||
}
|
||||
|
|
|
@ -14,6 +14,7 @@ struct kvm_timer {
|
|||
u32 timer_mode;
|
||||
u32 timer_mode_mask;
|
||||
u64 tscdeadline;
|
||||
u64 expired_tscdeadline;
|
||||
atomic_t pending; /* accumulated triggered timers */
|
||||
};
|
||||
|
||||
|
@ -56,9 +57,8 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
|
|||
void kvm_apic_set_version(struct kvm_vcpu *vcpu);
|
||||
|
||||
void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
|
||||
void __kvm_apic_update_irr(u32 *pir, void *regs);
|
||||
void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
|
||||
int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest);
|
||||
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda);
|
||||
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
|
||||
unsigned long *dest_map);
|
||||
int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
|
||||
|
@ -170,4 +170,6 @@ static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
|
|||
|
||||
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
|
||||
|
||||
void wait_lapic_expire(struct kvm_vcpu *vcpu);
|
||||
|
||||
#endif
|
||||
|
|
|
@@ -63,30 +63,16 @@ enum {
#undef MMU_DEBUG

#ifdef MMU_DEBUG
static bool dbg = 0;
module_param(dbg, bool, 0644);

#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)

#define MMU_WARN_ON(x) WARN_ON(x)
#else

#define pgprintk(x...) do { } while (0)
#define rmap_printk(x...) do { } while (0)

#endif

#ifdef MMU_DEBUG
static bool dbg = 0;
module_param(dbg, bool, 0644);
#endif

#ifndef MMU_DEBUG
#define ASSERT(x) do { } while (0)
#else
#define ASSERT(x) \
	if (!(x)) { \
		printk(KERN_WARNING "assertion failed %s:%d: %s\n", \
		       __FILE__, __LINE__, #x); \
	}
#define MMU_WARN_ON(x) do { } while (0)
#endif
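The pgprintk()/dbg pattern above is the usual runtime-togglable debug logging idiom: a boolean module parameter exposed through sysfs gates the printk. A minimal, hypothetical module illustrating just that idiom (not part of this patch):

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/printk.h>

static bool dbg;
module_param(dbg, bool, 0644);  /* writable via /sys/module/.../parameters/dbg */

#define dbg_printk(fmt, ...) \
        do { if (dbg) printk(KERN_DEBUG fmt, ##__VA_ARGS__); } while (0)

static int __init dbg_demo_init(void)
{
        dbg_printk("debug logging is enabled\n");
        return 0;
}
module_init(dbg_demo_init);

static void __exit dbg_demo_exit(void) { }
module_exit(dbg_demo_exit);

MODULE_LICENSE("GPL");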

#define PTE_PREFETCH_NUM 8
|
||||
|
@@ -546,6 +532,11 @@ static bool spte_is_bit_cleared(u64 old_spte, u64 new_spte, u64 bit_mask)
|
|||
return (old_spte & bit_mask) && !(new_spte & bit_mask);
|
||||
}
|
||||
|
||||
static bool spte_is_bit_changed(u64 old_spte, u64 new_spte, u64 bit_mask)
|
||||
{
|
||||
return (old_spte & bit_mask) != (new_spte & bit_mask);
|
||||
}
|
||||
|
||||
/* Rules for using mmu_spte_set:
|
||||
* Set the sptep from nonpresent to present.
|
||||
* Note: the sptep being assigned *must* be either not present
|
||||
|
@ -596,6 +587,14 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
|
|||
if (!shadow_accessed_mask)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Flush TLB when accessed/dirty bits are changed in the page tables,
|
||||
* to guarantee consistency between TLB and page tables.
|
||||
*/
|
||||
if (spte_is_bit_changed(old_spte, new_spte,
|
||||
shadow_accessed_mask | shadow_dirty_mask))
|
||||
ret = true;
|
||||
|
||||
if (spte_is_bit_cleared(old_spte, new_spte, shadow_accessed_mask))
|
||||
kvm_set_pfn_accessed(spte_to_pfn(old_spte));
|
||||
if (spte_is_bit_cleared(old_spte, new_spte, shadow_dirty_mask))
|
||||
|
@@ -1216,6 +1215,60 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
|
|||
return flush;
|
||||
}
|
||||
|
||||
static bool spte_clear_dirty(struct kvm *kvm, u64 *sptep)
|
||||
{
|
||||
u64 spte = *sptep;
|
||||
|
||||
rmap_printk("rmap_clear_dirty: spte %p %llx\n", sptep, *sptep);
|
||||
|
||||
spte &= ~shadow_dirty_mask;
|
||||
|
||||
return mmu_spte_update(sptep, spte);
|
||||
}
|
||||
|
||||
static bool __rmap_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
|
||||
{
|
||||
u64 *sptep;
|
||||
struct rmap_iterator iter;
|
||||
bool flush = false;
|
||||
|
||||
for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
|
||||
BUG_ON(!(*sptep & PT_PRESENT_MASK));
|
||||
|
||||
flush |= spte_clear_dirty(kvm, sptep);
|
||||
sptep = rmap_get_next(&iter);
|
||||
}
|
||||
|
||||
return flush;
|
||||
}
|
||||
|
||||
static bool spte_set_dirty(struct kvm *kvm, u64 *sptep)
|
||||
{
|
||||
u64 spte = *sptep;
|
||||
|
||||
rmap_printk("rmap_set_dirty: spte %p %llx\n", sptep, *sptep);
|
||||
|
||||
spte |= shadow_dirty_mask;
|
||||
|
||||
return mmu_spte_update(sptep, spte);
|
||||
}
|
||||
|
||||
static bool __rmap_set_dirty(struct kvm *kvm, unsigned long *rmapp)
|
||||
{
|
||||
u64 *sptep;
|
||||
struct rmap_iterator iter;
|
||||
bool flush = false;
|
||||
|
||||
for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
|
||||
BUG_ON(!(*sptep & PT_PRESENT_MASK));
|
||||
|
||||
flush |= spte_set_dirty(kvm, sptep);
|
||||
sptep = rmap_get_next(&iter);
|
||||
}
|
||||
|
||||
return flush;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_mmu_write_protect_pt_masked - write protect selected PT level pages
|
||||
* @kvm: kvm instance
|
||||
|
@@ -1226,7 +1279,7 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
|
|||
* Used when we do not need to care about huge page mappings: e.g. during dirty
|
||||
* logging we do not have any such mappings.
|
||||
*/
|
||||
void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
|
||||
static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
gfn_t gfn_offset, unsigned long mask)
|
||||
{
|
||||
|
@ -1242,6 +1295,53 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages
 * @kvm: kvm instance
 * @slot: slot to clear D-bit
 * @gfn_offset: start of the BITS_PER_LONG pages we care about
 * @mask: indicates which pages we should clear D-bit
 *
 * Used for PML to re-log the dirty GPAs after userspace querying dirty_bitmap.
 */
void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
				   struct kvm_memory_slot *slot,
				   gfn_t gfn_offset, unsigned long mask)
{
	unsigned long *rmapp;

	while (mask) {
		rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
				      PT_PAGE_TABLE_LEVEL, slot);
		__rmap_clear_dirty(kvm, rmapp);

		/* clear the first set bit */
		mask &= mask - 1;
	}
}
EXPORT_SYMBOL_GPL(kvm_mmu_clear_dirty_pt_masked);
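The while (mask) loop visits exactly the pages whose bit is set: __ffs(mask) yields the index of the lowest set bit and mask &= mask - 1 clears it, so the loop runs once per dirty page rather than once per bit position. The same idiom in a standalone sketch (the helper name and printf output are illustrative only):

#include <stdio.h>

/* Walk every set bit of a dirty mask, lowest first, as the loop above does. */
static void for_each_dirty_page(unsigned long base_gfn, unsigned long mask)
{
        while (mask) {
                unsigned long bit = __builtin_ctzl(mask);  /* same role as __ffs() */

                printf("page frame %lu is dirty\n", base_gfn + bit);
                mask &= mask - 1;       /* clear the lowest set bit */
        }
}

int main(void)
{
        for_each_dirty_page(0x1000, 0x16);      /* 0b10110: visits offsets 1, 2, 4 */
        return 0;
}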
|
||||
|
||||
/**
|
||||
* kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
|
||||
* PT level pages.
|
||||
*
|
||||
* It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
|
||||
* enable dirty logging for them.
|
||||
*
|
||||
* Used when we do not need to care about huge page mappings: e.g. during dirty
|
||||
* logging we do not have any such mappings.
|
||||
*/
|
||||
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
gfn_t gfn_offset, unsigned long mask)
|
||||
{
|
||||
if (kvm_x86_ops->enable_log_dirty_pt_masked)
|
||||
kvm_x86_ops->enable_log_dirty_pt_masked(kvm, slot, gfn_offset,
|
||||
mask);
|
||||
else
|
||||
kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
|
||||
}
|
||||
|
||||
static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
|
||||
{
|
||||
struct kvm_memory_slot *slot;
|
||||
|
@@ -1536,7 +1636,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
|
|||
|
||||
static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
|
||||
{
|
||||
ASSERT(is_empty_shadow_page(sp->spt));
|
||||
MMU_WARN_ON(!is_empty_shadow_page(sp->spt));
|
||||
hlist_del(&sp->hash_link);
|
||||
list_del(&sp->link);
|
||||
free_page((unsigned long)sp->spt);
|
||||
|
@@ -2501,8 +2601,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
|
|||
}
|
||||
}
|
||||
|
||||
if (pte_access & ACC_WRITE_MASK)
|
||||
if (pte_access & ACC_WRITE_MASK) {
|
||||
mark_page_dirty(vcpu->kvm, gfn);
|
||||
spte |= shadow_dirty_mask;
|
||||
}
|
||||
|
||||
set_pte:
|
||||
if (mmu_spte_update(sptep, spte))
|
||||
|
@@ -2818,6 +2920,18 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
|||
*/
|
||||
gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
|
||||
|
||||
/*
|
||||
* Theoretically we could also set dirty bit (and flush TLB) here in
|
||||
* order to eliminate unnecessary PML logging. See comments in
|
||||
* set_spte. But fast_page_fault is very unlikely to happen with PML
|
||||
* enabled, so we do not do this. This might result in the same GPA
|
||||
* to be logged in PML buffer again when the write really happens, and
|
||||
* eventually to be called by mark_page_dirty twice. But it's also no
|
||||
* harm. This also avoids the TLB flush needed after setting dirty bit
|
||||
* so non-PML cases won't be impacted.
|
||||
*
|
||||
* Compare with set_spte where instead shadow_dirty_mask is set.
|
||||
*/
|
||||
if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) == spte)
|
||||
mark_page_dirty(vcpu->kvm, gfn);
|
||||
|
||||
|
@ -3041,7 +3155,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
|
|||
for (i = 0; i < 4; ++i) {
|
||||
hpa_t root = vcpu->arch.mmu.pae_root[i];
|
||||
|
||||
ASSERT(!VALID_PAGE(root));
|
||||
MMU_WARN_ON(VALID_PAGE(root));
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
make_mmu_pages_available(vcpu);
|
||||
sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
|
||||
|
@ -3079,7 +3193,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
|
|||
if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
|
||||
hpa_t root = vcpu->arch.mmu.root_hpa;
|
||||
|
||||
ASSERT(!VALID_PAGE(root));
|
||||
MMU_WARN_ON(VALID_PAGE(root));
|
||||
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
make_mmu_pages_available(vcpu);
|
||||
|
@ -3104,7 +3218,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
|
|||
for (i = 0; i < 4; ++i) {
|
||||
hpa_t root = vcpu->arch.mmu.pae_root[i];
|
||||
|
||||
ASSERT(!VALID_PAGE(root));
|
||||
MMU_WARN_ON(VALID_PAGE(root));
|
||||
if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
|
||||
pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i);
|
||||
if (!is_present_gpte(pdptr)) {
|
||||
|
@ -3329,8 +3443,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
ASSERT(vcpu);
|
||||
ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
|
||||
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
|
||||
|
||||
gfn = gva >> PAGE_SHIFT;
|
||||
|
||||
|
@ -3396,8 +3509,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
|
|||
int write = error_code & PFERR_WRITE_MASK;
|
||||
bool map_writable;
|
||||
|
||||
ASSERT(vcpu);
|
||||
ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
|
||||
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
|
||||
|
||||
if (unlikely(error_code & PFERR_RSVD_MASK)) {
|
||||
r = handle_mmio_page_fault(vcpu, gpa, error_code, true);
|
||||
|
@ -3718,7 +3830,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
|
|||
update_permission_bitmask(vcpu, context, false);
|
||||
update_last_pte_bitmap(vcpu, context);
|
||||
|
||||
ASSERT(is_pae(vcpu));
|
||||
MMU_WARN_ON(!is_pae(vcpu));
|
||||
context->page_fault = paging64_page_fault;
|
||||
context->gva_to_gpa = paging64_gva_to_gpa;
|
||||
context->sync_page = paging64_sync_page;
|
||||
|
@ -3763,7 +3875,7 @@ static void paging32E_init_context(struct kvm_vcpu *vcpu,
|
|||
|
||||
static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_mmu *context = vcpu->arch.walk_mmu;
|
||||
struct kvm_mmu *context = &vcpu->arch.mmu;
|
||||
|
||||
context->base_role.word = 0;
|
||||
context->page_fault = tdp_page_fault;
|
||||
|
@ -3803,11 +3915,12 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
|
|||
update_last_pte_bitmap(vcpu, context);
|
||||
}
|
||||
|
||||
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
|
||||
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
|
||||
ASSERT(vcpu);
|
||||
ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
|
||||
struct kvm_mmu *context = &vcpu->arch.mmu;
|
||||
|
||||
MMU_WARN_ON(VALID_PAGE(context->root_hpa));
|
||||
|
||||
if (!is_paging(vcpu))
|
||||
nonpaging_init_context(vcpu, context);
|
||||
|
@ -3818,19 +3931,19 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
|
|||
else
|
||||
paging32_init_context(vcpu, context);
|
||||
|
||||
vcpu->arch.mmu.base_role.nxe = is_nx(vcpu);
|
||||
vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
|
||||
vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
|
||||
vcpu->arch.mmu.base_role.smep_andnot_wp
|
||||
context->base_role.nxe = is_nx(vcpu);
|
||||
context->base_role.cr4_pae = !!is_pae(vcpu);
|
||||
context->base_role.cr0_wp = is_write_protection(vcpu);
|
||||
context->base_role.smep_andnot_wp
|
||||
= smep && !is_write_protection(vcpu);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
|
||||
|
||||
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
|
||||
bool execonly)
|
||||
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
|
||||
{
|
||||
ASSERT(vcpu);
|
||||
ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
|
||||
struct kvm_mmu *context = &vcpu->arch.mmu;
|
||||
|
||||
MMU_WARN_ON(VALID_PAGE(context->root_hpa));
|
||||
|
||||
context->shadow_root_level = kvm_x86_ops->get_tdp_level();
|
||||
|
||||
|
@ -3851,11 +3964,13 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
|
|||
|
||||
static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);
|
||||
vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3;
|
||||
vcpu->arch.walk_mmu->get_cr3 = get_cr3;
|
||||
vcpu->arch.walk_mmu->get_pdptr = kvm_pdptr_read;
|
||||
vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
|
||||
struct kvm_mmu *context = &vcpu->arch.mmu;
|
||||
|
||||
kvm_init_shadow_mmu(vcpu);
|
||||
context->set_cr3 = kvm_x86_ops->set_cr3;
|
||||
context->get_cr3 = get_cr3;
|
||||
context->get_pdptr = kvm_pdptr_read;
|
||||
context->inject_page_fault = kvm_inject_page_fault;
|
||||
}
|
||||
|
||||
static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
|
||||
|
@ -3900,17 +4015,15 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
|
|||
static void init_kvm_mmu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (mmu_is_nested(vcpu))
|
||||
return init_kvm_nested_mmu(vcpu);
|
||||
init_kvm_nested_mmu(vcpu);
|
||||
else if (tdp_enabled)
|
||||
return init_kvm_tdp_mmu(vcpu);
|
||||
init_kvm_tdp_mmu(vcpu);
|
||||
else
|
||||
return init_kvm_softmmu(vcpu);
|
||||
init_kvm_softmmu(vcpu);
|
||||
}
|
||||
|
||||
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
ASSERT(vcpu);
|
||||
|
||||
kvm_mmu_unload(vcpu);
|
||||
init_kvm_mmu(vcpu);
|
||||
}
|
||||
|
@ -4266,8 +4379,6 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
|
|||
struct page *page;
|
||||
int i;
|
||||
|
||||
ASSERT(vcpu);
|
||||
|
||||
/*
|
||||
* When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
|
||||
* Therefore we need to allocate shadow page tables in the first
|
||||
|
@ -4286,8 +4397,6 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
|
|||
|
||||
int kvm_mmu_create(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
ASSERT(vcpu);
|
||||
|
||||
vcpu->arch.walk_mmu = &vcpu->arch.mmu;
|
||||
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
|
||||
vcpu->arch.mmu.translate_gpa = translate_gpa;
|
||||
|
@ -4298,19 +4407,18 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
|
|||
|
||||
void kvm_mmu_setup(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
ASSERT(vcpu);
|
||||
ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
|
||||
MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
|
||||
|
||||
init_kvm_mmu(vcpu);
|
||||
}
|
||||
|
||||
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
|
||||
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot)
|
||||
{
|
||||
struct kvm_memory_slot *memslot;
|
||||
gfn_t last_gfn;
|
||||
int i;
|
||||
bool flush = false;
|
||||
|
||||
memslot = id_to_memslot(kvm->memslots, slot);
|
||||
last_gfn = memslot->base_gfn + memslot->npages - 1;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
|
@ -4325,7 +4433,8 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
|
|||
|
||||
for (index = 0; index <= last_index; ++index, ++rmapp) {
|
||||
if (*rmapp)
|
||||
__rmap_write_protect(kvm, rmapp, false);
|
||||
flush |= __rmap_write_protect(kvm, rmapp,
|
||||
false);
|
||||
|
||||
if (need_resched() || spin_needbreak(&kvm->mmu_lock))
|
||||
cond_resched_lock(&kvm->mmu_lock);
|
||||
|
@ -4352,9 +4461,125 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
|
|||
* instead of PT_WRITABLE_MASK, that means it does not depend
|
||||
* on PT_WRITABLE_MASK anymore.
|
||||
*/
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
|
||||
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot)
|
||||
{
|
||||
gfn_t last_gfn;
|
||||
unsigned long *rmapp;
|
||||
unsigned long last_index, index;
|
||||
bool flush = false;
|
||||
|
||||
last_gfn = memslot->base_gfn + memslot->npages - 1;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
|
||||
rmapp = memslot->arch.rmap[PT_PAGE_TABLE_LEVEL - 1];
|
||||
last_index = gfn_to_index(last_gfn, memslot->base_gfn,
|
||||
PT_PAGE_TABLE_LEVEL);
|
||||
|
||||
for (index = 0; index <= last_index; ++index, ++rmapp) {
|
||||
if (*rmapp)
|
||||
flush |= __rmap_clear_dirty(kvm, rmapp);
|
||||
|
||||
if (need_resched() || spin_needbreak(&kvm->mmu_lock))
|
||||
cond_resched_lock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
|
||||
lockdep_assert_held(&kvm->slots_lock);
|
||||
|
||||
/*
|
||||
* It's also safe to flush TLBs out of mmu lock here as currently this
|
||||
* function is only used for dirty logging, in which case flushing TLB
|
||||
* out of mmu lock also guarantees no dirty pages will be lost in
|
||||
* dirty_bitmap.
|
||||
*/
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_slot_leaf_clear_dirty);
|
||||
|
||||
void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot)
|
||||
{
|
||||
gfn_t last_gfn;
|
||||
int i;
|
||||
bool flush = false;
|
||||
|
||||
last_gfn = memslot->base_gfn + memslot->npages - 1;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
|
||||
for (i = PT_PAGE_TABLE_LEVEL + 1; /* skip rmap for 4K page */
|
||||
i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
|
||||
unsigned long *rmapp;
|
||||
unsigned long last_index, index;
|
||||
|
||||
rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL];
|
||||
last_index = gfn_to_index(last_gfn, memslot->base_gfn, i);
|
||||
|
||||
for (index = 0; index <= last_index; ++index, ++rmapp) {
|
||||
if (*rmapp)
|
||||
flush |= __rmap_write_protect(kvm, rmapp,
|
||||
false);
|
||||
|
||||
if (need_resched() || spin_needbreak(&kvm->mmu_lock))
|
||||
cond_resched_lock(&kvm->mmu_lock);
|
||||
}
|
||||
}
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
|
||||
/* see kvm_mmu_slot_remove_write_access */
|
||||
lockdep_assert_held(&kvm->slots_lock);
|
||||
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_slot_largepage_remove_write_access);
|
||||
|
||||
void kvm_mmu_slot_set_dirty(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot)
|
||||
{
|
||||
gfn_t last_gfn;
|
||||
int i;
|
||||
bool flush = false;
|
||||
|
||||
last_gfn = memslot->base_gfn + memslot->npages - 1;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
|
||||
for (i = PT_PAGE_TABLE_LEVEL;
|
||||
i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
|
||||
unsigned long *rmapp;
|
||||
unsigned long last_index, index;
|
||||
|
||||
rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL];
|
||||
last_index = gfn_to_index(last_gfn, memslot->base_gfn, i);
|
||||
|
||||
for (index = 0; index <= last_index; ++index, ++rmapp) {
|
||||
if (*rmapp)
|
||||
flush |= __rmap_set_dirty(kvm, rmapp);
|
||||
|
||||
if (need_resched() || spin_needbreak(&kvm->mmu_lock))
|
||||
cond_resched_lock(&kvm->mmu_lock);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
|
||||
lockdep_assert_held(&kvm->slots_lock);
|
||||
|
||||
/* see kvm_mmu_slot_leaf_clear_dirty */
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty);
|
||||
|
||||
#define BATCH_ZAP_PAGES 10
|
||||
static void kvm_zap_obsolete_pages(struct kvm *kvm)
|
||||
{
|
||||
|
@ -4606,8 +4831,6 @@ EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy);
|
|||
|
||||
void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
ASSERT(vcpu);
|
||||
|
||||
kvm_mmu_unload(vcpu);
|
||||
free_mmu_pages(vcpu);
|
||||
mmu_free_memory_caches(vcpu);
|
||||
|
|
|
@ -44,18 +44,6 @@
|
|||
#define PT_DIRECTORY_LEVEL 2
|
||||
#define PT_PAGE_TABLE_LEVEL 1
|
||||
|
||||
#define PFERR_PRESENT_BIT 0
|
||||
#define PFERR_WRITE_BIT 1
|
||||
#define PFERR_USER_BIT 2
|
||||
#define PFERR_RSVD_BIT 3
|
||||
#define PFERR_FETCH_BIT 4
|
||||
|
||||
#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
|
||||
#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
|
||||
#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
|
||||
#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
|
||||
#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
|
||||
|
||||
static inline u64 rsvd_bits(int s, int e)
|
||||
{
|
||||
return ((1ULL << (e - s + 1)) - 1) << s;
|
||||
|
@ -81,9 +69,8 @@ enum {
|
|||
};
|
||||
|
||||
int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
|
||||
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
|
||||
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
|
||||
bool execonly);
|
||||
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
|
||||
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
|
||||
void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
||||
bool ept);
|
||||
|
||||
|
|
|
@ -2003,8 +2003,8 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
|
|||
|
||||
static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
|
||||
|
||||
WARN_ON(mmu_is_nested(vcpu));
|
||||
kvm_init_shadow_mmu(vcpu);
|
||||
vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
|
||||
vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
|
||||
vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr;
|
||||
|
|
|
@ -848,6 +848,24 @@ TRACE_EVENT(kvm_track_tsc,
|
|||
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
/*
 * Tracepoint for PML full VMEXIT.
 */
TRACE_EVENT(kvm_pml_full,
	TP_PROTO(unsigned int vcpu_id),
	TP_ARGS(vcpu_id),

	TP_STRUCT__entry(
		__field( unsigned int, vcpu_id )
	),

	TP_fast_assign(
		__entry->vcpu_id = vcpu_id;
	),

	TP_printk("vcpu %d: PML full", __entry->vcpu_id)
);
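With PML, the CPU appends the guest-physical address of each freshly dirtied page to a small log; when the log fills up, the guest exits with the PML-full reason traced above and the hypervisor drains the log into the slot's dirty bitmap. A purely illustrative drain loop; pml_buf, nr_entries and the flat bitmap layout are assumptions made for the sketch, not the real VMX data structures:

#include <limits.h>

#define PAGE_SHIFT      12
#define BITS_PER_LONG   (sizeof(unsigned long) * CHAR_BIT)

/* Mark one guest frame number dirty in a slot-local bitmap. */
static void mark_gfn_dirty(unsigned long *dirty_bitmap, unsigned long gfn,
                           unsigned long base_gfn)
{
        unsigned long rel = gfn - base_gfn;

        dirty_bitmap[rel / BITS_PER_LONG] |= 1UL << (rel % BITS_PER_LONG);
}

/* Drain a PML-style log of guest-physical addresses into the bitmap. */
static void drain_pml_buffer(const unsigned long *pml_buf, int nr_entries,
                             unsigned long *dirty_bitmap, unsigned long base_gfn)
{
        for (int i = 0; i < nr_entries; i++)
                mark_gfn_dirty(dirty_bitmap, pml_buf[i] >> PAGE_SHIFT, base_gfn);
}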
|
||||
|
||||
TRACE_EVENT(kvm_ple_window,
|
||||
TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
|
||||
TP_ARGS(grow, vcpu_id, new, old),
|
||||
|
@ -914,6 +932,26 @@ TRACE_EVENT(kvm_pvclock_update,
|
|||
__entry->flags)
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_wait_lapic_expire,
|
||||
TP_PROTO(unsigned int vcpu_id, s64 delta),
|
||||
TP_ARGS(vcpu_id, delta),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned int, vcpu_id )
|
||||
__field( s64, delta )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->vcpu_id = vcpu_id;
|
||||
__entry->delta = delta;
|
||||
),
|
||||
|
||||
TP_printk("vcpu %u: delta %lld (%s)",
|
||||
__entry->vcpu_id,
|
||||
__entry->delta,
|
||||
__entry->delta < 0 ? "early" : "late")
|
||||
);
|
||||
|
||||
#endif /* _TRACE_KVM_H */
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
|
|
arch/x86/kvm/vmx.c: 1086 lines changed (diff suppressed by the web viewer because it is too large)
|
@@ -108,6 +108,10 @@ EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
|
|||
static u32 tsc_tolerance_ppm = 250;
|
||||
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
|
||||
|
||||
/* lapic timer advance (tscdeadline mode only) in nanoseconds */
|
||||
unsigned int lapic_timer_advance_ns = 0;
|
||||
module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
|
||||
|
||||
static bool backwards_tsc_observed = false;
|
||||
|
||||
#define KVM_NR_SHARED_MSRS 16
|
||||
|
@ -141,6 +145,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
|||
{ "irq_window", VCPU_STAT(irq_window_exits) },
|
||||
{ "nmi_window", VCPU_STAT(nmi_window_exits) },
|
||||
{ "halt_exits", VCPU_STAT(halt_exits) },
|
||||
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
|
||||
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
|
||||
{ "hypercalls", VCPU_STAT(hypercalls) },
|
||||
{ "request_irq", VCPU_STAT(request_irq_exits) },
|
||||
|
@ -492,7 +497,7 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
|
||||
|
||||
int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
|
||||
static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
|
||||
void *data, int offset, int len, u32 access)
|
||||
{
|
||||
return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
|
||||
|
@ -643,7 +648,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
}
|
||||
|
||||
int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
|
||||
static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
|
||||
{
|
||||
u64 xcr0 = xcr;
|
||||
u64 old_xcr0 = vcpu->arch.xcr0;
|
||||
|
@ -1083,6 +1088,15 @@ static void update_pvclock_gtod(struct timekeeper *tk)
|
|||
}
|
||||
#endif
|
||||
|
||||
void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* Note: KVM_REQ_PENDING_TIMER is implicitly checked in
|
||||
* vcpu_enter_guest. This function is only called from
|
||||
* the physical CPU that is running vcpu.
|
||||
*/
|
||||
kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
|
||||
}
|
||||
|
||||
static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
|
||||
{
|
||||
|
@ -1180,7 +1194,7 @@ static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
|
|||
#endif
|
||||
|
||||
static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
|
||||
unsigned long max_tsc_khz;
|
||||
static unsigned long max_tsc_khz;
|
||||
|
||||
static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
|
||||
{
|
||||
|
@ -1234,7 +1248,7 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
|
|||
return tsc;
|
||||
}
|
||||
|
||||
void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
|
||||
static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
bool vcpus_matched;
|
||||
|
@ -1529,7 +1543,8 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
|
|||
&ka->master_cycle_now);
|
||||
|
||||
ka->use_master_clock = host_tsc_clocksource && vcpus_matched
|
||||
&& !backwards_tsc_observed;
|
||||
&& !backwards_tsc_observed
|
||||
&& !ka->boot_vcpu_runs_old_kvmclock;
|
||||
|
||||
if (ka->use_master_clock)
|
||||
atomic_set(&kvm_guest_has_master_clock, 1);
|
||||
|
@ -2161,8 +2176,20 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
|||
case MSR_KVM_SYSTEM_TIME_NEW:
|
||||
case MSR_KVM_SYSTEM_TIME: {
|
||||
u64 gpa_offset;
|
||||
struct kvm_arch *ka = &vcpu->kvm->arch;
|
||||
|
||||
kvmclock_reset(vcpu);
|
||||
|
||||
if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) {
|
||||
bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
|
||||
|
||||
if (ka->boot_vcpu_runs_old_kvmclock != tmp)
|
||||
set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
|
||||
&vcpu->requests);
|
||||
|
||||
ka->boot_vcpu_runs_old_kvmclock = tmp;
|
||||
}
|
||||
|
||||
vcpu->arch.time = data;
|
||||
kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
|
||||
|
||||
|
@ -2324,6 +2351,7 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
|
|||
{
|
||||
return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_get_msr);
|
||||
|
||||
static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
||||
{
|
||||
|
@ -2738,6 +2766,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
case KVM_CAP_READONLY_MEM:
|
||||
case KVM_CAP_HYPERV_TIME:
|
||||
case KVM_CAP_IOAPIC_POLARITY_IGNORED:
|
||||
case KVM_CAP_TSC_DEADLINE_TIMER:
|
||||
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
|
||||
case KVM_CAP_ASSIGN_DEV_IRQ:
|
||||
case KVM_CAP_PCI_2_3:
|
||||
|
@ -2776,9 +2805,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
case KVM_CAP_TSC_CONTROL:
|
||||
r = kvm_has_tsc_control;
|
||||
break;
|
||||
case KVM_CAP_TSC_DEADLINE_TIMER:
|
||||
r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
|
||||
break;
|
||||
default:
|
||||
r = 0;
|
||||
break;
|
||||
|
@@ -3734,83 +3760,43 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 * @kvm: kvm instance
 * @log: slot id and address to which we copy the log
 *
 * We need to keep it in mind that VCPU threads can write to the bitmap
 * concurrently. So, to avoid losing data, we keep the following order for
 * each bit:
 * Steps 1-4 below provide general overview of dirty page logging. See
 * kvm_get_dirty_log_protect() function description for additional details.
 *
 * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
 * always flush the TLB (step 4) even if previous step failed and the dirty
 * bitmap may be corrupt. Regardless of previous outcome the KVM logging API
 * does not preclude user space subsequent dirty log read. Flushing TLB ensures
 * writes will be marked dirty for next log read.
 *
 * 1. Take a snapshot of the bit and clear it if needed.
 * 2. Write protect the corresponding page.
 * 3. Flush TLB's if needed.
 * 4. Copy the snapshot to the userspace.
 *
 * Between 2 and 3, the guest may write to the page using the remaining TLB
 * entry. This is not a problem because the page will be reported dirty at
 * step 4 using the snapshot taken before and step 3 ensures that successive
 * writes will be logged for the next call.
 * 3. Copy the snapshot to the userspace.
 * 4. Flush TLB's if needed.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
|
||||
{
|
||||
int r;
|
||||
struct kvm_memory_slot *memslot;
|
||||
unsigned long n, i;
|
||||
unsigned long *dirty_bitmap;
|
||||
unsigned long *dirty_bitmap_buffer;
|
||||
bool is_dirty = false;
|
||||
int r;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
|
||||
r = -EINVAL;
|
||||
if (log->slot >= KVM_USER_MEM_SLOTS)
|
||||
goto out;
|
||||
/*
|
||||
* Flush potentially hardware-cached dirty pages to dirty_bitmap.
|
||||
*/
|
||||
if (kvm_x86_ops->flush_log_dirty)
|
||||
kvm_x86_ops->flush_log_dirty(kvm);
|
||||
|
||||
memslot = id_to_memslot(kvm->memslots, log->slot);
|
||||
|
||||
dirty_bitmap = memslot->dirty_bitmap;
|
||||
r = -ENOENT;
|
||||
if (!dirty_bitmap)
|
||||
goto out;
|
||||
|
||||
n = kvm_dirty_bitmap_bytes(memslot);
|
||||
|
||||
dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
|
||||
memset(dirty_bitmap_buffer, 0, n);
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
|
||||
for (i = 0; i < n / sizeof(long); i++) {
|
||||
unsigned long mask;
|
||||
gfn_t offset;
|
||||
|
||||
if (!dirty_bitmap[i])
|
||||
continue;
|
||||
|
||||
is_dirty = true;
|
||||
|
||||
mask = xchg(&dirty_bitmap[i], 0);
|
||||
dirty_bitmap_buffer[i] = mask;
|
||||
|
||||
offset = i * BITS_PER_LONG;
|
||||
kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
|
||||
}
|
||||
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
|
||||
/* See the comments in kvm_mmu_slot_remove_write_access(). */
|
||||
lockdep_assert_held(&kvm->slots_lock);
|
||||
r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);
|
||||
|
||||
/*
|
||||
* All the TLBs can be flushed out of mmu lock, see the comments in
|
||||
* kvm_mmu_slot_remove_write_access().
|
||||
*/
|
||||
lockdep_assert_held(&kvm->slots_lock);
|
||||
if (is_dirty)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
|
||||
goto out;
|
||||
|
||||
r = 0;
|
||||
out:
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
return r;
|
||||
}
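The open-coded loop removed above (now centralized in the generic kvm_get_dirty_log_protect()) follows a simple pattern: atomically swap each word of the live dirty bitmap with zero, keep the old value as the snapshot handed to userspace, and re-write-protect exactly the pages that were reported. A self-contained sketch of that pattern, with the write_protect callback standing in for kvm_mmu_write_protect_pt_masked() or the PML hook:

#include <stdbool.h>
#include <limits.h>

#define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

/*
 * Snapshot-and-protect, modelled on the loop that used to live in
 * kvm_vm_ioctl_get_dirty_log(): grab each word of the live bitmap
 * atomically, remember it for userspace, and re-protect exactly the
 * pages that were reported dirty.
 */
static bool snapshot_dirty_log(unsigned long *live, unsigned long *snapshot,
                               unsigned long nwords,
                               void (*write_protect)(unsigned long gfn_offset,
                                                     unsigned long mask))
{
        bool any_dirty = false;

        for (unsigned long i = 0; i < nwords; i++) {
                unsigned long mask = __atomic_exchange_n(&live[i], 0,
                                                         __ATOMIC_SEQ_CST);
                if (!mask)
                        continue;

                any_dirty = true;
                snapshot[i] = mask;
                write_protect(i * BITS_PER_LONG, mask);
        }
        return any_dirty;
}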
|
||||
|
@ -4516,6 +4502,8 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
|
|||
if (rc != X86EMUL_CONTINUE)
|
||||
return rc;
|
||||
addr += now;
|
||||
if (ctxt->mode != X86EMUL_MODE_PROT64)
|
||||
addr = (u32)addr;
|
||||
val += now;
|
||||
bytes -= now;
|
||||
}
|
||||
|
@ -4984,6 +4972,11 @@ static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulon
|
|||
kvm_register_write(emul_to_vcpu(ctxt), reg, val);
|
||||
}
|
||||
|
||||
static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
|
||||
{
|
||||
kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked);
|
||||
}
|
||||
|
||||
static const struct x86_emulate_ops emulate_ops = {
|
||||
.read_gpr = emulator_read_gpr,
|
||||
.write_gpr = emulator_write_gpr,
|
||||
|
@ -5019,6 +5012,7 @@ static const struct x86_emulate_ops emulate_ops = {
|
|||
.put_fpu = emulator_put_fpu,
|
||||
.intercept = emulator_intercept,
|
||||
.get_cpuid = emulator_get_cpuid,
|
||||
.set_nmi_mask = emulator_set_nmi_mask,
|
||||
};
|
||||
|
||||
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
|
||||
|
@ -6311,6 +6305,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
trace_kvm_entry(vcpu->vcpu_id);
|
||||
wait_lapic_expire(vcpu);
|
||||
kvm_x86_ops->run(vcpu);
|
||||
|
||||
/*
|
||||
|
@ -7041,15 +7036,13 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
|
|||
return r;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
||||
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int r;
|
||||
struct msr_data msr;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
|
||||
r = vcpu_load(vcpu);
|
||||
if (r)
|
||||
return r;
|
||||
if (vcpu_load(vcpu))
|
||||
return;
|
||||
msr.data = 0x0;
|
||||
msr.index = MSR_IA32_TSC;
|
||||
msr.host_initiated = true;
|
||||
|
@ -7058,8 +7051,6 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
|||
|
||||
schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
|
||||
KVMCLOCK_SYNC_PERIOD);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
||||
|
@@ -7549,12 +7540,62 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
|
||||
struct kvm_memory_slot *new)
|
||||
{
|
||||
/* Still write protect RO slot */
|
||||
if (new->flags & KVM_MEM_READONLY) {
|
||||
kvm_mmu_slot_remove_write_access(kvm, new);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Call kvm_x86_ops dirty logging hooks when they are valid.
|
||||
*
|
||||
* kvm_x86_ops->slot_disable_log_dirty is called when:
|
||||
*
|
||||
* - KVM_MR_CREATE with dirty logging is disabled
|
||||
* - KVM_MR_FLAGS_ONLY with dirty logging is disabled in new flag
|
||||
*
|
||||
* The reason is, in case of PML, we need to set D-bit for any slots
|
||||
* with dirty logging disabled in order to eliminate unnecessary GPA
|
||||
* logging in PML buffer (and potential PML buffer full VMEXT). This
|
||||
* guarantees leaving PML enabled during guest's lifetime won't have
|
||||
* any additonal overhead from PML when guest is running with dirty
|
||||
* logging disabled for memory slots.
|
||||
*
|
||||
* kvm_x86_ops->slot_enable_log_dirty is called when switching new slot
|
||||
* to dirty logging mode.
|
||||
*
|
||||
* If kvm_x86_ops dirty logging hooks are invalid, use write protect.
|
||||
*
|
||||
* In case of write protect:
|
||||
*
|
||||
* Write protect all pages for dirty logging.
|
||||
*
|
||||
* All the sptes including the large sptes which point to this
|
||||
* slot are set to readonly. We can not create any new large
|
||||
* spte on this slot until the end of the logging.
|
||||
*
|
||||
* See the comments in fast_page_fault().
|
||||
*/
|
||||
if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
|
||||
if (kvm_x86_ops->slot_enable_log_dirty)
|
||||
kvm_x86_ops->slot_enable_log_dirty(kvm, new);
|
||||
else
|
||||
kvm_mmu_slot_remove_write_access(kvm, new);
|
||||
} else {
|
||||
if (kvm_x86_ops->slot_disable_log_dirty)
|
||||
kvm_x86_ops->slot_disable_log_dirty(kvm, new);
|
||||
}
|
||||
}
|
||||
|
||||
void kvm_arch_commit_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
|
||||
struct kvm_memory_slot *new;
|
||||
int nr_mmu_pages = 0;
|
||||
|
||||
if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) {
|
||||
|
@ -7573,17 +7614,20 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
|
|||
|
||||
if (nr_mmu_pages)
|
||||
kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
|
||||
|
||||
/* It's OK to get 'new' slot here as it has already been installed */
|
||||
new = id_to_memslot(kvm->memslots, mem->slot);
|
||||
|
||||
/*
|
||||
* Write protect all pages for dirty logging.
|
||||
* Set up write protection and/or dirty logging for the new slot.
|
||||
*
|
||||
* All the sptes including the large sptes which point to this
|
||||
* slot are set to readonly. We can not create any new large
|
||||
* spte on this slot until the end of the logging.
|
||||
*
|
||||
* See the comments in fast_page_fault().
|
||||
* For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of old slot have
|
||||
* been zapped so no dirty logging staff is needed for old slot. For
|
||||
* KVM_MR_FLAGS_ONLY, the old slot is essentially the same one as the
|
||||
* new and it's also covered when dealing with the new slot.
|
||||
*/
|
||||
if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
|
||||
kvm_mmu_slot_remove_write_access(kvm, mem->slot);
|
||||
if (change != KVM_MR_DELETE)
|
||||
kvm_mmu_slot_apply_flags(kvm, new);
|
||||
}
|
||||
|
||||
void kvm_arch_flush_shadow_all(struct kvm *kvm)
|
||||
|
@ -7837,3 +7881,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
|
|||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
|
||||
|
|
|
@ -147,6 +147,7 @@ static inline void kvm_register_writel(struct kvm_vcpu *vcpu,
|
|||
|
||||
void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
|
||||
void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
|
||||
void kvm_set_pending_timer(struct kvm_vcpu *vcpu);
|
||||
int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
|
||||
|
||||
void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr);
|
||||
|
@ -170,5 +171,7 @@ extern u64 kvm_supported_xcr0(void);
|
|||
|
||||
extern unsigned int min_timer_period_us;
|
||||
|
||||
extern unsigned int lapic_timer_advance_ns;
|
||||
|
||||
extern struct static_key kvm_no_apic_vcpu;
|
||||
#endif
|
||||
|
|
|
@@ -481,15 +481,19 @@ out:
	return tlist;
}

#define MPIDR_TO_SGI_AFFINITY(cluster_id, level) \
	(MPIDR_AFFINITY_LEVEL(cluster_id, level) \
		<< ICC_SGI1R_AFFINITY_## level ##_SHIFT)

static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq)
{
	u64 val;

	val = (MPIDR_AFFINITY_LEVEL(cluster_id, 3) << 48 |
	       MPIDR_AFFINITY_LEVEL(cluster_id, 2) << 32 |
	       irq << 24 |
	       MPIDR_AFFINITY_LEVEL(cluster_id, 1) << 16 |
	       tlist);
	val = (MPIDR_TO_SGI_AFFINITY(cluster_id, 3) |
	       MPIDR_TO_SGI_AFFINITY(cluster_id, 2) |
	       irq << ICC_SGI1R_SGI_ID_SHIFT |
	       MPIDR_TO_SGI_AFFINITY(cluster_id, 1) |
	       tlist << ICC_SGI1R_TARGET_LIST_SHIFT);

	pr_debug("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val);
	gic_write_sgi1r(val);
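The new MPIDR_TO_SGI_AFFINITY() lines build the ICC_SGI1R_EL1 value from named field offsets instead of magic shifts; the layout (target list at bit 0, Aff1 at 16, SGI ID at 24, Aff2 at 32, Aff3 at 48) matches the ICC_SGI1R_* definitions added to the GICv3 header later in this diff. A standalone sketch of the same packing, with aff1..aff3 standing in for MPIDR_AFFINITY_LEVEL(cluster_id, n):

#include <stdint.h>

/* Shifts mirroring the ICC_SGI1R_* definitions further down in this diff. */
#define SGI1R_TARGET_LIST_SHIFT 0
#define SGI1R_AFF1_SHIFT        16
#define SGI1R_SGI_ID_SHIFT      24
#define SGI1R_AFF2_SHIFT        32
#define SGI1R_AFF3_SHIFT        48

/* Pack an ICC_SGI1R_EL1 value the way gic_send_sgi() now does. */
static uint64_t pack_sgi1r(uint8_t aff3, uint8_t aff2, uint8_t aff1,
                           unsigned int irq, uint16_t tlist)
{
        return ((uint64_t)aff3 << SGI1R_AFF3_SHIFT) |
               ((uint64_t)aff2 << SGI1R_AFF2_SHIFT) |
               ((uint64_t)irq  << SGI1R_SGI_ID_SHIFT) |
               ((uint64_t)aff1 << SGI1R_AFF1_SHIFT) |
               ((uint64_t)tlist << SGI1R_TARGET_LIST_SHIFT);
}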
|
||||
|
|
|
@ -54,6 +54,7 @@ static unsigned long sclp_hsa_size;
|
|||
static unsigned int sclp_max_cpu;
|
||||
static struct sclp_ipl_info sclp_ipl_info;
|
||||
static unsigned char sclp_siif;
|
||||
static unsigned char sclp_sigpif;
|
||||
static u32 sclp_ibc;
|
||||
static unsigned int sclp_mtid;
|
||||
static unsigned int sclp_mtid_cp;
|
||||
|
@ -140,6 +141,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
|
|||
if (boot_cpu_address != cpue->core_id)
|
||||
continue;
|
||||
sclp_siif = cpue->siif;
|
||||
sclp_sigpif = cpue->sigpif;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -186,6 +188,12 @@ int sclp_has_siif(void)
|
|||
}
|
||||
EXPORT_SYMBOL(sclp_has_siif);
|
||||
|
||||
int sclp_has_sigpif(void)
|
||||
{
|
||||
return sclp_sigpif;
|
||||
}
|
||||
EXPORT_SYMBOL(sclp_has_sigpif);
|
||||
|
||||
unsigned int sclp_get_ibc(void)
|
||||
{
|
||||
return sclp_ibc;
|
||||
|
|
|
@ -33,10 +33,11 @@
|
|||
#define VGIC_V2_MAX_LRS (1 << 6)
|
||||
#define VGIC_V3_MAX_LRS 16
|
||||
#define VGIC_MAX_IRQS 1024
|
||||
#define VGIC_V2_MAX_CPUS 8
|
||||
|
||||
/* Sanity checks... */
|
||||
#if (KVM_MAX_VCPUS > 8)
|
||||
#error Invalid number of CPU interfaces
|
||||
#if (KVM_MAX_VCPUS > 255)
|
||||
#error Too many KVM VCPUs, the VGIC only supports up to 255 VCPUs for now
|
||||
#endif
|
||||
|
||||
#if (VGIC_NR_IRQS_LEGACY & 31)
|
||||
|
@ -132,6 +133,18 @@ struct vgic_params {
|
|||
unsigned int maint_irq;
|
||||
/* Virtual control interface base address */
|
||||
void __iomem *vctrl_base;
|
||||
int max_gic_vcpus;
|
||||
/* Only needed for the legacy KVM_CREATE_IRQCHIP */
|
||||
bool can_emulate_gicv2;
|
||||
};
|
||||
|
||||
struct vgic_vm_ops {
|
||||
bool (*handle_mmio)(struct kvm_vcpu *, struct kvm_run *,
|
||||
struct kvm_exit_mmio *);
|
||||
bool (*queue_sgi)(struct kvm_vcpu *, int irq);
|
||||
void (*add_sgi_source)(struct kvm_vcpu *, int irq, int source);
|
||||
int (*init_model)(struct kvm *);
|
||||
int (*map_resources)(struct kvm *, const struct vgic_params *);
|
||||
};
|
||||
|
||||
struct vgic_dist {
|
||||
|
@ -140,6 +153,9 @@ struct vgic_dist {
|
|||
bool in_kernel;
|
||||
bool ready;
|
||||
|
||||
/* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */
|
||||
u32 vgic_model;
|
||||
|
||||
int nr_cpus;
|
||||
int nr_irqs;
|
||||
|
||||
|
@ -148,7 +164,11 @@ struct vgic_dist {
|
|||
|
||||
/* Distributor and vcpu interface mapping in the guest */
|
||||
phys_addr_t vgic_dist_base;
|
||||
/* GICv2 and GICv3 use different mapped register blocks */
|
||||
union {
|
||||
phys_addr_t vgic_cpu_base;
|
||||
phys_addr_t vgic_redist_base;
|
||||
};
|
||||
|
||||
/* Distributor enabled */
|
||||
u32 enabled;
|
||||
|
@ -210,8 +230,13 @@ struct vgic_dist {
|
|||
*/
|
||||
struct vgic_bitmap *irq_spi_target;
|
||||
|
||||
/* Target MPIDR for each IRQ (needed for GICv3 IROUTERn) only */
|
||||
u32 *irq_spi_mpidr;
|
||||
|
||||
/* Bitmap indicating which CPU has something pending */
|
||||
unsigned long *irq_pending_on_cpu;
|
||||
|
||||
struct vgic_vm_ops vm_ops;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@ -229,6 +254,7 @@ struct vgic_v3_cpu_if {
|
|||
#ifdef CONFIG_ARM_GIC_V3
|
||||
u32 vgic_hcr;
|
||||
u32 vgic_vmcr;
|
||||
u32 vgic_sre; /* Restored only, change ignored */
|
||||
u32 vgic_misr; /* Saved only */
|
||||
u32 vgic_eisr; /* Saved only */
|
||||
u32 vgic_elrsr; /* Saved only */
|
||||
|
@ -275,13 +301,15 @@ struct kvm_exit_mmio;
|
|||
int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
|
||||
int kvm_vgic_hyp_init(void);
|
||||
int kvm_vgic_map_resources(struct kvm *kvm);
|
||||
int kvm_vgic_create(struct kvm *kvm);
|
||||
int kvm_vgic_get_max_vcpus(void);
|
||||
int kvm_vgic_create(struct kvm *kvm, u32 type);
|
||||
void kvm_vgic_destroy(struct kvm *kvm);
|
||||
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
|
||||
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
|
||||
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
|
||||
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
|
||||
bool level);
|
||||
void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
|
||||
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
|
||||
bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
struct kvm_exit_mmio *mmio);
|
||||
|
@ -327,7 +355,7 @@ static inline int kvm_vgic_map_resources(struct kvm *kvm)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static inline int kvm_vgic_create(struct kvm *kvm)
|
||||
static inline int kvm_vgic_create(struct kvm *kvm, u32 type)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
@ -379,6 +407,11 @@ static inline bool vgic_ready(struct kvm *kvm)
|
|||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline int kvm_vgic_get_max_vcpus(void)
|
||||
{
|
||||
return KVM_MAX_VCPUS;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
#define GICD_SETSPI_SR 0x0050
|
||||
#define GICD_CLRSPI_SR 0x0058
|
||||
#define GICD_SEIR 0x0068
|
||||
#define GICD_IGROUPR 0x0080
|
||||
#define GICD_ISENABLER 0x0100
|
||||
#define GICD_ICENABLER 0x0180
|
||||
#define GICD_ISPENDR 0x0200
|
||||
|
@ -41,14 +42,37 @@
|
|||
#define GICD_ICACTIVER 0x0380
|
||||
#define GICD_IPRIORITYR 0x0400
|
||||
#define GICD_ICFGR 0x0C00
|
||||
#define GICD_IGRPMODR 0x0D00
|
||||
#define GICD_NSACR 0x0E00
|
||||
#define GICD_IROUTER 0x6000
|
||||
#define GICD_IDREGS 0xFFD0
|
||||
#define GICD_PIDR2 0xFFE8
|
||||
|
||||
/*
|
||||
* Those registers are actually from GICv2, but the spec demands that they
|
||||
* are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3).
|
||||
*/
|
||||
#define GICD_ITARGETSR 0x0800
|
||||
#define GICD_SGIR 0x0F00
|
||||
#define GICD_CPENDSGIR 0x0F10
|
||||
#define GICD_SPENDSGIR 0x0F20
|
||||
|
||||
#define GICD_CTLR_RWP (1U << 31)
|
||||
#define GICD_CTLR_DS (1U << 6)
|
||||
#define GICD_CTLR_ARE_NS (1U << 4)
|
||||
#define GICD_CTLR_ENABLE_G1A (1U << 1)
|
||||
#define GICD_CTLR_ENABLE_G1 (1U << 0)
|
||||
|
||||
/*
|
||||
* In systems with a single security state (what we emulate in KVM)
|
||||
* the meaning of the interrupt group enable bits is slightly different
|
||||
*/
|
||||
#define GICD_CTLR_ENABLE_SS_G1 (1U << 1)
|
||||
#define GICD_CTLR_ENABLE_SS_G0 (1U << 0)
|
||||
|
||||
#define GICD_TYPER_LPIS (1U << 17)
|
||||
#define GICD_TYPER_MBIS (1U << 16)
|
||||
|
||||
#define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1)
|
||||
#define GICD_TYPER_IRQS(typer) ((((typer) & 0x1f) + 1) * 32)
|
||||
#define GICD_TYPER_LPIS (1U << 17)
|
||||
|
@ -60,6 +84,8 @@
|
|||
#define GIC_PIDR2_ARCH_GICv3 0x30
|
||||
#define GIC_PIDR2_ARCH_GICv4 0x40
|
||||
|
||||
#define GIC_V3_DIST_SIZE 0x10000
|
||||
|
||||
/*
|
||||
* Re-Distributor registers, offsets from RD_base
|
||||
*/
|
||||
|
@ -78,6 +104,7 @@
|
|||
#define GICR_SYNCR 0x00C0
|
||||
#define GICR_MOVLPIR 0x0100
|
||||
#define GICR_MOVALLR 0x0110
|
||||
#define GICR_IDREGS GICD_IDREGS
|
||||
#define GICR_PIDR2 GICD_PIDR2
|
||||
|
||||
#define GICR_CTLR_ENABLE_LPIS (1UL << 0)
|
||||
|
@ -104,6 +131,7 @@
|
|||
/*
|
||||
* Re-Distributor registers, offsets from SGI_base
|
||||
*/
|
||||
#define GICR_IGROUPR0 GICD_IGROUPR
|
||||
#define GICR_ISENABLER0 GICD_ISENABLER
|
||||
#define GICR_ICENABLER0 GICD_ICENABLER
|
||||
#define GICR_ISPENDR0 GICD_ISPENDR
|
||||
|
@ -112,11 +140,15 @@
|
|||
#define GICR_ICACTIVER0 GICD_ICACTIVER
|
||||
#define GICR_IPRIORITYR0 GICD_IPRIORITYR
|
||||
#define GICR_ICFGR0 GICD_ICFGR
|
||||
#define GICR_IGRPMODR0 GICD_IGRPMODR
|
||||
#define GICR_NSACR GICD_NSACR
|
||||
|
||||
#define GICR_TYPER_PLPIS (1U << 0)
|
||||
#define GICR_TYPER_VLPIS (1U << 1)
|
||||
#define GICR_TYPER_LAST (1U << 4)
|
||||
|
||||
#define GIC_V3_REDIST_SIZE 0x20000
|
||||
|
||||
#define LPI_PROP_GROUP1 (1 << 1)
|
||||
#define LPI_PROP_ENABLED (1 << 0)
|
||||
|
||||
|
@ -248,6 +280,18 @@
|
|||
#define ICC_SRE_EL2_SRE (1 << 0)
|
||||
#define ICC_SRE_EL2_ENABLE (1 << 3)
|
||||
|
||||
#define ICC_SGI1R_TARGET_LIST_SHIFT 0
|
||||
#define ICC_SGI1R_TARGET_LIST_MASK (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT)
|
||||
#define ICC_SGI1R_AFFINITY_1_SHIFT 16
|
||||
#define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT)
|
||||
#define ICC_SGI1R_SGI_ID_SHIFT 24
|
||||
#define ICC_SGI1R_SGI_ID_MASK (0xff << ICC_SGI1R_SGI_ID_SHIFT)
|
||||
#define ICC_SGI1R_AFFINITY_2_SHIFT 32
|
||||
#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_1_SHIFT)
|
||||
#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40
|
||||
#define ICC_SGI1R_AFFINITY_3_SHIFT 48
|
||||
#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_1_SHIFT)
|
||||
|
||||
/*
|
||||
* System register definitions
|
||||
*/
|
||||
|
|
|
@ -33,10 +33,6 @@
|
|||
|
||||
#include <asm/kvm_host.h>
|
||||
|
||||
#ifndef KVM_MMIO_SIZE
|
||||
#define KVM_MMIO_SIZE 8
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used
|
||||
* in kvm, other bits are visible for userspace which are defined in
|
||||
|
@ -600,6 +596,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext);
|
|||
|
||||
int kvm_get_dirty_log(struct kvm *kvm,
|
||||
struct kvm_dirty_log *log, int *is_dirty);
|
||||
|
||||
int kvm_get_dirty_log_protect(struct kvm *kvm,
|
||||
struct kvm_dirty_log *log, bool *is_dirty);
|
||||
|
||||
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
gfn_t gfn_offset,
|
||||
unsigned long mask);
|
||||
|
||||
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
|
||||
struct kvm_dirty_log *log);
|
||||
|
||||
|
@ -641,7 +646,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
|
|||
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
|
||||
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id);
|
||||
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
|
||||
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
|
||||
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
|
||||
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
|
||||
|
||||
int kvm_arch_hardware_enable(void);
|
||||
|
@ -1031,6 +1036,8 @@ void kvm_unregister_device_ops(u32 type);
|
|||
|
||||
extern struct kvm_device_ops kvm_mpic_ops;
|
||||
extern struct kvm_device_ops kvm_xics_ops;
|
||||
extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
|
||||
extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||
|
||||
|
|
|
@ -37,6 +37,25 @@ TRACE_EVENT(kvm_userspace_exit,
|
|||
__entry->errno < 0 ? -__entry->errno : __entry->reason)
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_vcpu_wakeup,
|
||||
TP_PROTO(__u64 ns, bool waited),
|
||||
TP_ARGS(ns, waited),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( __u64, ns )
|
||||
__field( bool, waited )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->ns = ns;
|
||||
__entry->waited = waited;
|
||||
),
|
||||
|
||||
TP_printk("%s time %lld ns",
|
||||
__entry->waited ? "wait" : "poll",
|
||||
__entry->ns)
|
||||
);
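This event pairs with the optional halt polling described in the changelog: "poll" means the vCPU found work within halt_poll_ns of executing HLT and never had to sleep, while "wait" means it fell through to the normal blocking path. A conceptual sketch of that decision, with the clock, the pending-work check and the blocking primitive abstracted behind hypothetical callbacks rather than the real kvm_vcpu_block() internals:

#include <stdbool.h>
#include <stdint.h>

struct halt_ops {
        uint64_t (*now_ns)(void);               /* monotonic clock, stand-in */
        bool (*has_pending_work)(void);         /* pending irq/timer/signal, stand-in */
        void (*block_until_wakeup)(void);       /* schedule out until kicked, stand-in */
};

/*
 * "Poll a little before sleeping": after the guest halts, spin for up to
 * halt_poll_ns; only block if nothing shows up in time.  Returns true when
 * polling avoided a sleep (the "poll" case in the tracepoint above).
 */
static bool halt_with_polling(const struct halt_ops *ops, uint64_t halt_poll_ns)
{
        uint64_t deadline = ops->now_ns() + halt_poll_ns;

        while (ops->now_ns() < deadline) {
                if (ops->has_pending_work())
                        return true;    /* woke up cheaply, no context switch */
        }

        ops->block_until_wakeup();      /* fall back to the normal wait path */
        return false;
}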
|
||||
|
||||
#if defined(CONFIG_HAVE_KVM_IRQFD)
|
||||
TRACE_EVENT(kvm_set_irq,
|
||||
TP_PROTO(unsigned int gsi, int level, int irq_source_id),
|
||||
|
|
|
@ -491,6 +491,11 @@ struct kvm_s390_emerg_info {
|
|||
__u16 code;
|
||||
};
|
||||
|
||||
#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01
|
||||
struct kvm_s390_stop_info {
|
||||
__u32 flags;
|
||||
};
|
||||
|
||||
struct kvm_s390_mchk_info {
|
||||
__u64 cr14;
|
||||
__u64 mcic;
|
||||
|
@ -509,6 +514,7 @@ struct kvm_s390_irq {
|
|||
struct kvm_s390_emerg_info emerg;
|
||||
struct kvm_s390_extcall_info extcall;
|
||||
struct kvm_s390_prefix_info prefix;
|
||||
struct kvm_s390_stop_info stop;
|
||||
struct kvm_s390_mchk_info mchk;
|
||||
char reserved[64];
|
||||
} u;
|
||||
|
@ -753,6 +759,7 @@ struct kvm_ppc_smmu_info {
|
|||
#define KVM_CAP_PPC_FIXUP_HCALL 103
|
||||
#define KVM_CAP_PPC_ENABLE_HCALL 104
|
||||
#define KVM_CAP_CHECK_EXTENSION_VM 105
|
||||
#define KVM_CAP_S390_USER_SIGP 106
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
|
@ -952,6 +959,8 @@ enum kvm_device_type {
|
|||
#define KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_ARM_VGIC_V2
|
||||
KVM_DEV_TYPE_FLIC,
|
||||
#define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC
|
||||
KVM_DEV_TYPE_ARM_VGIC_V3,
|
||||
#define KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_V3
|
||||
KVM_DEV_TYPE_MAX,
|
||||
};
|
||||
|
||||
|
|
|
@ -37,3 +37,13 @@ config HAVE_KVM_CPU_RELAX_INTERCEPT
|
|||
|
||||
config KVM_VFIO
|
||||
bool
|
||||
|
||||
config HAVE_KVM_ARCH_TLB_FLUSH_ALL
|
||||
bool
|
||||
|
||||
config KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
bool
|
||||
|
||||
config KVM_COMPAT
|
||||
def_bool y
|
||||
depends on COMPAT && !S390
|
||||
|
|
|
@@ -0,0 +1,847 @@
|
|||
/*
|
||||
* Contains GICv2 specific emulation code, was in vgic.c before.
|
||||
*
|
||||
* Copyright (C) 2012 ARM Ltd.
|
||||
* Author: Marc Zyngier <marc.zyngier@arm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
#include <linux/irqchip/arm-gic.h>
|
||||
|
||||
#include <asm/kvm_emulate.h>
|
||||
#include <asm/kvm_arm.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
|
||||
#include "vgic.h"
|
||||
|
||||
#define GICC_ARCH_VERSION_V2 0x2
|
||||
|
||||
static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
|
||||
static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi)
|
||||
{
|
||||
return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi;
|
||||
}
|
||||
|
||||
static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio, phys_addr_t offset)
|
||||
{
|
||||
u32 reg;
|
||||
u32 word_offset = offset & 3;
|
||||
|
||||
switch (offset & ~3) {
|
||||
case 0: /* GICD_CTLR */
|
||||
reg = vcpu->kvm->arch.vgic.enabled;
|
||||
vgic_reg_access(mmio, &reg, word_offset,
|
||||
ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
|
||||
if (mmio->is_write) {
|
||||
vcpu->kvm->arch.vgic.enabled = reg & 1;
|
||||
vgic_update_state(vcpu->kvm);
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
|
||||
case 4: /* GICD_TYPER */
|
||||
reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
|
||||
reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1;
|
||||
vgic_reg_access(mmio, &reg, word_offset,
|
||||
ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
|
||||
break;
|
||||
|
||||
case 8: /* GICD_IIDR */
|
||||
reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
|
||||
vgic_reg_access(mmio, &reg, word_offset,
|
||||
ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
|
||||
break;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset)
|
||||
{
|
||||
return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
|
||||
vcpu->vcpu_id, ACCESS_WRITE_SETBIT);
|
||||
}
|
||||
|
||||
static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset)
|
||||
{
|
||||
return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
|
||||
vcpu->vcpu_id, ACCESS_WRITE_CLEARBIT);
|
||||
}
|
||||
|
||||
static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset)
|
||||
{
|
||||
return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset,
|
||||
vcpu->vcpu_id);
|
||||
}
|
||||
|
||||
static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset)
|
||||
{
|
||||
return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset,
|
||||
vcpu->vcpu_id);
|
||||
}
|
||||
|
||||
static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset)
|
||||
{
|
||||
u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
|
||||
vcpu->vcpu_id, offset);
|
||||
vgic_reg_access(mmio, reg, offset,
|
||||
ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
|
||||
return false;
|
||||
}
|
||||
|
||||
#define GICD_ITARGETSR_SIZE 32
|
||||
#define GICD_CPUTARGETS_BITS 8
|
||||
#define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS)
|
||||
static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
|
||||
{
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
int i;
|
||||
u32 val = 0;
|
||||
|
||||
irq -= VGIC_NR_PRIVATE_IRQS;
|
||||
|
||||
for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
|
||||
val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8);
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq)
|
||||
{
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
struct kvm_vcpu *vcpu;
|
||||
int i, c;
|
||||
unsigned long *bmap;
|
||||
u32 target;
|
||||
|
||||
irq -= VGIC_NR_PRIVATE_IRQS;
|
||||
|
||||
/*
|
||||
* Pick the LSB in each byte. This ensures we target exactly
|
||||
* one vcpu per IRQ. If the byte is null, assume we target
|
||||
* CPU0.
|
||||
*/
|
||||
for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) {
|
||||
int shift = i * GICD_CPUTARGETS_BITS;
|
||||
|
||||
target = ffs((val >> shift) & 0xffU);
|
||||
target = target ? (target - 1) : 0;
|
||||
dist->irq_spi_cpu[irq + i] = target;
|
||||
kvm_for_each_vcpu(c, vcpu, kvm) {
|
||||
bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
|
||||
if (c == target)
|
||||
set_bit(irq + i, bmap);
|
||||
else
|
||||
clear_bit(irq + i, bmap);
|
||||
}
|
||||
}
|
||||
}
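vgic_set_target_reg() above collapses each GICD_ITARGETSR byte to a single target vCPU: the least significant set bit wins, and a zero byte defaults to CPU0. The same reduction in isolation (an illustrative helper, not part of the vgic API):

#include <stdint.h>

/*
 * Reduce one GICD_ITARGETSR byte to a single CPU index, mirroring the
 * "pick the LSB in each byte" rule used by vgic_set_target_reg().
 */
static unsigned int itargetsr_byte_to_cpu(uint8_t byte)
{
        return byte ? __builtin_ctz(byte) : 0;  /* 0x00 falls back to CPU0 */
}

/* Example: 0x06 (CPUs 1 and 2 requested) resolves to CPU 1. */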
|
||||
|
||||
static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset)
|
||||
{
|
||||
u32 reg;
|
||||
|
||||
/* We treat the banked interrupts targets as read-only */
|
||||
if (offset < 32) {
|
||||
u32 roreg;
|
||||
|
||||
roreg = 1 << vcpu->vcpu_id;
|
||||
roreg |= roreg << 8;
|
||||
roreg |= roreg << 16;
|
||||
|
||||
vgic_reg_access(mmio, &roreg, offset,
|
||||
ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
|
||||
return false;
|
||||
}
|
||||
|
||||
reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U);
|
||||
vgic_reg_access(mmio, &reg, offset,
|
||||
ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
|
||||
if (mmio->is_write) {
|
||||
vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U);
|
||||
vgic_update_state(vcpu->kvm);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio, phys_addr_t offset)
|
||||
{
|
||||
u32 *reg;
|
||||
|
||||
reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
|
||||
vcpu->vcpu_id, offset >> 1);
|
||||
|
||||
return vgic_handle_cfg_reg(reg, mmio, offset);
|
||||
}
|
||||
|
||||
static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio, phys_addr_t offset)
|
||||
{
|
||||
u32 reg;
|
||||
|
||||
vgic_reg_access(mmio, &reg, offset,
|
||||
ACCESS_READ_RAZ | ACCESS_WRITE_VALUE);
|
||||
if (mmio->is_write) {
|
||||
vgic_dispatch_sgi(vcpu, reg);
|
||||
vgic_update_state(vcpu->kvm);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */
|
||||
static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset)
|
||||
{
|
||||
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
|
||||
int sgi;
|
||||
int min_sgi = (offset & ~0x3);
|
||||
int max_sgi = min_sgi + 3;
|
||||
int vcpu_id = vcpu->vcpu_id;
|
||||
u32 reg = 0;
|
||||
|
||||
/* Copy source SGIs from distributor side */
|
||||
for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
|
||||
u8 sources = *vgic_get_sgi_sources(dist, vcpu_id, sgi);
|
||||
|
||||
reg |= ((u32)sources) << (8 * (sgi - min_sgi));
|
||||
}
|
||||
|
||||
mmio_data_write(mmio, ~0, reg);
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset, bool set)
|
||||
{
|
||||
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
|
||||
int sgi;
|
||||
int min_sgi = (offset & ~0x3);
|
||||
int max_sgi = min_sgi + 3;
|
||||
int vcpu_id = vcpu->vcpu_id;
|
||||
u32 reg;
|
||||
bool updated = false;
|
||||
|
||||
reg = mmio_data_read(mmio, ~0);
|
||||
|
||||
/* Clear pending SGIs on the distributor */
|
||||
for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
|
||||
u8 mask = reg >> (8 * (sgi - min_sgi));
|
||||
u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi);
|
||||
|
||||
if (set) {
|
||||
if ((*src & mask) != mask)
|
||||
updated = true;
|
||||
*src |= mask;
|
||||
} else {
|
||||
if (*src & mask)
|
||||
updated = true;
|
||||
*src &= ~mask;
|
||||
}
|
||||
}
|
||||
|
||||
if (updated)
|
||||
vgic_update_state(vcpu->kvm);
|
||||
|
||||
return updated;
|
||||
}
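The two handlers above pack one byte per SGI into each 32-bit register, every bit of a byte naming a source VCPU. A minimal stand-alone sketch of that layout (user-space C; the source masks are made up):

#include <stdio.h>

int main(void)
{
    /* Made-up per-SGI source bitmaps: SGI0 was sent by CPU0,
     * SGI2 by CPUs 1 and 2, SGI3 by CPU7; SGI1 is not pending. */
    unsigned char sources[4] = { 0x01, 0x00, 0x06, 0x80 };
    unsigned int reg = 0;
    int sgi;

    for (sgi = 0; sgi < 4; sgi++)
        reg |= (unsigned int)sources[sgi] << (8 * sgi);

    printf("GICD_SPENDSGIR word reads back as 0x%08x\n", reg);
    return 0;
}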
|
||||
|
||||
static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset)
|
||||
{
|
||||
if (!mmio->is_write)
|
||||
return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
|
||||
else
|
||||
return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true);
|
||||
}
|
||||
|
||||
static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset)
|
||||
{
|
||||
if (!mmio->is_write)
|
||||
return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
|
||||
else
|
||||
return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
|
||||
}
|
||||
|
||||
static const struct kvm_mmio_range vgic_dist_ranges[] = {
|
||||
{
|
||||
.base = GIC_DIST_CTRL,
|
||||
.len = 12,
|
||||
.bits_per_irq = 0,
|
||||
.handle_mmio = handle_mmio_misc,
|
||||
},
|
||||
{
|
||||
.base = GIC_DIST_IGROUP,
|
||||
.len = VGIC_MAX_IRQS / 8,
|
||||
.bits_per_irq = 1,
|
||||
.handle_mmio = handle_mmio_raz_wi,
|
||||
},
|
||||
{
|
||||
.base = GIC_DIST_ENABLE_SET,
|
||||
.len = VGIC_MAX_IRQS / 8,
|
||||
.bits_per_irq = 1,
|
||||
.handle_mmio = handle_mmio_set_enable_reg,
|
||||
},
|
||||
{
|
||||
.base = GIC_DIST_ENABLE_CLEAR,
|
||||
.len = VGIC_MAX_IRQS / 8,
|
||||
.bits_per_irq = 1,
|
||||
.handle_mmio = handle_mmio_clear_enable_reg,
|
||||
},
|
||||
{
|
||||
.base = GIC_DIST_PENDING_SET,
|
||||
.len = VGIC_MAX_IRQS / 8,
|
||||
.bits_per_irq = 1,
|
||||
.handle_mmio = handle_mmio_set_pending_reg,
|
||||
},
|
||||
{
|
||||
.base = GIC_DIST_PENDING_CLEAR,
|
||||
.len = VGIC_MAX_IRQS / 8,
|
||||
.bits_per_irq = 1,
|
||||
.handle_mmio = handle_mmio_clear_pending_reg,
|
||||
},
|
||||
{
|
||||
.base = GIC_DIST_ACTIVE_SET,
|
||||
.len = VGIC_MAX_IRQS / 8,
|
||||
.bits_per_irq = 1,
|
||||
.handle_mmio = handle_mmio_raz_wi,
|
||||
},
|
||||
{
|
||||
.base = GIC_DIST_ACTIVE_CLEAR,
|
||||
.len = VGIC_MAX_IRQS / 8,
|
||||
.bits_per_irq = 1,
|
||||
.handle_mmio = handle_mmio_raz_wi,
|
||||
},
|
||||
{
|
||||
.base = GIC_DIST_PRI,
|
||||
.len = VGIC_MAX_IRQS,
|
||||
.bits_per_irq = 8,
|
||||
.handle_mmio = handle_mmio_priority_reg,
|
||||
},
|
||||
{
|
||||
.base = GIC_DIST_TARGET,
|
||||
.len = VGIC_MAX_IRQS,
|
||||
.bits_per_irq = 8,
|
||||
.handle_mmio = handle_mmio_target_reg,
|
||||
},
|
||||
{
|
||||
.base = GIC_DIST_CONFIG,
|
||||
.len = VGIC_MAX_IRQS / 4,
|
||||
.bits_per_irq = 2,
|
||||
.handle_mmio = handle_mmio_cfg_reg,
|
||||
},
|
||||
{
|
||||
.base = GIC_DIST_SOFTINT,
|
||||
.len = 4,
|
||||
.handle_mmio = handle_mmio_sgi_reg,
|
||||
},
|
||||
{
|
||||
.base = GIC_DIST_SGI_PENDING_CLEAR,
|
||||
.len = VGIC_NR_SGIS,
|
||||
.handle_mmio = handle_mmio_sgi_clear,
|
||||
},
|
||||
{
|
||||
.base = GIC_DIST_SGI_PENDING_SET,
|
||||
.len = VGIC_NR_SGIS,
|
||||
.handle_mmio = handle_mmio_sgi_set,
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
struct kvm_exit_mmio *mmio)
|
||||
{
|
||||
unsigned long base = vcpu->kvm->arch.vgic.vgic_dist_base;
|
||||
|
||||
if (!is_in_range(mmio->phys_addr, mmio->len, base,
|
||||
KVM_VGIC_V2_DIST_SIZE))
|
||||
return false;
|
||||
|
||||
/* GICv2 does not support accesses wider than 32 bits */
|
||||
if (mmio->len > 4) {
|
||||
kvm_inject_dabt(vcpu, mmio->phys_addr);
|
||||
return true;
|
||||
}
|
||||
|
||||
return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base);
|
||||
}
|
||||
|
||||
static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
int nrcpus = atomic_read(&kvm->online_vcpus);
|
||||
u8 target_cpus;
|
||||
int sgi, mode, c, vcpu_id;
|
||||
|
||||
vcpu_id = vcpu->vcpu_id;
|
||||
|
||||
sgi = reg & 0xf;
|
||||
target_cpus = (reg >> 16) & 0xff;
|
||||
mode = (reg >> 24) & 3;
|
||||
|
||||
switch (mode) {
|
||||
case 0:
|
||||
if (!target_cpus)
|
||||
return;
|
||||
break;
|
||||
|
||||
case 1:
|
||||
target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
target_cpus = 1 << vcpu_id;
|
||||
break;
|
||||
}
|
||||
|
||||
kvm_for_each_vcpu(c, vcpu, kvm) {
|
||||
if (target_cpus & 1) {
|
||||
/* Flag the SGI as pending */
|
||||
vgic_dist_irq_set_pending(vcpu, sgi);
|
||||
*vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id;
|
||||
kvm_debug("SGI%d from CPU%d to CPU%d\n",
|
||||
sgi, vcpu_id, c);
|
||||
}
|
||||
|
||||
target_cpus >>= 1;
|
||||
}
|
||||
}
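For reference, a small stand-alone sketch (user-space C, example value only) of the GICD_SGIR field layout that vgic_dispatch_sgi() decodes above:

#include <stdio.h>

int main(void)
{
    /* Arbitrary example of a GICD_SOFTINT (GICD_SGIR) write: the low
     * nibble is the SGI number, bits 16-23 the CPU target list and
     * bits 24-25 the target filter (0 = list, 1 = all-but-self,
     * 2 = self), matching the decoding in vgic_dispatch_sgi(). */
    unsigned int reg = (0u << 24) | (0x5u << 16) | 3u;

    printf("SGI %u, mode %u, target list 0x%02x\n",
           reg & 0xf, (reg >> 24) & 3, (reg >> 16) & 0xff);
    return 0;
}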
|
||||
|
||||
static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq)
|
||||
{
|
||||
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
|
||||
unsigned long sources;
|
||||
int vcpu_id = vcpu->vcpu_id;
|
||||
int c;
|
||||
|
||||
sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
|
||||
|
||||
for_each_set_bit(c, &sources, dist->nr_cpus) {
|
||||
if (vgic_queue_irq(vcpu, c, irq))
|
||||
clear_bit(c, &sources);
|
||||
}
|
||||
|
||||
*vgic_get_sgi_sources(dist, vcpu_id, irq) = sources;
|
||||
|
||||
/*
|
||||
* If the sources bitmap has been cleared it means that we
|
||||
* could queue all the SGIs onto link registers (see the
|
||||
* clear_bit above), and therefore we are done with them in
|
||||
* our emulated gic and can get rid of them.
|
||||
*/
|
||||
if (!sources) {
|
||||
vgic_dist_irq_clear_pending(vcpu, irq);
|
||||
vgic_cpu_irq_clear(vcpu, irq);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_vgic_map_resources - Configure global VGIC state before running any VCPUs
|
||||
* @kvm: pointer to the kvm struct
|
||||
*
|
||||
* Map the virtual CPU interface into the VM before running any VCPUs. We
|
||||
* can't do this at creation time, because user space must first set the
|
||||
* virtual CPU interface address in the guest physical address space.
|
||||
*/
|
||||
static int vgic_v2_map_resources(struct kvm *kvm,
|
||||
const struct vgic_params *params)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (!irqchip_in_kernel(kvm))
|
||||
return 0;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
if (vgic_ready(kvm))
|
||||
goto out;
|
||||
|
||||
if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
|
||||
IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
|
||||
kvm_err("Need to set vgic cpu and dist addresses first\n");
|
||||
ret = -ENXIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the vgic if this hasn't already been done on demand by
|
||||
* accessing the vgic state from userspace.
|
||||
*/
|
||||
ret = vgic_init(kvm);
|
||||
if (ret) {
|
||||
kvm_err("Unable to allocate maps\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
|
||||
params->vcpu_base, KVM_VGIC_V2_CPU_SIZE,
|
||||
true);
|
||||
if (ret) {
|
||||
kvm_err("Unable to remap VGIC CPU to VCPU\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
kvm->arch.vgic.ready = true;
|
||||
out:
|
||||
if (ret)
|
||||
kvm_vgic_destroy(kvm);
|
||||
mutex_unlock(&kvm->lock);
|
||||
return ret;
|
||||
}
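A hedged sketch of the user-space step this depends on: before the first KVM_RUN the VMM has to program the distributor and CPU interface base addresses, otherwise the -ENXIO path above triggers. The vgic_fd and both guest physical addresses are assumptions for illustration, and the KVM_DEV_ARM_VGIC_* constants come from the ARM uapi headers:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* vgic_fd is assumed to come from KVM_CREATE_DEVICE(KVM_DEV_TYPE_ARM_VGIC_V2);
 * the two addresses are arbitrary example values in guest physical space. */
static int set_vgic_addresses(int vgic_fd)
{
    uint64_t dist_addr = 0x08000000ULL;
    uint64_t cpu_addr  = 0x08010000ULL;
    struct kvm_device_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.group = KVM_DEV_ARM_VGIC_GRP_ADDR;
    attr.attr  = KVM_VGIC_V2_ADDR_TYPE_DIST;
    attr.addr  = (uint64_t)(unsigned long)&dist_addr;
    if (ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr))
        return -1;

    attr.attr = KVM_VGIC_V2_ADDR_TYPE_CPU;
    attr.addr = (uint64_t)(unsigned long)&cpu_addr;
    return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}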
|
||||
|
||||
static void vgic_v2_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source)
|
||||
{
|
||||
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
|
||||
|
||||
*vgic_get_sgi_sources(dist, vcpu->vcpu_id, irq) |= 1 << source;
|
||||
}
|
||||
|
||||
static int vgic_v2_init_model(struct kvm *kvm)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4)
|
||||
vgic_set_target_reg(kvm, 0, i);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void vgic_v2_init_emulation(struct kvm *kvm)
|
||||
{
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
|
||||
dist->vm_ops.handle_mmio = vgic_v2_handle_mmio;
|
||||
dist->vm_ops.queue_sgi = vgic_v2_queue_sgi;
|
||||
dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source;
|
||||
dist->vm_ops.init_model = vgic_v2_init_model;
|
||||
dist->vm_ops.map_resources = vgic_v2_map_resources;
|
||||
|
||||
kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS;
|
||||
}
|
||||
|
||||
static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio, phys_addr_t offset)
|
||||
{
|
||||
bool updated = false;
|
||||
struct vgic_vmcr vmcr;
|
||||
u32 *vmcr_field;
|
||||
u32 reg;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
|
||||
switch (offset & ~0x3) {
|
||||
case GIC_CPU_CTRL:
|
||||
vmcr_field = &vmcr.ctlr;
|
||||
break;
|
||||
case GIC_CPU_PRIMASK:
|
||||
vmcr_field = &vmcr.pmr;
|
||||
break;
|
||||
case GIC_CPU_BINPOINT:
|
||||
vmcr_field = &vmcr.bpr;
|
||||
break;
|
||||
case GIC_CPU_ALIAS_BINPOINT:
|
||||
vmcr_field = &vmcr.abpr;
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
if (!mmio->is_write) {
|
||||
reg = *vmcr_field;
|
||||
mmio_data_write(mmio, ~0, reg);
|
||||
} else {
|
||||
reg = mmio_data_read(mmio, ~0);
|
||||
if (reg != *vmcr_field) {
|
||||
*vmcr_field = reg;
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
updated = true;
|
||||
}
|
||||
}
|
||||
return updated;
|
||||
}
|
||||
|
||||
static bool handle_mmio_abpr(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio, phys_addr_t offset)
|
||||
{
|
||||
return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT);
|
||||
}
|
||||
|
||||
static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset)
|
||||
{
|
||||
u32 reg;
|
||||
|
||||
if (mmio->is_write)
|
||||
return false;
|
||||
|
||||
/* GICC_IIDR */
|
||||
reg = (PRODUCT_ID_KVM << 20) |
|
||||
(GICC_ARCH_VERSION_V2 << 16) |
|
||||
(IMPLEMENTER_ARM << 0);
|
||||
mmio_data_write(mmio, ~0, reg);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* CPU Interface Register accesses - these are not accessed by the VM, but by
|
||||
* user space for saving and restoring VGIC state.
|
||||
*/
|
||||
static const struct kvm_mmio_range vgic_cpu_ranges[] = {
|
||||
{
|
||||
.base = GIC_CPU_CTRL,
|
||||
.len = 12,
|
||||
.handle_mmio = handle_cpu_mmio_misc,
|
||||
},
|
||||
{
|
||||
.base = GIC_CPU_ALIAS_BINPOINT,
|
||||
.len = 4,
|
||||
.handle_mmio = handle_mmio_abpr,
|
||||
},
|
||||
{
|
||||
.base = GIC_CPU_ACTIVEPRIO,
|
||||
.len = 16,
|
||||
.handle_mmio = handle_mmio_raz_wi,
|
||||
},
|
||||
{
|
||||
.base = GIC_CPU_IDENT,
|
||||
.len = 4,
|
||||
.handle_mmio = handle_cpu_mmio_ident,
|
||||
},
|
||||
};
|
||||
|
||||
static int vgic_attr_regs_access(struct kvm_device *dev,
|
||||
struct kvm_device_attr *attr,
|
||||
u32 *reg, bool is_write)
|
||||
{
|
||||
const struct kvm_mmio_range *r = NULL, *ranges;
|
||||
phys_addr_t offset;
|
||||
int ret, cpuid, c;
|
||||
struct kvm_vcpu *vcpu, *tmp_vcpu;
|
||||
struct vgic_dist *vgic;
|
||||
struct kvm_exit_mmio mmio;
|
||||
|
||||
offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
|
||||
cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
|
||||
KVM_DEV_ARM_VGIC_CPUID_SHIFT;
|
||||
|
||||
mutex_lock(&dev->kvm->lock);
|
||||
|
||||
ret = vgic_init(dev->kvm);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
vcpu = kvm_get_vcpu(dev->kvm, cpuid);
|
||||
vgic = &dev->kvm->arch.vgic;
|
||||
|
||||
mmio.len = 4;
|
||||
mmio.is_write = is_write;
|
||||
if (is_write)
|
||||
mmio_data_write(&mmio, ~0, *reg);
|
||||
switch (attr->group) {
|
||||
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
|
||||
mmio.phys_addr = vgic->vgic_dist_base + offset;
|
||||
ranges = vgic_dist_ranges;
|
||||
break;
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
|
||||
mmio.phys_addr = vgic->vgic_cpu_base + offset;
|
||||
ranges = vgic_cpu_ranges;
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
r = vgic_find_range(ranges, &mmio, offset);
|
||||
|
||||
if (unlikely(!r || !r->handle_mmio)) {
|
||||
ret = -ENXIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
||||
spin_lock(&vgic->lock);
|
||||
|
||||
/*
|
||||
* Ensure that no other VCPU is running by checking the vcpu->cpu
|
||||
* field. If no other VCPUs are running we can safely access the VGIC
|
||||
* state, because even if another VCPU is run after this point, that
|
||||
* VCPU will not touch the vgic state, because it will block on
|
||||
* getting the vgic->lock in kvm_vgic_sync_hwstate().
|
||||
*/
|
||||
kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
|
||||
if (unlikely(tmp_vcpu->cpu != -1)) {
|
||||
ret = -EBUSY;
|
||||
goto out_vgic_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Move all pending IRQs from the LRs on all VCPUs so the pending
|
||||
* state can be properly represented in the register state accessible
|
||||
* through this API.
|
||||
*/
|
||||
kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm)
|
||||
vgic_unqueue_irqs(tmp_vcpu);
|
||||
|
||||
offset -= r->base;
|
||||
r->handle_mmio(vcpu, &mmio, offset);
|
||||
|
||||
if (!is_write)
|
||||
*reg = mmio_data_read(&mmio, ~0);
|
||||
|
||||
ret = 0;
|
||||
out_vgic_unlock:
|
||||
spin_unlock(&vgic->lock);
|
||||
out:
|
||||
mutex_unlock(&dev->kvm->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vgic_v2_create(struct kvm_device *dev, u32 type)
|
||||
{
|
||||
return kvm_vgic_create(dev->kvm, type);
|
||||
}
|
||||
|
||||
static void vgic_v2_destroy(struct kvm_device *dev)
|
||||
{
|
||||
kfree(dev);
|
||||
}
|
||||
|
||||
static int vgic_v2_set_attr(struct kvm_device *dev,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = vgic_set_common_attr(dev, attr);
|
||||
if (ret != -ENXIO)
|
||||
return ret;
|
||||
|
||||
switch (attr->group) {
|
||||
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
|
||||
u32 __user *uaddr = (u32 __user *)(long)attr->addr;
|
||||
u32 reg;
|
||||
|
||||
if (get_user(reg, uaddr))
|
||||
return -EFAULT;
|
||||
|
||||
return vgic_attr_regs_access(dev, attr, &reg, true);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
static int vgic_v2_get_attr(struct kvm_device *dev,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = vgic_get_common_attr(dev, attr);
|
||||
if (ret != -ENXIO)
|
||||
return ret;
|
||||
|
||||
switch (attr->group) {
|
||||
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
|
||||
u32 __user *uaddr = (u32 __user *)(long)attr->addr;
|
||||
u32 reg = 0;
|
||||
|
||||
ret = vgic_attr_regs_access(dev, attr, &reg, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
return put_user(reg, uaddr);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return -ENXIO;
|
||||
}
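A hedged sketch of the user-space counterpart of this access path, reading one 32-bit distributor register through the save/restore interface; the device fd, cpuid and offset are placeholders (it assumes the ARM uapi headers for the attr encoding), and writes work the same way with KVM_SET_DEVICE_ATTR:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* vgic_fd is assumed to be the VGIC device fd; cpuid selects the banked
 * view and offset is the register offset inside the distributor range. */
static int read_dist_reg(int vgic_fd, uint32_t cpuid, uint32_t offset,
                         uint32_t *val)
{
    struct kvm_device_attr attr = {
        .group = KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
        .attr  = ((uint64_t)cpuid << KVM_DEV_ARM_VGIC_CPUID_SHIFT) |
                 (offset & KVM_DEV_ARM_VGIC_OFFSET_MASK),
        .addr  = (uint64_t)(unsigned long)val,
    };

    return ioctl(vgic_fd, KVM_GET_DEVICE_ATTR, &attr);
}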
|
||||
|
||||
static int vgic_v2_has_attr(struct kvm_device *dev,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
phys_addr_t offset;
|
||||
|
||||
switch (attr->group) {
|
||||
case KVM_DEV_ARM_VGIC_GRP_ADDR:
|
||||
switch (attr->attr) {
|
||||
case KVM_VGIC_V2_ADDR_TYPE_DIST:
|
||||
case KVM_VGIC_V2_ADDR_TYPE_CPU:
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
|
||||
offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
|
||||
return vgic_has_attr_regs(vgic_dist_ranges, offset);
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
|
||||
offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
|
||||
return vgic_has_attr_regs(vgic_cpu_ranges, offset);
|
||||
case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
|
||||
return 0;
|
||||
case KVM_DEV_ARM_VGIC_GRP_CTRL:
|
||||
switch (attr->attr) {
|
||||
case KVM_DEV_ARM_VGIC_CTRL_INIT:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
struct kvm_device_ops kvm_arm_vgic_v2_ops = {
|
||||
.name = "kvm-arm-vgic-v2",
|
||||
.create = vgic_v2_create,
|
||||
.destroy = vgic_v2_destroy,
|
||||
.set_attr = vgic_v2_set_attr,
|
||||
.get_attr = vgic_v2_get_attr,
|
||||
.has_attr = vgic_v2_has_attr,
|
||||
};
|
|
@@ -229,12 +229,16 @@ int vgic_v2_probe(struct device_node *vgic_node,
|
|||
goto out_unmap;
|
||||
}
|
||||
|
||||
vgic->can_emulate_gicv2 = true;
|
||||
kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2);
|
||||
|
||||
vgic->vcpu_base = vcpu_res.start;
|
||||
|
||||
kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
|
||||
vctrl_res.start, vgic->maint_irq);
|
||||
|
||||
vgic->type = VGIC_V2;
|
||||
vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS;
|
||||
*ops = &vgic_v2_ops;
|
||||
*params = vgic;
|
||||
goto out;
|
||||
|
|
File diff suppressed because it is too large
|
@@ -34,6 +34,7 @@
|
|||
#define GICH_LR_VIRTUALID (0x3ffUL << 0)
|
||||
#define GICH_LR_PHYSID_CPUID_SHIFT (10)
|
||||
#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
|
||||
#define ICH_LR_VIRTUALID_MASK (BIT_ULL(32) - 1)
|
||||
|
||||
/*
|
||||
* LRs are stored in reverse order in memory. make sure we index them
|
||||
|
@@ -48,11 +49,16 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
|
|||
struct vgic_lr lr_desc;
|
||||
u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)];
|
||||
|
||||
lr_desc.irq = val & GICH_LR_VIRTUALID;
|
||||
if (lr_desc.irq <= 15)
|
||||
lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
|
||||
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
|
||||
lr_desc.irq = val & ICH_LR_VIRTUALID_MASK;
|
||||
else
|
||||
lr_desc.irq = val & GICH_LR_VIRTUALID;
|
||||
|
||||
lr_desc.source = 0;
|
||||
if (lr_desc.irq <= 15 &&
|
||||
vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
|
||||
lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
|
||||
|
||||
lr_desc.state = 0;
|
||||
|
||||
if (val & ICH_LR_PENDING_BIT)
|
||||
|
@@ -68,8 +74,20 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
|
|||
static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
|
||||
struct vgic_lr lr_desc)
|
||||
{
|
||||
u64 lr_val = (((u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) |
|
||||
lr_desc.irq);
|
||||
u64 lr_val;
|
||||
|
||||
lr_val = lr_desc.irq;
|
||||
|
||||
/*
|
||||
* Currently all guest IRQs are Group1, as Group0 would result
|
||||
* in a FIQ in the guest, which it wouldn't expect.
|
||||
* Eventually we want to make this configurable, so we may revisit
|
||||
* this in the future.
|
||||
*/
|
||||
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
|
||||
lr_val |= ICH_LR_GROUP;
|
||||
else
|
||||
lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT;
|
||||
|
||||
if (lr_desc.state & LR_STATE_PENDING)
|
||||
lr_val |= ICH_LR_PENDING_BIT;
|
||||
|
@@ -145,15 +163,27 @@ static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
|
|||
|
||||
static void vgic_v3_enable(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
|
||||
|
||||
/*
|
||||
* By forcing VMCR to zero, the GIC will restore the binary
|
||||
* points to their reset values. Anything else resets to zero
|
||||
* anyway.
|
||||
*/
|
||||
vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = 0;
|
||||
vgic_v3->vgic_vmcr = 0;
|
||||
|
||||
/*
|
||||
* If we are emulating a GICv3, we do it in a non-GICv2-compatible
|
||||
* way, so we force SRE to 1 to demonstrate this to the guest.
|
||||
* This goes with the spec allowing the value to be RAO/WI.
|
||||
*/
|
||||
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
|
||||
vgic_v3->vgic_sre = ICC_SRE_EL1_SRE;
|
||||
else
|
||||
vgic_v3->vgic_sre = 0;
|
||||
|
||||
/* Get the show on the road... */
|
||||
vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr = ICH_HCR_EN;
|
||||
vgic_v3->vgic_hcr = ICH_HCR_EN;
|
||||
}
|
||||
|
||||
static const struct vgic_ops vgic_v3_ops = {
|
||||
|
@@ -205,35 +235,37 @@ int vgic_v3_probe(struct device_node *vgic_node,
|
|||
* maximum of 16 list registers. Just ignore bit 4...
|
||||
*/
|
||||
vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
|
||||
vgic->can_emulate_gicv2 = false;
|
||||
|
||||
if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx))
|
||||
gicv_idx = 1;
|
||||
|
||||
gicv_idx += 3; /* Also skip GICD, GICC, GICH */
|
||||
if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) {
|
||||
kvm_err("Cannot obtain GICV region\n");
|
||||
ret = -ENXIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!PAGE_ALIGNED(vcpu_res.start)) {
|
||||
kvm_err("GICV physical address 0x%llx not page aligned\n",
|
||||
kvm_info("GICv3: no GICV resource entry\n");
|
||||
vgic->vcpu_base = 0;
|
||||
} else if (!PAGE_ALIGNED(vcpu_res.start)) {
|
||||
pr_warn("GICV physical address 0x%llx not page aligned\n",
|
||||
(unsigned long long)vcpu_res.start);
|
||||
ret = -ENXIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
|
||||
kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
|
||||
vgic->vcpu_base = 0;
|
||||
} else if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
|
||||
pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
|
||||
(unsigned long long)resource_size(&vcpu_res),
|
||||
PAGE_SIZE);
|
||||
ret = -ENXIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
vgic->vcpu_base = 0;
|
||||
} else {
|
||||
vgic->vcpu_base = vcpu_res.start;
|
||||
vgic->can_emulate_gicv2 = true;
|
||||
kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
|
||||
KVM_DEV_TYPE_ARM_VGIC_V2);
|
||||
}
|
||||
if (vgic->vcpu_base == 0)
|
||||
kvm_info("disabling GICv2 emulation\n");
|
||||
kvm_register_device_ops(&kvm_arm_vgic_v3_ops, KVM_DEV_TYPE_ARM_VGIC_V3);
|
||||
|
||||
vgic->vctrl_base = NULL;
|
||||
vgic->type = VGIC_V3;
|
||||
vgic->max_gic_vcpus = KVM_MAX_VCPUS;
|
||||
|
||||
kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
|
||||
vcpu_res.start, vgic->maint_irq);
|
||||
|
|
virt/kvm/arm/vgic.c (1127 changed lines)
File diff suppressed because it is too large
|
@@ -0,0 +1,123 @@
|
|||
/*
|
||||
* Copyright (C) 2012-2014 ARM Ltd.
|
||||
* Author: Marc Zyngier <marc.zyngier@arm.com>
|
||||
*
|
||||
* Derived from virt/kvm/arm/vgic.c
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __KVM_VGIC_H__
|
||||
#define __KVM_VGIC_H__
|
||||
|
||||
#define VGIC_ADDR_UNDEF (-1)
|
||||
#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF)
|
||||
|
||||
#define PRODUCT_ID_KVM 0x4b /* ASCII code K */
|
||||
#define IMPLEMENTER_ARM 0x43b
|
||||
|
||||
#define ACCESS_READ_VALUE (1 << 0)
|
||||
#define ACCESS_READ_RAZ (0 << 0)
|
||||
#define ACCESS_READ_MASK(x) ((x) & (1 << 0))
|
||||
#define ACCESS_WRITE_IGNORED (0 << 1)
|
||||
#define ACCESS_WRITE_SETBIT (1 << 1)
|
||||
#define ACCESS_WRITE_CLEARBIT (2 << 1)
|
||||
#define ACCESS_WRITE_VALUE (3 << 1)
|
||||
#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1))
|
||||
|
||||
#define VCPU_NOT_ALLOCATED ((u8)-1)
|
||||
|
||||
unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x);
|
||||
|
||||
void vgic_update_state(struct kvm *kvm);
|
||||
int vgic_init_common_maps(struct kvm *kvm);
|
||||
|
||||
u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset);
|
||||
u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset);
|
||||
|
||||
void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq);
|
||||
void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq);
|
||||
void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq);
|
||||
void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
|
||||
int irq, int val);
|
||||
|
||||
void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
|
||||
void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
|
||||
|
||||
bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq);
|
||||
void vgic_unqueue_irqs(struct kvm_vcpu *vcpu);
|
||||
|
||||
void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
|
||||
phys_addr_t offset, int mode);
|
||||
bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset);
|
||||
|
||||
static inline
|
||||
u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
|
||||
{
|
||||
return le32_to_cpu(*((u32 *)mmio->data)) & mask;
|
||||
}
|
||||
|
||||
static inline
|
||||
void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
|
||||
{
|
||||
*((u32 *)mmio->data) = cpu_to_le32(value) & mask;
|
||||
}
|
||||
|
||||
struct kvm_mmio_range {
|
||||
phys_addr_t base;
|
||||
unsigned long len;
|
||||
int bits_per_irq;
|
||||
bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset);
|
||||
};
|
||||
|
||||
static inline bool is_in_range(phys_addr_t addr, unsigned long len,
|
||||
phys_addr_t baseaddr, unsigned long size)
|
||||
{
|
||||
return (addr >= baseaddr) && (addr + len <= baseaddr + size);
|
||||
}
|
||||
|
||||
const
|
||||
struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset);
|
||||
|
||||
bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
const struct kvm_mmio_range *ranges,
|
||||
unsigned long mmio_base);
|
||||
|
||||
bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset, int vcpu_id, int access);
|
||||
|
||||
bool vgic_handle_set_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset, int vcpu_id);
|
||||
|
||||
bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset, int vcpu_id);
|
||||
|
||||
bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset);
|
||||
|
||||
void vgic_kick_vcpus(struct kvm *kvm);
|
||||
|
||||
int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset);
|
||||
int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr);
|
||||
int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr);
|
||||
|
||||
int vgic_init(struct kvm *kvm);
|
||||
void vgic_v2_init_emulation(struct kvm *kvm);
|
||||
void vgic_v3_init_emulation(struct kvm *kvm);
|
||||
|
||||
#endif
|
|
@@ -66,6 +66,9 @@
|
|||
MODULE_AUTHOR("Qumranet");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
||||
unsigned int halt_poll_ns = 0;
|
||||
module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
|
||||
|
||||
/*
|
||||
* Ordering of locks:
|
||||
*
|
||||
|
@@ -89,7 +92,7 @@ struct dentry *kvm_debugfs_dir;
|
|||
|
||||
static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
|
||||
unsigned long arg);
|
||||
#ifdef CONFIG_COMPAT
|
||||
#ifdef CONFIG_KVM_COMPAT
|
||||
static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
|
||||
unsigned long arg);
|
||||
#endif
|
||||
|
@@ -176,6 +179,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
|
|||
return called;
|
||||
}
|
||||
|
||||
#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
|
||||
void kvm_flush_remote_tlbs(struct kvm *kvm)
|
||||
{
|
||||
long dirty_count = kvm->tlbs_dirty;
|
||||
|
@@ -186,6 +190,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
|
|||
cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
|
||||
#endif
|
||||
|
||||
void kvm_reload_remote_mmus(struct kvm *kvm)
|
||||
{
|
||||
|
@@ -673,6 +678,7 @@ static void update_memslots(struct kvm_memslots *slots,
|
|||
if (!new->npages) {
|
||||
WARN_ON(!mslots[i].npages);
|
||||
new->base_gfn = 0;
|
||||
new->flags = 0;
|
||||
if (mslots[i].npages)
|
||||
slots->used_slots--;
|
||||
} else {
|
||||
|
@@ -993,6 +999,86 @@ out:
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
|
||||
|
||||
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
/**
|
||||
* kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages
|
||||
* are dirty write protect them for next write.
|
||||
* @kvm: pointer to kvm instance
|
||||
* @log: slot id and address to which we copy the log
|
||||
* @is_dirty: flag set if any page is dirty
|
||||
*
|
||||
* We need to keep in mind that VCPU threads can write to the bitmap
|
||||
* concurrently. So, to avoid losing track of dirty pages we keep the
|
||||
* following order:
|
||||
*
|
||||
* 1. Take a snapshot of the bit and clear it if needed.
|
||||
* 2. Write protect the corresponding page.
|
||||
* 3. Copy the snapshot to the userspace.
|
||||
* 4. Upon return, the caller flushes the TLBs if needed.
|
||||
*
|
||||
* Between 2 and 4, the guest may write to the page using the remaining TLB
|
||||
* entry. This is not a problem because the page is reported dirty using
|
||||
* the snapshot taken before and step 4 ensures that writes done after
|
||||
* exiting to userspace will be logged for the next call.
|
||||
*
|
||||
*/
|
||||
int kvm_get_dirty_log_protect(struct kvm *kvm,
|
||||
struct kvm_dirty_log *log, bool *is_dirty)
|
||||
{
|
||||
struct kvm_memory_slot *memslot;
|
||||
int r, i;
|
||||
unsigned long n;
|
||||
unsigned long *dirty_bitmap;
|
||||
unsigned long *dirty_bitmap_buffer;
|
||||
|
||||
r = -EINVAL;
|
||||
if (log->slot >= KVM_USER_MEM_SLOTS)
|
||||
goto out;
|
||||
|
||||
memslot = id_to_memslot(kvm->memslots, log->slot);
|
||||
|
||||
dirty_bitmap = memslot->dirty_bitmap;
|
||||
r = -ENOENT;
|
||||
if (!dirty_bitmap)
|
||||
goto out;
|
||||
|
||||
n = kvm_dirty_bitmap_bytes(memslot);
|
||||
|
||||
dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
|
||||
memset(dirty_bitmap_buffer, 0, n);
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
*is_dirty = false;
|
||||
for (i = 0; i < n / sizeof(long); i++) {
|
||||
unsigned long mask;
|
||||
gfn_t offset;
|
||||
|
||||
if (!dirty_bitmap[i])
|
||||
continue;
|
||||
|
||||
*is_dirty = true;
|
||||
|
||||
mask = xchg(&dirty_bitmap[i], 0);
|
||||
dirty_bitmap_buffer[i] = mask;
|
||||
|
||||
offset = i * BITS_PER_LONG;
|
||||
kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset,
|
||||
mask);
|
||||
}
|
||||
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
|
||||
goto out;
|
||||
|
||||
r = 0;
|
||||
out:
|
||||
return r;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
|
||||
#endif
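For reference, a hedged sketch of the consumer side of this API: user space fetches the dirty bitmap of one memory slot with KVM_GET_DIRTY_LOG and walks it. The slot number and bitmap size are assumptions, and the slot must have been created with KVM_MEM_LOG_DIRTY_PAGES:

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* vm_fd is the VM fd; bitmap must be large enough for the slot
 * (one bit per guest page, rounded up to 64-bit words). */
static int dump_dirty_pages(int vm_fd, int slot, void *bitmap,
                            size_t bitmap_bytes)
{
    struct kvm_dirty_log log;
    size_t i;

    memset(&log, 0, sizeof(log));
    memset(bitmap, 0, bitmap_bytes);
    log.slot = slot;
    log.dirty_bitmap = bitmap;

    if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log))
        return -1;

    for (i = 0; i < bitmap_bytes; i++)
        if (((unsigned char *)bitmap)[i])
            printf("dirty page(s) in byte %zu of the bitmap\n", i);
    return 0;
}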
|
||||
|
||||
bool kvm_largepages_enabled(void)
|
||||
{
|
||||
return largepages_enabled;
|
||||
|
@@ -1551,6 +1637,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_write_guest);
|
||||
|
||||
int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
|
||||
gpa_t gpa, unsigned long len)
|
||||
|
@@ -1687,29 +1774,60 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(mark_page_dirty);
|
||||
|
||||
static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (kvm_arch_vcpu_runnable(vcpu)) {
|
||||
kvm_make_request(KVM_REQ_UNHALT, vcpu);
|
||||
return -EINTR;
|
||||
}
|
||||
if (kvm_cpu_has_pending_timer(vcpu))
|
||||
return -EINTR;
|
||||
if (signal_pending(current))
|
||||
return -EINTR;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The vCPU has executed a HLT instruction with in-kernel mode enabled.
|
||||
*/
|
||||
void kvm_vcpu_block(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
ktime_t start, cur;
|
||||
DEFINE_WAIT(wait);
|
||||
bool waited = false;
|
||||
|
||||
start = cur = ktime_get();
|
||||
if (halt_poll_ns) {
|
||||
ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);
|
||||
do {
|
||||
/*
|
||||
* This sets KVM_REQ_UNHALT if an interrupt
|
||||
* arrives.
|
||||
*/
|
||||
if (kvm_vcpu_check_block(vcpu) < 0) {
|
||||
++vcpu->stat.halt_successful_poll;
|
||||
goto out;
|
||||
}
|
||||
cur = ktime_get();
|
||||
} while (single_task_running() && ktime_before(cur, stop));
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
|
||||
|
||||
if (kvm_arch_vcpu_runnable(vcpu)) {
|
||||
kvm_make_request(KVM_REQ_UNHALT, vcpu);
|
||||
break;
|
||||
}
|
||||
if (kvm_cpu_has_pending_timer(vcpu))
|
||||
break;
|
||||
if (signal_pending(current))
|
||||
if (kvm_vcpu_check_block(vcpu) < 0)
|
||||
break;
|
||||
|
||||
waited = true;
|
||||
schedule();
|
||||
}
|
||||
|
||||
finish_wait(&vcpu->wq, &wait);
|
||||
cur = ktime_get();
|
||||
|
||||
out:
|
||||
trace_kvm_vcpu_wakeup(ktime_to_ns(cur) - ktime_to_ns(start), waited);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_block);
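The polling window above is bounded by the halt_poll_ns module parameter declared earlier in this file; because it is writable through sysfs it can be tuned at run time. A minimal sketch, assuming the module is named kvm and using an arbitrary 200000 ns starting value:

#include <stdio.h>

int main(void)
{
    /* halt_poll_ns defaults to 0 (polling disabled); writing a non-zero
     * value enables the short poll before the VCPU really goes to sleep. */
    FILE *f = fopen("/sys/module/kvm/parameters/halt_poll_ns", "w");

    if (!f)
        return 1;
    fprintf(f, "200000\n");
    return fclose(f) ? 1 : 0;
}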
|
||||
|
||||
|
@@ -1892,7 +2010,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
|
|||
static struct file_operations kvm_vcpu_fops = {
|
||||
.release = kvm_vcpu_release,
|
||||
.unlocked_ioctl = kvm_vcpu_ioctl,
|
||||
#ifdef CONFIG_COMPAT
|
||||
#ifdef CONFIG_KVM_COMPAT
|
||||
.compat_ioctl = kvm_vcpu_compat_ioctl,
|
||||
#endif
|
||||
.mmap = kvm_vcpu_mmap,
|
||||
|
@@ -2182,7 +2300,7 @@ out:
|
|||
return r;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
#ifdef CONFIG_KVM_COMPAT
|
||||
static long kvm_vcpu_compat_ioctl(struct file *filp,
|
||||
unsigned int ioctl, unsigned long arg)
|
||||
{
|
||||
|
@@ -2274,7 +2392,7 @@ static int kvm_device_release(struct inode *inode, struct file *filp)
|
|||
|
||||
static const struct file_operations kvm_device_fops = {
|
||||
.unlocked_ioctl = kvm_device_ioctl,
|
||||
#ifdef CONFIG_COMPAT
|
||||
#ifdef CONFIG_KVM_COMPAT
|
||||
.compat_ioctl = kvm_device_ioctl,
|
||||
#endif
|
||||
.release = kvm_device_release,
|
||||
|
@@ -2561,7 +2679,7 @@ out:
|
|||
return r;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
#ifdef CONFIG_KVM_COMPAT
|
||||
struct compat_kvm_dirty_log {
|
||||
__u32 slot;
|
||||
__u32 padding1;
|
||||
|
@@ -2608,7 +2726,7 @@ static struct file_operations kvm_vm_fops = {
|
|||
static struct file_operations kvm_vm_fops = {
|
||||
.release = kvm_vm_release,
|
||||
.unlocked_ioctl = kvm_vm_ioctl,
|
||||
#ifdef CONFIG_COMPAT
|
||||
#ifdef CONFIG_KVM_COMPAT
|
||||
.compat_ioctl = kvm_vm_compat_ioctl,
|
||||
#endif
|
||||
.llseek = noop_llseek,
|
||||
|
|