Merge branch 'kvm-updates/2.6.38' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.38' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (142 commits)
  KVM: Initialize fpu state in preemptible context
  KVM: VMX: when entering real mode align segment base to 16 bytes
  KVM: MMU: handle 'map_writable' in set_spte() function
  KVM: MMU: audit: allow audit more guests at the same time
  KVM: Fetch guest cr3 from hardware on demand
  KVM: Replace reads of vcpu->arch.cr3 by an accessor
  KVM: MMU: only write protect mappings at pagetable level
  KVM: VMX: Correct asm constraint in vmcs_load()/vmcs_clear()
  KVM: MMU: Initialize base_role for tdp mmus
  KVM: VMX: Optimize atomic EFER load
  KVM: VMX: Add definitions for more vm entry/exit control bits
  KVM: SVM: copy instruction bytes from VMCB
  KVM: SVM: implement enhanced INVLPG intercept
  KVM: SVM: enhance mov DR intercept handler
  KVM: SVM: enhance MOV CR intercept handler
  KVM: SVM: add new SVM feature bit names
  KVM: cleanup emulate_instruction
  KVM: move complete_insn_gp() into x86.c
  KVM: x86: fix CR8 handling
  KVM guest: Fix kvm clock initialization when it's configured out
  ...

commit 55065bc527
@@ -1705,6 +1705,9 @@ and is between 256 and 4096 characters. It is defined in the file

	no-kvmclock	[X86,KVM] Disable paravirtualized KVM clock driver

	no-kvmapf	[X86,KVM] Disable paravirtualized asynchronous page
			fault handling.

	nolapic		[X86-32,APIC] Do not enable or use the local APIC.

	nolapic_timer	[X86-32,APIC] Do not use the local APIC timer.
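Usage note (added here for illustration, not part of the patch text): both entries are ordinary guest kernel command line parameters, so booting the guest with, e.g., "no-kvmapf" switches the asynchronous page fault support introduced by this series back off.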
@@ -1085,6 +1085,184 @@ of 4 instructions that make up a hypercall.
If any additional field gets added to this structure later on, a bit for that
additional piece of information will be set in the flags bitmap.

4.47 KVM_ASSIGN_PCI_DEVICE

Capability: KVM_CAP_DEVICE_ASSIGNMENT
Architectures: x86 ia64
Type: vm ioctl
Parameters: struct kvm_assigned_pci_dev (in)
Returns: 0 on success, -1 on error

Assigns a host PCI device to the VM.

struct kvm_assigned_pci_dev {
	__u32 assigned_dev_id;
	__u32 busnr;
	__u32 devfn;
	__u32 flags;
	__u32 segnr;
	union {
		__u32 reserved[11];
	};
};

The PCI device is specified by the triple segnr, busnr, and devfn.
Identification in succeeding service requests is done via assigned_dev_id. The
following flags are specified:

/* Depends on KVM_CAP_IOMMU */
#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
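For illustration only, a userspace sketch of this call might look as follows; the VM file descriptor, the chosen assigned_dev_id, and the segment/bus/devfn values are assumptions for the example, not prescribed by the API text above.

	/* Sketch: assign host device 0000:01:00.0 (already detached from its
	 * host driver) to the VM behind vm_fd, with IOMMU protection on. */
	#include <linux/kvm.h>
	#include <string.h>
	#include <sys/ioctl.h>

	static int assign_device(int vm_fd)
	{
		struct kvm_assigned_pci_dev dev;

		memset(&dev, 0, sizeof(dev));
		dev.assigned_dev_id = 1;		/* caller-chosen handle */
		dev.segnr = 0;				/* PCI domain */
		dev.busnr = 0x01;
		dev.devfn = (0x00 << 3) | 0x0;		/* slot 0, function 0 */
		dev.flags = KVM_DEV_ASSIGN_ENABLE_IOMMU;

		return ioctl(vm_fd, KVM_ASSIGN_PCI_DEVICE, &dev);
	}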
4.48 KVM_DEASSIGN_PCI_DEVICE

Capability: KVM_CAP_DEVICE_DEASSIGNMENT
Architectures: x86 ia64
Type: vm ioctl
Parameters: struct kvm_assigned_pci_dev (in)
Returns: 0 on success, -1 on error

Ends PCI device assignment, releasing all associated resources.

See KVM_CAP_DEVICE_ASSIGNMENT for the data structure. Only assigned_dev_id is
used in kvm_assigned_pci_dev to identify the device.

4.49 KVM_ASSIGN_DEV_IRQ

Capability: KVM_CAP_ASSIGN_DEV_IRQ
Architectures: x86 ia64
Type: vm ioctl
Parameters: struct kvm_assigned_irq (in)
Returns: 0 on success, -1 on error

Assigns an IRQ to a passed-through device.

struct kvm_assigned_irq {
	__u32 assigned_dev_id;
	__u32 host_irq;
	__u32 guest_irq;
	__u32 flags;
	union {
		struct {
			__u32 addr_lo;
			__u32 addr_hi;
			__u32 data;
		} guest_msi;
		__u32 reserved[12];
	};
};

The following flags are defined:

#define KVM_DEV_IRQ_HOST_INTX	(1 << 0)
#define KVM_DEV_IRQ_HOST_MSI	(1 << 1)
#define KVM_DEV_IRQ_HOST_MSIX	(1 << 2)

#define KVM_DEV_IRQ_GUEST_INTX	(1 << 8)
#define KVM_DEV_IRQ_GUEST_MSI	(1 << 9)
#define KVM_DEV_IRQ_GUEST_MSIX	(1 << 10)

It is not valid to specify multiple types per host or guest IRQ. However, the
IRQ type of host and guest can differ or can even be null.
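A minimal sketch of wiring up the legacy interrupt of the device assigned above; the IRQ numbers are placeholders chosen for the example:

	/* Sketch: route the device's host INTx line to a guest GSI. */
	#include <linux/kvm.h>
	#include <string.h>
	#include <sys/ioctl.h>

	static int assign_intx(int vm_fd)
	{
		struct kvm_assigned_irq irq;

		memset(&irq, 0, sizeof(irq));
		irq.assigned_dev_id = 1;	/* id used at KVM_ASSIGN_PCI_DEVICE time */
		irq.host_irq = 16;		/* line the device raises on the host */
		irq.guest_irq = 10;		/* GSI the guest will see */
		irq.flags = KVM_DEV_IRQ_HOST_INTX | KVM_DEV_IRQ_GUEST_INTX;

		return ioctl(vm_fd, KVM_ASSIGN_DEV_IRQ, &irq);
	}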
4.50 KVM_DEASSIGN_DEV_IRQ

Capability: KVM_CAP_ASSIGN_DEV_IRQ
Architectures: x86 ia64
Type: vm ioctl
Parameters: struct kvm_assigned_irq (in)
Returns: 0 on success, -1 on error

Ends an IRQ assignment to a passed-through device.

See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified
by assigned_dev_id, flags must correspond to the IRQ type specified on
KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed.

4.51 KVM_SET_GSI_ROUTING

Capability: KVM_CAP_IRQ_ROUTING
Architectures: x86 ia64
Type: vm ioctl
Parameters: struct kvm_irq_routing (in)
Returns: 0 on success, -1 on error

Sets the GSI routing table entries, overwriting any previously set entries.

struct kvm_irq_routing {
	__u32 nr;
	__u32 flags;
	struct kvm_irq_routing_entry entries[0];
};

No flags are specified so far, the corresponding field must be set to zero.

struct kvm_irq_routing_entry {
	__u32 gsi;
	__u32 type;
	__u32 flags;
	__u32 pad;
	union {
		struct kvm_irq_routing_irqchip irqchip;
		struct kvm_irq_routing_msi msi;
		__u32 pad[8];
	} u;
};

/* gsi routing entry types */
#define KVM_IRQ_ROUTING_IRQCHIP 1
#define KVM_IRQ_ROUTING_MSI 2

No flags are specified so far, the corresponding field must be set to zero.

struct kvm_irq_routing_irqchip {
	__u32 irqchip;
	__u32 pin;
};

struct kvm_irq_routing_msi {
	__u32 address_lo;
	__u32 address_hi;
	__u32 data;
	__u32 pad;
};
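Because each call replaces the whole table, callers rebuild every entry they want to keep. A hedged sketch that installs a single MSI route; the GSI number and the MSI address/data values are placeholders, not mandated by the API:

	#include <linux/kvm.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/ioctl.h>

	static int set_msi_route(int vm_fd)
	{
		struct kvm_irq_routing *table;
		int ret;

		/* Room for the header plus one routing entry. */
		table = calloc(1, sizeof(*table) + sizeof(struct kvm_irq_routing_entry));
		if (!table)
			return -1;

		table->nr = 1;
		table->entries[0].gsi = 24;
		table->entries[0].type = KVM_IRQ_ROUTING_MSI;
		table->entries[0].u.msi.address_lo = 0xfee00000;	/* local APIC window */
		table->entries[0].u.msi.address_hi = 0;
		table->entries[0].u.msi.data = 0x4041;			/* example vector/delivery bits */

		ret = ioctl(vm_fd, KVM_SET_GSI_ROUTING, table);
		free(table);
		return ret;
	}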
4.52 KVM_ASSIGN_SET_MSIX_NR

Capability: KVM_CAP_DEVICE_MSIX
Architectures: x86 ia64
Type: vm ioctl
Parameters: struct kvm_assigned_msix_nr (in)
Returns: 0 on success, -1 on error

Set the number of MSI-X interrupts for an assigned device. This service can
only be called once in the lifetime of an assigned device.

struct kvm_assigned_msix_nr {
	__u32 assigned_dev_id;
	__u16 entry_nr;
	__u16 padding;
};

#define KVM_MAX_MSIX_PER_DEV	256

4.53 KVM_ASSIGN_SET_MSIX_ENTRY

Capability: KVM_CAP_DEVICE_MSIX
Architectures: x86 ia64
Type: vm ioctl
Parameters: struct kvm_assigned_msix_entry (in)
Returns: 0 on success, -1 on error

Specifies the routing of an MSI-X assigned device interrupt to a GSI. Setting
the GSI vector to zero means disabling the interrupt.

struct kvm_assigned_msix_entry {
	__u32 assigned_dev_id;
	__u32 gsi;
	__u16 entry; /* The index of entry in the MSI-X table */
	__u16 padding[3];
};
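A sketch combining the two MSI-X ioctls above; the vector count, table index, and GSI are made-up example values, and KVM_ASSIGN_SET_MSIX_NR may only be issued once per assigned device:

	#include <linux/kvm.h>
	#include <string.h>
	#include <sys/ioctl.h>

	static int setup_msix(int vm_fd)
	{
		struct kvm_assigned_msix_nr nr;
		struct kvm_assigned_msix_entry entry;

		memset(&nr, 0, sizeof(nr));
		nr.assigned_dev_id = 1;
		nr.entry_nr = 2;			/* may only be set once */
		if (ioctl(vm_fd, KVM_ASSIGN_SET_MSIX_NR, &nr) < 0)
			return -1;

		memset(&entry, 0, sizeof(entry));
		entry.assigned_dev_id = 1;
		entry.entry = 0;			/* index into the MSI-X table */
		entry.gsi = 24;				/* 0 would disable this vector */
		return ioctl(vm_fd, KVM_ASSIGN_SET_MSIX_ENTRY, &entry);
	}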
5. The kvm_run structure

Application code obtains a pointer to the kvm_run structure by
@@ -36,6 +36,9 @@ KVM_FEATURE_MMU_OP                 ||     2 || deprecated.
KVM_FEATURE_CLOCKSOURCE2           ||     3 || kvmclock available at msrs
                                   ||       || 0x4b564d00 and 0x4b564d01
------------------------------------------------------------------------------
KVM_FEATURE_ASYNC_PF               ||     4 || async pf can be enabled by
                                   ||       || writing to msr 0x4b564d02
------------------------------------------------------------------------------
KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
                                   ||       || per-cpu warps are expected in
                                   ||       || kvmclock.
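As an illustration of how a guest consumes this table, a freestanding check for the async PF bit might look like the sketch below; it assumes the KVM CPUID base leaf at 0x40000000 has already been detected, and an in-tree guest would simply use kvm_para_has_feature() instead.

	/* Sketch: leaf 0x40000001 (KVM_CPUID_FEATURES) returns the bits above in EAX. */
	static inline unsigned int kvm_cpuid_features(void)
	{
		unsigned int eax, ebx, ecx, edx;

		asm volatile("cpuid"
			     : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
			     : "0" (0x40000001));
		return eax;
	}

	static inline int has_async_pf(void)
	{
		return kvm_cpuid_features() & (1 << 4);	/* KVM_FEATURE_ASYNC_PF */
	}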
@@ -3,7 +3,6 @@ Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
=====================================================

KVM makes use of some custom MSRs to service some requests.
At present, this facility is only used by kvmclock.

Custom MSRs have a range reserved for them that goes from
0x4b564d00 to 0x4b564dff. There are MSRs outside this area,

@@ -151,3 +150,38 @@ MSR_KVM_SYSTEM_TIME: 0x12
		return PRESENT;
	} else
		return NON_PRESENT;

MSR_KVM_ASYNC_PF_EN: 0x4b564d02
	data: Bits 63-6 hold the 64-byte aligned physical address of a
	64-byte memory area which must be in guest RAM and must be
	zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1
	when asynchronous page faults are enabled on the vcpu, 0 when
	disabled. Bit 1 is 1 if asynchronous page faults can be injected
	when the vcpu is in cpl == 0.

	The first 4 bytes of the 64-byte memory area will be written by
	the hypervisor at the time of asynchronous page fault (APF)
	injection to indicate the type of asynchronous page fault. A value
	of 1 means that the page referred to by the page fault is not
	present. A value of 2 means that the page is now available.
	Disabling interrupts inhibits APFs, so the guest must not enable
	interrupts before the reason is read, or it may be overwritten by
	another APF. Since APF uses the same exception vector as a regular
	page fault, the guest must reset the reason to 0 before it does
	anything that can generate a normal page fault. If the APF reason
	is 0 during a page fault, it is a regular page fault.

	During delivery of a type 1 APF, cr2 contains a token that will
	be used to notify the guest when the missing page becomes
	available. When the page becomes available, a type 2 APF is sent
	with cr2 set to the token associated with the page. There is a
	special token, 0xffffffff, which tells the vcpu that it should
	wake up all processes waiting for APFs and that no individual
	type 2 APFs will be sent.

	If APF is disabled while there are outstanding APFs, they will
	not be delivered.

	Currently a type 2 APF will always be delivered on the same vcpu
	as the type 1 was, but the guest should not rely on that.
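To make the bit layout concrete, a minimal guest-side enable sketch follows (kernel context assumed). It mirrors what the arch/x86/kernel/kvm.c hunk later in this patch does; the per-cpu variable name and the helper name are inventions for the example, while the constants come from kvm_para.h.

	#include <linux/percpu.h>
	#include <asm/kvm_para.h>
	#include <asm/msr.h>

	static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_area) __aligned(64);

	static void enable_async_pf(void)
	{
		u64 val = __pa(&__get_cpu_var(apf_area));	/* bits 63-6: area address */

		val |= KVM_ASYNC_PF_ENABLED;			/* bit 0: turn the mechanism on */
	#ifdef CONFIG_PREEMPT
		val |= KVM_ASYNC_PF_SEND_ALWAYS;		/* bit 1: inject even at cpl == 0 */
	#endif
		wrmsrl(MSR_KVM_ASYNC_PF_EN, val);
	}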
@ -590,6 +590,10 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu);
|
|||
int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
|
||||
void kvm_sal_emul(struct kvm_vcpu *vcpu);
|
||||
|
||||
#define __KVM_HAVE_ARCH_VM_ALLOC 1
|
||||
struct kvm *kvm_arch_alloc_vm(void);
|
||||
void kvm_arch_free_vm(struct kvm *kvm);
|
||||
|
||||
#endif /* __ASSEMBLY__*/
|
||||
|
||||
#endif
|
||||
|
|
|
@ -749,7 +749,7 @@ out:
|
|||
return r;
|
||||
}
|
||||
|
||||
static struct kvm *kvm_alloc_kvm(void)
|
||||
struct kvm *kvm_arch_alloc_vm(void)
|
||||
{
|
||||
|
||||
struct kvm *kvm;
|
||||
|
@ -760,7 +760,7 @@ static struct kvm *kvm_alloc_kvm(void)
|
|||
vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
|
||||
|
||||
if (!vm_base)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
return NULL;
|
||||
|
||||
memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
|
||||
kvm = (struct kvm *)(vm_base +
|
||||
|
@ -806,10 +806,12 @@ static void kvm_build_io_pmt(struct kvm *kvm)
|
|||
#define GUEST_PHYSICAL_RR4 0x2739
|
||||
#define VMM_INIT_RR 0x1660
|
||||
|
||||
static void kvm_init_vm(struct kvm *kvm)
|
||||
int kvm_arch_init_vm(struct kvm *kvm)
|
||||
{
|
||||
BUG_ON(!kvm);
|
||||
|
||||
kvm->arch.is_sn2 = ia64_platform_is("sn2");
|
||||
|
||||
kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
|
||||
kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
|
||||
kvm->arch.vmm_init_rr = VMM_INIT_RR;
|
||||
|
@ -823,21 +825,8 @@ static void kvm_init_vm(struct kvm *kvm)
|
|||
|
||||
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
|
||||
set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
|
||||
}
|
||||
|
||||
struct kvm *kvm_arch_create_vm(void)
|
||||
{
|
||||
struct kvm *kvm = kvm_alloc_kvm();
|
||||
|
||||
if (IS_ERR(kvm))
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
kvm->arch.is_sn2 = ia64_platform_is("sn2");
|
||||
|
||||
kvm_init_vm(kvm);
|
||||
|
||||
return kvm;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
|
||||
|
@ -962,7 +951,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|||
goto out;
|
||||
r = kvm_setup_default_irq_routing(kvm);
|
||||
if (r) {
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
kvm_ioapic_destroy(kvm);
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
goto out;
|
||||
}
|
||||
break;
|
||||
|
@ -1357,7 +1348,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
static void free_kvm(struct kvm *kvm)
|
||||
void kvm_arch_free_vm(struct kvm *kvm)
|
||||
{
|
||||
unsigned long vm_base = kvm->arch.vm_base;
|
||||
|
||||
|
@ -1399,9 +1390,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
|
|||
#endif
|
||||
kfree(kvm->arch.vioapic);
|
||||
kvm_release_vm_pages(kvm);
|
||||
kvm_free_physmem(kvm);
|
||||
cleanup_srcu_struct(&kvm->srcu);
|
||||
free_kvm(kvm);
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
|
|
|
@ -1307,12 +1307,10 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
|
|||
int err = -ENOMEM;
|
||||
unsigned long p;
|
||||
|
||||
vcpu_book3s = vmalloc(sizeof(struct kvmppc_vcpu_book3s));
|
||||
vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
|
||||
if (!vcpu_book3s)
|
||||
goto out;
|
||||
|
||||
memset(vcpu_book3s, 0, sizeof(struct kvmppc_vcpu_book3s));
|
||||
|
||||
vcpu_book3s->shadow_vcpu = (struct kvmppc_book3s_shadow_vcpu *)
|
||||
kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
|
||||
if (!vcpu_book3s->shadow_vcpu)
|
||||
|
|
|
@ -145,18 +145,12 @@ void kvm_arch_check_processor_compat(void *rtn)
|
|||
*(int *)rtn = kvmppc_core_check_processor_compat();
|
||||
}
|
||||
|
||||
struct kvm *kvm_arch_create_vm(void)
|
||||
int kvm_arch_init_vm(struct kvm *kvm)
|
||||
{
|
||||
struct kvm *kvm;
|
||||
|
||||
kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
|
||||
if (!kvm)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
return kvm;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvmppc_free_vcpus(struct kvm *kvm)
|
||||
void kvm_arch_destroy_vm(struct kvm *kvm)
|
||||
{
|
||||
unsigned int i;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
@ -176,14 +170,6 @@ void kvm_arch_sync_events(struct kvm *kvm)
|
|||
{
|
||||
}
|
||||
|
||||
void kvm_arch_destroy_vm(struct kvm *kvm)
|
||||
{
|
||||
kvmppc_free_vcpus(kvm);
|
||||
kvm_free_physmem(kvm);
|
||||
cleanup_srcu_struct(&kvm->srcu);
|
||||
kfree(kvm);
|
||||
}
|
||||
|
||||
int kvm_dev_ioctl_check_extension(long ext)
|
||||
{
|
||||
int r;
|
||||
|
|
|
@ -164,24 +164,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|||
return r;
|
||||
}
|
||||
|
||||
struct kvm *kvm_arch_create_vm(void)
|
||||
int kvm_arch_init_vm(struct kvm *kvm)
|
||||
{
|
||||
struct kvm *kvm;
|
||||
int rc;
|
||||
char debug_name[16];
|
||||
|
||||
rc = s390_enable_sie();
|
||||
if (rc)
|
||||
goto out_nokvm;
|
||||
|
||||
rc = -ENOMEM;
|
||||
kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
|
||||
if (!kvm)
|
||||
goto out_nokvm;
|
||||
goto out_err;
|
||||
|
||||
kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
|
||||
if (!kvm->arch.sca)
|
||||
goto out_nosca;
|
||||
goto out_err;
|
||||
|
||||
sprintf(debug_name, "kvm-%u", current->pid);
|
||||
|
||||
|
@ -195,13 +189,11 @@ struct kvm *kvm_arch_create_vm(void)
|
|||
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
|
||||
VM_EVENT(kvm, 3, "%s", "vm created");
|
||||
|
||||
return kvm;
|
||||
return 0;
|
||||
out_nodbf:
|
||||
free_page((unsigned long)(kvm->arch.sca));
|
||||
out_nosca:
|
||||
kfree(kvm);
|
||||
out_nokvm:
|
||||
return ERR_PTR(rc);
|
||||
out_err:
|
||||
return rc;
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
||||
|
@ -240,11 +232,8 @@ void kvm_arch_sync_events(struct kvm *kvm)
|
|||
void kvm_arch_destroy_vm(struct kvm *kvm)
|
||||
{
|
||||
kvm_free_vcpus(kvm);
|
||||
kvm_free_physmem(kvm);
|
||||
free_page((unsigned long)(kvm->arch.sca));
|
||||
debug_unregister(kvm->arch.dbf);
|
||||
cleanup_srcu_struct(&kvm->srcu);
|
||||
kfree(kvm);
|
||||
}
|
||||
|
||||
/* Section: vcpu related */
|
||||
|
|
|
@ -15,6 +15,14 @@
|
|||
|
||||
struct x86_emulate_ctxt;
|
||||
|
||||
struct x86_exception {
|
||||
u8 vector;
|
||||
bool error_code_valid;
|
||||
u16 error_code;
|
||||
bool nested_page_fault;
|
||||
u64 address; /* cr2 or nested page fault gpa */
|
||||
};
|
||||
|
||||
/*
|
||||
* x86_emulate_ops:
|
||||
*
|
||||
|
@ -64,7 +72,8 @@ struct x86_emulate_ops {
|
|||
* @bytes: [IN ] Number of bytes to read from memory.
|
||||
*/
|
||||
int (*read_std)(unsigned long addr, void *val,
|
||||
unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
|
||||
unsigned int bytes, struct kvm_vcpu *vcpu,
|
||||
struct x86_exception *fault);
|
||||
|
||||
/*
|
||||
* write_std: Write bytes of standard (non-emulated/special) memory.
|
||||
|
@ -74,7 +83,8 @@ struct x86_emulate_ops {
|
|||
* @bytes: [IN ] Number of bytes to write to memory.
|
||||
*/
|
||||
int (*write_std)(unsigned long addr, void *val,
|
||||
unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
|
||||
unsigned int bytes, struct kvm_vcpu *vcpu,
|
||||
struct x86_exception *fault);
|
||||
/*
|
||||
* fetch: Read bytes of standard (non-emulated/special) memory.
|
||||
* Used for instruction fetch.
|
||||
|
@ -83,7 +93,8 @@ struct x86_emulate_ops {
|
|||
* @bytes: [IN ] Number of bytes to read from memory.
|
||||
*/
|
||||
int (*fetch)(unsigned long addr, void *val,
|
||||
unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
|
||||
unsigned int bytes, struct kvm_vcpu *vcpu,
|
||||
struct x86_exception *fault);
|
||||
|
||||
/*
|
||||
* read_emulated: Read bytes from emulated/special memory area.
|
||||
|
@ -94,7 +105,7 @@ struct x86_emulate_ops {
|
|||
int (*read_emulated)(unsigned long addr,
|
||||
void *val,
|
||||
unsigned int bytes,
|
||||
unsigned int *error,
|
||||
struct x86_exception *fault,
|
||||
struct kvm_vcpu *vcpu);
|
||||
|
||||
/*
|
||||
|
@ -107,7 +118,7 @@ struct x86_emulate_ops {
|
|||
int (*write_emulated)(unsigned long addr,
|
||||
const void *val,
|
||||
unsigned int bytes,
|
||||
unsigned int *error,
|
||||
struct x86_exception *fault,
|
||||
struct kvm_vcpu *vcpu);
|
||||
|
||||
/*
|
||||
|
@ -122,7 +133,7 @@ struct x86_emulate_ops {
|
|||
const void *old,
|
||||
const void *new,
|
||||
unsigned int bytes,
|
||||
unsigned int *error,
|
||||
struct x86_exception *fault,
|
||||
struct kvm_vcpu *vcpu);
|
||||
|
||||
int (*pio_in_emulated)(int size, unsigned short port, void *val,
|
||||
|
@ -159,7 +170,10 @@ struct operand {
|
|||
};
|
||||
union {
|
||||
unsigned long *reg;
|
||||
unsigned long mem;
|
||||
struct segmented_address {
|
||||
ulong ea;
|
||||
unsigned seg;
|
||||
} mem;
|
||||
} addr;
|
||||
union {
|
||||
unsigned long val;
|
||||
|
@ -226,9 +240,8 @@ struct x86_emulate_ctxt {
|
|||
|
||||
bool perm_ok; /* do not check permissions if true */
|
||||
|
||||
int exception; /* exception that happens during emulation or -1 */
|
||||
u32 error_code; /* error code for exception */
|
||||
bool error_code_valid;
|
||||
bool have_exception;
|
||||
struct x86_exception exception;
|
||||
|
||||
/* decode cache */
|
||||
struct decode_cache decode;
|
||||
|
@ -252,7 +265,7 @@ struct x86_emulate_ctxt {
|
|||
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
|
||||
#endif
|
||||
|
||||
int x86_decode_insn(struct x86_emulate_ctxt *ctxt);
|
||||
int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
|
||||
#define EMULATION_FAILED -1
|
||||
#define EMULATION_OK 0
|
||||
#define EMULATION_RESTART 1
|
||||
|
|
|
@ -83,11 +83,14 @@
|
|||
#define KVM_NR_FIXED_MTRR_REGION 88
|
||||
#define KVM_NR_VAR_MTRR 8
|
||||
|
||||
#define ASYNC_PF_PER_VCPU 64
|
||||
|
||||
extern spinlock_t kvm_lock;
|
||||
extern struct list_head vm_list;
|
||||
|
||||
struct kvm_vcpu;
|
||||
struct kvm;
|
||||
struct kvm_async_pf;
|
||||
|
||||
enum kvm_reg {
|
||||
VCPU_REGS_RAX = 0,
|
||||
|
@ -114,6 +117,7 @@ enum kvm_reg {
|
|||
|
||||
enum kvm_reg_ex {
|
||||
VCPU_EXREG_PDPTR = NR_VCPU_REGS,
|
||||
VCPU_EXREG_CR3,
|
||||
};
|
||||
|
||||
enum {
|
||||
|
@ -238,16 +242,18 @@ struct kvm_mmu {
|
|||
void (*new_cr3)(struct kvm_vcpu *vcpu);
|
||||
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
|
||||
unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
|
||||
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
|
||||
void (*inject_page_fault)(struct kvm_vcpu *vcpu);
|
||||
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err,
|
||||
bool prefault);
|
||||
void (*inject_page_fault)(struct kvm_vcpu *vcpu,
|
||||
struct x86_exception *fault);
|
||||
void (*free)(struct kvm_vcpu *vcpu);
|
||||
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
|
||||
u32 *error);
|
||||
struct x86_exception *exception);
|
||||
gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access);
|
||||
void (*prefetch_page)(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu_page *page);
|
||||
int (*sync_page)(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu_page *sp, bool clear_unsync);
|
||||
struct kvm_mmu_page *sp);
|
||||
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
|
||||
hpa_t root_hpa;
|
||||
int root_level;
|
||||
|
@ -315,16 +321,6 @@ struct kvm_vcpu_arch {
|
|||
*/
|
||||
struct kvm_mmu *walk_mmu;
|
||||
|
||||
/*
|
||||
* This struct is filled with the necessary information to propagate a
|
||||
* page fault into the guest
|
||||
*/
|
||||
struct {
|
||||
u64 address;
|
||||
unsigned error_code;
|
||||
bool nested;
|
||||
} fault;
|
||||
|
||||
/* only needed in kvm_pv_mmu_op() path, but it's hot so
|
||||
* put it here to avoid allocation */
|
||||
struct kvm_pv_mmu_op_buffer mmu_op_buffer;
|
||||
|
@ -412,6 +408,15 @@ struct kvm_vcpu_arch {
|
|||
u64 hv_vapic;
|
||||
|
||||
cpumask_var_t wbinvd_dirty_mask;
|
||||
|
||||
struct {
|
||||
bool halted;
|
||||
gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
|
||||
struct gfn_to_hva_cache data;
|
||||
u64 msr_val;
|
||||
u32 id;
|
||||
bool send_user_only;
|
||||
} apf;
|
||||
};
|
||||
|
||||
struct kvm_arch {
|
||||
|
@ -456,6 +461,10 @@ struct kvm_arch {
|
|||
/* fields used by HYPER-V emulation */
|
||||
u64 hv_guest_os_id;
|
||||
u64 hv_hypercall;
|
||||
|
||||
#ifdef CONFIG_KVM_MMU_AUDIT
|
||||
int audit_point;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct kvm_vm_stat {
|
||||
|
@ -529,6 +538,7 @@ struct kvm_x86_ops {
|
|||
struct kvm_segment *var, int seg);
|
||||
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
|
||||
void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu);
|
||||
void (*decache_cr3)(struct kvm_vcpu *vcpu);
|
||||
void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
|
||||
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
|
||||
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
|
||||
|
@ -582,9 +592,17 @@ struct kvm_x86_ops {
|
|||
|
||||
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
|
||||
|
||||
void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
|
||||
const struct trace_print_flags *exit_reasons_str;
|
||||
};
|
||||
|
||||
struct kvm_arch_async_pf {
|
||||
u32 token;
|
||||
gfn_t gfn;
|
||||
unsigned long cr3;
|
||||
bool direct_map;
|
||||
};
|
||||
|
||||
extern struct kvm_x86_ops *kvm_x86_ops;
|
||||
|
||||
int kvm_mmu_module_init(void);
|
||||
|
@ -594,7 +612,6 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
|
|||
int kvm_mmu_create(struct kvm_vcpu *vcpu);
|
||||
int kvm_mmu_setup(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
|
||||
void kvm_mmu_set_base_ptes(u64 base_pte);
|
||||
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
|
||||
u64 dirty_mask, u64 nx_mask, u64 x_mask);
|
||||
|
||||
|
@ -623,8 +640,15 @@ enum emulation_result {
|
|||
#define EMULTYPE_NO_DECODE (1 << 0)
|
||||
#define EMULTYPE_TRAP_UD (1 << 1)
|
||||
#define EMULTYPE_SKIP (1 << 2)
|
||||
int emulate_instruction(struct kvm_vcpu *vcpu,
|
||||
unsigned long cr2, u16 error_code, int emulation_type);
|
||||
int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
|
||||
int emulation_type, void *insn, int insn_len);
|
||||
|
||||
static inline int emulate_instruction(struct kvm_vcpu *vcpu,
|
||||
int emulation_type)
|
||||
{
|
||||
return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
|
||||
}
|
||||
|
||||
void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
|
||||
void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
|
||||
|
||||
|
@ -650,7 +674,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
|
|||
int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
|
||||
int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
|
||||
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
|
||||
void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
|
||||
int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
|
||||
int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
|
||||
int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
|
||||
unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
|
||||
|
@ -668,11 +692,11 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
|
|||
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
|
||||
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
|
||||
void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
|
||||
void kvm_inject_page_fault(struct kvm_vcpu *vcpu);
|
||||
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
|
||||
int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
||||
gfn_t gfn, void *data, int offset, int len,
|
||||
u32 access);
|
||||
void kvm_propagate_fault(struct kvm_vcpu *vcpu);
|
||||
void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
|
||||
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
|
||||
|
||||
int kvm_pic_set_irq(void *opaque, int irq, int level);
|
||||
|
@ -690,16 +714,21 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
|
|||
int kvm_mmu_load(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
|
||||
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
|
||||
gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
|
||||
gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
|
||||
gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
|
||||
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
|
||||
struct x86_exception *exception);
|
||||
gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
|
||||
struct x86_exception *exception);
|
||||
gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
|
||||
struct x86_exception *exception);
|
||||
gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
|
||||
struct x86_exception *exception);
|
||||
|
||||
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
|
||||
|
||||
int kvm_fix_hypercall(struct kvm_vcpu *vcpu);
|
||||
|
||||
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code);
|
||||
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
|
||||
void *insn, int insn_len);
|
||||
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
|
||||
|
||||
void kvm_enable_tdp(void);
|
||||
|
@ -766,20 +795,25 @@ enum {
|
|||
#define HF_VINTR_MASK (1 << 2)
|
||||
#define HF_NMI_MASK (1 << 3)
|
||||
#define HF_IRET_MASK (1 << 4)
|
||||
#define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */
|
||||
|
||||
/*
|
||||
* Hardware virtualization extension instructions may fault if a
|
||||
* reboot turns off virtualization while processes are running.
|
||||
* Trap the fault and ignore the instruction if that happens.
|
||||
*/
|
||||
asmlinkage void kvm_handle_fault_on_reboot(void);
|
||||
asmlinkage void kvm_spurious_fault(void);
|
||||
extern bool kvm_rebooting;
|
||||
|
||||
#define __kvm_handle_fault_on_reboot(insn) \
|
||||
"666: " insn "\n\t" \
|
||||
"668: \n\t" \
|
||||
".pushsection .fixup, \"ax\" \n" \
|
||||
"667: \n\t" \
|
||||
"cmpb $0, kvm_rebooting \n\t" \
|
||||
"jne 668b \n\t" \
|
||||
__ASM_SIZE(push) " $666b \n\t" \
|
||||
"jmp kvm_handle_fault_on_reboot \n\t" \
|
||||
"call kvm_spurious_fault \n\t" \
|
||||
".popsection \n\t" \
|
||||
".pushsection __ex_table, \"a\" \n\t" \
|
||||
_ASM_PTR " 666b, 667b \n\t" \
|
||||
|
@ -799,4 +833,15 @@ void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
|
|||
|
||||
bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
|
||||
|
||||
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
|
||||
struct kvm_async_pf *work);
|
||||
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
|
||||
struct kvm_async_pf *work);
|
||||
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
|
||||
struct kvm_async_pf *work);
|
||||
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
|
||||
extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
|
||||
|
||||
void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
|
||||
|
||||
#endif /* _ASM_X86_KVM_HOST_H */
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
* are available. The use of 0x11 and 0x12 is deprecated
|
||||
*/
|
||||
#define KVM_FEATURE_CLOCKSOURCE2 3
|
||||
#define KVM_FEATURE_ASYNC_PF 4
|
||||
|
||||
/* The last 8 bits are used to indicate how to interpret the flags field
|
||||
* in pvclock structure. If no bits are set, all flags are ignored.
|
||||
|
@ -32,9 +33,13 @@
|
|||
/* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */
|
||||
#define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00
|
||||
#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
|
||||
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
|
||||
|
||||
#define KVM_MAX_MMU_OP_BATCH 32
|
||||
|
||||
#define KVM_ASYNC_PF_ENABLED (1 << 0)
|
||||
#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
|
||||
|
||||
/* Operations for KVM_HC_MMU_OP */
|
||||
#define KVM_MMU_OP_WRITE_PTE 1
|
||||
#define KVM_MMU_OP_FLUSH_TLB 2
|
||||
|
@ -61,10 +66,20 @@ struct kvm_mmu_op_release_pt {
|
|||
__u64 pt_phys;
|
||||
};
|
||||
|
||||
#define KVM_PV_REASON_PAGE_NOT_PRESENT 1
|
||||
#define KVM_PV_REASON_PAGE_READY 2
|
||||
|
||||
struct kvm_vcpu_pv_apf_data {
|
||||
__u32 reason;
|
||||
__u8 pad[60];
|
||||
__u32 enabled;
|
||||
};
|
||||
|
||||
#ifdef __KERNEL__
|
||||
#include <asm/processor.h>
|
||||
|
||||
extern void kvmclock_init(void);
|
||||
extern int kvm_register_clock(char *txt);
|
||||
|
||||
|
||||
/* This instruction is vmcall. On non-VT architectures, it will generate a
|
||||
|
@ -160,8 +175,17 @@ static inline unsigned int kvm_arch_para_features(void)
|
|||
|
||||
#ifdef CONFIG_KVM_GUEST
|
||||
void __init kvm_guest_init(void);
|
||||
void kvm_async_pf_task_wait(u32 token);
|
||||
void kvm_async_pf_task_wake(u32 token);
|
||||
u32 kvm_read_and_reset_pf_reason(void);
|
||||
#else
|
||||
#define kvm_guest_init() do { } while (0)
|
||||
#define kvm_async_pf_task_wait(T) do {} while(0)
|
||||
#define kvm_async_pf_task_wake(T) do {} while(0)
|
||||
static inline u32 kvm_read_and_reset_pf_reason(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
|
|
@ -47,14 +47,13 @@ enum {
|
|||
INTERCEPT_MONITOR,
|
||||
INTERCEPT_MWAIT,
|
||||
INTERCEPT_MWAIT_COND,
|
||||
INTERCEPT_XSETBV,
|
||||
};
|
||||
|
||||
|
||||
struct __attribute__ ((__packed__)) vmcb_control_area {
|
||||
u16 intercept_cr_read;
|
||||
u16 intercept_cr_write;
|
||||
u16 intercept_dr_read;
|
||||
u16 intercept_dr_write;
|
||||
u32 intercept_cr;
|
||||
u32 intercept_dr;
|
||||
u32 intercept_exceptions;
|
||||
u64 intercept;
|
||||
u8 reserved_1[42];
|
||||
|
@ -81,14 +80,19 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
|
|||
u32 event_inj_err;
|
||||
u64 nested_cr3;
|
||||
u64 lbr_ctl;
|
||||
u64 reserved_5;
|
||||
u32 clean;
|
||||
u32 reserved_5;
|
||||
u64 next_rip;
|
||||
u8 reserved_6[816];
|
||||
u8 insn_len;
|
||||
u8 insn_bytes[15];
|
||||
u8 reserved_6[800];
|
||||
};
|
||||
|
||||
|
||||
#define TLB_CONTROL_DO_NOTHING 0
|
||||
#define TLB_CONTROL_FLUSH_ALL_ASID 1
|
||||
#define TLB_CONTROL_FLUSH_ASID 3
|
||||
#define TLB_CONTROL_FLUSH_ASID_LOCAL 7
|
||||
|
||||
#define V_TPR_MASK 0x0f
|
||||
|
||||
|
@ -204,19 +208,31 @@ struct __attribute__ ((__packed__)) vmcb {
|
|||
#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
|
||||
#define SVM_SELECTOR_CODE_MASK (1 << 3)
|
||||
|
||||
#define INTERCEPT_CR0_MASK 1
|
||||
#define INTERCEPT_CR3_MASK (1 << 3)
|
||||
#define INTERCEPT_CR4_MASK (1 << 4)
|
||||
#define INTERCEPT_CR8_MASK (1 << 8)
|
||||
#define INTERCEPT_CR0_READ 0
|
||||
#define INTERCEPT_CR3_READ 3
|
||||
#define INTERCEPT_CR4_READ 4
|
||||
#define INTERCEPT_CR8_READ 8
|
||||
#define INTERCEPT_CR0_WRITE (16 + 0)
|
||||
#define INTERCEPT_CR3_WRITE (16 + 3)
|
||||
#define INTERCEPT_CR4_WRITE (16 + 4)
|
||||
#define INTERCEPT_CR8_WRITE (16 + 8)
|
||||
|
||||
#define INTERCEPT_DR0_MASK 1
|
||||
#define INTERCEPT_DR1_MASK (1 << 1)
|
||||
#define INTERCEPT_DR2_MASK (1 << 2)
|
||||
#define INTERCEPT_DR3_MASK (1 << 3)
|
||||
#define INTERCEPT_DR4_MASK (1 << 4)
|
||||
#define INTERCEPT_DR5_MASK (1 << 5)
|
||||
#define INTERCEPT_DR6_MASK (1 << 6)
|
||||
#define INTERCEPT_DR7_MASK (1 << 7)
|
||||
#define INTERCEPT_DR0_READ 0
|
||||
#define INTERCEPT_DR1_READ 1
|
||||
#define INTERCEPT_DR2_READ 2
|
||||
#define INTERCEPT_DR3_READ 3
|
||||
#define INTERCEPT_DR4_READ 4
|
||||
#define INTERCEPT_DR5_READ 5
|
||||
#define INTERCEPT_DR6_READ 6
|
||||
#define INTERCEPT_DR7_READ 7
|
||||
#define INTERCEPT_DR0_WRITE (16 + 0)
|
||||
#define INTERCEPT_DR1_WRITE (16 + 1)
|
||||
#define INTERCEPT_DR2_WRITE (16 + 2)
|
||||
#define INTERCEPT_DR3_WRITE (16 + 3)
|
||||
#define INTERCEPT_DR4_WRITE (16 + 4)
|
||||
#define INTERCEPT_DR5_WRITE (16 + 5)
|
||||
#define INTERCEPT_DR6_WRITE (16 + 6)
|
||||
#define INTERCEPT_DR7_WRITE (16 + 7)
|
||||
|
||||
#define SVM_EVTINJ_VEC_MASK 0xff
|
||||
|
||||
|
@ -246,6 +262,8 @@ struct __attribute__ ((__packed__)) vmcb {
|
|||
#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
|
||||
#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44
|
||||
|
||||
#define SVM_EXITINFO_REG_MASK 0x0F
|
||||
|
||||
#define SVM_EXIT_READ_CR0 0x000
|
||||
#define SVM_EXIT_READ_CR3 0x003
|
||||
#define SVM_EXIT_READ_CR4 0x004
|
||||
|
@ -316,6 +334,7 @@ struct __attribute__ ((__packed__)) vmcb {
|
|||
#define SVM_EXIT_MONITOR 0x08a
|
||||
#define SVM_EXIT_MWAIT 0x08b
|
||||
#define SVM_EXIT_MWAIT_COND 0x08c
|
||||
#define SVM_EXIT_XSETBV 0x08d
|
||||
#define SVM_EXIT_NPF 0x400
|
||||
|
||||
#define SVM_EXIT_ERR -1
|
||||
|
|
|
@ -30,6 +30,7 @@ asmlinkage void segment_not_present(void);
|
|||
asmlinkage void stack_segment(void);
|
||||
asmlinkage void general_protection(void);
|
||||
asmlinkage void page_fault(void);
|
||||
asmlinkage void async_page_fault(void);
|
||||
asmlinkage void spurious_interrupt_bug(void);
|
||||
asmlinkage void coprocessor_error(void);
|
||||
asmlinkage void alignment_check(void);
|
||||
|
|
|
@ -66,15 +66,23 @@
|
|||
#define PIN_BASED_NMI_EXITING 0x00000008
|
||||
#define PIN_BASED_VIRTUAL_NMIS 0x00000020
|
||||
|
||||
#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002
|
||||
#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
|
||||
#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000
|
||||
#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
|
||||
#define VM_EXIT_SAVE_IA32_PAT 0x00040000
|
||||
#define VM_EXIT_LOAD_IA32_PAT 0x00080000
|
||||
#define VM_EXIT_SAVE_IA32_EFER 0x00100000
|
||||
#define VM_EXIT_LOAD_IA32_EFER 0x00200000
|
||||
#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000
|
||||
|
||||
#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000002
|
||||
#define VM_ENTRY_IA32E_MODE 0x00000200
|
||||
#define VM_ENTRY_SMM 0x00000400
|
||||
#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
|
||||
#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000
|
||||
#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
|
||||
#define VM_ENTRY_LOAD_IA32_EFER 0x00008000
|
||||
|
||||
/* VMCS Encodings */
|
||||
enum vmcs_field {
|
||||
|
@ -239,6 +247,7 @@ enum vmcs_field {
|
|||
#define EXIT_REASON_TASK_SWITCH 9
|
||||
#define EXIT_REASON_CPUID 10
|
||||
#define EXIT_REASON_HLT 12
|
||||
#define EXIT_REASON_INVD 13
|
||||
#define EXIT_REASON_INVLPG 14
|
||||
#define EXIT_REASON_RDPMC 15
|
||||
#define EXIT_REASON_RDTSC 16
|
||||
|
@ -296,6 +305,12 @@ enum vmcs_field {
|
|||
#define GUEST_INTR_STATE_SMI 0x00000004
|
||||
#define GUEST_INTR_STATE_NMI 0x00000008
|
||||
|
||||
/* GUEST_ACTIVITY_STATE flags */
|
||||
#define GUEST_ACTIVITY_ACTIVE 0
|
||||
#define GUEST_ACTIVITY_HLT 1
|
||||
#define GUEST_ACTIVITY_SHUTDOWN 2
|
||||
#define GUEST_ACTIVITY_WAIT_SIPI 3
|
||||
|
||||
/*
|
||||
* Exit Qualifications for MOV for Control Register Access
|
||||
*/
|
||||
|
|
|
@ -1406,6 +1406,16 @@ ENTRY(general_protection)
|
|||
CFI_ENDPROC
|
||||
END(general_protection)
|
||||
|
||||
#ifdef CONFIG_KVM_GUEST
|
||||
ENTRY(async_page_fault)
|
||||
RING0_EC_FRAME
|
||||
pushl $do_async_page_fault
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
jmp error_code
|
||||
CFI_ENDPROC
|
||||
END(async_page_fault)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* End of kprobes section
|
||||
*/
|
||||
|
|
|
@ -1329,6 +1329,9 @@ errorentry xen_stack_segment do_stack_segment
|
|||
#endif
|
||||
errorentry general_protection do_general_protection
|
||||
errorentry page_fault do_page_fault
|
||||
#ifdef CONFIG_KVM_GUEST
|
||||
errorentry async_page_fault do_async_page_fault
|
||||
#endif
|
||||
#ifdef CONFIG_X86_MCE
|
||||
paranoidzeroentry machine_check *machine_check_vector(%rip)
|
||||
#endif
|
||||
|
|
|
@ -169,6 +169,7 @@ int init_fpu(struct task_struct *tsk)
|
|||
set_stopped_child_used_math(tsk);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(init_fpu);
|
||||
|
||||
/*
|
||||
* The xstateregs_active() routine is the same as the fpregs_active() routine,
|
||||
|
|
|
@ -27,16 +27,37 @@
|
|||
#include <linux/mm.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/reboot.h>
|
||||
#include <linux/hash.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <asm/timer.h>
|
||||
#include <asm/cpu.h>
|
||||
#include <asm/traps.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
#define MMU_QUEUE_SIZE 1024
|
||||
|
||||
static int kvmapf = 1;
|
||||
|
||||
static int parse_no_kvmapf(char *arg)
|
||||
{
|
||||
kvmapf = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
early_param("no-kvmapf", parse_no_kvmapf);
|
||||
|
||||
struct kvm_para_state {
|
||||
u8 mmu_queue[MMU_QUEUE_SIZE];
|
||||
int mmu_queue_len;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct kvm_para_state, para_state);
|
||||
static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
|
||||
|
||||
static struct kvm_para_state *kvm_para_state(void)
|
||||
{
|
||||
|
@ -50,6 +71,195 @@ static void kvm_io_delay(void)
|
|||
{
|
||||
}
|
||||
|
||||
#define KVM_TASK_SLEEP_HASHBITS 8
|
||||
#define KVM_TASK_SLEEP_HASHSIZE (1<<KVM_TASK_SLEEP_HASHBITS)
|
||||
|
||||
struct kvm_task_sleep_node {
|
||||
struct hlist_node link;
|
||||
wait_queue_head_t wq;
|
||||
u32 token;
|
||||
int cpu;
|
||||
bool halted;
|
||||
struct mm_struct *mm;
|
||||
};
|
||||
|
||||
static struct kvm_task_sleep_head {
|
||||
spinlock_t lock;
|
||||
struct hlist_head list;
|
||||
} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];
|
||||
|
||||
static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
|
||||
u32 token)
|
||||
{
|
||||
struct hlist_node *p;
|
||||
|
||||
hlist_for_each(p, &b->list) {
|
||||
struct kvm_task_sleep_node *n =
|
||||
hlist_entry(p, typeof(*n), link);
|
||||
if (n->token == token)
|
||||
return n;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void kvm_async_pf_task_wait(u32 token)
|
||||
{
|
||||
u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
|
||||
struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
|
||||
struct kvm_task_sleep_node n, *e;
|
||||
DEFINE_WAIT(wait);
|
||||
int cpu, idle;
|
||||
|
||||
cpu = get_cpu();
|
||||
idle = idle_cpu(cpu);
|
||||
put_cpu();
|
||||
|
||||
spin_lock(&b->lock);
|
||||
e = _find_apf_task(b, token);
|
||||
if (e) {
|
||||
/* dummy entry exists -> wake up was delivered ahead of PF */
|
||||
hlist_del(&e->link);
|
||||
kfree(e);
|
||||
spin_unlock(&b->lock);
|
||||
return;
|
||||
}
|
||||
|
||||
n.token = token;
|
||||
n.cpu = smp_processor_id();
|
||||
n.mm = current->active_mm;
|
||||
n.halted = idle || preempt_count() > 1;
|
||||
atomic_inc(&n.mm->mm_count);
|
||||
init_waitqueue_head(&n.wq);
|
||||
hlist_add_head(&n.link, &b->list);
|
||||
spin_unlock(&b->lock);
|
||||
|
||||
for (;;) {
|
||||
if (!n.halted)
|
||||
prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
|
||||
if (hlist_unhashed(&n.link))
|
||||
break;
|
||||
|
||||
if (!n.halted) {
|
||||
local_irq_enable();
|
||||
schedule();
|
||||
local_irq_disable();
|
||||
} else {
|
||||
/*
|
||||
* We cannot reschedule. So halt.
|
||||
*/
|
||||
native_safe_halt();
|
||||
local_irq_disable();
|
||||
}
|
||||
}
|
||||
if (!n.halted)
|
||||
finish_wait(&n.wq, &wait);
|
||||
|
||||
return;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
|
||||
|
||||
static void apf_task_wake_one(struct kvm_task_sleep_node *n)
|
||||
{
|
||||
hlist_del_init(&n->link);
|
||||
if (!n->mm)
|
||||
return;
|
||||
mmdrop(n->mm);
|
||||
if (n->halted)
|
||||
smp_send_reschedule(n->cpu);
|
||||
else if (waitqueue_active(&n->wq))
|
||||
wake_up(&n->wq);
|
||||
}
|
||||
|
||||
static void apf_task_wake_all(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
|
||||
struct hlist_node *p, *next;
|
||||
struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
|
||||
spin_lock(&b->lock);
|
||||
hlist_for_each_safe(p, next, &b->list) {
|
||||
struct kvm_task_sleep_node *n =
|
||||
hlist_entry(p, typeof(*n), link);
|
||||
if (n->cpu == smp_processor_id())
|
||||
apf_task_wake_one(n);
|
||||
}
|
||||
spin_unlock(&b->lock);
|
||||
}
|
||||
}
|
||||
|
||||
void kvm_async_pf_task_wake(u32 token)
|
||||
{
|
||||
u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
|
||||
struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
|
||||
struct kvm_task_sleep_node *n;
|
||||
|
||||
if (token == ~0) {
|
||||
apf_task_wake_all();
|
||||
return;
|
||||
}
|
||||
|
||||
again:
|
||||
spin_lock(&b->lock);
|
||||
n = _find_apf_task(b, token);
|
||||
if (!n) {
|
||||
/*
|
||||
* async PF was not yet handled.
|
||||
* Add dummy entry for the token.
|
||||
*/
|
||||
n = kmalloc(sizeof(*n), GFP_ATOMIC);
|
||||
if (!n) {
|
||||
/*
|
||||
* Allocation failed! Busy wait while other cpu
|
||||
* handles async PF.
|
||||
*/
|
||||
spin_unlock(&b->lock);
|
||||
cpu_relax();
|
||||
goto again;
|
||||
}
|
||||
n->token = token;
|
||||
n->cpu = smp_processor_id();
|
||||
n->mm = NULL;
|
||||
init_waitqueue_head(&n->wq);
|
||||
hlist_add_head(&n->link, &b->list);
|
||||
} else
|
||||
apf_task_wake_one(n);
|
||||
spin_unlock(&b->lock);
|
||||
return;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
|
||||
|
||||
u32 kvm_read_and_reset_pf_reason(void)
|
||||
{
|
||||
u32 reason = 0;
|
||||
|
||||
if (__get_cpu_var(apf_reason).enabled) {
|
||||
reason = __get_cpu_var(apf_reason).reason;
|
||||
__get_cpu_var(apf_reason).reason = 0;
|
||||
}
|
||||
|
||||
return reason;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
|
||||
|
||||
dotraplinkage void __kprobes
|
||||
do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
|
||||
{
|
||||
switch (kvm_read_and_reset_pf_reason()) {
|
||||
default:
|
||||
do_page_fault(regs, error_code);
|
||||
break;
|
||||
case KVM_PV_REASON_PAGE_NOT_PRESENT:
|
||||
/* page is swapped out by the host. */
|
||||
kvm_async_pf_task_wait((u32)read_cr2());
|
||||
break;
|
||||
case KVM_PV_REASON_PAGE_READY:
|
||||
kvm_async_pf_task_wake((u32)read_cr2());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void kvm_mmu_op(void *buffer, unsigned len)
|
||||
{
|
||||
int r;
|
||||
|
@ -231,10 +441,117 @@ static void __init paravirt_ops_setup(void)
|
|||
#endif
|
||||
}
|
||||
|
||||
void __init kvm_guest_init(void)
|
||||
void __cpuinit kvm_guest_cpu_init(void)
|
||||
{
|
||||
if (!kvm_para_available())
|
||||
return;
|
||||
|
||||
paravirt_ops_setup();
|
||||
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
|
||||
u64 pa = __pa(&__get_cpu_var(apf_reason));
|
||||
|
||||
#ifdef CONFIG_PREEMPT
|
||||
pa |= KVM_ASYNC_PF_SEND_ALWAYS;
|
||||
#endif
|
||||
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED);
|
||||
__get_cpu_var(apf_reason).enabled = 1;
|
||||
printk(KERN_INFO"KVM setup async PF for cpu %d\n",
|
||||
smp_processor_id());
|
||||
}
|
||||
}
|
||||
|
||||
static void kvm_pv_disable_apf(void *unused)
|
||||
{
|
||||
if (!__get_cpu_var(apf_reason).enabled)
|
||||
return;
|
||||
|
||||
wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
|
||||
__get_cpu_var(apf_reason).enabled = 0;
|
||||
|
||||
printk(KERN_INFO"Unregister pv shared memory for cpu %d\n",
|
||||
smp_processor_id());
|
||||
}
|
||||
|
||||
static int kvm_pv_reboot_notify(struct notifier_block *nb,
|
||||
unsigned long code, void *unused)
|
||||
{
|
||||
if (code == SYS_RESTART)
|
||||
on_each_cpu(kvm_pv_disable_apf, NULL, 1);
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
static struct notifier_block kvm_pv_reboot_nb = {
|
||||
.notifier_call = kvm_pv_reboot_notify,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static void __init kvm_smp_prepare_boot_cpu(void)
|
||||
{
|
||||
#ifdef CONFIG_KVM_CLOCK
|
||||
WARN_ON(kvm_register_clock("primary cpu clock"));
|
||||
#endif
|
||||
kvm_guest_cpu_init();
|
||||
native_smp_prepare_boot_cpu();
|
||||
}
|
||||
|
||||
static void kvm_guest_cpu_online(void *dummy)
|
||||
{
|
||||
kvm_guest_cpu_init();
|
||||
}
|
||||
|
||||
static void kvm_guest_cpu_offline(void *dummy)
|
||||
{
|
||||
kvm_pv_disable_apf(NULL);
|
||||
apf_task_wake_all();
|
||||
}
|
||||
|
||||
static int __cpuinit kvm_cpu_notify(struct notifier_block *self,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
int cpu = (unsigned long)hcpu;
|
||||
switch (action) {
|
||||
case CPU_ONLINE:
|
||||
case CPU_DOWN_FAILED:
|
||||
case CPU_ONLINE_FROZEN:
|
||||
smp_call_function_single(cpu, kvm_guest_cpu_online, NULL, 0);
|
||||
break;
|
||||
case CPU_DOWN_PREPARE:
|
||||
case CPU_DOWN_PREPARE_FROZEN:
|
||||
smp_call_function_single(cpu, kvm_guest_cpu_offline, NULL, 1);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block __cpuinitdata kvm_cpu_notifier = {
|
||||
.notifier_call = kvm_cpu_notify,
|
||||
};
|
||||
#endif
|
||||
|
||||
static void __init kvm_apf_trap_init(void)
|
||||
{
|
||||
set_intr_gate(14, &async_page_fault);
|
||||
}
|
||||
|
||||
void __init kvm_guest_init(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!kvm_para_available())
|
||||
return;
|
||||
|
||||
paravirt_ops_setup();
|
||||
register_reboot_notifier(&kvm_pv_reboot_nb);
|
||||
for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
|
||||
spin_lock_init(&async_pf_sleepers[i].lock);
|
||||
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
|
||||
x86_init.irqs.trap_init = kvm_apf_trap_init;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
|
||||
register_cpu_notifier(&kvm_cpu_notifier);
|
||||
#else
|
||||
kvm_guest_cpu_init();
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -125,7 +125,7 @@ static struct clocksource kvm_clock = {
|
|||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
static int kvm_register_clock(char *txt)
|
||||
int kvm_register_clock(char *txt)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
int low, high, ret;
|
||||
|
@ -152,14 +152,6 @@ static void __cpuinit kvm_setup_secondary_clock(void)
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static void __init kvm_smp_prepare_boot_cpu(void)
|
||||
{
|
||||
WARN_ON(kvm_register_clock("primary cpu clock"));
|
||||
native_smp_prepare_boot_cpu();
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* After the clock is registered, the host will keep writing to the
|
||||
* registered memory location. If the guest happens to shutdown, this memory
|
||||
|
@ -205,9 +197,6 @@ void __init kvmclock_init(void)
|
|||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
x86_cpuinit.setup_percpu_clockev =
|
||||
kvm_setup_secondary_clock;
|
||||
#endif
|
||||
#ifdef CONFIG_SMP
|
||||
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
|
||||
#endif
|
||||
machine_ops.shutdown = kvm_shutdown;
|
||||
#ifdef CONFIG_KEXEC
|
||||
|
|
|
@ -28,6 +28,7 @@ config KVM
|
|||
select HAVE_KVM_IRQCHIP
|
||||
select HAVE_KVM_EVENTFD
|
||||
select KVM_APIC_ARCHITECTURE
|
||||
select KVM_ASYNC_PF
|
||||
select USER_RETURN_NOTIFIER
|
||||
select KVM_MMIO
|
||||
---help---
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
|
||||
ccflags-y += -Ivirt/kvm -Iarch/x86/kvm
|
||||
|
||||
CFLAGS_x86.o := -I.
|
||||
CFLAGS_svm.o := -I.
|
||||
|
@ -9,6 +9,7 @@ kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
|
|||
coalesced_mmio.o irq_comm.o eventfd.o \
|
||||
assigned-dev.o)
|
||||
kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o)
|
||||
kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o)
|
||||
|
||||
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
|
||||
i8254.o timer.o
|
||||
|
|
|
@ -20,16 +20,8 @@
|
|||
* From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
|
||||
*/
|
||||
|
||||
#ifndef __KERNEL__
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <public/xen.h>
|
||||
#define DPRINTF(_f, _a ...) printf(_f , ## _a)
|
||||
#else
|
||||
#include <linux/kvm_host.h>
|
||||
#include "kvm_cache_regs.h"
|
||||
#define DPRINTF(x...) do {} while (0)
|
||||
#endif
|
||||
#include <linux/module.h>
|
||||
#include <asm/kvm_emulate.h>
|
||||
|
||||
|
@ -418,9 +410,9 @@ address_mask(struct decode_cache *c, unsigned long reg)
|
|||
}
|
||||
|
||||
static inline unsigned long
|
||||
register_address(struct decode_cache *c, unsigned long base, unsigned long reg)
|
||||
register_address(struct decode_cache *c, unsigned long reg)
|
||||
{
|
||||
return base + address_mask(c, reg);
|
||||
return address_mask(c, reg);
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
@ -452,60 +444,55 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt,
|
|||
return ops->get_cached_segment_base(seg, ctxt->vcpu);
|
||||
}
|
||||
|
||||
static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt,
|
||||
struct x86_emulate_ops *ops,
|
||||
struct decode_cache *c)
|
||||
static unsigned seg_override(struct x86_emulate_ctxt *ctxt,
|
||||
struct x86_emulate_ops *ops,
|
||||
struct decode_cache *c)
|
||||
{
|
||||
if (!c->has_seg_override)
|
||||
return 0;
|
||||
|
||||
return seg_base(ctxt, ops, c->seg_override);
|
||||
return c->seg_override;
|
||||
}
|
||||
|
||||
static unsigned long es_base(struct x86_emulate_ctxt *ctxt,
|
||||
struct x86_emulate_ops *ops)
|
||||
static ulong linear(struct x86_emulate_ctxt *ctxt,
|
||||
struct segmented_address addr)
|
||||
{
|
||||
return seg_base(ctxt, ops, VCPU_SREG_ES);
|
||||
struct decode_cache *c = &ctxt->decode;
|
||||
ulong la;
|
||||
|
||||
la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea;
|
||||
if (c->ad_bytes != 8)
|
||||
la &= (u32)-1;
|
||||
return la;
|
||||
}
|
||||
|
||||
static unsigned long ss_base(struct x86_emulate_ctxt *ctxt,
|
||||
struct x86_emulate_ops *ops)
|
||||
static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
|
||||
u32 error, bool valid)
|
||||
{
|
||||
return seg_base(ctxt, ops, VCPU_SREG_SS);
|
||||
ctxt->exception.vector = vec;
|
||||
ctxt->exception.error_code = error;
|
||||
ctxt->exception.error_code_valid = valid;
|
||||
return X86EMUL_PROPAGATE_FAULT;
|
||||
}
|
||||
|
||||
static void emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
|
||||
u32 error, bool valid)
|
||||
static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
|
||||
{
|
||||
ctxt->exception = vec;
|
||||
ctxt->error_code = error;
|
||||
ctxt->error_code_valid = valid;
|
||||
return emulate_exception(ctxt, GP_VECTOR, err, true);
|
||||
}
|
||||
|
||||
static void emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
|
||||
static int emulate_ud(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
emulate_exception(ctxt, GP_VECTOR, err, true);
|
||||
return emulate_exception(ctxt, UD_VECTOR, 0, false);
|
||||
}
|
||||
|
||||
static void emulate_pf(struct x86_emulate_ctxt *ctxt)
|
||||
static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
|
||||
{
|
||||
emulate_exception(ctxt, PF_VECTOR, 0, true);
|
||||
}
|
||||
|
||||
static void emulate_ud(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
emulate_exception(ctxt, UD_VECTOR, 0, false);
|
||||
}
|
||||
|
||||
static void emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
|
||||
{
|
||||
emulate_exception(ctxt, TS_VECTOR, err, true);
|
||||
return emulate_exception(ctxt, TS_VECTOR, err, true);
|
||||
}
|
||||
|
||||
static int emulate_de(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
emulate_exception(ctxt, DE_VECTOR, 0, false);
|
||||
return X86EMUL_PROPAGATE_FAULT;
|
||||
return emulate_exception(ctxt, DE_VECTOR, 0, false);
|
||||
}
|
||||
|
||||
static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
|
||||
|
@ -520,7 +507,7 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
|
|||
cur_size = fc->end - fc->start;
|
||||
size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip));
|
||||
rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size,
|
||||
size, ctxt->vcpu, NULL);
|
||||
size, ctxt->vcpu, &ctxt->exception);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
return rc;
|
||||
fc->end += size;
|
||||
|
@ -564,7 +551,7 @@ static void *decode_register(u8 modrm_reg, unsigned long *regs,
|
|||
|
||||
static int read_descriptor(struct x86_emulate_ctxt *ctxt,
|
||||
struct x86_emulate_ops *ops,
|
||||
ulong addr,
|
||||
struct segmented_address addr,
|
||||
u16 *size, unsigned long *address, int op_bytes)
|
||||
{
|
||||
int rc;
|
||||
|
@ -572,10 +559,13 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
|
|||
if (op_bytes == 2)
|
||||
op_bytes = 3;
|
||||
*address = 0;
|
||||
rc = ops->read_std(addr, (unsigned long *)size, 2, ctxt->vcpu, NULL);
|
||||
rc = ops->read_std(linear(ctxt, addr), (unsigned long *)size, 2,
|
||||
ctxt->vcpu, &ctxt->exception);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
return rc;
|
||||
rc = ops->read_std(addr + 2, address, op_bytes, ctxt->vcpu, NULL);
|
||||
addr.ea += 2;
|
||||
rc = ops->read_std(linear(ctxt, addr), address, op_bytes,
|
||||
ctxt->vcpu, &ctxt->exception);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -768,7 +758,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
|
|||
break;
|
||||
}
|
||||
}
|
||||
op->addr.mem = modrm_ea;
|
||||
op->addr.mem.ea = modrm_ea;
|
||||
done:
|
||||
return rc;
|
||||
}
|
||||
|
@ -783,13 +773,13 @@ static int decode_abs(struct x86_emulate_ctxt *ctxt,
|
|||
op->type = OP_MEM;
|
||||
switch (c->ad_bytes) {
|
||||
case 2:
|
||||
op->addr.mem = insn_fetch(u16, 2, c->eip);
|
||||
op->addr.mem.ea = insn_fetch(u16, 2, c->eip);
|
||||
break;
|
||||
case 4:
|
||||
op->addr.mem = insn_fetch(u32, 4, c->eip);
|
||||
op->addr.mem.ea = insn_fetch(u32, 4, c->eip);
|
||||
break;
|
||||
case 8:
|
||||
op->addr.mem = insn_fetch(u64, 8, c->eip);
|
||||
op->addr.mem.ea = insn_fetch(u64, 8, c->eip);
|
||||
break;
|
||||
}
|
||||
done:
|
||||
|
@ -808,7 +798,7 @@ static void fetch_bit_operand(struct decode_cache *c)
|
|||
else if (c->src.bytes == 4)
|
||||
sv = (s32)c->src.val & (s32)mask;
|
||||
|
||||
c->dst.addr.mem += (sv >> 3);
|
||||
c->dst.addr.mem.ea += (sv >> 3);
|
||||
}
|
||||
|
||||
/* only subword offset */
|
||||
|
@ -821,7 +811,6 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt,
|
|||
{
|
||||
int rc;
|
||||
struct read_cache *mc = &ctxt->decode.mem_read;
|
||||
u32 err;
|
||||
|
||||
while (size) {
|
||||
int n = min(size, 8u);
|
||||
|
@ -829,10 +818,8 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt,
|
|||
if (mc->pos < mc->end)
|
||||
goto read_cached;
|
||||
|
||||
rc = ops->read_emulated(addr, mc->data + mc->end, n, &err,
|
||||
ctxt->vcpu);
|
||||
if (rc == X86EMUL_PROPAGATE_FAULT)
|
||||
emulate_pf(ctxt);
|
||||
rc = ops->read_emulated(addr, mc->data + mc->end, n,
|
||||
&ctxt->exception, ctxt->vcpu);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
return rc;
|
||||
mc->end += n;
|
||||
|
@ -907,19 +894,15 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
|
|||
struct desc_ptr dt;
|
||||
u16 index = selector >> 3;
|
||||
int ret;
|
||||
u32 err;
|
||||
ulong addr;
|
||||
|
||||
get_descriptor_table_ptr(ctxt, ops, selector, &dt);
|
||||
|
||||
if (dt.size < index * 8 + 7) {
|
||||
emulate_gp(ctxt, selector & 0xfffc);
|
||||
return X86EMUL_PROPAGATE_FAULT;
|
||||
}
|
||||
if (dt.size < index * 8 + 7)
|
||||
return emulate_gp(ctxt, selector & 0xfffc);
|
||||
addr = dt.address + index * 8;
|
||||
ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
|
||||
if (ret == X86EMUL_PROPAGATE_FAULT)
|
||||
emulate_pf(ctxt);
|
||||
ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu,
|
||||
&ctxt->exception);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -931,21 +914,17 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
|
|||
{
|
||||
struct desc_ptr dt;
|
||||
u16 index = selector >> 3;
|
||||
u32 err;
|
||||
ulong addr;
|
||||
int ret;
|
||||
|
||||
get_descriptor_table_ptr(ctxt, ops, selector, &dt);
|
||||
|
||||
if (dt.size < index * 8 + 7) {
|
||||
emulate_gp(ctxt, selector & 0xfffc);
|
||||
return X86EMUL_PROPAGATE_FAULT;
|
||||
}
|
||||
if (dt.size < index * 8 + 7)
|
||||
return emulate_gp(ctxt, selector & 0xfffc);
|
||||
|
||||
addr = dt.address + index * 8;
|
||||
ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
|
||||
if (ret == X86EMUL_PROPAGATE_FAULT)
|
||||
emulate_pf(ctxt);
|
||||
ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu,
|
||||
&ctxt->exception);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -1092,7 +1071,6 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
|
|||
{
|
||||
int rc;
|
||||
struct decode_cache *c = &ctxt->decode;
|
||||
u32 err;
|
||||
|
||||
switch (c->dst.type) {
|
||||
case OP_REG:
|
||||
|
@ -1101,21 +1079,19 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
|
|||
case OP_MEM:
|
||||
if (c->lock_prefix)
|
||||
rc = ops->cmpxchg_emulated(
|
||||
c->dst.addr.mem,
|
||||
linear(ctxt, c->dst.addr.mem),
|
||||
&c->dst.orig_val,
|
||||
&c->dst.val,
|
||||
c->dst.bytes,
|
||||
&err,
|
||||
&ctxt->exception,
|
||||
ctxt->vcpu);
|
||||
else
|
||||
rc = ops->write_emulated(
|
||||
c->dst.addr.mem,
|
||||
linear(ctxt, c->dst.addr.mem),
|
||||
&c->dst.val,
|
||||
c->dst.bytes,
|
||||
&err,
|
||||
&ctxt->exception,
|
||||
ctxt->vcpu);
|
||||
if (rc == X86EMUL_PROPAGATE_FAULT)
|
||||
emulate_pf(ctxt);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
return rc;
|
||||
break;
|
||||
|
@ -1137,8 +1113,8 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt,
|
|||
c->dst.bytes = c->op_bytes;
|
||||
c->dst.val = c->src.val;
|
||||
register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
|
||||
c->dst.addr.mem = register_address(c, ss_base(ctxt, ops),
|
||||
c->regs[VCPU_REGS_RSP]);
|
||||
c->dst.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSP]);
|
||||
c->dst.addr.mem.seg = VCPU_SREG_SS;
|
||||
}
|
||||
|
||||
static int emulate_pop(struct x86_emulate_ctxt *ctxt,
|
||||
|
@ -1147,10 +1123,11 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
|
|||
{
|
||||
struct decode_cache *c = &ctxt->decode;
|
||||
int rc;
|
||||
struct segmented_address addr;
|
||||
|
||||
rc = read_emulated(ctxt, ops, register_address(c, ss_base(ctxt, ops),
|
||||
c->regs[VCPU_REGS_RSP]),
|
||||
dest, len);
|
||||
addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]);
|
||||
addr.seg = VCPU_SREG_SS;
|
||||
rc = read_emulated(ctxt, ops, linear(ctxt, addr), dest, len);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
return rc;
|
||||
|
||||
|
@ -1184,10 +1161,8 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
|
|||
change_mask |= EFLG_IF;
|
||||
break;
|
||||
case X86EMUL_MODE_VM86:
|
||||
if (iopl < 3) {
|
||||
emulate_gp(ctxt, 0);
|
||||
return X86EMUL_PROPAGATE_FAULT;
|
||||
}
|
||||
if (iopl < 3)
|
||||
return emulate_gp(ctxt, 0);
|
||||
change_mask |= EFLG_IF;
|
||||
break;
|
||||
default: /* real mode */
|
||||
|
@ -1198,9 +1173,6 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
|
|||
*(unsigned long *)dest =
|
||||
(ctxt->eflags & ~change_mask) | (val & change_mask);
|
||||
|
||||
if (rc == X86EMUL_PROPAGATE_FAULT)
|
||||
emulate_pf(ctxt);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -1287,7 +1259,6 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt,
|
|||
gva_t cs_addr;
|
||||
gva_t eip_addr;
|
||||
u16 cs, eip;
|
||||
u32 err;
|
||||
|
||||
/* TODO: Add limit checks */
|
||||
c->src.val = ctxt->eflags;
|
||||
|
@ -1317,11 +1288,11 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt,
|
|||
eip_addr = dt.address + (irq << 2);
|
||||
cs_addr = dt.address + (irq << 2) + 2;
|
||||
|
||||
rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &err);
|
||||
rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &ctxt->exception);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
return rc;
|
||||
|
||||
rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &err);
|
||||
rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &ctxt->exception);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
return rc;
|
||||
|
||||
|
@ -1370,10 +1341,8 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt,
|
|||
if (rc != X86EMUL_CONTINUE)
|
||||
return rc;
|
||||
|
||||
if (temp_eip & ~0xffff) {
|
||||
emulate_gp(ctxt, 0);
|
||||
return X86EMUL_PROPAGATE_FAULT;
|
||||
}
|
||||
if (temp_eip & ~0xffff)
|
||||
return emulate_gp(ctxt, 0);
|
||||
|
||||
rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
|
||||
|
||||
|
@ -1624,10 +1593,8 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
|
|||
|
||||
/* syscall is not available in real mode */
|
||||
if (ctxt->mode == X86EMUL_MODE_REAL ||
|
||||
ctxt->mode == X86EMUL_MODE_VM86) {
|
||||
emulate_ud(ctxt);
|
||||
return X86EMUL_PROPAGATE_FAULT;
|
||||
}
|
||||
ctxt->mode == X86EMUL_MODE_VM86)
|
||||
return emulate_ud(ctxt);
|
||||
|
||||
setup_syscalls_segments(ctxt, ops, &cs, &ss);
|
||||
|
||||
|
@@ -1678,34 +1645,26 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
u16 cs_sel, ss_sel;

/* inject #GP if in real mode */
if (ctxt->mode == X86EMUL_MODE_REAL) {
emulate_gp(ctxt, 0);
return X86EMUL_PROPAGATE_FAULT;
}
if (ctxt->mode == X86EMUL_MODE_REAL)
return emulate_gp(ctxt, 0);

/* XXX sysenter/sysexit have not been tested in 64bit mode.
* Therefore, we inject an #UD.
*/
if (ctxt->mode == X86EMUL_MODE_PROT64) {
emulate_ud(ctxt);
return X86EMUL_PROPAGATE_FAULT;
}
if (ctxt->mode == X86EMUL_MODE_PROT64)
return emulate_ud(ctxt);
|
||||
|
||||
setup_syscalls_segments(ctxt, ops, &cs, &ss);
|
||||
|
||||
ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
|
||||
switch (ctxt->mode) {
|
||||
case X86EMUL_MODE_PROT32:
|
||||
if ((msr_data & 0xfffc) == 0x0) {
|
||||
emulate_gp(ctxt, 0);
|
||||
return X86EMUL_PROPAGATE_FAULT;
|
||||
}
|
||||
if ((msr_data & 0xfffc) == 0x0)
|
||||
return emulate_gp(ctxt, 0);
|
||||
break;
|
||||
case X86EMUL_MODE_PROT64:
|
||||
if (msr_data == 0x0) {
|
||||
emulate_gp(ctxt, 0);
|
||||
return X86EMUL_PROPAGATE_FAULT;
|
||||
}
|
||||
if (msr_data == 0x0)
|
||||
return emulate_gp(ctxt, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1745,10 +1704,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
|
|||
|
||||
/* inject #GP if in real mode or Virtual 8086 mode */
|
||||
if (ctxt->mode == X86EMUL_MODE_REAL ||
|
||||
ctxt->mode == X86EMUL_MODE_VM86) {
|
||||
emulate_gp(ctxt, 0);
|
||||
return X86EMUL_PROPAGATE_FAULT;
|
||||
}
|
||||
ctxt->mode == X86EMUL_MODE_VM86)
|
||||
return emulate_gp(ctxt, 0);
|
||||
|
||||
setup_syscalls_segments(ctxt, ops, &cs, &ss);
|
||||
|
||||
|
@ -1763,18 +1720,14 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
|
|||
switch (usermode) {
|
||||
case X86EMUL_MODE_PROT32:
|
||||
cs_sel = (u16)(msr_data + 16);
|
||||
if ((msr_data & 0xfffc) == 0x0) {
|
||||
emulate_gp(ctxt, 0);
|
||||
return X86EMUL_PROPAGATE_FAULT;
|
||||
}
|
||||
if ((msr_data & 0xfffc) == 0x0)
|
||||
return emulate_gp(ctxt, 0);
|
||||
ss_sel = (u16)(msr_data + 24);
|
||||
break;
|
||||
case X86EMUL_MODE_PROT64:
|
||||
cs_sel = (u16)(msr_data + 32);
|
||||
if (msr_data == 0x0) {
|
||||
emulate_gp(ctxt, 0);
|
||||
return X86EMUL_PROPAGATE_FAULT;
|
||||
}
|
||||
if (msr_data == 0x0)
|
||||
return emulate_gp(ctxt, 0);
|
||||
ss_sel = cs_sel + 8;
|
||||
cs.d = 0;
|
||||
cs.l = 1;
|
||||
|
@ -1934,33 +1887,27 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
|
|||
{
|
||||
struct tss_segment_16 tss_seg;
|
||||
int ret;
|
||||
u32 err, new_tss_base = get_desc_base(new_desc);
|
||||
u32 new_tss_base = get_desc_base(new_desc);
|
||||
|
||||
ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
|
||||
&err);
|
||||
if (ret == X86EMUL_PROPAGATE_FAULT) {
|
||||
&ctxt->exception);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
/* FIXME: need to provide precise fault address */
|
||||
emulate_pf(ctxt);
|
||||
return ret;
|
||||
}
|
||||
|
||||
save_state_to_tss16(ctxt, ops, &tss_seg);
|
||||
|
||||
ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
|
||||
&err);
|
||||
if (ret == X86EMUL_PROPAGATE_FAULT) {
|
||||
&ctxt->exception);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
/* FIXME: need to provide precise fault address */
|
||||
emulate_pf(ctxt);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
|
||||
&err);
|
||||
if (ret == X86EMUL_PROPAGATE_FAULT) {
|
||||
&ctxt->exception);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
/* FIXME: need to provide precise fault address */
|
||||
emulate_pf(ctxt);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (old_tss_sel != 0xffff) {
|
||||
tss_seg.prev_task_link = old_tss_sel;
|
||||
|
@ -1968,12 +1915,10 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
|
|||
ret = ops->write_std(new_tss_base,
|
||||
&tss_seg.prev_task_link,
|
||||
sizeof tss_seg.prev_task_link,
|
||||
ctxt->vcpu, &err);
|
||||
if (ret == X86EMUL_PROPAGATE_FAULT) {
|
||||
ctxt->vcpu, &ctxt->exception);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
/* FIXME: need to provide precise fault address */
|
||||
emulate_pf(ctxt);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return load_state_from_tss16(ctxt, ops, &tss_seg);
|
||||
|
@ -2013,10 +1958,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
|
|||
struct decode_cache *c = &ctxt->decode;
|
||||
int ret;
|
||||
|
||||
if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) {
|
||||
emulate_gp(ctxt, 0);
|
||||
return X86EMUL_PROPAGATE_FAULT;
|
||||
}
|
||||
if (ops->set_cr(3, tss->cr3, ctxt->vcpu))
|
||||
return emulate_gp(ctxt, 0);
|
||||
c->eip = tss->eip;
|
||||
ctxt->eflags = tss->eflags | 2;
|
||||
c->regs[VCPU_REGS_RAX] = tss->eax;
|
||||
|
@ -2076,33 +2019,27 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
|
|||
{
|
||||
struct tss_segment_32 tss_seg;
|
||||
int ret;
|
||||
u32 err, new_tss_base = get_desc_base(new_desc);
|
||||
u32 new_tss_base = get_desc_base(new_desc);
|
||||
|
||||
ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
|
||||
&err);
|
||||
if (ret == X86EMUL_PROPAGATE_FAULT) {
|
||||
&ctxt->exception);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
/* FIXME: need to provide precise fault address */
|
||||
emulate_pf(ctxt);
|
||||
return ret;
|
||||
}
|
||||
|
||||
save_state_to_tss32(ctxt, ops, &tss_seg);
|
||||
|
||||
ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
|
||||
&err);
|
||||
if (ret == X86EMUL_PROPAGATE_FAULT) {
|
||||
&ctxt->exception);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
/* FIXME: need to provide precise fault address */
|
||||
emulate_pf(ctxt);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
|
||||
&err);
|
||||
if (ret == X86EMUL_PROPAGATE_FAULT) {
|
||||
&ctxt->exception);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
/* FIXME: need to provide precise fault address */
|
||||
emulate_pf(ctxt);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (old_tss_sel != 0xffff) {
|
||||
tss_seg.prev_task_link = old_tss_sel;
|
||||
|
@ -2110,12 +2047,10 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
|
|||
ret = ops->write_std(new_tss_base,
|
||||
&tss_seg.prev_task_link,
|
||||
sizeof tss_seg.prev_task_link,
|
||||
ctxt->vcpu, &err);
|
||||
if (ret == X86EMUL_PROPAGATE_FAULT) {
|
||||
ctxt->vcpu, &ctxt->exception);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
/* FIXME: need to provide precise fault address */
|
||||
emulate_pf(ctxt);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return load_state_from_tss32(ctxt, ops, &tss_seg);
|
||||
|
@ -2146,10 +2081,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
|
|||
|
||||
if (reason != TASK_SWITCH_IRET) {
|
||||
if ((tss_selector & 3) > next_tss_desc.dpl ||
|
||||
ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) {
|
||||
emulate_gp(ctxt, 0);
|
||||
return X86EMUL_PROPAGATE_FAULT;
|
||||
}
|
||||
ops->cpl(ctxt->vcpu) > next_tss_desc.dpl)
|
||||
return emulate_gp(ctxt, 0);
|
||||
}
|
||||
|
||||
desc_limit = desc_limit_scaled(&next_tss_desc);
|
||||
|
@ -2231,14 +2164,15 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
|
|||
return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
|
||||
}
|
||||
|
||||
static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base,
static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg,
int reg, struct operand *op)
{
struct decode_cache *c = &ctxt->decode;
int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;

register_address_increment(c, &c->regs[reg], df * op->bytes);
op->addr.mem = register_address(c, base, c->regs[reg]);
op->addr.mem.ea = register_address(c, c->regs[reg]);
op->addr.mem.seg = seg;
}
|
||||
|
||||
static int em_push(struct x86_emulate_ctxt *ctxt)
|
||||
|
@ -2369,10 +2303,8 @@ static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
|
|||
struct decode_cache *c = &ctxt->decode;
|
||||
u64 tsc = 0;
|
||||
|
||||
if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD)) {
|
||||
emulate_gp(ctxt, 0);
|
||||
return X86EMUL_PROPAGATE_FAULT;
|
||||
}
|
||||
if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD))
|
||||
return emulate_gp(ctxt, 0);
|
||||
ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc);
|
||||
c->regs[VCPU_REGS_RAX] = (u32)tsc;
|
||||
c->regs[VCPU_REGS_RDX] = tsc >> 32;
|
||||
|
@ -2647,7 +2579,7 @@ static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
|
|||
|
||||
op->type = OP_IMM;
|
||||
op->bytes = size;
|
||||
op->addr.mem = c->eip;
|
||||
op->addr.mem.ea = c->eip;
|
||||
/* NB. Immediates are sign-extended as necessary. */
|
||||
switch (op->bytes) {
|
||||
case 1:
|
||||
|
@ -2678,7 +2610,7 @@ done:
|
|||
}
|
||||
|
||||
int
x86_decode_insn(struct x86_emulate_ctxt *ctxt)
x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
{
struct x86_emulate_ops *ops = ctxt->ops;
struct decode_cache *c = &ctxt->decode;

@@ -2689,7 +2621,10 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt)
struct operand memop = { .type = OP_NONE };

c->eip = ctxt->eip;
c->fetch.start = c->fetch.end = c->eip;
c->fetch.start = c->eip;
c->fetch.end = c->fetch.start + insn_len;
if (insn_len > 0)
memcpy(c->fetch.data, insn, insn_len);
ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS);
|
||||
|
||||
switch (mode) {
|
||||
|
@ -2803,10 +2738,8 @@ done_prefixes:
|
|||
c->execute = opcode.u.execute;
|
||||
|
||||
/* Unrecognised? */
|
||||
if (c->d == 0 || (c->d & Undefined)) {
|
||||
DPRINTF("Cannot emulate %02x\n", c->b);
|
||||
if (c->d == 0 || (c->d & Undefined))
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
|
||||
c->op_bytes = 8;
|
||||
|
@ -2831,14 +2764,13 @@ done_prefixes:
|
|||
if (!c->has_seg_override)
|
||||
set_seg_override(c, VCPU_SREG_DS);
|
||||
|
||||
if (memop.type == OP_MEM && !(!c->twobyte && c->b == 0x8d))
|
||||
memop.addr.mem += seg_override_base(ctxt, ops, c);
|
||||
memop.addr.mem.seg = seg_override(ctxt, ops, c);
|
||||
|
||||
if (memop.type == OP_MEM && c->ad_bytes != 8)
|
||||
memop.addr.mem = (u32)memop.addr.mem;
|
||||
memop.addr.mem.ea = (u32)memop.addr.mem.ea;
|
||||
|
||||
if (memop.type == OP_MEM && c->rip_relative)
|
||||
memop.addr.mem += c->eip;
|
||||
memop.addr.mem.ea += c->eip;
|
||||
|
||||
/*
|
||||
* Decode and fetch the source operand: register, memory
|
||||
|
@ -2890,14 +2822,14 @@ done_prefixes:
|
|||
case SrcSI:
|
||||
c->src.type = OP_MEM;
|
||||
c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
|
||||
c->src.addr.mem =
|
||||
register_address(c, seg_override_base(ctxt, ops, c),
|
||||
c->regs[VCPU_REGS_RSI]);
|
||||
c->src.addr.mem.ea =
|
||||
register_address(c, c->regs[VCPU_REGS_RSI]);
|
||||
c->src.addr.mem.seg = seg_override(ctxt, ops, c),
|
||||
c->src.val = 0;
|
||||
break;
|
||||
case SrcImmFAddr:
|
||||
c->src.type = OP_IMM;
|
||||
c->src.addr.mem = c->eip;
|
||||
c->src.addr.mem.ea = c->eip;
|
||||
c->src.bytes = c->op_bytes + 2;
|
||||
insn_fetch_arr(c->src.valptr, c->src.bytes, c->eip);
|
||||
break;
|
||||
|
@ -2944,7 +2876,7 @@ done_prefixes:
|
|||
break;
|
||||
case DstImmUByte:
|
||||
c->dst.type = OP_IMM;
|
||||
c->dst.addr.mem = c->eip;
|
||||
c->dst.addr.mem.ea = c->eip;
|
||||
c->dst.bytes = 1;
|
||||
c->dst.val = insn_fetch(u8, 1, c->eip);
|
||||
break;
|
||||
|
@ -2969,9 +2901,9 @@ done_prefixes:
|
|||
case DstDI:
|
||||
c->dst.type = OP_MEM;
|
||||
c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
|
||||
c->dst.addr.mem =
|
||||
register_address(c, es_base(ctxt, ops),
|
||||
c->regs[VCPU_REGS_RDI]);
|
||||
c->dst.addr.mem.ea =
|
||||
register_address(c, c->regs[VCPU_REGS_RDI]);
|
||||
c->dst.addr.mem.seg = VCPU_SREG_ES;
|
||||
c->dst.val = 0;
|
||||
break;
|
||||
case ImplicitOps:
|
||||
|
@ -3020,24 +2952,24 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
|
|||
ctxt->decode.mem_read.pos = 0;
|
||||
|
||||
if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
|
||||
emulate_ud(ctxt);
|
||||
rc = emulate_ud(ctxt);
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* LOCK prefix is allowed only with some instructions */
|
||||
if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) {
|
||||
emulate_ud(ctxt);
|
||||
rc = emulate_ud(ctxt);
|
||||
goto done;
|
||||
}
|
||||
|
||||
if ((c->d & SrcMask) == SrcMemFAddr && c->src.type != OP_MEM) {
|
||||
emulate_ud(ctxt);
|
||||
rc = emulate_ud(ctxt);
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Privileged instruction can be executed only in CPL=0 */
|
||||
if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
|
||||
emulate_gp(ctxt, 0);
|
||||
rc = emulate_gp(ctxt, 0);
|
||||
goto done;
|
||||
}
|
||||
|
||||
|
@ -3050,7 +2982,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
|
|||
}
|
||||
|
||||
if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) {
|
||||
rc = read_emulated(ctxt, ops, c->src.addr.mem,
|
||||
rc = read_emulated(ctxt, ops, linear(ctxt, c->src.addr.mem),
|
||||
c->src.valptr, c->src.bytes);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
goto done;
|
||||
|
@ -3058,7 +2990,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
|
|||
}
|
||||
|
||||
if (c->src2.type == OP_MEM) {
|
||||
rc = read_emulated(ctxt, ops, c->src2.addr.mem,
|
||||
rc = read_emulated(ctxt, ops, linear(ctxt, c->src2.addr.mem),
|
||||
&c->src2.val, c->src2.bytes);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
goto done;
|
||||
|
@ -3070,7 +3002,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
|
|||
|
||||
if ((c->dst.type == OP_MEM) && !(c->d & Mov)) {
|
||||
/* optimisation - avoid slow emulated read if Mov */
|
||||
rc = read_emulated(ctxt, ops, c->dst.addr.mem,
|
||||
rc = read_emulated(ctxt, ops, linear(ctxt, c->dst.addr.mem),
|
||||
&c->dst.val, c->dst.bytes);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
goto done;
|
||||
|
@ -3215,13 +3147,13 @@ special_insn:
|
|||
break;
|
||||
case 0x8c: /* mov r/m, sreg */
|
||||
if (c->modrm_reg > VCPU_SREG_GS) {
|
||||
emulate_ud(ctxt);
|
||||
rc = emulate_ud(ctxt);
|
||||
goto done;
|
||||
}
|
||||
c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu);
|
||||
break;
|
||||
case 0x8d: /* lea r16/r32, m */
|
||||
c->dst.val = c->src.addr.mem;
|
||||
c->dst.val = c->src.addr.mem.ea;
|
||||
break;
|
||||
case 0x8e: { /* mov seg, r/m16 */
|
||||
uint16_t sel;
|
||||
|
@ -3230,7 +3162,7 @@ special_insn:
|
|||
|
||||
if (c->modrm_reg == VCPU_SREG_CS ||
|
||||
c->modrm_reg > VCPU_SREG_GS) {
|
||||
emulate_ud(ctxt);
|
||||
rc = emulate_ud(ctxt);
|
||||
goto done;
|
||||
}
|
||||
|
||||
|
@ -3268,7 +3200,6 @@ special_insn:
|
|||
break;
|
||||
case 0xa6 ... 0xa7: /* cmps */
|
||||
c->dst.type = OP_NONE; /* Disable writeback. */
|
||||
DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.addr.mem, c->dst.addr.mem);
|
||||
goto cmp;
|
||||
case 0xa8 ... 0xa9: /* test ax, imm */
|
||||
goto test;
|
||||
|
@ -3363,7 +3294,7 @@ special_insn:
|
|||
do_io_in:
|
||||
c->dst.bytes = min(c->dst.bytes, 4u);
|
||||
if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
|
||||
emulate_gp(ctxt, 0);
|
||||
rc = emulate_gp(ctxt, 0);
|
||||
goto done;
|
||||
}
|
||||
if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val,
|
||||
|
@ -3377,7 +3308,7 @@ special_insn:
|
|||
c->src.bytes = min(c->src.bytes, 4u);
|
||||
if (!emulator_io_permited(ctxt, ops, c->dst.val,
|
||||
c->src.bytes)) {
|
||||
emulate_gp(ctxt, 0);
|
||||
rc = emulate_gp(ctxt, 0);
|
||||
goto done;
|
||||
}
|
||||
ops->pio_out_emulated(c->src.bytes, c->dst.val,
|
||||
|
@ -3402,14 +3333,14 @@ special_insn:
|
|||
break;
|
||||
case 0xfa: /* cli */
|
||||
if (emulator_bad_iopl(ctxt, ops)) {
|
||||
emulate_gp(ctxt, 0);
|
||||
rc = emulate_gp(ctxt, 0);
|
||||
goto done;
|
||||
} else
|
||||
ctxt->eflags &= ~X86_EFLAGS_IF;
|
||||
break;
|
||||
case 0xfb: /* sti */
|
||||
if (emulator_bad_iopl(ctxt, ops)) {
|
||||
emulate_gp(ctxt, 0);
|
||||
rc = emulate_gp(ctxt, 0);
|
||||
goto done;
|
||||
} else {
|
||||
ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
|
||||
|
@ -3449,11 +3380,11 @@ writeback:
|
|||
c->dst.type = saved_dst_type;
|
||||
|
||||
if ((c->d & SrcMask) == SrcSI)
|
||||
string_addr_inc(ctxt, seg_override_base(ctxt, ops, c),
|
||||
string_addr_inc(ctxt, seg_override(ctxt, ops, c),
|
||||
VCPU_REGS_RSI, &c->src);
|
||||
|
||||
if ((c->d & DstMask) == DstDI)
|
||||
string_addr_inc(ctxt, es_base(ctxt, ops), VCPU_REGS_RDI,
|
||||
string_addr_inc(ctxt, VCPU_SREG_ES, VCPU_REGS_RDI,
|
||||
&c->dst);
|
||||
|
||||
if (c->rep_prefix && (c->d & String)) {
|
||||
|
@@ -3482,6 +3413,8 @@ writeback:
ctxt->eip = c->eip;

done:
if (rc == X86EMUL_PROPAGATE_FAULT)
ctxt->have_exception = true;
return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
|
||||
|
||||
twobyte_insn:
|
||||
|
@ -3544,9 +3477,11 @@ twobyte_insn:
|
|||
break;
|
||||
case 5: /* not defined */
|
||||
emulate_ud(ctxt);
|
||||
rc = X86EMUL_PROPAGATE_FAULT;
|
||||
goto done;
|
||||
case 7: /* invlpg*/
|
||||
emulate_invlpg(ctxt->vcpu, c->src.addr.mem);
|
||||
emulate_invlpg(ctxt->vcpu,
|
||||
linear(ctxt, c->src.addr.mem));
|
||||
/* Disable writeback. */
|
||||
c->dst.type = OP_NONE;
|
||||
break;
|
||||
|
@ -3573,6 +3508,7 @@ twobyte_insn:
|
|||
case 5 ... 7:
|
||||
case 9 ... 15:
|
||||
emulate_ud(ctxt);
|
||||
rc = X86EMUL_PROPAGATE_FAULT;
|
||||
goto done;
|
||||
}
|
||||
c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu);
|
||||
|
@ -3581,6 +3517,7 @@ twobyte_insn:
|
|||
if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
|
||||
(c->modrm_reg == 4 || c->modrm_reg == 5)) {
|
||||
emulate_ud(ctxt);
|
||||
rc = X86EMUL_PROPAGATE_FAULT;
|
||||
goto done;
|
||||
}
|
||||
ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu);
|
||||
|
@ -3588,6 +3525,7 @@ twobyte_insn:
|
|||
case 0x22: /* mov reg, cr */
|
||||
if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) {
|
||||
emulate_gp(ctxt, 0);
|
||||
rc = X86EMUL_PROPAGATE_FAULT;
|
||||
goto done;
|
||||
}
|
||||
c->dst.type = OP_NONE;
|
||||
|
@ -3596,6 +3534,7 @@ twobyte_insn:
|
|||
if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
|
||||
(c->modrm_reg == 4 || c->modrm_reg == 5)) {
|
||||
emulate_ud(ctxt);
|
||||
rc = X86EMUL_PROPAGATE_FAULT;
|
||||
goto done;
|
||||
}
|
||||
|
||||
|
@ -3604,6 +3543,7 @@ twobyte_insn:
|
|||
~0ULL : ~0U), ctxt->vcpu) < 0) {
|
||||
/* #UD condition is already handled by the code above */
|
||||
emulate_gp(ctxt, 0);
|
||||
rc = X86EMUL_PROPAGATE_FAULT;
|
||||
goto done;
|
||||
}
|
||||
|
||||
|
@ -3615,6 +3555,7 @@ twobyte_insn:
|
|||
| ((u64)c->regs[VCPU_REGS_RDX] << 32);
|
||||
if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) {
|
||||
emulate_gp(ctxt, 0);
|
||||
rc = X86EMUL_PROPAGATE_FAULT;
|
||||
goto done;
|
||||
}
|
||||
rc = X86EMUL_CONTINUE;
|
||||
|
@ -3623,6 +3564,7 @@ twobyte_insn:
|
|||
/* rdmsr */
|
||||
if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) {
|
||||
emulate_gp(ctxt, 0);
|
||||
rc = X86EMUL_PROPAGATE_FAULT;
|
||||
goto done;
|
||||
} else {
|
||||
c->regs[VCPU_REGS_RAX] = (u32)msr_data;
|
||||
|
@ -3785,6 +3727,5 @@ twobyte_insn:
|
|||
goto writeback;
|
||||
|
||||
cannot_emulate:
|
||||
DPRINTF("Cannot emulate %02x\n", c->b);
|
||||
return -1;
|
||||
}
|
||||
@@ -73,6 +73,13 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask)
return vcpu->arch.cr4 & mask;
}

static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu)
{
if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
kvm_x86_ops->decache_cr3(vcpu);
return vcpu->arch.cr3;
}

static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu)
{
return kvm_read_cr4_bits(vcpu, ~0UL);

@@ -84,4 +91,19 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
| ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
}

static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
{
vcpu->arch.hflags |= HF_GUEST_MASK;
}

static inline void leave_guest_mode(struct kvm_vcpu *vcpu)
{
vcpu->arch.hflags &= ~HF_GUEST_MASK;
}

static inline bool is_guest_mode(struct kvm_vcpu *vcpu)
{
return vcpu->arch.hflags & HF_GUEST_MASK;
}

#endif
|
||||
|
|
|
@@ -277,7 +277,8 @@ static void apic_update_ppr(struct kvm_lapic *apic)

if (old_ppr != ppr) {
apic_set_reg(apic, APIC_PROCPRI, ppr);
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
if (ppr < old_ppr)
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}
}
|
|
@ -18,9 +18,11 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include "irq.h"
|
||||
#include "mmu.h"
|
||||
#include "x86.h"
|
||||
#include "kvm_cache_regs.h"
|
||||
#include "x86.h"
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/types.h>
|
||||
|
@ -194,7 +196,6 @@ static struct percpu_counter kvm_total_used_mmu_pages;
|
|||
|
||||
static u64 __read_mostly shadow_trap_nonpresent_pte;
|
||||
static u64 __read_mostly shadow_notrap_nonpresent_pte;
|
||||
static u64 __read_mostly shadow_base_present_pte;
|
||||
static u64 __read_mostly shadow_nx_mask;
|
||||
static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
|
||||
static u64 __read_mostly shadow_user_mask;
|
||||
|
@ -213,12 +214,6 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes);
|
||||
|
||||
void kvm_mmu_set_base_ptes(u64 base_pte)
|
||||
{
|
||||
shadow_base_present_pte = base_pte;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
|
||||
|
||||
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
|
||||
u64 dirty_mask, u64 nx_mask, u64 x_mask)
|
||||
{
|
||||
|
@@ -482,46 +477,46 @@ static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
}

/*
* Return the pointer to the largepage write count for a given
* gfn, handling slots that are not large page aligned.
* Return the pointer to the large page information for a given gfn,
* handling slots that are not large page aligned.
*/
static int *slot_largepage_idx(gfn_t gfn,
struct kvm_memory_slot *slot,
int level)
static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn,
struct kvm_memory_slot *slot,
int level)
{
unsigned long idx;

idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
(slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
return &slot->lpage_info[level - 2][idx].write_count;
return &slot->lpage_info[level - 2][idx];
}
|
||||
|
||||
static void account_shadowed(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
struct kvm_memory_slot *slot;
|
||||
int *write_count;
|
||||
struct kvm_lpage_info *linfo;
|
||||
int i;
|
||||
|
||||
slot = gfn_to_memslot(kvm, gfn);
|
||||
for (i = PT_DIRECTORY_LEVEL;
|
||||
i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
|
||||
write_count = slot_largepage_idx(gfn, slot, i);
|
||||
*write_count += 1;
|
||||
linfo = lpage_info_slot(gfn, slot, i);
|
||||
linfo->write_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
struct kvm_memory_slot *slot;
|
||||
int *write_count;
|
||||
struct kvm_lpage_info *linfo;
|
||||
int i;
|
||||
|
||||
slot = gfn_to_memslot(kvm, gfn);
|
||||
for (i = PT_DIRECTORY_LEVEL;
|
||||
i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
|
||||
write_count = slot_largepage_idx(gfn, slot, i);
|
||||
*write_count -= 1;
|
||||
WARN_ON(*write_count < 0);
|
||||
linfo = lpage_info_slot(gfn, slot, i);
|
||||
linfo->write_count -= 1;
|
||||
WARN_ON(linfo->write_count < 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -530,12 +525,12 @@ static int has_wrprotected_page(struct kvm *kvm,
|
|||
int level)
|
||||
{
|
||||
struct kvm_memory_slot *slot;
|
||||
int *largepage_idx;
|
||||
struct kvm_lpage_info *linfo;
|
||||
|
||||
slot = gfn_to_memslot(kvm, gfn);
|
||||
if (slot) {
|
||||
largepage_idx = slot_largepage_idx(gfn, slot, level);
|
||||
return *largepage_idx;
|
||||
linfo = lpage_info_slot(gfn, slot, level);
|
||||
return linfo->write_count;
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
@ -590,16 +585,15 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
|
|||
static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
|
||||
{
|
||||
struct kvm_memory_slot *slot;
|
||||
unsigned long idx;
|
||||
struct kvm_lpage_info *linfo;
|
||||
|
||||
slot = gfn_to_memslot(kvm, gfn);
|
||||
if (likely(level == PT_PAGE_TABLE_LEVEL))
|
||||
return &slot->rmap[gfn - slot->base_gfn];
|
||||
|
||||
idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
|
||||
(slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
|
||||
linfo = lpage_info_slot(gfn, slot, level);
|
||||
|
||||
return &slot->lpage_info[level - 2][idx].rmap_pde;
|
||||
return &linfo->rmap_pde;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -887,19 +881,16 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
|
|||
end = start + (memslot->npages << PAGE_SHIFT);
|
||||
if (hva >= start && hva < end) {
|
||||
gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
|
||||
gfn_t gfn = memslot->base_gfn + gfn_offset;
|
||||
|
||||
ret = handler(kvm, &memslot->rmap[gfn_offset], data);
|
||||
|
||||
for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
|
||||
unsigned long idx;
|
||||
int sh;
|
||||
struct kvm_lpage_info *linfo;
|
||||
|
||||
sh = KVM_HPAGE_GFN_SHIFT(PT_DIRECTORY_LEVEL+j);
|
||||
idx = ((memslot->base_gfn+gfn_offset) >> sh) -
|
||||
(memslot->base_gfn >> sh);
|
||||
ret |= handler(kvm,
|
||||
&memslot->lpage_info[j][idx].rmap_pde,
|
||||
data);
|
||||
linfo = lpage_info_slot(gfn, memslot,
|
||||
PT_DIRECTORY_LEVEL + j);
|
||||
ret |= handler(kvm, &linfo->rmap_pde, data);
|
||||
}
|
||||
trace_kvm_age_page(hva, memslot, ret);
|
||||
retval |= ret;
|
||||
|
@ -1161,7 +1152,7 @@ static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
|
|||
}
|
||||
|
||||
static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu_page *sp, bool clear_unsync)
|
||||
struct kvm_mmu_page *sp)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
@ -1291,7 +1282,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
|||
if (clear_unsync)
|
||||
kvm_unlink_unsync_page(vcpu->kvm, sp);
|
||||
|
||||
if (vcpu->arch.mmu.sync_page(vcpu, sp, clear_unsync)) {
|
||||
if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
|
||||
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
|
||||
return 1;
|
||||
}
|
||||
|
@ -1332,12 +1323,12 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
|
|||
continue;
|
||||
|
||||
WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
|
||||
kvm_unlink_unsync_page(vcpu->kvm, s);
|
||||
if ((s->role.cr4_pae != !!is_pae(vcpu)) ||
|
||||
(vcpu->arch.mmu.sync_page(vcpu, s, true))) {
|
||||
(vcpu->arch.mmu.sync_page(vcpu, s))) {
|
||||
kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list);
|
||||
continue;
|
||||
}
|
||||
kvm_unlink_unsync_page(vcpu->kvm, s);
|
||||
flush = true;
|
||||
}
|
||||
|
||||
|
@ -1963,9 +1954,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
|
|||
unsigned pte_access, int user_fault,
|
||||
int write_fault, int dirty, int level,
|
||||
gfn_t gfn, pfn_t pfn, bool speculative,
|
||||
bool can_unsync, bool reset_host_protection)
|
||||
bool can_unsync, bool host_writable)
|
||||
{
|
||||
u64 spte;
|
||||
u64 spte, entry = *sptep;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
|
@ -1973,7 +1964,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
|
|||
* whether the guest actually used the pte (in order to detect
|
||||
* demand paging).
|
||||
*/
|
||||
spte = shadow_base_present_pte;
|
||||
spte = PT_PRESENT_MASK;
|
||||
if (!speculative)
|
||||
spte |= shadow_accessed_mask;
|
||||
if (!dirty)
|
||||
|
@ -1990,8 +1981,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
|
|||
spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
|
||||
kvm_is_mmio_pfn(pfn));
|
||||
|
||||
if (reset_host_protection)
|
||||
if (host_writable)
|
||||
spte |= SPTE_HOST_WRITEABLE;
|
||||
else
|
||||
pte_access &= ~ACC_WRITE_MASK;
|
||||
|
||||
spte |= (u64)pfn << PAGE_SHIFT;
|
||||
|
||||
|
@@ -2036,6 +2029,14 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,

set_pte:
update_spte(sptep, spte);
/*
* If we overwrite a writable spte with a read-only one we
* should flush remote TLBs. Otherwise rmap_write_protect
* will find a read-only spte, even though the writable spte
* might be cached on a CPU's TLB.
*/
if (is_writable_pte(entry) && !is_writable_pte(*sptep))
kvm_flush_remote_tlbs(vcpu->kvm);
done:
return ret;
}
|
||||
|
@ -2045,7 +2046,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
|
|||
int user_fault, int write_fault, int dirty,
|
||||
int *ptwrite, int level, gfn_t gfn,
|
||||
pfn_t pfn, bool speculative,
|
||||
bool reset_host_protection)
|
||||
bool host_writable)
|
||||
{
|
||||
int was_rmapped = 0;
|
||||
int rmap_count;
|
||||
|
@ -2080,7 +2081,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
|
|||
|
||||
if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
|
||||
dirty, level, gfn, pfn, speculative, true,
|
||||
reset_host_protection)) {
|
||||
host_writable)) {
|
||||
if (write_fault)
|
||||
*ptwrite = 1;
|
||||
kvm_mmu_flush_tlb(vcpu);
|
||||
|
@ -2211,7 +2212,8 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
|
|||
}
|
||||
|
||||
static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
|
||||
int level, gfn_t gfn, pfn_t pfn)
|
||||
int map_writable, int level, gfn_t gfn, pfn_t pfn,
|
||||
bool prefault)
|
||||
{
|
||||
struct kvm_shadow_walk_iterator iterator;
|
||||
struct kvm_mmu_page *sp;
|
||||
|
@ -2220,9 +2222,11 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
|
|||
|
||||
for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
|
||||
if (iterator.level == level) {
|
||||
mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
|
||||
unsigned pte_access = ACC_ALL;
|
||||
|
||||
mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access,
|
||||
0, write, 1, &pt_write,
|
||||
level, gfn, pfn, false, true);
|
||||
level, gfn, pfn, prefault, map_writable);
|
||||
direct_pte_prefetch(vcpu, iterator.sptep);
|
||||
++vcpu->stat.pf_fixed;
|
||||
break;
|
||||
|
@ -2277,12 +2281,17 @@ static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn)
|
|||
return 1;
|
||||
}
|
||||
|
||||
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
|
||||
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
|
||||
gva_t gva, pfn_t *pfn, bool write, bool *writable);
|
||||
|
||||
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn,
|
||||
bool prefault)
|
||||
{
|
||||
int r;
|
||||
int level;
|
||||
pfn_t pfn;
|
||||
unsigned long mmu_seq;
|
||||
bool map_writable;
|
||||
|
||||
level = mapping_level(vcpu, gfn);
|
||||
|
||||
|
@ -2297,7 +2306,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
|
|||
|
||||
mmu_seq = vcpu->kvm->mmu_notifier_seq;
|
||||
smp_rmb();
|
||||
pfn = gfn_to_pfn(vcpu->kvm, gfn);
|
||||
|
||||
if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable))
|
||||
return 0;
|
||||
|
||||
/* mmio */
|
||||
if (is_error_pfn(pfn))
|
||||
|
@ -2307,7 +2318,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
|
|||
if (mmu_notifier_retry(vcpu, mmu_seq))
|
||||
goto out_unlock;
|
||||
kvm_mmu_free_some_pages(vcpu);
|
||||
r = __direct_map(vcpu, v, write, level, gfn, pfn);
|
||||
r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn,
|
||||
prefault);
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
|
||||
|
||||
|
@ -2530,6 +2542,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
|
|||
hpa_t root = vcpu->arch.mmu.root_hpa;
|
||||
sp = page_header(root);
|
||||
mmu_sync_children(vcpu, sp);
|
||||
trace_kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
|
||||
return;
|
||||
}
|
||||
for (i = 0; i < 4; ++i) {
|
||||
|
@ -2552,23 +2565,24 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
|
||||
u32 access, u32 *error)
|
||||
u32 access, struct x86_exception *exception)
|
||||
{
|
||||
if (error)
|
||||
*error = 0;
|
||||
if (exception)
|
||||
exception->error_code = 0;
|
||||
return vaddr;
|
||||
}
|
||||
|
||||
static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
|
||||
u32 access, u32 *error)
|
||||
u32 access,
|
||||
struct x86_exception *exception)
|
||||
{
|
||||
if (error)
|
||||
*error = 0;
|
||||
if (exception)
|
||||
exception->error_code = 0;
|
||||
return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access);
|
||||
}
|
||||
|
||||
static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
|
||||
u32 error_code)
|
||||
u32 error_code, bool prefault)
|
||||
{
|
||||
gfn_t gfn;
|
||||
int r;
|
||||
|
@ -2584,17 +2598,67 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
|
|||
gfn = gva >> PAGE_SHIFT;
|
||||
|
||||
return nonpaging_map(vcpu, gva & PAGE_MASK,
|
||||
error_code & PFERR_WRITE_MASK, gfn);
|
||||
error_code & PFERR_WRITE_MASK, gfn, prefault);
|
||||
}
|
||||
|
||||
static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
|
||||
u32 error_code)
|
||||
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
|
||||
{
|
||||
struct kvm_arch_async_pf arch;
|
||||
|
||||
arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
|
||||
arch.gfn = gfn;
|
||||
arch.direct_map = vcpu->arch.mmu.direct_map;
|
||||
arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
|
||||
|
||||
return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
|
||||
}
|
||||
|
||||
static bool can_do_async_pf(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
|
||||
kvm_event_needs_reinjection(vcpu)))
|
||||
return false;
|
||||
|
||||
return kvm_x86_ops->interrupt_allowed(vcpu);
|
||||
}
|
||||
|
||||
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
|
||||
gva_t gva, pfn_t *pfn, bool write, bool *writable)
|
||||
{
|
||||
bool async;
|
||||
|
||||
*pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async, write, writable);
|
||||
|
||||
if (!async)
|
||||
return false; /* *pfn has correct page already */
|
||||
|
||||
put_page(pfn_to_page(*pfn));
|
||||
|
||||
if (!prefault && can_do_async_pf(vcpu)) {
|
||||
trace_kvm_try_async_get_page(gva, gfn);
|
||||
if (kvm_find_async_pf_gfn(vcpu, gfn)) {
|
||||
trace_kvm_async_pf_doublefault(gva, gfn);
|
||||
kvm_make_request(KVM_REQ_APF_HALT, vcpu);
|
||||
return true;
|
||||
} else if (kvm_arch_setup_async_pf(vcpu, gva, gfn))
|
||||
return true;
|
||||
}
|
||||
|
||||
*pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, writable);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
|
||||
bool prefault)
|
||||
{
|
||||
pfn_t pfn;
|
||||
int r;
|
||||
int level;
|
||||
gfn_t gfn = gpa >> PAGE_SHIFT;
|
||||
unsigned long mmu_seq;
|
||||
int write = error_code & PFERR_WRITE_MASK;
|
||||
bool map_writable;
|
||||
|
||||
ASSERT(vcpu);
|
||||
ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
|
||||
|
@ -2609,15 +2673,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
|
|||
|
||||
mmu_seq = vcpu->kvm->mmu_notifier_seq;
|
||||
smp_rmb();
|
||||
pfn = gfn_to_pfn(vcpu->kvm, gfn);
|
||||
|
||||
if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
|
||||
return 0;
|
||||
|
||||
/* mmio */
|
||||
if (is_error_pfn(pfn))
|
||||
return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
if (mmu_notifier_retry(vcpu, mmu_seq))
|
||||
goto out_unlock;
|
||||
kvm_mmu_free_some_pages(vcpu);
|
||||
r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
|
||||
level, gfn, pfn);
|
||||
r = __direct_map(vcpu, gpa, write, map_writable,
|
||||
level, gfn, pfn, prefault);
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
|
||||
return r;
|
||||
|
@ -2659,18 +2727,19 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
|
|||
|
||||
static void paging_new_cr3(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
pgprintk("%s: cr3 %lx\n", __func__, vcpu->arch.cr3);
|
||||
pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu));
|
||||
mmu_free_roots(vcpu);
|
||||
}
|
||||
|
||||
static unsigned long get_cr3(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.cr3;
|
||||
return kvm_read_cr3(vcpu);
|
||||
}
|
||||
|
||||
static void inject_page_fault(struct kvm_vcpu *vcpu)
|
||||
static void inject_page_fault(struct kvm_vcpu *vcpu,
|
||||
struct x86_exception *fault)
|
||||
{
|
||||
vcpu->arch.mmu.inject_page_fault(vcpu);
|
||||
vcpu->arch.mmu.inject_page_fault(vcpu, fault);
|
||||
}
|
||||
|
||||
static void paging_free(struct kvm_vcpu *vcpu)
|
||||
|
@ -2816,6 +2885,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
|
|||
{
|
||||
struct kvm_mmu *context = vcpu->arch.walk_mmu;
|
||||
|
||||
context->base_role.word = 0;
|
||||
context->new_cr3 = nonpaging_new_cr3;
|
||||
context->page_fault = tdp_page_fault;
|
||||
context->free = nonpaging_free;
|
||||
|
@ -3008,9 +3078,6 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
|
|||
return;
|
||||
}
|
||||
|
||||
if (is_rsvd_bits_set(&vcpu->arch.mmu, *(u64 *)new, PT_PAGE_TABLE_LEVEL))
|
||||
return;
|
||||
|
||||
++vcpu->kvm->stat.mmu_pte_updated;
|
||||
if (!sp->role.cr4_pae)
|
||||
paging32_update_pte(vcpu, sp, spte, new);
|
||||
|
@ -3264,12 +3331,13 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
}
|
||||
|
||||
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
|
||||
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
|
||||
void *insn, int insn_len)
|
||||
{
|
||||
int r;
|
||||
enum emulation_result er;
|
||||
|
||||
r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);
|
||||
r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
|
||||
if (r < 0)
|
||||
goto out;
|
||||
|
||||
|
@ -3282,7 +3350,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
|
|||
if (r)
|
||||
goto out;
|
||||
|
||||
er = emulate_instruction(vcpu, cr2, error_code, 0);
|
||||
er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len);
|
||||
|
||||
switch (er) {
|
||||
case EMULATE_DONE:
|
||||
|
@ -3377,11 +3445,14 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
|
|||
if (!test_bit(slot, sp->slot_bitmap))
|
||||
continue;
|
||||
|
||||
if (sp->role.level != PT_PAGE_TABLE_LEVEL)
|
||||
continue;
|
||||
|
||||
pt = sp->spt;
|
||||
for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
|
||||
/* avoid RMW */
|
||||
if (is_writable_pte(pt[i]))
|
||||
pt[i] &= ~PT_WRITABLE_MASK;
|
||||
update_spte(&pt[i], pt[i] & ~PT_WRITABLE_MASK);
|
||||
}
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
|
@ -3463,13 +3534,6 @@ static void mmu_destroy_caches(void)
|
|||
kmem_cache_destroy(mmu_page_header_cache);
|
||||
}
|
||||
|
||||
void kvm_mmu_module_exit(void)
|
||||
{
|
||||
mmu_destroy_caches();
|
||||
percpu_counter_destroy(&kvm_total_used_mmu_pages);
|
||||
unregister_shrinker(&mmu_shrinker);
|
||||
}
|
||||
|
||||
int kvm_mmu_module_init(void)
|
||||
{
|
||||
pte_chain_cache = kmem_cache_create("kvm_pte_chain",
|
||||
|
@ -3566,7 +3630,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
|
|||
|
||||
static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
(void)kvm_set_cr3(vcpu, vcpu->arch.cr3);
|
||||
(void)kvm_set_cr3(vcpu, kvm_read_cr3(vcpu));
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -3662,12 +3726,6 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy);
|
||||
|
||||
#ifdef CONFIG_KVM_MMU_AUDIT
|
||||
#include "mmu_audit.c"
|
||||
#else
|
||||
static void mmu_audit_disable(void) { }
|
||||
#endif
|
||||
|
||||
void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
ASSERT(vcpu);
|
||||
|
@ -3675,5 +3733,18 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
|
|||
destroy_kvm_mmu(vcpu);
|
||||
free_mmu_pages(vcpu);
|
||||
mmu_free_memory_caches(vcpu);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KVM_MMU_AUDIT
|
||||
#include "mmu_audit.c"
|
||||
#else
|
||||
static void mmu_audit_disable(void) { }
|
||||
#endif
|
||||
|
||||
void kvm_mmu_module_exit(void)
|
||||
{
|
||||
mmu_destroy_caches();
|
||||
percpu_counter_destroy(&kvm_total_used_mmu_pages);
|
||||
unregister_shrinker(&mmu_shrinker);
|
||||
mmu_audit_disable();
|
||||
}
|
||||
|
|
|
@@ -19,11 +19,9 @@

#include <linux/ratelimit.h>

static int audit_point;

#define audit_printk(fmt, args...) \
#define audit_printk(kvm, fmt, args...) \
printk(KERN_ERR "audit: (%s) error: " \
fmt, audit_point_name[audit_point], ##args)
fmt, audit_point_name[kvm->arch.audit_point], ##args)

typedef void (*inspect_spte_fn) (struct kvm_vcpu *vcpu, u64 *sptep, int level);
|
||||
|
||||
|
@ -97,18 +95,21 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
|
|||
|
||||
if (sp->unsync) {
|
||||
if (level != PT_PAGE_TABLE_LEVEL) {
|
||||
audit_printk("unsync sp: %p level = %d\n", sp, level);
|
||||
audit_printk(vcpu->kvm, "unsync sp: %p "
|
||||
"level = %d\n", sp, level);
|
||||
return;
|
||||
}
|
||||
|
||||
if (*sptep == shadow_notrap_nonpresent_pte) {
|
||||
audit_printk("notrap spte in unsync sp: %p\n", sp);
|
||||
audit_printk(vcpu->kvm, "notrap spte in unsync "
|
||||
"sp: %p\n", sp);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (sp->role.direct && *sptep == shadow_notrap_nonpresent_pte) {
|
||||
audit_printk("notrap spte in direct sp: %p\n", sp);
|
||||
audit_printk(vcpu->kvm, "notrap spte in direct sp: %p\n",
|
||||
sp);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -125,8 +126,9 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
|
|||
|
||||
hpa = pfn << PAGE_SHIFT;
|
||||
if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
|
||||
audit_printk("levels %d pfn %llx hpa %llx ent %llxn",
|
||||
vcpu->arch.mmu.root_level, pfn, hpa, *sptep);
|
||||
audit_printk(vcpu->kvm, "levels %d pfn %llx hpa %llx "
|
||||
"ent %llxn", vcpu->arch.mmu.root_level, pfn,
|
||||
hpa, *sptep);
|
||||
}
|
||||
|
||||
static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
|
||||
|
@ -142,8 +144,8 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
|
|||
if (!gfn_to_memslot(kvm, gfn)) {
|
||||
if (!printk_ratelimit())
|
||||
return;
|
||||
audit_printk("no memslot for gfn %llx\n", gfn);
|
||||
audit_printk("index %ld of sp (gfn=%llx)\n",
|
||||
audit_printk(kvm, "no memslot for gfn %llx\n", gfn);
|
||||
audit_printk(kvm, "index %ld of sp (gfn=%llx)\n",
|
||||
(long int)(sptep - rev_sp->spt), rev_sp->gfn);
|
||||
dump_stack();
|
||||
return;
|
||||
|
@ -153,7 +155,8 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
|
|||
if (!*rmapp) {
|
||||
if (!printk_ratelimit())
|
||||
return;
|
||||
audit_printk("no rmap for writable spte %llx\n", *sptep);
|
||||
audit_printk(kvm, "no rmap for writable spte %llx\n",
|
||||
*sptep);
|
||||
dump_stack();
|
||||
}
|
||||
}
|
||||
|
@ -168,8 +171,9 @@ static void audit_spte_after_sync(struct kvm_vcpu *vcpu, u64 *sptep, int level)
|
|||
{
|
||||
struct kvm_mmu_page *sp = page_header(__pa(sptep));
|
||||
|
||||
if (audit_point == AUDIT_POST_SYNC && sp->unsync)
|
||||
audit_printk("meet unsync sp(%p) after sync root.\n", sp);
|
||||
if (vcpu->kvm->arch.audit_point == AUDIT_POST_SYNC && sp->unsync)
|
||||
audit_printk(vcpu->kvm, "meet unsync sp(%p) after sync "
|
||||
"root.\n", sp);
|
||||
}
|
||||
|
||||
static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
|
@ -202,8 +206,9 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
|
|||
spte = rmap_next(kvm, rmapp, NULL);
|
||||
while (spte) {
|
||||
if (is_writable_pte(*spte))
|
||||
audit_printk("shadow page has writable mappings: gfn "
|
||||
"%llx role %x\n", sp->gfn, sp->role.word);
|
||||
audit_printk(kvm, "shadow page has writable "
|
||||
"mappings: gfn %llx role %x\n",
|
||||
sp->gfn, sp->role.word);
|
||||
spte = rmap_next(kvm, rmapp, spte);
|
||||
}
|
||||
}
|
||||
|
@ -238,7 +243,7 @@ static void kvm_mmu_audit(void *ignore, struct kvm_vcpu *vcpu, int point)
|
|||
if (!__ratelimit(&ratelimit_state))
|
||||
return;
|
||||
|
||||
audit_point = point;
|
||||
vcpu->kvm->arch.audit_point = point;
|
||||
audit_all_active_sps(vcpu->kvm);
|
||||
audit_vcpu_spte(vcpu);
|
||||
}
|
||||
|
|
|
@@ -72,7 +72,7 @@ struct guest_walker {
unsigned pt_access;
unsigned pte_access;
gfn_t gfn;
u32 error_code;
struct x86_exception fault;
};
|
||||
|
||||
static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
|
||||
|
@ -266,21 +266,23 @@ walk:
|
|||
return 1;
|
||||
|
||||
error:
|
||||
walker->error_code = 0;
|
||||
walker->fault.vector = PF_VECTOR;
|
||||
walker->fault.error_code_valid = true;
|
||||
walker->fault.error_code = 0;
|
||||
if (present)
|
||||
walker->error_code |= PFERR_PRESENT_MASK;
|
||||
walker->fault.error_code |= PFERR_PRESENT_MASK;
|
||||
|
||||
walker->error_code |= write_fault | user_fault;
|
||||
walker->fault.error_code |= write_fault | user_fault;
|
||||
|
||||
if (fetch_fault && mmu->nx)
|
||||
walker->error_code |= PFERR_FETCH_MASK;
|
||||
walker->fault.error_code |= PFERR_FETCH_MASK;
|
||||
if (rsvd_fault)
|
||||
walker->error_code |= PFERR_RSVD_MASK;
|
||||
walker->fault.error_code |= PFERR_RSVD_MASK;
|
||||
|
||||
vcpu->arch.fault.address = addr;
|
||||
vcpu->arch.fault.error_code = walker->error_code;
|
||||
walker->fault.address = addr;
|
||||
walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu;
|
||||
|
||||
trace_kvm_mmu_walker_error(walker->error_code);
|
||||
trace_kvm_mmu_walker_error(walker->fault.error_code);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -299,25 +301,42 @@ static int FNAME(walk_addr_nested)(struct guest_walker *walker,
addr, access);
}

static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp, u64 *spte,
pt_element_t gpte)
{
u64 nonpresent = shadow_trap_nonpresent_pte;

if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
goto no_present;

if (!is_present_gpte(gpte)) {
if (!sp->unsync)
nonpresent = shadow_notrap_nonpresent_pte;
goto no_present;
}

if (!(gpte & PT_ACCESSED_MASK))
goto no_present;

return false;

no_present:
drop_spte(vcpu->kvm, spte, nonpresent);
return true;
}
|
||||
|
||||
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte)
{
pt_element_t gpte;
unsigned pte_access;
pfn_t pfn;
u64 new_spte;

gpte = *(const pt_element_t *)pte;
if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
if (!is_present_gpte(gpte)) {
if (sp->unsync)
new_spte = shadow_trap_nonpresent_pte;
else
new_spte = shadow_notrap_nonpresent_pte;
__set_spte(spte, new_spte);
}
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
return;
}

pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
@@ -329,7 +348,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
return;
kvm_get_pfn(pfn);
/*
* we call mmu_set_spte() with reset_host_protection = true beacuse that
* we call mmu_set_spte() with host_writable = true beacuse that
* vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
*/
mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
@@ -364,7 +383,6 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
u64 *sptep)
{
struct kvm_mmu_page *sp;
struct kvm_mmu *mmu = &vcpu->arch.mmu;
pt_element_t *gptep = gw->prefetch_ptes;
u64 *spte;
int i;
@@ -395,14 +413,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,

gpte = gptep[i];

if (!is_present_gpte(gpte) ||
is_rsvd_bits_set(mmu, gpte, PT_PAGE_TABLE_LEVEL)) {
if (!sp->unsync)
__set_spte(spte, shadow_notrap_nonpresent_pte);
continue;
}

if (!(gpte & PT_ACCESSED_MASK))
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
continue;

pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
@@ -427,7 +438,8 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker *gw,
int user_fault, int write_fault, int hlevel,
int *ptwrite, pfn_t pfn)
int *ptwrite, pfn_t pfn, bool map_writable,
bool prefault)
{
unsigned access = gw->pt_access;
struct kvm_mmu_page *sp = NULL;
@@ -501,7 +513,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,

mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access,
user_fault, write_fault, dirty, ptwrite, it.level,
gw->gfn, pfn, false, true);
gw->gfn, pfn, prefault, map_writable);
FNAME(pte_prefetch)(vcpu, gw, it.sptep);

return it.sptep;
@@ -527,8 +539,8 @@ out_gpte_changed:
* Returns: 1 if we need to emulate the instruction, 0 otherwise, or
* a negative value on error.
*/
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
u32 error_code)
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
bool prefault)
{
int write_fault = error_code & PFERR_WRITE_MASK;
int user_fault = error_code & PFERR_USER_MASK;
@@ -539,6 +551,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
pfn_t pfn;
int level = PT_PAGE_TABLE_LEVEL;
unsigned long mmu_seq;
bool map_writable;

pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);

@@ -556,8 +569,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
*/
if (!r) {
pgprintk("%s: guest page fault\n", __func__);
inject_page_fault(vcpu);
vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
if (!prefault) {
inject_page_fault(vcpu, &walker.fault);
/* reset fork detector */
vcpu->arch.last_pt_write_count = 0;
}
return 0;
}

@@ -568,7 +584,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,

mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);

if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, write_fault,
&map_writable))
return 0;

/* mmio */
if (is_error_pfn(pfn))
@@ -581,7 +600,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
kvm_mmu_free_some_pages(vcpu);
sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
level, &write_pt, pfn);
level, &write_pt, pfn, map_writable, prefault);
(void)sptep;
pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
sptep, *sptep, write_pt);
@@ -661,7 +680,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
}

static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
u32 *error)
struct x86_exception *exception)
{
struct guest_walker walker;
gpa_t gpa = UNMAPPED_GVA;
@@ -672,14 +691,15 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
if (r) {
gpa = gfn_to_gpa(walker.gfn);
gpa |= vaddr & ~PAGE_MASK;
} else if (error)
*error = walker.error_code;
} else if (exception)
*exception = walker.fault;

return gpa;
}

static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
u32 access, u32 *error)
u32 access,
struct x86_exception *exception)
{
struct guest_walker walker;
gpa_t gpa = UNMAPPED_GVA;
@@ -690,8 +710,8 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
if (r) {
gpa = gfn_to_gpa(walker.gfn);
gpa |= vaddr & ~PAGE_MASK;
} else if (error)
*error = walker.error_code;
} else if (exception)
*exception = walker.fault;

return gpa;
}
@@ -730,12 +750,19 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
* Using the cached information from sp->gfns is safe because:
* - The spte has a reference to the struct page, so the pfn for a given gfn
* can't change unless all sptes pointing to it are nuked first.
*
* Note:
* We should flush all tlbs if spte is dropped even though guest is
* responsible for it. Since if we don't, kvm_mmu_notifier_invalidate_page
* and kvm_mmu_notifier_invalidate_range_start detect the mapping page isn't
* used by guest then tlbs are not flushed, so guest is allowed to access the
* freed pages.
* And we increase kvm->tlbs_dirty to delay tlbs flush in this case.
*/
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
bool clear_unsync)
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
int i, offset, nr_present;
bool reset_host_protection;
bool host_writable;
gpa_t first_pte_gpa;

offset = nr_present = 0;
@@ -764,31 +791,27 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
return -EINVAL;

gfn = gpte_to_gfn(gpte);
if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)
|| gfn != sp->gfns[i] || !is_present_gpte(gpte)
|| !(gpte & PT_ACCESSED_MASK)) {
u64 nonpresent;

if (is_present_gpte(gpte) || !clear_unsync)
nonpresent = shadow_trap_nonpresent_pte;
else
nonpresent = shadow_notrap_nonpresent_pte;
drop_spte(vcpu->kvm, &sp->spt[i], nonpresent);
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
vcpu->kvm->tlbs_dirty++;
continue;
}

if (gfn != sp->gfns[i]) {
drop_spte(vcpu->kvm, &sp->spt[i],
shadow_trap_nonpresent_pte);
vcpu->kvm->tlbs_dirty++;
continue;
}

nr_present++;
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) {
pte_access &= ~ACC_WRITE_MASK;
reset_host_protection = 0;
} else {
reset_host_protection = 1;
}
host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;

set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
spte_to_pfn(sp->spt[i]), true, false,
reset_host_protection);
host_writable);
}

return !nr_present;
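The sync_page() comment above explains why a dropped shadow PTE only bumps kvm->tlbs_dirty instead of flushing remote TLBs on the spot: the flush can be batched and issued once, as long as it happens before the freed page could be reused. The snippet below is a minimal, self-contained sketch of that deferred-flush pattern; it is not KVM code, and all names (tlbs_dirty, drop_translation, commit_pending_flush) are illustrative assumptions.

```c
/* cc -std=c11 deferred_flush.c */
#include <stdatomic.h>
#include <stdio.h>

/* Illustrative stand-in for a batched remote-TLB-flush counter. */
static atomic_int tlbs_dirty;

static void flush_remote_tlbs(void)
{
	/* stands in for the expensive IPI-based flush */
	printf("flushing remote TLBs\n");
}

/* Dropping a translation only records that a flush is owed. */
static void drop_translation(void)
{
	atomic_fetch_add(&tlbs_dirty, 1);
}

/* Called where stale TLB entries must be gone, e.g. before the memory
 * backing the dropped translations is actually handed back. */
static void commit_pending_flush(void)
{
	int dirty = atomic_load(&tlbs_dirty);

	if (dirty) {
		flush_remote_tlbs();
		/* subtract only what we observed, so a concurrent drop
		 * leaves the counter non-zero and forces another flush */
		atomic_fetch_sub(&tlbs_dirty, dirty);
	}
}

int main(void)
{
	drop_translation();
	drop_translation();
	commit_pending_flush();	/* one flush covers both drops */
	commit_pending_flush();	/* nothing pending: no flush */
	return 0;
}
```

The counter (rather than a plain flag) is what lets a drop that races with the flush still be noticed, which mirrors why the patch above increments a per-VM count.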
File diff suppressed because it is too large
@@ -178,27 +178,36 @@ TRACE_EVENT(kvm_apic,
#define trace_kvm_apic_read(reg, val) trace_kvm_apic(0, reg, val)
#define trace_kvm_apic_write(reg, val) trace_kvm_apic(1, reg, val)

#define KVM_ISA_VMX 1
#define KVM_ISA_SVM 2

/*
* Tracepoint for kvm guest exit:
*/
TRACE_EVENT(kvm_exit,
TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu),
TP_ARGS(exit_reason, vcpu),
TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu, u32 isa),
TP_ARGS(exit_reason, vcpu, isa),

TP_STRUCT__entry(
__field( unsigned int, exit_reason )
__field( unsigned long, guest_rip )
__field( u32, isa )
__field( u64, info1 )
__field( u64, info2 )
),

TP_fast_assign(
__entry->exit_reason = exit_reason;
__entry->guest_rip = kvm_rip_read(vcpu);
__entry->isa = isa;
kvm_x86_ops->get_exit_info(vcpu, &__entry->info1,
&__entry->info2);
),

TP_printk("reason %s rip 0x%lx",
TP_printk("reason %s rip 0x%lx info %llx %llx",
ftrace_print_symbols_seq(p, __entry->exit_reason,
kvm_x86_ops->exit_reasons_str),
__entry->guest_rip)
__entry->guest_rip, __entry->info1, __entry->info2)
);

/*
@@ -69,6 +69,9 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
static int __read_mostly vmm_exclusive = 1;
module_param(vmm_exclusive, bool, S_IRUGO);

static int __read_mostly yield_on_hlt = 1;
module_param(yield_on_hlt, bool, S_IRUGO);

#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
#define KVM_GUEST_CR0_MASK \
@@ -177,6 +180,7 @@ static int init_rmode(struct kvm *kvm);
static u64 construct_eptp(unsigned long root_hpa);
static void kvm_cpu_vmxon(u64 addr);
static void kvm_cpu_vmxoff(void);
static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);

static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -188,6 +192,8 @@ static unsigned long *vmx_io_bitmap_b;
static unsigned long *vmx_msr_bitmap_legacy;
static unsigned long *vmx_msr_bitmap_longmode;

static bool cpu_has_load_ia32_efer;

static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
static DEFINE_SPINLOCK(vmx_vpid_lock);

@@ -472,7 +478,7 @@ static void vmcs_clear(struct vmcs *vmcs)
u8 error;

asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0"
: "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
: "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
: "cc", "memory");
if (error)
printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
@@ -485,7 +491,7 @@ static void vmcs_load(struct vmcs *vmcs)
u8 error;

asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
: "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
: "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
: "cc", "memory");
if (error)
printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
@@ -565,10 +571,10 @@ static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)

static unsigned long vmcs_readl(unsigned long field)
{
unsigned long value;
unsigned long value = 0;

asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX)
: "=a"(value) : "d"(field) : "cc");
: "+a"(value) : "d"(field) : "cc");
return value;
}

@@ -661,6 +667,12 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
unsigned i;
struct msr_autoload *m = &vmx->msr_autoload;

if (msr == MSR_EFER && cpu_has_load_ia32_efer) {
vmcs_clear_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER);
vmcs_clear_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER);
return;
}

for (i = 0; i < m->nr; ++i)
if (m->guest[i].index == msr)
break;
@@ -680,6 +692,14 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
unsigned i;
struct msr_autoload *m = &vmx->msr_autoload;

if (msr == MSR_EFER && cpu_has_load_ia32_efer) {
vmcs_write64(GUEST_IA32_EFER, guest_val);
vmcs_write64(HOST_IA32_EFER, host_val);
vmcs_set_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER);
vmcs_set_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER);
return;
}

for (i = 0; i < m->nr; ++i)
if (m->guest[i].index == msr)
break;
@@ -1009,6 +1029,17 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
vmx_set_interrupt_shadow(vcpu, 0);
}

static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
{
/* Ensure that we clear the HLT state in the VMCS. We don't need to
* explicitly skip the instruction because if the HLT state is set, then
* the instruction is already executing and RIP has already been
* advanced. */
if (!yield_on_hlt &&
vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
}

static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
bool has_error_code, u32 error_code,
bool reinject)
@@ -1035,6 +1066,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
intr_info |= INTR_TYPE_HARD_EXCEPTION;

vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
vmx_clear_hlt(vcpu);
}

static bool vmx_rdtscp_supported(void)
@@ -1305,8 +1337,11 @@ static __init int vmx_disabled_by_bios(void)
&& tboot_enabled())
return 1;
if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
&& !tboot_enabled())
&& !tboot_enabled()) {
printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
" activate TXT before enabling KVM\n");
return 1;
}
}

return 0;
@@ -1400,6 +1435,14 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
return 0;
}

static __init bool allow_1_setting(u32 msr, u32 ctl)
{
u32 vmx_msr_low, vmx_msr_high;

rdmsr(msr, vmx_msr_low, vmx_msr_high);
return vmx_msr_high & ctl;
}

static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
{
u32 vmx_msr_low, vmx_msr_high;
@@ -1416,7 +1459,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
&_pin_based_exec_control) < 0)
return -EIO;

min = CPU_BASED_HLT_EXITING |
min =
#ifdef CONFIG_X86_64
CPU_BASED_CR8_LOAD_EXITING |
CPU_BASED_CR8_STORE_EXITING |
@@ -1429,6 +1472,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
CPU_BASED_MWAIT_EXITING |
CPU_BASED_MONITOR_EXITING |
CPU_BASED_INVLPG_EXITING;

if (yield_on_hlt)
min |= CPU_BASED_HLT_EXITING;

opt = CPU_BASED_TPR_SHADOW |
CPU_BASED_USE_MSR_BITMAPS |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -1510,6 +1557,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
vmcs_conf->vmexit_ctrl = _vmexit_control;
vmcs_conf->vmentry_ctrl = _vmentry_control;

cpu_has_load_ia32_efer =
allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
VM_ENTRY_LOAD_IA32_EFER)
&& allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
VM_EXIT_LOAD_IA32_EFER);

return 0;
}

@@ -1683,9 +1736,13 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
save->limit = vmcs_read32(sf->limit);
save->ar = vmcs_read32(sf->ar_bytes);
vmcs_write16(sf->selector, save->base >> 4);
vmcs_write32(sf->base, save->base & 0xfffff);
vmcs_write32(sf->base, save->base & 0xffff0);
vmcs_write32(sf->limit, 0xffff);
vmcs_write32(sf->ar_bytes, 0xf3);
if (save->base & 0xf)
printk_once(KERN_WARNING "kvm: segment base is not paragraph"
" aligned when entering protected mode (seg=%d)",
seg);
}

static void enter_rmode(struct kvm_vcpu *vcpu)
@@ -1814,6 +1871,13 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
}

static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
{
if (enable_ept && is_paging(vcpu))
vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
}

static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
{
ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
@@ -1857,6 +1921,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
unsigned long cr0,
struct kvm_vcpu *vcpu)
{
vmx_decache_cr3(vcpu);
if (!(cr0 & X86_CR0_PG)) {
/* From paging/starting to nonpaging */
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
@@ -1937,7 +2002,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
if (enable_ept) {
eptp = construct_eptp(cr3);
vmcs_write64(EPT_POINTER, eptp);
guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) :
vcpu->kvm->arch.ept_identity_map_addr;
ept_load_pdptrs(vcpu);
}
@@ -2725,7 +2790,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_IDTR_BASE, 0);
vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);

vmcs_write32(GUEST_ACTIVITY_STATE, 0);
vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);

@@ -2787,6 +2852,10 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
return;
}

if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
enable_irq_window(vcpu);
return;
}
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
@@ -2814,6 +2883,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
} else
intr |= INTR_TYPE_EXT_INTR;
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
vmx_clear_hlt(vcpu);
}

static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -2841,6 +2911,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
}
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
vmx_clear_hlt(vcpu);
}

static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -2849,7 +2920,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
return 0;

return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI));
(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
| GUEST_INTR_STATE_NMI));
}

static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
@@ -2910,7 +2982,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
* Cause the #SS fault with 0 error code in VM86 mode.
*/
if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0)
if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE)
if (emulate_instruction(vcpu, 0) == EMULATE_DONE)
return 1;
/*
* Forward all other exceptions that are valid in real mode.
@@ -3007,7 +3079,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
}

if (is_invalid_opcode(intr_info)) {
er = emulate_instruction(vcpu, 0, 0, EMULTYPE_TRAP_UD);
er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
if (er != EMULATE_DONE)
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
@@ -3026,7 +3098,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)

if (kvm_event_needs_reinjection(vcpu))
kvm_mmu_unprotect_page_virt(vcpu, cr2);
return kvm_mmu_page_fault(vcpu, cr2, error_code);
return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
}

if (vmx->rmode.vm86_active &&
@@ -3098,7 +3170,7 @@ static int handle_io(struct kvm_vcpu *vcpu)
++vcpu->stat.io_exits;

if (string || in)
return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;
return emulate_instruction(vcpu, 0) == EMULATE_DONE;

port = exit_qualification >> 16;
size = (exit_qualification & 7) + 1;
@@ -3118,14 +3190,6 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
hypercall[2] = 0xc1;
}

static void complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
if (err)
kvm_inject_gp(vcpu, 0);
else
skip_emulated_instruction(vcpu);
}

static int handle_cr(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification, val;
@@ -3143,21 +3207,21 @@ static int handle_cr(struct kvm_vcpu *vcpu)
switch (cr) {
case 0:
err = kvm_set_cr0(vcpu, val);
complete_insn_gp(vcpu, err);
kvm_complete_insn_gp(vcpu, err);
return 1;
case 3:
err = kvm_set_cr3(vcpu, val);
complete_insn_gp(vcpu, err);
kvm_complete_insn_gp(vcpu, err);
return 1;
case 4:
err = kvm_set_cr4(vcpu, val);
complete_insn_gp(vcpu, err);
kvm_complete_insn_gp(vcpu, err);
return 1;
case 8: {
u8 cr8_prev = kvm_get_cr8(vcpu);
u8 cr8 = kvm_register_read(vcpu, reg);
kvm_set_cr8(vcpu, cr8);
skip_emulated_instruction(vcpu);
err = kvm_set_cr8(vcpu, cr8);
kvm_complete_insn_gp(vcpu, err);
if (irqchip_in_kernel(vcpu->kvm))
return 1;
if (cr8_prev <= cr8)
@@ -3176,8 +3240,9 @@ static int handle_cr(struct kvm_vcpu *vcpu)
case 1: /*mov from cr*/
switch (cr) {
case 3:
kvm_register_write(vcpu, reg, vcpu->arch.cr3);
trace_kvm_cr_read(cr, vcpu->arch.cr3);
val = kvm_read_cr3(vcpu);
kvm_register_write(vcpu, reg, val);
trace_kvm_cr_read(cr, val);
skip_emulated_instruction(vcpu);
return 1;
case 8:
@@ -3349,6 +3414,11 @@ static int handle_vmx_insn(struct kvm_vcpu *vcpu)
return 1;
}

static int handle_invd(struct kvm_vcpu *vcpu)
{
return emulate_instruction(vcpu, 0) == EMULATE_DONE;
}

static int handle_invlpg(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -3377,7 +3447,7 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)

static int handle_apic_access(struct kvm_vcpu *vcpu)
{
return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;
return emulate_instruction(vcpu, 0) == EMULATE_DONE;
}

static int handle_task_switch(struct kvm_vcpu *vcpu)
@@ -3476,7 +3546,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)

gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
trace_kvm_page_fault(gpa, exit_qualification);
return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
return kvm_mmu_page_fault(vcpu, gpa, exit_qualification & 0x3, NULL, 0);
}

static u64 ept_rsvd_mask(u64 spte, int level)
@@ -3592,7 +3662,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
&& (kvm_get_rflags(&vmx->vcpu) & X86_EFLAGS_IF))
return handle_interrupt_window(&vmx->vcpu);

err = emulate_instruction(vcpu, 0, 0, 0);
err = emulate_instruction(vcpu, 0);

if (err == EMULATE_DO_MMIO) {
ret = 0;
@@ -3649,6 +3719,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_MSR_WRITE] = handle_wrmsr,
[EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
[EXIT_REASON_HLT] = handle_halt,
[EXIT_REASON_INVD] = handle_invd,
[EXIT_REASON_INVLPG] = handle_invlpg,
[EXIT_REASON_VMCALL] = handle_vmcall,
[EXIT_REASON_VMCLEAR] = handle_vmx_insn,
@@ -3676,6 +3747,12 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
static const int kvm_vmx_max_exit_handlers =
ARRAY_SIZE(kvm_vmx_exit_handlers);

static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
{
*info1 = vmcs_readl(EXIT_QUALIFICATION);
*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
}

/*
* The guest has exited. See if we can fix it or if we need userspace
* assistance.
@@ -3686,17 +3763,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
u32 exit_reason = vmx->exit_reason;
u32 vectoring_info = vmx->idt_vectoring_info;

trace_kvm_exit(exit_reason, vcpu);
trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);

/* If guest state is invalid, start emulating */
if (vmx->emulation_required && emulate_invalid_guest_state)
return handle_invalid_guest_state(vcpu);

/* Access CR3 don't cause VMExit in paging mode, so we need
* to sync with guest real CR3. */
if (enable_ept && is_paging(vcpu))
vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);

if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
vcpu->run->fail_entry.hardware_entry_failure_reason
@@ -4013,7 +4085,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
);

vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
| (1 << VCPU_EXREG_PDPTR));
| (1 << VCPU_EXREG_PDPTR)
| (1 << VCPU_EXREG_CR3));
vcpu->arch.regs_dirty = 0;

vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
@@ -4280,6 +4353,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.get_cpl = vmx_get_cpl,
.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
.decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
.decache_cr3 = vmx_decache_cr3,
.decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
.set_cr0 = vmx_set_cr0,
.set_cr3 = vmx_set_cr3,
@@ -4320,7 +4394,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
.get_tdp_level = get_ept_level,
.get_mt_mask = vmx_get_mt_mask,

.get_exit_info = vmx_get_exit_info,
.exit_reasons_str = vmx_exit_reasons_str,

.get_lpage_level = vmx_get_lpage_level,

.cpuid_update = vmx_cpuid_update,
@@ -4396,8 +4472,6 @@ static int __init vmx_init(void)

if (enable_ept) {
bypass_guest_pf = 0;
kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
VMX_EPT_WRITABLE_MASK);
kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
VMX_EPT_EXECUTABLE_MASK);
kvm_enable_tdp();
@@ -43,6 +43,7 @@
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/hash.h>
#include <trace/events/kvm.h>

#define CREATE_TRACE_POINTS
@@ -155,6 +156,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {

u64 __read_mostly host_xcr0;

static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
{
int i;
for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
vcpu->arch.apf.gfns[i] = ~0;
}

static void kvm_on_user_return(struct user_return_notifier *urn)
{
unsigned slot;
@@ -326,23 +334,28 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception);

void kvm_inject_page_fault(struct kvm_vcpu *vcpu)
void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
unsigned error_code = vcpu->arch.fault.error_code;
if (err)
kvm_inject_gp(vcpu, 0);
else
kvm_x86_ops->skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);

void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
++vcpu->stat.pf_guest;
vcpu->arch.cr2 = vcpu->arch.fault.address;
kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
vcpu->arch.cr2 = fault->address;
kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
}

void kvm_propagate_fault(struct kvm_vcpu *vcpu)
void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
if (mmu_is_nested(vcpu) && !vcpu->arch.fault.nested)
vcpu->arch.nested_mmu.inject_page_fault(vcpu);
if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
else
vcpu->arch.mmu.inject_page_fault(vcpu);

vcpu->arch.fault.nested = false;
vcpu->arch.mmu.inject_page_fault(vcpu, fault);
}

void kvm_inject_nmi(struct kvm_vcpu *vcpu)
@@ -460,8 +473,8 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
(unsigned long *)&vcpu->arch.regs_avail))
return true;

gfn = (vcpu->arch.cr3 & ~31u) >> PAGE_SHIFT;
offset = (vcpu->arch.cr3 & ~31u) & (PAGE_SIZE - 1);
gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT;
offset = (kvm_read_cr3(vcpu) & ~31u) & (PAGE_SIZE - 1);
r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
PFERR_USER_MASK | PFERR_WRITE_MASK);
if (r < 0)
@@ -506,12 +519,15 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
} else
#endif
if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
vcpu->arch.cr3))
kvm_read_cr3(vcpu)))
return 1;
}

kvm_x86_ops->set_cr0(vcpu, cr0);

if ((cr0 ^ old_cr0) & X86_CR0_PG)
kvm_clear_async_pf_completion_queue(vcpu);

if ((cr0 ^ old_cr0) & update_bits)
kvm_mmu_reset_context(vcpu);
return 0;
@@ -595,7 +611,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
&& ((cr4 ^ old_cr4) & pdptr_bits)
&& !load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3))
&& !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
kvm_read_cr3(vcpu)))
return 1;

if (cr4 & X86_CR4_VMXE)
@@ -615,7 +632,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);

int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
kvm_mmu_sync_roots(vcpu);
kvm_mmu_flush_tlb(vcpu);
return 0;
@@ -650,12 +667,13 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
return 1;
vcpu->arch.cr3 = cr3;
__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
vcpu->arch.mmu.new_cr3(vcpu);
return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr3);

int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
if (cr8 & CR8_RESERVED_BITS)
return 1;
@@ -665,12 +683,6 @@ int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
vcpu->arch.cr8 = cr8;
return 0;
}

void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
if (__kvm_set_cr8(vcpu, cr8))
kvm_inject_gp(vcpu, 0);
}
EXPORT_SYMBOL_GPL(kvm_set_cr8);

unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
@@ -775,12 +787,12 @@ EXPORT_SYMBOL_GPL(kvm_get_dr);
* kvm-specific. Those are put in the beginning of the list.
*/

#define KVM_SAVE_MSRS_BEGIN 7
#define KVM_SAVE_MSRS_BEGIN 8
static u32 msrs_to_save[] = {
MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
HV_X64_MSR_APIC_ASSIST_PAGE,
HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN,
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
MSR_STAR,
#ifdef CONFIG_X86_64
@@ -830,7 +842,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
kvm_x86_ops->set_efer(vcpu, efer);

vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
kvm_mmu_reset_context(vcpu);

/* Update reserved bits */
if ((efer ^ old_efer) & EFER_NX)
@@ -1418,6 +1429,30 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
return 0;
}

static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
{
gpa_t gpa = data & ~0x3f;

/* Bits 2:5 are resrved, Should be zero */
if (data & 0x3c)
return 1;

vcpu->arch.apf.msr_val = data;

if (!(data & KVM_ASYNC_PF_ENABLED)) {
kvm_clear_async_pf_completion_queue(vcpu);
kvm_async_pf_hash_reset(vcpu);
return 0;
}

if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa))
return 1;

vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
kvm_async_pf_wakeup_all(vcpu);
return 0;
}

int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
switch (msr) {
@@ -1499,6 +1534,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
}
break;
}
case MSR_KVM_ASYNC_PF_EN:
if (kvm_pv_enable_async_pf(vcpu, data))
return 1;
break;
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
@@ -1775,6 +1814,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_KVM_SYSTEM_TIME_NEW:
data = vcpu->arch.time;
break;
case MSR_KVM_ASYNC_PF_EN:
data = vcpu->arch.apf.msr_val;
break;
case MSR_IA32_P5_MC_ADDR:
case MSR_IA32_P5_MC_TYPE:
case MSR_IA32_MCG_CAP:
@@ -1904,6 +1946,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_NOP_IO_DELAY:
case KVM_CAP_MP_STATE:
case KVM_CAP_SYNC_MMU:
case KVM_CAP_USER_NMI:
case KVM_CAP_REINJECT_CONTROL:
case KVM_CAP_IRQ_INJECT_STATUS:
case KVM_CAP_ASSIGN_DEV_IRQ:
@@ -1922,6 +1965,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_DEBUGREGS:
case KVM_CAP_X86_ROBUST_SINGLESTEP:
case KVM_CAP_XSAVE:
case KVM_CAP_ASYNC_PF:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -2185,6 +2229,11 @@ out:
return r;
}

static void cpuid_mask(u32 *word, int wordnum)
{
*word &= boot_cpu_data.x86_capability[wordnum];
}

static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
u32 index)
{
@@ -2259,7 +2308,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
break;
case 1:
entry->edx &= kvm_supported_word0_x86_features;
cpuid_mask(&entry->edx, 0);
entry->ecx &= kvm_supported_word4_x86_features;
cpuid_mask(&entry->ecx, 4);
/* we support x2apic emulation even if host does not support
* it since we emulate x2apic in software */
entry->ecx |= F(X2APIC);
@@ -2350,7 +2401,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
break;
case 0x80000001:
entry->edx &= kvm_supported_word1_x86_features;
cpuid_mask(&entry->edx, 1);
entry->ecx &= kvm_supported_word6_x86_features;
cpuid_mask(&entry->ecx, 6);
break;
}

@@ -3169,20 +3222,18 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_memslots *slots, *old_slots;
unsigned long *dirty_bitmap;

r = -ENOMEM;
dirty_bitmap = vmalloc(n);
if (!dirty_bitmap)
goto out;
dirty_bitmap = memslot->dirty_bitmap_head;
if (memslot->dirty_bitmap == dirty_bitmap)
dirty_bitmap += n / sizeof(long);
memset(dirty_bitmap, 0, n);

r = -ENOMEM;
slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
if (!slots) {
vfree(dirty_bitmap);
if (!slots)
goto out;
}
memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
slots->generation++;

old_slots = kvm->memslots;
rcu_assign_pointer(kvm->memslots, slots);
@@ -3195,11 +3246,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
spin_unlock(&kvm->mmu_lock);

r = -EFAULT;
if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) {
vfree(dirty_bitmap);
if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
goto out;
}
vfree(dirty_bitmap);
} else {
r = -EFAULT;
if (clear_user(log->dirty_bitmap, n))
@@ -3266,8 +3314,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (vpic) {
r = kvm_ioapic_init(kvm);
if (r) {
mutex_lock(&kvm->slots_lock);
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
&vpic->dev);
mutex_unlock(&kvm->slots_lock);
kfree(vpic);
goto create_irqchip_unlock;
}
@@ -3278,10 +3328,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
smp_wmb();
r = kvm_setup_default_irq_routing(kvm);
if (r) {
mutex_lock(&kvm->slots_lock);
mutex_lock(&kvm->irq_lock);
kvm_ioapic_destroy(kvm);
kvm_destroy_pic(kvm);
mutex_unlock(&kvm->irq_lock);
mutex_unlock(&kvm->slots_lock);
}
create_irqchip_unlock:
mutex_unlock(&kvm->lock);
@@ -3557,63 +3609,63 @@ static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
{
gpa_t t_gpa;
u32 error;
struct x86_exception exception;

BUG_ON(!mmu_is_nested(vcpu));

/* NPT walks are always user-walks */
access |= PFERR_USER_MASK;
t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &error);
if (t_gpa == UNMAPPED_GVA)
vcpu->arch.fault.nested = true;
t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception);

return t_gpa;
}

gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception)
{
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error);
return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
}

gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception)
{
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
access |= PFERR_FETCH_MASK;
return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error);
return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
}

gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception)
{
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
access |= PFERR_WRITE_MASK;
return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error);
return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
}

/* uses this to access any guest's mapped memory without checking CPL */
gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception)
{
return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, error);
return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
}

static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
struct kvm_vcpu *vcpu, u32 access,
u32 *error)
struct x86_exception *exception)
{
void *data = val;
int r = X86EMUL_CONTINUE;

while (bytes) {
gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
error);
exception);
unsigned offset = addr & (PAGE_SIZE-1);
unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
int ret;

if (gpa == UNMAPPED_GVA) {
r = X86EMUL_PROPAGATE_FAULT;
goto out;
}
if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT;
ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
if (ret < 0) {
r = X86EMUL_IO_NEEDED;
@@ -3630,31 +3682,35 @@ out:

/* used for instruction fetching */
static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes,
struct kvm_vcpu *vcpu, u32 *error)
struct kvm_vcpu *vcpu,
struct x86_exception *exception)
{
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
access | PFERR_FETCH_MASK, error);
access | PFERR_FETCH_MASK,
exception);
}

static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
struct kvm_vcpu *vcpu, u32 *error)
struct kvm_vcpu *vcpu,
struct x86_exception *exception)
{
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
error);
exception);
}

static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes,
struct kvm_vcpu *vcpu, u32 *error)
struct kvm_vcpu *vcpu,
struct x86_exception *exception)
{
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error);
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
}

static int kvm_write_guest_virt_system(gva_t addr, void *val,
unsigned int bytes,
struct kvm_vcpu *vcpu,
u32 *error)
struct x86_exception *exception)
{
void *data = val;
int r = X86EMUL_CONTINUE;
@@ -3662,15 +3718,13 @@ static int kvm_write_guest_virt_system(gva_t addr, void *val,
while (bytes) {
gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
PFERR_WRITE_MASK,
error);
exception);
unsigned offset = addr & (PAGE_SIZE-1);
unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
int ret;

if (gpa == UNMAPPED_GVA) {
r = X86EMUL_PROPAGATE_FAULT;
goto out;
}
if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT;
ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
if (ret < 0) {
r = X86EMUL_IO_NEEDED;
@@ -3688,7 +3742,7 @@ out:
static int emulator_read_emulated(unsigned long addr,
void *val,
unsigned int bytes,
unsigned int *error_code,
struct x86_exception *exception,
struct kvm_vcpu *vcpu)
{
gpa_t gpa;
@@ -3701,7 +3755,7 @@ static int emulator_read_emulated(unsigned long addr,
return X86EMUL_CONTINUE;
}

gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, error_code);
gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, exception);

if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT;
@@ -3710,8 +3764,8 @@ static int emulator_read_emulated(unsigned long addr,
if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
goto mmio;

if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL)
== X86EMUL_CONTINUE)
if (kvm_read_guest_virt(addr, val, bytes, vcpu, exception)
== X86EMUL_CONTINUE)
return X86EMUL_CONTINUE;

mmio:
@@ -3735,7 +3789,7 @@ mmio:
}

int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes)
const void *val, int bytes)
{
int ret;

@@ -3749,12 +3803,12 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
static int emulator_write_emulated_onepage(unsigned long addr,
const void *val,
unsigned int bytes,
unsigned int *error_code,
struct x86_exception *exception,
struct kvm_vcpu *vcpu)
{
gpa_t gpa;

gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error_code);
gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);

if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT;
@@ -3787,7 +3841,7 @@ mmio:
int emulator_write_emulated(unsigned long addr,
const void *val,
unsigned int bytes,
unsigned int *error_code,
struct x86_exception *exception,
struct kvm_vcpu *vcpu)
{
/* Crossing a page boundary? */
@@ -3795,7 +3849,7 @@ int emulator_write_emulated(unsigned long addr,
int rc, now;

now = -addr & ~PAGE_MASK;
rc = emulator_write_emulated_onepage(addr, val, now, error_code,
rc = emulator_write_emulated_onepage(addr, val, now, exception,
vcpu);
if (rc != X86EMUL_CONTINUE)
return rc;
@@ -3803,7 +3857,7 @@ int emulator_write_emulated(unsigned long addr,
val += now;
bytes -= now;
}
return emulator_write_emulated_onepage(addr, val, bytes, error_code,
return emulator_write_emulated_onepage(addr, val, bytes, exception,
vcpu);
}

@@ -3821,7 +3875,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
const void *old,
const void *new,
unsigned int bytes,
unsigned int *error_code,
struct x86_exception *exception,
struct kvm_vcpu *vcpu)
{
gpa_t gpa;
@@ -3879,7 +3933,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
emul_write:
printk_once(KERN_WARNING "kvm: emulating exchange as write\n");

return emulator_write_emulated(addr, new, bytes, error_code, vcpu);
return emulator_write_emulated(addr, new, bytes, exception, vcpu);
}

static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
@@ -3904,7 +3958,7 @@ static int emulator_pio_in_emulated(int size, unsigned short port, void *val,
if (vcpu->arch.pio.count)
goto data_avail;

trace_kvm_pio(0, port, size, 1);
trace_kvm_pio(0, port, size, count);

vcpu->arch.pio.port = port;
vcpu->arch.pio.in = 1;
@@ -3932,7 +3986,7 @@ static int emulator_pio_out_emulated(int size, unsigned short port,
const void *val, unsigned int count,
struct kvm_vcpu *vcpu)
{
trace_kvm_pio(1, port, size, 1);
trace_kvm_pio(1, port, size, count);

vcpu->arch.pio.port = port;
vcpu->arch.pio.in = 0;
@@ -3973,13 +4027,15 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
return X86EMUL_CONTINUE;

if (kvm_x86_ops->has_wbinvd_exit()) {
preempt_disable();
int cpu = get_cpu();

cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
wbinvd_ipi, NULL, 1);
preempt_enable();
put_cpu();
cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
}
wbinvd();
} else
wbinvd();
return X86EMUL_CONTINUE;
}
EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
@@ -4019,7 +4075,7 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu)
value = vcpu->arch.cr2;
break;
case 3:
value = vcpu->arch.cr3;
value = kvm_read_cr3(vcpu);
break;
case 4:
value = kvm_read_cr4(vcpu);
@@ -4053,7 +4109,7 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
break;
case 8:
res = __kvm_set_cr8(vcpu, val & 0xfUL);
res = kvm_set_cr8(vcpu, val);
break;
default:
vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
@@ -4206,12 +4262,13 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
static void inject_emulated_exception(struct kvm_vcpu *vcpu)
{
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
if (ctxt->exception == PF_VECTOR)
kvm_propagate_fault(vcpu);
else if (ctxt->error_code_valid)
kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code);
if (ctxt->exception.vector == PF_VECTOR)
kvm_propagate_fault(vcpu, &ctxt->exception);
else if (ctxt->exception.error_code_valid)
kvm_queue_exception_e(vcpu, ctxt->exception.vector,
ctxt->exception.error_code);
else
kvm_queue_exception(vcpu, ctxt->exception);
kvm_queue_exception(vcpu, ctxt->exception.vector);
}

static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
@@ -4267,13 +4324,19 @@ EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);

static int handle_emulation_failure(struct kvm_vcpu *vcpu)
{
int r = EMULATE_DONE;

++vcpu->stat.insn_emulation_fail;
trace_kvm_emulate_insn_failed(vcpu);
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
if (!is_guest_mode(vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
r = EMULATE_FAIL;
}
kvm_queue_exception(vcpu, UD_VECTOR);
return EMULATE_FAIL;

return r;
}

static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
@ -4302,10 +4365,11 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
|
|||
return false;
|
||||
}
|
||||
|
||||
int emulate_instruction(struct kvm_vcpu *vcpu,
|
||||
unsigned long cr2,
|
||||
u16 error_code,
|
||||
int emulation_type)
|
||||
int x86_emulate_instruction(struct kvm_vcpu *vcpu,
|
||||
unsigned long cr2,
|
||||
int emulation_type,
|
||||
void *insn,
|
||||
int insn_len)
|
||||
{
|
||||
int r;
|
||||
struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
|
||||
|
@ -4323,10 +4387,10 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
|
|||
if (!(emulation_type & EMULTYPE_NO_DECODE)) {
|
||||
init_emulate_ctxt(vcpu);
|
||||
vcpu->arch.emulate_ctxt.interruptibility = 0;
|
||||
vcpu->arch.emulate_ctxt.exception = -1;
|
||||
vcpu->arch.emulate_ctxt.have_exception = false;
|
||||
vcpu->arch.emulate_ctxt.perm_ok = false;
|
||||
|
||||
r = x86_decode_insn(&vcpu->arch.emulate_ctxt);
|
||||
r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len);
|
||||
if (r == X86EMUL_PROPAGATE_FAULT)
|
||||
goto done;
|
||||
|
||||
|
@ -4389,7 +4453,7 @@ restart:
|
|||
}
|
||||
|
||||
done:
|
||||
if (vcpu->arch.emulate_ctxt.exception >= 0) {
|
||||
if (vcpu->arch.emulate_ctxt.have_exception) {
|
||||
inject_emulated_exception(vcpu);
|
||||
r = EMULATE_DONE;
|
||||
} else if (vcpu->arch.pio.count) {
|
||||
|
@ -4413,7 +4477,7 @@ done:
|
|||
|
||||
return r;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(emulate_instruction);
|
||||
EXPORT_SYMBOL_GPL(x86_emulate_instruction);
|
||||
|
||||
int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
|
||||
{
|
||||
|
@ -4653,7 +4717,6 @@ int kvm_arch_init(void *opaque)
|
|||
|
||||
kvm_x86_ops = ops;
|
||||
kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
|
||||
kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
|
||||
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
|
||||
PT_DIRTY_MASK, PT64_NX_MASK, 0);
|
||||
|
||||
|
@ -5116,6 +5179,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
|||
vcpu->fpu_active = 0;
|
||||
kvm_x86_ops->fpu_deactivate(vcpu);
|
||||
}
|
||||
if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
|
||||
/* Page is swapped out. Do synthetic halt */
|
||||
vcpu->arch.apf.halted = true;
|
||||
r = 1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
r = kvm_mmu_reload(vcpu);
|
||||
|
@ -5244,7 +5313,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
|
|||
|
||||
r = 1;
|
||||
while (r > 0) {
|
||||
if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
|
||||
if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
|
||||
!vcpu->arch.apf.halted)
|
||||
r = vcpu_enter_guest(vcpu);
|
||||
else {
|
||||
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
|
||||
|
@ -5257,6 +5327,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
|
|||
vcpu->arch.mp_state =
|
||||
KVM_MP_STATE_RUNNABLE;
|
||||
case KVM_MP_STATE_RUNNABLE:
|
||||
vcpu->arch.apf.halted = false;
|
||||
break;
|
||||
case KVM_MP_STATE_SIPI_RECEIVED:
|
||||
default:
|
||||
|
@ -5278,6 +5349,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
|
|||
vcpu->run->exit_reason = KVM_EXIT_INTR;
|
||||
++vcpu->stat.request_irq_exits;
|
||||
}
|
||||
|
||||
kvm_check_async_pf_completion(vcpu);
|
||||
|
||||
if (signal_pending(current)) {
|
||||
r = -EINTR;
|
||||
vcpu->run->exit_reason = KVM_EXIT_INTR;
|
||||
|
@ -5302,6 +5376,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|||
int r;
|
||||
sigset_t sigsaved;
|
||||
|
||||
if (!tsk_used_math(current) && init_fpu(current))
|
||||
return -ENOMEM;
|
||||
|
||||
if (vcpu->sigset_active)
|
||||
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
|
||||
|
||||
|
@ -5313,8 +5390,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|||
}
|
||||
|
||||
/* re-sync apic's tpr */
|
||||
if (!irqchip_in_kernel(vcpu->kvm))
|
||||
kvm_set_cr8(vcpu, kvm_run->cr8);
|
||||
if (!irqchip_in_kernel(vcpu->kvm)) {
|
||||
if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
|
||||
r = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (vcpu->arch.pio.count || vcpu->mmio_needed) {
|
||||
if (vcpu->mmio_needed) {
|
||||
|
@ -5323,7 +5404,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|||
vcpu->mmio_needed = 0;
|
||||
}
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE);
|
||||
r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
||||
if (r != EMULATE_DONE) {
|
||||
r = 0;
|
||||
|
@ -5436,7 +5517,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
|
|||
|
||||
sregs->cr0 = kvm_read_cr0(vcpu);
|
||||
sregs->cr2 = vcpu->arch.cr2;
|
||||
sregs->cr3 = vcpu->arch.cr3;
|
||||
sregs->cr3 = kvm_read_cr3(vcpu);
|
||||
sregs->cr4 = kvm_read_cr4(vcpu);
|
||||
sregs->cr8 = kvm_get_cr8(vcpu);
|
||||
sregs->efer = vcpu->arch.efer;
|
||||
|
@ -5504,8 +5585,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
|||
kvm_x86_ops->set_gdt(vcpu, &dt);
|
||||
|
||||
vcpu->arch.cr2 = sregs->cr2;
|
||||
mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
|
||||
mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
|
||||
vcpu->arch.cr3 = sregs->cr3;
|
||||
__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
|
||||
|
||||
kvm_set_cr8(vcpu, sregs->cr8);
|
||||
|
||||
|
@ -5522,7 +5604,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
|||
if (sregs->cr4 & X86_CR4_OSXSAVE)
|
||||
update_cpuid(vcpu);
|
||||
if (!is_long_mode(vcpu) && is_pae(vcpu)) {
|
||||
load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3);
|
||||
load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
|
||||
mmu_reset_needed = 1;
|
||||
}
|
||||
|
||||
|
@ -5773,6 +5855,8 @@ free_vcpu:
|
|||
|
||||
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.apf.msr_val = 0;
|
||||
|
||||
vcpu_load(vcpu);
|
||||
kvm_mmu_unload(vcpu);
|
||||
vcpu_put(vcpu);
|
||||
|
@ -5792,6 +5876,11 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
|
|||
vcpu->arch.dr7 = DR7_FIXED_1;
|
||||
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
vcpu->arch.apf.msr_val = 0;
|
||||
|
||||
kvm_clear_async_pf_completion_queue(vcpu);
|
||||
kvm_async_pf_hash_reset(vcpu);
|
||||
vcpu->arch.apf.halted = false;
|
||||
|
||||
return kvm_x86_ops->vcpu_reset(vcpu);
|
||||
}
|
||||
|
@ -5881,6 +5970,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
|||
if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
|
||||
goto fail_free_mce_banks;
|
||||
|
||||
kvm_async_pf_hash_reset(vcpu);
|
||||
|
||||
return 0;
|
||||
fail_free_mce_banks:
|
||||
kfree(vcpu->arch.mce_banks);
|
||||
|
@ -5906,13 +5997,8 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
|
|||
free_page((unsigned long)vcpu->arch.pio_data);
|
||||
}
|
||||
|
||||
struct kvm *kvm_arch_create_vm(void)
|
||||
int kvm_arch_init_vm(struct kvm *kvm)
|
||||
{
|
||||
struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
|
||||
|
||||
if (!kvm)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
|
||||
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
|
||||
|
||||
|
@ -5921,7 +6007,7 @@ struct kvm *kvm_arch_create_vm(void)
|
|||
|
||||
spin_lock_init(&kvm->arch.tsc_write_lock);
|
||||
|
||||
return kvm;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
|
||||
|
@ -5939,8 +6025,10 @@ static void kvm_free_vcpus(struct kvm *kvm)
|
|||
/*
|
||||
* Unpin any mmu pages first.
|
||||
*/
|
||||
kvm_for_each_vcpu(i, vcpu, kvm)
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
kvm_clear_async_pf_completion_queue(vcpu);
|
||||
kvm_unload_vcpu_mmu(vcpu);
|
||||
}
|
||||
kvm_for_each_vcpu(i, vcpu, kvm)
|
||||
kvm_arch_vcpu_free(vcpu);
|
||||
|
||||
|
@ -5964,13 +6052,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
|
|||
kfree(kvm->arch.vpic);
|
||||
kfree(kvm->arch.vioapic);
|
||||
kvm_free_vcpus(kvm);
|
||||
kvm_free_physmem(kvm);
|
||||
if (kvm->arch.apic_access_page)
|
||||
put_page(kvm->arch.apic_access_page);
|
||||
if (kvm->arch.ept_identity_pagetable)
|
||||
put_page(kvm->arch.ept_identity_pagetable);
|
||||
cleanup_srcu_struct(&kvm->srcu);
|
||||
kfree(kvm);
|
||||
}
|
||||
|
||||
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
|
@ -6051,7 +6136,9 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
|
|||
|
||||
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
|
||||
return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
|
||||
!vcpu->arch.apf.halted)
|
||||
|| !list_empty_careful(&vcpu->async_pf.done)
|
||||
|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
|
||||
|| vcpu->arch.nmi_pending ||
|
||||
(kvm_arch_interrupt_allowed(vcpu) &&
|
||||
|
@ -6110,6 +6197,147 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_set_rflags);
|
||||
|
||||
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
|
||||
{
|
||||
int r;
|
||||
|
||||
if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
|
||||
is_error_page(work->page))
|
||||
return;
|
||||
|
||||
r = kvm_mmu_reload(vcpu);
|
||||
if (unlikely(r))
|
||||
return;
|
||||
|
||||
if (!vcpu->arch.mmu.direct_map &&
|
||||
work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
|
||||
return;
|
||||
|
||||
vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
|
||||
}
|
||||
|
||||
static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
{
	return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
}

static inline u32 kvm_async_pf_next_probe(u32 key)
{
	return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
}

static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	u32 key = kvm_async_pf_hash_fn(gfn);

	while (vcpu->arch.apf.gfns[key] != ~0)
		key = kvm_async_pf_next_probe(key);

	vcpu->arch.apf.gfns[key] = gfn;
}

static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	int i;
	u32 key = kvm_async_pf_hash_fn(gfn);

	for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
		     (vcpu->arch.apf.gfns[key] != gfn &&
		      vcpu->arch.apf.gfns[key] != ~0); i++)
		key = kvm_async_pf_next_probe(key);

	return key;
}

bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
}

static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	u32 i, j, k;

	i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
	while (true) {
		vcpu->arch.apf.gfns[i] = ~0;
		do {
			j = kvm_async_pf_next_probe(j);
			if (vcpu->arch.apf.gfns[j] == ~0)
				return;
			k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
			/*
			 * k lies cyclically in ]i,j]
			 * |    i.k.j |
			 * |....j i.k.| or  |.k..j i...|
			 */
		} while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
		vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
		i = j;
	}
}
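The deletion routine above is backward-shift deletion for an open-addressing table with linear probing: instead of leaving tombstones, it pulls later members of the same probe chain forward so lookups never break. Below is a minimal userspace C sketch of the same idea, assuming a small power-of-two table and ~0 as the "empty" marker; the names and hash multiplier are illustrative, not KVM's.

#include <stdint.h>
#include <stdio.h>

#define TABLE_SIZE 64			/* must be a power of two */
#define EMPTY      (~(uint64_t)0)	/* same "empty" marker as the gfns[] array */

static uint64_t table[TABLE_SIZE];

static uint32_t hash_fn(uint64_t key)
{
	/* stand-in for hash_32(); any reasonable hash works for the demo */
	return (uint32_t)(key * 2654435761u) & (TABLE_SIZE - 1);
}

static uint32_t next_probe(uint32_t slot)
{
	return (slot + 1) & (TABLE_SIZE - 1);	/* wrap with the power-of-two mask */
}

static void add(uint64_t key)
{
	uint32_t slot = hash_fn(key);

	while (table[slot] != EMPTY)
		slot = next_probe(slot);
	table[slot] = key;
}

static uint32_t find_slot(uint64_t key)
{
	uint32_t slot = hash_fn(key);

	while (table[slot] != key && table[slot] != EMPTY)
		slot = next_probe(slot);
	return slot;
}

/* Backward-shift delete: pull later entries of the same probe chain forward. */
static void del(uint64_t key)
{
	uint32_t i, j, k;

	i = j = find_slot(key);
	if (table[i] == EMPTY)
		return;
	for (;;) {
		table[i] = EMPTY;
		do {
			j = next_probe(j);
			if (table[j] == EMPTY)
				return;
			k = hash_fn(table[j]);
			/* keep j's entry only if its home slot k lies cyclically in (i, j] */
		} while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
		table[i] = table[j];
		i = j;
	}
}

int main(void)
{
	for (int i = 0; i < TABLE_SIZE; i++)
		table[i] = EMPTY;
	/* keys differing by TABLE_SIZE hash to the same slot, forcing a chain */
	add(10); add(10 + TABLE_SIZE); add(10 + 2 * TABLE_SIZE);
	del(10 + TABLE_SIZE);
	printf("10 still present: %d\n", table[find_slot(10)] == 10);
	printf("chain intact:     %d\n",
	       table[find_slot(10 + 2 * TABLE_SIZE)] == 10 + 2 * TABLE_SIZE);
	return 0;
}

Deleting the middle element of the chain still leaves the later one findable, which is exactly what tombstone-free deletion has to guarantee.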
|
||||
|
||||
static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
{

	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
				      sizeof(val));
}
|
||||
|
||||
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	struct x86_exception fault;

	trace_kvm_async_pf_not_present(work->arch.token, work->gva);
	kvm_add_async_pf_gfn(vcpu, work->arch.gfn);

	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
	    (vcpu->arch.apf.send_user_only &&
	     kvm_x86_ops->get_cpl(vcpu) == 0))
		kvm_make_request(KVM_REQ_APF_HALT, vcpu);
	else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
		fault.vector = PF_VECTOR;
		fault.error_code_valid = true;
		fault.error_code = 0;
		fault.nested_page_fault = false;
		fault.address = work->arch.token;
		kvm_inject_page_fault(vcpu, &fault);
	}
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	struct x86_exception fault;

	trace_kvm_async_pf_ready(work->arch.token, work->gva);
	if (is_error_page(work->page))
		work->arch.token = ~0; /* broadcast wakeup */
	else
		kvm_del_async_pf_gfn(vcpu, work->arch.gfn);

	if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
	    !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
		fault.vector = PF_VECTOR;
		fault.error_code_valid = true;
		fault.error_code = 0;
		fault.nested_page_fault = false;
		fault.address = work->arch.token;
		kvm_inject_page_fault(vcpu, &fault);
	}
	vcpu->arch.apf.halted = false;
}
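Both handlers above deliver the notification as a #PF whose fault address is the async-PF token, after writing a reason word into the shared area via apf_put_user(). On the guest side the paravirtualized fault handler reads and clears that reason word to decide whether the #PF is a real fault or an async-PF message. The sketch below shows that read-and-reset step in plain C; the structure layout and names are assumptions for illustration, not copied from the guest implementation.

#include <stdint.h>
#include <stdio.h>

#define PV_REASON_PAGE_NOT_PRESENT 1
#define PV_REASON_PAGE_READY       2

/* Shared per-vCPU area the host writes into (layout assumed for the sketch). */
struct apf_shared {
	uint32_t reason;
};

static struct apf_shared apf_data;

/* Read the pending reason and clear it, so a nested fault sees 0 again. */
static uint32_t read_and_reset_reason(void)
{
	uint32_t reason = apf_data.reason;

	apf_data.reason = 0;
	return reason;
}

static void page_fault_handler(uint64_t cr2)
{
	switch (read_and_reset_reason()) {
	case PV_REASON_PAGE_NOT_PRESENT:
		printf("async PF: park task waiting for token %#llx\n",
		       (unsigned long long)cr2);
		break;
	case PV_REASON_PAGE_READY:
		printf("async PF: wake waiters of token %#llx\n",
		       (unsigned long long)cr2);
		break;
	default:
		printf("ordinary #PF at %#llx\n", (unsigned long long)cr2);
		break;
	}
}

int main(void)
{
	apf_data.reason = PV_REASON_PAGE_NOT_PRESENT;
	page_fault_handler(0x42);	/* host injected token 0x42 as the address */
	page_fault_handler(0x1000);	/* reason is 0 again -> treated as a real fault */
	return 0;
}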
|
||||
|
||||
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
|
||||
return true;
|
||||
else
|
||||
return !kvm_event_needs_reinjection(vcpu) &&
|
||||
kvm_x86_ops->interrupt_allowed(vcpu);
|
||||
}
|
||||
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
|
||||
@ -540,6 +540,7 @@ struct kvm_ppc_pvinfo {
|
|||
#endif
|
||||
#define KVM_CAP_PPC_GET_PVINFO 57
|
||||
#define KVM_CAP_PPC_IRQ_LEVEL 58
|
||||
#define KVM_CAP_ASYNC_PF 59
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
#include <linux/mm.h>
|
||||
#include <linux/preempt.h>
|
||||
#include <linux/msi.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <asm/signal.h>
|
||||
|
||||
#include <linux/kvm.h>
|
||||
|
@ -40,6 +42,7 @@
|
|||
#define KVM_REQ_KICK 9
|
||||
#define KVM_REQ_DEACTIVATE_FPU 10
|
||||
#define KVM_REQ_EVENT 11
|
||||
#define KVM_REQ_APF_HALT 12
|
||||
|
||||
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
|
||||
|
||||
|
@ -74,6 +77,27 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
|
|||
int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
|
||||
struct kvm_io_device *dev);
|
||||
|
||||
#ifdef CONFIG_KVM_ASYNC_PF
struct kvm_async_pf {
	struct work_struct work;
	struct list_head link;
	struct list_head queue;
	struct kvm_vcpu *vcpu;
	struct mm_struct *mm;
	gva_t gva;
	unsigned long addr;
	struct kvm_arch_async_pf arch;
	struct page *page;
	bool done;
};

void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
		       struct kvm_arch_async_pf *arch);
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
#endif
|
||||
|
||||
struct kvm_vcpu {
|
||||
struct kvm *kvm;
|
||||
#ifdef CONFIG_PREEMPT_NOTIFIERS
|
||||
|
@ -104,6 +128,15 @@ struct kvm_vcpu {
|
|||
gpa_t mmio_phys_addr;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KVM_ASYNC_PF
|
||||
struct {
|
||||
u32 queued;
|
||||
struct list_head queue;
|
||||
struct list_head done;
|
||||
spinlock_t lock;
|
||||
} async_pf;
|
||||
#endif
|
||||
|
||||
struct kvm_vcpu_arch arch;
|
||||
};
|
||||
|
||||
|
@ -113,16 +146,19 @@ struct kvm_vcpu {
|
|||
*/
|
||||
#define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)
|
||||
|
||||
struct kvm_lpage_info {
|
||||
unsigned long rmap_pde;
|
||||
int write_count;
|
||||
};
|
||||
|
||||
struct kvm_memory_slot {
|
||||
gfn_t base_gfn;
|
||||
unsigned long npages;
|
||||
unsigned long flags;
|
||||
unsigned long *rmap;
|
||||
unsigned long *dirty_bitmap;
|
||||
struct {
|
||||
unsigned long rmap_pde;
|
||||
int write_count;
|
||||
} *lpage_info[KVM_NR_PAGE_SIZES - 1];
|
||||
unsigned long *dirty_bitmap_head;
|
||||
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
|
||||
unsigned long userspace_addr;
|
||||
int user_alloc;
|
||||
int id;
|
||||
|
@ -169,6 +205,7 @@ struct kvm_irq_routing_table {};
|
|||
|
||||
struct kvm_memslots {
|
||||
int nmemslots;
|
||||
u64 generation;
|
||||
struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
|
||||
KVM_PRIVATE_MEM_SLOTS];
|
||||
};
|
||||
|
@ -206,6 +243,10 @@ struct kvm {
|
|||
|
||||
struct mutex irq_lock;
|
||||
#ifdef CONFIG_HAVE_KVM_IRQCHIP
|
||||
/*
|
||||
* Update side is protected by irq_lock and,
|
||||
* if configured, irqfds.lock.
|
||||
*/
|
||||
struct kvm_irq_routing_table __rcu *irq_routing;
|
||||
struct hlist_head mask_notifier_list;
|
||||
struct hlist_head irq_ack_notifier_list;
|
||||
|
@ -216,6 +257,7 @@ struct kvm {
|
|||
unsigned long mmu_notifier_seq;
|
||||
long mmu_notifier_count;
|
||||
#endif
|
||||
long tlbs_dirty;
|
||||
};
|
||||
|
||||
/* The guest did something we don't support. */
|
||||
|
@ -302,7 +344,11 @@ void kvm_set_page_accessed(struct page *page);
|
|||
|
||||
pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
|
||||
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
|
||||
pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
|
||||
bool write_fault, bool *writable);
|
||||
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
|
||||
pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
|
||||
bool *writable);
|
||||
pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot, gfn_t gfn);
|
||||
int memslot_id(struct kvm *kvm, gfn_t gfn);
|
||||
|
@ -321,18 +367,25 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
|
|||
int offset, int len);
|
||||
int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
|
||||
unsigned long len);
|
||||
int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
|
||||
void *data, unsigned long len);
|
||||
int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
|
||||
gpa_t gpa);
|
||||
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
|
||||
int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
|
||||
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
|
||||
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
|
||||
unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
|
||||
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
|
||||
void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
gfn_t gfn);
|
||||
|
||||
void kvm_vcpu_block(struct kvm_vcpu *vcpu);
|
||||
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
|
||||
void kvm_resched(struct kvm_vcpu *vcpu);
|
||||
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
|
||||
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
|
||||
|
||||
void kvm_flush_remote_tlbs(struct kvm *kvm);
|
||||
void kvm_reload_remote_mmus(struct kvm *kvm);
|
||||
|
||||
|
@ -398,7 +451,19 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
|
|||
|
||||
void kvm_free_physmem(struct kvm *kvm);
|
||||
|
||||
struct kvm *kvm_arch_create_vm(void);
|
||||
#ifndef __KVM_HAVE_ARCH_VM_ALLOC
|
||||
static inline struct kvm *kvm_arch_alloc_vm(void)
|
||||
{
|
||||
return kzalloc(sizeof(struct kvm), GFP_KERNEL);
|
||||
}
|
||||
|
||||
static inline void kvm_arch_free_vm(struct kvm *kvm)
|
||||
{
|
||||
kfree(kvm);
|
||||
}
|
||||
#endif
|
||||
|
||||
int kvm_arch_init_vm(struct kvm *kvm);
|
||||
void kvm_arch_destroy_vm(struct kvm *kvm);
|
||||
void kvm_free_all_assigned_devices(struct kvm *kvm);
|
||||
void kvm_arch_sync_events(struct kvm *kvm);
|
||||
|
@ -414,16 +479,8 @@ struct kvm_irq_ack_notifier {
|
|||
void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
|
||||
};
|
||||
|
||||
#define KVM_ASSIGNED_MSIX_PENDING 0x1
|
||||
struct kvm_guest_msix_entry {
|
||||
u32 vector;
|
||||
u16 entry;
|
||||
u16 flags;
|
||||
};
|
||||
|
||||
struct kvm_assigned_dev_kernel {
|
||||
struct kvm_irq_ack_notifier ack_notifier;
|
||||
struct work_struct interrupt_work;
|
||||
struct list_head list;
|
||||
int assigned_dev_id;
|
||||
int host_segnr;
|
||||
|
@ -434,13 +491,14 @@ struct kvm_assigned_dev_kernel {
|
|||
bool host_irq_disabled;
|
||||
struct msix_entry *host_msix_entries;
|
||||
int guest_irq;
|
||||
struct kvm_guest_msix_entry *guest_msix_entries;
|
||||
struct msix_entry *guest_msix_entries;
|
||||
unsigned long irq_requested_type;
|
||||
int irq_source_id;
|
||||
int flags;
|
||||
struct pci_dev *dev;
|
||||
struct kvm *kvm;
|
||||
spinlock_t assigned_dev_lock;
|
||||
spinlock_t intx_lock;
|
||||
char irq_name[32];
|
||||
};
|
||||
|
||||
struct kvm_irq_mask_notifier {
|
||||
|
@ -462,6 +520,8 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
|
|||
unsigned long *deliver_bitmask);
|
||||
#endif
|
||||
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
|
||||
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
|
||||
int irq_source_id, int level);
|
||||
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
|
||||
void kvm_register_irq_ack_notifier(struct kvm *kvm,
|
||||
struct kvm_irq_ack_notifier *kian);
|
||||
|
@ -603,17 +663,28 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
|
|||
void kvm_eventfd_init(struct kvm *kvm);
|
||||
int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
|
||||
void kvm_irqfd_release(struct kvm *kvm);
|
||||
void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
|
||||
int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
|
||||
|
||||
#else
|
||||
|
||||
static inline void kvm_eventfd_init(struct kvm *kvm) {}
|
||||
|
||||
static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static inline void kvm_irqfd_release(struct kvm *kvm) {}
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_IRQCHIP
|
||||
static inline void kvm_irq_routing_update(struct kvm *kvm,
|
||||
struct kvm_irq_routing_table *irq_rt)
|
||||
{
|
||||
rcu_assign_pointer(kvm->irq_routing, irq_rt);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
|
||||
{
|
||||
return -ENOSYS;
|
||||
@ -67,4 +67,11 @@ struct kvm_lapic_irq {
|
|||
u32 dest_id;
|
||||
};
|
||||
|
||||
struct gfn_to_hva_cache {
	u64 generation;
	gpa_t gpa;
	unsigned long hva;
	struct kvm_memory_slot *memslot;
};
|
||||
|
||||
#endif /* __KVM_TYPES_H__ */
|
||||
|
|
|
@ -6,6 +6,36 @@
|
|||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM kvm
|
||||
|
||||
#define ERSN(x) { KVM_EXIT_##x, "KVM_EXIT_" #x }
|
||||
|
||||
#define kvm_trace_exit_reason \
|
||||
ERSN(UNKNOWN), ERSN(EXCEPTION), ERSN(IO), ERSN(HYPERCALL), \
|
||||
ERSN(DEBUG), ERSN(HLT), ERSN(MMIO), ERSN(IRQ_WINDOW_OPEN), \
|
||||
ERSN(SHUTDOWN), ERSN(FAIL_ENTRY), ERSN(INTR), ERSN(SET_TPR), \
|
||||
ERSN(TPR_ACCESS), ERSN(S390_SIEIC), ERSN(S390_RESET), ERSN(DCR),\
|
||||
ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI)
|
||||
|
||||
TRACE_EVENT(kvm_userspace_exit,
|
||||
TP_PROTO(__u32 reason, int errno),
|
||||
TP_ARGS(reason, errno),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( __u32, reason )
|
||||
__field( int, errno )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->reason = reason;
|
||||
__entry->errno = errno;
|
||||
),
|
||||
|
||||
TP_printk("reason %s (%d)",
|
||||
__entry->errno < 0 ?
|
||||
(__entry->errno == -EINTR ? "restart" : "error") :
|
||||
__print_symbolic(__entry->reason, kvm_trace_exit_reason),
|
||||
__entry->errno < 0 ? -__entry->errno : __entry->reason)
|
||||
);
|
||||
|
||||
#if defined(__KVM_HAVE_IOAPIC)
|
||||
TRACE_EVENT(kvm_set_irq,
|
||||
TP_PROTO(unsigned int gsi, int level, int irq_source_id),
|
||||
|
@ -185,6 +215,97 @@ TRACE_EVENT(kvm_age_page,
|
|||
__entry->referenced ? "YOUNG" : "OLD")
|
||||
);
|
||||
|
||||
#ifdef CONFIG_KVM_ASYNC_PF
|
||||
DECLARE_EVENT_CLASS(kvm_async_get_page_class,
|
||||
|
||||
TP_PROTO(u64 gva, u64 gfn),
|
||||
|
||||
TP_ARGS(gva, gfn),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(__u64, gva)
|
||||
__field(u64, gfn)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->gva = gva;
|
||||
__entry->gfn = gfn;
|
||||
),
|
||||
|
||||
TP_printk("gva = %#llx, gfn = %#llx", __entry->gva, __entry->gfn)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(kvm_async_get_page_class, kvm_try_async_get_page,
|
||||
|
||||
TP_PROTO(u64 gva, u64 gfn),
|
||||
|
||||
TP_ARGS(gva, gfn)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(kvm_async_get_page_class, kvm_async_pf_doublefault,
|
||||
|
||||
TP_PROTO(u64 gva, u64 gfn),
|
||||
|
||||
TP_ARGS(gva, gfn)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(kvm_async_pf_nopresent_ready,
|
||||
|
||||
TP_PROTO(u64 token, u64 gva),
|
||||
|
||||
TP_ARGS(token, gva),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(__u64, token)
|
||||
__field(__u64, gva)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->token = token;
|
||||
__entry->gva = gva;
|
||||
),
|
||||
|
||||
TP_printk("token %#llx gva %#llx", __entry->token, __entry->gva)
|
||||
|
||||
);
|
||||
|
||||
DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_not_present,
|
||||
|
||||
TP_PROTO(u64 token, u64 gva),
|
||||
|
||||
TP_ARGS(token, gva)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready,
|
||||
|
||||
TP_PROTO(u64 token, u64 gva),
|
||||
|
||||
TP_ARGS(token, gva)
|
||||
);
|
||||
|
||||
TRACE_EVENT(
|
||||
kvm_async_pf_completed,
|
||||
TP_PROTO(unsigned long address, struct page *page, u64 gva),
|
||||
TP_ARGS(address, page, gva),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(unsigned long, address)
|
||||
__field(pfn_t, pfn)
|
||||
__field(u64, gva)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->address = address;
|
||||
__entry->pfn = page ? page_to_pfn(page) : 0;
|
||||
__entry->gva = gva;
|
||||
),
|
||||
|
||||
TP_printk("gva %#llx address %#lx pfn %#llx", __entry->gva,
|
||||
__entry->address, __entry->pfn)
|
||||
);
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* _TRACE_KVM_MAIN_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
|
|
@ -15,3 +15,6 @@ config KVM_APIC_ARCHITECTURE
|
|||
|
||||
config KVM_MMIO
|
||||
bool
|
||||
|
||||
config KVM_ASYNC_PF
|
||||
bool
|
||||
|
|
|
@ -55,58 +55,31 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
|
|||
return index;
|
||||
}
|
||||
|
||||
static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
|
||||
static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id)
|
||||
{
|
||||
struct kvm_assigned_dev_kernel *assigned_dev;
|
||||
int i;
|
||||
struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
|
||||
u32 vector;
|
||||
int index;
|
||||
|
||||
assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
|
||||
interrupt_work);
|
||||
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) {
|
||||
spin_lock(&assigned_dev->intx_lock);
|
||||
disable_irq_nosync(irq);
|
||||
assigned_dev->host_irq_disabled = true;
|
||||
spin_unlock(&assigned_dev->intx_lock);
|
||||
}
|
||||
|
||||
spin_lock_irq(&assigned_dev->assigned_dev_lock);
|
||||
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
|
||||
struct kvm_guest_msix_entry *guest_entries =
|
||||
assigned_dev->guest_msix_entries;
|
||||
for (i = 0; i < assigned_dev->entries_nr; i++) {
|
||||
if (!(guest_entries[i].flags &
|
||||
KVM_ASSIGNED_MSIX_PENDING))
|
||||
continue;
|
||||
guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING;
|
||||
index = find_index_from_host_irq(assigned_dev, irq);
|
||||
if (index >= 0) {
|
||||
vector = assigned_dev->
|
||||
guest_msix_entries[index].vector;
|
||||
kvm_set_irq(assigned_dev->kvm,
|
||||
assigned_dev->irq_source_id,
|
||||
guest_entries[i].vector, 1);
|
||||
assigned_dev->irq_source_id, vector, 1);
|
||||
}
|
||||
} else
|
||||
kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
|
||||
assigned_dev->guest_irq, 1);
|
||||
|
||||
spin_unlock_irq(&assigned_dev->assigned_dev_lock);
|
||||
}
|
||||
|
||||
static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct kvm_assigned_dev_kernel *assigned_dev =
|
||||
(struct kvm_assigned_dev_kernel *) dev_id;
|
||||
|
||||
spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags);
|
||||
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
|
||||
int index = find_index_from_host_irq(assigned_dev, irq);
|
||||
if (index < 0)
|
||||
goto out;
|
||||
assigned_dev->guest_msix_entries[index].flags |=
|
||||
KVM_ASSIGNED_MSIX_PENDING;
|
||||
}
|
||||
|
||||
schedule_work(&assigned_dev->interrupt_work);
|
||||
|
||||
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
|
||||
disable_irq_nosync(irq);
|
||||
assigned_dev->host_irq_disabled = true;
|
||||
}
|
||||
|
||||
out:
|
||||
spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
|
@ -114,7 +87,6 @@ out:
|
|||
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
|
||||
{
|
||||
struct kvm_assigned_dev_kernel *dev;
|
||||
unsigned long flags;
|
||||
|
||||
if (kian->gsi == -1)
|
||||
return;
|
||||
|
@ -127,12 +99,12 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
|
|||
/* The guest irq may be shared so this ack may be
|
||||
* from another device.
|
||||
*/
|
||||
spin_lock_irqsave(&dev->assigned_dev_lock, flags);
|
||||
spin_lock(&dev->intx_lock);
|
||||
if (dev->host_irq_disabled) {
|
||||
enable_irq(dev->host_irq);
|
||||
dev->host_irq_disabled = false;
|
||||
}
|
||||
spin_unlock_irqrestore(&dev->assigned_dev_lock, flags);
|
||||
spin_unlock(&dev->intx_lock);
|
||||
}
|
||||
|
||||
static void deassign_guest_irq(struct kvm *kvm,
|
||||
|
@ -141,6 +113,9 @@ static void deassign_guest_irq(struct kvm *kvm,
|
|||
kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
|
||||
assigned_dev->ack_notifier.gsi = -1;
|
||||
|
||||
kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
|
||||
assigned_dev->guest_irq, 0);
|
||||
|
||||
if (assigned_dev->irq_source_id != -1)
|
||||
kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
|
||||
assigned_dev->irq_source_id = -1;
|
||||
|
@ -152,28 +127,19 @@ static void deassign_host_irq(struct kvm *kvm,
|
|||
struct kvm_assigned_dev_kernel *assigned_dev)
|
||||
{
|
||||
/*
|
||||
* In kvm_free_device_irq, cancel_work_sync return true if:
|
||||
* 1. work is scheduled, and then cancelled.
|
||||
* 2. work callback is executed.
|
||||
*
|
||||
* The first one ensured that the irq is disabled and no more events
|
||||
* would happen. But for the second one, the irq may be enabled (e.g.
|
||||
* for MSI). So we disable irq here to prevent further events.
|
||||
* We disable irq here to prevent further events.
|
||||
*
|
||||
* Notice this maybe result in nested disable if the interrupt type is
|
||||
* INTx, but it's OK for we are going to free it.
|
||||
*
|
||||
* If this function is a part of VM destroy, please ensure that till
|
||||
* now, the kvm state is still legal for probably we also have to wait
|
||||
* interrupt_work done.
|
||||
* on a currently running IRQ handler.
|
||||
*/
|
||||
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
|
||||
int i;
|
||||
for (i = 0; i < assigned_dev->entries_nr; i++)
|
||||
disable_irq_nosync(assigned_dev->
|
||||
host_msix_entries[i].vector);
|
||||
|
||||
cancel_work_sync(&assigned_dev->interrupt_work);
|
||||
disable_irq(assigned_dev->host_msix_entries[i].vector);
|
||||
|
||||
for (i = 0; i < assigned_dev->entries_nr; i++)
|
||||
free_irq(assigned_dev->host_msix_entries[i].vector,
|
||||
|
@ -185,8 +151,7 @@ static void deassign_host_irq(struct kvm *kvm,
|
|||
pci_disable_msix(assigned_dev->dev);
|
||||
} else {
|
||||
/* Deal with MSI and INTx */
|
||||
disable_irq_nosync(assigned_dev->host_irq);
|
||||
cancel_work_sync(&assigned_dev->interrupt_work);
|
||||
disable_irq(assigned_dev->host_irq);
|
||||
|
||||
free_irq(assigned_dev->host_irq, (void *)assigned_dev);
|
||||
|
||||
|
@ -232,7 +197,8 @@ static void kvm_free_assigned_device(struct kvm *kvm,
|
|||
{
|
||||
kvm_free_assigned_irq(kvm, assigned_dev);
|
||||
|
||||
pci_reset_function(assigned_dev->dev);
|
||||
__pci_reset_function(assigned_dev->dev);
|
||||
pci_restore_state(assigned_dev->dev);
|
||||
|
||||
pci_release_regions(assigned_dev->dev);
|
||||
pci_disable_device(assigned_dev->dev);
|
||||
|
@ -265,8 +231,8 @@ static int assigned_device_enable_host_intx(struct kvm *kvm,
|
|||
* on the same interrupt line is not a happy situation: there
|
||||
* are going to be long delays in accepting, acking, etc.
|
||||
*/
|
||||
if (request_irq(dev->host_irq, kvm_assigned_dev_intr,
|
||||
0, "kvm_assigned_intx_device", (void *)dev))
|
||||
if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread,
|
||||
IRQF_ONESHOT, dev->irq_name, (void *)dev))
|
||||
return -EIO;
|
||||
return 0;
|
||||
}
|
||||
|
@ -284,8 +250,8 @@ static int assigned_device_enable_host_msi(struct kvm *kvm,
|
|||
}
|
||||
|
||||
dev->host_irq = dev->dev->irq;
|
||||
if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0,
|
||||
"kvm_assigned_msi_device", (void *)dev)) {
|
||||
if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread,
|
||||
0, dev->irq_name, (void *)dev)) {
|
||||
pci_disable_msi(dev->dev);
|
||||
return -EIO;
|
||||
}
|
||||
|
@ -310,10 +276,9 @@ static int assigned_device_enable_host_msix(struct kvm *kvm,
|
|||
return r;
|
||||
|
||||
for (i = 0; i < dev->entries_nr; i++) {
|
||||
r = request_irq(dev->host_msix_entries[i].vector,
|
||||
kvm_assigned_dev_intr, 0,
|
||||
"kvm_assigned_msix_device",
|
||||
(void *)dev);
|
||||
r = request_threaded_irq(dev->host_msix_entries[i].vector,
|
||||
NULL, kvm_assigned_dev_thread,
|
||||
0, dev->irq_name, (void *)dev);
|
||||
if (r)
|
||||
goto err;
|
||||
}
|
||||
|
@ -370,6 +335,9 @@ static int assign_host_irq(struct kvm *kvm,
|
|||
if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
|
||||
return r;
|
||||
|
||||
snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
|
||||
pci_name(dev->dev));
|
||||
|
||||
switch (host_irq_type) {
|
||||
case KVM_DEV_IRQ_HOST_INTX:
|
||||
r = assigned_device_enable_host_intx(kvm, dev);
|
||||
|
@ -547,6 +515,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
|
|||
}
|
||||
|
||||
pci_reset_function(dev);
|
||||
pci_save_state(dev);
|
||||
|
||||
match->assigned_dev_id = assigned_dev->assigned_dev_id;
|
||||
match->host_segnr = assigned_dev->segnr;
|
||||
|
@ -554,12 +523,10 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
|
|||
match->host_devfn = assigned_dev->devfn;
|
||||
match->flags = assigned_dev->flags;
|
||||
match->dev = dev;
|
||||
spin_lock_init(&match->assigned_dev_lock);
|
||||
spin_lock_init(&match->intx_lock);
|
||||
match->irq_source_id = -1;
|
||||
match->kvm = kvm;
|
||||
match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
|
||||
INIT_WORK(&match->interrupt_work,
|
||||
kvm_assigned_dev_interrupt_work_handler);
|
||||
|
||||
list_add(&match->list, &kvm->arch.assigned_dev_head);
|
||||
|
||||
|
@ -579,6 +546,7 @@ out:
|
|||
mutex_unlock(&kvm->lock);
|
||||
return r;
|
||||
out_list_del:
|
||||
pci_restore_state(dev);
|
||||
list_del(&match->list);
|
||||
pci_release_regions(dev);
|
||||
out_disable:
|
||||
|
@ -651,9 +619,9 @@ static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
|
|||
r = -ENOMEM;
|
||||
goto msix_nr_out;
|
||||
}
|
||||
adev->guest_msix_entries = kzalloc(
|
||||
sizeof(struct kvm_guest_msix_entry) *
|
||||
entry_nr->entry_nr, GFP_KERNEL);
|
||||
adev->guest_msix_entries =
|
||||
kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
|
||||
GFP_KERNEL);
|
||||
if (!adev->guest_msix_entries) {
|
||||
kfree(adev->host_msix_entries);
|
||||
r = -ENOMEM;
|
||||
|
@ -706,7 +674,7 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
|
|||
unsigned long arg)
|
||||
{
|
||||
void __user *argp = (void __user *)arg;
|
||||
int r = -ENOTTY;
|
||||
int r;
|
||||
|
||||
switch (ioctl) {
|
||||
case KVM_ASSIGN_PCI_DEVICE: {
|
||||
|
@ -724,7 +692,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
|
|||
r = -EOPNOTSUPP;
|
||||
break;
|
||||
}
|
||||
#ifdef KVM_CAP_ASSIGN_DEV_IRQ
|
||||
case KVM_ASSIGN_DEV_IRQ: {
|
||||
struct kvm_assigned_irq assigned_irq;
|
||||
|
||||
|
@ -747,8 +714,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
|
|||
goto out;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
|
||||
case KVM_DEASSIGN_PCI_DEVICE: {
|
||||
struct kvm_assigned_pci_dev assigned_dev;
|
||||
|
||||
|
@ -760,7 +725,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
|
|||
goto out;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
case KVM_SET_GSI_ROUTING: {
|
||||
struct kvm_irq_routing routing;
|
||||
|
@ -813,6 +777,9 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
|
|||
break;
|
||||
}
|
||||
#endif
|
||||
default:
|
||||
r = -ENOTTY;
|
||||
break;
|
||||
}
|
||||
out:
|
||||
return r;
|
||||
|
|
|
@ -0,0 +1,216 @@
|
|||
/*
|
||||
* kvm asynchronous fault support
|
||||
*
|
||||
* Copyright 2010 Red Hat, Inc.
|
||||
*
|
||||
* Author:
|
||||
* Gleb Natapov <gleb@redhat.com>
|
||||
*
|
||||
* This file is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License
|
||||
* as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mmu_context.h>
|
||||
|
||||
#include "async_pf.h"
|
||||
#include <trace/events/kvm.h>
|
||||
|
||||
static struct kmem_cache *async_pf_cache;
|
||||
|
||||
int kvm_async_pf_init(void)
|
||||
{
|
||||
async_pf_cache = KMEM_CACHE(kvm_async_pf, 0);
|
||||
|
||||
if (!async_pf_cache)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_async_pf_deinit(void)
|
||||
{
|
||||
if (async_pf_cache)
|
||||
kmem_cache_destroy(async_pf_cache);
|
||||
async_pf_cache = NULL;
|
||||
}
|
||||
|
||||
void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
INIT_LIST_HEAD(&vcpu->async_pf.done);
|
||||
INIT_LIST_HEAD(&vcpu->async_pf.queue);
|
||||
spin_lock_init(&vcpu->async_pf.lock);
|
||||
}
|
||||
|
||||
static void async_pf_execute(struct work_struct *work)
|
||||
{
|
||||
struct page *page = NULL;
|
||||
struct kvm_async_pf *apf =
|
||||
container_of(work, struct kvm_async_pf, work);
|
||||
struct mm_struct *mm = apf->mm;
|
||||
struct kvm_vcpu *vcpu = apf->vcpu;
|
||||
unsigned long addr = apf->addr;
|
||||
gva_t gva = apf->gva;
|
||||
|
||||
might_sleep();
|
||||
|
||||
use_mm(mm);
|
||||
down_read(&mm->mmap_sem);
|
||||
get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL);
|
||||
up_read(&mm->mmap_sem);
|
||||
unuse_mm(mm);
|
||||
|
||||
spin_lock(&vcpu->async_pf.lock);
|
||||
list_add_tail(&apf->link, &vcpu->async_pf.done);
|
||||
apf->page = page;
|
||||
apf->done = true;
|
||||
spin_unlock(&vcpu->async_pf.lock);
|
||||
|
||||
/*
|
||||
* apf may be freed by kvm_check_async_pf_completion() after
|
||||
* this point
|
||||
*/
|
||||
|
||||
trace_kvm_async_pf_completed(addr, page, gva);
|
||||
|
||||
if (waitqueue_active(&vcpu->wq))
|
||||
wake_up_interruptible(&vcpu->wq);
|
||||
|
||||
mmdrop(mm);
|
||||
kvm_put_kvm(vcpu->kvm);
|
||||
}
|
||||
|
||||
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* cancel outstanding work queue item */
|
||||
while (!list_empty(&vcpu->async_pf.queue)) {
|
||||
struct kvm_async_pf *work =
|
||||
list_entry(vcpu->async_pf.queue.next,
|
||||
typeof(*work), queue);
|
||||
cancel_work_sync(&work->work);
|
||||
list_del(&work->queue);
|
||||
if (!work->done) /* work was canceled */
|
||||
kmem_cache_free(async_pf_cache, work);
|
||||
}
|
||||
|
||||
spin_lock(&vcpu->async_pf.lock);
|
||||
while (!list_empty(&vcpu->async_pf.done)) {
|
||||
struct kvm_async_pf *work =
|
||||
list_entry(vcpu->async_pf.done.next,
|
||||
typeof(*work), link);
|
||||
list_del(&work->link);
|
||||
if (work->page)
|
||||
put_page(work->page);
|
||||
kmem_cache_free(async_pf_cache, work);
|
||||
}
|
||||
spin_unlock(&vcpu->async_pf.lock);
|
||||
|
||||
vcpu->async_pf.queued = 0;
|
||||
}
|
||||
|
||||
void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
{
	struct kvm_async_pf *work;

	while (!list_empty_careful(&vcpu->async_pf.done) &&
	       kvm_arch_can_inject_async_page_present(vcpu)) {
		spin_lock(&vcpu->async_pf.lock);
		work = list_first_entry(&vcpu->async_pf.done, typeof(*work),
					link);
		list_del(&work->link);
		spin_unlock(&vcpu->async_pf.lock);

		if (work->page)
			kvm_arch_async_page_ready(vcpu, work);
		kvm_arch_async_page_present(vcpu, work);

		list_del(&work->queue);
		vcpu->async_pf.queued--;
		if (work->page)
			put_page(work->page);
		kmem_cache_free(async_pf_cache, work);
	}
}
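The async-PF core is a classic deferred-work shape: the slow page fault is handed to a worker, the finished item is parked on a per-vCPU done list under a lock, and the vCPU drains that list when it is ready to inject completions. A compact pthread sketch of that producer/consumer pattern follows; it is purely illustrative and not KVM code (the kernel uses a workqueue and a spinlock instead of pthreads).

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct work {
	int token;
	struct work *next;
};

static pthread_mutex_t done_lock = PTHREAD_MUTEX_INITIALIZER;
static struct work *done_list;		/* "done" list, like vcpu->async_pf.done */

/* Worker: performs the slow part, then parks the item on the done list. */
static void *worker(void *arg)
{
	struct work *w = arg;

	usleep(1000);			/* stand-in for faulting the page in */

	pthread_mutex_lock(&done_lock);
	w->next = done_list;
	done_list = w;
	pthread_mutex_unlock(&done_lock);
	return NULL;
}

/* "vCPU" side: drain completions when it is convenient to inject them. */
static void check_completions(void)
{
	pthread_mutex_lock(&done_lock);
	while (done_list) {
		struct work *w = done_list;

		done_list = w->next;
		pthread_mutex_unlock(&done_lock);

		printf("page ready, token %d\n", w->token);	/* ~ async_page_present */
		free(w);

		pthread_mutex_lock(&done_lock);
	}
	pthread_mutex_unlock(&done_lock);
}

int main(void)
{
	pthread_t tid[3];

	for (int i = 0; i < 3; i++) {
		struct work *w = calloc(1, sizeof(*w));

		w->token = i;
		pthread_create(&tid[i], NULL, worker, w);	/* ~ schedule_work() */
	}
	for (int i = 0; i < 3; i++)
		pthread_join(tid[i], NULL);
	check_completions();
	return 0;
}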
|
||||
|
||||
int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
|
||||
struct kvm_arch_async_pf *arch)
|
||||
{
|
||||
struct kvm_async_pf *work;
|
||||
|
||||
if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU)
|
||||
return 0;
|
||||
|
||||
/* setup delayed work */
|
||||
|
||||
	/*
	 * Do a GFP_NOWAIT allocation: if we would have to sleep here
	 * anyway, we may as well sleep faulting the page in
	 * synchronously instead.
	 */
|
||||
work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT);
|
||||
if (!work)
|
||||
return 0;
|
||||
|
||||
work->page = NULL;
|
||||
work->done = false;
|
||||
work->vcpu = vcpu;
|
||||
work->gva = gva;
|
||||
work->addr = gfn_to_hva(vcpu->kvm, gfn);
|
||||
work->arch = *arch;
|
||||
work->mm = current->mm;
|
||||
atomic_inc(&work->mm->mm_count);
|
||||
kvm_get_kvm(work->vcpu->kvm);
|
||||
|
||||
/* this can't really happen otherwise gfn_to_pfn_async
|
||||
would succeed */
|
||||
if (unlikely(kvm_is_error_hva(work->addr)))
|
||||
goto retry_sync;
|
||||
|
||||
INIT_WORK(&work->work, async_pf_execute);
|
||||
if (!schedule_work(&work->work))
|
||||
goto retry_sync;
|
||||
|
||||
list_add_tail(&work->queue, &vcpu->async_pf.queue);
|
||||
vcpu->async_pf.queued++;
|
||||
kvm_arch_async_page_not_present(vcpu, work);
|
||||
return 1;
|
||||
retry_sync:
|
||||
kvm_put_kvm(work->vcpu->kvm);
|
||||
mmdrop(work->mm);
|
||||
kmem_cache_free(async_pf_cache, work);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_async_pf *work;
|
||||
|
||||
if (!list_empty_careful(&vcpu->async_pf.done))
|
||||
return 0;
|
||||
|
||||
work = kmem_cache_zalloc(async_pf_cache, GFP_ATOMIC);
|
||||
if (!work)
|
||||
return -ENOMEM;
|
||||
|
||||
work->page = bad_page;
|
||||
get_page(bad_page);
|
||||
INIT_LIST_HEAD(&work->queue); /* for list_del to work */
|
||||
|
||||
spin_lock(&vcpu->async_pf.lock);
|
||||
list_add_tail(&work->link, &vcpu->async_pf.done);
|
||||
spin_unlock(&vcpu->async_pf.lock);
|
||||
|
||||
vcpu->async_pf.queued++;
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
* kvm asynchronous fault support
|
||||
*
|
||||
* Copyright 2010 Red Hat, Inc.
|
||||
*
|
||||
* Author:
|
||||
* Gleb Natapov <gleb@redhat.com>
|
||||
*
|
||||
* This file is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License
|
||||
* as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef __KVM_ASYNC_PF_H__
#define __KVM_ASYNC_PF_H__

#ifdef CONFIG_KVM_ASYNC_PF
int kvm_async_pf_init(void);
void kvm_async_pf_deinit(void);
void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu);
#else
#define kvm_async_pf_init() (0)
#define kvm_async_pf_deinit() do{}while(0)
#define kvm_async_pf_vcpu_init(C) do{}while(0)
#endif

#endif
|
|
@ -44,14 +44,19 @@
|
|||
*/
|
||||
|
||||
struct _irqfd {
|
||||
struct kvm *kvm;
|
||||
struct eventfd_ctx *eventfd;
|
||||
int gsi;
|
||||
struct list_head list;
|
||||
poll_table pt;
|
||||
wait_queue_t wait;
|
||||
struct work_struct inject;
|
||||
struct work_struct shutdown;
|
||||
/* Used for MSI fast-path */
|
||||
struct kvm *kvm;
|
||||
wait_queue_t wait;
|
||||
/* Update side is protected by irqfds.lock */
|
||||
struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
|
||||
/* Used for level IRQ fast-path */
|
||||
int gsi;
|
||||
struct work_struct inject;
|
||||
/* Used for setup/shutdown */
|
||||
struct eventfd_ctx *eventfd;
|
||||
struct list_head list;
|
||||
poll_table pt;
|
||||
struct work_struct shutdown;
|
||||
};
|
||||
|
||||
static struct workqueue_struct *irqfd_cleanup_wq;
|
||||
|
@ -125,14 +130,22 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
|
|||
{
|
||||
struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
|
||||
unsigned long flags = (unsigned long)key;
|
||||
struct kvm_kernel_irq_routing_entry *irq;
|
||||
struct kvm *kvm = irqfd->kvm;
|
||||
|
||||
if (flags & POLLIN)
|
||||
if (flags & POLLIN) {
|
||||
rcu_read_lock();
|
||||
irq = rcu_dereference(irqfd->irq_entry);
|
||||
/* An event has been signaled, inject an interrupt */
|
||||
schedule_work(&irqfd->inject);
|
||||
if (irq)
|
||||
kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
|
||||
else
|
||||
schedule_work(&irqfd->inject);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
if (flags & POLLHUP) {
|
||||
/* The eventfd is closing, detach from KVM */
|
||||
struct kvm *kvm = irqfd->kvm;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&kvm->irqfds.lock, flags);
|
||||
|
@ -163,9 +176,31 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
|
|||
add_wait_queue(wqh, &irqfd->wait);
|
||||
}
|
||||
|
||||
/* Must be called under irqfds.lock */
static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
			 struct kvm_irq_routing_table *irq_rt)
{
	struct kvm_kernel_irq_routing_entry *e;
	struct hlist_node *n;

	if (irqfd->gsi >= irq_rt->nr_rt_entries) {
		rcu_assign_pointer(irqfd->irq_entry, NULL);
		return;
	}

	hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) {
		/* Only fast-path MSI. */
		if (e->type == KVM_IRQ_ROUTING_MSI)
			rcu_assign_pointer(irqfd->irq_entry, e);
		else
			rcu_assign_pointer(irqfd->irq_entry, NULL);
	}
}
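irqfd_update() publishes a pointer to the current MSI routing entry so that the wakeup path can deliver the interrupt directly instead of bouncing through a workqueue; readers see either the old or the new pointer, never a torn one. The sketch below mimics that publish/consume shape using C11 acquire/release atomics in place of rcu_assign_pointer()/rcu_dereference() (an explicit substitution; the reclamation side of RCU is not modeled here).

#include <stdatomic.h>
#include <stdio.h>

struct msi_route {
	unsigned int address;
	unsigned int data;
};

/* Published fast-path entry; NULL means "fall back to the slow path". */
static _Atomic(struct msi_route *) fast_route;

static void publish_route(struct msi_route *r)
{
	/* release: the route's fields are visible before the pointer is */
	atomic_store_explicit(&fast_route, r, memory_order_release);
}

static void on_eventfd_signal(void)
{
	struct msi_route *r =
		atomic_load_explicit(&fast_route, memory_order_acquire);

	if (r)
		printf("fast path: MSI addr=%#x data=%#x\n", r->address, r->data);
	else
		printf("slow path: defer to injection work\n");
}

int main(void)
{
	static struct msi_route route = { .address = 0xfee00000, .data = 0x4041 };

	on_eventfd_signal();		/* no route yet -> slow path */
	publish_route(&route);
	on_eventfd_signal();		/* route cached -> direct delivery */
	publish_route(NULL);		/* e.g. routing changed to non-MSI */
	on_eventfd_signal();
	return 0;
}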
|
||||
|
||||
static int
|
||||
kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
|
||||
{
|
||||
struct kvm_irq_routing_table *irq_rt;
|
||||
struct _irqfd *irqfd, *tmp;
|
||||
struct file *file = NULL;
|
||||
struct eventfd_ctx *eventfd = NULL;
|
||||
|
@ -215,6 +250,10 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
|
|||
goto fail;
|
||||
}
|
||||
|
||||
irq_rt = rcu_dereference_protected(kvm->irq_routing,
|
||||
lockdep_is_held(&kvm->irqfds.lock));
|
||||
irqfd_update(kvm, irqfd, irq_rt);
|
||||
|
||||
events = file->f_op->poll(file, &irqfd->pt);
|
||||
|
||||
list_add_tail(&irqfd->list, &kvm->irqfds.items);
|
||||
|
@ -271,8 +310,17 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
|
|||
spin_lock_irq(&kvm->irqfds.lock);
|
||||
|
||||
list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
|
||||
if (irqfd->eventfd == eventfd && irqfd->gsi == gsi)
|
||||
if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) {
|
||||
/*
|
||||
* This rcu_assign_pointer is needed for when
|
||||
* another thread calls kvm_irqfd_update before
|
||||
* we flush workqueue below.
|
||||
* It is paired with synchronize_rcu done by caller
|
||||
* of that function.
|
||||
*/
|
||||
rcu_assign_pointer(irqfd->irq_entry, NULL);
|
||||
irqfd_deactivate(irqfd);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock_irq(&kvm->irqfds.lock);
|
||||
|
@ -321,6 +369,25 @@ kvm_irqfd_release(struct kvm *kvm)
|
|||
|
||||
}
|
||||
|
||||
/*
 * Change irq_routing and irqfd.
 * Caller must invoke synchronize_rcu afterwards.
 */
void kvm_irq_routing_update(struct kvm *kvm,
			    struct kvm_irq_routing_table *irq_rt)
{
	struct _irqfd *irqfd;

	spin_lock_irq(&kvm->irqfds.lock);

	rcu_assign_pointer(kvm->irq_routing, irq_rt);

	list_for_each_entry(irqfd, &kvm->irqfds.items, list)
		irqfd_update(kvm, irqfd, irq_rt);

	spin_unlock_irq(&kvm->irqfds.lock);
}
|
||||
|
||||
/*
|
||||
* create a host-wide workqueue for issuing deferred shutdown requests
|
||||
* aggregated from all vm* instances. We need our own isolated single-thread
|
||||
|
|
|
@ -114,8 +114,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
|
|||
return r;
|
||||
}
|
||||
|
||||
static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
|
||||
struct kvm *kvm, int irq_source_id, int level)
|
||||
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
|
||||
struct kvm *kvm, int irq_source_id, int level)
|
||||
{
|
||||
struct kvm_lapic_irq irq;
|
||||
|
||||
|
@ -409,8 +409,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
|
|||
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
old = kvm->irq_routing;
|
||||
rcu_assign_pointer(kvm->irq_routing, new);
|
||||
kvm_irq_routing_update(kvm, new);
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
|
||||
synchronize_rcu();
|
||||
|
||||
new = old;
|
||||
|
|
|
@ -55,6 +55,7 @@
|
|||
#include <asm-generic/bitops/le.h>
|
||||
|
||||
#include "coalesced_mmio.h"
|
||||
#include "async_pf.h"
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/kvm.h>
|
||||
|
@ -89,7 +90,8 @@ static void hardware_disable_all(void);
|
|||
|
||||
static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
|
||||
|
||||
static bool kvm_rebooting;
|
||||
bool kvm_rebooting;
|
||||
EXPORT_SYMBOL_GPL(kvm_rebooting);
|
||||
|
||||
static bool largepages_enabled = true;
|
||||
|
||||
|
@ -167,8 +169,12 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
|
|||
|
||||
void kvm_flush_remote_tlbs(struct kvm *kvm)
|
||||
{
|
||||
int dirty_count = kvm->tlbs_dirty;
|
||||
|
||||
smp_mb();
|
||||
if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
|
||||
++kvm->stat.remote_tlb_flush;
|
||||
cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
|
||||
}
|
||||
|
||||
void kvm_reload_remote_mmus(struct kvm *kvm)
|
||||
|
@ -186,6 +192,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
|
|||
vcpu->kvm = kvm;
|
||||
vcpu->vcpu_id = id;
|
||||
init_waitqueue_head(&vcpu->wq);
|
||||
kvm_async_pf_vcpu_init(vcpu);
|
||||
|
||||
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
|
||||
if (!page) {
|
||||
|
@ -247,7 +254,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
|
|||
idx = srcu_read_lock(&kvm->srcu);
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
kvm->mmu_notifier_seq++;
|
||||
need_tlb_flush = kvm_unmap_hva(kvm, address);
|
||||
need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty;
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
||||
|
@ -291,6 +298,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
|
|||
kvm->mmu_notifier_count++;
|
||||
for (; start < end; start += PAGE_SIZE)
|
||||
need_tlb_flush |= kvm_unmap_hva(kvm, start);
|
||||
need_tlb_flush |= kvm->tlbs_dirty;
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
||||
|
@ -381,11 +389,15 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
|
|||
|
||||
static struct kvm *kvm_create_vm(void)
|
||||
{
|
||||
int r = 0, i;
|
||||
struct kvm *kvm = kvm_arch_create_vm();
|
||||
int r, i;
|
||||
struct kvm *kvm = kvm_arch_alloc_vm();
|
||||
|
||||
if (IS_ERR(kvm))
|
||||
goto out;
|
||||
if (!kvm)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
r = kvm_arch_init_vm(kvm);
|
||||
if (r)
|
||||
goto out_err_nodisable;
|
||||
|
||||
r = hardware_enable_all();
|
||||
if (r)
|
||||
|
@ -399,23 +411,19 @@ static struct kvm *kvm_create_vm(void)
|
|||
r = -ENOMEM;
|
||||
kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
|
||||
if (!kvm->memslots)
|
||||
goto out_err;
|
||||
goto out_err_nosrcu;
|
||||
if (init_srcu_struct(&kvm->srcu))
|
||||
goto out_err;
|
||||
goto out_err_nosrcu;
|
||||
for (i = 0; i < KVM_NR_BUSES; i++) {
|
||||
kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
|
||||
GFP_KERNEL);
|
||||
if (!kvm->buses[i]) {
|
||||
cleanup_srcu_struct(&kvm->srcu);
|
||||
if (!kvm->buses[i])
|
||||
goto out_err;
|
||||
}
|
||||
}
|
||||
|
||||
r = kvm_init_mmu_notifier(kvm);
|
||||
if (r) {
|
||||
cleanup_srcu_struct(&kvm->srcu);
|
||||
if (r)
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
kvm->mm = current->mm;
|
||||
atomic_inc(&kvm->mm->mm_count);
|
||||
|
@ -429,19 +437,35 @@ static struct kvm *kvm_create_vm(void)
|
|||
spin_lock(&kvm_lock);
|
||||
list_add(&kvm->vm_list, &vm_list);
|
||||
spin_unlock(&kvm_lock);
|
||||
out:
|
||||
|
||||
return kvm;
|
||||
|
||||
out_err:
|
||||
cleanup_srcu_struct(&kvm->srcu);
|
||||
out_err_nosrcu:
|
||||
hardware_disable_all();
|
||||
out_err_nodisable:
|
||||
for (i = 0; i < KVM_NR_BUSES; i++)
|
||||
kfree(kvm->buses[i]);
|
||||
kfree(kvm->memslots);
|
||||
kfree(kvm);
|
||||
kvm_arch_free_vm(kvm);
|
||||
return ERR_PTR(r);
|
||||
}
|
||||
|
||||
static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
|
||||
{
|
||||
if (!memslot->dirty_bitmap)
|
||||
return;
|
||||
|
||||
if (2 * kvm_dirty_bitmap_bytes(memslot) > PAGE_SIZE)
|
||||
vfree(memslot->dirty_bitmap_head);
|
||||
else
|
||||
kfree(memslot->dirty_bitmap_head);
|
||||
|
||||
memslot->dirty_bitmap = NULL;
|
||||
memslot->dirty_bitmap_head = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free any memory in @free but not in @dont.
|
||||
*/
|
||||
|
@ -454,7 +478,7 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
|
|||
vfree(free->rmap);
|
||||
|
||||
if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
|
||||
vfree(free->dirty_bitmap);
|
||||
kvm_destroy_dirty_bitmap(free);
|
||||
|
||||
|
||||
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
|
||||
|
@ -465,7 +489,6 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
|
|||
}
|
||||
|
||||
free->npages = 0;
|
||||
free->dirty_bitmap = NULL;
|
||||
free->rmap = NULL;
|
||||
}
|
||||
|
||||
|
@ -499,6 +522,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
|
|||
kvm_arch_flush_shadow(kvm);
|
||||
#endif
|
||||
kvm_arch_destroy_vm(kvm);
|
||||
kvm_free_physmem(kvm);
|
||||
cleanup_srcu_struct(&kvm->srcu);
|
||||
kvm_arch_free_vm(kvm);
|
||||
hardware_disable_all();
|
||||
mmdrop(mm);
|
||||
}
|
||||
|
@ -527,6 +553,27 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Allocation size is twice as large as the actual dirty bitmap size.
 * This makes it possible to do double buffering: see x86's
 * kvm_vm_ioctl_get_dirty_log().
 */
static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
{
	unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);

	if (dirty_bytes > PAGE_SIZE)
		memslot->dirty_bitmap = vzalloc(dirty_bytes);
	else
		memslot->dirty_bitmap = kzalloc(dirty_bytes, GFP_KERNEL);

	if (!memslot->dirty_bitmap)
		return -ENOMEM;

	memslot->dirty_bitmap_head = memslot->dirty_bitmap;
	return 0;
}
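Because the allocation is twice the bitmap size, the dirty-log ioctl can hand out one half while the other half keeps collecting new dirty bits, then flip. The minimal sketch below illustrates that double-buffering idea for a toy 64-page slot; the locking that makes the flip safe against concurrent writers is deliberately omitted, and the real switch lives in x86's kvm_vm_ioctl_get_dirty_log().

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NPAGES 64
#define BITMAP_LONGS ((NPAGES + 63) / 64)

static uint64_t bitmap_store[2 * BITMAP_LONGS];	/* "twice as large" allocation */
static uint64_t *active = bitmap_store;		/* half currently collecting bits */

static void mark_page_dirty(unsigned int page)
{
	active[page / 64] |= 1ull << (page % 64);
}

/* Hand the collected half to the caller and flip to the other, zeroed half. */
static void get_dirty_log(uint64_t *out)
{
	uint64_t *full = active;
	uint64_t *other = (active == bitmap_store)
			  ? bitmap_store + BITMAP_LONGS : bitmap_store;

	memset(other, 0, BITMAP_LONGS * sizeof(uint64_t));
	active = other;			/* new dirty bits land here from now on */

	memcpy(out, full, BITMAP_LONGS * sizeof(uint64_t));
}

int main(void)
{
	uint64_t snapshot[BITMAP_LONGS];

	mark_page_dirty(3);
	mark_page_dirty(17);
	get_dirty_log(snapshot);
	printf("snapshot: %#llx\n", (unsigned long long)snapshot[0]);	/* 0x20008 */

	mark_page_dirty(5);		/* lands in the other half */
	get_dirty_log(snapshot);
	printf("snapshot: %#llx\n", (unsigned long long)snapshot[0]);	/* 0x20 */
	return 0;
}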
|
||||
|
||||
/*
|
||||
* Allocate some memory and give it an address in the guest physical address
|
||||
* space.
|
||||
|
@ -604,13 +651,11 @@ int __kvm_set_memory_region(struct kvm *kvm,
|
|||
/* Allocate if a slot is being created */
|
||||
#ifndef CONFIG_S390
|
||||
if (npages && !new.rmap) {
|
||||
new.rmap = vmalloc(npages * sizeof(*new.rmap));
|
||||
new.rmap = vzalloc(npages * sizeof(*new.rmap));
|
||||
|
||||
if (!new.rmap)
|
||||
goto out_free;
|
||||
|
||||
memset(new.rmap, 0, npages * sizeof(*new.rmap));
|
||||
|
||||
new.user_alloc = user_alloc;
|
||||
new.userspace_addr = mem->userspace_addr;
|
||||
}
|
||||
|
@ -633,14 +678,11 @@ int __kvm_set_memory_region(struct kvm *kvm,
|
|||
>> KVM_HPAGE_GFN_SHIFT(level));
|
||||
lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level);
|
||||
|
||||
new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i]));
|
||||
new.lpage_info[i] = vzalloc(lpages * sizeof(*new.lpage_info[i]));
|
||||
|
||||
if (!new.lpage_info[i])
|
||||
goto out_free;
|
||||
|
||||
memset(new.lpage_info[i], 0,
|
||||
lpages * sizeof(*new.lpage_info[i]));
|
||||
|
||||
if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
|
||||
new.lpage_info[i][0].write_count = 1;
|
||||
if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
|
||||
|
@ -661,12 +703,8 @@ skip_lpage:
|
|||
|
||||
/* Allocate page dirty bitmap if needed */
|
||||
if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
|
||||
unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new);
|
||||
|
||||
new.dirty_bitmap = vmalloc(dirty_bytes);
|
||||
if (!new.dirty_bitmap)
|
||||
if (kvm_create_dirty_bitmap(&new) < 0)
|
||||
goto out_free;
|
||||
memset(new.dirty_bitmap, 0, dirty_bytes);
|
||||
/* destroy any largepage mappings for dirty tracking */
|
||||
if (old.npages)
|
||||
flush_shadow = 1;
|
||||
|
@ -685,6 +723,7 @@ skip_lpage:
|
|||
memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
|
||||
if (mem->slot >= slots->nmemslots)
|
||||
slots->nmemslots = mem->slot + 1;
|
||||
slots->generation++;
|
||||
slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
|
||||
|
||||
old_memslots = kvm->memslots;
|
||||
|
@ -719,6 +758,7 @@ skip_lpage:
|
|||
memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
|
||||
if (mem->slot >= slots->nmemslots)
|
||||
slots->nmemslots = mem->slot + 1;
|
||||
slots->generation++;
|
||||
|
||||
/* actual memory is freed via old in kvm_free_physmem_slot below */
|
||||
if (!npages) {
|
||||
|
@ -849,10 +889,10 @@ int kvm_is_error_hva(unsigned long addr)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_is_error_hva);
|
||||
|
||||
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
|
||||
static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots,
|
||||
gfn_t gfn)
|
||||
{
|
||||
int i;
|
||||
struct kvm_memslots *slots = kvm_memslots(kvm);
|
||||
|
||||
for (i = 0; i < slots->nmemslots; ++i) {
|
||||
struct kvm_memory_slot *memslot = &slots->memslots[i];
|
||||
|
@ -863,6 +903,11 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
|
|||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
return __gfn_to_memslot(kvm_memslots(kvm), gfn);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gfn_to_memslot);
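The lookup that gfn_to_memslot() now shares with the cache-init path is a plain linear scan over the registered slots, checking whether the gfn falls inside [base_gfn, base_gfn + npages). A small standalone version of that lookup, with the slot structure and names simplified for the sketch:

#include <stdint.h>
#include <stdio.h>

struct memslot {
	uint64_t base_gfn;
	uint64_t npages;
	const char *name;
};

static struct memslot slots[] = {
	{ .base_gfn = 0x000, .npages = 0x100, .name = "lowmem"  },
	{ .base_gfn = 0xc00, .npages = 0x040, .name = "highmem" },
};

static struct memslot *gfn_to_memslot(uint64_t gfn)
{
	for (unsigned int i = 0; i < sizeof(slots) / sizeof(slots[0]); i++) {
		struct memslot *s = &slots[i];

		if (gfn >= s->base_gfn && gfn < s->base_gfn + s->npages)
			return s;
	}
	return NULL;		/* not backed by guest memory */
}

int main(void)
{
	struct memslot *s = gfn_to_memslot(0xc20);

	printf("gfn 0xc20 -> %s\n", s ? s->name : "none");
	printf("gfn 0x500 -> %s\n", gfn_to_memslot(0x500) ? "found" : "none");
	return 0;
}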
|
||||
|
||||
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
|
||||
|
@ -925,12 +970,9 @@ int memslot_id(struct kvm *kvm, gfn_t gfn)
|
|||
return memslot - slots->memslots;
|
||||
}
|
||||
|
||||
static unsigned long gfn_to_hva_many(struct kvm *kvm, gfn_t gfn,
|
||||
static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
gfn_t *nr_pages)
|
||||
{
|
||||
struct kvm_memory_slot *slot;
|
||||
|
||||
slot = gfn_to_memslot(kvm, gfn);
|
||||
if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
|
||||
return bad_hva();
|
||||
|
||||
|
@ -942,28 +984,61 @@ static unsigned long gfn_to_hva_many(struct kvm *kvm, gfn_t gfn,
|
|||
|
||||
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
return gfn_to_hva_many(kvm, gfn, NULL);
|
||||
return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gfn_to_hva);
|
||||
|
||||
static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic)
|
||||
static pfn_t get_fault_pfn(void)
|
||||
{
|
||||
get_page(fault_page);
|
||||
return fault_pfn;
|
||||
}
|
||||
|
||||
static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
|
||||
bool *async, bool write_fault, bool *writable)
|
||||
{
|
||||
struct page *page[1];
|
||||
int npages;
|
||||
int npages = 0;
|
||||
pfn_t pfn;
|
||||
|
||||
if (atomic)
|
||||
/* we can do it either atomically or asynchronously, not both */
|
||||
BUG_ON(atomic && async);
|
||||
|
||||
BUG_ON(!write_fault && !writable);
|
||||
|
||||
if (writable)
|
||||
*writable = true;
|
||||
|
||||
if (atomic || async)
|
||||
npages = __get_user_pages_fast(addr, 1, 1, page);
|
||||
else {
|
||||
|
||||
if (unlikely(npages != 1) && !atomic) {
|
||||
might_sleep();
|
||||
npages = get_user_pages_fast(addr, 1, 1, page);
|
||||
|
||||
if (writable)
|
||||
*writable = write_fault;
|
||||
|
||||
npages = get_user_pages_fast(addr, 1, write_fault, page);
|
||||
|
||||
/* map read fault as writable if possible */
|
||||
if (unlikely(!write_fault) && npages == 1) {
|
||||
struct page *wpage[1];
|
||||
|
||||
npages = __get_user_pages_fast(addr, 1, 1, wpage);
|
||||
if (npages == 1) {
|
||||
*writable = true;
|
||||
put_page(page[0]);
|
||||
page[0] = wpage[0];
|
||||
}
|
||||
npages = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(npages != 1)) {
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
if (atomic)
|
||||
goto return_fault_page;
|
||||
return get_fault_pfn();
|
||||
|
||||
down_read(¤t->mm->mmap_sem);
|
||||
if (is_hwpoison_address(addr)) {
|
||||
|
@ -972,19 +1047,20 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic)
|
|||
return page_to_pfn(hwpoison_page);
|
||||
}
|
||||
|
||||
vma = find_vma(current->mm, addr);
|
||||
vma = find_vma_intersection(current->mm, addr, addr+1);
|
||||
|
||||
if (vma == NULL || addr < vma->vm_start ||
|
||||
!(vma->vm_flags & VM_PFNMAP)) {
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
return_fault_page:
|
||||
get_page(fault_page);
|
||||
return page_to_pfn(fault_page);
|
||||
if (vma == NULL)
|
||||
pfn = get_fault_pfn();
|
||||
else if ((vma->vm_flags & VM_PFNMAP)) {
|
||||
pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) +
|
||||
vma->vm_pgoff;
|
||||
BUG_ON(!kvm_is_mmio_pfn(pfn));
|
||||
} else {
|
||||
if (async && (vma->vm_flags & VM_WRITE))
|
||||
*async = true;
|
||||
pfn = get_fault_pfn();
|
||||
}
|
||||
|
||||
pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
BUG_ON(!kvm_is_mmio_pfn(pfn));
|
||||
} else
|
||||
pfn = page_to_pfn(page[0]);
|
||||
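For a VM_PFNMAP vma in the hunk above there is no struct page to pin, so the pfn is computed arithmetically: the page offset of the faulting address within the vma plus the vma's starting pfn (vm_pgoff). A worked example of that arithmetic, with invented values:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	uint64_t vm_start = 0x7f0000200000ull;	/* vma start (host virtual) */
	uint64_t vm_pgoff = 0xd0000;		/* first pfn covered by the vma */
	uint64_t addr     = 0x7f0000203010ull;	/* faulting address inside the vma */

	uint64_t pfn = ((addr - vm_start) >> PAGE_SHIFT) + vm_pgoff;

	/* 0x3010 bytes into the vma = page 3, so pfn = 0xd0000 + 3 */
	printf("pfn = %#llx\n", (unsigned long long)pfn);
	return 0;
}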
|
||||
|
@@ -993,40 +1069,58 @@ return_fault_page:

pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
{
    return hva_to_pfn(kvm, addr, true);
    return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
}
EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);

static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic)
static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
                          bool write_fault, bool *writable)
{
    unsigned long addr;

    if (async)
        *async = false;

    addr = gfn_to_hva(kvm, gfn);
    if (kvm_is_error_hva(addr)) {
        get_page(bad_page);
        return page_to_pfn(bad_page);
    }

    return hva_to_pfn(kvm, addr, atomic);
    return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
}

pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
{
    return __gfn_to_pfn(kvm, gfn, true);
    return __gfn_to_pfn(kvm, gfn, true, NULL, true, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);

pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
                       bool write_fault, bool *writable)
{
    return __gfn_to_pfn(kvm, gfn, false, async, write_fault, writable);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_async);

pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
{
    return __gfn_to_pfn(kvm, gfn, false);
    return __gfn_to_pfn(kvm, gfn, false, NULL, true, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn);

pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
                      bool *writable)
{
    return __gfn_to_pfn(kvm, gfn, false, NULL, write_fault, writable);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);

pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
                         struct kvm_memory_slot *slot, gfn_t gfn)
{
    unsigned long addr = gfn_to_hva_memslot(slot, gfn);
    return hva_to_pfn(kvm, addr, false);
    return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
}

int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,

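All the gfn_to_pfn* exports above collapse onto __gfn_to_pfn(), which now threads atomic, async, write_fault and writable through to hva_to_pfn(). A reduced model of that fan-out, with invented names and a page_present() stub standing in for the real lookup, assuming the async flag simply means "not resident, fault it in later":

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    #define BAD_PFN UINT64_MAX

    /* Invented stand-in for "is the page resident right now?". */
    static bool page_present(uint64_t gfn)
    {
        return (gfn & 1) == 0;
    }

    /* One worker takes every option... */
    static uint64_t lookup(uint64_t gfn, bool atomic, bool *async,
                           bool write_fault, bool *writable)
    {
        if (async)
            *async = false;
        if (writable)
            *writable = write_fault;

        if (!page_present(gfn)) {
            if (async) {
                *async = true;    /* caller will fault it in later */
                return BAD_PFN;
            }
            if (atomic)
                return BAD_PFN;   /* may not sleep to fault it in */
            /* a plain synchronous caller would block here instead */
        }
        return gfn;               /* identity "translation" for the sketch */
    }

    /* ...and the exported variants are one-line wrappers. */
    static uint64_t lookup_atomic(uint64_t gfn)
    {
        return lookup(gfn, true, NULL, true, NULL);
    }

    static uint64_t lookup_async(uint64_t gfn, bool *async, bool write_fault,
                                 bool *writable)
    {
        return lookup(gfn, false, async, write_fault, writable);
    }

    static uint64_t lookup_prot(uint64_t gfn, bool write_fault, bool *writable)
    {
        return lookup(gfn, false, NULL, write_fault, writable);
    }

Keeping one worker behind thin wrappers is what lets gfn_to_pfn_async() and gfn_to_pfn_prot() be added here without touching existing callers of gfn_to_pfn() and gfn_to_pfn_atomic().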
@@ -1035,7 +1129,7 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
    unsigned long addr;
    gfn_t entry;

    addr = gfn_to_hva_many(kvm, gfn, &entry);
    addr = gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, &entry);
    if (kvm_is_error_hva(addr))
        return -1;

@@ -1219,9 +1313,51 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
    return 0;
}

int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
                              gpa_t gpa)
{
    struct kvm_memslots *slots = kvm_memslots(kvm);
    int offset = offset_in_page(gpa);
    gfn_t gfn = gpa >> PAGE_SHIFT;

    ghc->gpa = gpa;
    ghc->generation = slots->generation;
    ghc->memslot = __gfn_to_memslot(slots, gfn);
    ghc->hva = gfn_to_hva_many(ghc->memslot, gfn, NULL);
    if (!kvm_is_error_hva(ghc->hva))
        ghc->hva += offset;
    else
        return -EFAULT;

    return 0;
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init);

int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
                           void *data, unsigned long len)
{
    struct kvm_memslots *slots = kvm_memslots(kvm);
    int r;

    if (slots->generation != ghc->generation)
        kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa);

    if (kvm_is_error_hva(ghc->hva))
        return -EFAULT;

    r = copy_to_user((void __user *)ghc->hva, data, len);
    if (r)
        return -EFAULT;
    mark_page_dirty_in_slot(kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);

    return 0;
}
EXPORT_SYMBOL_GPL(kvm_write_guest_cached);

int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
{
    return kvm_write_guest_page(kvm, gfn, empty_zero_page, offset, len);
    return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page,
                                offset, len);
}
EXPORT_SYMBOL_GPL(kvm_clear_guest_page);

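kvm_gfn_to_hva_cache_init() and kvm_write_guest_cached() above amount to a generation-checked address cache: the gfn-to-hva translation is done once and redone only when the memslot generation moves. A compact userspace sketch of the same idea, with invented names (addr_cache, translate_gpa) and memcpy() standing in for copy_to_user():

    #include <stdint.h>
    #include <string.h>

    static uint64_t memslots_generation;    /* bumped whenever memslots change */
    static unsigned char guest_ram[4096];   /* toy "guest memory" */

    static void *translate_gpa(uint64_t gpa)
    {
        return gpa < sizeof(guest_ram) ? guest_ram + gpa : NULL;
    }

    struct addr_cache {
        uint64_t gpa;
        uint64_t generation;
        void *hva;              /* NULL means the translation failed */
    };

    static void cache_init(struct addr_cache *c, uint64_t gpa)
    {
        c->gpa = gpa;
        c->generation = memslots_generation;
        c->hva = translate_gpa(gpa);
    }

    static int cached_write(struct addr_cache *c, const void *data, size_t len)
    {
        if (c->generation != memslots_generation)
            cache_init(c, c->gpa);      /* layout changed: redo the lookup */

        if (c->hva == NULL)
            return -1;

        memcpy(c->hva, data, len);
        return 0;
    }

A caller that writes the same guest page repeatedly (for example a paravirtual data block the host updates on every exit) pays for the slot lookup only when the memslot layout actually changes.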
@@ -1244,11 +1380,9 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
}
EXPORT_SYMBOL_GPL(kvm_clear_guest);

void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
                             gfn_t gfn)
{
    struct kvm_memory_slot *memslot;

    memslot = gfn_to_memslot(kvm, gfn);
    if (memslot && memslot->dirty_bitmap) {
        unsigned long rel_gfn = gfn - memslot->base_gfn;

@@ -1256,6 +1390,14 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
    }
}

void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
{
    struct kvm_memory_slot *memslot;

    memslot = gfn_to_memslot(kvm, gfn);
    mark_page_dirty_in_slot(kvm, memslot, gfn);
}

/*
 * The vCPU has executed a HLT instruction with in-kernel mode enabled.
 */

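mark_page_dirty() is now just a slot lookup plus a call into mark_page_dirty_in_slot(), which works purely in slot-relative terms (rel_gfn = gfn - base_gfn). A self-contained sketch of that bookkeeping; the struct and helper names are invented here, and the kernel sets the bit with its own bitmap helpers rather than the open-coded OR used below:

    #include <stdint.h>

    #define BITS_PER_LONG_DEMO (8 * sizeof(unsigned long))

    /* Field names mirror the hunk above; everything else is invented. */
    struct demo_slot {
        uint64_t base_gfn;
        unsigned long *dirty_bitmap;    /* one bit per page in the slot */
    };

    static void demo_mark_dirty(struct demo_slot *slot, uint64_t gfn)
    {
        if (slot && slot->dirty_bitmap) {
            uint64_t rel_gfn = gfn - slot->base_gfn;    /* slot-relative index */

            slot->dirty_bitmap[rel_gfn / BITS_PER_LONG_DEMO] |=
                1UL << (rel_gfn % BITS_PER_LONG_DEMO);
        }
    }

Splitting the slot lookup out is what allows kvm_write_guest_cached() above to dirty the page using the memslot it already holds in its cache.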
@@ -1457,6 +1599,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
        if (arg)
            goto out;
        r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
        trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
        break;
    case KVM_GET_REGS: {
        struct kvm_regs *kvm_regs;

@@ -1824,7 +1967,7 @@ static struct file_operations kvm_vm_fops = {

static int kvm_dev_ioctl_create_vm(void)
{
    int fd, r;
    int r;
    struct kvm *kvm;

    kvm = kvm_create_vm();

@@ -1837,11 +1980,11 @@ static int kvm_dev_ioctl_create_vm(void)
        return r;
    }
#endif
    fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
    if (fd < 0)
    r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
    if (r < 0)
        kvm_put_kvm(kvm);

    return fd;
    return r;
}

static long kvm_dev_ioctl_check_extension_generic(long arg)

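The fd-to-r change above works because anon_inode_getfd() already returns either a usable descriptor (>= 0) or a negative errno, so a single variable can carry both outcomes. A userspace analogue of that convention using open(); open_or_errno() is invented for the sketch:

    #include <errno.h>
    #include <fcntl.h>

    static int open_or_errno(const char *path)
    {
        int r = open(path, O_RDONLY);

        return r < 0 ? -errno : r;  /* negative errno on failure, fd otherwise */
    }

    static int use_it(void)
    {
        int r = open_or_errno("/dev/kvm");

        if (r < 0)
            return r;   /* nothing else to clean up, just propagate */
        /* ... use the descriptor in r ... */
        return r;
    }

The caller then needs only the single "if (r < 0)" check shown in the hunk.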
@@ -1922,7 +2065,7 @@ static struct miscdevice kvm_dev = {
    &kvm_chardev_ops,
};

static void hardware_enable(void *junk)
static void hardware_enable_nolock(void *junk)
{
    int cpu = raw_smp_processor_id();
    int r;

@@ -1942,7 +2085,14 @@ static void hardware_enable(void *junk)
    }
}

static void hardware_disable(void *junk)
static void hardware_enable(void *junk)
{
    spin_lock(&kvm_lock);
    hardware_enable_nolock(junk);
    spin_unlock(&kvm_lock);
}

static void hardware_disable_nolock(void *junk)
{
    int cpu = raw_smp_processor_id();

@@ -1952,13 +2102,20 @@ static void hardware_disable(void *junk)
    kvm_arch_hardware_disable(NULL);
}

static void hardware_disable(void *junk)
{
    spin_lock(&kvm_lock);
    hardware_disable_nolock(junk);
    spin_unlock(&kvm_lock);
}

static void hardware_disable_all_nolock(void)
{
    BUG_ON(!kvm_usage_count);

    kvm_usage_count--;
    if (!kvm_usage_count)
        on_each_cpu(hardware_disable, NULL, 1);
        on_each_cpu(hardware_disable_nolock, NULL, 1);
}

static void hardware_disable_all(void)

@@ -1977,7 +2134,7 @@ static int hardware_enable_all(void)
    kvm_usage_count++;
    if (kvm_usage_count == 1) {
        atomic_set(&hardware_enable_failed, 0);
        on_each_cpu(hardware_enable, NULL, 1);
        on_each_cpu(hardware_enable_nolock, NULL, 1);

        if (atomic_read(&hardware_enable_failed)) {
            hardware_disable_all_nolock();

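The hunks above split hardware_enable() and hardware_disable() into a locked wrapper and a _nolock worker, so paths that already hold kvm_lock (or that run the worker on every CPU under it) can call the worker directly. The same shape reduced to a generic sketch, with pthread names standing in for kvm_lock; none of the demo_* symbols are kernel functions:

    #include <pthread.h>

    static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER;
    static int enabled_cpus;        /* toy state guarded by demo_lock */

    /* Worker: caller must already hold demo_lock. */
    static void demo_enable_nolock(void)
    {
        enabled_cpus++;
    }

    /* Wrapper: takes the lock for callers that do not hold it. */
    static void demo_enable(void)
    {
        pthread_mutex_lock(&demo_lock);
        demo_enable_nolock();
        pthread_mutex_unlock(&demo_lock);
    }

With the wrapper in place, kvm_cpu_hotplug() in the next hunk can drop its open-coded spin_lock/spin_unlock around hardware_enable().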
@@ -2008,27 +2165,19 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
    case CPU_STARTING:
        printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
               cpu);
        spin_lock(&kvm_lock);
        hardware_enable(NULL);
        spin_unlock(&kvm_lock);
        break;
    }
    return NOTIFY_OK;
}

asmlinkage void kvm_handle_fault_on_reboot(void)
asmlinkage void kvm_spurious_fault(void)
{
    if (kvm_rebooting) {
        /* spin while reset goes on */
        local_irq_enable();
        while (true)
            cpu_relax();
    }
    /* Fault while not rebooting.  We want the trace. */
    BUG();
}
EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot);
EXPORT_SYMBOL_GPL(kvm_spurious_fault);

static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
                      void *v)

@@ -2041,7 +2190,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
     */
    printk(KERN_INFO "kvm: exiting hardware virtualization\n");
    kvm_rebooting = true;
    on_each_cpu(hardware_disable, NULL, 1);
    on_each_cpu(hardware_disable_nolock, NULL, 1);
    return NOTIFY_OK;
}

@@ -2211,7 +2360,7 @@ static void kvm_exit_debug(void)
static int kvm_suspend(struct sys_device *dev, pm_message_t state)
{
    if (kvm_usage_count)
        hardware_disable(NULL);
        hardware_disable_nolock(NULL);
    return 0;
}

@@ -2219,7 +2368,7 @@ static int kvm_resume(struct sys_device *dev)
{
    if (kvm_usage_count) {
        WARN_ON(spin_is_locked(&kvm_lock));
        hardware_enable(NULL);
        hardware_enable_nolock(NULL);
    }
    return 0;
}

@@ -2336,6 +2485,10 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
        goto out_free_5;
    }

    r = kvm_async_pf_init();
    if (r)
        goto out_free;

    kvm_chardev_ops.owner = module;
    kvm_vm_fops.owner = module;
    kvm_vcpu_fops.owner = module;

@@ -2343,7 +2496,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
    r = misc_register(&kvm_dev);
    if (r) {
        printk(KERN_ERR "kvm: misc device register failed\n");
        goto out_free;
        goto out_unreg;
    }

    kvm_preempt_ops.sched_in = kvm_sched_in;

@@ -2353,6 +2506,8 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,

    return 0;

out_unreg:
    kvm_async_pf_deinit();
out_free:
    kmem_cache_destroy(kvm_vcpu_cache);
out_free_5:

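The new out_unreg label keeps kvm_init()'s error path in strict reverse order of setup: async page fault support is registered after the vcpu cache, so on failure it is torn down before the cache is destroyed. A generic sketch of that staged-cleanup idiom; the step_* helpers are placeholders, not kernel functions:

    #include <stdio.h>

    static int step_cache_init(void)    { return 0; }
    static void step_cache_exit(void)   { }
    static int step_async_init(void)    { return 0; }  /* think kvm_async_pf_init() */
    static void step_async_exit(void)   { }            /* think kvm_async_pf_deinit() */
    static int step_misc_register(void) { return 0; }

    static int demo_init(void)
    {
        int r;

        r = step_cache_init();
        if (r)
            goto out;

        r = step_async_init();
        if (r)
            goto out_cache;

        r = step_misc_register();
        if (r) {
            fprintf(stderr, "register failed\n");
            goto out_unreg;     /* undo async, then cache, in reverse order */
        }

        return 0;

    out_unreg:
        step_async_exit();
    out_cache:
        step_cache_exit();
    out:
        return r;
    }

kvm_exit() in the next hunk gains the matching kvm_async_pf_deinit() call on the module-unload path.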
@@ -2385,11 +2540,12 @@ void kvm_exit(void)
    kvm_exit_debug();
    misc_deregister(&kvm_dev);
    kmem_cache_destroy(kvm_vcpu_cache);
    kvm_async_pf_deinit();
    sysdev_unregister(&kvm_sysdev);
    sysdev_class_unregister(&kvm_sysdev_class);
    unregister_reboot_notifier(&kvm_reboot_notifier);
    unregister_cpu_notifier(&kvm_cpu_notifier);
    on_each_cpu(hardware_disable, NULL, 1);
    on_each_cpu(hardware_disable_nolock, NULL, 1);
    kvm_arch_hardware_unsetup();
    kvm_arch_exit();
    free_cpumask_var(cpus_hardware_enabled);