Merge tag 'kvm-3.10-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm updates from Gleb Natapov:
 "Highlights of the updates are:

  general:
   - new emulated device API
   - legacy device assignment is now optional
   - irqfd interface is more generic and can be shared between arches

  x86:
   - VMCS shadow support and other nested VMX improvements
   - APIC virtualization and Posted Interrupt hardware support
   - Optimize mmio spte zapping

  ppc:
    - BookE: in-kernel MPIC emulation with irqfd support
    - Book3S: in-kernel XICS emulation (incomplete)
    - Book3S: HV: migration fixes
    - BookE: more debug support preparation
    - BookE: e6500 support

  ARM:
   - reworking of Hyp idmaps

  s390:
   - ioeventfd for virtio-ccw

  And many other bug fixes, cleanups and improvements"

* tag 'kvm-3.10-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (204 commits)
  kvm: Add compat_ioctl for device control API
  KVM: x86: Account for failing enable_irq_window for NMI window request
  KVM: PPC: Book3S: Add API for in-kernel XICS emulation
  kvm/ppc/mpic: fix missing unlock in set_base_addr()
  kvm/ppc: Hold srcu lock when calling kvm_io_bus_read/write
  kvm/ppc/mpic: remove users
  kvm/ppc/mpic: fix mmio region lists when multiple guests used
  kvm/ppc/mpic: remove default routes from documentation
  kvm: KVM_CAP_IOMMU only available with device assignment
  ARM: KVM: iterate over all CPUs for CPU compatibility check
  KVM: ARM: Fix spelling in error message
  ARM: KVM: define KVM_ARM_MAX_VCPUS unconditionally
  KVM: ARM: Fix API documentation for ONE_REG encoding
  ARM: KVM: promote vfp_host pointer to generic host cpu context
  ARM: KVM: add architecture specific hook for capabilities
  ARM: KVM: perform HYP initilization for hotplugged CPUs
  ARM: KVM: switch to a dual-step HYP init code
  ARM: KVM: rework HYP page table freeing
  ARM: KVM: enforce maximum size for identity mapped code
  ARM: KVM: move to a KVM provided HYP idmap
  ...
This commit is contained in:
Linus Torvalds 2013-05-05 14:47:31 -07:00
commit 01227a889e
110 changed files with 8607 additions and 2355 deletions

View File

@ -1486,15 +1486,23 @@ struct kvm_ioeventfd {
__u8 pad[36];
};
For the special case of virtio-ccw devices on s390, the ioevent is matched
to a subchannel/virtqueue tuple instead.
The following flags are defined:
#define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
#define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio)
#define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign)
#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \
(1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify)
If datamatch flag is set, the event will be signaled only if the written value
to the registered address is equal to datamatch in struct kvm_ioeventfd.
For virtio-ccw devices, addr contains the subchannel id and datamatch the
virtqueue index.
4.60 KVM_DIRTY_TLB
@ -1780,27 +1788,48 @@ registers, find a list below:
PPC | KVM_REG_PPC_VPA_DTL | 128
PPC | KVM_REG_PPC_EPCR | 32
PPC | KVM_REG_PPC_EPR | 32
PPC | KVM_REG_PPC_TCR | 32
PPC | KVM_REG_PPC_TSR | 32
PPC | KVM_REG_PPC_OR_TSR | 32
PPC | KVM_REG_PPC_CLEAR_TSR | 32
PPC | KVM_REG_PPC_MAS0 | 32
PPC | KVM_REG_PPC_MAS1 | 32
PPC | KVM_REG_PPC_MAS2 | 64
PPC | KVM_REG_PPC_MAS7_3 | 64
PPC | KVM_REG_PPC_MAS4 | 32
PPC | KVM_REG_PPC_MAS6 | 32
PPC | KVM_REG_PPC_MMUCFG | 32
PPC | KVM_REG_PPC_TLB0CFG | 32
PPC | KVM_REG_PPC_TLB1CFG | 32
PPC | KVM_REG_PPC_TLB2CFG | 32
PPC | KVM_REG_PPC_TLB3CFG | 32
PPC | KVM_REG_PPC_TLB0PS | 32
PPC | KVM_REG_PPC_TLB1PS | 32
PPC | KVM_REG_PPC_TLB2PS | 32
PPC | KVM_REG_PPC_TLB3PS | 32
PPC | KVM_REG_PPC_EPTCFG | 32
PPC | KVM_REG_PPC_ICP_STATE | 64
ARM registers are mapped using the lower 32 bits. The upper 16 of that
is the register group type, or coprocessor number:
ARM core registers have the following id bit patterns:
0x4002 0000 0010 <index into the kvm_regs struct:16>
0x4020 0000 0010 <index into the kvm_regs struct:16>
ARM 32-bit CP15 registers have the following id bit patterns:
0x4002 0000 000F <zero:1> <crn:4> <crm:4> <opc1:4> <opc2:3>
0x4020 0000 000F <zero:1> <crn:4> <crm:4> <opc1:4> <opc2:3>
ARM 64-bit CP15 registers have the following id bit patterns:
0x4003 0000 000F <zero:1> <zero:4> <crm:4> <opc1:4> <zero:3>
0x4030 0000 000F <zero:1> <zero:4> <crm:4> <opc1:4> <zero:3>
ARM CCSIDR registers are demultiplexed by CSSELR value:
0x4002 0000 0011 00 <csselr:8>
0x4020 0000 0011 00 <csselr:8>
ARM 32-bit VFP control registers have the following id bit patterns:
0x4002 0000 0012 1 <regno:12>
0x4020 0000 0012 1 <regno:12>
ARM 64-bit FP registers have the following id bit patterns:
0x4002 0000 0012 0 <regno:12>
0x4030 0000 0012 0 <regno:12>
4.69 KVM_GET_ONE_REG
@ -2161,6 +2190,76 @@ header; first `n_valid' valid entries with contents from the data
written, then `n_invalid' invalid entries, invalidating any previously
valid entries found.
4.79 KVM_CREATE_DEVICE
Capability: KVM_CAP_DEVICE_CTRL
Type: vm ioctl
Parameters: struct kvm_create_device (in/out)
Returns: 0 on success, -1 on error
Errors:
ENODEV: The device type is unknown or unsupported
EEXIST: Device already created, and this type of device may not
be instantiated multiple times
Other error conditions may be defined by individual device types or
have their standard meanings.
Creates an emulated device in the kernel. The file descriptor returned
in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR.
If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the
device type is supported (not necessarily whether it can be created
in the current vm).
Individual devices should not define flags. Attributes should be used
for specifying any behavior that is not implied by the device type
number.
struct kvm_create_device {
__u32 type; /* in: KVM_DEV_TYPE_xxx */
__u32 fd; /* out: device handle */
__u32 flags; /* in: KVM_CREATE_DEVICE_xxx */
};
4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
Capability: KVM_CAP_DEVICE_CTRL
Type: device ioctl
Parameters: struct kvm_device_attr
Returns: 0 on success, -1 on error
Errors:
ENXIO: The group or attribute is unknown/unsupported for this device
EPERM: The attribute cannot (currently) be accessed this way
(e.g. read-only attribute, or attribute that only makes
sense when the device is in a different state)
Other error conditions may be defined by individual device types.
Gets/sets a specified piece of device configuration and/or state. The
semantics are device-specific. See individual device documentation in
the "devices" directory. As with ONE_REG, the size of the data
transferred is defined by the particular attribute.
struct kvm_device_attr {
__u32 flags; /* no flags currently defined */
__u32 group; /* device-defined */
__u64 attr; /* group-defined */
__u64 addr; /* userspace address of attr data */
};
4.81 KVM_HAS_DEVICE_ATTR
Capability: KVM_CAP_DEVICE_CTRL
Type: device ioctl
Parameters: struct kvm_device_attr
Returns: 0 on success, -1 on error
Errors:
ENXIO: The group or attribute is unknown/unsupported for this device
Tests whether a device supports a particular attribute. A successful
return indicates the attribute is implemented. It does not necessarily
indicate that the attribute can be read or written in the device's
current state. "addr" is ignored.
4.77 KVM_ARM_VCPU_INIT
@ -2243,6 +2342,25 @@ and distributor interface, the ioctl must be called after calling
KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling
this ioctl twice for any of the base addresses will return -EEXIST.
4.82 KVM_PPC_RTAS_DEFINE_TOKEN
Capability: KVM_CAP_PPC_RTAS
Architectures: ppc
Type: vm ioctl
Parameters: struct kvm_rtas_token_args
Returns: 0 on success, -1 on error
Defines a token value for a RTAS (Run Time Abstraction Services)
service in order to allow it to be handled in the kernel. The
argument struct gives the name of the service, which must be the name
of a service that has a kernel-side implementation. If the token
value is non-zero, it will be associated with that service, and
subsequent RTAS calls by the guest specifying that token will be
handled by the kernel. If the token value is 0, then any token
associated with the service will be forgotten, and subsequent RTAS
calls by the guest for that service will be passed to userspace to be
handled.
5. The kvm_run structure
------------------------
@ -2646,3 +2764,19 @@ to receive the topmost interrupt vector.
When disabled (args[0] == 0), behavior is as if this facility is unsupported.
When this capability is enabled, KVM_EXIT_EPR can occur.
6.6 KVM_CAP_IRQ_MPIC
Architectures: ppc
Parameters: args[0] is the MPIC device fd
args[1] is the MPIC CPU number for this vcpu
This capability connects the vcpu to an in-kernel MPIC device.
6.7 KVM_CAP_IRQ_XICS
Architectures: ppc
Parameters: args[0] is the XICS device fd
args[1] is the XICS CPU number (server ID) for this vcpu
This capability connects the vcpu to an in-kernel XICS device.

View File

@ -0,0 +1 @@
This directory contains specific device bindings for KVM_CAP_DEVICE_CTRL.

View File

@ -0,0 +1,53 @@
MPIC interrupt controller
=========================
Device types supported:
KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0
KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2
Only one MPIC instance, of any type, may be instantiated. The created
MPIC will act as the system interrupt controller, connecting to each
vcpu's interrupt inputs.
Groups:
KVM_DEV_MPIC_GRP_MISC
Attributes:
KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit)
Base address of the 256 KiB MPIC register space. Must be
naturally aligned. A value of zero disables the mapping.
Reset value is zero.
KVM_DEV_MPIC_GRP_REGISTER (rw, 32-bit)
Access an MPIC register, as if the access were made from the guest.
"attr" is the byte offset into the MPIC register space. Accesses
must be 4-byte aligned.
MSIs may be signaled by using this attribute group to write
to the relevant MSIIR.
KVM_DEV_MPIC_GRP_IRQ_ACTIVE (rw, 32-bit)
IRQ input line for each standard openpic source. 0 is inactive and 1
is active, regardless of interrupt sense.
For edge-triggered interrupts: Writing 1 is considered an activating
edge, and writing 0 is ignored. Reading returns 1 if a previously
signaled edge has not been acknowledged, and 0 otherwise.
"attr" is the IRQ number. IRQ numbers for standard sources are the
byte offset of the relevant IVPR from EIVPR0, divided by 32.
IRQ Routing:
The MPIC emulation supports IRQ routing. Only a single MPIC device can
be instantiated. Once that device has been created, it's available as
irqchip id 0.
This irqchip 0 has 256 interrupt pins, which expose the interrupts in
the main array of interrupt sources (a.k.a. "SRC" interrupts).
The numbering is the same as the MPIC device tree binding -- based on
the register offset from the beginning of the sources array, without
regard to any subdivisions in chip documentation such as "internal"
or "external" interrupts.
Access to non-SRC interrupts is not implemented through IRQ routing mechanisms.

View File

@ -0,0 +1,66 @@
XICS interrupt controller
Device type supported: KVM_DEV_TYPE_XICS
Groups:
KVM_DEV_XICS_SOURCES
Attributes: One per interrupt source, indexed by the source number.
This device emulates the XICS (eXternal Interrupt Controller
Specification) defined in PAPR. The XICS has a set of interrupt
sources, each identified by a 20-bit source number, and a set of
Interrupt Control Presentation (ICP) entities, also called "servers",
each associated with a virtual CPU.
The ICP entities are created by enabling the KVM_CAP_IRQ_ARCH
capability for each vcpu, specifying KVM_CAP_IRQ_XICS in args[0] and
the interrupt server number (i.e. the vcpu number from the XICS's
point of view) in args[1] of the kvm_enable_cap struct. Each ICP has
64 bits of state which can be read and written using the
KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctls on the vcpu. The 64 bit
state word has the following bitfields, starting at the
least-significant end of the word:
* Unused, 16 bits
* Pending interrupt priority, 8 bits
Zero is the highest priority, 255 means no interrupt is pending.
* Pending IPI (inter-processor interrupt) priority, 8 bits
Zero is the highest priority, 255 means no IPI is pending.
* Pending interrupt source number, 24 bits
Zero means no interrupt pending, 2 means an IPI is pending
* Current processor priority, 8 bits
Zero is the highest priority, meaning no interrupts can be
delivered, and 255 is the lowest priority.
Each source has 64 bits of state that can be read and written using
the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the
KVM_DEV_XICS_SOURCES attribute group, with the attribute number being
the interrupt source number. The 64 bit state word has the following
bitfields, starting from the least-significant end of the word:
* Destination (server number), 32 bits
This specifies where the interrupt should be sent, and is the
interrupt server number specified for the destination vcpu.
* Priority, 8 bits
This is the priority specified for this interrupt source, where 0 is
the highest priority and 255 is the lowest. An interrupt with a
priority of 255 will never be delivered.
* Level sensitive flag, 1 bit
This bit is 1 for a level-sensitive interrupt source, or 0 for
edge-sensitive (or MSI).
* Masked flag, 1 bit
This bit is set to 1 if the interrupt is masked (cannot be delivered
regardless of its priority), for example by the ibm,int-off RTAS
call, or 0 if it is not masked.
* Pending flag, 1 bit
This bit is 1 if the source has a pending interrupt, otherwise 0.
Only one XICS instance may be created per VM.

View File

@ -8,7 +8,6 @@
#define __idmap __section(.idmap.text) noinline notrace
extern pgd_t *idmap_pgd;
extern pgd_t *hyp_pgd;
void setup_mm_for_reboot(void);

View File

@ -87,7 +87,7 @@ struct kvm_vcpu_fault_info {
u32 hyp_pc; /* PC when exception was taken from Hyp mode */
};
typedef struct vfp_hard_struct kvm_kernel_vfp_t;
typedef struct vfp_hard_struct kvm_cpu_context_t;
struct kvm_vcpu_arch {
struct kvm_regs regs;
@ -105,8 +105,10 @@ struct kvm_vcpu_arch {
struct kvm_vcpu_fault_info fault;
/* Floating point registers (VFP and Advanced SIMD/NEON) */
kvm_kernel_vfp_t vfp_guest;
kvm_kernel_vfp_t *vfp_host;
struct vfp_hard_struct vfp_guest;
/* Host FP context */
kvm_cpu_context_t *host_cpu_context;
/* VGIC state */
struct vgic_cpu vgic_cpu;
@ -188,23 +190,38 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index);
static inline void __cpu_init_hyp_mode(unsigned long long pgd_ptr,
static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr,
unsigned long long pgd_ptr,
unsigned long hyp_stack_ptr,
unsigned long vector_ptr)
{
unsigned long pgd_low, pgd_high;
pgd_low = (pgd_ptr & ((1ULL << 32) - 1));
pgd_high = (pgd_ptr >> 32ULL);
/*
* Call initialization code, and switch to the full blown
* HYP code. The init code doesn't need to preserve these registers as
* r1-r3 and r12 are already callee save according to the AAPCS.
* Note that we slightly misuse the prototype by casing the pgd_low to
* a void *.
* Call initialization code, and switch to the full blown HYP
* code. The init code doesn't need to preserve these
* registers as r0-r3 are already callee saved according to
* the AAPCS.
* Note that we slightly misuse the prototype by casing the
* stack pointer to a void *.
*
* We don't have enough registers to perform the full init in
* one go. Install the boot PGD first, and then install the
* runtime PGD, stack pointer and vectors. The PGDs are always
* passed as the third argument, in order to be passed into
* r2-r3 to the init code (yes, this is compliant with the
* PCS!).
*/
kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr);
kvm_call_hyp(NULL, 0, boot_pgd_ptr);
kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
}
static inline int kvm_arch_dev_ioctl_check_extension(long ext)
{
return 0;
}
int kvm_perf_init(void);
int kvm_perf_teardown(void);
#endif /* __ARM_KVM_HOST_H__ */

View File

@ -19,21 +19,33 @@
#ifndef __ARM_KVM_MMU_H__
#define __ARM_KVM_MMU_H__
#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
#include <asm/idmap.h>
#include <asm/memory.h>
#include <asm/page.h>
/*
* We directly use the kernel VA for the HYP, as we can directly share
* the mapping (HTTBR "covers" TTBR1).
*/
#define HYP_PAGE_OFFSET_MASK (~0UL)
#define HYP_PAGE_OFFSET_MASK UL(~0)
#define HYP_PAGE_OFFSET PAGE_OFFSET
#define KERN_TO_HYP(kva) (kva)
/*
* Our virtual mapping for the boot-time MMU-enable code. Must be
* shared across all the page-tables. Conveniently, we use the vectors
* page, where no kernel data will ever be shared with HYP.
*/
#define TRAMPOLINE_VA UL(CONFIG_VECTORS_BASE)
#ifndef __ASSEMBLY__
#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
int create_hyp_mappings(void *from, void *to);
int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
void free_hyp_pmds(void);
void free_boot_hyp_pgd(void);
void free_hyp_pgds(void);
int kvm_alloc_stage2_pgd(struct kvm *kvm);
void kvm_free_stage2_pgd(struct kvm *kvm);
@ -45,6 +57,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
phys_addr_t kvm_mmu_get_httbr(void);
phys_addr_t kvm_mmu_get_boot_httbr(void);
phys_addr_t kvm_get_idmap_vector(void);
int kvm_mmu_init(void);
void kvm_clear_hyp_idmap(void);
@ -114,4 +128,8 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
}
}
#define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l))
#endif /* !__ASSEMBLY__ */
#endif /* __ARM_KVM_MMU_H__ */

View File

@ -158,7 +158,7 @@ int main(void)
DEFINE(VCPU_MIDR, offsetof(struct kvm_vcpu, arch.midr));
DEFINE(VCPU_CP15, offsetof(struct kvm_vcpu, arch.cp15));
DEFINE(VCPU_VFP_GUEST, offsetof(struct kvm_vcpu, arch.vfp_guest));
DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.vfp_host));
DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.host_cpu_context));
DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs));
DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs));
DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs));

View File

@ -20,7 +20,7 @@
VMLINUX_SYMBOL(__idmap_text_start) = .; \
*(.idmap.text) \
VMLINUX_SYMBOL(__idmap_text_end) = .; \
ALIGN_FUNCTION(); \
. = ALIGN(32); \
VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \
*(.hyp.idmap.text) \
VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;
@ -315,3 +315,8 @@ SECTIONS
*/
ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support")
ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")
/*
* The HYP init code can't be more than a page long.
* The above comment applies as well.
*/
ASSERT(((__hyp_idmap_text_end - __hyp_idmap_text_start) <= PAGE_SIZE), "HYP init code too big")

View File

@ -41,9 +41,9 @@ config KVM_ARM_HOST
Provides host support for ARM processors.
config KVM_ARM_MAX_VCPUS
int "Number maximum supported virtual CPUs per VM"
depends on KVM_ARM_HOST
default 4
int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST
default 4 if KVM_ARM_HOST
default 0
help
Static number of max supported virtual CPUs per VM.

View File

@ -18,6 +18,6 @@ kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
obj-y += kvm-arm.o init.o interrupts.o
obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
obj-y += coproc.o coproc_a15.o mmio.o psci.o
obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o

View File

@ -22,6 +22,7 @@
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
#include <asm/kvm_vgic.h>
@ -64,7 +65,7 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
timer->cntv_ctl |= 1 << 1; /* Mask the interrupt in the guest */
timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
vcpu->arch.timer_cpu.irq->irq,
vcpu->arch.timer_cpu.irq->level);
@ -133,8 +134,8 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
cycle_t cval, now;
u64 ns;
/* Check if the timer is enabled and unmasked first */
if ((timer->cntv_ctl & 3) != 1)
if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
!(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
return;
cval = timer->cntv_cval;

View File

@ -16,6 +16,7 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
@ -48,7 +49,7 @@ __asm__(".arch_extension virt");
#endif
static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
static kvm_kernel_vfp_t __percpu *kvm_host_vfp_state;
static kvm_cpu_context_t __percpu *kvm_host_cpu_state;
static unsigned long hyp_default_vectors;
/* Per-CPU variable containing the currently running vcpu. */
@ -206,7 +207,7 @@ int kvm_dev_ioctl_check_extension(long ext)
r = KVM_MAX_VCPUS;
break;
default:
r = 0;
r = kvm_arch_dev_ioctl_check_extension(ext);
break;
}
return r;
@ -218,27 +219,18 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -EINVAL;
}
int kvm_arch_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old,
int user_alloc)
{
return 0;
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_memory_slot old,
struct kvm_userspace_memory_region *mem,
bool user_alloc)
enum kvm_mr_change change)
{
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old,
bool user_alloc)
const struct kvm_memory_slot *old,
enum kvm_mr_change change)
{
}
@ -326,7 +318,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
vcpu->cpu = cpu;
vcpu->arch.vfp_host = this_cpu_ptr(kvm_host_vfp_state);
vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
/*
* Check whether this vcpu requires the cache to be flushed on
@ -639,7 +631,8 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
return 0;
}
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level)
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
bool line_status)
{
u32 irq = irq_level->irq;
unsigned int irq_type, vcpu_idx, irq_num;
@ -794,30 +787,48 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
}
static void cpu_init_hyp_mode(void *vector)
static void cpu_init_hyp_mode(void *dummy)
{
unsigned long long boot_pgd_ptr;
unsigned long long pgd_ptr;
unsigned long hyp_stack_ptr;
unsigned long stack_page;
unsigned long vector_ptr;
/* Switch from the HYP stub to our own HYP init vector */
__hyp_set_vectors((unsigned long)vector);
__hyp_set_vectors(kvm_get_idmap_vector());
boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr();
pgd_ptr = (unsigned long long)kvm_mmu_get_httbr();
stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
hyp_stack_ptr = stack_page + PAGE_SIZE;
vector_ptr = (unsigned long)__kvm_hyp_vector;
__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
__cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
}
static int hyp_init_cpu_notify(struct notifier_block *self,
unsigned long action, void *cpu)
{
switch (action) {
case CPU_STARTING:
case CPU_STARTING_FROZEN:
cpu_init_hyp_mode(NULL);
break;
}
return NOTIFY_OK;
}
static struct notifier_block hyp_init_cpu_nb = {
.notifier_call = hyp_init_cpu_notify,
};
/**
* Inits Hyp-mode on all online CPUs
*/
static int init_hyp_mode(void)
{
phys_addr_t init_phys_addr;
int cpu;
int err = 0;
@ -849,24 +860,6 @@ static int init_hyp_mode(void)
per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
}
/*
* Execute the init code on each CPU.
*
* Note: The stack is not mapped yet, so don't do anything else than
* initializing the hypervisor mode on each CPU using a local stack
* space for temporary storage.
*/
init_phys_addr = virt_to_phys(__kvm_hyp_init);
for_each_online_cpu(cpu) {
smp_call_function_single(cpu, cpu_init_hyp_mode,
(void *)(long)init_phys_addr, 1);
}
/*
* Unmap the identity mapping
*/
kvm_clear_hyp_idmap();
/*
* Map the Hyp-code called directly from the host
*/
@ -890,33 +883,38 @@ static int init_hyp_mode(void)
}
/*
* Map the host VFP structures
* Map the host CPU structures
*/
kvm_host_vfp_state = alloc_percpu(kvm_kernel_vfp_t);
if (!kvm_host_vfp_state) {
kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
if (!kvm_host_cpu_state) {
err = -ENOMEM;
kvm_err("Cannot allocate host VFP state\n");
kvm_err("Cannot allocate host CPU state\n");
goto out_free_mappings;
}
for_each_possible_cpu(cpu) {
kvm_kernel_vfp_t *vfp;
kvm_cpu_context_t *cpu_ctxt;
vfp = per_cpu_ptr(kvm_host_vfp_state, cpu);
err = create_hyp_mappings(vfp, vfp + 1);
cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu);
err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1);
if (err) {
kvm_err("Cannot map host VFP state: %d\n", err);
goto out_free_vfp;
kvm_err("Cannot map host CPU state: %d\n", err);
goto out_free_context;
}
}
/*
* Execute the init code on each CPU.
*/
on_each_cpu(cpu_init_hyp_mode, NULL, 1);
/*
* Init HYP view of VGIC
*/
err = kvm_vgic_hyp_init();
if (err)
goto out_free_vfp;
goto out_free_context;
#ifdef CONFIG_KVM_ARM_VGIC
vgic_present = true;
@ -929,12 +927,19 @@ static int init_hyp_mode(void)
if (err)
goto out_free_mappings;
#ifndef CONFIG_HOTPLUG_CPU
free_boot_hyp_pgd();
#endif
kvm_perf_init();
kvm_info("Hyp mode initialized successfully\n");
return 0;
out_free_vfp:
free_percpu(kvm_host_vfp_state);
out_free_context:
free_percpu(kvm_host_cpu_state);
out_free_mappings:
free_hyp_pmds();
free_hyp_pgds();
out_free_stack_pages:
for_each_possible_cpu(cpu)
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
@ -943,27 +948,42 @@ out_err:
return err;
}
static void check_kvm_target_cpu(void *ret)
{
*(int *)ret = kvm_target_cpu();
}
/**
* Initialize Hyp-mode and memory mappings on all CPUs.
*/
int kvm_arch_init(void *opaque)
{
int err;
int ret, cpu;
if (!is_hyp_mode_available()) {
kvm_err("HYP mode not available\n");
return -ENODEV;
}
if (kvm_target_cpu() < 0) {
kvm_err("Target CPU not supported!\n");
for_each_online_cpu(cpu) {
smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
if (ret < 0) {
kvm_err("Error, CPU %d not supported!\n", cpu);
return -ENODEV;
}
}
err = init_hyp_mode();
if (err)
goto out_err;
err = register_cpu_notifier(&hyp_init_cpu_nb);
if (err) {
kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
goto out_err;
}
kvm_coproc_table_init();
return 0;
out_err:
@ -973,6 +993,7 @@ out_err:
/* NOP: Compiling as a module not supported */
void kvm_arch_exit(void)
{
kvm_perf_teardown();
}
static int arm_init(void)

View File

@ -21,13 +21,33 @@
#include <asm/asm-offsets.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
/********************************************************************
* Hypervisor initialization
* - should be called with:
* r0,r1 = Hypervisor pgd pointer
* r2 = top of Hyp stack (kernel VA)
* r3 = pointer to hyp vectors
* r0 = top of Hyp stack (kernel VA)
* r1 = pointer to hyp vectors
* r2,r3 = Hypervisor pgd pointer
*
* The init scenario is:
* - We jump in HYP with four parameters: boot HYP pgd, runtime HYP pgd,
* runtime stack, runtime vectors
* - Enable the MMU with the boot pgd
* - Jump to a target into the trampoline page (remember, this is the same
* physical page!)
* - Now switch to the runtime pgd (same VA, and still the same physical
* page!)
* - Invalidate TLBs
* - Set stack and vectors
* - Profit! (or eret, if you only care about the code).
*
* As we only have four registers available to pass parameters (and we
* need six), we split the init in two phases:
* - Phase 1: r0 = 0, r1 = 0, r2,r3 contain the boot PGD.
* Provides the basic HYP init, and enable the MMU.
* - Phase 2: r0 = ToS, r1 = vectors, r2,r3 contain the runtime PGD.
* Switches to the runtime PGD, set stack and vectors.
*/
.text
@ -47,22 +67,25 @@ __kvm_hyp_init:
W(b) .
__do_hyp_init:
cmp r0, #0 @ We have a SP?
bne phase2 @ Yes, second stage init
@ Set the HTTBR to point to the hypervisor PGD pointer passed
mcrr p15, 4, r0, r1, c2
mcrr p15, 4, r2, r3, c2
@ Set the HTCR and VTCR to the same shareability and cacheability
@ settings as the non-secure TTBCR and with T0SZ == 0.
mrc p15, 4, r0, c2, c0, 2 @ HTCR
ldr r12, =HTCR_MASK
bic r0, r0, r12
ldr r2, =HTCR_MASK
bic r0, r0, r2
mrc p15, 0, r1, c2, c0, 2 @ TTBCR
and r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ)
orr r0, r0, r1
mcr p15, 4, r0, c2, c0, 2 @ HTCR
mrc p15, 4, r1, c2, c1, 2 @ VTCR
ldr r12, =VTCR_MASK
bic r1, r1, r12
ldr r2, =VTCR_MASK
bic r1, r1, r2
bic r0, r0, #(~VTCR_HTCR_SH) @ clear non-reusable HTCR bits
orr r1, r0, r1
orr r1, r1, #(KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S)
@ -85,24 +108,41 @@ __do_hyp_init:
@ - Memory alignment checks: enabled
@ - MMU: enabled (this code must be run from an identity mapping)
mrc p15, 4, r0, c1, c0, 0 @ HSCR
ldr r12, =HSCTLR_MASK
bic r0, r0, r12
ldr r2, =HSCTLR_MASK
bic r0, r0, r2
mrc p15, 0, r1, c1, c0, 0 @ SCTLR
ldr r12, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
and r1, r1, r12
ARM( ldr r12, =(HSCTLR_M | HSCTLR_A) )
THUMB( ldr r12, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) )
orr r1, r1, r12
ldr r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
and r1, r1, r2
ARM( ldr r2, =(HSCTLR_M | HSCTLR_A) )
THUMB( ldr r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) )
orr r1, r1, r2
orr r0, r0, r1
isb
mcr p15, 4, r0, c1, c0, 0 @ HSCR
isb
@ Set stack pointer and return to the kernel
mov sp, r2
@ End of init phase-1
eret
phase2:
@ Set stack pointer
mov sp, r0
@ Set HVBAR to point to the HYP vectors
mcr p15, 4, r3, c12, c0, 0 @ HVBAR
mcr p15, 4, r1, c12, c0, 0 @ HVBAR
@ Jump to the trampoline page
ldr r0, =TRAMPOLINE_VA
adr r1, target
bfi r0, r1, #0, #PAGE_SHIFT
mov pc, r0
target: @ We're now in the trampoline code, switch page tables
mcrr p15, 4, r2, r3, c2
isb
@ Invalidate the old TLBs
mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH
dsb
eret

View File

@ -32,8 +32,15 @@
extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
static pgd_t *boot_hyp_pgd;
static pgd_t *hyp_pgd;
static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
static void *init_bounce_page;
static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;
static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
@ -71,242 +78,6 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
return p;
}
static void free_ptes(pmd_t *pmd, unsigned long addr)
{
pte_t *pte;
unsigned int i;
for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
if (!pmd_none(*pmd) && pmd_table(*pmd)) {
pte = pte_offset_kernel(pmd, addr);
pte_free_kernel(NULL, pte);
}
pmd++;
}
}
static void free_hyp_pgd_entry(unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
unsigned long hyp_addr = KERN_TO_HYP(addr);
pgd = hyp_pgd + pgd_index(hyp_addr);
pud = pud_offset(pgd, hyp_addr);
if (pud_none(*pud))
return;
BUG_ON(pud_bad(*pud));
pmd = pmd_offset(pud, hyp_addr);
free_ptes(pmd, addr);
pmd_free(NULL, pmd);
pud_clear(pud);
}
/**
* free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables
*
* Assumes this is a page table used strictly in Hyp-mode and therefore contains
* either mappings in the kernel memory area (above PAGE_OFFSET), or
* device mappings in the vmalloc range (from VMALLOC_START to VMALLOC_END).
*/
void free_hyp_pmds(void)
{
unsigned long addr;
mutex_lock(&kvm_hyp_pgd_mutex);
for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
free_hyp_pgd_entry(addr);
for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
free_hyp_pgd_entry(addr);
mutex_unlock(&kvm_hyp_pgd_mutex);
}
static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
unsigned long end)
{
pte_t *pte;
unsigned long addr;
struct page *page;
for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
unsigned long hyp_addr = KERN_TO_HYP(addr);
pte = pte_offset_kernel(pmd, hyp_addr);
BUG_ON(!virt_addr_valid(addr));
page = virt_to_page(addr);
kvm_set_pte(pte, mk_pte(page, PAGE_HYP));
}
}
static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start,
unsigned long end,
unsigned long *pfn_base)
{
pte_t *pte;
unsigned long addr;
for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
unsigned long hyp_addr = KERN_TO_HYP(addr);
pte = pte_offset_kernel(pmd, hyp_addr);
BUG_ON(pfn_valid(*pfn_base));
kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE));
(*pfn_base)++;
}
}
static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
unsigned long end, unsigned long *pfn_base)
{
pmd_t *pmd;
pte_t *pte;
unsigned long addr, next;
for (addr = start; addr < end; addr = next) {
unsigned long hyp_addr = KERN_TO_HYP(addr);
pmd = pmd_offset(pud, hyp_addr);
BUG_ON(pmd_sect(*pmd));
if (pmd_none(*pmd)) {
pte = pte_alloc_one_kernel(NULL, hyp_addr);
if (!pte) {
kvm_err("Cannot allocate Hyp pte\n");
return -ENOMEM;
}
pmd_populate_kernel(NULL, pmd, pte);
}
next = pmd_addr_end(addr, end);
/*
* If pfn_base is NULL, we map kernel pages into HYP with the
* virtual address. Otherwise, this is considered an I/O
* mapping and we map the physical region starting at
* *pfn_base to [start, end[.
*/
if (!pfn_base)
create_hyp_pte_mappings(pmd, addr, next);
else
create_hyp_io_pte_mappings(pmd, addr, next, pfn_base);
}
return 0;
}
static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base)
{
unsigned long start = (unsigned long)from;
unsigned long end = (unsigned long)to;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
unsigned long addr, next;
int err = 0;
if (start >= end)
return -EINVAL;
/* Check for a valid kernel memory mapping */
if (!pfn_base && (!virt_addr_valid(from) || !virt_addr_valid(to - 1)))
return -EINVAL;
/* Check for a valid kernel IO mapping */
if (pfn_base && (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)))
return -EINVAL;
mutex_lock(&kvm_hyp_pgd_mutex);
for (addr = start; addr < end; addr = next) {
unsigned long hyp_addr = KERN_TO_HYP(addr);
pgd = hyp_pgd + pgd_index(hyp_addr);
pud = pud_offset(pgd, hyp_addr);
if (pud_none_or_clear_bad(pud)) {
pmd = pmd_alloc_one(NULL, hyp_addr);
if (!pmd) {
kvm_err("Cannot allocate Hyp pmd\n");
err = -ENOMEM;
goto out;
}
pud_populate(NULL, pud, pmd);
}
next = pgd_addr_end(addr, end);
err = create_hyp_pmd_mappings(pud, addr, next, pfn_base);
if (err)
goto out;
}
out:
mutex_unlock(&kvm_hyp_pgd_mutex);
return err;
}
/**
* create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
* @from: The virtual kernel start address of the range
* @to: The virtual kernel end address of the range (exclusive)
*
* The same virtual address as the kernel virtual address is also used
* in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
* physical pages.
*
* Note: Wrapping around zero in the "to" address is not supported.
*/
int create_hyp_mappings(void *from, void *to)
{
return __create_hyp_mappings(from, to, NULL);
}
/**
* create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
* @from: The kernel start VA of the range
* @to: The kernel end VA of the range (exclusive)
* @addr: The physical start address which gets mapped
*
* The resulting HYP VA is the same as the kernel VA, modulo
* HYP_PAGE_OFFSET.
*/
int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr)
{
unsigned long pfn = __phys_to_pfn(addr);
return __create_hyp_mappings(from, to, &pfn);
}
/**
* kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
* @kvm: The KVM struct pointer for the VM.
*
* Allocates the 1st level table only of size defined by S2_PGD_ORDER (can
* support either full 40-bit input addresses or limited to 32-bit input
* addresses). Clears the allocated pages.
*
* Note we don't need locking here as this is only called when the VM is
* created, which can only be done once.
*/
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
pgd_t *pgd;
if (kvm->arch.pgd != NULL) {
kvm_err("kvm_arch already initialized?\n");
return -EINVAL;
}
pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
if (!pgd)
return -ENOMEM;
/* stage-2 pgd must be aligned to its size */
VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));
memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
kvm_clean_pgd(pgd);
kvm->arch.pgd = pgd;
return 0;
}
static void clear_pud_entry(pud_t *pud)
{
pmd_t *pmd_table = pmd_offset(pud, 0);
@ -343,28 +114,17 @@ static bool pte_empty(pte_t *pte)
return page_count(pte_page) == 1;
}
/**
* unmap_stage2_range -- Clear stage2 page table entries to unmap a range
* @kvm: The VM pointer
* @start: The intermediate physical base address of the range to unmap
* @size: The size of the area to unmap
*
* Clear a range of stage-2 mappings, lowering the various ref-counts. Must
* be called while holding mmu_lock (unless for freeing the stage2 pgd before
* destroying the VM), otherwise another faulting VCPU may come in and mess
* with things behind our backs.
*/
static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
static void unmap_range(pgd_t *pgdp, unsigned long long start, u64 size)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
phys_addr_t addr = start, end = start + size;
unsigned long long addr = start, end = start + size;
u64 range;
while (addr < end) {
pgd = kvm->arch.pgd + pgd_index(addr);
pgd = pgdp + pgd_index(addr);
pud = pud_offset(pgd, addr);
if (pud_none(*pud)) {
addr += PUD_SIZE;
@ -395,6 +155,247 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
}
}
/**
* free_boot_hyp_pgd - free HYP boot page tables
*
* Free the HYP boot page tables. The bounce page is also freed.
*/
void free_boot_hyp_pgd(void)
{
mutex_lock(&kvm_hyp_pgd_mutex);
if (boot_hyp_pgd) {
unmap_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
unmap_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
kfree(boot_hyp_pgd);
boot_hyp_pgd = NULL;
}
if (hyp_pgd)
unmap_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
kfree(init_bounce_page);
init_bounce_page = NULL;
mutex_unlock(&kvm_hyp_pgd_mutex);
}
/**
* free_hyp_pgds - free Hyp-mode page tables
*
* Assumes hyp_pgd is a page table used strictly in Hyp-mode and
* therefore contains either mappings in the kernel memory area (above
* PAGE_OFFSET), or device mappings in the vmalloc range (from
* VMALLOC_START to VMALLOC_END).
*
* boot_hyp_pgd should only map two pages for the init code.
*/
void free_hyp_pgds(void)
{
unsigned long addr;
free_boot_hyp_pgd();
mutex_lock(&kvm_hyp_pgd_mutex);
if (hyp_pgd) {
for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
kfree(hyp_pgd);
hyp_pgd = NULL;
}
mutex_unlock(&kvm_hyp_pgd_mutex);
}
static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
unsigned long end, unsigned long pfn,
pgprot_t prot)
{
pte_t *pte;
unsigned long addr;
addr = start;
do {
pte = pte_offset_kernel(pmd, addr);
kvm_set_pte(pte, pfn_pte(pfn, prot));
get_page(virt_to_page(pte));
kvm_flush_dcache_to_poc(pte, sizeof(*pte));
pfn++;
} while (addr += PAGE_SIZE, addr != end);
}
static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
unsigned long end, unsigned long pfn,
pgprot_t prot)
{
pmd_t *pmd;
pte_t *pte;
unsigned long addr, next;
addr = start;
do {
pmd = pmd_offset(pud, addr);
BUG_ON(pmd_sect(*pmd));
if (pmd_none(*pmd)) {
pte = pte_alloc_one_kernel(NULL, addr);
if (!pte) {
kvm_err("Cannot allocate Hyp pte\n");
return -ENOMEM;
}
pmd_populate_kernel(NULL, pmd, pte);
get_page(virt_to_page(pmd));
kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
}
next = pmd_addr_end(addr, end);
create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
pfn += (next - addr) >> PAGE_SHIFT;
} while (addr = next, addr != end);
return 0;
}
static int __create_hyp_mappings(pgd_t *pgdp,
unsigned long start, unsigned long end,
unsigned long pfn, pgprot_t prot)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
unsigned long addr, next;
int err = 0;
mutex_lock(&kvm_hyp_pgd_mutex);
addr = start & PAGE_MASK;
end = PAGE_ALIGN(end);
do {
pgd = pgdp + pgd_index(addr);
pud = pud_offset(pgd, addr);
if (pud_none_or_clear_bad(pud)) {
pmd = pmd_alloc_one(NULL, addr);
if (!pmd) {
kvm_err("Cannot allocate Hyp pmd\n");
err = -ENOMEM;
goto out;
}
pud_populate(NULL, pud, pmd);
get_page(virt_to_page(pud));
kvm_flush_dcache_to_poc(pud, sizeof(*pud));
}
next = pgd_addr_end(addr, end);
err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
if (err)
goto out;
pfn += (next - addr) >> PAGE_SHIFT;
} while (addr = next, addr != end);
out:
mutex_unlock(&kvm_hyp_pgd_mutex);
return err;
}
/**
* create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
* @from: The virtual kernel start address of the range
* @to: The virtual kernel end address of the range (exclusive)
*
* The same virtual address as the kernel virtual address is also used
* in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
* physical pages.
*/
int create_hyp_mappings(void *from, void *to)
{
unsigned long phys_addr = virt_to_phys(from);
unsigned long start = KERN_TO_HYP((unsigned long)from);
unsigned long end = KERN_TO_HYP((unsigned long)to);
/* Check for a valid kernel memory mapping */
if (!virt_addr_valid(from) || !virt_addr_valid(to - 1))
return -EINVAL;
return __create_hyp_mappings(hyp_pgd, start, end,
__phys_to_pfn(phys_addr), PAGE_HYP);
}
/**
* create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
* @from: The kernel start VA of the range
* @to: The kernel end VA of the range (exclusive)
* @phys_addr: The physical start address which gets mapped
*
* The resulting HYP VA is the same as the kernel VA, modulo
* HYP_PAGE_OFFSET.
*/
int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
{
unsigned long start = KERN_TO_HYP((unsigned long)from);
unsigned long end = KERN_TO_HYP((unsigned long)to);
/* Check for a valid kernel IO mapping */
if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
return -EINVAL;
return __create_hyp_mappings(hyp_pgd, start, end,
__phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
}
/**
* kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
* @kvm: The KVM struct pointer for the VM.
*
* Allocates the 1st level table only of size defined by S2_PGD_ORDER (can
* support either full 40-bit input addresses or limited to 32-bit input
* addresses). Clears the allocated pages.
*
* Note we don't need locking here as this is only called when the VM is
* created, which can only be done once.
*/
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
pgd_t *pgd;
if (kvm->arch.pgd != NULL) {
kvm_err("kvm_arch already initialized?\n");
return -EINVAL;
}
pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
if (!pgd)
return -ENOMEM;
/* stage-2 pgd must be aligned to its size */
VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));
memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
kvm_clean_pgd(pgd);
kvm->arch.pgd = pgd;
return 0;
}
/**
* unmap_stage2_range -- Clear stage2 page table entries to unmap a range
* @kvm: The VM pointer
* @start: The intermediate physical base address of the range to unmap
* @size: The size of the area to unmap
*
* Clear a range of stage-2 mappings, lowering the various ref-counts. Must
* be called while holding mmu_lock (unless for freeing the stage2 pgd before
* destroying the VM), otherwise another faulting VCPU may come in and mess
* with things behind our backs.
*/
static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
{
unmap_range(kvm->arch.pgd, start, size);
}
/**
* kvm_free_stage2_pgd - free all stage-2 tables
* @kvm: The KVM struct pointer for the VM.
@ -728,47 +729,105 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
phys_addr_t kvm_mmu_get_httbr(void)
{
VM_BUG_ON(!virt_addr_valid(hyp_pgd));
return virt_to_phys(hyp_pgd);
}
phys_addr_t kvm_mmu_get_boot_httbr(void)
{
return virt_to_phys(boot_hyp_pgd);
}
phys_addr_t kvm_get_idmap_vector(void)
{
return hyp_idmap_vector;
}
int kvm_mmu_init(void)
{
if (!hyp_pgd) {
int err;
hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start);
hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end);
hyp_idmap_vector = virt_to_phys(__kvm_hyp_init);
if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) {
/*
* Our init code is crossing a page boundary. Allocate
* a bounce page, copy the code over and use that.
*/
size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start;
phys_addr_t phys_base;
init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!init_bounce_page) {
kvm_err("Couldn't allocate HYP init bounce page\n");
err = -ENOMEM;
goto out;
}
memcpy(init_bounce_page, __hyp_idmap_text_start, len);
/*
* Warning: the code we just copied to the bounce page
* must be flushed to the point of coherency.
* Otherwise, the data may be sitting in L2, and HYP
* mode won't be able to observe it as it runs with
* caches off at that point.
*/
kvm_flush_dcache_to_poc(init_bounce_page, len);
phys_base = virt_to_phys(init_bounce_page);
hyp_idmap_vector += phys_base - hyp_idmap_start;
hyp_idmap_start = phys_base;
hyp_idmap_end = phys_base + len;
kvm_info("Using HYP init bounce page @%lx\n",
(unsigned long)phys_base);
}
hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
if (!hyp_pgd || !boot_hyp_pgd) {
kvm_err("Hyp mode PGD not allocated\n");
return -ENOMEM;
err = -ENOMEM;
goto out;
}
/* Create the idmap in the boot page tables */
err = __create_hyp_mappings(boot_hyp_pgd,
hyp_idmap_start, hyp_idmap_end,
__phys_to_pfn(hyp_idmap_start),
PAGE_HYP);
if (err) {
kvm_err("Failed to idmap %lx-%lx\n",
hyp_idmap_start, hyp_idmap_end);
goto out;
}
/* Map the very same page at the trampoline VA */
err = __create_hyp_mappings(boot_hyp_pgd,
TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
__phys_to_pfn(hyp_idmap_start),
PAGE_HYP);
if (err) {
kvm_err("Failed to map trampoline @%lx into boot HYP pgd\n",
TRAMPOLINE_VA);
goto out;
}
/* Map the same page again into the runtime page tables */
err = __create_hyp_mappings(hyp_pgd,
TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
__phys_to_pfn(hyp_idmap_start),
PAGE_HYP);
if (err) {
kvm_err("Failed to map trampoline @%lx into runtime HYP pgd\n",
TRAMPOLINE_VA);
goto out;
}
return 0;
}
/**
* kvm_clear_idmap - remove all idmaps from the hyp pgd
*
* Free the underlying pmds for all pgds in range and clear the pgds (but
* don't free them) afterwards.
*/
void kvm_clear_hyp_idmap(void)
{
unsigned long addr, end;
unsigned long next;
pgd_t *pgd = hyp_pgd;
pud_t *pud;
pmd_t *pmd;
addr = virt_to_phys(__hyp_idmap_text_start);
end = virt_to_phys(__hyp_idmap_text_end);
pgd += pgd_index(addr);
do {
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
continue;
pud = pud_offset(pgd, addr);
pmd = pmd_offset(pud, addr);
pud_clear(pud);
kvm_clean_pmd_entry(pmd);
pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK));
} while (pgd++, addr = next, addr < end);
out:
free_hyp_pgds();
return err;
}

68
arch/arm/kvm/perf.c Normal file
View File

@ -0,0 +1,68 @@
/*
* Based on the x86 implementation.
*
* Copyright (C) 2012 ARM Ltd.
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/perf_event.h>
#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
static int kvm_is_in_guest(void)
{
return kvm_arm_get_running_vcpu() != NULL;
}
static int kvm_is_user_mode(void)
{
struct kvm_vcpu *vcpu;
vcpu = kvm_arm_get_running_vcpu();
if (vcpu)
return !vcpu_mode_priv(vcpu);
return 0;
}
static unsigned long kvm_get_guest_ip(void)
{
struct kvm_vcpu *vcpu;
vcpu = kvm_arm_get_running_vcpu();
if (vcpu)
return *vcpu_pc(vcpu);
return 0;
}
static struct perf_guest_info_callbacks kvm_guest_cbs = {
.is_in_guest = kvm_is_in_guest,
.is_user_mode = kvm_is_user_mode,
.get_guest_ip = kvm_get_guest_ip,
};
int kvm_perf_init(void)
{
return perf_register_guest_info_callbacks(&kvm_guest_cbs);
}
int kvm_perf_teardown(void)
{
return perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
}

View File

@ -8,7 +8,6 @@
#include <asm/pgtable.h>
#include <asm/sections.h>
#include <asm/system_info.h>
#include <asm/virt.h>
pgd_t *idmap_pgd;
@ -83,37 +82,10 @@ static void identity_mapping_add(pgd_t *pgd, const char *text_start,
} while (pgd++, addr = next, addr != end);
}
#if defined(CONFIG_ARM_VIRT_EXT) && defined(CONFIG_ARM_LPAE)
pgd_t *hyp_pgd;
extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
static int __init init_static_idmap_hyp(void)
{
hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
if (!hyp_pgd)
return -ENOMEM;
pr_info("Setting up static HYP identity map for 0x%p - 0x%p\n",
__hyp_idmap_text_start, __hyp_idmap_text_end);
identity_mapping_add(hyp_pgd, __hyp_idmap_text_start,
__hyp_idmap_text_end, PMD_SECT_AP1);
return 0;
}
#else
static int __init init_static_idmap_hyp(void)
{
return 0;
}
#endif
extern char __idmap_text_start[], __idmap_text_end[];
static int __init init_static_idmap(void)
{
int ret;
idmap_pgd = pgd_alloc(&init_mm);
if (!idmap_pgd)
return -ENOMEM;
@ -123,12 +95,10 @@ static int __init init_static_idmap(void)
identity_mapping_add(idmap_pgd, __idmap_text_start,
__idmap_text_end, 0);
ret = init_static_idmap_hyp();
/* Flush L1 for the hardware to see this page table content */
flush_cache_louis();
return ret;
return 0;
}
early_initcall(init_static_idmap);

View File

@ -26,6 +26,7 @@
#define KVM_USER_MEM_SLOTS 32
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
/* define exit reasons from vmm to kvm*/
#define EXIT_REASON_VM_PANIC 0

View File

@ -27,7 +27,6 @@
/* Select x86 specific features in <linux/kvm.h> */
#define __KVM_HAVE_IOAPIC
#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_DEVICE_ASSIGNMENT
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256

View File

@ -21,12 +21,11 @@ config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
depends on BROKEN
depends on HAVE_KVM && MODULES
# for device assignment:
depends on PCI
depends on BROKEN
select PREEMPT_NOTIFIERS
select ANON_INODES
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
select KVM_APIC_ARCHITECTURE
select KVM_MMIO
---help---
@ -50,6 +49,17 @@ config KVM_INTEL
Provides support for KVM on Itanium 2 processors equipped with the VT
extensions.
config KVM_DEVICE_ASSIGNMENT
bool "KVM legacy PCI device assignment support"
depends on KVM && PCI && IOMMU_API
default y
---help---
Provide support for legacy PCI device assignment through KVM. The
kernel now also supports a full featured userspace device driver
framework through VFIO, which supersedes much of this support.
If unsure, say Y.
source drivers/vhost/Kconfig
endif # VIRTUALIZATION

View File

@ -49,10 +49,10 @@ ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
coalesced_mmio.o irq_comm.o assigned-dev.o)
coalesced_mmio.o irq_comm.o)
ifeq ($(CONFIG_IOMMU_API),y)
common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y)
common-objs += $(addprefix ../../../virt/kvm/, assigned-dev.o iommu.o)
endif
kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o

View File

@ -204,9 +204,11 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_COALESCED_MMIO:
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
case KVM_CAP_IOMMU:
r = iommu_present(&pci_bus_type);
break;
#endif
default:
r = 0;
}
@ -924,13 +926,15 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
return 0;
}
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
bool line_status)
{
if (!irqchip_in_kernel(kvm))
return -ENXIO;
irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
irq_event->irq, irq_event->level);
irq_event->irq, irq_event->level,
line_status);
return 0;
}
@ -942,24 +946,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
int r = -ENOTTY;
switch (ioctl) {
case KVM_SET_MEMORY_REGION: {
struct kvm_memory_region kvm_mem;
struct kvm_userspace_memory_region kvm_userspace_mem;
r = -EFAULT;
if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
goto out;
kvm_userspace_mem.slot = kvm_mem.slot;
kvm_userspace_mem.flags = kvm_mem.flags;
kvm_userspace_mem.guest_phys_addr =
kvm_mem.guest_phys_addr;
kvm_userspace_mem.memory_size = kvm_mem.memory_size;
r = kvm_vm_ioctl_set_memory_region(kvm,
&kvm_userspace_mem, false);
if (r)
goto out;
break;
}
case KVM_CREATE_IRQCHIP:
r = -EFAULT;
r = kvm_ioapic_init(kvm);
@ -1384,9 +1370,7 @@ void kvm_arch_sync_events(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_iommu_unmap_guest(kvm);
#ifdef KVM_CAP_DEVICE_ASSIGNMENT
kvm_free_all_assigned_devices(kvm);
#endif
kfree(kvm->arch.vioapic);
kvm_release_vm_pages(kvm);
}
@ -1578,9 +1562,8 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_memory_slot old,
struct kvm_userspace_memory_region *mem,
bool user_alloc)
enum kvm_mr_change change)
{
unsigned long i;
unsigned long pfn;
@ -1610,8 +1593,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old,
bool user_alloc)
const struct kvm_memory_slot *old,
enum kvm_mr_change change)
{
return;
}

View File

@ -27,10 +27,4 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
#define kvm_apic_present(x) (true)
#define kvm_lapic_enabled(x) (true)
static inline bool kvm_apic_vid_enabled(void)
{
/* IA64 has no apicv supporting, do nothing here */
return false;
}
#endif

View File

@ -270,6 +270,9 @@
#define H_SET_MODE 0x31C
#define MAX_HCALL_OPCODE H_SET_MODE
/* Platform specific hcalls, used by KVM */
#define H_RTAS 0xf000
#ifndef __ASSEMBLY__
/**

View File

@ -142,6 +142,8 @@ extern int kvmppc_mmu_hv_init(void);
extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
unsigned int vec);
extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags);
extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
bool upper, u32 val);
@ -156,7 +158,8 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
unsigned long pte_index);
extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
unsigned long *nb_ret);
extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr,
unsigned long gpa, bool dirty);
extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel);
extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
@ -458,6 +461,8 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
#define OSI_SC_MAGIC_R4 0x77810F9B
#define INS_DCBZ 0x7c0007ec
/* TO = 31 for unconditional trap */
#define INS_TW 0x7fe00008
/* LPIDs we support with this build -- runtime limit may be lower */
#define KVMPPC_NR_LPIDS (LPID_RSVD + 1)

View File

@ -268,4 +268,17 @@ static inline int is_vrma_hpte(unsigned long hpte_v)
(HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
}
#ifdef CONFIG_KVM_BOOK3S_64_HV
/*
* Note modification of an HPTE; set the HPTE modified bit
* if anyone is interested.
*/
static inline void note_hpte_modification(struct kvm *kvm,
struct revmap_entry *rev)
{
if (atomic_read(&kvm->arch.hpte_mod_interest))
rev->guest_rpte |= HPTE_GR_MODIFIED;
}
#endif /* CONFIG_KVM_BOOK3S_64_HV */
#endif /* __ASM_KVM_BOOK3S_64_H__ */

View File

@ -20,6 +20,11 @@
#ifndef __ASM_KVM_BOOK3S_ASM_H__
#define __ASM_KVM_BOOK3S_ASM_H__
/* XICS ICP register offsets */
#define XICS_XIRR 4
#define XICS_MFRR 0xc
#define XICS_IPI 2 /* interrupt source # for IPIs */
#ifdef __ASSEMBLY__
#ifdef CONFIG_KVM_BOOK3S_HANDLER
@ -81,10 +86,11 @@ struct kvmppc_host_state {
#ifdef CONFIG_KVM_BOOK3S_64_HV
u8 hwthread_req;
u8 hwthread_state;
u8 host_ipi;
struct kvm_vcpu *kvm_vcpu;
struct kvmppc_vcore *kvm_vcore;
unsigned long xics_phys;
u32 saved_xirr;
u64 dabr;
u64 host_mmcr[3];
u32 host_pmc[8];

View File

@ -26,6 +26,8 @@
/* LPIDs we support with this build -- runtime limit may be lower */
#define KVMPPC_NR_LPIDS 64
#define KVMPPC_INST_EHPRIV 0x7c00021c
static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
{
vcpu->arch.gpr[num] = val;

View File

@ -44,6 +44,10 @@
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#endif
/* These values are internal and can be increased later */
#define KVM_NR_IRQCHIPS 1
#define KVM_IRQCHIP_NUM_PINS 256
#if !defined(CONFIG_KVM_440)
#include <linux/mmu_notifier.h>
@ -188,6 +192,10 @@ struct kvmppc_linear_info {
int type;
};
/* XICS components, defined in book3s_xics.c */
struct kvmppc_xics;
struct kvmppc_icp;
/*
* The reverse mapping array has one entry for each HPTE,
* which stores the guest's view of the second word of the HPTE
@ -255,6 +263,13 @@ struct kvm_arch {
#endif /* CONFIG_KVM_BOOK3S_64_HV */
#ifdef CONFIG_PPC_BOOK3S_64
struct list_head spapr_tce_tables;
struct list_head rtas_tokens;
#endif
#ifdef CONFIG_KVM_MPIC
struct openpic *mpic;
#endif
#ifdef CONFIG_KVM_XICS
struct kvmppc_xics *xics;
#endif
};
@ -301,11 +316,13 @@ struct kvmppc_vcore {
* that a guest can register.
*/
struct kvmppc_vpa {
unsigned long gpa; /* Current guest phys addr */
void *pinned_addr; /* Address in kernel linear mapping */
void *pinned_end; /* End of region */
unsigned long next_gpa; /* Guest phys addr for update */
unsigned long len; /* Number of bytes required */
u8 update_pending; /* 1 => update pinned_addr from next_gpa */
bool dirty; /* true => area has been modified by kernel */
};
struct kvmppc_pte {
@ -359,6 +376,11 @@ struct kvmppc_slb {
#define KVMPPC_BOOKE_MAX_IAC 4
#define KVMPPC_BOOKE_MAX_DAC 2
/* KVMPPC_EPR_USER takes precedence over KVMPPC_EPR_KERNEL */
#define KVMPPC_EPR_NONE 0 /* EPR not supported */
#define KVMPPC_EPR_USER 1 /* exit to userspace to fill EPR */
#define KVMPPC_EPR_KERNEL 2 /* in-kernel irqchip */
struct kvmppc_booke_debug_reg {
u32 dbcr0;
u32 dbcr1;
@ -370,6 +392,12 @@ struct kvmppc_booke_debug_reg {
u64 dac[KVMPPC_BOOKE_MAX_DAC];
};
#define KVMPPC_IRQ_DEFAULT 0
#define KVMPPC_IRQ_MPIC 1
#define KVMPPC_IRQ_XICS 2
struct openpic;
struct kvm_vcpu_arch {
ulong host_stack;
u32 host_pid;
@ -502,8 +530,11 @@ struct kvm_vcpu_arch {
spinlock_t wdt_lock;
struct timer_list wdt_timer;
u32 tlbcfg[4];
u32 tlbps[4];
u32 mmucfg;
u32 eptcfg;
u32 epr;
u32 crit_save;
struct kvmppc_booke_debug_reg dbg_reg;
#endif
gpa_t paddr_accessed;
@ -521,7 +552,7 @@ struct kvm_vcpu_arch {
u8 sane;
u8 cpu_type;
u8 hcall_needed;
u8 epr_enabled;
u8 epr_flags; /* KVMPPC_EPR_xxx */
u8 epr_needed;
u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
@ -548,6 +579,13 @@ struct kvm_vcpu_arch {
unsigned long magic_page_pa; /* phys addr to map the magic page to */
unsigned long magic_page_ea; /* effect. addr to map the magic page to */
int irq_type; /* one of KVM_IRQ_* */
int irq_cpu_id;
struct openpic *mpic; /* KVM_IRQ_MPIC */
#ifdef CONFIG_KVM_XICS
struct kvmppc_icp *icp; /* XICS presentation controller */
#endif
#ifdef CONFIG_KVM_BOOK3S_64_HV
struct kvm_vcpu_arch_shared shregs;
@ -588,5 +626,6 @@ struct kvm_vcpu_arch {
#define KVM_MMIO_REG_FQPR 0x0060
#define __KVM_HAVE_ARCH_WQP
#define __KVM_HAVE_CREATE_DEVICE
#endif /* __POWERPC_KVM_HOST_H__ */

View File

@ -44,7 +44,7 @@ enum emulation_result {
EMULATE_DO_DCR, /* kvm_run filled with DCR request */
EMULATE_FAIL, /* can't emulate this instruction */
EMULATE_AGAIN, /* something went wrong. go again */
EMULATE_DO_PAPR, /* kvm_run filled with PAPR request */
EMULATE_EXIT_USER, /* emulation requires exit to user-space */
};
extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
@ -104,8 +104,7 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq);
extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq);
extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu);
extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu);
extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
@ -131,6 +130,7 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm,
extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
struct kvm_memory_slot *memslot, unsigned long porder);
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args);
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
@ -152,7 +152,7 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old);
const struct kvm_memory_slot *old);
extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
struct kvm_ppc_smmu_info *info);
extern void kvmppc_core_flush_memslot(struct kvm *kvm,
@ -165,6 +165,18 @@ extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server,
u32 priority);
extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
u32 *priority);
extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq);
extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq);
/*
* Cuts out inst bits with ordering according to spec.
* That means the leftmost bit is zero. All given bits are included.
@ -246,12 +258,29 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *);
void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
struct openpic;
#ifdef CONFIG_KVM_BOOK3S_64_HV
static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
{
paca[cpu].kvm_hstate.xics_phys = addr;
}
static inline u32 kvmppc_get_xics_latch(void)
{
u32 xirr = get_paca()->kvm_hstate.saved_xirr;
get_paca()->kvm_hstate.saved_xirr = 0;
return xirr;
}
static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
{
paca[cpu].kvm_hstate.host_ipi = host_ipi;
}
extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
extern void kvm_linear_init(void);
#else
@ -260,6 +289,46 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
static inline void kvm_linear_init(void)
{}
static inline u32 kvmppc_get_xics_latch(void)
{
return 0;
}
static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
{}
static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
{
kvm_vcpu_kick(vcpu);
}
#endif
#ifdef CONFIG_KVM_XICS
static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{
return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
}
extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
#else
static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
static inline int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu,
unsigned long server)
{ return -EINVAL; }
static inline int kvm_vm_ioctl_xics_irq(struct kvm *kvm,
struct kvm_irq_level *args)
{ return -ENOTTY; }
static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ return 0; }
#endif
static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
@ -271,6 +340,32 @@ static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
#endif
}
#ifdef CONFIG_KVM_MPIC
void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu);
int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
u32 cpu);
void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu);
#else
static inline void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
{
}
static inline int kvmppc_mpic_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu)
{
return -EINVAL;
}
static inline void kvmppc_mpic_disconnect_vcpu(struct openpic *opp,
struct kvm_vcpu *vcpu)
{
}
#endif /* CONFIG_KVM_MPIC */
int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
struct kvm_config_tlb *cfg);
int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
@ -283,8 +378,15 @@ void kvmppc_init_lpid(unsigned long nr_lpids);
static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
{
/* Clear i-cache for new pages */
struct page *page;
/*
* We can only access pages that the kernel maps
* as memory. Bail out for unmapped ones.
*/
if (!pfn_valid(pfn))
return;
/* Clear i-cache for new pages */
page = pfn_to_page(pfn);
if (!test_bit(PG_arch_1, &page->flags)) {
flush_dcache_icache_page(page);
@ -324,4 +426,6 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
return ea;
}
extern void xics_wake_cpu(int cpu);
#endif /* __POWERPC_KVM_PPC_H__ */

View File

@ -300,6 +300,7 @@
#define LPCR_PECE1 0x00002000 /* decrementer can cause exit */
#define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */
#define LPCR_MER 0x00000800 /* Mediated External Exception */
#define LPCR_MER_SH 11
#define LPCR_LPES 0x0000000c
#define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */
#define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */

View File

@ -25,6 +25,8 @@
/* Select powerpc specific features in <linux/kvm.h> */
#define __KVM_HAVE_SPAPR_TCE
#define __KVM_HAVE_PPC_SMT
#define __KVM_HAVE_IRQCHIP
#define __KVM_HAVE_IRQ_LINE
struct kvm_regs {
__u64 pc;
@ -272,8 +274,31 @@ struct kvm_debug_exit_arch {
/* for KVM_SET_GUEST_DEBUG */
struct kvm_guest_debug_arch {
struct {
/* H/W breakpoint/watchpoint address */
__u64 addr;
/*
* Type denotes h/w breakpoint, read watchpoint, write
* watchpoint or watchpoint (both read and write).
*/
#define KVMPPC_DEBUG_NONE 0x0
#define KVMPPC_DEBUG_BREAKPOINT (1UL << 1)
#define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2)
#define KVMPPC_DEBUG_WATCH_READ (1UL << 3)
__u32 type;
__u32 reserved;
} bp[16];
};
/* Debug related defines */
/*
* kvm_guest_debug->control is a 32 bit field. The lower 16 bits are generic
* and upper 16 bits are architecture specific. Architecture specific defines
* that ioctl is for setting hardware breakpoint or software breakpoint.
*/
#define KVM_GUESTDBG_USE_SW_BP 0x00010000
#define KVM_GUESTDBG_USE_HW_BP 0x00020000
/* definition of registers in kvm_run */
struct kvm_sync_regs {
};
@ -299,6 +324,12 @@ struct kvm_allocate_rma {
__u64 rma_size;
};
/* for KVM_CAP_PPC_RTAS */
struct kvm_rtas_token_args {
char name[120];
__u64 token; /* Use a token of 0 to undefine a mapping */
};
struct kvm_book3e_206_tlb_entry {
__u32 mas8;
__u32 mas1;
@ -359,6 +390,26 @@ struct kvm_get_htab_header {
__u16 n_invalid;
};
/* Per-vcpu XICS interrupt controller state */
#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
#define KVM_REG_PPC_ICP_CPPR_SHIFT 56 /* current proc priority */
#define KVM_REG_PPC_ICP_CPPR_MASK 0xff
#define KVM_REG_PPC_ICP_XISR_SHIFT 32 /* interrupt status field */
#define KVM_REG_PPC_ICP_XISR_MASK 0xffffff
#define KVM_REG_PPC_ICP_MFRR_SHIFT 24 /* pending IPI priority */
#define KVM_REG_PPC_ICP_MFRR_MASK 0xff
#define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */
#define KVM_REG_PPC_ICP_PPRI_MASK 0xff
/* Device control API: PPC-specific devices */
#define KVM_DEV_MPIC_GRP_MISC 1
#define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */
#define KVM_DEV_MPIC_GRP_REGISTER 2 /* 32-bit */
#define KVM_DEV_MPIC_GRP_IRQ_ACTIVE 3 /* 32-bit */
/* One-Reg API: PPC-specific registers */
#define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
#define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
#define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
@ -417,4 +468,47 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
#define KVM_REG_PPC_EPR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86)
/* Timer Status Register OR/CLEAR interface */
#define KVM_REG_PPC_OR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x87)
#define KVM_REG_PPC_CLEAR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x88)
#define KVM_REG_PPC_TCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x89)
#define KVM_REG_PPC_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8a)
/* Debugging: Special instruction for software breakpoint */
#define KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b)
/* MMU registers */
#define KVM_REG_PPC_MAS0 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8c)
#define KVM_REG_PPC_MAS1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8d)
#define KVM_REG_PPC_MAS2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8e)
#define KVM_REG_PPC_MAS7_3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8f)
#define KVM_REG_PPC_MAS4 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x90)
#define KVM_REG_PPC_MAS6 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x91)
#define KVM_REG_PPC_MMUCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x92)
/*
* TLBnCFG fields TLBnCFG_N_ENTRY and TLBnCFG_ASSOC can be changed only using
* KVM_CAP_SW_TLB ioctl
*/
#define KVM_REG_PPC_TLB0CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x93)
#define KVM_REG_PPC_TLB1CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x94)
#define KVM_REG_PPC_TLB2CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x95)
#define KVM_REG_PPC_TLB3CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x96)
#define KVM_REG_PPC_TLB0PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x97)
#define KVM_REG_PPC_TLB1PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x98)
#define KVM_REG_PPC_TLB2PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99)
#define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
#define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b)
/* PPC64 eXternal Interrupt Controller Specification */
#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
/* Layout of 64-bit source attribute values */
#define KVM_XICS_DESTINATION_SHIFT 0
#define KVM_XICS_DESTINATION_MASK 0xffffffffULL
#define KVM_XICS_PRIORITY_SHIFT 32
#define KVM_XICS_PRIORITY_MASK 0xff
#define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40)
#define KVM_XICS_MASKED (1ULL << 41)
#define KVM_XICS_PENDING (1ULL << 42)
#endif /* __LINUX_KVM_POWERPC_H */

View File

@ -480,6 +480,7 @@ int main(void)
DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
#endif
#ifdef CONFIG_PPC_BOOK3S
DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
@ -576,6 +577,8 @@ int main(void)
HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
HSTATE_FIELD(HSTATE_PMC, host_pmc);
HSTATE_FIELD(HSTATE_PURR, host_purr);
@ -599,6 +602,7 @@ int main(void)
DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save));
#endif /* CONFIG_PPC_BOOK3S */
#endif /* CONFIG_KVM */

View File

@ -124,6 +124,18 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return kvmppc_set_sregs_ivor(vcpu, sregs);
}
int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val)
{
return -EINVAL;
}
int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val)
{
return -EINVAL;
}
struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvmppc_vcpu_44x *vcpu_44x;

View File

@ -136,21 +136,41 @@ config KVM_E500V2
If unsure, say N.
config KVM_E500MC
bool "KVM support for PowerPC E500MC/E5500 processors"
bool "KVM support for PowerPC E500MC/E5500/E6500 processors"
depends on PPC_E500MC
select KVM
select KVM_MMIO
select KVM_BOOKE_HV
select MMU_NOTIFIER
---help---
Support running unmodified E500MC/E5500 (32-bit) guest kernels in
virtual machines on E500MC/E5500 host processors.
Support running unmodified E500MC/E5500/E6500 guest kernels in
virtual machines on E500MC/E5500/E6500 host processors.
This module provides access to the hardware capabilities through
a character device node named /dev/kvm.
If unsure, say N.
config KVM_MPIC
bool "KVM in-kernel MPIC emulation"
depends on KVM && E500
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_MSI
help
Enable support for emulating MPIC devices inside the
host kernel, rather than relying on userspace to emulate.
Currently, support is limited to certain versions of
Freescale's MPIC implementation.
config KVM_XICS
bool "KVM in-kernel XICS emulation"
depends on KVM_BOOK3S_64 && !KVM_MPIC
---help---
Include support for the XICS (eXternal Interrupt Controller
Specification) interrupt controller architecture used on
IBM POWER (pSeries) servers.
source drivers/vhost/Kconfig
endif # VIRTUALIZATION

View File

@ -72,12 +72,18 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
book3s_hv.o \
book3s_hv_interrupts.o \
book3s_64_mmu_hv.o
kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
book3s_hv_rm_xics.o
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
book3s_hv_rmhandlers.o \
book3s_hv_rm_mmu.o \
book3s_64_vio_hv.o \
book3s_hv_ras.o \
book3s_hv_builtin.o
book3s_hv_builtin.o \
$(kvm-book3s_64-builtin-xics-objs-y)
kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
book3s_xics.o
kvm-book3s_64-module-objs := \
../../../virt/kvm/kvm_main.o \
@ -86,6 +92,7 @@ kvm-book3s_64-module-objs := \
emulate.o \
book3s.o \
book3s_64_vio.o \
book3s_rtas.o \
$(kvm-book3s_64-objs-y)
kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
@ -103,6 +110,9 @@ kvm-book3s_32-objs := \
book3s_32_mmu.o
kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o)
kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
obj-$(CONFIG_KVM_440) += kvm.o

View File

@ -104,7 +104,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
return prio;
}
static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
unsigned int vec)
{
unsigned long old_pending = vcpu->arch.pending_exceptions;
@ -160,8 +160,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
kvmppc_book3s_queue_irqprio(vcpu, vec);
}
void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
{
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
@ -530,6 +529,21 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]);
break;
#endif /* CONFIG_ALTIVEC */
case KVM_REG_PPC_DEBUG_INST: {
u32 opcode = INS_TW;
r = copy_to_user((u32 __user *)(long)reg->addr,
&opcode, sizeof(u32));
break;
}
#ifdef CONFIG_KVM_XICS
case KVM_REG_PPC_ICP_STATE:
if (!vcpu->arch.icp) {
r = -ENXIO;
break;
}
val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu));
break;
#endif /* CONFIG_KVM_XICS */
default:
r = -EINVAL;
break;
@ -592,6 +606,16 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
break;
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_KVM_XICS
case KVM_REG_PPC_ICP_STATE:
if (!vcpu->arch.icp) {
r = -ENXIO;
break;
}
r = kvmppc_xics_set_icp(vcpu,
set_reg_val(reg->id, val));
break;
#endif /* CONFIG_KVM_XICS */
default:
r = -EINVAL;
break;
@ -607,6 +631,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
return 0;
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
return -EINVAL;
}
void kvmppc_decrementer_func(unsigned long data)
{
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;

View File

@ -893,7 +893,10 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
/* Harvest R and C */
rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
if (rcbits & ~rev[i].guest_rpte) {
rev[i].guest_rpte = ptel | rcbits;
note_hpte_modification(kvm, &rev[i]);
}
}
unlock_rmap(rmapp);
hptep[0] &= ~HPTE_V_HVLOCK;
@ -976,7 +979,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
/* Now check and modify the HPTE */
if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) {
kvmppc_clear_ref_hpte(kvm, hptep, i);
if (!(rev[i].guest_rpte & HPTE_R_R)) {
rev[i].guest_rpte |= HPTE_R_R;
note_hpte_modification(kvm, &rev[i]);
}
ret = 1;
}
hptep[0] &= ~HPTE_V_HVLOCK;
@ -1080,7 +1086,10 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
hptep[1] &= ~HPTE_R_C;
eieio();
hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
if (!(rev[i].guest_rpte & HPTE_R_C)) {
rev[i].guest_rpte |= HPTE_R_C;
note_hpte_modification(kvm, &rev[i]);
}
ret = 1;
}
hptep[0] &= ~HPTE_V_HVLOCK;
@ -1090,11 +1099,30 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
return ret;
}
static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
struct kvm_memory_slot *memslot,
unsigned long *map)
{
unsigned long gfn;
if (!vpa->dirty || !vpa->pinned_addr)
return;
gfn = vpa->gpa >> PAGE_SHIFT;
if (gfn < memslot->base_gfn ||
gfn >= memslot->base_gfn + memslot->npages)
return;
vpa->dirty = false;
if (map)
__set_bit_le(gfn - memslot->base_gfn, map);
}
long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long *map)
{
unsigned long i;
unsigned long *rmapp;
struct kvm_vcpu *vcpu;
preempt_disable();
rmapp = memslot->arch.rmap;
@ -1103,6 +1131,15 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
__set_bit_le(i, map);
++rmapp;
}
/* Harvest dirty bits from VPA and DTL updates */
/* Note: we never modify the SLB shadow buffer areas */
kvm_for_each_vcpu(i, vcpu, kvm) {
spin_lock(&vcpu->arch.vpa_update_lock);
harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map);
harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map);
spin_unlock(&vcpu->arch.vpa_update_lock);
}
preempt_enable();
return 0;
}
@ -1114,7 +1151,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
unsigned long gfn = gpa >> PAGE_SHIFT;
struct page *page, *pages[1];
int npages;
unsigned long hva, psize, offset;
unsigned long hva, offset;
unsigned long pa;
unsigned long *physp;
int srcu_idx;
@ -1146,14 +1183,9 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
psize = PAGE_SIZE;
if (PageHuge(page)) {
page = compound_head(page);
psize <<= compound_order(page);
}
offset = gpa & (psize - 1);
offset = gpa & (PAGE_SIZE - 1);
if (nb_ret)
*nb_ret = psize - offset;
*nb_ret = PAGE_SIZE - offset;
return page_address(page) + offset;
err:
@ -1161,11 +1193,31 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
return NULL;
}
void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
bool dirty)
{
struct page *page = virt_to_page(va);
struct kvm_memory_slot *memslot;
unsigned long gfn;
unsigned long *rmap;
int srcu_idx;
put_page(page);
if (!dirty || !kvm->arch.using_mmu_notifiers)
return;
/* We need to mark this page dirty in the rmap chain */
gfn = gpa >> PAGE_SHIFT;
srcu_idx = srcu_read_lock(&kvm->srcu);
memslot = gfn_to_memslot(kvm, gfn);
if (memslot) {
rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
lock_rmap(rmap);
*rmap |= KVMPPC_RMAP_CHANGED;
unlock_rmap(rmap);
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
}
/*
@ -1193,16 +1245,36 @@ struct kvm_htab_ctx {
#define HPTE_SIZE (2 * sizeof(unsigned long))
/*
* Returns 1 if this HPT entry has been modified or has pending
* R/C bit changes.
*/
static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp)
{
unsigned long rcbits_unset;
if (revp->guest_rpte & HPTE_GR_MODIFIED)
return 1;
/* Also need to consider changes in reference and changed bits */
rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset))
return 1;
return 0;
}
static long record_hpte(unsigned long flags, unsigned long *hptp,
unsigned long *hpte, struct revmap_entry *revp,
int want_valid, int first_pass)
{
unsigned long v, r;
unsigned long rcbits_unset;
int ok = 1;
int valid, dirty;
/* Unmodified entries are uninteresting except on the first pass */
dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
dirty = hpte_dirty(revp, hptp);
if (!first_pass && !dirty)
return 0;
@ -1223,16 +1295,28 @@ static long record_hpte(unsigned long flags, unsigned long *hptp,
while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
cpu_relax();
v = hptp[0];
/* re-evaluate valid and dirty from synchronized HPTE value */
valid = !!(v & HPTE_V_VALID);
dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
/* Harvest R and C into guest view if necessary */
rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
if (valid && (rcbits_unset & hptp[1])) {
revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) |
HPTE_GR_MODIFIED;
dirty = 1;
}
if (v & HPTE_V_ABSENT) {
v &= ~HPTE_V_ABSENT;
v |= HPTE_V_VALID;
valid = 1;
}
/* re-evaluate valid and dirty from synchronized HPTE value */
valid = !!(v & HPTE_V_VALID);
if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
valid = 0;
r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C));
dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
r = revp->guest_rpte;
/* only clear modified if this is the right sort of entry */
if (valid == want_valid && dirty) {
r &= ~HPTE_GR_MODIFIED;
@ -1288,7 +1372,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
/* Skip uninteresting entries, i.e. clean on not-first pass */
if (!first_pass) {
while (i < kvm->arch.hpt_npte &&
!(revp->guest_rpte & HPTE_GR_MODIFIED)) {
!hpte_dirty(revp, hptp)) {
++i;
hptp += 2;
++revp;

View File

@ -194,7 +194,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
run->papr_hcall.args[i] = gpr;
}
emulated = EMULATE_DO_PAPR;
run->exit_reason = KVM_EXIT_PAPR_HCALL;
vcpu->arch.hcall_needed = 1;
emulated = EMULATE_EXIT_USER;
break;
}
#endif

View File

@ -66,6 +66,31 @@
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
{
int me;
int cpu = vcpu->cpu;
wait_queue_head_t *wqp;
wqp = kvm_arch_vcpu_wq(vcpu);
if (waitqueue_active(wqp)) {
wake_up_interruptible(wqp);
++vcpu->stat.halt_wakeup;
}
me = get_cpu();
/* CPU points to the first thread of the core */
if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
int real_cpu = cpu + vcpu->arch.ptid;
if (paca[real_cpu].kvm_hstate.xics_phys)
xics_wake_cpu(real_cpu);
else if (cpu_online(cpu))
smp_send_reschedule(cpu);
}
put_cpu();
}
/*
* We use the vcpu_load/put functions to measure stolen time.
* Stolen time is counted as time when either the vcpu is able to
@ -259,7 +284,7 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
len = ((struct reg_vpa *)va)->length.hword;
else
len = ((struct reg_vpa *)va)->length.word;
kvmppc_unpin_guest_page(kvm, va);
kvmppc_unpin_guest_page(kvm, va, vpa, false);
/* Check length */
if (len > nb || len < sizeof(struct reg_vpa))
@ -359,13 +384,13 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
va = NULL;
nb = 0;
if (gpa)
va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
va = kvmppc_pin_guest_page(kvm, gpa, &nb);
spin_lock(&vcpu->arch.vpa_update_lock);
if (gpa == vpap->next_gpa)
break;
/* sigh... unpin that one and try again */
if (va)
kvmppc_unpin_guest_page(kvm, va);
kvmppc_unpin_guest_page(kvm, va, gpa, false);
}
vpap->update_pending = 0;
@ -375,12 +400,15 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
* has changed the mappings underlying guest memory,
* so unregister the region.
*/
kvmppc_unpin_guest_page(kvm, va);
kvmppc_unpin_guest_page(kvm, va, gpa, false);
va = NULL;
}
if (vpap->pinned_addr)
kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa,
vpap->dirty);
vpap->gpa = gpa;
vpap->pinned_addr = va;
vpap->dirty = false;
if (va)
vpap->pinned_end = va + vpap->len;
}
@ -472,6 +500,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
/* order writing *dt vs. writing vpa->dtl_idx */
smp_wmb();
vpa->dtl_idx = ++vcpu->arch.dtl_index;
vcpu->arch.dtl.dirty = true;
}
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
@ -479,7 +508,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
unsigned long req = kvmppc_get_gpr(vcpu, 3);
unsigned long target, ret = H_SUCCESS;
struct kvm_vcpu *tvcpu;
int idx;
int idx, rc;
switch (req) {
case H_ENTER:
@ -515,6 +544,28 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
break;
case H_RTAS:
if (list_empty(&vcpu->kvm->arch.rtas_tokens))
return RESUME_HOST;
rc = kvmppc_rtas_hcall(vcpu);
if (rc == -ENOENT)
return RESUME_HOST;
else if (rc == 0)
break;
/* Send the error out to userspace via KVM_RUN */
return rc;
case H_XIRR:
case H_CPPR:
case H_EOI:
case H_IPI:
if (kvmppc_xics_enabled(vcpu)) {
ret = kvmppc_xics_hcall(vcpu, req);
break;
} /* fallthrough */
default:
return RESUME_HOST;
}
@ -913,15 +964,19 @@ out:
return ERR_PTR(err);
}
static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
{
if (vpa->pinned_addr)
kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa,
vpa->dirty);
}
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{
spin_lock(&vcpu->arch.vpa_update_lock);
if (vcpu->arch.dtl.pinned_addr)
kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr);
if (vcpu->arch.slb_shadow.pinned_addr)
kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr);
if (vcpu->arch.vpa.pinned_addr)
kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
spin_unlock(&vcpu->arch.vpa_update_lock);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
@ -955,7 +1010,6 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
}
extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
extern void xics_wake_cpu(int cpu);
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
struct kvm_vcpu *vcpu)
@ -1330,9 +1384,12 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
break;
vc->runner = vcpu;
n_ceded = 0;
list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
if (!v->arch.pending_exceptions)
n_ceded += v->arch.ceded;
else
v->arch.ceded = 0;
}
if (n_ceded == vc->n_runnable)
kvmppc_vcore_blocked(vc);
else
@ -1645,12 +1702,12 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
void kvmppc_core_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old)
const struct kvm_memory_slot *old)
{
unsigned long npages = mem->memory_size >> PAGE_SHIFT;
struct kvm_memory_slot *memslot;
if (npages && old.npages) {
if (npages && old->npages) {
/*
* If modifying a memslot, reset all the rmap dirty bits.
* If this is a new memslot, we don't need to do anything
@ -1827,6 +1884,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
cpumask_setall(&kvm->arch.need_tlb_flush);
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
kvm->arch.rma = NULL;
@ -1872,6 +1930,8 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
kvm->arch.rma = NULL;
}
kvmppc_rtas_tokens_free(kvm);
kvmppc_free_hpt(kvm);
WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
}

View File

@ -97,17 +97,6 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
/*
* Note modification of an HPTE; set the HPTE modified bit
* if anyone is interested.
*/
static inline void note_hpte_modification(struct kvm *kvm,
struct revmap_entry *rev)
{
if (atomic_read(&kvm->arch.hpte_mod_interest))
rev->guest_rpte |= HPTE_GR_MODIFIED;
}
/* Remove this HPTE from the chain for a real page */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
struct revmap_entry *rev,

View File

@ -0,0 +1,406 @@
/*
* Copyright 2012 Michael Ellerman, IBM Corporation.
* Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*/
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xics.h>
#include <asm/debug.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include "book3s_xics.h"
#define DEBUG_PASSUP
static inline void rm_writeb(unsigned long paddr, u8 val)
{
__asm__ __volatile__("sync; stbcix %0,0,%1"
: : "r" (val), "r" (paddr) : "memory");
}
static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
struct kvm_vcpu *this_vcpu)
{
struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
unsigned long xics_phys;
int cpu;
/* Mark the target VCPU as having an interrupt pending */
vcpu->stat.queue_intr++;
set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
/* Kick self ? Just set MER and return */
if (vcpu == this_vcpu) {
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER);
return;
}
/* Check if the core is loaded, if not, too hard */
cpu = vcpu->cpu;
if (cpu < 0 || cpu >= nr_cpu_ids) {
this_icp->rm_action |= XICS_RM_KICK_VCPU;
this_icp->rm_kick_target = vcpu;
return;
}
/* In SMT cpu will always point to thread 0, we adjust it */
cpu += vcpu->arch.ptid;
/* Not too hard, then poke the target */
xics_phys = paca[cpu].kvm_hstate.xics_phys;
rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
}
static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
{
/* Note: Only called on self ! */
clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
&vcpu->arch.pending_exceptions);
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
}
static inline bool icp_rm_try_update(struct kvmppc_icp *icp,
union kvmppc_icp_state old,
union kvmppc_icp_state new)
{
struct kvm_vcpu *this_vcpu = local_paca->kvm_hstate.kvm_vcpu;
bool success;
/* Calculate new output value */
new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
/* Attempt atomic update */
success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
if (!success)
goto bail;
/*
* Check for output state update
*
* Note that this is racy since another processor could be updating
* the state already. This is why we never clear the interrupt output
* here, we only ever set it. The clear only happens prior to doing
* an update and only by the processor itself. Currently we do it
* in Accept (H_XIRR) and Up_Cppr (H_XPPR).
*
* We also do not try to figure out whether the EE state has changed,
* we unconditionally set it if the new state calls for it. The reason
* for that is that we opportunistically remove the pending interrupt
* flag when raising CPPR, so we need to set it back here if an
* interrupt is still pending.
*/
if (new.out_ee)
icp_rm_set_vcpu_irq(icp->vcpu, this_vcpu);
/* Expose the state change for debug purposes */
this_vcpu->arch.icp->rm_dbgstate = new;
this_vcpu->arch.icp->rm_dbgtgt = icp->vcpu;
bail:
return success;
}
static inline int check_too_hard(struct kvmppc_xics *xics,
struct kvmppc_icp *icp)
{
return (xics->real_mode_dbg || icp->rm_action) ? H_TOO_HARD : H_SUCCESS;
}
static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
u8 new_cppr)
{
union kvmppc_icp_state old_state, new_state;
bool resend;
/*
* This handles several related states in one operation:
*
* ICP State: Down_CPPR
*
* Load CPPR with new value and if the XISR is 0
* then check for resends:
*
* ICP State: Resend
*
* If MFRR is more favored than CPPR, check for IPIs
* and notify ICS of a potential resend. This is done
* asynchronously (when used in real mode, we will have
* to exit here).
*
* We do not handle the complete Check_IPI as documented
* here. In the PAPR, this state will be used for both
* Set_MFRR and Down_CPPR. However, we know that we aren't
* changing the MFRR state here so we don't need to handle
* the case of an MFRR causing a reject of a pending irq,
* this will have been handled when the MFRR was set in the
* first place.
*
* Thus we don't have to handle rejects, only resends.
*
* When implementing real mode for HV KVM, resend will lead to
* a H_TOO_HARD return and the whole transaction will be handled
* in virtual mode.
*/
do {
old_state = new_state = ACCESS_ONCE(icp->state);
/* Down_CPPR */
new_state.cppr = new_cppr;
/*
* Cut down Resend / Check_IPI / IPI
*
* The logic is that we cannot have a pending interrupt
* trumped by an IPI at this point (see above), so we
* know that either the pending interrupt is already an
* IPI (in which case we don't care to override it) or
* it's either more favored than us or non existent
*/
if (new_state.mfrr < new_cppr &&
new_state.mfrr <= new_state.pending_pri) {
new_state.pending_pri = new_state.mfrr;
new_state.xisr = XICS_IPI;
}
/* Latch/clear resend bit */
resend = new_state.need_resend;
new_state.need_resend = 0;
} while (!icp_rm_try_update(icp, old_state, new_state));
/*
* Now handle resend checks. Those are asynchronous to the ICP
* state update in HW (ie bus transactions) so we can handle them
* separately here as well.
*/
if (resend)
icp->rm_action |= XICS_RM_CHECK_RESEND;
}
unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
{
union kvmppc_icp_state old_state, new_state;
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp = vcpu->arch.icp;
u32 xirr;
if (!xics || !xics->real_mode)
return H_TOO_HARD;
/* First clear the interrupt */
icp_rm_clr_vcpu_irq(icp->vcpu);
/*
* ICP State: Accept_Interrupt
*
* Return the pending interrupt (if any) along with the
* current CPPR, then clear the XISR & set CPPR to the
* pending priority
*/
do {
old_state = new_state = ACCESS_ONCE(icp->state);
xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
if (!old_state.xisr)
break;
new_state.cppr = new_state.pending_pri;
new_state.pending_pri = 0xff;
new_state.xisr = 0;
} while (!icp_rm_try_update(icp, old_state, new_state));
/* Return the result in GPR4 */
vcpu->arch.gpr[4] = xirr;
return check_too_hard(xics, icp);
}
int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr)
{
union kvmppc_icp_state old_state, new_state;
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp, *this_icp = vcpu->arch.icp;
u32 reject;
bool resend;
bool local;
if (!xics || !xics->real_mode)
return H_TOO_HARD;
local = this_icp->server_num == server;
if (local)
icp = this_icp;
else
icp = kvmppc_xics_find_server(vcpu->kvm, server);
if (!icp)
return H_PARAMETER;
/*
* ICP state: Set_MFRR
*
* If the CPPR is more favored than the new MFRR, then
* nothing needs to be done as there can be no XISR to
* reject.
*
* If the CPPR is less favored, then we might be replacing
* an interrupt, and thus need to possibly reject it as in
*
* ICP state: Check_IPI
*/
do {
old_state = new_state = ACCESS_ONCE(icp->state);
/* Set_MFRR */
new_state.mfrr = mfrr;
/* Check_IPI */
reject = 0;
resend = false;
if (mfrr < new_state.cppr) {
/* Reject a pending interrupt if not an IPI */
if (mfrr <= new_state.pending_pri)
reject = new_state.xisr;
new_state.pending_pri = mfrr;
new_state.xisr = XICS_IPI;
}
if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
resend = new_state.need_resend;
new_state.need_resend = 0;
}
} while (!icp_rm_try_update(icp, old_state, new_state));
/* Pass rejects to virtual mode */
if (reject && reject != XICS_IPI) {
this_icp->rm_action |= XICS_RM_REJECT;
this_icp->rm_reject = reject;
}
/* Pass resends to virtual mode */
if (resend)
this_icp->rm_action |= XICS_RM_CHECK_RESEND;
return check_too_hard(xics, this_icp);
}
int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
{
union kvmppc_icp_state old_state, new_state;
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp = vcpu->arch.icp;
u32 reject;
if (!xics || !xics->real_mode)
return H_TOO_HARD;
/*
* ICP State: Set_CPPR
*
* We can safely compare the new value with the current
* value outside of the transaction as the CPPR is only
* ever changed by the processor on itself
*/
if (cppr > icp->state.cppr) {
icp_rm_down_cppr(xics, icp, cppr);
goto bail;
} else if (cppr == icp->state.cppr)
return H_SUCCESS;
/*
* ICP State: Up_CPPR
*
* The processor is raising its priority, this can result
* in a rejection of a pending interrupt:
*
* ICP State: Reject_Current
*
* We can remove EE from the current processor, the update
* transaction will set it again if needed
*/
icp_rm_clr_vcpu_irq(icp->vcpu);
do {
old_state = new_state = ACCESS_ONCE(icp->state);
reject = 0;
new_state.cppr = cppr;
if (cppr <= new_state.pending_pri) {
reject = new_state.xisr;
new_state.xisr = 0;
new_state.pending_pri = 0xff;
}
} while (!icp_rm_try_update(icp, old_state, new_state));
/* Pass rejects to virtual mode */
if (reject && reject != XICS_IPI) {
icp->rm_action |= XICS_RM_REJECT;
icp->rm_reject = reject;
}
bail:
return check_too_hard(xics, icp);
}
int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
{
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp = vcpu->arch.icp;
struct kvmppc_ics *ics;
struct ics_irq_state *state;
u32 irq = xirr & 0x00ffffff;
u16 src;
if (!xics || !xics->real_mode)
return H_TOO_HARD;
/*
* ICP State: EOI
*
* Note: If EOI is incorrectly used by SW to lower the CPPR
* value (ie more favored), we do not check for rejection of
* a pending interrupt, this is a SW error and PAPR sepcifies
* that we don't have to deal with it.
*
* The sending of an EOI to the ICS is handled after the
* CPPR update
*
* ICP State: Down_CPPR which we handle
* in a separate function as it's shared with H_CPPR.
*/
icp_rm_down_cppr(xics, icp, xirr >> 24);
/* IPIs have no EOI */
if (irq == XICS_IPI)
goto bail;
/*
* EOI handling: If the interrupt is still asserted, we need to
* resend it. We can take a lockless "peek" at the ICS state here.
*
* "Message" interrupts will never have "asserted" set
*/
ics = kvmppc_xics_find_ics(xics, irq, &src);
if (!ics)
goto bail;
state = &ics->irq_state[src];
/* Still asserted, resend it, we make it look like a reject */
if (state->asserted) {
icp->rm_action |= XICS_RM_REJECT;
icp->rm_reject = irq;
}
bail:
return check_too_hard(xics, icp);
}

View File

@ -79,10 +79,6 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
* *
*****************************************************************************/
#define XICS_XIRR 4
#define XICS_QIRR 0xc
#define XICS_IPI 2 /* interrupt source # for IPIs */
/*
* We come in here when wakened from nap mode on a secondary hw thread.
* Relocation is off and most register values are lost.
@ -101,50 +97,51 @@ kvm_start_guest:
li r0,1
stb r0,PACA_NAPSTATELOST(r13)
/* get vcpu pointer, NULL if we have no vcpu to run */
ld r4,HSTATE_KVM_VCPU(r13)
cmpdi cr1,r4,0
/* were we napping due to cede? */
lbz r0,HSTATE_NAPPING(r13)
cmpwi r0,0
bne kvm_end_cede
/*
* We weren't napping due to cede, so this must be a secondary
* thread being woken up to run a guest, or being woken up due
* to a stray IPI. (Or due to some machine check or hypervisor
* maintenance interrupt while the core is in KVM.)
*/
/* Check the wake reason in SRR1 to see why we got here */
mfspr r3,SPRN_SRR1
rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
cmpwi r3,4 /* was it an external interrupt? */
bne 27f
/*
* External interrupt - for now assume it is an IPI, since we
* should never get any other interrupts sent to offline threads.
* Only do this for secondary threads.
*/
beq cr1,25f
lwz r3,VCPU_PTID(r4)
cmpwi r3,0
beq 27f
25: ld r5,HSTATE_XICS_PHYS(r13)
li r0,0xff
li r6,XICS_QIRR
li r7,XICS_XIRR
bne 27f /* if not */
ld r5,HSTATE_XICS_PHYS(r13)
li r7,XICS_XIRR /* if it was an external interrupt, */
lwzcix r8,r5,r7 /* get and ack the interrupt */
sync
clrldi. r9,r8,40 /* get interrupt source ID. */
beq 27f /* none there? */
cmpwi r9,XICS_IPI
bne 26f
beq 28f /* none there? */
cmpwi r9,XICS_IPI /* was it an IPI? */
bne 29f
li r0,0xff
li r6,XICS_MFRR
stbcix r0,r5,r6 /* clear IPI */
26: stwcix r8,r5,r7 /* EOI the interrupt */
stwcix r8,r5,r7 /* EOI the interrupt */
sync /* order loading of vcpu after that */
27: /* XXX should handle hypervisor maintenance interrupts etc. here */
/* reload vcpu pointer after clearing the IPI */
/* get vcpu pointer, NULL if we have no vcpu to run */
ld r4,HSTATE_KVM_VCPU(r13)
cmpdi r4,0
/* if we have no vcpu to run, go back to sleep */
beq kvm_no_guest
b kvmppc_hv_entry
/* were we napping due to cede? */
lbz r0,HSTATE_NAPPING(r13)
cmpwi r0,0
bne kvm_end_cede
27: /* XXX should handle hypervisor maintenance interrupts etc. here */
b kvm_no_guest
28: /* SRR1 said external but ICP said nope?? */
b kvm_no_guest
29: /* External non-IPI interrupt to offline secondary thread? help?? */
stw r8,HSTATE_SAVED_XIRR(r13)
b kvm_no_guest
.global kvmppc_hv_entry
kvmppc_hv_entry:
@ -260,6 +257,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
lwz r5, LPPACA_YIELDCOUNT(r3)
addi r5, r5, 1
stw r5, LPPACA_YIELDCOUNT(r3)
li r6, 1
stb r6, VCPU_VPA_DIRTY(r4)
25:
/* Load up DAR and DSISR */
ld r5, VCPU_DAR(r4)
@ -485,20 +484,20 @@ toc_tlbie_lock:
mtctr r6
mtxer r7
ld r10, VCPU_PC(r4)
ld r11, VCPU_MSR(r4)
kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
ld r6, VCPU_SRR0(r4)
ld r7, VCPU_SRR1(r4)
ld r10, VCPU_PC(r4)
ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */
/* r11 = vcpu->arch.msr & ~MSR_HV */
rldicl r11, r11, 63 - MSR_HV_LG, 1
rotldi r11, r11, 1 + MSR_HV_LG
ori r11, r11, MSR_ME
/* Check if we can deliver an external or decrementer interrupt now */
ld r0,VCPU_PENDING_EXC(r4)
li r8,(1 << BOOK3S_IRQPRIO_EXTERNAL)
oris r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
lis r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
and r0,r0,r8
cmpdi cr1,r0,0
andi. r0,r11,MSR_EE
@ -526,10 +525,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
/* Move SRR0 and SRR1 into the respective regs */
5: mtspr SPRN_SRR0, r6
mtspr SPRN_SRR1, r7
li r0,0
stb r0,VCPU_CEDED(r4) /* cancel cede */
fast_guest_return:
li r0,0
stb r0,VCPU_CEDED(r4) /* cancel cede */
mtspr SPRN_HSRR0,r10
mtspr SPRN_HSRR1,r11
@ -676,17 +675,99 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
beq hcall_try_real_mode
/* Check for mediated interrupts (could be done earlier really ...) */
/* Only handle external interrupts here on arch 206 and later */
BEGIN_FTR_SECTION
b ext_interrupt_to_host
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
/* External interrupt ? */
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
bne+ 1f
bne+ ext_interrupt_to_host
/* External interrupt, first check for host_ipi. If this is
* set, we know the host wants us out so let's do it now
*/
do_ext_interrupt:
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
bne ext_interrupt_to_host
/* Now read the interrupt from the ICP */
ld r5, HSTATE_XICS_PHYS(r13)
li r7, XICS_XIRR
cmpdi r5, 0
beq- ext_interrupt_to_host
lwzcix r3, r5, r7
rlwinm. r0, r3, 0, 0xffffff
sync
beq 3f /* if nothing pending in the ICP */
/* We found something in the ICP...
*
* If it's not an IPI, stash it in the PACA and return to
* the host, we don't (yet) handle directing real external
* interrupts directly to the guest
*/
cmpwi r0, XICS_IPI
bne ext_stash_for_host
/* It's an IPI, clear the MFRR and EOI it */
li r0, 0xff
li r6, XICS_MFRR
stbcix r0, r5, r6 /* clear the IPI */
stwcix r3, r5, r7 /* EOI it */
sync
/* We need to re-check host IPI now in case it got set in the
* meantime. If it's clear, we bounce the interrupt to the
* guest
*/
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
bne- 1f
/* Allright, looks like an IPI for the guest, we need to set MER */
3:
/* Check if any CPU is heading out to the host, if so head out too */
ld r5, HSTATE_KVM_VCORE(r13)
lwz r0, VCORE_ENTRY_EXIT(r5)
cmpwi r0, 0x100
bge ext_interrupt_to_host
/* See if there is a pending interrupt for the guest */
mfspr r8, SPRN_LPCR
ld r0, VCPU_PENDING_EXC(r9)
/* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
rldicl. r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
beq 2f
/* And if the guest EE is set, we can deliver immediately, else
* we return to the guest with MER set
*/
andi. r0, r11, MSR_EE
beq 1f
mfspr r5,SPRN_LPCR
andi. r0,r5,LPCR_MER
bne bounce_ext_interrupt
1:
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
beq 2f
mtspr SPRN_SRR0, r10
mtspr SPRN_SRR1, r11
li r10, BOOK3S_INTERRUPT_EXTERNAL
li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
rotldi r11, r11, 63
2: mr r4, r9
mtspr SPRN_LPCR, r8
b fast_guest_return
/* We raced with the host, we need to resend that IPI, bummer */
1: li r0, IPI_PRIORITY
stbcix r0, r5, r6 /* set the IPI */
sync
b ext_interrupt_to_host
ext_stash_for_host:
/* It's not an IPI and it's for the host, stash it in the PACA
* before exit, it will be picked up by the host ICP driver
*/
stw r3, HSTATE_SAVED_XIRR(r13)
ext_interrupt_to_host:
guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
/* Save DEC */
@ -829,7 +910,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
beq 44f
ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */
li r0,IPI_PRIORITY
li r7,XICS_QIRR
li r7,XICS_MFRR
stbcix r0,r7,r8 /* trigger the IPI */
44: srdi. r3,r3,1
addi r6,r6,PACA_SIZE
@ -1018,6 +1099,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
lwz r3, LPPACA_YIELDCOUNT(r8)
addi r3, r3, 1
stw r3, LPPACA_YIELDCOUNT(r8)
li r3, 1
stb r3, VCPU_VPA_DIRTY(r9)
25:
/* Save PMU registers if requested */
/* r8 and cr0.eq are live here */
@ -1350,11 +1433,19 @@ hcall_real_table:
.long 0 /* 0x58 */
.long 0 /* 0x5c */
.long 0 /* 0x60 */
.long 0 /* 0x64 */
.long 0 /* 0x68 */
.long 0 /* 0x6c */
.long 0 /* 0x70 */
.long 0 /* 0x74 */
#ifdef CONFIG_KVM_XICS
.long .kvmppc_rm_h_eoi - hcall_real_table
.long .kvmppc_rm_h_cppr - hcall_real_table
.long .kvmppc_rm_h_ipi - hcall_real_table
.long 0 /* 0x70 - H_IPOLL */
.long .kvmppc_rm_h_xirr - hcall_real_table
#else
.long 0 /* 0x64 - H_EOI */
.long 0 /* 0x68 - H_CPPR */
.long 0 /* 0x6c - H_IPI */
.long 0 /* 0x70 - H_IPOLL */
.long 0 /* 0x74 - H_XIRR */
#endif
.long 0 /* 0x78 */
.long 0 /* 0x7c */
.long 0 /* 0x80 */
@ -1405,15 +1496,6 @@ ignore_hdec:
mr r4,r9
b fast_guest_return
bounce_ext_interrupt:
mr r4,r9
mtspr SPRN_SRR0,r10
mtspr SPRN_SRR1,r11
li r10,BOOK3S_INTERRUPT_EXTERNAL
li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
rotldi r11,r11,63
b fast_guest_return
_GLOBAL(kvmppc_h_set_dabr)
std r4,VCPU_DABR(r3)
/* Work around P7 bug where DABR can get corrupted on mtspr */
@ -1519,6 +1601,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
b .
kvm_end_cede:
/* get vcpu pointer */
ld r4, HSTATE_KVM_VCPU(r13)
/* Woken by external or decrementer interrupt */
ld r1, HSTATE_HOST_R1(r13)
@ -1558,6 +1643,16 @@ kvm_end_cede:
li r0,0
stb r0,HSTATE_NAPPING(r13)
/* Check the wake reason in SRR1 to see why we got here */
mfspr r3, SPRN_SRR1
rlwinm r3, r3, 44-31, 0x7 /* extract wake reason field */
cmpwi r3, 4 /* was it an external interrupt? */
li r12, BOOK3S_INTERRUPT_EXTERNAL
mr r9, r4
ld r10, VCPU_PC(r9)
ld r11, VCPU_MSR(r9)
beq do_ext_interrupt /* if so */
/* see if any other thread is already exiting */
lwz r0,VCORE_ENTRY_EXIT(r5)
cmpwi r0,0x100
@ -1577,8 +1672,7 @@ kvm_cede_prodded:
/* we've ceded but we want to give control to the host */
kvm_cede_exit:
li r3,H_TOO_HARD
blr
b hcall_real_fallback
/* Try to handle a machine check in real mode */
machine_check_realmode:
@ -1626,7 +1720,7 @@ secondary_nap:
beq 37f
sync
li r0, 0xff
li r6, XICS_QIRR
li r6, XICS_MFRR
stbcix r0, r5, r6 /* clear the IPI */
stwcix r3, r5, r7 /* EOI it */
37: sync

View File

@ -762,9 +762,7 @@ program_interrupt:
run->exit_reason = KVM_EXIT_MMIO;
r = RESUME_HOST_NV;
break;
case EMULATE_DO_PAPR:
run->exit_reason = KVM_EXIT_PAPR_HCALL;
vcpu->arch.hcall_needed = 1;
case EMULATE_EXIT_USER:
r = RESUME_HOST_NV;
break;
default:
@ -1283,7 +1281,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
void kvmppc_core_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old)
const struct kvm_memory_slot *old)
{
}
@ -1298,6 +1296,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
{
#ifdef CONFIG_PPC64
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
#endif
if (firmware_has_feature(FW_FEATURE_SET_MODE)) {

View File

@ -227,6 +227,13 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{
long rc = kvmppc_xics_hcall(vcpu, cmd);
kvmppc_set_gpr(vcpu, 3, rc);
return EMULATE_DONE;
}
int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
{
switch (cmd) {
@ -246,6 +253,20 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
vcpu->stat.halt_wakeup++;
return EMULATE_DONE;
case H_XIRR:
case H_CPPR:
case H_EOI:
case H_IPI:
if (kvmppc_xics_enabled(vcpu))
return kvmppc_h_pr_xics_hcall(vcpu, cmd);
break;
case H_RTAS:
if (list_empty(&vcpu->kvm->arch.rtas_tokens))
return RESUME_HOST;
if (kvmppc_rtas_hcall(vcpu))
break;
kvmppc_set_gpr(vcpu, 3, 0);
return EMULATE_DONE;
}
return EMULATE_FAIL;

View File

@ -0,0 +1,274 @@
/*
* Copyright 2012 Michael Ellerman, IBM Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*/
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/err.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/rtas.h>
#ifdef CONFIG_KVM_XICS
static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
{
u32 irq, server, priority;
int rc;
if (args->nargs != 3 || args->nret != 1) {
rc = -3;
goto out;
}
irq = args->args[0];
server = args->args[1];
priority = args->args[2];
rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
if (rc)
rc = -3;
out:
args->rets[0] = rc;
}
static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
{
u32 irq, server, priority;
int rc;
if (args->nargs != 1 || args->nret != 3) {
rc = -3;
goto out;
}
irq = args->args[0];
server = priority = 0;
rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
if (rc) {
rc = -3;
goto out;
}
args->rets[1] = server;
args->rets[2] = priority;
out:
args->rets[0] = rc;
}
static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
{
u32 irq;
int rc;
if (args->nargs != 1 || args->nret != 1) {
rc = -3;
goto out;
}
irq = args->args[0];
rc = kvmppc_xics_int_off(vcpu->kvm, irq);
if (rc)
rc = -3;
out:
args->rets[0] = rc;
}
static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
{
u32 irq;
int rc;
if (args->nargs != 1 || args->nret != 1) {
rc = -3;
goto out;
}
irq = args->args[0];
rc = kvmppc_xics_int_on(vcpu->kvm, irq);
if (rc)
rc = -3;
out:
args->rets[0] = rc;
}
#endif /* CONFIG_KVM_XICS */
struct rtas_handler {
void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
char *name;
};
static struct rtas_handler rtas_handlers[] = {
#ifdef CONFIG_KVM_XICS
{ .name = "ibm,set-xive", .handler = kvm_rtas_set_xive },
{ .name = "ibm,get-xive", .handler = kvm_rtas_get_xive },
{ .name = "ibm,int-off", .handler = kvm_rtas_int_off },
{ .name = "ibm,int-on", .handler = kvm_rtas_int_on },
#endif
};
struct rtas_token_definition {
struct list_head list;
struct rtas_handler *handler;
u64 token;
};
static int rtas_name_matches(char *s1, char *s2)
{
struct kvm_rtas_token_args args;
return !strncmp(s1, s2, sizeof(args.name));
}
static int rtas_token_undefine(struct kvm *kvm, char *name)
{
struct rtas_token_definition *d, *tmp;
lockdep_assert_held(&kvm->lock);
list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
if (rtas_name_matches(d->handler->name, name)) {
list_del(&d->list);
kfree(d);
return 0;
}
}
/* It's not an error to undefine an undefined token */
return 0;
}
static int rtas_token_define(struct kvm *kvm, char *name, u64 token)
{
struct rtas_token_definition *d;
struct rtas_handler *h = NULL;
bool found;
int i;
lockdep_assert_held(&kvm->lock);
list_for_each_entry(d, &kvm->arch.rtas_tokens, list) {
if (d->token == token)
return -EEXIST;
}
found = false;
for (i = 0; i < ARRAY_SIZE(rtas_handlers); i++) {
h = &rtas_handlers[i];
if (rtas_name_matches(h->name, name)) {
found = true;
break;
}
}
if (!found)
return -ENOENT;
d = kzalloc(sizeof(*d), GFP_KERNEL);
if (!d)
return -ENOMEM;
d->handler = h;
d->token = token;
list_add_tail(&d->list, &kvm->arch.rtas_tokens);
return 0;
}
int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp)
{
struct kvm_rtas_token_args args;
int rc;
if (copy_from_user(&args, argp, sizeof(args)))
return -EFAULT;
mutex_lock(&kvm->lock);
if (args.token)
rc = rtas_token_define(kvm, args.name, args.token);
else
rc = rtas_token_undefine(kvm, args.name);
mutex_unlock(&kvm->lock);
return rc;
}
int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
{
struct rtas_token_definition *d;
struct rtas_args args;
rtas_arg_t *orig_rets;
gpa_t args_phys;
int rc;
/* r4 contains the guest physical address of the RTAS args */
args_phys = kvmppc_get_gpr(vcpu, 4);
rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
if (rc)
goto fail;
/*
* args->rets is a pointer into args->args. Now that we've
* copied args we need to fix it up to point into our copy,
* not the guest args. We also need to save the original
* value so we can restore it on the way out.
*/
orig_rets = args.rets;
args.rets = &args.args[args.nargs];
mutex_lock(&vcpu->kvm->lock);
rc = -ENOENT;
list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) {
if (d->token == args.token) {
d->handler->handler(vcpu, &args);
rc = 0;
break;
}
}
mutex_unlock(&vcpu->kvm->lock);
if (rc == 0) {
args.rets = orig_rets;
rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args));
if (rc)
goto fail;
}
return rc;
fail:
/*
* We only get here if the guest has called RTAS with a bogus
* args pointer. That means we can't get to the args, and so we
* can't fail the RTAS call. So fail right out to userspace,
* which should kill the guest.
*/
return rc;
}
void kvmppc_rtas_tokens_free(struct kvm *kvm)
{
struct rtas_token_definition *d, *tmp;
lockdep_assert_held(&kvm->lock);
list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
list_del(&d->list);
kfree(d);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,130 @@
/*
* Copyright 2012 Michael Ellerman, IBM Corporation.
* Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*/
#ifndef _KVM_PPC_BOOK3S_XICS_H
#define _KVM_PPC_BOOK3S_XICS_H
/*
* We use a two-level tree to store interrupt source information.
* There are up to 1024 ICS nodes, each of which can represent
* 1024 sources.
*/
#define KVMPPC_XICS_MAX_ICS_ID 1023
#define KVMPPC_XICS_ICS_SHIFT 10
#define KVMPPC_XICS_IRQ_PER_ICS (1 << KVMPPC_XICS_ICS_SHIFT)
#define KVMPPC_XICS_SRC_MASK (KVMPPC_XICS_IRQ_PER_ICS - 1)
/*
* Interrupt source numbers below this are reserved, for example
* 0 is "no interrupt", and 2 is used for IPIs.
*/
#define KVMPPC_XICS_FIRST_IRQ 16
#define KVMPPC_XICS_NR_IRQS ((KVMPPC_XICS_MAX_ICS_ID + 1) * \
KVMPPC_XICS_IRQ_PER_ICS)
/* Priority value to use for disabling an interrupt */
#define MASKED 0xff
/* State for one irq source */
struct ics_irq_state {
u32 number;
u32 server;
u8 priority;
u8 saved_priority;
u8 resend;
u8 masked_pending;
u8 asserted; /* Only for LSI */
u8 exists;
};
/* Atomic ICP state, updated with a single compare & swap */
union kvmppc_icp_state {
unsigned long raw;
struct {
u8 out_ee:1;
u8 need_resend:1;
u8 cppr;
u8 mfrr;
u8 pending_pri;
u32 xisr;
};
};
/* One bit per ICS */
#define ICP_RESEND_MAP_SIZE (KVMPPC_XICS_MAX_ICS_ID / BITS_PER_LONG + 1)
struct kvmppc_icp {
struct kvm_vcpu *vcpu;
unsigned long server_num;
union kvmppc_icp_state state;
unsigned long resend_map[ICP_RESEND_MAP_SIZE];
/* Real mode might find something too hard, here's the action
* it might request from virtual mode
*/
#define XICS_RM_KICK_VCPU 0x1
#define XICS_RM_CHECK_RESEND 0x2
#define XICS_RM_REJECT 0x4
u32 rm_action;
struct kvm_vcpu *rm_kick_target;
u32 rm_reject;
/* Debug stuff for real mode */
union kvmppc_icp_state rm_dbgstate;
struct kvm_vcpu *rm_dbgtgt;
};
struct kvmppc_ics {
struct mutex lock;
u16 icsid;
struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
};
struct kvmppc_xics {
struct kvm *kvm;
struct kvm_device *dev;
struct dentry *dentry;
u32 max_icsid;
bool real_mode;
bool real_mode_dbg;
struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
};
static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm,
u32 nr)
{
struct kvm_vcpu *vcpu = NULL;
int i;
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num)
return vcpu->arch.icp;
}
return NULL;
}
static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
u32 irq, u16 *source)
{
u32 icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
u16 src = irq & KVMPPC_XICS_SRC_MASK;
struct kvmppc_ics *ics;
if (source)
*source = src;
if (icsid > KVMPPC_XICS_MAX_ICS_ID)
return NULL;
ics = xics->ics[icsid];
if (!ics)
return NULL;
return ics;
}
#endif /* _KVM_PPC_BOOK3S_XICS_H */

View File

@ -222,8 +222,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
kvmppc_booke_queue_irqprio(vcpu, prio);
}
void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
{
clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
@ -347,7 +346,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
keep_irq = true;
}
if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled)
if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_flags)
update_epr = true;
switch (priority) {
@ -428,8 +427,14 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
set_guest_esr(vcpu, vcpu->arch.queued_esr);
if (update_dear == true)
set_guest_dear(vcpu, vcpu->arch.queued_dear);
if (update_epr == true)
if (update_epr == true) {
if (vcpu->arch.epr_flags & KVMPPC_EPR_USER)
kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
else if (vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) {
BUG_ON(vcpu->arch.irq_type != KVMPPC_IRQ_MPIC);
kvmppc_mpic_set_epr(vcpu);
}
}
new_msr &= msr_mask;
#if defined(CONFIG_64BIT)
@ -746,6 +751,9 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
kvmppc_core_queue_program(vcpu, ESR_PIL);
return RESUME_HOST;
case EMULATE_EXIT_USER:
return RESUME_HOST;
default:
BUG();
}
@ -1148,6 +1156,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
return r;
}
static void kvmppc_set_tsr(struct kvm_vcpu *vcpu, u32 new_tsr)
{
u32 old_tsr = vcpu->arch.tsr;
vcpu->arch.tsr = new_tsr;
if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
arm_next_watchdog(vcpu);
update_timer_ints(vcpu);
}
/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
@ -1287,16 +1307,8 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
kvmppc_emulate_dec(vcpu);
}
if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
u32 old_tsr = vcpu->arch.tsr;
vcpu->arch.tsr = sregs->u.e.tsr;
if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
arm_next_watchdog(vcpu);
update_timer_ints(vcpu);
}
if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR)
kvmppc_set_tsr(vcpu, sregs->u.e.tsr);
return 0;
}
@ -1409,84 +1421,134 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
int r = -EINVAL;
int r = 0;
union kvmppc_one_reg val;
int size;
long int i;
size = one_reg_size(reg->id);
if (size > sizeof(val))
return -EINVAL;
switch (reg->id) {
case KVM_REG_PPC_IAC1:
case KVM_REG_PPC_IAC2:
case KVM_REG_PPC_IAC3:
case KVM_REG_PPC_IAC4: {
int iac = reg->id - KVM_REG_PPC_IAC1;
r = copy_to_user((u64 __user *)(long)reg->addr,
&vcpu->arch.dbg_reg.iac[iac], sizeof(u64));
case KVM_REG_PPC_IAC4:
i = reg->id - KVM_REG_PPC_IAC1;
val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]);
break;
}
case KVM_REG_PPC_DAC1:
case KVM_REG_PPC_DAC2: {
int dac = reg->id - KVM_REG_PPC_DAC1;
r = copy_to_user((u64 __user *)(long)reg->addr,
&vcpu->arch.dbg_reg.dac[dac], sizeof(u64));
case KVM_REG_PPC_DAC2:
i = reg->id - KVM_REG_PPC_DAC1;
val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]);
break;
}
case KVM_REG_PPC_EPR: {
u32 epr = get_guest_epr(vcpu);
r = put_user(epr, (u32 __user *)(long)reg->addr);
val = get_reg_val(reg->id, epr);
break;
}
#if defined(CONFIG_64BIT)
case KVM_REG_PPC_EPCR:
r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr);
val = get_reg_val(reg->id, vcpu->arch.epcr);
break;
#endif
case KVM_REG_PPC_TCR:
val = get_reg_val(reg->id, vcpu->arch.tcr);
break;
case KVM_REG_PPC_TSR:
val = get_reg_val(reg->id, vcpu->arch.tsr);
break;
case KVM_REG_PPC_DEBUG_INST:
val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV);
break;
default:
r = kvmppc_get_one_reg(vcpu, reg->id, &val);
break;
}
if (r)
return r;
if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
r = -EFAULT;
return r;
}
int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
int r = -EINVAL;
int r = 0;
union kvmppc_one_reg val;
int size;
long int i;
size = one_reg_size(reg->id);
if (size > sizeof(val))
return -EINVAL;
if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
return -EFAULT;
switch (reg->id) {
case KVM_REG_PPC_IAC1:
case KVM_REG_PPC_IAC2:
case KVM_REG_PPC_IAC3:
case KVM_REG_PPC_IAC4: {
int iac = reg->id - KVM_REG_PPC_IAC1;
r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac],
(u64 __user *)(long)reg->addr, sizeof(u64));
case KVM_REG_PPC_IAC4:
i = reg->id - KVM_REG_PPC_IAC1;
vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val);
break;
}
case KVM_REG_PPC_DAC1:
case KVM_REG_PPC_DAC2: {
int dac = reg->id - KVM_REG_PPC_DAC1;
r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac],
(u64 __user *)(long)reg->addr, sizeof(u64));
case KVM_REG_PPC_DAC2:
i = reg->id - KVM_REG_PPC_DAC1;
vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val);
break;
}
case KVM_REG_PPC_EPR: {
u32 new_epr;
r = get_user(new_epr, (u32 __user *)(long)reg->addr);
if (!r)
u32 new_epr = set_reg_val(reg->id, val);
kvmppc_set_epr(vcpu, new_epr);
break;
}
#if defined(CONFIG_64BIT)
case KVM_REG_PPC_EPCR: {
u32 new_epcr;
r = get_user(new_epcr, (u32 __user *)(long)reg->addr);
if (r == 0)
u32 new_epcr = set_reg_val(reg->id, val);
kvmppc_set_epcr(vcpu, new_epcr);
break;
}
#endif
default:
case KVM_REG_PPC_OR_TSR: {
u32 tsr_bits = set_reg_val(reg->id, val);
kvmppc_set_tsr_bits(vcpu, tsr_bits);
break;
}
case KVM_REG_PPC_CLEAR_TSR: {
u32 tsr_bits = set_reg_val(reg->id, val);
kvmppc_clr_tsr_bits(vcpu, tsr_bits);
break;
}
case KVM_REG_PPC_TSR: {
u32 tsr = set_reg_val(reg->id, val);
kvmppc_set_tsr(vcpu, tsr);
break;
}
case KVM_REG_PPC_TCR: {
u32 tcr = set_reg_val(reg->id, val);
kvmppc_set_tcr(vcpu, tcr);
break;
}
default:
r = kvmppc_set_one_reg(vcpu, reg->id, &val);
break;
}
return r;
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
return -EINVAL;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
return -ENOTSUPP;
@ -1531,7 +1593,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
void kvmppc_core_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old)
const struct kvm_memory_slot *old)
{
}

View File

@ -54,8 +54,7 @@
(1<<BOOKE_INTERRUPT_DTLB_MISS) | \
(1<<BOOKE_INTERRUPT_ALIGNMENT))
.macro KVM_HANDLER ivor_nr scratch srr0
_GLOBAL(kvmppc_handler_\ivor_nr)
.macro __KVM_HANDLER ivor_nr scratch srr0
/* Get pointer to vcpu and record exit number. */
mtspr \scratch , r4
mfspr r4, SPRN_SPRG_THREAD
@ -76,6 +75,43 @@ _GLOBAL(kvmppc_handler_\ivor_nr)
bctr
.endm
.macro KVM_HANDLER ivor_nr scratch srr0
_GLOBAL(kvmppc_handler_\ivor_nr)
__KVM_HANDLER \ivor_nr \scratch \srr0
.endm
.macro KVM_DBG_HANDLER ivor_nr scratch srr0
_GLOBAL(kvmppc_handler_\ivor_nr)
mtspr \scratch, r4
mfspr r4, SPRN_SPRG_THREAD
lwz r4, THREAD_KVM_VCPU(r4)
stw r3, VCPU_CRIT_SAVE(r4)
mfcr r3
mfspr r4, SPRN_CSRR1
andi. r4, r4, MSR_PR
bne 1f
/* debug interrupt happened in enter/exit path */
mfspr r4, SPRN_CSRR1
rlwinm r4, r4, 0, ~MSR_DE
mtspr SPRN_CSRR1, r4
lis r4, 0xffff
ori r4, r4, 0xffff
mtspr SPRN_DBSR, r4
mfspr r4, SPRN_SPRG_THREAD
lwz r4, THREAD_KVM_VCPU(r4)
mtcr r3
lwz r3, VCPU_CRIT_SAVE(r4)
mfspr r4, \scratch
rfci
1: /* debug interrupt happened in guest */
mtcr r3
mfspr r4, SPRN_SPRG_THREAD
lwz r4, THREAD_KVM_VCPU(r4)
lwz r3, VCPU_CRIT_SAVE(r4)
mfspr r4, \scratch
__KVM_HANDLER \ivor_nr \scratch \srr0
.endm
.macro KVM_HANDLER_ADDR ivor_nr
.long kvmppc_handler_\ivor_nr
.endm
@ -100,7 +136,7 @@ KVM_HANDLER BOOKE_INTERRUPT_FIT SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
KVM_DBG_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0

View File

@ -425,6 +425,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return kvmppc_set_sregs_ivor(vcpu, sregs);
}
int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val)
{
int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
return r;
}
int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val)
{
int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
return r;
}
struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvmppc_vcpu_e500 *vcpu_e500;

View File

@ -23,6 +23,10 @@
#include <asm/mmu-book3e.h>
#include <asm/tlb.h>
enum vcpu_ftr {
VCPU_FTR_MMU_V2
};
#define E500_PID_NUM 3
#define E500_TLB_NUM 2
@ -131,6 +135,10 @@ void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val);
int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val);
#ifdef CONFIG_KVM_E500V2
unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
@ -295,4 +303,18 @@ static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu)
#define get_tlb_sts(gtlbe) (MAS1_TS)
#endif /* !BOOKE_HV */
static inline bool has_feature(const struct kvm_vcpu *vcpu,
enum vcpu_ftr ftr)
{
bool has_ftr;
switch (ftr) {
case VCPU_FTR_MMU_V2:
has_ftr = ((vcpu->arch.mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2);
break;
default:
return false;
}
return has_ftr;
}
#endif /* KVM_E500_H */

View File

@ -284,6 +284,16 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
case SPRN_TLB1CFG:
*spr_val = vcpu->arch.tlbcfg[1];
break;
case SPRN_TLB0PS:
if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
return EMULATE_FAIL;
*spr_val = vcpu->arch.tlbps[0];
break;
case SPRN_TLB1PS:
if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
return EMULATE_FAIL;
*spr_val = vcpu->arch.tlbps[1];
break;
case SPRN_L1CSR0:
*spr_val = vcpu_e500->l1csr0;
break;
@ -307,6 +317,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
case SPRN_MMUCFG:
*spr_val = vcpu->arch.mmucfg;
break;
case SPRN_EPTCFG:
if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
return EMULATE_FAIL;
/*
* Legacy Linux guests access EPTCFG register even if the E.PT
* category is disabled in the VM. Give them a chance to live.
*/
*spr_val = vcpu->arch.eptcfg;
break;
/* extra exceptions */
case SPRN_IVOR32:

View File

@ -596,6 +596,140 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return 0;
}
int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val)
{
int r = 0;
long int i;
switch (id) {
case KVM_REG_PPC_MAS0:
*val = get_reg_val(id, vcpu->arch.shared->mas0);
break;
case KVM_REG_PPC_MAS1:
*val = get_reg_val(id, vcpu->arch.shared->mas1);
break;
case KVM_REG_PPC_MAS2:
*val = get_reg_val(id, vcpu->arch.shared->mas2);
break;
case KVM_REG_PPC_MAS7_3:
*val = get_reg_val(id, vcpu->arch.shared->mas7_3);
break;
case KVM_REG_PPC_MAS4:
*val = get_reg_val(id, vcpu->arch.shared->mas4);
break;
case KVM_REG_PPC_MAS6:
*val = get_reg_val(id, vcpu->arch.shared->mas6);
break;
case KVM_REG_PPC_MMUCFG:
*val = get_reg_val(id, vcpu->arch.mmucfg);
break;
case KVM_REG_PPC_EPTCFG:
*val = get_reg_val(id, vcpu->arch.eptcfg);
break;
case KVM_REG_PPC_TLB0CFG:
case KVM_REG_PPC_TLB1CFG:
case KVM_REG_PPC_TLB2CFG:
case KVM_REG_PPC_TLB3CFG:
i = id - KVM_REG_PPC_TLB0CFG;
*val = get_reg_val(id, vcpu->arch.tlbcfg[i]);
break;
case KVM_REG_PPC_TLB0PS:
case KVM_REG_PPC_TLB1PS:
case KVM_REG_PPC_TLB2PS:
case KVM_REG_PPC_TLB3PS:
i = id - KVM_REG_PPC_TLB0PS;
*val = get_reg_val(id, vcpu->arch.tlbps[i]);
break;
default:
r = -EINVAL;
break;
}
return r;
}
int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val)
{
int r = 0;
long int i;
switch (id) {
case KVM_REG_PPC_MAS0:
vcpu->arch.shared->mas0 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_MAS1:
vcpu->arch.shared->mas1 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_MAS2:
vcpu->arch.shared->mas2 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_MAS7_3:
vcpu->arch.shared->mas7_3 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_MAS4:
vcpu->arch.shared->mas4 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_MAS6:
vcpu->arch.shared->mas6 = set_reg_val(id, *val);
break;
/* Only allow MMU registers to be set to the config supported by KVM */
case KVM_REG_PPC_MMUCFG: {
u32 reg = set_reg_val(id, *val);
if (reg != vcpu->arch.mmucfg)
r = -EINVAL;
break;
}
case KVM_REG_PPC_EPTCFG: {
u32 reg = set_reg_val(id, *val);
if (reg != vcpu->arch.eptcfg)
r = -EINVAL;
break;
}
case KVM_REG_PPC_TLB0CFG:
case KVM_REG_PPC_TLB1CFG:
case KVM_REG_PPC_TLB2CFG:
case KVM_REG_PPC_TLB3CFG: {
/* MMU geometry (N_ENTRY/ASSOC) can be set only using SW_TLB */
u32 reg = set_reg_val(id, *val);
i = id - KVM_REG_PPC_TLB0CFG;
if (reg != vcpu->arch.tlbcfg[i])
r = -EINVAL;
break;
}
case KVM_REG_PPC_TLB0PS:
case KVM_REG_PPC_TLB1PS:
case KVM_REG_PPC_TLB2PS:
case KVM_REG_PPC_TLB3PS: {
u32 reg = set_reg_val(id, *val);
i = id - KVM_REG_PPC_TLB0PS;
if (reg != vcpu->arch.tlbps[i])
r = -EINVAL;
break;
}
default:
r = -EINVAL;
break;
}
return r;
}
static int vcpu_mmu_geometry_update(struct kvm_vcpu *vcpu,
struct kvm_book3e_206_tlb_params *params)
{
vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
if (params->tlb_sizes[0] <= 2048)
vcpu->arch.tlbcfg[0] |= params->tlb_sizes[0];
vcpu->arch.tlbcfg[0] |= params->tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
vcpu->arch.tlbcfg[1] |= params->tlb_sizes[1];
vcpu->arch.tlbcfg[1] |= params->tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
return 0;
}
int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
struct kvm_config_tlb *cfg)
{
@ -692,16 +826,8 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
vcpu_e500->gtlb_offset[0] = 0;
vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
if (params.tlb_sizes[0] <= 2048)
vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0];
vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1];
vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
/* Update vcpu's MMU geometry based on SW_TLB input */
vcpu_mmu_geometry_update(vcpu, &params);
vcpu_e500->shared_tlb_pages = pages;
vcpu_e500->num_shared_tlb_pages = num_pages;
@ -737,6 +863,39 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
return 0;
}
/* Vcpu's MMU default configuration */
static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
struct kvmppc_e500_tlb_params *params)
{
/* Initialize RASIZE, PIDSIZE, NTLBS and MAVN fields with host values*/
vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
/* Initialize TLBnCFG fields with host values and SW_TLB geometry*/
vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
vcpu->arch.tlbcfg[0] |= params[0].entries;
vcpu->arch.tlbcfg[0] |= params[0].ways << TLBnCFG_ASSOC_SHIFT;
vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
vcpu->arch.tlbcfg[1] |= params[1].entries;
vcpu->arch.tlbcfg[1] |= params[1].ways << TLBnCFG_ASSOC_SHIFT;
if (has_feature(vcpu, VCPU_FTR_MMU_V2)) {
vcpu->arch.tlbps[0] = mfspr(SPRN_TLB0PS);
vcpu->arch.tlbps[1] = mfspr(SPRN_TLB1PS);
vcpu->arch.mmucfg &= ~MMUCFG_LRAT;
/* Guest mmu emulation currently doesn't handle E.PT */
vcpu->arch.eptcfg = 0;
vcpu->arch.tlbcfg[0] &= ~TLBnCFG_PT;
vcpu->arch.tlbcfg[1] &= ~TLBnCFG_IND;
}
return 0;
}
int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
{
struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
@ -781,18 +940,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
if (!vcpu_e500->g2h_tlb1_map)
goto err;
/* Init TLB configuration register */
vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries;
vcpu->arch.tlbcfg[0] |=
vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT;
vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries;
vcpu->arch.tlbcfg[1] |=
vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT;
vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params);
kvmppc_recalc_tlb1map_range(vcpu_e500);
return 0;

View File

@ -177,6 +177,8 @@ int kvmppc_core_check_processor_compat(void)
r = 0;
else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
r = 0;
else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0)
r = 0;
else
r = -ENOTSUPP;
@ -260,6 +262,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return kvmppc_set_sregs_ivor(vcpu, sregs);
}
int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val)
{
int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
return r;
}
int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val)
{
int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
return r;
}
struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvmppc_vcpu_e500 *vcpu_e500;

View File

@ -38,6 +38,7 @@
#define OP_31_XOP_TRAP 4
#define OP_31_XOP_LWZX 23
#define OP_31_XOP_DCBST 54
#define OP_31_XOP_TRAP_64 68
#define OP_31_XOP_DCBF 86
#define OP_31_XOP_LBZX 87
@ -370,6 +371,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs);
break;
case OP_31_XOP_DCBST:
case OP_31_XOP_DCBF:
case OP_31_XOP_DCBI:
/* Do nothing. The guest is performing dcbi because

20
arch/powerpc/kvm/irq.h Normal file
View File

@ -0,0 +1,20 @@
#ifndef __IRQ_H
#define __IRQ_H
#include <linux/kvm_host.h>
static inline int irqchip_in_kernel(struct kvm *kvm)
{
int ret = 0;
#ifdef CONFIG_KVM_MPIC
ret = ret || (kvm->arch.mpic != NULL);
#endif
#ifdef CONFIG_KVM_XICS
ret = ret || (kvm->arch.xics != NULL);
#endif
smp_rmb();
return ret;
}
#endif

1853
arch/powerpc/kvm/mpic.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -25,6 +25,7 @@
#include <linux/hrtimer.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <asm/cputable.h>
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
@ -32,6 +33,7 @@
#include <asm/cputhreads.h>
#include <asm/irqflags.h>
#include "timing.h"
#include "irq.h"
#include "../mm/mmu_decl.h"
#define CREATE_TRACE_POINTS
@ -317,6 +319,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_ONE_REG:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
r = 1;
break;
#ifndef CONFIG_KVM_BOOK3S_64_HV
@ -325,6 +328,9 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PPC_GET_PVINFO:
#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
case KVM_CAP_SW_TLB:
#endif
#ifdef CONFIG_KVM_MPIC
case KVM_CAP_IRQ_MPIC:
#endif
r = 1;
break;
@ -335,6 +341,10 @@ int kvm_dev_ioctl_check_extension(long ext)
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
case KVM_CAP_PPC_ALLOC_HTAB:
case KVM_CAP_PPC_RTAS:
#ifdef CONFIG_KVM_XICS
case KVM_CAP_IRQ_XICS:
#endif
r = 1;
break;
#endif /* CONFIG_PPC_BOOK3S_64 */
@ -412,17 +422,16 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_memory_slot old,
struct kvm_userspace_memory_region *mem,
bool user_alloc)
enum kvm_mr_change change)
{
return kvmppc_core_prepare_memory_region(kvm, memslot, mem);
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old,
bool user_alloc)
const struct kvm_memory_slot *old,
enum kvm_mr_change change)
{
kvmppc_core_commit_memory_region(kvm, mem, old);
}
@ -460,6 +469,16 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
tasklet_kill(&vcpu->arch.tasklet);
kvmppc_remove_vcpu_debugfs(vcpu);
switch (vcpu->arch.irq_type) {
case KVMPPC_IRQ_MPIC:
kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
break;
case KVMPPC_IRQ_XICS:
kvmppc_xics_free_icp(vcpu);
break;
}
kvmppc_core_vcpu_free(vcpu);
}
@ -532,12 +551,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
#endif
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
return -EINVAL;
}
static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
struct kvm_run *run)
{
@ -612,6 +625,8 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int rt, unsigned int bytes, int is_bigendian)
{
int idx, ret;
if (bytes > sizeof(run->mmio.data)) {
printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
run->mmio.len);
@ -627,8 +642,14 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->mmio_is_write = 0;
vcpu->arch.mmio_sign_extend = 0;
if (!kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
bytes, &run->mmio.data)) {
idx = srcu_read_lock(&vcpu->kvm->srcu);
ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
bytes, &run->mmio.data);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
if (!ret) {
kvmppc_complete_mmio_load(vcpu, run);
vcpu->mmio_needed = 0;
return EMULATE_DONE;
@ -653,6 +674,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
u64 val, unsigned int bytes, int is_bigendian)
{
void *data = run->mmio.data;
int idx, ret;
if (bytes > sizeof(run->mmio.data)) {
printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
@ -682,9 +704,14 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
}
if (!kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
bytes, &run->mmio.data)) {
kvmppc_complete_mmio_load(vcpu, run);
idx = srcu_read_lock(&vcpu->kvm->srcu);
ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
bytes, &run->mmio.data);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
if (!ret) {
vcpu->mmio_needed = 0;
return EMULATE_DONE;
}
@ -740,7 +767,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
{
if (irq->irq == KVM_INTERRUPT_UNSET) {
kvmppc_core_dequeue_external(vcpu, irq);
kvmppc_core_dequeue_external(vcpu);
return 0;
}
@ -770,7 +797,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
break;
case KVM_CAP_PPC_EPR:
r = 0;
vcpu->arch.epr_enabled = cap->args[0];
if (cap->args[0])
vcpu->arch.epr_flags |= KVMPPC_EPR_USER;
else
vcpu->arch.epr_flags &= ~KVMPPC_EPR_USER;
break;
#ifdef CONFIG_BOOKE
case KVM_CAP_PPC_BOOKE_WATCHDOG:
@ -791,6 +821,44 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
break;
}
#endif
#ifdef CONFIG_KVM_MPIC
case KVM_CAP_IRQ_MPIC: {
struct file *filp;
struct kvm_device *dev;
r = -EBADF;
filp = fget(cap->args[0]);
if (!filp)
break;
r = -EPERM;
dev = kvm_device_from_filp(filp);
if (dev)
r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]);
fput(filp);
break;
}
#endif
#ifdef CONFIG_KVM_XICS
case KVM_CAP_IRQ_XICS: {
struct file *filp;
struct kvm_device *dev;
r = -EBADF;
filp = fget(cap->args[0]);
if (!filp)
break;
r = -EPERM;
dev = kvm_device_from_filp(filp);
if (dev)
r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
fput(filp);
break;
}
#endif /* CONFIG_KVM_XICS */
default:
r = -EINVAL;
break;
@ -913,9 +981,22 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
return 0;
}
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
bool line_status)
{
if (!irqchip_in_kernel(kvm))
return -ENXIO;
irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
irq_event->irq, irq_event->level,
line_status);
return 0;
}
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
struct kvm *kvm __maybe_unused = filp->private_data;
void __user *argp = (void __user *)arg;
long r;
@ -934,7 +1015,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CREATE_SPAPR_TCE: {
struct kvm_create_spapr_tce create_tce;
struct kvm *kvm = filp->private_data;
r = -EFAULT;
if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
@ -946,8 +1026,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
#ifdef CONFIG_KVM_BOOK3S_64_HV
case KVM_ALLOCATE_RMA: {
struct kvm *kvm = filp->private_data;
struct kvm_allocate_rma rma;
struct kvm *kvm = filp->private_data;
r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
@ -956,7 +1036,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
case KVM_PPC_ALLOCATE_HTAB: {
struct kvm *kvm = filp->private_data;
u32 htab_order;
r = -EFAULT;
@ -973,7 +1052,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
case KVM_PPC_GET_HTAB_FD: {
struct kvm *kvm = filp->private_data;
struct kvm_get_htab_fd ghf;
r = -EFAULT;
@ -986,7 +1064,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_PPC_GET_SMMU_INFO: {
struct kvm *kvm = filp->private_data;
struct kvm_ppc_smmu_info info;
memset(&info, 0, sizeof(info));
@ -995,6 +1072,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = -EFAULT;
break;
}
case KVM_PPC_RTAS_DEFINE_TOKEN: {
struct kvm *kvm = filp->private_data;
r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
break;
}
#endif /* CONFIG_PPC_BOOK3S_64 */
default:
r = -ENOTTY;

View File

@ -51,6 +51,12 @@ static struct icp_ipl __iomem *icp_native_regs[NR_CPUS];
static inline unsigned int icp_native_get_xirr(void)
{
int cpu = smp_processor_id();
unsigned int xirr;
/* Handled an interrupt latched by KVM */
xirr = kvmppc_get_xics_latch();
if (xirr)
return xirr;
return in_be32(&icp_native_regs[cpu]->xirr.word);
}
@ -138,6 +144,7 @@ static unsigned int icp_native_get_irq(void)
static void icp_native_cause_ipi(int cpu, unsigned long data)
{
kvmppc_set_host_ipi(cpu, 1);
icp_native_set_qirr(cpu, IPI_PRIORITY);
}
@ -151,6 +158,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
{
int cpu = smp_processor_id();
kvmppc_set_host_ipi(cpu, 0);
icp_native_set_qirr(cpu, 0xff);
return smp_ipi_demux();

View File

@ -44,5 +44,6 @@ header-y += termios.h
header-y += types.h
header-y += ucontext.h
header-y += unistd.h
header-y += virtio-ccw.h
header-y += vtoc.h
header-y += zcrypt.h

View File

@ -0,0 +1,21 @@
/*
* Definitions for virtio-ccw devices.
*
* Copyright IBM Corp. 2013
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
* as published by the Free Software Foundation.
*
* Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
*/
#ifndef __KVM_VIRTIO_CCW_H
#define __KVM_VIRTIO_CCW_H
/* Alignment of vring buffers. */
#define KVM_VIRTIO_CCW_RING_ALIGN 4096
/* Subcode for diagnose 500 (virtio hypercall). */
#define KVM_S390_VIRTIO_CCW_NOTIFY 3
#endif

View File

@ -22,6 +22,7 @@ config KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_EVENTFD
---help---
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work

View File

@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License (version 2 only)
# as published by the Free Software Foundation.
common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o)
common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o)
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm

View File

@ -13,6 +13,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/virtio-ccw.h>
#include "kvm-s390.h"
#include "trace.h"
#include "trace-s390.h"
@ -104,6 +105,29 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
return -EREMOTE;
}
static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
{
int ret, idx;
/* No virtio-ccw notification? Get out quickly. */
if (!vcpu->kvm->arch.css_support ||
(vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
return -EOPNOTSUPP;
idx = srcu_read_lock(&vcpu->kvm->srcu);
/*
* The layout is as follows:
* - gpr 2 contains the subchannel id (passed as addr)
* - gpr 3 contains the virtqueue index (passed as datamatch)
*/
ret = kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
vcpu->run->s.regs.gprs[2],
8, &vcpu->run->s.regs.gprs[3]);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
/* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */
return ret < 0 ? ret : 0;
}
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
{
int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
@ -118,6 +142,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
return __diag_time_slice_end_directed(vcpu);
case 0x308:
return __diag_ipl_functions(vcpu);
case 0x500:
return __diag_virtio_hypercall(vcpu);
default:
return -EOPNOTSUPP;
}

View File

@ -18,369 +18,86 @@
#include <asm/uaccess.h>
#include "kvm-s390.h"
static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu,
unsigned long guestaddr)
static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu,
void __user *gptr,
int prefixing)
{
unsigned long prefix = vcpu->arch.sie_block->prefix;
unsigned long gaddr = (unsigned long) gptr;
unsigned long uaddr;
if (guestaddr < 2 * PAGE_SIZE)
guestaddr += prefix;
else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE))
guestaddr -= prefix;
return (void __user *) gmap_fault(guestaddr, vcpu->arch.gmap);
if (prefixing) {
if (gaddr < 2 * PAGE_SIZE)
gaddr += prefix;
else if ((gaddr >= prefix) && (gaddr < prefix + 2 * PAGE_SIZE))
gaddr -= prefix;
}
uaddr = gmap_fault(gaddr, vcpu->arch.gmap);
if (IS_ERR_VALUE(uaddr))
uaddr = -EFAULT;
return (void __user *)uaddr;
}
static inline int get_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr,
u64 *result)
#define get_guest(vcpu, x, gptr) \
({ \
__typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
int __mask = sizeof(__typeof__(*(gptr))) - 1; \
int __ret = PTR_RET((void __force *)__uptr); \
\
if (!__ret) { \
BUG_ON((unsigned long)__uptr & __mask); \
__ret = get_user(x, __uptr); \
} \
__ret; \
})
#define put_guest(vcpu, x, gptr) \
({ \
__typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
int __mask = sizeof(__typeof__(*(gptr))) - 1; \
int __ret = PTR_RET((void __force *)__uptr); \
\
if (!__ret) { \
BUG_ON((unsigned long)__uptr & __mask); \
__ret = put_user(x, __uptr); \
} \
__ret; \
})
static inline int __copy_guest(struct kvm_vcpu *vcpu, unsigned long to,
unsigned long from, unsigned long len,
int to_guest, int prefixing)
{
void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
BUG_ON(guestaddr & 7);
unsigned long _len, rc;
void __user *uptr;
while (len) {
uptr = to_guest ? (void __user *)to : (void __user *)from;
uptr = __gptr_to_uptr(vcpu, uptr, prefixing);
if (IS_ERR((void __force *)uptr))
return PTR_ERR((void __force *) uptr);
return get_user(*result, (unsigned long __user *) uptr);
}
static inline int get_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr,
u32 *result)
{
void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
BUG_ON(guestaddr & 3);
if (IS_ERR((void __force *) uptr))
return PTR_ERR((void __force *) uptr);
return get_user(*result, (u32 __user *) uptr);
}
static inline int get_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr,
u16 *result)
{
void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
BUG_ON(guestaddr & 1);
if (IS_ERR(uptr))
return PTR_ERR(uptr);
return get_user(*result, (u16 __user *) uptr);
}
static inline int get_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr,
u8 *result)
{
void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
if (IS_ERR((void __force *) uptr))
return PTR_ERR((void __force *) uptr);
return get_user(*result, (u8 __user *) uptr);
}
static inline int put_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr,
u64 value)
{
void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
BUG_ON(guestaddr & 7);
if (IS_ERR((void __force *) uptr))
return PTR_ERR((void __force *) uptr);
return put_user(value, (u64 __user *) uptr);
}
static inline int put_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr,
u32 value)
{
void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
BUG_ON(guestaddr & 3);
if (IS_ERR((void __force *) uptr))
return PTR_ERR((void __force *) uptr);
return put_user(value, (u32 __user *) uptr);
}
static inline int put_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr,
u16 value)
{
void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
BUG_ON(guestaddr & 1);
if (IS_ERR((void __force *) uptr))
return PTR_ERR((void __force *) uptr);
return put_user(value, (u16 __user *) uptr);
}
static inline int put_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr,
u8 value)
{
void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
if (IS_ERR((void __force *) uptr))
return PTR_ERR((void __force *) uptr);
return put_user(value, (u8 __user *) uptr);
}
static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu,
unsigned long guestdest,
void *from, unsigned long n)
{
int rc;
unsigned long i;
u8 *data = from;
for (i = 0; i < n; i++) {
rc = put_guest_u8(vcpu, guestdest++, *(data++));
if (rc < 0)
return rc;
return -EFAULT;
_len = PAGE_SIZE - ((unsigned long)uptr & (PAGE_SIZE - 1));
_len = min(_len, len);
if (to_guest)
rc = copy_to_user((void __user *) uptr, (void *)from, _len);
else
rc = copy_from_user((void *)to, (void __user *)uptr, _len);
if (rc)
return -EFAULT;
len -= _len;
from += _len;
to += _len;
}
return 0;
}
static inline int __copy_to_guest_fast(struct kvm_vcpu *vcpu,
unsigned long guestdest,
void *from, unsigned long n)
{
int r;
void __user *uptr;
unsigned long size;
#define copy_to_guest(vcpu, to, from, size) \
__copy_guest(vcpu, to, (unsigned long)from, size, 1, 1)
#define copy_from_guest(vcpu, to, from, size) \
__copy_guest(vcpu, (unsigned long)to, from, size, 0, 1)
#define copy_to_guest_absolute(vcpu, to, from, size) \
__copy_guest(vcpu, to, (unsigned long)from, size, 1, 0)
#define copy_from_guest_absolute(vcpu, to, from, size) \
__copy_guest(vcpu, (unsigned long)to, from, size, 0, 0)
if (guestdest + n < guestdest)
return -EFAULT;
/* simple case: all within one segment table entry? */
if ((guestdest & PMD_MASK) == ((guestdest+n) & PMD_MASK)) {
uptr = (void __user *) gmap_fault(guestdest, vcpu->arch.gmap);
if (IS_ERR((void __force *) uptr))
return PTR_ERR((void __force *) uptr);
r = copy_to_user(uptr, from, n);
if (r)
r = -EFAULT;
goto out;
}
/* copy first segment */
uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap);
if (IS_ERR((void __force *) uptr))
return PTR_ERR((void __force *) uptr);
size = PMD_SIZE - (guestdest & ~PMD_MASK);
r = copy_to_user(uptr, from, size);
if (r) {
r = -EFAULT;
goto out;
}
from += size;
n -= size;
guestdest += size;
/* copy full segments */
while (n >= PMD_SIZE) {
uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap);
if (IS_ERR((void __force *) uptr))
return PTR_ERR((void __force *) uptr);
r = copy_to_user(uptr, from, PMD_SIZE);
if (r) {
r = -EFAULT;
goto out;
}
from += PMD_SIZE;
n -= PMD_SIZE;
guestdest += PMD_SIZE;
}
/* copy the tail segment */
if (n) {
uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap);
if (IS_ERR((void __force *) uptr))
return PTR_ERR((void __force *) uptr);
r = copy_to_user(uptr, from, n);
if (r)
r = -EFAULT;
}
out:
return r;
}
static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu,
unsigned long guestdest,
void *from, unsigned long n)
{
return __copy_to_guest_fast(vcpu, guestdest, from, n);
}
static inline int copy_to_guest(struct kvm_vcpu *vcpu, unsigned long guestdest,
void *from, unsigned long n)
{
unsigned long prefix = vcpu->arch.sie_block->prefix;
if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE))
goto slowpath;
if ((guestdest < prefix) && (guestdest + n > prefix))
goto slowpath;
if ((guestdest < prefix + 2 * PAGE_SIZE)
&& (guestdest + n > prefix + 2 * PAGE_SIZE))
goto slowpath;
if (guestdest < 2 * PAGE_SIZE)
guestdest += prefix;
else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE))
guestdest -= prefix;
return __copy_to_guest_fast(vcpu, guestdest, from, n);
slowpath:
return __copy_to_guest_slow(vcpu, guestdest, from, n);
}
static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to,
unsigned long guestsrc,
unsigned long n)
{
int rc;
unsigned long i;
u8 *data = to;
for (i = 0; i < n; i++) {
rc = get_guest_u8(vcpu, guestsrc++, data++);
if (rc < 0)
return rc;
}
return 0;
}
static inline int __copy_from_guest_fast(struct kvm_vcpu *vcpu, void *to,
unsigned long guestsrc,
unsigned long n)
{
int r;
void __user *uptr;
unsigned long size;
if (guestsrc + n < guestsrc)
return -EFAULT;
/* simple case: all within one segment table entry? */
if ((guestsrc & PMD_MASK) == ((guestsrc+n) & PMD_MASK)) {
uptr = (void __user *) gmap_fault(guestsrc, vcpu->arch.gmap);
if (IS_ERR((void __force *) uptr))
return PTR_ERR((void __force *) uptr);
r = copy_from_user(to, uptr, n);
if (r)
r = -EFAULT;
goto out;
}
/* copy first segment */
uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap);
if (IS_ERR((void __force *) uptr))
return PTR_ERR((void __force *) uptr);
size = PMD_SIZE - (guestsrc & ~PMD_MASK);
r = copy_from_user(to, uptr, size);
if (r) {
r = -EFAULT;
goto out;
}
to += size;
n -= size;
guestsrc += size;
/* copy full segments */
while (n >= PMD_SIZE) {
uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap);
if (IS_ERR((void __force *) uptr))
return PTR_ERR((void __force *) uptr);
r = copy_from_user(to, uptr, PMD_SIZE);
if (r) {
r = -EFAULT;
goto out;
}
to += PMD_SIZE;
n -= PMD_SIZE;
guestsrc += PMD_SIZE;
}
/* copy the tail segment */
if (n) {
uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap);
if (IS_ERR((void __force *) uptr))
return PTR_ERR((void __force *) uptr);
r = copy_from_user(to, uptr, n);
if (r)
r = -EFAULT;
}
out:
return r;
}
static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to,
unsigned long guestsrc,
unsigned long n)
{
return __copy_from_guest_fast(vcpu, to, guestsrc, n);
}
static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to,
unsigned long guestsrc, unsigned long n)
{
unsigned long prefix = vcpu->arch.sie_block->prefix;
if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE))
goto slowpath;
if ((guestsrc < prefix) && (guestsrc + n > prefix))
goto slowpath;
if ((guestsrc < prefix + 2 * PAGE_SIZE)
&& (guestsrc + n > prefix + 2 * PAGE_SIZE))
goto slowpath;
if (guestsrc < 2 * PAGE_SIZE)
guestsrc += prefix;
else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE))
guestsrc -= prefix;
return __copy_from_guest_fast(vcpu, to, guestsrc, n);
slowpath:
return __copy_from_guest_slow(vcpu, to, guestsrc, n);
}
#endif
#endif /* __KVM_S390_GACCESS_H */

View File

@ -43,12 +43,10 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr);
do {
rc = get_guest_u64(vcpu, useraddr,
&vcpu->arch.sie_block->gcr[reg]);
if (rc == -EFAULT) {
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
break;
}
rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg],
(u64 __user *) useraddr);
if (rc)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
useraddr += 8;
if (reg == reg3)
break;
@ -78,11 +76,9 @@ static int handle_lctl(struct kvm_vcpu *vcpu)
reg = reg1;
do {
rc = get_guest_u32(vcpu, useraddr, &val);
if (rc == -EFAULT) {
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
break;
}
rc = get_guest(vcpu, val, (u32 __user *) useraddr);
if (rc)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
vcpu->arch.sie_block->gcr[reg] |= val;
useraddr += 4;

View File

@ -180,7 +180,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt_info *inti)
{
const unsigned short table[] = { 2, 4, 4, 6 };
int rc, exception = 0;
int rc = 0;
switch (inti->type) {
case KVM_S390_INT_EMERGENCY:
@ -188,74 +188,41 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
vcpu->stat.deliver_emergency_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
inti->emerg.code, 0);
rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201);
if (rc == -EFAULT)
exception = 1;
rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->emerg.code);
if (rc == -EFAULT)
exception = 1;
rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
rc = put_guest(vcpu, 0x1201, (u16 __user *)__LC_EXT_INT_CODE);
rc |= put_guest(vcpu, inti->emerg.code,
(u16 __user *)__LC_EXT_CPU_ADDR);
rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_EXT_NEW_PSW, sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
break;
case KVM_S390_INT_EXTERNAL_CALL:
VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
vcpu->stat.deliver_external_call++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
inti->extcall.code, 0);
rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202);
if (rc == -EFAULT)
exception = 1;
rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->extcall.code);
if (rc == -EFAULT)
exception = 1;
rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
rc = put_guest(vcpu, 0x1202, (u16 __user *)__LC_EXT_INT_CODE);
rc |= put_guest(vcpu, inti->extcall.code,
(u16 __user *)__LC_EXT_CPU_ADDR);
rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_EXT_NEW_PSW, sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
break;
case KVM_S390_INT_SERVICE:
VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
inti->ext.ext_params);
vcpu->stat.deliver_service_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
inti->ext.ext_params, 0);
rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401);
if (rc == -EFAULT)
exception = 1;
rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
rc = put_guest(vcpu, 0x2401, (u16 __user *)__LC_EXT_INT_CODE);
rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_EXT_NEW_PSW, sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
if (rc == -EFAULT)
exception = 1;
rc |= put_guest(vcpu, inti->ext.ext_params,
(u32 __user *)__LC_EXT_PARAMS);
break;
case KVM_S390_INT_VIRTIO:
VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
inti->ext.ext_params, inti->ext.ext_params2);
@ -263,34 +230,17 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
inti->ext.ext_params,
inti->ext.ext_params2);
rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603);
if (rc == -EFAULT)
exception = 1;
rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, 0x0d00);
if (rc == -EFAULT)
exception = 1;
rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
rc = put_guest(vcpu, 0x2603, (u16 __user *)__LC_EXT_INT_CODE);
rc |= put_guest(vcpu, 0x0d00, (u16 __user *)__LC_EXT_CPU_ADDR);
rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_EXT_NEW_PSW, sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
if (rc == -EFAULT)
exception = 1;
rc = put_guest_u64(vcpu, __LC_EXT_PARAMS2,
inti->ext.ext_params2);
if (rc == -EFAULT)
exception = 1;
rc |= put_guest(vcpu, inti->ext.ext_params,
(u32 __user *)__LC_EXT_PARAMS);
rc |= put_guest(vcpu, inti->ext.ext_params2,
(u64 __user *)__LC_EXT_PARAMS2);
break;
case KVM_S390_SIGP_STOP:
VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
vcpu->stat.deliver_stop_signal++;
@ -313,18 +263,14 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
vcpu->stat.deliver_restart_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
0, 0);
rc = copy_to_guest(vcpu, offsetof(struct _lowcore,
restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
offsetof(struct _lowcore, restart_psw), sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
rc = copy_to_guest(vcpu,
offsetof(struct _lowcore, restart_old_psw),
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
offsetof(struct _lowcore, restart_psw),
sizeof(psw_t));
atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
break;
case KVM_S390_PROGRAM_INT:
VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
inti->pgm.code,
@ -332,24 +278,13 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
vcpu->stat.deliver_program_int++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
inti->pgm.code, 0);
rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code);
if (rc == -EFAULT)
exception = 1;
rc = put_guest_u16(vcpu, __LC_PGM_ILC,
table[vcpu->arch.sie_block->ipa >> 14]);
if (rc == -EFAULT)
exception = 1;
rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
rc = put_guest(vcpu, inti->pgm.code, (u16 __user *)__LC_PGM_INT_CODE);
rc |= put_guest(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
(u16 __user *)__LC_PGM_ILC);
rc |= copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_PGM_NEW_PSW, sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
break;
case KVM_S390_MCHK:
@ -360,22 +295,11 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
inti->mchk.mcic);
rc = kvm_s390_vcpu_store_status(vcpu,
KVM_S390_STORE_STATUS_PREFIXED);
if (rc == -EFAULT)
exception = 1;
rc = put_guest_u64(vcpu, __LC_MCCK_CODE, inti->mchk.mcic);
if (rc == -EFAULT)
exception = 1;
rc = copy_to_guest(vcpu, __LC_MCK_OLD_PSW,
rc |= put_guest(vcpu, inti->mchk.mcic, (u64 __user *) __LC_MCCK_CODE);
rc |= copy_to_guest(vcpu, __LC_MCK_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_MCK_NEW_PSW, sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
@ -388,41 +312,24 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
vcpu->stat.deliver_io_int++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
param0, param1);
rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_ID,
inti->io.subchannel_id);
if (rc == -EFAULT)
exception = 1;
rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_NR,
inti->io.subchannel_nr);
if (rc == -EFAULT)
exception = 1;
rc = put_guest_u32(vcpu, __LC_IO_INT_PARM,
inti->io.io_int_parm);
if (rc == -EFAULT)
exception = 1;
rc = put_guest_u32(vcpu, __LC_IO_INT_WORD,
inti->io.io_int_word);
if (rc == -EFAULT)
exception = 1;
rc = copy_to_guest(vcpu, __LC_IO_OLD_PSW,
rc = put_guest(vcpu, inti->io.subchannel_id,
(u16 __user *) __LC_SUBCHANNEL_ID);
rc |= put_guest(vcpu, inti->io.subchannel_nr,
(u16 __user *) __LC_SUBCHANNEL_NR);
rc |= put_guest(vcpu, inti->io.io_int_parm,
(u32 __user *) __LC_IO_INT_PARM);
rc |= put_guest(vcpu, inti->io.io_int_word,
(u32 __user *) __LC_IO_INT_WORD);
rc |= copy_to_guest(vcpu, __LC_IO_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_IO_NEW_PSW, sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
break;
}
default:
BUG();
}
if (exception) {
if (rc) {
printk("kvm: The guest lowcore is not mapped during interrupt "
"delivery, killing userspace\n");
do_exit(SIGKILL);
@ -431,24 +338,18 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
{
int rc, exception = 0;
int rc;
if (psw_extint_disabled(vcpu))
return 0;
if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
return 0;
rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004);
if (rc == -EFAULT)
exception = 1;
rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
rc = put_guest(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE);
rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_EXT_NEW_PSW, sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
if (exception) {
if (rc) {
printk("kvm: The guest lowcore is not mapped during interrupt "
"delivery, killing userspace\n");
do_exit(SIGKILL);

View File

@ -142,12 +142,16 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_ONE_REG:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_S390_CSS_SUPPORT:
case KVM_CAP_IOEVENTFD:
r = 1;
break;
case KVM_CAP_NR_VCPUS:
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
break;
case KVM_CAP_NR_MEMSLOTS:
r = KVM_USER_MEM_SLOTS;
break;
case KVM_CAP_S390_COW:
r = MACHINE_HAS_ESOP;
break;
@ -632,8 +636,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
} else {
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
trace_kvm_s390_sie_fault(vcpu);
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
rc = 0;
rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
}
}
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
@ -974,22 +977,13 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_memory_slot old,
struct kvm_userspace_memory_region *mem,
bool user_alloc)
enum kvm_mr_change change)
{
/* A few sanity checks. We can have exactly one memory slot which has
to start at guest virtual zero and which has to be located at a
page boundary in userland and which has to end at a page boundary.
The memory in userland is ok to be fragmented into various different
vmas. It is okay to mmap() and munmap() stuff in this slot after
doing this call at any time */
if (mem->slot)
return -EINVAL;
if (mem->guest_phys_addr)
return -EINVAL;
/* A few sanity checks. We can have memory slots which have to be
located/ended at a segment boundary (1MB). The memory in userland is
ok to be fragmented into various different vmas. It is okay to mmap()
and munmap() stuff in this slot after doing this call at any time */
if (mem->userspace_addr & 0xffffful)
return -EINVAL;
@ -997,19 +991,26 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (mem->memory_size & 0xffffful)
return -EINVAL;
if (!user_alloc)
return -EINVAL;
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old,
bool user_alloc)
const struct kvm_memory_slot *old,
enum kvm_mr_change change)
{
int rc;
/* If the basics of the memslot do not change, we do not want
* to update the gmap. Every update causes several unnecessary
* segment translation exceptions. This is usually handled just
* fine by the normal fault handler + gmap, but it will also
* cause faults on the prefix page of running guest CPUs.
*/
if (old->userspace_addr == mem->userspace_addr &&
old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
old->npages * PAGE_SIZE == mem->memory_size)
return;
rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
mem->guest_phys_addr, mem->memory_size);

View File

@ -110,12 +110,12 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
void kvm_s390_tasklet(unsigned long parm);
void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
int kvm_s390_inject_vm(struct kvm *kvm,
int __must_check kvm_s390_inject_vm(struct kvm *kvm,
struct kvm_s390_interrupt *s390int);
int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt *s390int);
int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
u64 cr6, u64 schid);

View File

@ -14,6 +14,8 @@
#include <linux/kvm.h>
#include <linux/gfp.h>
#include <linux/errno.h>
#include <linux/compat.h>
#include <asm/asm-offsets.h>
#include <asm/current.h>
#include <asm/debug.h>
#include <asm/ebcdic.h>
@ -35,31 +37,24 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
operand2 = kvm_s390_get_base_disp_s(vcpu);
/* must be word boundary */
if (operand2 & 3) {
kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
goto out;
}
if (operand2 & 3)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
/* get the value */
if (get_guest_u32(vcpu, operand2, &address)) {
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
goto out;
}
if (get_guest(vcpu, address, (u32 __user *) operand2))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
address = address & 0x7fffe000u;
/* make sure that the new value is valid memory */
if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
(copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) {
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
goto out;
}
(copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1)))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
kvm_s390_set_prefix(vcpu, address);
VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
trace_kvm_s390_handle_prefix(vcpu, 1, address);
out:
return 0;
}
@ -73,49 +68,37 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
operand2 = kvm_s390_get_base_disp_s(vcpu);
/* must be word boundary */
if (operand2 & 3) {
kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
goto out;
}
if (operand2 & 3)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
address = vcpu->arch.sie_block->prefix;
address = address & 0x7fffe000u;
/* get the value */
if (put_guest_u32(vcpu, operand2, address)) {
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
goto out;
}
if (put_guest(vcpu, address, (u32 __user *)operand2))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
trace_kvm_s390_handle_prefix(vcpu, 0, address);
out:
return 0;
}
static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
{
u64 useraddr;
int rc;
vcpu->stat.instruction_stap++;
useraddr = kvm_s390_get_base_disp_s(vcpu);
if (useraddr & 1) {
kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
goto out;
}
if (useraddr & 1)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id);
if (rc == -EFAULT) {
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
goto out;
}
if (put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr);
trace_kvm_s390_handle_stap(vcpu, useraddr);
out:
return 0;
}
@ -129,36 +112,38 @@ static int handle_skey(struct kvm_vcpu *vcpu)
static int handle_tpi(struct kvm_vcpu *vcpu)
{
u64 addr;
struct kvm_s390_interrupt_info *inti;
u64 addr;
int cc;
addr = kvm_s390_get_base_disp_s(vcpu);
if (addr & 3)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
cc = 0;
inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0);
if (inti) {
if (!inti)
goto no_interrupt;
cc = 1;
if (addr) {
/*
* Store the two-word I/O interruption code into the
* provided area.
*/
put_guest_u16(vcpu, addr, inti->io.subchannel_id);
put_guest_u16(vcpu, addr + 2, inti->io.subchannel_nr);
put_guest_u32(vcpu, addr + 4, inti->io.io_int_parm);
put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) addr);
put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) (addr + 2));
put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) (addr + 4));
} else {
/*
* Store the three-word I/O interruption code into
* the appropriate lowcore area.
*/
put_guest_u16(vcpu, 184, inti->io.subchannel_id);
put_guest_u16(vcpu, 186, inti->io.subchannel_nr);
put_guest_u32(vcpu, 188, inti->io.io_int_parm);
put_guest_u32(vcpu, 192, inti->io.io_int_word);
put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) __LC_SUBCHANNEL_ID);
put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) __LC_SUBCHANNEL_NR);
put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) __LC_IO_INT_PARM);
put_guest(vcpu, inti->io.io_int_word, (u32 __user *) __LC_IO_INT_WORD);
}
cc = 1;
} else
cc = 0;
kfree(inti);
no_interrupt:
/* Set condition code and we're done. */
vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44;
@ -230,13 +215,10 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
&facility_list, sizeof(facility_list));
if (rc == -EFAULT)
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
else {
VCPU_EVENT(vcpu, 5, "store facility list value %x",
facility_list);
if (rc)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list);
trace_kvm_s390_handle_stfl(vcpu, facility_list);
}
return 0;
}
@ -249,112 +231,80 @@ static void handle_new_psw(struct kvm_vcpu *vcpu)
#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA)
#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL
#define PSW_ADDR_24 0x00000000000fffffUL
#define PSW_ADDR_24 0x0000000000ffffffUL
#define PSW_ADDR_31 0x000000007fffffffUL
static int is_valid_psw(psw_t *psw) {
if (psw->mask & PSW_MASK_UNASSIGNED)
return 0;
if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) {
if (psw->addr & ~PSW_ADDR_31)
return 0;
}
if (!(psw->mask & PSW_MASK_ADDR_MODE) && (psw->addr & ~PSW_ADDR_24))
return 0;
if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_EA)
return 0;
return 1;
}
int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
{
u64 addr;
psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
psw_compat_t new_psw;
u64 addr;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
if (gpsw->mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu,
PGM_PRIVILEGED_OPERATION);
addr = kvm_s390_get_base_disp_s(vcpu);
if (addr & 7) {
kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
goto out;
}
if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) {
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
goto out;
}
if (!(new_psw.mask & PSW32_MASK_BASE)) {
kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
goto out;
}
vcpu->arch.sie_block->gpsw.mask =
(new_psw.mask & ~PSW32_MASK_BASE) << 32;
vcpu->arch.sie_block->gpsw.addr = new_psw.addr;
if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) ||
(!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) &&
(vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) ||
((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
PSW_MASK_EA)) {
kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
goto out;
}
if (addr & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw)))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (!(new_psw.mask & PSW32_MASK_BASE))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32;
gpsw->mask |= new_psw.addr & PSW32_ADDR_AMODE;
gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE;
if (!is_valid_psw(gpsw))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
handle_new_psw(vcpu);
out:
return 0;
}
static int handle_lpswe(struct kvm_vcpu *vcpu)
{
u64 addr;
psw_t new_psw;
u64 addr;
addr = kvm_s390_get_base_disp_s(vcpu);
if (addr & 7) {
kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
goto out;
}
if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) {
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
goto out;
}
vcpu->arch.sie_block->gpsw.mask = new_psw.mask;
vcpu->arch.sie_block->gpsw.addr = new_psw.addr;
if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) ||
(((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
PSW_MASK_BA) &&
(vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_31)) ||
(!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) &&
(vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) ||
((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
PSW_MASK_EA)) {
kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
goto out;
}
if (addr & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw)))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
vcpu->arch.sie_block->gpsw = new_psw;
if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
handle_new_psw(vcpu);
out:
return 0;
}
static int handle_stidp(struct kvm_vcpu *vcpu)
{
u64 operand2;
int rc;
vcpu->stat.instruction_stidp++;
operand2 = kvm_s390_get_base_disp_s(vcpu);
if (operand2 & 7) {
kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
goto out;
}
if (operand2 & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data);
if (rc == -EFAULT) {
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
goto out;
}
if (put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
out:
return 0;
}
@ -394,8 +344,9 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28;
int sel1 = vcpu->run->s.regs.gprs[0] & 0xff;
int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff;
unsigned long mem = 0;
u64 operand2;
unsigned long mem;
int rc = 0;
vcpu->stat.instruction_stsi++;
VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
@ -414,37 +365,37 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
case 2:
mem = get_zeroed_page(GFP_KERNEL);
if (!mem)
goto out_fail;
goto out_no_data;
if (stsi((void *) mem, fc, sel1, sel2))
goto out_mem;
goto out_no_data;
break;
case 3:
if (sel1 != 2 || sel2 != 2)
goto out_fail;
goto out_no_data;
mem = get_zeroed_page(GFP_KERNEL);
if (!mem)
goto out_fail;
goto out_no_data;
handle_stsi_3_2_2(vcpu, (void *) mem);
break;
default:
goto out_fail;
goto out_no_data;
}
if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
goto out_mem;
rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
goto out_exception;
}
trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
free_page(mem);
vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
vcpu->run->s.regs.gprs[0] = 0;
return 0;
out_mem:
free_page(mem);
out_fail:
out_no_data:
/* condition code 3 */
vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;
return 0;
out_exception:
free_page(mem);
return rc;
}
static const intercept_handler_t b2_handlers[256] = {
@ -575,20 +526,13 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
return -EOPNOTSUPP;
/* we must resolve the address without holding the mmap semaphore.
* This is ok since the userspace hypervisor is not supposed to change
* the mapping while the guest queries the memory. Otherwise the guest
* might crash or get wrong info anyway. */
user_address = (unsigned long) __guestaddr_to_user(vcpu, address1);
down_read(&current->mm->mmap_sem);
user_address = __gmap_translate(address1, vcpu->arch.gmap);
if (IS_ERR_VALUE(user_address))
goto out_inject;
vma = find_vma(current->mm, user_address);
if (!vma) {
up_read(&current->mm->mmap_sem);
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
}
if (!vma)
goto out_inject;
vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
if (!(vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_READ))
vcpu->arch.sie_block->gpsw.mask |= (1ul << 44);
@ -597,6 +541,10 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
up_read(&current->mm->mmap_sem);
return 0;
out_inject:
up_read(&current->mm->mmap_sem);
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
}
int kvm_s390_handle_e5(struct kvm_vcpu *vcpu)

View File

@ -19,6 +19,10 @@ BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
#ifdef CONFIG_HAVE_KVM
BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR)
#endif
/*
* every pentium local APIC has two 'local interrupts', with a
* soft-definable vector attached to both interrupts, one of

View File

@ -11,6 +11,9 @@ typedef struct {
unsigned int apic_timer_irqs; /* arch dependent */
unsigned int irq_spurious_count;
unsigned int icr_read_retry_count;
#endif
#ifdef CONFIG_HAVE_KVM
unsigned int kvm_posted_intr_ipis;
#endif
unsigned int x86_platform_ipis; /* arch dependent */
unsigned int apic_perf_irqs;

View File

@ -28,6 +28,7 @@
/* Interrupt handlers registered during init_IRQ */
extern void apic_timer_interrupt(void);
extern void x86_platform_ipi(void);
extern void kvm_posted_intr_ipi(void);
extern void error_interrupt(void);
extern void irq_work_interrupt(void);

View File

@ -102,6 +102,11 @@
*/
#define X86_PLATFORM_IPI_VECTOR 0xf7
/* Vector for KVM to deliver posted interrupt IPI */
#ifdef CONFIG_HAVE_KVM
#define POSTED_INTR_VECTOR 0xf2
#endif
/*
* IRQ work vector:
*/

View File

@ -31,7 +31,7 @@
#include <asm/msr-index.h>
#include <asm/asm.h>
#define KVM_MAX_VCPUS 254
#define KVM_MAX_VCPUS 255
#define KVM_SOFT_MAX_VCPUS 160
#define KVM_USER_MEM_SLOTS 125
/* memory slots that are not exposed to userspace */
@ -43,6 +43,8 @@
#define KVM_PIO_PAGE_OFFSET 1
#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
| X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
@ -94,9 +96,6 @@
#define ASYNC_PF_PER_VCPU 64
extern raw_spinlock_t kvm_lock;
extern struct list_head vm_list;
struct kvm_vcpu;
struct kvm;
struct kvm_async_pf;
@ -230,6 +229,7 @@ struct kvm_mmu_page {
#endif
int write_flooding_count;
bool mmio_cached;
};
struct kvm_pio_request {
@ -345,7 +345,6 @@ struct kvm_vcpu_arch {
unsigned long apic_attention;
int32_t apic_arb_prio;
int mp_state;
int sipi_vector;
u64 ia32_misc_enable_msr;
bool tpr_access_reporting;
@ -643,7 +642,7 @@ struct kvm_x86_ops {
/* Create, but do not attach this VCPU */
struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
void (*vcpu_free)(struct kvm_vcpu *vcpu);
int (*vcpu_reset)(struct kvm_vcpu *vcpu);
void (*vcpu_reset)(struct kvm_vcpu *vcpu);
void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
@ -696,14 +695,16 @@ struct kvm_x86_ops {
int (*nmi_allowed)(struct kvm_vcpu *vcpu);
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
int (*enable_nmi_window)(struct kvm_vcpu *vcpu);
int (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
int (*vm_has_apicv)(struct kvm *kvm);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
void (*hwapic_isr_update)(struct kvm *kvm, int isr);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
@ -730,6 +731,7 @@ struct kvm_x86_ops {
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
void (*handle_external_intr)(struct kvm_vcpu *vcpu);
};
struct kvm_arch_async_pf {
@ -767,6 +769,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t gfn_offset, unsigned long mask);
void kvm_mmu_zap_all(struct kvm *kvm);
void kvm_mmu_zap_mmio_sptes(struct kvm *kvm);
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
@ -797,6 +800,7 @@ enum emulation_result {
#define EMULTYPE_TRAP_UD (1 << 1)
#define EMULTYPE_SKIP (1 << 2)
#define EMULTYPE_RETRY (1 << 3)
#define EMULTYPE_NO_REEXECUTE (1 << 4)
int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
int emulation_type, void *insn, int insn_len);
@ -807,6 +811,7 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu,
}
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
@ -819,6 +824,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector);
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
int reason, bool has_error_code, u32 error_code);
@ -973,7 +979,6 @@ enum {
* Trap the fault and ignore the instruction if that happens.
*/
asmlinkage void kvm_spurious_fault(void);
extern bool kvm_rebooting;
#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \
"666: " insn "\n\t" \
@ -1002,6 +1007,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
void kvm_define_shared_msr(unsigned index, u32 msr);
void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
@ -1027,7 +1033,7 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu);
void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu);
bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr);
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
void kvm_deliver_pmi(struct kvm_vcpu *vcpu);

View File

@ -65,11 +65,16 @@
#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
#define PIN_BASED_EXT_INTR_MASK 0x00000001
#define PIN_BASED_NMI_EXITING 0x00000008
#define PIN_BASED_VIRTUAL_NMIS 0x00000020
#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040
#define PIN_BASED_POSTED_INTR 0x00000080
#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016
#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002
#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
@ -81,6 +86,8 @@
#define VM_EXIT_LOAD_IA32_EFER 0x00200000
#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000
#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000002
#define VM_ENTRY_IA32E_MODE 0x00000200
#define VM_ENTRY_SMM 0x00000400
@ -89,9 +96,15 @@
#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
#define VM_ENTRY_LOAD_IA32_EFER 0x00008000
#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
/* VMCS Encodings */
enum vmcs_field {
VIRTUAL_PROCESSOR_ID = 0x00000000,
POSTED_INTR_NV = 0x00000002,
GUEST_ES_SELECTOR = 0x00000800,
GUEST_CS_SELECTOR = 0x00000802,
GUEST_SS_SELECTOR = 0x00000804,
@ -126,6 +139,8 @@ enum vmcs_field {
VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
APIC_ACCESS_ADDR = 0x00002014,
APIC_ACCESS_ADDR_HIGH = 0x00002015,
POSTED_INTR_DESC_ADDR = 0x00002016,
POSTED_INTR_DESC_ADDR_HIGH = 0x00002017,
EPT_POINTER = 0x0000201a,
EPT_POINTER_HIGH = 0x0000201b,
EOI_EXIT_BITMAP0 = 0x0000201c,
@ -136,6 +151,8 @@ enum vmcs_field {
EOI_EXIT_BITMAP2_HIGH = 0x00002021,
EOI_EXIT_BITMAP3 = 0x00002022,
EOI_EXIT_BITMAP3_HIGH = 0x00002023,
VMREAD_BITMAP = 0x00002026,
VMWRITE_BITMAP = 0x00002028,
GUEST_PHYSICAL_ADDRESS = 0x00002400,
GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
VMCS_LINK_POINTER = 0x00002800,
@ -209,6 +226,7 @@ enum vmcs_field {
GUEST_INTERRUPTIBILITY_INFO = 0x00004824,
GUEST_ACTIVITY_STATE = 0X00004826,
GUEST_SYSENTER_CS = 0x0000482A,
VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
HOST_IA32_SYSENTER_CS = 0x00004c00,
CR0_GUEST_HOST_MASK = 0x00006000,
CR4_GUEST_HOST_MASK = 0x00006002,

View File

@ -29,7 +29,6 @@
#define __KVM_HAVE_PIT
#define __KVM_HAVE_IOAPIC
#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_DEVICE_ASSIGNMENT
#define __KVM_HAVE_MSI
#define __KVM_HAVE_USER_NMI
#define __KVM_HAVE_GUEST_DEBUG

View File

@ -528,6 +528,8 @@
#define VMX_BASIC_MEM_TYPE_WB 6LLU
#define VMX_BASIC_INOUT 0x0040000000000000LLU
/* MSR_IA32_VMX_MISC bits */
#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
/* AMD-V MSRs */
#define MSR_VM_CR 0xc0010114

View File

@ -65,6 +65,7 @@
#define EXIT_REASON_EOI_INDUCED 45
#define EXIT_REASON_EPT_VIOLATION 48
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_PREEMPTION_TIMER 52
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
#define EXIT_REASON_APIC_WRITE 56
@ -110,7 +111,7 @@
{ EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
{ EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
{ EXIT_REASON_INVD, "INVD" }, \
{ EXIT_REASON_INVPCID, "INVPCID" }
{ EXIT_REASON_INVPCID, "INVPCID" }, \
{ EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" }
#endif /* _UAPIVMX_H */

View File

@ -1166,6 +1166,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \
apicinterrupt X86_PLATFORM_IPI_VECTOR \
x86_platform_ipi smp_x86_platform_ipi
#ifdef CONFIG_HAVE_KVM
apicinterrupt POSTED_INTR_VECTOR \
kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
#endif
apicinterrupt THRESHOLD_APIC_VECTOR \
threshold_interrupt smp_threshold_interrupt
apicinterrupt THERMAL_APIC_VECTOR \

View File

@ -224,6 +224,28 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
set_irq_regs(old_regs);
}
#ifdef CONFIG_HAVE_KVM
/*
* Handler for POSTED_INTERRUPT_VECTOR.
*/
void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
ack_APIC_irq();
irq_enter();
exit_idle();
inc_irq_stat(kvm_posted_intr_ipis);
irq_exit();
set_irq_regs(old_regs);
}
#endif
EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
#ifdef CONFIG_HOTPLUG_CPU

View File

@ -172,6 +172,10 @@ static void __init apic_intr_init(void)
/* IPI for X86 platform specific use */
alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
#ifdef CONFIG_HAVE_KVM
/* IPI for KVM to deliver posted interrupt */
alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
#endif
/* IPI vectors for APIC spurious and error interrupts */
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);

View File

@ -160,8 +160,12 @@ int kvm_register_clock(char *txt)
{
int cpu = smp_processor_id();
int low, high, ret;
struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti;
struct pvclock_vcpu_time_info *src;
if (!hv_clock)
return 0;
src = &hv_clock[cpu].pvti;
low = (int)slow_virt_to_phys(src) | 1;
high = ((u64)slow_virt_to_phys(src) >> 32);
ret = native_write_msr_safe(msr_kvm_system_time, low, high);
@ -276,6 +280,9 @@ int __init kvm_setup_vsyscall_timeinfo(void)
struct pvclock_vcpu_time_info *vcpu_time;
unsigned int size;
if (!hv_clock)
return 0;
size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
preempt_disable();

View File

@ -21,14 +21,13 @@ config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM
depends on HIGH_RES_TIMERS
# for device assignment:
depends on PCI
# for TASKSTATS/TASK_DELAY_ACCT:
depends on NET
select PREEMPT_NOTIFIERS
select MMU_NOTIFIER
select ANON_INODES
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
select KVM_ASYNC_PF
@ -82,6 +81,17 @@ config KVM_MMU_AUDIT
This option adds a R/W kVM module parameter 'mmu_audit', which allows
audit KVM MMU at runtime.
config KVM_DEVICE_ASSIGNMENT
bool "KVM legacy PCI device assignment support"
depends on KVM && PCI && IOMMU_API
default y
---help---
Provide support for legacy PCI device assignment through KVM. The
kernel now also supports a full featured userspace device driver
framework through VFIO, which supersedes much of this support.
If unsure, say Y.
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
source drivers/vhost/Kconfig

View File

@ -7,8 +7,9 @@ CFLAGS_vmx.o := -I.
kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
coalesced_mmio.o irq_comm.o eventfd.o \
assigned-dev.o)
kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o)
irqchip.o)
kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(addprefix ../../../virt/kvm/, \
assigned-dev.o iommu.o)
kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o)
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \

View File

@ -132,8 +132,9 @@
#define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
#define No64 (1<<28)
#define PageTable (1 << 29) /* instruction used to write page table */
#define NotImpl (1 << 30) /* instruction is not implemented */
/* Source 2 operand type */
#define Src2Shift (30)
#define Src2Shift (31)
#define Src2None (OpNone << Src2Shift)
#define Src2CL (OpCL << Src2Shift)
#define Src2ImmByte (OpImmByte << Src2Shift)
@ -1578,12 +1579,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
memset(&seg_desc, 0, sizeof seg_desc);
if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
|| ctxt->mode == X86EMUL_MODE_REAL) {
/* set real mode segment descriptor */
if (ctxt->mode == X86EMUL_MODE_REAL) {
/* set real mode segment descriptor (keep limit etc. for
* unreal mode) */
ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
set_desc_base(&seg_desc, selector << 4);
goto load;
} else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
/* VM86 needs a clean new segment descriptor */
set_desc_base(&seg_desc, selector << 4);
set_desc_limit(&seg_desc, 0xffff);
seg_desc.type = 3;
seg_desc.p = 1;
seg_desc.s = 1;
seg_desc.dpl = 3;
goto load;
}
rpl = selector & 3;
@ -3615,7 +3625,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i }
#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \
.check_perm = (_p) }
#define N D(0)
#define N D(NotImpl)
#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
@ -3713,7 +3723,7 @@ static const struct opcode group5[] = {
I(SrcMemFAddr | ImplicitOps | Stack, em_call_far),
I(SrcMem | Stack, em_grp45),
I(SrcMemFAddr | ImplicitOps, em_grp45),
I(SrcMem | Stack, em_grp45), N,
I(SrcMem | Stack, em_grp45), D(Undefined),
};
static const struct opcode group6[] = {
@ -4162,6 +4172,10 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
break;
case OpMem8:
ctxt->memop.bytes = 1;
if (ctxt->memop.type == OP_REG) {
ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1);
fetch_register_operand(&ctxt->memop);
}
goto mem_common;
case OpMem16:
ctxt->memop.bytes = 2;
@ -4373,7 +4387,7 @@ done_prefixes:
ctxt->intercept = opcode.intercept;
/* Unrecognised? */
if (ctxt->d == 0 || (ctxt->d & Undefined))
if (ctxt->d == 0 || (ctxt->d & NotImpl))
return EMULATION_FAILED;
if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn)
@ -4511,7 +4525,8 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
ctxt->mem_read.pos = 0;
if (ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) {
if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
(ctxt->d & Undefined)) {
rc = emulate_ud(ctxt);
goto done;
}

View File

@ -290,8 +290,8 @@ static void pit_do_work(struct kthread_work *work)
}
spin_unlock(&ps->inject_lock);
if (inject) {
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false);
/*
* Provides NMI watchdog support via Virtual Wire mode.

View File

@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap)
return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
{
struct kvm_lapic *apic = vcpu->arch.apic;
return apic_test_vector(vector, apic->regs + APIC_ISR) ||
apic_test_vector(vector, apic->regs + APIC_IRR);
}
static inline void apic_set_vector(int vec, void *bitmap)
{
set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@ -145,53 +153,6 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
}
void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
struct kvm_lapic_irq *irq,
u64 *eoi_exit_bitmap)
{
struct kvm_lapic **dst;
struct kvm_apic_map *map;
unsigned long bitmap = 1;
int i;
rcu_read_lock();
map = rcu_dereference(vcpu->kvm->arch.apic_map);
if (unlikely(!map)) {
__set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap);
goto out;
}
if (irq->dest_mode == 0) { /* physical mode */
if (irq->delivery_mode == APIC_DM_LOWEST ||
irq->dest_id == 0xff) {
__set_bit(irq->vector,
(unsigned long *)eoi_exit_bitmap);
goto out;
}
dst = &map->phys_map[irq->dest_id & 0xff];
} else {
u32 mda = irq->dest_id << (32 - map->ldr_bits);
dst = map->logical_map[apic_cluster_id(map, mda)];
bitmap = apic_logical_id(map, mda);
}
for_each_set_bit(i, &bitmap, 16) {
if (!dst[i])
continue;
if (dst[i]->vcpu == vcpu) {
__set_bit(irq->vector,
(unsigned long *)eoi_exit_bitmap);
break;
}
}
out:
rcu_read_unlock();
}
static void recalculate_apic_map(struct kvm *kvm)
{
struct kvm_apic_map *new, *old = NULL;
@ -256,7 +217,7 @@ out:
if (old)
kfree_rcu(old, rcu);
kvm_ioapic_make_eoibitmap_request(kvm);
kvm_vcpu_request_scan_ioapic(kvm);
}
static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
@ -357,6 +318,19 @@ static u8 count_vectors(void *bitmap)
return count;
}
void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
{
u32 i, pir_val;
struct kvm_lapic *apic = vcpu->arch.apic;
for (i = 0; i <= 7; i++) {
pir_val = xchg(&pir[i], 0);
if (pir_val)
*((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val;
}
}
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
{
apic->irr_pending = true;
@ -379,6 +353,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
if (!apic->irr_pending)
return -1;
kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
result = apic_search_irr(apic);
ASSERT(result == -1 || result >= 16);
@ -431,14 +406,16 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
}
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
int vector, int level, int trig_mode);
int vector, int level, int trig_mode,
unsigned long *dest_map);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
unsigned long *dest_map)
{
struct kvm_lapic *apic = vcpu->arch.apic;
return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
irq->level, irq->trig_mode);
irq->level, irq->trig_mode, dest_map);
}
static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
@ -505,6 +482,15 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
return result;
}
void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
{
struct kvm_lapic *apic = vcpu->arch.apic;
int i;
for (i = 0; i < 8; i++)
apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]);
}
static void apic_update_ppr(struct kvm_lapic *apic)
{
u32 tpr, isrv, ppr, old_ppr;
@ -611,7 +597,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
}
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, int *r)
struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
{
struct kvm_apic_map *map;
unsigned long bitmap = 1;
@ -622,7 +608,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
*r = -1;
if (irq->shorthand == APIC_DEST_SELF) {
*r = kvm_apic_set_irq(src->vcpu, irq);
*r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
return true;
}
@ -667,7 +653,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
continue;
if (*r < 0)
*r = 0;
*r += kvm_apic_set_irq(dst[i]->vcpu, irq);
*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
}
ret = true;
@ -681,7 +667,8 @@ out:
* Return 1 if successfully added and 0 if discarded.
*/
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
int vector, int level, int trig_mode)
int vector, int level, int trig_mode,
unsigned long *dest_map)
{
int result = 0;
struct kvm_vcpu *vcpu = apic->vcpu;
@ -694,24 +681,28 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
if (unlikely(!apic_enabled(apic)))
break;
if (trig_mode) {
apic_debug("level trig mode for vector %d", vector);
apic_set_vector(vector, apic->regs + APIC_TMR);
} else
apic_clear_vector(vector, apic->regs + APIC_TMR);
if (dest_map)
__set_bit(vcpu->vcpu_id, dest_map);
if (kvm_x86_ops->deliver_posted_interrupt) {
result = 1;
kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
} else {
result = !apic_test_and_set_irr(vector, apic);
trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
trig_mode, vector, !result);
if (!result) {
if (trig_mode)
apic_debug("level trig mode repeatedly for "
"vector %d", vector);
break;
apic_debug("level trig mode repeatedly "
"for vector %d", vector);
goto out;
}
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
}
out:
trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
trig_mode, vector, !result);
break;
case APIC_DM_REMRD:
@ -731,7 +722,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
case APIC_DM_INIT:
if (!trig_mode || level) {
result = 1;
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
/* assumes that there are only KVM_APIC_INIT/SIPI */
apic->pending_events = (1UL << KVM_APIC_INIT);
/* make sure pending_events is visible before sending
* the request */
smp_wmb();
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
} else {
@ -743,13 +738,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
case APIC_DM_STARTUP:
apic_debug("SIPI to vcpu %d vector 0x%02x\n",
vcpu->vcpu_id, vector);
if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
result = 1;
vcpu->arch.sipi_vector = vector;
vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
apic->sipi_vector = vector;
/* make sure sipi_vector is visible for the receiver */
smp_wmb();
set_bit(KVM_APIC_SIPI, &apic->pending_events);
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
}
break;
case APIC_DM_EXTINT:
@ -782,7 +777,7 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
trigger_mode = IOAPIC_LEVEL_TRIG;
else
trigger_mode = IOAPIC_EDGE_TRIG;
kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
}
}
@ -848,7 +843,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
irq.vector);
kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
}
static u32 apic_get_tmcct(struct kvm_lapic *apic)
@ -1484,7 +1479,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
vector = reg & APIC_VECTOR_MASK;
mode = reg & APIC_MODE_MASK;
trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
NULL);
}
return 0;
}
@ -1654,6 +1650,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
apic->highest_isr_cache = -1;
kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_rtc_eoi_tracking_restore_one(vcpu);
}
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
@ -1860,6 +1857,34 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
addr, sizeof(u8));
}
void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
unsigned int sipi_vector;
if (!kvm_vcpu_has_lapic(vcpu))
return;
if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) {
kvm_lapic_reset(vcpu);
kvm_vcpu_reset(vcpu);
if (kvm_vcpu_is_bsp(apic->vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
else
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
}
if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events) &&
vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
/* evaluate pending_events before reading the vector */
smp_rmb();
sipi_vector = apic->sipi_vector;
pr_debug("vcpu %d received sipi with vector # %x\n",
vcpu->vcpu_id, sipi_vector);
kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
}
void kvm_lapic_init(void)
{
/* do not patch jump label more than once per second */

View File

@ -5,6 +5,9 @@
#include <linux/kvm_host.h>
#define KVM_APIC_INIT 0
#define KVM_APIC_SIPI 1
struct kvm_timer {
struct hrtimer timer;
s64 period; /* unit: ns */
@ -32,6 +35,8 @@ struct kvm_lapic {
void *regs;
gpa_t vapic_addr;
struct page *vapic_page;
unsigned long pending_events;
unsigned int sipi_vector;
};
int kvm_create_lapic(struct kvm_vcpu *vcpu);
void kvm_free_lapic(struct kvm_vcpu *vcpu);
@ -39,6 +44,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu);
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
void kvm_apic_accept_events(struct kvm_vcpu *vcpu);
void kvm_lapic_reset(struct kvm_vcpu *vcpu);
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
@ -47,13 +53,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
void kvm_apic_set_version(struct kvm_vcpu *vcpu);
void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
unsigned long *dest_map);
int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, int *r);
struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
@ -154,8 +163,11 @@ static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
return ldr & map->lid_mask;
}
void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
struct kvm_lapic_irq *irq,
u64 *eoi_bitmap);
static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
{
return vcpu->arch.apic->pending_events;
}
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
#endif

View File

@ -199,8 +199,11 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access)
{
struct kvm_mmu_page *sp = page_header(__pa(sptep));
access &= ACC_WRITE_MASK | ACC_USER_MASK;
sp->mmio_cached = true;
trace_mark_mmio_spte(sptep, gfn, access);
mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT);
}
@ -1502,6 +1505,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
u64 *parent_pte, int direct)
{
struct kvm_mmu_page *sp;
sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
if (!direct)
@ -1644,16 +1648,14 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
static void kvm_mmu_commit_zap_page(struct kvm *kvm,
struct list_head *invalid_list);
#define for_each_gfn_sp(kvm, sp, gfn) \
hlist_for_each_entry(sp, \
&(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \
if ((sp)->gfn != (gfn)) {} else
#define for_each_gfn_sp(_kvm, _sp, _gfn) \
hlist_for_each_entry(_sp, \
&(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
if ((_sp)->gfn != (_gfn)) {} else
#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn) \
hlist_for_each_entry(sp, \
&(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \
if ((sp)->gfn != (gfn) || (sp)->role.direct || \
(sp)->role.invalid) {} else
#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \
for_each_gfn_sp(_kvm, _sp, _gfn) \
if ((_sp)->role.direct || (_sp)->role.invalid) {} else
/* @sp->gfn should be write-protected at the call site */
static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
@ -2089,7 +2091,7 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
static void kvm_mmu_commit_zap_page(struct kvm *kvm,
struct list_head *invalid_list)
{
struct kvm_mmu_page *sp;
struct kvm_mmu_page *sp, *nsp;
if (list_empty(invalid_list))
return;
@ -2106,11 +2108,25 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
*/
kvm_flush_remote_tlbs(kvm);
do {
sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
list_for_each_entry_safe(sp, nsp, invalid_list, link) {
WARN_ON(!sp->role.invalid || sp->root_count);
kvm_mmu_free_page(sp);
} while (!list_empty(invalid_list));
}
}
static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
struct list_head *invalid_list)
{
struct kvm_mmu_page *sp;
if (list_empty(&kvm->arch.active_mmu_pages))
return false;
sp = list_entry(kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
return true;
}
/*
@ -2120,23 +2136,15 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
{
LIST_HEAD(invalid_list);
/*
* If we set the number of mmu pages to be smaller be than the
* number of actived pages , we must to free some mmu pages before we
* change the value
*/
spin_lock(&kvm->mmu_lock);
if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) {
while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages &&
!list_empty(&kvm->arch.active_mmu_pages)) {
struct kvm_mmu_page *page;
/* Need to free some mmu pages to achieve the goal. */
while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages)
if (!prepare_zap_oldest_mmu_page(kvm, &invalid_list))
break;
page = container_of(kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
kvm_mmu_prepare_zap_page(kvm, page, &invalid_list);
}
kvm_mmu_commit_zap_page(kvm, &invalid_list);
goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages;
}
@ -2794,6 +2802,7 @@ exit:
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
gva_t gva, pfn_t *pfn, bool write, bool *writable);
static void make_mmu_pages_available(struct kvm_vcpu *vcpu);
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
gfn_t gfn, bool prefault)
@ -2835,7 +2844,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
kvm_mmu_free_some_pages(vcpu);
make_mmu_pages_available(vcpu);
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn,
@ -2913,7 +2922,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_free_some_pages(vcpu);
make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL,
1, ACC_ALL, NULL);
++sp->root_count;
@ -2925,7 +2934,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root));
spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_free_some_pages(vcpu);
make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
i << 30,
PT32_ROOT_LEVEL, 1, ACC_ALL,
@ -2964,7 +2973,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root));
spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_free_some_pages(vcpu);
make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL,
0, ACC_ALL, NULL);
root = __pa(sp->spt);
@ -2998,7 +3007,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
return 1;
}
spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_free_some_pages(vcpu);
make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
PT32_ROOT_LEVEL, 0,
ACC_ALL, NULL);
@ -3304,7 +3313,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
kvm_mmu_free_some_pages(vcpu);
make_mmu_pages_available(vcpu);
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
r = __direct_map(vcpu, gpa, write, map_writable,
@ -4006,17 +4015,17 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
}
EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
static void make_mmu_pages_available(struct kvm_vcpu *vcpu)
{
LIST_HEAD(invalid_list);
while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES &&
!list_empty(&vcpu->kvm->arch.active_mmu_pages)) {
struct kvm_mmu_page *sp;
if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES))
return;
while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) {
if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list))
break;
sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
++vcpu->kvm->stat.mmu_recycled;
}
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
@ -4185,17 +4194,22 @@ restart:
spin_unlock(&kvm->mmu_lock);
}
static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm,
struct list_head *invalid_list)
void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
{
struct kvm_mmu_page *page;
struct kvm_mmu_page *sp, *node;
LIST_HEAD(invalid_list);
if (list_empty(&kvm->arch.active_mmu_pages))
return;
spin_lock(&kvm->mmu_lock);
restart:
list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
if (!sp->mmio_cached)
continue;
if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
goto restart;
}
page = container_of(kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
kvm_mmu_prepare_zap_page(kvm, page, invalid_list);
kvm_mmu_commit_zap_page(kvm, &invalid_list);
spin_unlock(&kvm->mmu_lock);
}
static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
@ -4232,7 +4246,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
kvm_mmu_remove_some_alloc_mmu_pages(kvm, &invalid_list);
prepare_zap_oldest_mmu_page(kvm, &invalid_list);
kvm_mmu_commit_zap_page(kvm, &invalid_list);
spin_unlock(&kvm->mmu_lock);

View File

@ -57,14 +57,11 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{
if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
return kvm->arch.n_max_mmu_pages -
kvm->arch.n_used_mmu_pages;
}
static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
{
if (unlikely(kvm_mmu_available_pages(vcpu->kvm)< KVM_MIN_FREE_MMU_PAGES))
__kvm_mmu_free_some_pages(vcpu);
return 0;
}
static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)

View File

@ -627,7 +627,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
goto out_unlock;
kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
kvm_mmu_free_some_pages(vcpu);
make_mmu_pages_available(vcpu);
if (!force_pt_level)
transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
r = FNAME(fetch)(vcpu, addr, &walker, write_fault,

View File

@ -360,10 +360,12 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
return 1;
}
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_pmc *pmc;
u32 index = msr_info->index;
u64 data = msr_info->data;
switch (index) {
case MSR_CORE_PERF_FIXED_CTR_CTRL:
@ -375,6 +377,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
}
break;
case MSR_CORE_PERF_GLOBAL_STATUS:
if (msr_info->host_initiated) {
pmu->global_status = data;
return 0;
}
break; /* RO MSR */
case MSR_CORE_PERF_GLOBAL_CTRL:
if (pmu->global_ctrl == data)
@ -386,6 +392,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
break;
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) {
if (!msr_info->host_initiated)
pmu->global_status &= ~data;
pmu->global_ovf_ctrl = data;
return 0;
@ -394,6 +401,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
default:
if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
(pmc = get_fixed_pmc(pmu, index))) {
if (!msr_info->host_initiated)
data = (s64)(s32)data;
pmc->counter += data - read_pmc(pmc);
return 0;

View File

@ -1131,17 +1131,11 @@ static void init_vmcb(struct vcpu_svm *svm)
init_seg(&save->gs);
save->cs.selector = 0xf000;
save->cs.base = 0xffff0000;
/* Executable/Readable Code Segment */
save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
save->cs.limit = 0xffff;
/*
* cs.base should really be 0xffff0000, but vmx can't handle that, so
* be consistent with it.
*
* Replace when we have real mode working for vmx.
*/
save->cs.base = 0xf0000;
save->gdtr.limit = 0xffff;
save->idtr.limit = 0xffff;
@ -1191,7 +1185,7 @@ static void init_vmcb(struct vcpu_svm *svm)
enable_gif(svm);
}
static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
static void svm_vcpu_reset(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
u32 dummy;
@ -1199,16 +1193,8 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
init_vmcb(svm);
if (!kvm_vcpu_is_bsp(vcpu)) {
kvm_rip_write(vcpu, 0);
svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
}
kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
return 0;
}
static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
@ -3487,7 +3473,7 @@ static int handle_exit(struct kvm_vcpu *vcpu)
exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
"exit_code 0x%x\n",
__func__, svm->vmcb->control.exit_int_info,
exit_code);
@ -3591,6 +3577,11 @@ static void svm_hwapic_isr_update(struct kvm *kvm, int isr)
return;
}
static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
return;
}
static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@ -3641,7 +3632,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
return ret;
}
static void enable_irq_window(struct kvm_vcpu *vcpu)
static int enable_irq_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@ -3655,15 +3646,16 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
svm_set_vintr(svm);
svm_inject_irq(svm, 0x0);
}
return 0;
}
static void enable_nmi_window(struct kvm_vcpu *vcpu)
static int enable_nmi_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
== HF_NMI_MASK)
return; /* IRET will cause a vm exit */
return 0; /* IRET will cause a vm exit */
/*
* Something prevents NMI from been injected. Single step over possible
@ -3672,6 +3664,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
svm->nmi_singlestep = true;
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
update_db_bp_intercept(vcpu);
return 0;
}
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@ -4247,6 +4240,11 @@ out:
return ret;
}
static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
{
local_irq_enable();
}
static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@ -4314,6 +4312,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.vm_has_apicv = svm_vm_has_apicv,
.load_eoi_exitmap = svm_load_eoi_exitmap,
.hwapic_isr_update = svm_hwapic_isr_update,
.sync_pir_to_irr = svm_sync_pir_to_irr,
.set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level,
@ -4342,6 +4341,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.set_tdp_cr3 = set_tdp_cr3,
.check_intercept = svm_check_intercept,
.handle_external_intr = svm_handle_external_intr,
};
static int __init svm_init(void)

File diff suppressed because it is too large Load Diff

View File

@ -162,8 +162,6 @@ u64 __read_mostly host_xcr0;
static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
static int kvm_vcpu_reset(struct kvm_vcpu *vcpu);
static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
{
int i;
@ -263,6 +261,13 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);
asmlinkage void kvm_spurious_fault(void)
{
/* Fault while not rebooting. We want the trace. */
BUG();
}
EXPORT_SYMBOL_GPL(kvm_spurious_fault);
#define EXCPT_BENIGN 0
#define EXCPT_CONTRIBUTORY 1
#define EXCPT_PF 2
@ -840,23 +845,17 @@ static const u32 emulated_msrs[] = {
MSR_IA32_MCG_CTL,
};
static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
u64 old_efer = vcpu->arch.efer;
if (efer & efer_reserved_bits)
return 1;
if (is_paging(vcpu)
&& (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
return 1;
return false;
if (efer & EFER_FFXSR) {
struct kvm_cpuid_entry2 *feat;
feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
return 1;
return false;
}
if (efer & EFER_SVME) {
@ -864,9 +863,24 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
return 1;
return false;
}
return true;
}
EXPORT_SYMBOL_GPL(kvm_valid_efer);
static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
u64 old_efer = vcpu->arch.efer;
if (!kvm_valid_efer(vcpu, efer))
return 1;
if (is_paging(vcpu)
&& (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
return 1;
efer &= ~EFER_LMA;
efer |= vcpu->arch.efer & EFER_LMA;
@ -1079,6 +1093,10 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
u32 thresh_lo, thresh_hi;
int use_scaling = 0;
/* tsc_khz can be zero if TSC calibration fails */
if (this_tsc_khz == 0)
return;
/* Compute a scale to convert nanoseconds in TSC cycles */
kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
&vcpu->arch.virtual_tsc_shift,
@ -1156,6 +1174,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
ns = get_kernel_ns();
elapsed = ns - kvm->arch.last_tsc_nsec;
if (vcpu->arch.virtual_tsc_khz) {
/* n.b - signed multiplication and division required */
usdiff = data - kvm->arch.last_tsc_write;
#ifdef CONFIG_X86_64
@ -1170,6 +1189,8 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
usdiff -= elapsed;
if (usdiff < 0)
usdiff = -usdiff;
} else
usdiff = USEC_PER_SEC; /* disable TSC match window below */
/*
* Special case: TSC write with a small delta (1 second) of virtual
@ -2034,7 +2055,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_P6_EVNTSEL0:
case MSR_P6_EVNTSEL1:
if (kvm_pmu_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr, data);
return kvm_pmu_set_msr(vcpu, msr_info);
if (pr || data != 0)
vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
@ -2080,7 +2101,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
return xen_hvm_config(vcpu, data);
if (kvm_pmu_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr, data);
return kvm_pmu_set_msr(vcpu, msr_info);
if (!ignore_msrs) {
vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
msr, data);
@ -2479,7 +2500,6 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_USER_NMI:
case KVM_CAP_REINJECT_CONTROL:
case KVM_CAP_IRQ_INJECT_STATUS:
case KVM_CAP_ASSIGN_DEV_IRQ:
case KVM_CAP_IRQFD:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_PIT2:
@ -2497,10 +2517,12 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_XSAVE:
case KVM_CAP_ASYNC_PF:
case KVM_CAP_GET_TSC_KHZ:
case KVM_CAP_PCI_2_3:
case KVM_CAP_KVMCLOCK_CTRL:
case KVM_CAP_READONLY_MEM:
case KVM_CAP_IRQFD_RESAMPLE:
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
case KVM_CAP_ASSIGN_DEV_IRQ:
case KVM_CAP_PCI_2_3:
#endif
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@ -2521,9 +2543,11 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PV_MMU: /* obsolete */
r = 0;
break;
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
case KVM_CAP_IOMMU:
r = iommu_present(&pci_bus_type);
break;
#endif
case KVM_CAP_MCE:
r = KVM_MAX_MCE_BANKS;
break;
@ -2679,6 +2703,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s)
{
kvm_x86_ops->sync_pir_to_irr(vcpu);
memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
return 0;
@ -2696,7 +2721,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
{
if (irq->irq < 0 || irq->irq >= KVM_NR_INTERRUPTS)
if (irq->irq >= KVM_NR_INTERRUPTS)
return -EINVAL;
if (irqchip_in_kernel(vcpu->kvm))
return -ENXIO;
@ -2819,10 +2844,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
events->nmi.pad = 0;
events->sipi_vector = vcpu->arch.sipi_vector;
events->sipi_vector = 0; /* never valid when reporting to user space */
events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
| KVM_VCPUEVENT_VALID_SIPI_VECTOR
| KVM_VCPUEVENT_VALID_SHADOW);
memset(&events->reserved, 0, sizeof(events->reserved));
}
@ -2853,8 +2877,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.nmi_pending = events->nmi.pending;
kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
vcpu->arch.sipi_vector = events->sipi_vector;
if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
kvm_vcpu_has_lapic(vcpu))
vcpu->arch.apic->sipi_vector = events->sipi_vector;
kvm_make_request(KVM_REQ_EVENT, vcpu);
@ -3478,13 +3503,15 @@ out:
return r;
}
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
bool line_status)
{
if (!irqchip_in_kernel(kvm))
return -ENXIO;
irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
irq_event->irq, irq_event->level);
irq_event->irq, irq_event->level,
line_status);
return 0;
}
@ -4752,11 +4779,15 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
}
static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
bool write_fault_to_shadow_pgtable)
bool write_fault_to_shadow_pgtable,
int emulation_type)
{
gpa_t gpa = cr2;
pfn_t pfn;
if (emulation_type & EMULTYPE_NO_REEXECUTE)
return false;
if (!vcpu->arch.mmu.direct_map) {
/*
* Write permission should be allowed since only
@ -4899,8 +4930,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
if (r != EMULATION_OK) {
if (emulation_type & EMULTYPE_TRAP_UD)
return EMULATE_FAIL;
if (reexecute_instruction(vcpu, cr2,
write_fault_to_spt))
if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
emulation_type))
return EMULATE_DONE;
if (emulation_type & EMULTYPE_SKIP)
return EMULATE_FAIL;
@ -4930,7 +4961,8 @@ restart:
return EMULATE_DONE;
if (r == EMULATION_FAILED) {
if (reexecute_instruction(vcpu, cr2, write_fault_to_spt))
if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
emulation_type))
return EMULATE_DONE;
return handle_emulation_failure(vcpu);
@ -5641,14 +5673,20 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
#endif
}
static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
{
u64 eoi_exit_bitmap[4];
u32 tmr[8];
if (!kvm_apic_hw_enabled(vcpu->arch.apic))
return;
memset(eoi_exit_bitmap, 0, 32);
memset(tmr, 0, 32);
kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap);
kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr);
kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
kvm_apic_update_tmr(vcpu, tmr);
}
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
@ -5656,7 +5694,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
int r;
bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
vcpu->run->request_interrupt_window;
bool req_immediate_exit = 0;
bool req_immediate_exit = false;
if (vcpu->requests) {
if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
@ -5698,24 +5736,30 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
record_steal_time(vcpu);
if (kvm_check_request(KVM_REQ_NMI, vcpu))
process_nmi(vcpu);
req_immediate_exit =
kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu);
if (kvm_check_request(KVM_REQ_PMU, vcpu))
kvm_handle_pmu_event(vcpu);
if (kvm_check_request(KVM_REQ_PMI, vcpu))
kvm_deliver_pmi(vcpu);
if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
update_eoi_exitmap(vcpu);
if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
vcpu_scan_ioapic(vcpu);
}
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
kvm_apic_accept_events(vcpu);
if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
r = 1;
goto out;
}
inject_pending_event(vcpu);
/* enable NMI/IRQ window open exits if needed */
if (vcpu->arch.nmi_pending)
kvm_x86_ops->enable_nmi_window(vcpu);
req_immediate_exit =
kvm_x86_ops->enable_nmi_window(vcpu) != 0;
else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
kvm_x86_ops->enable_irq_window(vcpu);
req_immediate_exit =
kvm_x86_ops->enable_irq_window(vcpu) != 0;
if (kvm_lapic_enabled(vcpu)) {
/*
@ -5794,7 +5838,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
local_irq_enable();
/* Interrupt is enabled by handle_external_intr() */
kvm_x86_ops->handle_external_intr(vcpu);
++vcpu->stat.exits;
@ -5843,16 +5889,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
int r;
struct kvm *kvm = vcpu->kvm;
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
pr_debug("vcpu %d received sipi with vector # %x\n",
vcpu->vcpu_id, vcpu->arch.sipi_vector);
kvm_lapic_reset(vcpu);
r = kvm_vcpu_reset(vcpu);
if (r)
return r;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
r = vapic_enter(vcpu);
if (r) {
@ -5869,8 +5905,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
kvm_vcpu_block(vcpu);
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
if (kvm_check_request(KVM_REQ_UNHALT, vcpu))
{
if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
kvm_apic_accept_events(vcpu);
switch(vcpu->arch.mp_state) {
case KVM_MP_STATE_HALTED:
vcpu->arch.mp_state =
@ -5878,7 +5914,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.apf.halted = false;
break;
case KVM_MP_STATE_SIPI_RECEIVED:
case KVM_MP_STATE_INIT_RECEIVED:
break;
default:
r = -EINTR;
break;
@ -6013,6 +6050,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
kvm_vcpu_block(vcpu);
kvm_apic_accept_events(vcpu);
clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
r = -EAGAIN;
goto out;
@ -6169,6 +6207,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
kvm_apic_accept_events(vcpu);
mp_state->mp_state = vcpu->arch.mp_state;
return 0;
}
@ -6176,6 +6215,14 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
if (!kvm_vcpu_has_lapic(vcpu) &&
mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
return -EINVAL;
if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
} else
vcpu->arch.mp_state = mp_state->mp_state;
kvm_make_request(KVM_REQ_EVENT, vcpu);
return 0;
@ -6475,8 +6522,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
r = vcpu_load(vcpu);
if (r)
return r;
r = kvm_vcpu_reset(vcpu);
if (r == 0)
kvm_vcpu_reset(vcpu);
r = kvm_mmu_setup(vcpu);
vcpu_put(vcpu);
@ -6514,7 +6560,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_x86_ops->vcpu_free(vcpu);
}
static int kvm_vcpu_reset(struct kvm_vcpu *vcpu)
void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
{
atomic_set(&vcpu->arch.nmi_queued, 0);
vcpu->arch.nmi_pending = 0;
@ -6541,7 +6587,18 @@ static int kvm_vcpu_reset(struct kvm_vcpu *vcpu)
vcpu->arch.regs_avail = ~0;
vcpu->arch.regs_dirty = ~0;
return kvm_x86_ops->vcpu_reset(vcpu);
kvm_x86_ops->vcpu_reset(vcpu);
}
void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
{
struct kvm_segment cs;
kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
cs.selector = vector << 8;
cs.base = vector << 12;
kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
kvm_rip_write(vcpu, 0);
}
int kvm_arch_hardware_enable(void *garbage)
@ -6706,8 +6763,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
}
vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) {
r = -ENOMEM;
goto fail_free_mce_banks;
}
r = fx_init(vcpu);
if (r)
@ -6811,6 +6870,23 @@ void kvm_arch_sync_events(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
if (current->mm == kvm->mm) {
/*
* Free memory regions allocated on behalf of userspace,
* unless the the memory map has changed due to process exit
* or fd copying.
*/
struct kvm_userspace_memory_region mem;
memset(&mem, 0, sizeof(mem));
mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
kvm_set_memory_region(kvm, &mem);
mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
kvm_set_memory_region(kvm, &mem);
mem.slot = TSS_PRIVATE_MEMSLOT;
kvm_set_memory_region(kvm, &mem);
}
kvm_iommu_unmap_guest(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
@ -6903,24 +6979,21 @@ out_free:
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_memory_slot old,
struct kvm_userspace_memory_region *mem,
bool user_alloc)
enum kvm_mr_change change)
{
int npages = memslot->npages;
/*
* Only private memory slots need to be mapped here since
* KVM_SET_MEMORY_REGION ioctl is no longer supported.
*/
if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) {
if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) {
unsigned long userspace_addr;
/*
* MAP_SHARED to prevent internal slot pages from being moved
* by fork()/COW.
*/
userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE,
userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE,
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, 0);
@ -6935,17 +7008,17 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old,
bool user_alloc)
const struct kvm_memory_slot *old,
enum kvm_mr_change change)
{
int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
int nr_mmu_pages = 0;
if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) {
if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) {
int ret;
ret = vm_munmap(old.userspace_addr,
old.npages * PAGE_SIZE);
ret = vm_munmap(old->userspace_addr,
old->npages * PAGE_SIZE);
if (ret < 0)
printk(KERN_WARNING
"kvm_vm_ioctl_set_memory_region: "
@ -6962,14 +7035,14 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
* Existing largepage mappings are destroyed here and new ones will
* not be created until the end of the logging.
*/
if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
kvm_mmu_slot_remove_write_access(kvm, mem->slot);
/*
* If memory slot is created, or moved, we need to clear all
* mmio sptes.
*/
if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) {
kvm_mmu_zap_all(kvm);
if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
kvm_mmu_zap_mmio_sptes(kvm);
kvm_reload_remote_mmus(kvm);
}
}
@ -6991,7 +7064,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted)
|| !list_empty_careful(&vcpu->async_pf.done)
|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
|| kvm_apic_has_events(vcpu)
|| atomic_read(&vcpu->arch.nmi_queued) ||
(kvm_arch_interrupt_allowed(vcpu) &&
kvm_cpu_has_interrupt(vcpu));

View File

@ -443,29 +443,30 @@ static int __init test_devices_support(unsigned long addr)
}
/*
* Init function for virtio
* devices are in a single page above top of "normal" mem
* devices are in a single page above top of "normal" + standby mem
*/
static int __init kvm_devices_init(void)
{
int rc;
unsigned long total_memory_size = sclp_get_rzm() * sclp_get_rnmax();
if (!MACHINE_IS_KVM)
return -ENODEV;
if (test_devices_support(real_memory_size) < 0)
if (test_devices_support(total_memory_size) < 0)
return -ENODEV;
rc = vmem_add_mapping(real_memory_size, PAGE_SIZE);
rc = vmem_add_mapping(total_memory_size, PAGE_SIZE);
if (rc)
return rc;
kvm_devices = (void *) real_memory_size;
kvm_devices = (void *) total_memory_size;
kvm_root = root_device_register("kvm_s390");
if (IS_ERR(kvm_root)) {
rc = PTR_ERR(kvm_root);
printk(KERN_ERR "Could not register kvm_s390 root device");
vmem_remove_mapping(real_memory_size, PAGE_SIZE);
vmem_remove_mapping(total_memory_size, PAGE_SIZE);
return rc;
}

View File

@ -31,6 +31,7 @@
#include <asm/irq.h>
#include <asm/cio.h>
#include <asm/ccwdev.h>
#include <asm/virtio-ccw.h>
/*
* virtio related functions
@ -77,12 +78,9 @@ struct virtio_ccw_vq_info {
void *queue;
struct vq_info_block *info_block;
struct list_head node;
long cookie;
};
#define KVM_VIRTIO_CCW_RING_ALIGN 4096
#define KVM_S390_VIRTIO_CCW_NOTIFY 3
#define CCW_CMD_SET_VQ 0x13
#define CCW_CMD_VDEV_RESET 0x33
#define CCW_CMD_SET_IND 0x43
@ -135,8 +133,11 @@ static int ccw_io_helper(struct virtio_ccw_device *vcdev,
do {
spin_lock_irqsave(get_ccwdev_lock(vcdev->cdev), flags);
ret = ccw_device_start(vcdev->cdev, ccw, intparm, 0, 0);
if (!ret)
if (!ret) {
if (!vcdev->curr_io)
vcdev->err = 0;
vcdev->curr_io |= flag;
}
spin_unlock_irqrestore(get_ccwdev_lock(vcdev->cdev), flags);
cpu_relax();
} while (ret == -EBUSY);
@ -145,15 +146,18 @@ static int ccw_io_helper(struct virtio_ccw_device *vcdev,
}
static inline long do_kvm_notify(struct subchannel_id schid,
unsigned long queue_index)
unsigned long queue_index,
long cookie)
{
register unsigned long __nr asm("1") = KVM_S390_VIRTIO_CCW_NOTIFY;
register struct subchannel_id __schid asm("2") = schid;
register unsigned long __index asm("3") = queue_index;
register long __rc asm("2");
register long __cookie asm("4") = cookie;
asm volatile ("diag 2,4,0x500\n"
: "=d" (__rc) : "d" (__nr), "d" (__schid), "d" (__index)
: "=d" (__rc) : "d" (__nr), "d" (__schid), "d" (__index),
"d"(__cookie)
: "memory", "cc");
return __rc;
}
@ -166,7 +170,7 @@ static void virtio_ccw_kvm_notify(struct virtqueue *vq)
vcdev = to_vc_device(info->vq->vdev);
ccw_device_get_schid(vcdev->cdev, &schid);
do_kvm_notify(schid, vq->index);
info->cookie = do_kvm_notify(schid, vq->index, info->cookie);
}
static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev,

View File

@ -117,14 +117,13 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQ_APF_HALT 12
#define KVM_REQ_STEAL_UPDATE 13
#define KVM_REQ_NMI 14
#define KVM_REQ_IMMEDIATE_EXIT 15
#define KVM_REQ_PMU 16
#define KVM_REQ_PMI 17
#define KVM_REQ_WATCHDOG 18
#define KVM_REQ_MASTERCLOCK_UPDATE 19
#define KVM_REQ_MCLOCK_INPROGRESS 20
#define KVM_REQ_EPR_EXIT 21
#define KVM_REQ_EOIBITMAP 22
#define KVM_REQ_PMU 15
#define KVM_REQ_PMI 16
#define KVM_REQ_WATCHDOG 17
#define KVM_REQ_MASTERCLOCK_UPDATE 18
#define KVM_REQ_MCLOCK_INPROGRESS 19
#define KVM_REQ_EPR_EXIT 20
#define KVM_REQ_SCAN_IOAPIC 21
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
@ -133,6 +132,9 @@ struct kvm;
struct kvm_vcpu;
extern struct kmem_cache *kvm_vcpu_cache;
extern raw_spinlock_t kvm_lock;
extern struct list_head vm_list;
struct kvm_io_range {
gpa_t addr;
int len;
@ -149,6 +151,7 @@ struct kvm_io_bus {
enum kvm_bus {
KVM_MMIO_BUS,
KVM_PIO_BUS,
KVM_VIRTIO_CCW_NOTIFY_BUS,
KVM_NR_BUSES
};
@ -252,6 +255,7 @@ struct kvm_vcpu {
bool dy_eligible;
} spin_loop;
#endif
bool preempted;
struct kvm_vcpu_arch arch;
};
@ -285,7 +289,8 @@ struct kvm_kernel_irq_routing_entry {
u32 gsi;
u32 type;
int (*set)(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level);
struct kvm *kvm, int irq_source_id, int level,
bool line_status);
union {
struct {
unsigned irqchip;
@ -296,10 +301,10 @@ struct kvm_kernel_irq_routing_entry {
struct hlist_node link;
};
#ifdef __KVM_HAVE_IOAPIC
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
struct kvm_irq_routing_table {
int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS];
int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
struct kvm_kernel_irq_routing_entry *rt_entries;
u32 nr_rt_entries;
/*
@ -385,6 +390,7 @@ struct kvm {
long mmu_notifier_count;
#endif
long tlbs_dirty;
struct list_head devices;
};
#define kvm_err(fmt, ...) \
@ -424,6 +430,19 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
int __must_check vcpu_load(struct kvm_vcpu *vcpu);
void vcpu_put(struct kvm_vcpu *vcpu);
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
int kvm_irqfd_init(void);
void kvm_irqfd_exit(void);
#else
static inline int kvm_irqfd_init(void)
{
return 0;
}
static inline void kvm_irqfd_exit(void)
{
}
#endif
int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
struct module *module);
void kvm_exit(void);
@ -452,24 +471,39 @@ id_to_memslot(struct kvm_memslots *slots, int id)
return slot;
}
/*
* KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
* - create a new memory slot
* - delete an existing memory slot
* - modify an existing memory slot
* -- move it in the guest physical memory space
* -- just change its flags
*
* Since flags can be changed by some of these operations, the following
* differentiation is the best we can do for __kvm_set_memory_region():
*/
enum kvm_mr_change {
KVM_MR_CREATE,
KVM_MR_DELETE,
KVM_MR_MOVE,
KVM_MR_FLAGS_ONLY,
};
int kvm_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
bool user_alloc);
struct kvm_userspace_memory_region *mem);
int __kvm_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
bool user_alloc);
struct kvm_userspace_memory_region *mem);
void kvm_arch_free_memslot(struct kvm_memory_slot *free,
struct kvm_memory_slot *dont);
int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_memory_slot old,
struct kvm_userspace_memory_region *mem,
bool user_alloc);
enum kvm_mr_change change);
void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old,
bool user_alloc);
const struct kvm_memory_slot *old,
enum kvm_mr_change change);
bool kvm_largepages_enabled(void);
void kvm_disable_largepages(void);
/* flush all memory translations */
@ -539,7 +573,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_flush_remote_tlbs(struct kvm *kvm);
void kvm_reload_remote_mmus(struct kvm *kvm);
void kvm_make_mclock_inprogress_request(struct kvm *kvm);
void kvm_make_update_eoibitmap_request(struct kvm *kvm);
void kvm_make_scan_ioapic_request(struct kvm *kvm);
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
@ -555,10 +589,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log);
int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
struct
kvm_userspace_memory_region *mem,
bool user_alloc);
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level);
struct kvm_userspace_memory_region *mem);
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
bool line_status);
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
@ -632,7 +665,6 @@ static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
void kvm_arch_destroy_vm(struct kvm *kvm);
void kvm_free_all_assigned_devices(struct kvm *kvm);
void kvm_arch_sync_events(struct kvm *kvm);
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
@ -684,15 +716,11 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
bool mask);
#ifdef __KVM_HAVE_IOAPIC
void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
union kvm_ioapic_redirect_entry *entry,
unsigned long *deliver_bitmask);
#endif
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
bool line_status);
int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
int irq_source_id, int level);
int irq_source_id, int level, bool line_status);
bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin);
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
void kvm_register_irq_ack_notifier(struct kvm *kvm,
@ -705,7 +733,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
/* For vcpu->arch.iommu_flags */
#define KVM_IOMMU_CACHE_COHERENCY 0x1
#ifdef CONFIG_IOMMU_API
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
int kvm_iommu_map_guest(struct kvm *kvm);
@ -714,7 +742,7 @@ int kvm_assign_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev);
int kvm_deassign_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev);
#else /* CONFIG_IOMMU_API */
#else
static inline int kvm_iommu_map_pages(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
@ -726,28 +754,11 @@ static inline void kvm_iommu_unmap_pages(struct kvm *kvm,
{
}
static inline int kvm_iommu_map_guest(struct kvm *kvm)
{
return -ENODEV;
}
static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
{
return 0;
}
static inline int kvm_assign_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
return 0;
}
static inline int kvm_deassign_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
return 0;
}
#endif /* CONFIG_IOMMU_API */
#endif
static inline void __guest_enter(void)
{
@ -921,7 +932,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
}
#endif
#ifdef KVM_CAP_IRQ_ROUTING
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
#define KVM_MAX_IRQ_ROUTES 1024
@ -930,6 +941,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
const struct kvm_irq_routing_entry *entries,
unsigned nr,
unsigned flags);
int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue);
void kvm_free_irq_routing(struct kvm *kvm);
int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
@ -998,11 +1012,13 @@ static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; }
#endif
#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
unsigned long arg);
void kvm_free_all_assigned_devices(struct kvm *kvm);
#else
static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
@ -1011,6 +1027,8 @@ static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
return -ENOTTY;
}
static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}
#endif
static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
@ -1028,6 +1046,46 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
}
}
extern bool kvm_rebooting;
struct kvm_device_ops;
struct kvm_device {
struct kvm_device_ops *ops;
struct kvm *kvm;
void *private;
struct list_head vm_node;
};
/* create, destroy, and name are mandatory */
struct kvm_device_ops {
const char *name;
int (*create)(struct kvm_device *dev, u32 type);
/*
* Destroy is responsible for freeing dev.
*
* Destroy may be called before or after destructors are called
* on emulated I/O regions, depending on whether a reference is
* held by a vcpu or other kvm component that gets destroyed
* after the emulated I/O.
*/
void (*destroy)(struct kvm_device *dev);
int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
long (*ioctl)(struct kvm_device *dev, unsigned int ioctl,
unsigned long arg);
};
void kvm_device_get(struct kvm_device *dev);
void kvm_device_put(struct kvm_device *dev);
struct kvm_device *kvm_device_from_filp(struct file *filp);
extern struct kvm_device_ops kvm_mpic_ops;
extern struct kvm_device_ops kvm_xics_ops;
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)

Some files were not shown because too many files have changed in this diff Show More