Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "4.11 is going to be a relatively large release for KVM, with a little
  over 200 commits and noteworthy changes for most architectures.

  ARM:
   - GICv3 save/restore
   - cache flushing fixes
   - working MSI injection for GICv3 ITS
   - physical timer emulation

  MIPS:
   - various improvements under the hood
   - support for SMP guests
   - a large rewrite of MMU emulation. KVM MIPS can now use MMU
     notifiers to support copy-on-write, KSM, idle page tracking,
     swapping, ballooning and everything else. KVM_CAP_READONLY_MEM is
     also supported, so that writes to some memory regions can be
     treated as MMIO. The new MMU also paves the way for hardware
     virtualization support.

  PPC:
   - support for POWER9 using the radix-tree MMU for host and guest
   - resizable hashed page table
   - bugfixes.

  s390:
   - expose more features to the guest
   - more SIMD extensions
   - instruction execution protection
   - ESOP2

  x86:
   - improved hashing in the MMU
   - faster PageLRU tracking for Intel CPUs without EPT A/D bits
   - some refactoring of nested VMX entry/exit code, preparing for live
     migration support of nested hypervisors
   - expose yet another AVX512 CPUID bit
   - host-to-guest PTP support
   - refactoring of interrupt injection, with some optimizations thrown
     in and some duct tape removed.
   - remove lazy FPU handling
   - optimizations of user-mode exits
   - optimizations of vcpu_is_preempted() for KVM guests

  generic:
   - alternative signaling mechanism that doesn't pound on
     tsk->sighand->siglock"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (195 commits)
  x86/kvm: Provide optimized version of vcpu_is_preempted() for x86-64
  x86/paravirt: Change vcp_is_preempted() arg type to long
  KVM: VMX: use correct vmcs_read/write for guest segment selector/base
  x86/kvm/vmx: Defer TR reload after VM exit
  x86/asm/64: Drop __cacheline_aligned from struct x86_hw_tss
  x86/kvm/vmx: Simplify segment_base()
  x86/kvm/vmx: Get rid of segment_base() on 64-bit kernels
  x86/kvm/vmx: Don't fetch the TSS base from the GDT
  x86/asm: Define the kernel TSS limit in a macro
  kvm: fix page struct leak in handle_vmon
  KVM: PPC: Book3S HV: Disable HPT resizing on POWER9 for now
  KVM: Return an error code only as a constant in kvm_get_dirty_log()
  KVM: Return an error code only as a constant in kvm_get_dirty_log_protect()
  KVM: Return directly after a failed copy_from_user() in kvm_vm_compat_ioctl()
  KVM: x86: remove code for lazy FPU handling
  KVM: race-free exit from KVM_RUN without POSIX signals
  KVM: PPC: Book3S HV: Turn "KVM guest htab" message into a debug message
  KVM: PPC: Book3S PR: Ratelimit copy data failure error messages
  KVM: Support vCPU-based gfn->hva cache
  KVM: use separate generations for each address space
  ...
commit fd7e9a8834
@@ -2061,6 +2061,8 @@ registers, find a list below:
   MIPS  | KVM_REG_MIPS_LO | 64
   MIPS  | KVM_REG_MIPS_PC | 64
   MIPS  | KVM_REG_MIPS_CP0_INDEX | 32
   MIPS  | KVM_REG_MIPS_CP0_ENTRYLO0 | 64
   MIPS  | KVM_REG_MIPS_CP0_ENTRYLO1 | 64
   MIPS  | KVM_REG_MIPS_CP0_CONTEXT | 64
   MIPS  | KVM_REG_MIPS_CP0_USERLOCAL | 64
   MIPS  | KVM_REG_MIPS_CP0_PAGEMASK | 32
@@ -2071,9 +2073,11 @@ registers, find a list below:
   MIPS  | KVM_REG_MIPS_CP0_ENTRYHI | 64
   MIPS  | KVM_REG_MIPS_CP0_COMPARE | 32
   MIPS  | KVM_REG_MIPS_CP0_STATUS | 32
   MIPS  | KVM_REG_MIPS_CP0_INTCTL | 32
   MIPS  | KVM_REG_MIPS_CP0_CAUSE | 32
   MIPS  | KVM_REG_MIPS_CP0_EPC | 64
   MIPS  | KVM_REG_MIPS_CP0_PRID | 32
   MIPS  | KVM_REG_MIPS_CP0_EBASE | 64
   MIPS  | KVM_REG_MIPS_CP0_CONFIG | 32
   MIPS  | KVM_REG_MIPS_CP0_CONFIG1 | 32
   MIPS  | KVM_REG_MIPS_CP0_CONFIG2 | 32
@@ -2148,6 +2152,12 @@ patterns depending on whether they're 32-bit or 64-bit registers:
   0x7020 0000 0001 00 <reg:5> <sel:3>   (32-bit)
   0x7030 0000 0001 00 <reg:5> <sel:3>   (64-bit)

 Note: KVM_REG_MIPS_CP0_ENTRYLO0 and KVM_REG_MIPS_CP0_ENTRYLO1 are the MIPS64
 versions of the EntryLo registers regardless of the word size of the host
 hardware, host kernel, guest, and whether XPA is present in the guest, i.e.
 with the RI and XI bits (if they exist) in bits 63 and 62 respectively, and
 the PFNX field starting at bit 30.

 MIPS KVM control registers (see above) have the following id bit patterns:
   0x7030 0000 0002 <reg:16>
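As an illustration of the id bit patterns above, the 64-bit register id can be
assembled in C as follows. This is only a sketch based on the encoding shown
here; the helper macro names are made up and are not part of the KVM UAPI
(the kernel's own MIPS_CP0_32()/MIPS_CP0_64() macros appear later in this diff).

    /* Sketch: build MIPS one_reg ids from the bit patterns above.
     * reg is the CP0 register number (5 bits), sel the select (3 bits).
     */
    #define MIPS_CP0_ID_32(reg, sel) \
            (0x7020000000010000ULL | ((__u64)(reg) << 3) | (sel))
    #define MIPS_CP0_ID_64(reg, sel) \
            (0x7030000000010000ULL | ((__u64)(reg) << 3) | (sel))

    /* e.g. CP0_Status is register 12, select 0, a 32-bit register;
     * this should correspond to KVM_REG_MIPS_CP0_STATUS. */
    __u64 id = MIPS_CP0_ID_32(12, 0);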
@@ -2443,18 +2453,20 @@ are, it will do nothing and return an EBUSY error.
 
 The parameter is a pointer to a 32-bit unsigned integer variable
 containing the order (log base 2) of the desired size of the hash
 table, which must be between 18 and 46.  On successful return from the
-ioctl, it will have been updated with the order of the hash table that
-was allocated.
+ioctl, the value will not be changed by the kernel.
 
 If no hash table has been allocated when any vcpu is asked to run
 (with the KVM_RUN ioctl), the host kernel will allocate a
 default-sized hash table (16 MB).
 
 If this ioctl is called when a hash table has already been allocated,
-the kernel will clear out the existing hash table (zero all HPTEs) and
-return the hash table order in the parameter.  (If the guest is using
-the virtualized real-mode area (VRMA) facility, the kernel will
-re-create the VRMA HPTEs on the next KVM_RUN of any vcpu.)
+with a different order from the existing hash table, the existing hash
+table will be freed and a new one allocated.  If this ioctl is
+called when a hash table has already been allocated of the same order
+as specified, the kernel will clear out the existing hash table (zero
+all HPTEs).  In either case, if the guest is using the virtualized
+real-mode area (VRMA) facility, the kernel will re-create the VRMA
+HPTEs on the next KVM_RUN of any vcpu.
 
 4.77 KVM_S390_INTERRUPT
@@ -3177,7 +3189,7 @@ of IOMMU pages.
 
 The rest of functionality is identical to KVM_CREATE_SPAPR_TCE.
 
-4.98 KVM_REINJECT_CONTROL
+4.99 KVM_REINJECT_CONTROL
 
 Capability: KVM_CAP_REINJECT_CONTROL
 Architectures: x86
@@ -3201,7 +3213,7 @@ struct kvm_reinject_control {
 pit_reinject = 0 (!reinject mode) is recommended, unless running an old
 operating system that uses the PIT for timing (e.g. Linux 2.4.x).
 
-4.99 KVM_PPC_CONFIGURE_V3_MMU
+4.100 KVM_PPC_CONFIGURE_V3_MMU
 
 Capability: KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3
 Architectures: ppc
@@ -3232,7 +3244,7 @@ process table, which is in the guest's space. This field is formatted
 as the second doubleword of the partition table entry, as defined in
 the Power ISA V3.00, Book III section 5.7.6.1.
 
-4.100 KVM_PPC_GET_RMMU_INFO
+4.101 KVM_PPC_GET_RMMU_INFO
 
 Capability: KVM_CAP_PPC_RADIX_MMU
 Architectures: ppc
@@ -3266,6 +3278,101 @@ The ap_encodings gives the supported page sizes and their AP field
 encodings, encoded with the AP value in the top 3 bits and the log
 base 2 of the page size in the bottom 6 bits.
 
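As a quick illustration of that packing (a sketch, assuming the struct field is
named ap_encodings as in the text above), a 32-bit encoding entry can be
unpacked like this:

    /* Sketch: unpack one ap_encodings word as described above. */
    __u32 enc = rmmu_info.ap_encodings[i];
    unsigned int page_shift = enc & 0x3f;   /* log base 2 of the page size */
    unsigned int ap         = enc >> 29;    /* AP value, top 3 bits of 32  */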
4.102 KVM_PPC_RESIZE_HPT_PREPARE

Capability: KVM_CAP_SPAPR_RESIZE_HPT
Architectures: powerpc
Type: vm ioctl
Parameters: struct kvm_ppc_resize_hpt (in)
Returns: 0 on successful completion,
         >0 if a new HPT is being prepared, the value is an estimated
            number of milliseconds until preparation is complete
         -EFAULT if struct kvm_ppc_resize_hpt cannot be read,
         -EINVAL if the supplied shift or flags are invalid
         -ENOMEM if unable to allocate the new HPT
         -ENOSPC if there was a hash collision when moving existing
                 HPT entries to the new HPT
         -EIO on other error conditions

Used to implement the PAPR extension for runtime resizing of a guest's
Hashed Page Table (HPT).  Specifically this starts, stops or monitors
the preparation of a new potential HPT for the guest, essentially
implementing the H_RESIZE_HPT_PREPARE hypercall.

If called with shift > 0 when there is no pending HPT for the guest,
this begins preparation of a new pending HPT of size 2^(shift) bytes.
It then returns a positive integer with the estimated number of
milliseconds until preparation is complete.

If called when there is a pending HPT whose size does not match that
requested in the parameters, discards the existing pending HPT and
creates a new one as above.

If called when there is a pending HPT of the size requested, will:
  * If preparation of the pending HPT is already complete, return 0
  * If preparation of the pending HPT has failed, return an error
    code, then discard the pending HPT.
  * If preparation of the pending HPT is still in progress, return an
    estimated number of milliseconds until preparation is complete.

If called with shift == 0, discards any currently pending HPT and
returns 0 (i.e. cancels any in-progress preparation).

flags is reserved for future expansion; currently, setting any bits in
flags will result in -EINVAL.

Normally this will be called repeatedly with the same parameters until
it returns <= 0.  The first call will initiate preparation, subsequent
ones will monitor preparation until it completes or fails.

    struct kvm_ppc_resize_hpt {
            __u64 flags;
            __u32 shift;
            __u32 pad;
    };

4.103 KVM_PPC_RESIZE_HPT_COMMIT

Capability: KVM_CAP_SPAPR_RESIZE_HPT
Architectures: powerpc
Type: vm ioctl
Parameters: struct kvm_ppc_resize_hpt (in)
Returns: 0 on successful completion,
         -EFAULT if struct kvm_ppc_resize_hpt cannot be read,
         -EINVAL if the supplied shift or flags are invalid
         -ENXIO if there is no pending HPT, or the pending HPT doesn't
                have the requested size
         -EBUSY if the pending HPT is not fully prepared
         -ENOSPC if there was a hash collision when moving existing
                 HPT entries to the new HPT
         -EIO on other error conditions

Used to implement the PAPR extension for runtime resizing of a guest's
Hashed Page Table (HPT).  Specifically this requests that the guest be
transferred to working with the new HPT, essentially implementing the
H_RESIZE_HPT_COMMIT hypercall.

This should only be called after KVM_PPC_RESIZE_HPT_PREPARE has
returned 0 with the same parameters.  In other cases
KVM_PPC_RESIZE_HPT_COMMIT will return an error (usually -ENXIO or
-EBUSY, though others may be possible if the preparation was started,
but failed).

This will have undefined effects on the guest if it has not already
placed itself in a quiescent state where no vcpu will make MMU enabled
memory accesses.

On successful completion, the pending HPT will become the guest's active
HPT and the previous HPT will be discarded.

On failure, the guest will still be operating on its previous HPT.

    struct kvm_ppc_resize_hpt {
            __u64 flags;
            __u32 shift;
            __u32 pad;
    };

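To make the calling pattern concrete, here is a minimal userspace sketch of the
prepare/commit sequence described above. It assumes <sys/ioctl.h>, <unistd.h>
and <linux/kvm.h> are included, that vm_fd is an open KVM VM file descriptor
and shift the desired order; error handling is abbreviated.

    /* Sketch: resize a guest HPT to 2^shift bytes via the two ioctls
     * documented above.
     */
    struct kvm_ppc_resize_hpt rhpt = { .flags = 0, .shift = shift };
    int ret;

    do {
            ret = ioctl(vm_fd, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
            if (ret > 0)
                    usleep(ret * 1000);     /* estimated ms until ready */
    } while (ret > 0);

    if (ret == 0) {
            /* The guest must be quiescent before the commit (see above). */
            ret = ioctl(vm_fd, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
    }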
5. The kvm_run structure
------------------------

@@ -3282,7 +3389,18 @@ struct kvm_run {
 Request that KVM_RUN return when it becomes possible to inject external
 interrupts into the guest.  Useful in conjunction with KVM_INTERRUPT.
 
-    __u8 padding1[7];
+    __u8 immediate_exit;
+
+This field is polled once when KVM_RUN starts; if non-zero, KVM_RUN
+exits immediately, returning -EINTR.  In the common scenario where a
+signal is used to "kick" a VCPU out of KVM_RUN, this field can be used
+to avoid usage of KVM_SET_SIGNAL_MASK, which has worse scalability.
+Rather than blocking the signal outside KVM_RUN, userspace can set up
+a signal handler that sets run->immediate_exit to a non-zero value.
+
+This field is ignored if KVM_CAP_IMMEDIATE_EXIT is not available.
+
+    __u8 padding1[6];
 
     /* out */
     __u32 exit_reason;
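For illustration, the kick pattern described above might look roughly like the
sketch below in userspace. The VCPU fd, the mmap()ed struct kvm_run and the
thread sending SIGUSR1 are assumed to be set up elsewhere, and
KVM_CAP_IMMEDIATE_EXIT should be checked first; real code must also clear
immediate_exit only after the reason for the kick has been handled, or kicks
can be lost.

    /* Sketch: kick a VCPU out of KVM_RUN without KVM_SET_SIGNAL_MASK.
     * 'run' points at the mmap()ed struct kvm_run of the VCPU thread.
     */
    static struct kvm_run *run;         /* assumed set up elsewhere */

    static void kick_handler(int sig)
    {
            run->immediate_exit = 1;
    }

    /* VCPU thread: */
    signal(SIGUSR1, kick_handler);
    for (;;) {
            /* handle any pending requests here, then re-arm the flag */
            run->immediate_exit = 0;
            if (ioctl(vcpu_fd, KVM_RUN, 0) < 0 && errno == EINTR)
                    continue;           /* kicked; loop to service request */
            /* ... handle run->exit_reason ... */
    }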
@@ -118,7 +118,7 @@ Groups:
     -EBUSY: One or more VCPUs are running
 
 
-  KVM_DEV_ARM_VGIC_CPU_SYSREGS
+  KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS
   Attributes:
     The attr field of kvm_device_attr encodes two values:
     bits:   | 63 .... 32 | 31 .... 16 | 15 .... 0 |
@@ -139,13 +139,15 @@ Groups:
   All system regs accessed through this API are (rw, 64-bit) and
   kvm_device_attr.addr points to a __u64 value.
 
-  KVM_DEV_ARM_VGIC_CPU_SYSREGS accesses the CPU interface registers for the
+  KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS accesses the CPU interface registers for the
   CPU specified by the mpidr field.
 
+  CPU interface registers access is not implemented for AArch32 mode.
+  Error -ENXIO is returned when accessed in AArch32 mode.
   Errors:
     -ENXIO: Getting or setting this register is not yet supported
     -EBUSY: VCPU is running
-    -EINVAL: Invalid mpidr supplied
+    -EINVAL: Invalid mpidr or register value supplied
 
 
   KVM_DEV_ARM_VGIC_GRP_NR_IRQS
@@ -204,3 +206,6 @@ Groups:
   architecture defined MPIDR, and the field is encoded as follows:
   | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
   |    Aff3    |    Aff2    |    Aff1    |    Aff0    |
+  Errors:
+    -EINVAL: vINTID is not multiple of 32 or
+             info field is not VGIC_LEVEL_INFO_LINE_LEVEL
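As an illustration of the attr encoding described above, a sketch of building
and reading one CPU interface register follows. It assumes the
KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT and KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK macros
from the ARM KVM UAPI headers (they also appear later in this diff); mpidr,
sysreg_id, regval and vgic_dev_fd are placeholders set up by the caller.

    /* Sketch: read a GICv3 CPU interface register for one VCPU. */
    __u64 regval;
    struct kvm_device_attr attr = {
            .group = KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
            .attr  = ((__u64)mpidr << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) |
                     (sysreg_id & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK),
            .addr  = (__u64)(unsigned long)&regval,
    };
    ioctl(vgic_dev_fd, KVM_GET_DEVICE_ATTR, &attr);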
@@ -81,3 +81,38 @@ the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the
 same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall,
 specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0)
 is reserved in the hypercall for future use.
+
+
+6. KVM_HC_CLOCK_PAIRING
+------------------------
+Architecture: x86
+Status: active
+Purpose: Hypercall used to synchronize host and guest clocks.
+Usage:
+
+a0: guest physical address where host copies
+"struct kvm_clock_pairing" structure.
+
+a1: clock_type, at the moment only KVM_CLOCK_PAIRING_WALLCLOCK (0)
+is supported (corresponding to the host's CLOCK_REALTIME clock).
+
+		struct kvm_clock_pairing {
+			__s64 sec;
+			__s64 nsec;
+			__u64 tsc;
+			__u32 flags;
+			__u32 pad[9];
+		};
+
+	Where:
+		* sec: seconds from clock_type clock.
+		* nsec: nanoseconds from clock_type clock.
+		* tsc: guest TSC value used to calculate sec/nsec pair
+		* flags: flags, currently unused (must be 0).
+
+The hypercall lets a guest compute a precise timestamp across
+host and guest.  The guest can use the returned TSC value to
+compute the CLOCK_REALTIME for its clock, at the same instant.
+
+Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
+or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
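From the guest side, issuing this hypercall might look roughly like the sketch
below. It is guest kernel code using the kvm_hypercall2() helper and
slow_virt_to_phys() from the x86 headers; the exact include paths and error
handling are indicative only.

    /* Sketch (guest kernel code): ask the host for a clock pairing. */
    #include <linux/kvm_para.h>
    #include <asm/kvm_para.h>

    static struct kvm_clock_pairing clock_pair;

    long ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
                              slow_virt_to_phys(&clock_pair),
                              KVM_CLOCK_PAIRING_WALLCLOCK);
    if (ret == 0) {
            /* clock_pair.sec/nsec is CLOCK_REALTIME at TSC clock_pair.tsc */
    }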
@@ -26,9 +26,16 @@ sections.
 Fast page fault:
 
 Fast page fault is the fast path which fixes the guest page fault out of
-the mmu-lock on x86. Currently, the page fault can be fast only if the
-shadow page table is present and it is caused by write-protect, that means
-we just need to change the W bit of the spte.
+the mmu-lock on x86. Currently, the page fault can be fast in one of the
+following two cases:
+
+1. Access Tracking: The SPTE is not present, but it is marked for access
+tracking i.e. the SPTE_SPECIAL_MASK is set. That means we need to
+restore the saved R/X bits. This is described in more detail later below.
+
+2. Write-Protection: The SPTE is present and the fault is
+caused by write-protect. That means we just need to change the W bit of the
+spte.
 
 What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and
 SPTE_MMU_WRITEABLE bit on the spte:
@@ -38,7 +45,8 @@ SPTE_MMU_WRITEABLE bit on the spte:
 page write-protection.
 
 On fast page fault path, we will use cmpxchg to atomically set the spte W
-bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, this
+bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, or
+restore the saved R/X bits if VMX_EPT_TRACK_ACCESS mask is set, or both. This
 is safe because any change to these bits can be detected by cmpxchg.
 
 But we need to carefully check these cases:
@@ -142,6 +150,21 @@ Since the spte is "volatile" if it can be updated out of mmu-lock, we always
 atomically update the spte, the race caused by fast page fault can be avoided.
 See the comments in spte_has_volatile_bits() and mmu_spte_update().
 
+Lockless Access Tracking:
+
+This is used for Intel CPUs that are using EPT but do not support the EPT A/D
+bits. In this case, when the KVM MMU notifier is called to track accesses to a
+page (via kvm_mmu_notifier_clear_flush_young), it marks the PTE as not-present
+by clearing the RWX bits in the PTE and storing the original R & X bits in
+some unused/ignored bits. In addition, the SPTE_SPECIAL_MASK is also set on the
+PTE (using the ignored bit 62). When the VM tries to access the page later on,
+a fault is generated and the fast page fault mechanism described above is used
+to atomically restore the PTE to a Present state. The W bit is not saved when
+the PTE is marked for access tracking and during restoration to the Present
+state, the W bit is set depending on whether or not it was a write access. If
+it wasn't, then the W bit will remain clear until a write access happens, at
+which time it will be set using the Dirty tracking mechanism described above.
+
 3. Reference
 ------------
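To make the fast-path idea above concrete, here is a much-simplified sketch of
the cmpxchg-based fixup covering the two cases. The helper and mask names only
mirror the description; the real definitions and the full set of safety checks
live in arch/x86/kvm/mmu.c, so treat this purely as illustration.

    /* Simplified sketch of the lockless fast page fault fixup. */
    static bool fast_pf_fix_spte(u64 *sptep, u64 old_spte, bool write_fault)
    {
            u64 new_spte = old_spte;

            /* Case 1: access-tracked SPTE, restore the saved R/X bits. */
            if (is_access_track_spte(old_spte))
                    new_spte = restore_acc_track_spte(old_spte);

            /* Case 2: write-protected SPTE, set the W bit. */
            if (write_fault)
                    new_spte |= PT_WRITABLE_MASK;

            /*
             * cmpxchg detects any concurrent change to the spte made
             * outside mmu_lock; on failure the fault is simply retried.
             */
            return cmpxchg64(sptep, old_spte, new_spte) == old_spte;
    }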
@ -60,9 +60,6 @@ struct kvm_arch {
|
|||
/* The last vcpu id that ran on each physical CPU */
|
||||
int __percpu *last_vcpu_ran;
|
||||
|
||||
/* Timer */
|
||||
struct arch_timer_kvm timer;
|
||||
|
||||
/*
|
||||
* Anything that is not used directly from assembly code goes
|
||||
* here.
|
||||
|
|
|
@ -129,8 +129,7 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
|
|||
|
||||
static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
|
||||
kvm_pfn_t pfn,
|
||||
unsigned long size,
|
||||
bool ipa_uncached)
|
||||
unsigned long size)
|
||||
{
|
||||
/*
|
||||
* If we are going to insert an instruction page and the icache is
|
||||
|
@ -150,18 +149,12 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
|
|||
* and iterate over the range.
|
||||
*/
|
||||
|
||||
bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached;
|
||||
|
||||
VM_BUG_ON(size & ~PAGE_MASK);
|
||||
|
||||
if (!need_flush && !icache_is_pipt())
|
||||
goto vipt_cache;
|
||||
|
||||
while (size) {
|
||||
void *va = kmap_atomic_pfn(pfn);
|
||||
|
||||
if (need_flush)
|
||||
kvm_flush_dcache_to_poc(va, PAGE_SIZE);
|
||||
kvm_flush_dcache_to_poc(va, PAGE_SIZE);
|
||||
|
||||
if (icache_is_pipt())
|
||||
__cpuc_coherent_user_range((unsigned long)va,
|
||||
|
@ -173,7 +166,6 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
|
|||
kunmap_atomic(va);
|
||||
}
|
||||
|
||||
vipt_cache:
|
||||
if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) {
|
||||
/* any kind of VIPT cache */
|
||||
__flush_icache_all();
|
||||
|
|
|
@ -181,10 +181,23 @@ struct kvm_arch_memory_slot {
|
|||
#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
|
||||
#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
|
||||
#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
|
||||
#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
|
||||
(0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
|
||||
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
|
||||
#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
|
||||
#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
|
||||
#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
|
||||
#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
|
||||
#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
|
||||
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
|
||||
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
|
||||
(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
|
||||
#define VGIC_LEVEL_INFO_LINE_LEVEL 0
|
||||
|
||||
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
|
||||
|
||||
/* KVM_IRQ_LINE irq field index values */
|
||||
|
|
|
@ -7,7 +7,7 @@ ifeq ($(plus_virt),+virt)
|
|||
plus_virt_def := -DREQUIRES_VIRT=1
|
||||
endif
|
||||
|
||||
ccflags-y += -Iarch/arm/kvm
|
||||
ccflags-y += -Iarch/arm/kvm -Ivirt/kvm/arm/vgic
|
||||
CFLAGS_arm.o := -I. $(plus_virt_def)
|
||||
CFLAGS_mmu.o := -I.
|
||||
|
||||
|
@ -20,7 +20,7 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vf
|
|||
obj-$(CONFIG_KVM_ARM_HOST) += hyp/
|
||||
obj-y += kvm-arm.o init.o interrupts.o
|
||||
obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
|
||||
obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
|
||||
obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o vgic-v3-coproc.o
|
||||
obj-y += $(KVM)/arm/aarch32.o
|
||||
|
||||
obj-y += $(KVM)/arm/vgic/vgic.o
|
||||
|
@ -33,5 +33,6 @@ obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o
|
|||
obj-y += $(KVM)/arm/vgic/vgic-mmio-v3.o
|
||||
obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o
|
||||
obj-y += $(KVM)/arm/vgic/vgic-its.o
|
||||
obj-y += $(KVM)/arm/vgic/vgic-debug.o
|
||||
obj-y += $(KVM)/irqchip.o
|
||||
obj-y += $(KVM)/arm/arch_timer.o
|
||||
|
|
|
@ -135,7 +135,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
|||
goto out_free_stage2_pgd;
|
||||
|
||||
kvm_vgic_early_init(kvm);
|
||||
kvm_timer_init(kvm);
|
||||
|
||||
/* Mark the initial VMID generation invalid */
|
||||
kvm->arch.vmid_gen = 0;
|
||||
|
@ -207,6 +206,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
case KVM_CAP_ARM_PSCI_0_2:
|
||||
case KVM_CAP_READONLY_MEM:
|
||||
case KVM_CAP_MP_STATE:
|
||||
case KVM_CAP_IMMEDIATE_EXIT:
|
||||
r = 1;
|
||||
break;
|
||||
case KVM_CAP_COALESCED_MMIO:
|
||||
|
@ -301,7 +301,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
|||
|
||||
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_timer_should_fire(vcpu);
|
||||
return kvm_timer_should_fire(vcpu_vtimer(vcpu)) ||
|
||||
kvm_timer_should_fire(vcpu_ptimer(vcpu));
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
|
||||
|
@ -604,6 +605,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
|||
return ret;
|
||||
}
|
||||
|
||||
if (run->immediate_exit)
|
||||
return -EINTR;
|
||||
|
||||
if (vcpu->sigset_active)
|
||||
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
|
||||
|
||||
|
|
|
@ -1232,9 +1232,9 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
|
|||
}
|
||||
|
||||
static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn,
|
||||
unsigned long size, bool uncached)
|
||||
unsigned long size)
|
||||
{
|
||||
__coherent_cache_guest_page(vcpu, pfn, size, uncached);
|
||||
__coherent_cache_guest_page(vcpu, pfn, size);
|
||||
}
|
||||
|
||||
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
|
@ -1250,7 +1250,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||
struct vm_area_struct *vma;
|
||||
kvm_pfn_t pfn;
|
||||
pgprot_t mem_type = PAGE_S2;
|
||||
bool fault_ipa_uncached;
|
||||
bool logging_active = memslot_is_logging(memslot);
|
||||
unsigned long flags = 0;
|
||||
|
||||
|
@ -1337,8 +1336,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||
if (!hugetlb && !force_pte)
|
||||
hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
|
||||
|
||||
fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT;
|
||||
|
||||
if (hugetlb) {
|
||||
pmd_t new_pmd = pfn_pmd(pfn, mem_type);
|
||||
new_pmd = pmd_mkhuge(new_pmd);
|
||||
|
@ -1346,7 +1343,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||
new_pmd = kvm_s2pmd_mkwrite(new_pmd);
|
||||
kvm_set_pfn_dirty(pfn);
|
||||
}
|
||||
coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
|
||||
coherent_cache_guest_page(vcpu, pfn, PMD_SIZE);
|
||||
ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
|
||||
} else {
|
||||
pte_t new_pte = pfn_pte(pfn, mem_type);
|
||||
|
@ -1356,7 +1353,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||
kvm_set_pfn_dirty(pfn);
|
||||
mark_page_dirty(kvm, gfn);
|
||||
}
|
||||
coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
|
||||
coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE);
|
||||
ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
|
||||
}
|
||||
|
||||
|
@ -1879,15 +1876,6 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
|
|||
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
unsigned long npages)
|
||||
{
|
||||
/*
|
||||
* Readonly memslots are not incoherent with the caches by definition,
|
||||
* but in practice, they are used mostly to emulate ROMs or NOR flashes
|
||||
* that the guest may consider devices and hence map as uncached.
|
||||
* To prevent incoherency issues in these cases, tag all readonly
|
||||
* regions as incoherent.
|
||||
*/
|
||||
if (slot->flags & KVM_MEM_READONLY)
|
||||
slot->flags |= KVM_MEMSLOT_INCOHERENT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -37,6 +37,11 @@ static struct kvm_regs cortexa_regs_reset = {
|
|||
.usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
|
||||
};
|
||||
|
||||
static const struct kvm_irq_level cortexa_ptimer_irq = {
|
||||
{ .irq = 30 },
|
||||
.level = 1,
|
||||
};
|
||||
|
||||
static const struct kvm_irq_level cortexa_vtimer_irq = {
|
||||
{ .irq = 27 },
|
||||
.level = 1,
|
||||
|
@ -58,6 +63,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
|
|||
{
|
||||
struct kvm_regs *reset_regs;
|
||||
const struct kvm_irq_level *cpu_vtimer_irq;
|
||||
const struct kvm_irq_level *cpu_ptimer_irq;
|
||||
|
||||
switch (vcpu->arch.target) {
|
||||
case KVM_ARM_TARGET_CORTEX_A7:
|
||||
|
@ -65,6 +71,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
|
|||
reset_regs = &cortexa_regs_reset;
|
||||
vcpu->arch.midr = read_cpuid_id();
|
||||
cpu_vtimer_irq = &cortexa_vtimer_irq;
|
||||
cpu_ptimer_irq = &cortexa_ptimer_irq;
|
||||
break;
|
||||
default:
|
||||
return -ENODEV;
|
||||
|
@ -77,5 +84,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
|
|||
kvm_reset_coprocs(vcpu);
|
||||
|
||||
/* Reset arch_timer context */
|
||||
return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
|
||||
return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* VGIC system registers handling functions for AArch32 mode
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <asm/kvm_emulate.h>
|
||||
#include "vgic.h"
|
||||
|
||||
int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id,
|
||||
u64 *reg)
|
||||
{
|
||||
/*
|
||||
* TODO: Implement for AArch32
|
||||
*/
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id,
|
||||
u64 *reg)
|
||||
{
|
||||
/*
|
||||
* TODO: Implement for AArch32
|
||||
*/
|
||||
return -ENXIO;
|
||||
}
|
|
@ -70,9 +70,6 @@ struct kvm_arch {
|
|||
|
||||
/* Interrupt controller */
|
||||
struct vgic_dist vgic;
|
||||
|
||||
/* Timer */
|
||||
struct arch_timer_kvm timer;
|
||||
};
|
||||
|
||||
#define KVM_NR_MEM_OBJS 40
|
||||
|
|
|
@ -236,13 +236,11 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
|
|||
|
||||
static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
|
||||
kvm_pfn_t pfn,
|
||||
unsigned long size,
|
||||
bool ipa_uncached)
|
||||
unsigned long size)
|
||||
{
|
||||
void *va = page_address(pfn_to_page(pfn));
|
||||
|
||||
if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
|
||||
kvm_flush_dcache_to_poc(va, size);
|
||||
kvm_flush_dcache_to_poc(va, size);
|
||||
|
||||
if (!icache_is_aliasing()) { /* PIPT */
|
||||
flush_icache_range((unsigned long)va,
|
||||
|
|
|
@ -201,10 +201,23 @@ struct kvm_arch_memory_slot {
|
|||
#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
|
||||
#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
|
||||
#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
|
||||
#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
|
||||
(0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
|
||||
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
|
||||
#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
|
||||
#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
|
||||
#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
|
||||
#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
|
||||
#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
|
||||
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
|
||||
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
|
||||
(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
|
||||
#define VGIC_LEVEL_INFO_LINE_LEVEL 0
|
||||
|
||||
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
|
||||
|
||||
/* Device Control API on vcpu fd */
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# Makefile for Kernel-based Virtual Machine module
|
||||
#
|
||||
|
||||
ccflags-y += -Iarch/arm64/kvm
|
||||
ccflags-y += -Iarch/arm64/kvm -Ivirt/kvm/arm/vgic
|
||||
CFLAGS_arm.o := -I.
|
||||
CFLAGS_mmu.o := -I.
|
||||
|
||||
|
@ -19,6 +19,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
|
|||
kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o
|
||||
kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
|
||||
kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
|
||||
kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o
|
||||
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o
|
||||
|
||||
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o
|
||||
|
@ -31,6 +32,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
|
|||
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
|
||||
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o
|
||||
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o
|
||||
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-debug.o
|
||||
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o
|
||||
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
|
||||
kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
|
||||
|
|
|
@ -46,6 +46,11 @@ static const struct kvm_regs default_regs_reset32 = {
|
|||
COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT),
|
||||
};
|
||||
|
||||
static const struct kvm_irq_level default_ptimer_irq = {
|
||||
.irq = 30,
|
||||
.level = 1,
|
||||
};
|
||||
|
||||
static const struct kvm_irq_level default_vtimer_irq = {
|
||||
.irq = 27,
|
||||
.level = 1,
|
||||
|
@ -104,6 +109,7 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
const struct kvm_irq_level *cpu_vtimer_irq;
|
||||
const struct kvm_irq_level *cpu_ptimer_irq;
|
||||
const struct kvm_regs *cpu_reset;
|
||||
|
||||
switch (vcpu->arch.target) {
|
||||
|
@ -117,6 +123,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
cpu_vtimer_irq = &default_vtimer_irq;
|
||||
cpu_ptimer_irq = &default_ptimer_irq;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -130,5 +137,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
|
|||
kvm_pmu_vcpu_reset(vcpu);
|
||||
|
||||
/* Reset timer */
|
||||
return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
|
||||
return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq);
|
||||
}
|
||||
|
|
|
@ -820,6 +820,61 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
|||
CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
|
||||
access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), }
|
||||
|
||||
static bool access_cntp_tval(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
|
||||
u64 now = kvm_phys_timer_read();
|
||||
|
||||
if (p->is_write)
|
||||
ptimer->cnt_cval = p->regval + now;
|
||||
else
|
||||
p->regval = ptimer->cnt_cval - now;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool access_cntp_ctl(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
|
||||
|
||||
if (p->is_write) {
|
||||
/* ISTATUS bit is read-only */
|
||||
ptimer->cnt_ctl = p->regval & ~ARCH_TIMER_CTRL_IT_STAT;
|
||||
} else {
|
||||
u64 now = kvm_phys_timer_read();
|
||||
|
||||
p->regval = ptimer->cnt_ctl;
|
||||
/*
|
||||
* Set ISTATUS bit if it's expired.
|
||||
* Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is
|
||||
* UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
|
||||
* regardless of ENABLE bit for our implementation convenience.
|
||||
*/
|
||||
if (ptimer->cnt_cval <= now)
|
||||
p->regval |= ARCH_TIMER_CTRL_IT_STAT;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool access_cntp_cval(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
|
||||
|
||||
if (p->is_write)
|
||||
ptimer->cnt_cval = p->regval;
|
||||
else
|
||||
p->regval = ptimer->cnt_cval;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Architected system registers.
|
||||
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
|
||||
|
@ -1029,6 +1084,16 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
|||
{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011),
|
||||
NULL, reset_unknown, TPIDRRO_EL0 },
|
||||
|
||||
/* CNTP_TVAL_EL0 */
|
||||
{ Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b0010), Op2(0b000),
|
||||
access_cntp_tval },
|
||||
/* CNTP_CTL_EL0 */
|
||||
{ Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b0010), Op2(0b001),
|
||||
access_cntp_ctl },
|
||||
/* CNTP_CVAL_EL0 */
|
||||
{ Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b0010), Op2(0b010),
|
||||
access_cntp_cval },
|
||||
|
||||
/* PMEVCNTRn_EL0 */
|
||||
PMU_PMEVCNTR_EL0(0),
|
||||
PMU_PMEVCNTR_EL0(1),
|
||||
|
@ -1795,6 +1860,17 @@ static bool index_to_params(u64 id, struct sys_reg_params *params)
|
|||
}
|
||||
}
|
||||
|
||||
const struct sys_reg_desc *find_reg_by_id(u64 id,
|
||||
struct sys_reg_params *params,
|
||||
const struct sys_reg_desc table[],
|
||||
unsigned int num)
|
||||
{
|
||||
if (!index_to_params(id, params))
|
||||
return NULL;
|
||||
|
||||
return find_reg(params, table, num);
|
||||
}
|
||||
|
||||
/* Decode an index value, and find the sys_reg_desc entry. */
|
||||
static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
|
||||
u64 id)
|
||||
|
@ -1807,11 +1883,8 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
|
|||
if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG)
|
||||
return NULL;
|
||||
|
||||
if (!index_to_params(id, ¶ms))
|
||||
return NULL;
|
||||
|
||||
table = get_target_table(vcpu->arch.target, true, &num);
|
||||
r = find_reg(¶ms, table, num);
|
||||
r = find_reg_by_id(id, ¶ms, table, num);
|
||||
if (!r)
|
||||
r = find_reg(¶ms, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
|
||||
|
||||
|
@ -1918,10 +1991,8 @@ static int get_invariant_sys_reg(u64 id, void __user *uaddr)
|
|||
struct sys_reg_params params;
|
||||
const struct sys_reg_desc *r;
|
||||
|
||||
if (!index_to_params(id, ¶ms))
|
||||
return -ENOENT;
|
||||
|
||||
r = find_reg(¶ms, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs));
|
||||
r = find_reg_by_id(id, ¶ms, invariant_sys_regs,
|
||||
ARRAY_SIZE(invariant_sys_regs));
|
||||
if (!r)
|
||||
return -ENOENT;
|
||||
|
||||
|
@ -1935,9 +2006,8 @@ static int set_invariant_sys_reg(u64 id, void __user *uaddr)
|
|||
int err;
|
||||
u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
|
||||
|
||||
if (!index_to_params(id, ¶ms))
|
||||
return -ENOENT;
|
||||
r = find_reg(¶ms, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs));
|
||||
r = find_reg_by_id(id, ¶ms, invariant_sys_regs,
|
||||
ARRAY_SIZE(invariant_sys_regs));
|
||||
if (!r)
|
||||
return -ENOENT;
|
||||
|
||||
|
|
|
@ -136,6 +136,10 @@ static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
|
|||
return i1->Op2 - i2->Op2;
|
||||
}
|
||||
|
||||
const struct sys_reg_desc *find_reg_by_id(u64 id,
|
||||
struct sys_reg_params *params,
|
||||
const struct sys_reg_desc table[],
|
||||
unsigned int num);
|
||||
|
||||
#define Op0(_x) .Op0 = _x
|
||||
#define Op1(_x) .Op1 = _x
|
||||
|
|
|
@ -0,0 +1,346 @@
|
|||
/*
|
||||
* VGIC system registers handling functions for AArch64 mode
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include <linux/irqchip/arm-gic-v3.h>
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <asm/kvm_emulate.h>
|
||||
#include "vgic.h"
|
||||
#include "sys_regs.h"
|
||||
|
||||
static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
u32 host_pri_bits, host_id_bits, host_seis, host_a3v, seis, a3v;
|
||||
struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct vgic_vmcr vmcr;
|
||||
u64 val;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
if (p->is_write) {
|
||||
val = p->regval;
|
||||
|
||||
/*
|
||||
* Disallow restoring VM state if not supported by this
|
||||
* hardware.
|
||||
*/
|
||||
host_pri_bits = ((val & ICC_CTLR_EL1_PRI_BITS_MASK) >>
|
||||
ICC_CTLR_EL1_PRI_BITS_SHIFT) + 1;
|
||||
if (host_pri_bits > vgic_v3_cpu->num_pri_bits)
|
||||
return false;
|
||||
|
||||
vgic_v3_cpu->num_pri_bits = host_pri_bits;
|
||||
|
||||
host_id_bits = (val & ICC_CTLR_EL1_ID_BITS_MASK) >>
|
||||
ICC_CTLR_EL1_ID_BITS_SHIFT;
|
||||
if (host_id_bits > vgic_v3_cpu->num_id_bits)
|
||||
return false;
|
||||
|
||||
vgic_v3_cpu->num_id_bits = host_id_bits;
|
||||
|
||||
host_seis = ((kvm_vgic_global_state.ich_vtr_el2 &
|
||||
ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT);
|
||||
seis = (val & ICC_CTLR_EL1_SEIS_MASK) >>
|
||||
ICC_CTLR_EL1_SEIS_SHIFT;
|
||||
if (host_seis != seis)
|
||||
return false;
|
||||
|
||||
host_a3v = ((kvm_vgic_global_state.ich_vtr_el2 &
|
||||
ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT);
|
||||
a3v = (val & ICC_CTLR_EL1_A3V_MASK) >> ICC_CTLR_EL1_A3V_SHIFT;
|
||||
if (host_a3v != a3v)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Here set VMCR.CTLR in ICC_CTLR_EL1 layout.
|
||||
* The vgic_set_vmcr() will convert to ICH_VMCR layout.
|
||||
*/
|
||||
vmcr.ctlr = val & ICC_CTLR_EL1_CBPR_MASK;
|
||||
vmcr.ctlr |= val & ICC_CTLR_EL1_EOImode_MASK;
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
} else {
|
||||
val = 0;
|
||||
val |= (vgic_v3_cpu->num_pri_bits - 1) <<
|
||||
ICC_CTLR_EL1_PRI_BITS_SHIFT;
|
||||
val |= vgic_v3_cpu->num_id_bits << ICC_CTLR_EL1_ID_BITS_SHIFT;
|
||||
val |= ((kvm_vgic_global_state.ich_vtr_el2 &
|
||||
ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT) <<
|
||||
ICC_CTLR_EL1_SEIS_SHIFT;
|
||||
val |= ((kvm_vgic_global_state.ich_vtr_el2 &
|
||||
ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT) <<
|
||||
ICC_CTLR_EL1_A3V_SHIFT;
|
||||
/*
|
||||
* The VMCR.CTLR value is in ICC_CTLR_EL1 layout.
|
||||
* Extract it directly using ICC_CTLR_EL1 reg definitions.
|
||||
*/
|
||||
val |= vmcr.ctlr & ICC_CTLR_EL1_CBPR_MASK;
|
||||
val |= vmcr.ctlr & ICC_CTLR_EL1_EOImode_MASK;
|
||||
|
||||
p->regval = val;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool access_gic_pmr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
struct vgic_vmcr vmcr;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
if (p->is_write) {
|
||||
vmcr.pmr = (p->regval & ICC_PMR_EL1_MASK) >> ICC_PMR_EL1_SHIFT;
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
} else {
|
||||
p->regval = (vmcr.pmr << ICC_PMR_EL1_SHIFT) & ICC_PMR_EL1_MASK;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool access_gic_bpr0(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
struct vgic_vmcr vmcr;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
if (p->is_write) {
|
||||
vmcr.bpr = (p->regval & ICC_BPR0_EL1_MASK) >>
|
||||
ICC_BPR0_EL1_SHIFT;
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
} else {
|
||||
p->regval = (vmcr.bpr << ICC_BPR0_EL1_SHIFT) &
|
||||
ICC_BPR0_EL1_MASK;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool access_gic_bpr1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
struct vgic_vmcr vmcr;
|
||||
|
||||
if (!p->is_write)
|
||||
p->regval = 0;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
if (!((vmcr.ctlr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT)) {
|
||||
if (p->is_write) {
|
||||
vmcr.abpr = (p->regval & ICC_BPR1_EL1_MASK) >>
|
||||
ICC_BPR1_EL1_SHIFT;
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
} else {
|
||||
p->regval = (vmcr.abpr << ICC_BPR1_EL1_SHIFT) &
|
||||
ICC_BPR1_EL1_MASK;
|
||||
}
|
||||
} else {
|
||||
if (!p->is_write)
|
||||
p->regval = min((vmcr.bpr + 1), 7U);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool access_gic_grpen0(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
struct vgic_vmcr vmcr;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
if (p->is_write) {
|
||||
vmcr.grpen0 = (p->regval & ICC_IGRPEN0_EL1_MASK) >>
|
||||
ICC_IGRPEN0_EL1_SHIFT;
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
} else {
|
||||
p->regval = (vmcr.grpen0 << ICC_IGRPEN0_EL1_SHIFT) &
|
||||
ICC_IGRPEN0_EL1_MASK;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool access_gic_grpen1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
struct vgic_vmcr vmcr;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
if (p->is_write) {
|
||||
vmcr.grpen1 = (p->regval & ICC_IGRPEN1_EL1_MASK) >>
|
||||
ICC_IGRPEN1_EL1_SHIFT;
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
} else {
|
||||
p->regval = (vmcr.grpen1 << ICC_IGRPEN1_EL1_SHIFT) &
|
||||
ICC_IGRPEN1_EL1_MASK;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void vgic_v3_access_apr_reg(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *p, u8 apr, u8 idx)
|
||||
{
|
||||
struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3;
|
||||
uint32_t *ap_reg;
|
||||
|
||||
if (apr)
|
||||
ap_reg = &vgicv3->vgic_ap1r[idx];
|
||||
else
|
||||
ap_reg = &vgicv3->vgic_ap0r[idx];
|
||||
|
||||
if (p->is_write)
|
||||
*ap_reg = p->regval;
|
||||
else
|
||||
p->regval = *ap_reg;
|
||||
}
|
||||
|
||||
static bool access_gic_aprn(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r, u8 apr)
|
||||
{
|
||||
struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu;
|
||||
u8 idx = r->Op2 & 3;
|
||||
|
||||
/*
|
||||
* num_pri_bits are initialized with HW supported values.
|
||||
* We can rely safely on num_pri_bits even if VM has not
|
||||
* restored ICC_CTLR_EL1 before restoring APnR registers.
|
||||
*/
|
||||
switch (vgic_v3_cpu->num_pri_bits) {
|
||||
case 7:
|
||||
vgic_v3_access_apr_reg(vcpu, p, apr, idx);
|
||||
break;
|
||||
case 6:
|
||||
if (idx > 1)
|
||||
goto err;
|
||||
vgic_v3_access_apr_reg(vcpu, p, apr, idx);
|
||||
break;
|
||||
default:
|
||||
if (idx > 0)
|
||||
goto err;
|
||||
vgic_v3_access_apr_reg(vcpu, p, apr, idx);
|
||||
}
|
||||
|
||||
return true;
|
||||
err:
|
||||
if (!p->is_write)
|
||||
p->regval = 0;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool access_gic_ap0r(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
|
||||
{
|
||||
return access_gic_aprn(vcpu, p, r, 0);
|
||||
}
|
||||
|
||||
static bool access_gic_ap1r(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
return access_gic_aprn(vcpu, p, r, 1);
|
||||
}
|
||||
|
||||
static bool access_gic_sre(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3;
|
||||
|
||||
/* Validate SRE bit */
|
||||
if (p->is_write) {
|
||||
if (!(p->regval & ICC_SRE_EL1_SRE))
|
||||
return false;
|
||||
} else {
|
||||
p->regval = vgicv3->vgic_sre;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
static const struct sys_reg_desc gic_v3_icc_reg_descs[] = {
|
||||
/* ICC_PMR_EL1 */
|
||||
{ Op0(3), Op1(0), CRn(4), CRm(6), Op2(0), access_gic_pmr },
|
||||
/* ICC_BPR0_EL1 */
|
||||
{ Op0(3), Op1(0), CRn(12), CRm(8), Op2(3), access_gic_bpr0 },
|
||||
/* ICC_AP0R0_EL1 */
|
||||
{ Op0(3), Op1(0), CRn(12), CRm(8), Op2(4), access_gic_ap0r },
|
||||
/* ICC_AP0R1_EL1 */
|
||||
{ Op0(3), Op1(0), CRn(12), CRm(8), Op2(5), access_gic_ap0r },
|
||||
/* ICC_AP0R2_EL1 */
|
||||
{ Op0(3), Op1(0), CRn(12), CRm(8), Op2(6), access_gic_ap0r },
|
||||
/* ICC_AP0R3_EL1 */
|
||||
{ Op0(3), Op1(0), CRn(12), CRm(8), Op2(7), access_gic_ap0r },
|
||||
/* ICC_AP1R0_EL1 */
|
||||
{ Op0(3), Op1(0), CRn(12), CRm(9), Op2(0), access_gic_ap1r },
|
||||
/* ICC_AP1R1_EL1 */
|
||||
{ Op0(3), Op1(0), CRn(12), CRm(9), Op2(1), access_gic_ap1r },
|
||||
/* ICC_AP1R2_EL1 */
|
||||
{ Op0(3), Op1(0), CRn(12), CRm(9), Op2(2), access_gic_ap1r },
|
||||
/* ICC_AP1R3_EL1 */
|
||||
{ Op0(3), Op1(0), CRn(12), CRm(9), Op2(3), access_gic_ap1r },
|
||||
/* ICC_BPR1_EL1 */
|
||||
{ Op0(3), Op1(0), CRn(12), CRm(12), Op2(3), access_gic_bpr1 },
|
||||
/* ICC_CTLR_EL1 */
|
||||
{ Op0(3), Op1(0), CRn(12), CRm(12), Op2(4), access_gic_ctlr },
|
||||
/* ICC_SRE_EL1 */
|
||||
{ Op0(3), Op1(0), CRn(12), CRm(12), Op2(5), access_gic_sre },
|
||||
/* ICC_IGRPEN0_EL1 */
|
||||
{ Op0(3), Op1(0), CRn(12), CRm(12), Op2(6), access_gic_grpen0 },
|
||||
/* ICC_GRPEN1_EL1 */
|
||||
{ Op0(3), Op1(0), CRn(12), CRm(12), Op2(7), access_gic_grpen1 },
|
||||
};
|
||||
|
||||
int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id,
|
||||
u64 *reg)
|
||||
{
|
||||
struct sys_reg_params params;
|
||||
u64 sysreg = (id & KVM_DEV_ARM_VGIC_SYSREG_MASK) | KVM_REG_SIZE_U64;
|
||||
|
||||
params.regval = *reg;
|
||||
params.is_write = is_write;
|
||||
params.is_aarch32 = false;
|
||||
params.is_32bit = false;
|
||||
|
||||
if (find_reg_by_id(sysreg, ¶ms, gic_v3_icc_reg_descs,
|
||||
ARRAY_SIZE(gic_v3_icc_reg_descs)))
|
||||
return 0;
|
||||
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id,
|
||||
u64 *reg)
|
||||
{
|
||||
struct sys_reg_params params;
|
||||
const struct sys_reg_desc *r;
|
||||
u64 sysreg = (id & KVM_DEV_ARM_VGIC_SYSREG_MASK) | KVM_REG_SIZE_U64;
|
||||
|
||||
if (is_write)
|
||||
params.regval = *reg;
|
||||
params.is_write = is_write;
|
||||
params.is_aarch32 = false;
|
||||
params.is_32bit = false;
|
||||
|
||||
r = find_reg_by_id(sysreg, ¶ms, gic_v3_icc_reg_descs,
|
||||
ARRAY_SIZE(gic_v3_icc_reg_descs));
|
||||
if (!r)
|
||||
return -ENXIO;
|
||||
|
||||
if (!r->access(vcpu, ¶ms, r))
|
||||
return -EINVAL;
|
||||
|
||||
if (!is_write)
|
||||
*reg = params.regval;
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -43,6 +43,7 @@
|
|||
#define KVM_REG_MIPS_CP0_ENTRYHI MIPS_CP0_64(10, 0)
|
||||
#define KVM_REG_MIPS_CP0_COMPARE MIPS_CP0_32(11, 0)
|
||||
#define KVM_REG_MIPS_CP0_STATUS MIPS_CP0_32(12, 0)
|
||||
#define KVM_REG_MIPS_CP0_INTCTL MIPS_CP0_32(12, 1)
|
||||
#define KVM_REG_MIPS_CP0_CAUSE MIPS_CP0_32(13, 0)
|
||||
#define KVM_REG_MIPS_CP0_EPC MIPS_CP0_64(14, 0)
|
||||
#define KVM_REG_MIPS_CP0_PRID MIPS_CP0_32(15, 0)
|
||||
|
@ -64,7 +65,7 @@
|
|||
#define KVM_REG_MIPS_CP0_KSCRATCH6 MIPS_CP0_64(31, 7)
|
||||
|
||||
|
||||
#define KVM_MAX_VCPUS 1
|
||||
#define KVM_MAX_VCPUS 8
|
||||
#define KVM_USER_MEM_SLOTS 8
|
||||
/* memory slots that does not exposed to userspace */
|
||||
#define KVM_PRIVATE_MEM_SLOTS 0
|
||||
|
@ -88,6 +89,7 @@
|
|||
|
||||
#define KVM_GUEST_KUSEG 0x00000000UL
|
||||
#define KVM_GUEST_KSEG0 0x40000000UL
|
||||
#define KVM_GUEST_KSEG1 0x40000000UL
|
||||
#define KVM_GUEST_KSEG23 0x60000000UL
|
||||
#define KVM_GUEST_KSEGX(a) ((_ACAST32_(a)) & 0xe0000000)
|
||||
#define KVM_GUEST_CPHYSADDR(a) ((_ACAST32_(a)) & 0x1fffffff)
|
||||
|
@ -104,7 +106,6 @@
|
|||
#define KVM_GUEST_KSEG23ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG23)
|
||||
|
||||
#define KVM_INVALID_PAGE 0xdeadbeef
|
||||
#define KVM_INVALID_INST 0xdeadbeef
|
||||
#define KVM_INVALID_ADDR 0xdeadbeef
|
||||
|
||||
/*
|
||||
|
@ -121,8 +122,6 @@ static inline bool kvm_is_error_hva(unsigned long addr)
|
|||
return IS_ERR_VALUE(addr);
|
||||
}
|
||||
|
||||
extern atomic_t kvm_mips_instance;
|
||||
|
||||
struct kvm_vm_stat {
|
||||
ulong remote_tlb_flush;
|
||||
};
|
||||
|
@ -156,12 +155,8 @@ struct kvm_arch_memory_slot {
|
|||
};
|
||||
|
||||
struct kvm_arch {
|
||||
/* Guest GVA->HPA page table */
|
||||
unsigned long *guest_pmap;
|
||||
unsigned long guest_pmap_npages;
|
||||
|
||||
/* Wired host TLB used for the commpage */
|
||||
int commpage_tlb;
|
||||
/* Guest physical mm */
|
||||
struct mm_struct gpa_mm;
|
||||
};
|
||||
|
||||
#define N_MIPS_COPROC_REGS 32
|
||||
|
@ -233,6 +228,7 @@ enum emulation_result {
|
|||
EMULATE_FAIL, /* can't emulate this instruction */
|
||||
EMULATE_WAIT, /* WAIT instruction */
|
||||
EMULATE_PRIV_FAIL,
|
||||
EMULATE_EXCEPT, /* A guest exception has been generated */
|
||||
};
|
||||
#define mips3_paddr_to_tlbpfn(x) \
@@ -250,6 +246,7 @@ enum emulation_result {
#define TLB_ASID(x) ((x).tlb_hi & KVM_ENTRYHI_ASID)
#define TLB_LO_IDX(x, va) (((va) >> PAGE_SHIFT) & 1)
#define TLB_IS_VALID(x, va) ((x).tlb_lo[TLB_LO_IDX(x, va)] & ENTRYLO_V)
#define TLB_IS_DIRTY(x, va) ((x).tlb_lo[TLB_LO_IDX(x, va)] & ENTRYLO_D)
#define TLB_HI_VPN2_HIT(x, y) ((TLB_VPN2(x) & ~(x).tlb_mask) == \
((y) & VPN2_MASK & ~(x).tlb_mask))
#define TLB_HI_ASID_HIT(x, y) (TLB_IS_GLOBAL(x) || \
@@ -261,6 +258,17 @@ struct kvm_mips_tlb {
long tlb_lo[2];
};

#define KVM_NR_MEM_OBJS 4

/*
* We don't want allocation failures within the mmu code, so we preallocate
* enough memory for a single page fault in a cache.
*/
struct kvm_mmu_memory_cache {
int nobjs;
void *objects[KVM_NR_MEM_OBJS];
};

#define KVM_MIPS_AUX_FPU 0x1
#define KVM_MIPS_AUX_MSA 0x2

@@ -275,6 +283,8 @@ struct kvm_vcpu_arch {
unsigned long host_cp0_badvaddr;
unsigned long host_cp0_epc;
u32 host_cp0_cause;
u32 host_cp0_badinstr;
u32 host_cp0_badinstrp;

/* GPRS */
unsigned long gprs[32];
@@ -318,20 +328,18 @@ struct kvm_vcpu_arch {
/* Bitmask of pending exceptions to be cleared */
unsigned long pending_exceptions_clr;

/* Save/Restore the entryhi register when are are preempted/scheduled back in */
unsigned long preempt_entryhi;

/* S/W Based TLB for guest */
struct kvm_mips_tlb guest_tlb[KVM_MIPS_GUEST_TLB_SIZE];

/* Cached guest kernel/user ASIDs */
u32 guest_user_asid[NR_CPUS];
u32 guest_kernel_asid[NR_CPUS];
/* Guest kernel/user [partial] mm */
struct mm_struct guest_kernel_mm, guest_user_mm;

/* Guest ASID of last user mode execution */
unsigned int last_user_gasid;

/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;

int last_sched_cpu;

/* WAIT executed */
@@ -339,14 +347,15 @@ struct kvm_vcpu_arch {

u8 fpu_enabled;
u8 msa_enabled;
u8 kscratch_enabled;
};

#define kvm_read_c0_guest_index(cop0) (cop0->reg[MIPS_CP0_TLB_INDEX][0])
#define kvm_write_c0_guest_index(cop0, val) (cop0->reg[MIPS_CP0_TLB_INDEX][0] = val)
#define kvm_read_c0_guest_entrylo0(cop0) (cop0->reg[MIPS_CP0_TLB_LO0][0])
#define kvm_write_c0_guest_entrylo0(cop0, val) (cop0->reg[MIPS_CP0_TLB_LO0][0] = (val))
#define kvm_read_c0_guest_entrylo1(cop0) (cop0->reg[MIPS_CP0_TLB_LO1][0])
#define kvm_write_c0_guest_entrylo1(cop0, val) (cop0->reg[MIPS_CP0_TLB_LO1][0] = (val))
#define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0])
#define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val))
#define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2])
@@ -522,9 +531,17 @@ struct kvm_mips_callbacks {
int (*handle_msa_fpe)(struct kvm_vcpu *vcpu);
int (*handle_fpe)(struct kvm_vcpu *vcpu);
int (*handle_msa_disabled)(struct kvm_vcpu *vcpu);
int (*vm_init)(struct kvm *kvm);
int (*vcpu_init)(struct kvm_vcpu *vcpu);
void (*vcpu_uninit)(struct kvm_vcpu *vcpu);
int (*vcpu_setup)(struct kvm_vcpu *vcpu);
void (*flush_shadow_all)(struct kvm *kvm);
/*
* Must take care of flushing any cached GPA PTEs (e.g. guest entries in
* VZ root TLB, or T&E GVA page tables and corresponding root TLB
* mappings).
*/
void (*flush_shadow_memslot)(struct kvm *kvm,
const struct kvm_memory_slot *slot);
gpa_t (*gva_to_gpa)(gva_t gva);
void (*queue_timer_int)(struct kvm_vcpu *vcpu);
void (*dequeue_timer_int)(struct kvm_vcpu *vcpu);
@@ -542,8 +559,10 @@ struct kvm_mips_callbacks {
const struct kvm_one_reg *reg, s64 *v);
int (*set_one_reg)(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg, s64 v);
int (*vcpu_get_regs)(struct kvm_vcpu *vcpu);
int (*vcpu_set_regs)(struct kvm_vcpu *vcpu);
int (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
int (*vcpu_put)(struct kvm_vcpu *vcpu, int cpu);
int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
void (*vcpu_reenter)(struct kvm_run *run, struct kvm_vcpu *vcpu);
};
extern struct kvm_mips_callbacks *kvm_mips_callbacks;
int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks);
@@ -556,6 +575,7 @@ extern int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu);
/* Building of entry/exception code */
int kvm_mips_entry_setup(void);
void *kvm_mips_build_vcpu_run(void *addr);
void *kvm_mips_build_tlb_refill_exception(void *addr, void *handler);
void *kvm_mips_build_exception(void *addr, void *handler);
void *kvm_mips_build_exit(void *addr);

@@ -580,54 +600,125 @@ u32 kvm_get_user_asid(struct kvm_vcpu *vcpu);
u32 kvm_get_commpage_asid (struct kvm_vcpu *vcpu);

extern int kvm_mips_handle_kseg0_tlb_fault(unsigned long badbaddr,
struct kvm_vcpu *vcpu);
struct kvm_vcpu *vcpu,
bool write_fault);

extern int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
struct kvm_vcpu *vcpu);

extern int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
struct kvm_mips_tlb *tlb);
struct kvm_mips_tlb *tlb,
unsigned long gva,
bool write_fault);

extern enum emulation_result kvm_mips_handle_tlbmiss(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);

extern enum emulation_result kvm_mips_handle_tlbmod(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
struct kvm_vcpu *vcpu,
bool write_fault);

extern void kvm_mips_dump_host_tlbs(void);
extern void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu);
extern int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi,
unsigned long entrylo0,
unsigned long entrylo1,
int flush_dcache_mask);
extern void kvm_mips_flush_host_tlb(int skip_kseg0);
extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi);
extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi,
bool user, bool kernel);

extern int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu,
unsigned long entryhi);
extern int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr);
extern unsigned long kvm_mips_translate_guest_kseg0_to_hpa(struct kvm_vcpu *vcpu,
unsigned long gva);
extern void kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu,
struct kvm_vcpu *vcpu);
extern void kvm_local_flush_tlb_all(void);
extern void kvm_mips_alloc_new_mmu_context(struct kvm_vcpu *vcpu);
extern void kvm_mips_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
extern void kvm_mips_vcpu_put(struct kvm_vcpu *vcpu);

void kvm_mips_suspend_mm(int cpu);
void kvm_mips_resume_mm(int cpu);

/* MMU handling */

/**
* enum kvm_mips_flush - Types of MMU flushes.
* @KMF_USER: Flush guest user virtual memory mappings.
* Guest USeg only.
* @KMF_KERN: Flush guest kernel virtual memory mappings.
* Guest USeg and KSeg2/3.
* @KMF_GPA: Flush guest physical memory mappings.
* Also includes KSeg0 if KMF_KERN is set.
*/
enum kvm_mips_flush {
KMF_USER = 0x0,
KMF_KERN = 0x1,
KMF_GPA = 0x2,
};
void kvm_mips_flush_gva_pt(pgd_t *pgd, enum kvm_mips_flush flags);
bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn);
int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn);
pgd_t *kvm_pgd_alloc(void);
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
void kvm_trap_emul_invalidate_gva(struct kvm_vcpu *vcpu, unsigned long addr,
bool user);
void kvm_trap_emul_gva_lockless_begin(struct kvm_vcpu *vcpu);
void kvm_trap_emul_gva_lockless_end(struct kvm_vcpu *vcpu);

enum kvm_mips_fault_result {
KVM_MIPS_MAPPED = 0,
KVM_MIPS_GVA,
KVM_MIPS_GPA,
KVM_MIPS_TLB,
KVM_MIPS_TLBINV,
KVM_MIPS_TLBMOD,
};
enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu,
unsigned long gva,
bool write);

#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);

static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
unsigned long address)
{
}

/* Emulation */
u32 kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu);
int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out);
enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause);
int kvm_get_badinstr(u32 *opc, struct kvm_vcpu *vcpu, u32 *out);
int kvm_get_badinstrp(u32 *opc, struct kvm_vcpu *vcpu, u32 *out);

/**
* kvm_is_ifetch_fault() - Find whether a TLBL exception is due to ifetch fault.
* @vcpu: Virtual CPU.
*
* Returns: Whether the TLBL exception was likely due to an instruction
* fetch fault rather than a data load fault.
*/
static inline bool kvm_is_ifetch_fault(struct kvm_vcpu_arch *vcpu)
{
unsigned long badvaddr = vcpu->host_cp0_badvaddr;
unsigned long epc = msk_isa16_mode(vcpu->pc);
u32 cause = vcpu->host_cp0_cause;

if (epc == badvaddr)
return true;

/*
* Branches may be 32-bit or 16-bit instructions.
* This isn't exact, but we don't really support MIPS16 or microMIPS yet
* in KVM anyway.
*/
if ((cause & CAUSEF_BD) && badvaddr - epc <= 4)
return true;

return false;
}

extern enum emulation_result kvm_mips_emulate_inst(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);

long kvm_mips_guest_exception_base(struct kvm_vcpu *vcpu);

extern enum emulation_result kvm_mips_emulate_syscall(u32 cause,
u32 *opc,
struct kvm_run *run,
@@ -761,10 +852,6 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {}
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot) {}
static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}

@@ -29,9 +29,11 @@ do { \
} \
} while (0)

extern void tlbmiss_handler_setup_pgd(unsigned long);

/* Note: This is also implemented with uasm in arch/mips/kvm/entry.c */
#define TLBMISS_HANDLER_SETUP_PGD(pgd) \
do { \
extern void tlbmiss_handler_setup_pgd(unsigned long); \
tlbmiss_handler_setup_pgd((unsigned long)(pgd)); \
htw_set_pwbase((unsigned long)pgd); \
} while (0)
@@ -97,17 +99,12 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
static inline void
get_new_mmu_context(struct mm_struct *mm, unsigned long cpu)
{
extern void kvm_local_flush_tlb_all(void);
unsigned long asid = asid_cache(cpu);

if (!((asid += cpu_asid_inc()) & cpu_asid_mask(&cpu_data[cpu]))) {
if (cpu_has_vtag_icache)
flush_icache_all();
#ifdef CONFIG_KVM
kvm_local_flush_tlb_all(); /* start new asid cycle */
#else
local_flush_tlb_all(); /* start new asid cycle */
#endif
if (!asid) /* fix version if needed */
asid = asid_first_version(cpu);
}

@@ -19,6 +19,8 @@
* Some parts derived from the x86 version of this file.
*/

#define __KVM_HAVE_READONLY_MEM

/*
* for KVM_GET_REGS and KVM_SET_REGS
*

@@ -20,7 +20,9 @@ config KVM
select EXPORT_UASM
select PREEMPT_NOTIFIERS
select ANON_INODES
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_MMIO
select MMU_NOTIFIER
select SRCU
---help---
Support for hosting Guest kernels.

@@ -13,6 +13,7 @@
#include <linux/err.h>
#include <linux/highmem.h>
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <linux/bootmem.h>
@@ -29,28 +30,37 @@
static int kvm_mips_trans_replace(struct kvm_vcpu *vcpu, u32 *opc,
union mips_instruction replace)
{
unsigned long paddr, flags;
void *vaddr;
unsigned long vaddr = (unsigned long)opc;
int err;

if (KVM_GUEST_KSEGX((unsigned long)opc) == KVM_GUEST_KSEG0) {
paddr = kvm_mips_translate_guest_kseg0_to_hpa(vcpu,
(unsigned long)opc);
vaddr = kmap_atomic(pfn_to_page(PHYS_PFN(paddr)));
vaddr += paddr & ~PAGE_MASK;
memcpy(vaddr, (void *)&replace, sizeof(u32));
local_flush_icache_range((unsigned long)vaddr,
(unsigned long)vaddr + 32);
kunmap_atomic(vaddr);
} else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) {
local_irq_save(flags);
memcpy((void *)opc, (void *)&replace, sizeof(u32));
__local_flush_icache_user_range((unsigned long)opc,
(unsigned long)opc + 32);
local_irq_restore(flags);
} else {
kvm_err("%s: Invalid address: %p\n", __func__, opc);
return -EFAULT;
retry:
/* The GVA page table is still active so use the Linux TLB handlers */
kvm_trap_emul_gva_lockless_begin(vcpu);
err = put_user(replace.word, opc);
kvm_trap_emul_gva_lockless_end(vcpu);

if (unlikely(err)) {
/*
* We write protect clean pages in GVA page table so normal
* Linux TLB mod handler doesn't silently dirty the page.
* Its also possible we raced with a GVA invalidation.
* Try to force the page to become dirty.
*/
err = kvm_trap_emul_gva_fault(vcpu, vaddr, true);
if (unlikely(err)) {
kvm_info("%s: Address unwriteable: %p\n",
__func__, opc);
return -EFAULT;
}

/*
* Try again. This will likely trigger a TLB refill, which will
* fetch the new dirty entry from the GVA page table, which
* should then succeed.
*/
goto retry;
}
__local_flush_icache_user_range(vaddr, vaddr + 4);

return 0;
}

@@ -38,23 +38,25 @@
* Compute the return address and do emulate branch simulation, if required.
* This function should be called only in branch delay slot active.
*/
unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
unsigned long instpc)
static int kvm_compute_return_epc(struct kvm_vcpu *vcpu, unsigned long instpc,
unsigned long *out)
{
unsigned int dspcontrol;
union mips_instruction insn;
struct kvm_vcpu_arch *arch = &vcpu->arch;
long epc = instpc;
long nextpc = KVM_INVALID_INST;
long nextpc;
int err;

if (epc & 3)
goto unaligned;
if (epc & 3) {
kvm_err("%s: unaligned epc\n", __func__);
return -EINVAL;
}

/* Read the instruction */
insn.word = kvm_get_inst((u32 *) epc, vcpu);

if (insn.word == KVM_INVALID_INST)
return KVM_INVALID_INST;
err = kvm_get_badinstrp((u32 *)epc, vcpu, &insn.word);
if (err)
return err;

switch (insn.i_format.opcode) {
/* jr and jalr are in r_format format. */
@@ -66,6 +68,8 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
case jr_op:
nextpc = arch->gprs[insn.r_format.rs];
break;
default:
return -EINVAL;
}
break;

@@ -114,8 +118,11 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
nextpc = epc;
break;
case bposge32_op:
if (!cpu_has_dsp)
goto sigill;
if (!cpu_has_dsp) {
kvm_err("%s: DSP branch but not DSP ASE\n",
__func__);
return -EINVAL;
}

dspcontrol = rddsp(0x01);

@@ -125,6 +132,8 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
epc += 8;
nextpc = epc;
break;
default:
return -EINVAL;
}
break;

@@ -189,7 +198,7 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
/* And now the FPA/cp1 branch instructions. */
case cop1_op:
kvm_err("%s: unsupported cop1_op\n", __func__);
break;
return -EINVAL;

#ifdef CONFIG_CPU_MIPSR6
/* R6 added the following compact branches with forbidden slots */
@@ -198,19 +207,19 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
/* only rt == 0 isn't compact branch */
if (insn.i_format.rt != 0)
goto compact_branch;
break;
return -EINVAL;
case pop10_op:
case pop30_op:
/* only rs == rt == 0 is reserved, rest are compact branches */
if (insn.i_format.rs != 0 || insn.i_format.rt != 0)
goto compact_branch;
break;
return -EINVAL;
case pop66_op:
case pop76_op:
/* only rs == 0 isn't compact branch */
if (insn.i_format.rs != 0)
goto compact_branch;
break;
return -EINVAL;
compact_branch:
/*
* If we've hit an exception on the forbidden slot, then
@@ -221,42 +230,74 @@ compact_branch:
break;
#else
compact_branch:
/* Compact branches not supported before R6 */
break;
/* Fall through - Compact branches not supported before R6 */
#endif
default:
return -EINVAL;
}

return nextpc;

unaligned:
kvm_err("%s: unaligned epc\n", __func__);
return nextpc;

sigill:
kvm_err("%s: DSP branch but not DSP ASE\n", __func__);
return nextpc;
*out = nextpc;
return 0;
}

enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause)
{
unsigned long branch_pc;
enum emulation_result er = EMULATE_DONE;
int err;

if (cause & CAUSEF_BD) {
branch_pc = kvm_compute_return_epc(vcpu, vcpu->arch.pc);
if (branch_pc == KVM_INVALID_INST) {
er = EMULATE_FAIL;
} else {
vcpu->arch.pc = branch_pc;
kvm_debug("BD update_pc(): New PC: %#lx\n",
vcpu->arch.pc);
}
} else
err = kvm_compute_return_epc(vcpu, vcpu->arch.pc,
&vcpu->arch.pc);
if (err)
return EMULATE_FAIL;
} else {
vcpu->arch.pc += 4;
}

kvm_debug("update_pc(): New PC: %#lx\n", vcpu->arch.pc);

return er;
return EMULATE_DONE;
}

/**
* kvm_get_badinstr() - Get bad instruction encoding.
* @opc: Guest pointer to faulting instruction.
* @vcpu: KVM VCPU information.
*
* Gets the instruction encoding of the faulting instruction, using the saved
* BadInstr register value if it exists, otherwise falling back to reading guest
* memory at @opc.
*
* Returns: The instruction encoding of the faulting instruction.
*/
int kvm_get_badinstr(u32 *opc, struct kvm_vcpu *vcpu, u32 *out)
{
if (cpu_has_badinstr) {
*out = vcpu->arch.host_cp0_badinstr;
return 0;
} else {
return kvm_get_inst(opc, vcpu, out);
}
}

/**
* kvm_get_badinstrp() - Get bad prior instruction encoding.
* @opc: Guest pointer to prior faulting instruction.
* @vcpu: KVM VCPU information.
*
* Gets the instruction encoding of the prior faulting instruction (the branch
* containing the delay slot which faulted), using the saved BadInstrP register
* value if it exists, otherwise falling back to reading guest memory at @opc.
*
* Returns: The instruction encoding of the prior faulting instruction.
*/
int kvm_get_badinstrp(u32 *opc, struct kvm_vcpu *vcpu, u32 *out)
{
if (cpu_has_badinstrp) {
*out = vcpu->arch.host_cp0_badinstrp;
return 0;
} else {
return kvm_get_inst(opc, vcpu, out);
}
}

/**
@ -856,22 +897,30 @@ enum emulation_result kvm_mips_emul_tlbr(struct kvm_vcpu *vcpu)
|
|||
static void kvm_mips_invalidate_guest_tlb(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mips_tlb *tlb)
|
||||
{
|
||||
struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
|
||||
struct mm_struct *user_mm = &vcpu->arch.guest_user_mm;
|
||||
int cpu, i;
|
||||
bool user;
|
||||
|
||||
/* No need to flush for entries which are already invalid */
|
||||
if (!((tlb->tlb_lo[0] | tlb->tlb_lo[1]) & ENTRYLO_V))
|
||||
return;
|
||||
/* Don't touch host kernel page tables or TLB mappings */
|
||||
if ((unsigned long)tlb->tlb_hi > 0x7fffffff)
|
||||
return;
|
||||
/* User address space doesn't need flushing for KSeg2/3 changes */
|
||||
user = tlb->tlb_hi < KVM_GUEST_KSEG0;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
/* Invalidate page table entries */
|
||||
kvm_trap_emul_invalidate_gva(vcpu, tlb->tlb_hi & VPN2_MASK, user);
|
||||
|
||||
/*
|
||||
* Probe the shadow host TLB for the entry being overwritten, if one
|
||||
* matches, invalidate it
|
||||
*/
|
||||
kvm_mips_host_tlb_inv(vcpu, tlb->tlb_hi);
|
||||
kvm_mips_host_tlb_inv(vcpu, tlb->tlb_hi, user, true);
|
||||
|
||||
/* Invalidate the whole ASID on other CPUs */
|
||||
cpu = smp_processor_id();
|
||||
|
@ -879,8 +928,8 @@ static void kvm_mips_invalidate_guest_tlb(struct kvm_vcpu *vcpu,
|
|||
if (i == cpu)
|
||||
continue;
|
||||
if (user)
|
||||
vcpu->arch.guest_user_asid[i] = 0;
|
||||
vcpu->arch.guest_kernel_asid[i] = 0;
|
||||
cpu_context(i, user_mm) = 0;
|
||||
cpu_context(i, kern_mm) = 0;
|
||||
}
|
||||
|
||||
preempt_enable();
|
||||
|
@ -1017,7 +1066,7 @@ unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu)
|
|||
unsigned int mask = MIPS_CONF_M;
|
||||
|
||||
/* KScrExist */
|
||||
mask |= (unsigned int)vcpu->arch.kscratch_enabled << 16;
|
||||
mask |= 0xfc << MIPS_CONF4_KSCREXIST_SHIFT;
|
||||
|
||||
return mask;
|
||||
}
|
||||
|
@ -1056,6 +1105,7 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
|
|||
struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct mips_coproc *cop0 = vcpu->arch.cop0;
|
||||
struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
|
||||
enum emulation_result er = EMULATE_DONE;
|
||||
u32 rt, rd, sel;
|
||||
unsigned long curr_pc;
|
||||
|
@ -1150,14 +1200,13 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
|
|||
er = EMULATE_FAIL;
|
||||
break;
|
||||
}
|
||||
#define C0_EBASE_CORE_MASK 0xff
|
||||
if ((rd == MIPS_CP0_PRID) && (sel == 1)) {
|
||||
/* Preserve CORE number */
|
||||
kvm_change_c0_guest_ebase(cop0,
|
||||
~(C0_EBASE_CORE_MASK),
|
||||
/*
|
||||
* Preserve core number, and keep the exception
|
||||
* base in guest KSeg0.
|
||||
*/
|
||||
kvm_change_c0_guest_ebase(cop0, 0x1ffff000,
|
||||
vcpu->arch.gprs[rt]);
|
||||
kvm_err("MTCz, cop0->reg[EBASE]: %#lx\n",
|
||||
kvm_read_c0_guest_ebase(cop0));
|
||||
} else if (rd == MIPS_CP0_TLB_HI && sel == 0) {
|
||||
u32 nasid =
|
||||
vcpu->arch.gprs[rt] & KVM_ENTRYHI_ASID;
|
||||
|
@ -1168,6 +1217,17 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
|
|||
& KVM_ENTRYHI_ASID,
|
||||
nasid);
|
||||
|
||||
/*
|
||||
* Flush entries from the GVA page
|
||||
* tables.
|
||||
* Guest user page table will get
|
||||
* flushed lazily on re-entry to guest
|
||||
* user if the guest ASID actually
|
||||
* changes.
|
||||
*/
|
||||
kvm_mips_flush_gva_pt(kern_mm->pgd,
|
||||
KMF_KERN);
|
||||
|
||||
/*
|
||||
* Regenerate/invalidate kernel MMU
|
||||
* context.
|
||||
|
@ -1178,13 +1238,10 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
|
|||
*/
|
||||
preempt_disable();
|
||||
cpu = smp_processor_id();
|
||||
kvm_get_new_mmu_context(&vcpu->arch.guest_kernel_mm,
|
||||
cpu, vcpu);
|
||||
vcpu->arch.guest_kernel_asid[cpu] =
|
||||
vcpu->arch.guest_kernel_mm.context.asid[cpu];
|
||||
get_new_mmu_context(kern_mm, cpu);
|
||||
for_each_possible_cpu(i)
|
||||
if (i != cpu)
|
||||
vcpu->arch.guest_kernel_asid[i] = 0;
|
||||
cpu_context(i, kern_mm) = 0;
|
||||
preempt_enable();
|
||||
}
|
||||
kvm_write_c0_guest_entryhi(cop0,
|
||||
|
@ -1639,12 +1696,56 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
|
|||
return er;
|
||||
}
|
||||
|
||||
static enum emulation_result kvm_mips_guest_cache_op(int (*fn)(unsigned long),
|
||||
unsigned long curr_pc,
|
||||
unsigned long addr,
|
||||
struct kvm_run *run,
|
||||
struct kvm_vcpu *vcpu,
|
||||
u32 cause)
|
||||
{
|
||||
int err;
|
||||
|
||||
for (;;) {
|
||||
/* Carefully attempt the cache operation */
|
||||
kvm_trap_emul_gva_lockless_begin(vcpu);
|
||||
err = fn(addr);
|
||||
kvm_trap_emul_gva_lockless_end(vcpu);
|
||||
|
||||
if (likely(!err))
|
||||
return EMULATE_DONE;
|
||||
|
||||
/*
|
||||
* Try to handle the fault and retry, maybe we just raced with a
|
||||
* GVA invalidation.
|
||||
*/
|
||||
switch (kvm_trap_emul_gva_fault(vcpu, addr, false)) {
|
||||
case KVM_MIPS_GVA:
|
||||
case KVM_MIPS_GPA:
|
||||
/* bad virtual or physical address */
|
||||
return EMULATE_FAIL;
|
||||
case KVM_MIPS_TLB:
|
||||
/* no matching guest TLB */
|
||||
vcpu->arch.host_cp0_badvaddr = addr;
|
||||
vcpu->arch.pc = curr_pc;
|
||||
kvm_mips_emulate_tlbmiss_ld(cause, NULL, run, vcpu);
|
||||
return EMULATE_EXCEPT;
|
||||
case KVM_MIPS_TLBINV:
|
||||
/* invalid matching guest TLB */
|
||||
vcpu->arch.host_cp0_badvaddr = addr;
|
||||
vcpu->arch.pc = curr_pc;
|
||||
kvm_mips_emulate_tlbinv_ld(cause, NULL, run, vcpu);
|
||||
return EMULATE_EXCEPT;
|
||||
default:
|
||||
break;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
|
||||
u32 *opc, u32 cause,
|
||||
struct kvm_run *run,
|
||||
struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct mips_coproc *cop0 = vcpu->arch.cop0;
|
||||
enum emulation_result er = EMULATE_DONE;
|
||||
u32 cache, op_inst, op, base;
|
||||
s16 offset;
|
||||
|
@ -1701,80 +1802,16 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
|
|||
goto done;
|
||||
}
|
||||
|
||||
preempt_disable();
|
||||
if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) {
|
||||
if (kvm_mips_host_tlb_lookup(vcpu, va) < 0 &&
|
||||
kvm_mips_handle_kseg0_tlb_fault(va, vcpu)) {
|
||||
kvm_err("%s: handling mapped kseg0 tlb fault for %lx, vcpu: %p, ASID: %#lx\n",
|
||||
__func__, va, vcpu, read_c0_entryhi());
|
||||
er = EMULATE_FAIL;
|
||||
preempt_enable();
|
||||
goto done;
|
||||
}
|
||||
} else if ((KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0) ||
|
||||
KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) {
|
||||
int index;
|
||||
|
||||
/* If an entry already exists then skip */
|
||||
if (kvm_mips_host_tlb_lookup(vcpu, va) >= 0)
|
||||
goto skip_fault;
|
||||
|
||||
/*
|
||||
* If address not in the guest TLB, then give the guest a fault,
|
||||
* the resulting handler will do the right thing
|
||||
*/
|
||||
index = kvm_mips_guest_tlb_lookup(vcpu, (va & VPN2_MASK) |
|
||||
(kvm_read_c0_guest_entryhi
|
||||
(cop0) & KVM_ENTRYHI_ASID));
|
||||
|
||||
if (index < 0) {
|
||||
vcpu->arch.host_cp0_badvaddr = va;
|
||||
vcpu->arch.pc = curr_pc;
|
||||
er = kvm_mips_emulate_tlbmiss_ld(cause, NULL, run,
|
||||
vcpu);
|
||||
preempt_enable();
|
||||
goto dont_update_pc;
|
||||
} else {
|
||||
struct kvm_mips_tlb *tlb = &vcpu->arch.guest_tlb[index];
|
||||
/*
|
||||
* Check if the entry is valid, if not then setup a TLB
|
||||
* invalid exception to the guest
|
||||
*/
|
||||
if (!TLB_IS_VALID(*tlb, va)) {
|
||||
vcpu->arch.host_cp0_badvaddr = va;
|
||||
vcpu->arch.pc = curr_pc;
|
||||
er = kvm_mips_emulate_tlbinv_ld(cause, NULL,
|
||||
run, vcpu);
|
||||
preempt_enable();
|
||||
goto dont_update_pc;
|
||||
}
|
||||
/*
|
||||
* We fault an entry from the guest tlb to the
|
||||
* shadow host TLB
|
||||
*/
|
||||
if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) {
|
||||
kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n",
|
||||
__func__, va, index, vcpu,
|
||||
read_c0_entryhi());
|
||||
er = EMULATE_FAIL;
|
||||
preempt_enable();
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
kvm_err("INVALID CACHE INDEX/ADDRESS (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n",
|
||||
cache, op, base, arch->gprs[base], offset);
|
||||
er = EMULATE_FAIL;
|
||||
preempt_enable();
|
||||
goto done;
|
||||
|
||||
}
|
||||
|
||||
skip_fault:
|
||||
/* XXXKYMA: Only a subset of cache ops are supported, used by Linux */
|
||||
if (op_inst == Hit_Writeback_Inv_D || op_inst == Hit_Invalidate_D) {
|
||||
flush_dcache_line(va);
|
||||
|
||||
/*
|
||||
* Perform the dcache part of icache synchronisation on the
|
||||
* guest's behalf.
|
||||
*/
|
||||
er = kvm_mips_guest_cache_op(protected_writeback_dcache_line,
|
||||
curr_pc, va, run, vcpu, cause);
|
||||
if (er != EMULATE_DONE)
|
||||
goto done;
|
||||
#ifdef CONFIG_KVM_MIPS_DYN_TRANS
|
||||
/*
|
||||
* Replace the CACHE instruction, with a SYNCI, not the same,
|
||||
|
@ -1783,8 +1820,15 @@ skip_fault:
|
|||
kvm_mips_trans_cache_va(inst, opc, vcpu);
|
||||
#endif
|
||||
} else if (op_inst == Hit_Invalidate_I) {
|
||||
flush_dcache_line(va);
|
||||
flush_icache_line(va);
|
||||
/* Perform the icache synchronisation on the guest's behalf */
|
||||
er = kvm_mips_guest_cache_op(protected_writeback_dcache_line,
|
||||
curr_pc, va, run, vcpu, cause);
|
||||
if (er != EMULATE_DONE)
|
||||
goto done;
|
||||
er = kvm_mips_guest_cache_op(protected_flush_icache_line,
|
||||
curr_pc, va, run, vcpu, cause);
|
||||
if (er != EMULATE_DONE)
|
||||
goto done;
|
||||
|
||||
#ifdef CONFIG_KVM_MIPS_DYN_TRANS
|
||||
/* Replace the CACHE instruction, with a SYNCI */
|
||||
|
@ -1796,17 +1840,13 @@ skip_fault:
|
|||
er = EMULATE_FAIL;
|
||||
}
|
||||
|
||||
preempt_enable();
|
||||
done:
|
||||
/* Rollback PC only if emulation was unsuccessful */
|
||||
if (er == EMULATE_FAIL)
|
||||
vcpu->arch.pc = curr_pc;
|
||||
|
||||
dont_update_pc:
|
||||
/*
|
||||
* This is for exceptions whose emulation updates the PC, so do not
|
||||
* overwrite the PC under any circumstances
|
||||
*/
|
||||
/* Guest exception needs guest to resume */
|
||||
if (er == EMULATE_EXCEPT)
|
||||
er = EMULATE_DONE;
|
||||
|
||||
return er;
|
||||
}
|
||||
|
@ -1817,12 +1857,14 @@ enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc,
|
|||
{
|
||||
union mips_instruction inst;
|
||||
enum emulation_result er = EMULATE_DONE;
|
||||
int err;
|
||||
|
||||
/* Fetch the instruction. */
|
||||
if (cause & CAUSEF_BD)
|
||||
opc += 1;
|
||||
|
||||
inst.word = kvm_get_inst(opc, vcpu);
|
||||
err = kvm_get_badinstr(opc, vcpu, &inst.word);
|
||||
if (err)
|
||||
return EMULATE_FAIL;
|
||||
|
||||
switch (inst.r_format.opcode) {
|
||||
case cop0_op:
|
||||
|
@ -1874,6 +1916,22 @@ unknown:
|
|||
return er;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_mips_guest_exception_base() - Find guest exception vector base address.
|
||||
*
|
||||
* Returns: The base address of the current guest exception vector, taking
|
||||
* both Guest.CP0_Status.BEV and Guest.CP0_EBase into account.
|
||||
*/
|
||||
long kvm_mips_guest_exception_base(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct mips_coproc *cop0 = vcpu->arch.cop0;
|
||||
|
||||
if (kvm_read_c0_guest_status(cop0) & ST0_BEV)
|
||||
return KVM_GUEST_CKSEG1ADDR(0x1fc00200);
|
||||
else
|
||||
return kvm_read_c0_guest_ebase(cop0) & MIPS_EBASE_BASE;
|
||||
}
|
||||
|
||||
enum emulation_result kvm_mips_emulate_syscall(u32 cause,
|
||||
u32 *opc,
|
||||
struct kvm_run *run,
|
||||
|
@ -1899,7 +1957,7 @@ enum emulation_result kvm_mips_emulate_syscall(u32 cause,
|
|||
(EXCCODE_SYS << CAUSEB_EXCCODE));
|
||||
|
||||
/* Set PC to the exception entry point */
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x180;
|
||||
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
|
||||
|
||||
} else {
|
||||
kvm_err("Trying to deliver SYSCALL when EXL is already set\n");
|
||||
|
@ -1933,13 +1991,13 @@ enum emulation_result kvm_mips_emulate_tlbmiss_ld(u32 cause,
|
|||
arch->pc);
|
||||
|
||||
/* set pc to the exception entry point */
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x0;
|
||||
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x0;
|
||||
|
||||
} else {
|
||||
kvm_debug("[EXL == 1] delivering TLB MISS @ pc %#lx\n",
|
||||
arch->pc);
|
||||
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x180;
|
||||
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
|
||||
}
|
||||
|
||||
kvm_change_c0_guest_cause(cop0, (0xff),
|
||||
|
@ -1949,8 +2007,6 @@ enum emulation_result kvm_mips_emulate_tlbmiss_ld(u32 cause,
|
|||
kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
|
||||
/* XXXKYMA: is the context register used by linux??? */
|
||||
kvm_write_c0_guest_entryhi(cop0, entryhi);
|
||||
/* Blow away the shadow host TLBs */
|
||||
kvm_mips_flush_host_tlb(1);
|
||||
|
||||
return EMULATE_DONE;
|
||||
}
|
||||
|
@ -1978,16 +2034,14 @@ enum emulation_result kvm_mips_emulate_tlbinv_ld(u32 cause,
|
|||
|
||||
kvm_debug("[EXL == 0] delivering TLB INV @ pc %#lx\n",
|
||||
arch->pc);
|
||||
|
||||
/* set pc to the exception entry point */
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x180;
|
||||
|
||||
} else {
|
||||
kvm_debug("[EXL == 1] delivering TLB MISS @ pc %#lx\n",
|
||||
arch->pc);
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x180;
|
||||
}
|
||||
|
||||
/* set pc to the exception entry point */
|
||||
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
|
||||
|
||||
kvm_change_c0_guest_cause(cop0, (0xff),
|
||||
(EXCCODE_TLBL << CAUSEB_EXCCODE));
|
||||
|
||||
|
@ -1995,8 +2049,6 @@ enum emulation_result kvm_mips_emulate_tlbinv_ld(u32 cause,
|
|||
kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
|
||||
/* XXXKYMA: is the context register used by linux??? */
|
||||
kvm_write_c0_guest_entryhi(cop0, entryhi);
|
||||
/* Blow away the shadow host TLBs */
|
||||
kvm_mips_flush_host_tlb(1);
|
||||
|
||||
return EMULATE_DONE;
|
||||
}
|
||||
|
@ -2025,11 +2077,11 @@ enum emulation_result kvm_mips_emulate_tlbmiss_st(u32 cause,
|
|||
arch->pc);
|
||||
|
||||
/* Set PC to the exception entry point */
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x0;
|
||||
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x0;
|
||||
} else {
|
||||
kvm_debug("[EXL == 1] Delivering TLB MISS @ pc %#lx\n",
|
||||
arch->pc);
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x180;
|
||||
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
|
||||
}
|
||||
|
||||
kvm_change_c0_guest_cause(cop0, (0xff),
|
||||
|
@ -2039,8 +2091,6 @@ enum emulation_result kvm_mips_emulate_tlbmiss_st(u32 cause,
|
|||
kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
|
||||
/* XXXKYMA: is the context register used by linux??? */
|
||||
kvm_write_c0_guest_entryhi(cop0, entryhi);
|
||||
/* Blow away the shadow host TLBs */
|
||||
kvm_mips_flush_host_tlb(1);
|
||||
|
||||
return EMULATE_DONE;
|
||||
}
|
||||
|
@ -2067,15 +2117,14 @@ enum emulation_result kvm_mips_emulate_tlbinv_st(u32 cause,
|
|||
|
||||
kvm_debug("[EXL == 0] Delivering TLB MISS @ pc %#lx\n",
|
||||
arch->pc);
|
||||
|
||||
/* Set PC to the exception entry point */
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x180;
|
||||
} else {
|
||||
kvm_debug("[EXL == 1] Delivering TLB MISS @ pc %#lx\n",
|
||||
arch->pc);
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x180;
|
||||
}
|
||||
|
||||
/* Set PC to the exception entry point */
|
||||
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
|
||||
|
||||
kvm_change_c0_guest_cause(cop0, (0xff),
|
||||
(EXCCODE_TLBS << CAUSEB_EXCCODE));
|
||||
|
||||
|
@ -2083,41 +2132,10 @@ enum emulation_result kvm_mips_emulate_tlbinv_st(u32 cause,
|
|||
kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
|
||||
/* XXXKYMA: is the context register used by linux??? */
|
||||
kvm_write_c0_guest_entryhi(cop0, entryhi);
|
||||
/* Blow away the shadow host TLBs */
|
||||
kvm_mips_flush_host_tlb(1);
|
||||
|
||||
return EMULATE_DONE;
|
||||
}
|
||||
|
||||
/* TLBMOD: store into address matching TLB with Dirty bit off */
|
||||
enum emulation_result kvm_mips_handle_tlbmod(u32 cause, u32 *opc,
|
||||
struct kvm_run *run,
|
||||
struct kvm_vcpu *vcpu)
|
||||
{
|
||||
enum emulation_result er = EMULATE_DONE;
|
||||
#ifdef DEBUG
|
||||
struct mips_coproc *cop0 = vcpu->arch.cop0;
|
||||
unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) |
|
||||
(kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID);
|
||||
int index;
|
||||
|
||||
/* If address not in the guest TLB, then we are in trouble */
|
||||
index = kvm_mips_guest_tlb_lookup(vcpu, entryhi);
|
||||
if (index < 0) {
|
||||
/* XXXKYMA Invalidate and retry */
|
||||
kvm_mips_host_tlb_inv(vcpu, vcpu->arch.host_cp0_badvaddr);
|
||||
kvm_err("%s: host got TLBMOD for %#lx but entry not present in Guest TLB\n",
|
||||
__func__, entryhi);
|
||||
kvm_mips_dump_guest_tlbs(vcpu);
|
||||
kvm_mips_dump_host_tlbs();
|
||||
return EMULATE_FAIL;
|
||||
}
|
||||
#endif
|
||||
|
||||
er = kvm_mips_emulate_tlbmod(cause, opc, run, vcpu);
|
||||
return er;
|
||||
}
|
||||
|
||||
enum emulation_result kvm_mips_emulate_tlbmod(u32 cause,
|
||||
u32 *opc,
|
||||
struct kvm_run *run,
|
||||
|
@ -2140,14 +2158,13 @@ enum emulation_result kvm_mips_emulate_tlbmod(u32 cause,
|
|||
|
||||
kvm_debug("[EXL == 0] Delivering TLB MOD @ pc %#lx\n",
|
||||
arch->pc);
|
||||
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x180;
|
||||
} else {
|
||||
kvm_debug("[EXL == 1] Delivering TLB MOD @ pc %#lx\n",
|
||||
arch->pc);
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x180;
|
||||
}
|
||||
|
||||
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
|
||||
|
||||
kvm_change_c0_guest_cause(cop0, (0xff),
|
||||
(EXCCODE_MOD << CAUSEB_EXCCODE));
|
||||
|
||||
|
@ -2155,8 +2172,6 @@ enum emulation_result kvm_mips_emulate_tlbmod(u32 cause,
|
|||
kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
|
||||
/* XXXKYMA: is the context register used by linux??? */
|
||||
kvm_write_c0_guest_entryhi(cop0, entryhi);
|
||||
/* Blow away the shadow host TLBs */
|
||||
kvm_mips_flush_host_tlb(1);
|
||||
|
||||
return EMULATE_DONE;
|
||||
}
|
||||
|
@ -2181,7 +2196,7 @@ enum emulation_result kvm_mips_emulate_fpu_exc(u32 cause,
|
|||
|
||||
}
|
||||
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x180;
|
||||
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
|
||||
|
||||
kvm_change_c0_guest_cause(cop0, (0xff),
|
||||
(EXCCODE_CPU << CAUSEB_EXCCODE));
|
||||
|
@ -2215,7 +2230,7 @@ enum emulation_result kvm_mips_emulate_ri_exc(u32 cause,
|
|||
(EXCCODE_RI << CAUSEB_EXCCODE));
|
||||
|
||||
/* Set PC to the exception entry point */
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x180;
|
||||
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
|
||||
|
||||
} else {
|
||||
kvm_err("Trying to deliver RI when EXL is already set\n");
|
||||
|
@ -2250,7 +2265,7 @@ enum emulation_result kvm_mips_emulate_bp_exc(u32 cause,
|
|||
(EXCCODE_BP << CAUSEB_EXCCODE));
|
||||
|
||||
/* Set PC to the exception entry point */
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x180;
|
||||
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
|
||||
|
||||
} else {
|
||||
kvm_err("Trying to deliver BP when EXL is already set\n");
|
||||
|
@ -2285,7 +2300,7 @@ enum emulation_result kvm_mips_emulate_trap_exc(u32 cause,
|
|||
(EXCCODE_TR << CAUSEB_EXCCODE));
|
||||
|
||||
/* Set PC to the exception entry point */
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x180;
|
||||
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
|
||||
|
||||
} else {
|
||||
kvm_err("Trying to deliver TRAP when EXL is already set\n");
|
||||
|
@ -2320,7 +2335,7 @@ enum emulation_result kvm_mips_emulate_msafpe_exc(u32 cause,
|
|||
(EXCCODE_MSAFPE << CAUSEB_EXCCODE));
|
||||
|
||||
/* Set PC to the exception entry point */
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x180;
|
||||
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
|
||||
|
||||
} else {
|
||||
kvm_err("Trying to deliver MSAFPE when EXL is already set\n");
|
||||
|
@ -2355,7 +2370,7 @@ enum emulation_result kvm_mips_emulate_fpe_exc(u32 cause,
|
|||
(EXCCODE_FPE << CAUSEB_EXCCODE));
|
||||
|
||||
/* Set PC to the exception entry point */
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x180;
|
||||
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
|
||||
|
||||
} else {
|
||||
kvm_err("Trying to deliver FPE when EXL is already set\n");
|
||||
|
@ -2390,7 +2405,7 @@ enum emulation_result kvm_mips_emulate_msadis_exc(u32 cause,
|
|||
(EXCCODE_MSADIS << CAUSEB_EXCCODE));
|
||||
|
||||
/* Set PC to the exception entry point */
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x180;
|
||||
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
|
||||
|
||||
} else {
|
||||
kvm_err("Trying to deliver MSADIS when EXL is already set\n");
|
||||
|
@ -2409,6 +2424,7 @@ enum emulation_result kvm_mips_handle_ri(u32 cause, u32 *opc,
|
|||
enum emulation_result er = EMULATE_DONE;
|
||||
unsigned long curr_pc;
|
||||
union mips_instruction inst;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* Update PC and hold onto current PC in case there is
|
||||
|
@ -2422,11 +2438,9 @@ enum emulation_result kvm_mips_handle_ri(u32 cause, u32 *opc,
|
|||
/* Fetch the instruction. */
|
||||
if (cause & CAUSEF_BD)
|
||||
opc += 1;
|
||||
|
||||
inst.word = kvm_get_inst(opc, vcpu);
|
||||
|
||||
if (inst.word == KVM_INVALID_INST) {
|
||||
kvm_err("%s: Cannot get inst @ %p\n", __func__, opc);
|
||||
err = kvm_get_badinstr(opc, vcpu, &inst.word);
|
||||
if (err) {
|
||||
kvm_err("%s: Cannot get inst @ %p (%d)\n", __func__, opc, err);
|
||||
return EMULATE_FAIL;
|
||||
}
|
||||
|
||||
|
@ -2557,7 +2571,7 @@ static enum emulation_result kvm_mips_emulate_exc(u32 cause,
|
|||
(exccode << CAUSEB_EXCCODE));
|
||||
|
||||
/* Set PC to the exception entry point */
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x180;
|
||||
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
|
||||
kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
|
||||
|
||||
kvm_debug("Delivering EXC %d @ pc %#lx, badVaddr: %#lx\n",
|
||||
|
@ -2670,7 +2684,8 @@ enum emulation_result kvm_mips_check_privilege(u32 cause,
|
|||
enum emulation_result kvm_mips_handle_tlbmiss(u32 cause,
|
||||
u32 *opc,
|
||||
struct kvm_run *run,
|
||||
struct kvm_vcpu *vcpu)
|
||||
struct kvm_vcpu *vcpu,
|
||||
bool write_fault)
|
||||
{
|
||||
enum emulation_result er = EMULATE_DONE;
|
||||
u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f;
|
||||
|
@ -2726,7 +2741,8 @@ enum emulation_result kvm_mips_handle_tlbmiss(u32 cause,
|
|||
* OK we have a Guest TLB entry, now inject it into the
|
||||
* shadow host TLB
|
||||
*/
|
||||
if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) {
|
||||
if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, va,
|
||||
write_fault)) {
|
||||
kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n",
|
||||
__func__, va, index, vcpu,
|
||||
read_c0_entryhi());
|
||||
|
|
|
@ -12,8 +12,11 @@
|
|||
*/
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/log2.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/msa.h>
|
||||
#include <asm/setup.h>
|
||||
#include <asm/tlbex.h>
|
||||
#include <asm/uasm.h>
|
||||
|
||||
/* Register names */
|
||||
|
@ -50,6 +53,8 @@
|
|||
/* Some CP0 registers */
|
||||
#define C0_HWRENA 7, 0
|
||||
#define C0_BADVADDR 8, 0
|
||||
#define C0_BADINSTR 8, 1
|
||||
#define C0_BADINSTRP 8, 2
|
||||
#define C0_ENTRYHI 10, 0
|
||||
#define C0_STATUS 12, 0
|
||||
#define C0_CAUSE 13, 0
|
||||
|
@ -89,6 +94,21 @@ static void *kvm_mips_build_ret_from_exit(void *addr);
|
|||
static void *kvm_mips_build_ret_to_guest(void *addr);
|
||||
static void *kvm_mips_build_ret_to_host(void *addr);
|
||||
|
||||
/*
|
||||
* The version of this function in tlbex.c uses current_cpu_type(), but for KVM
|
||||
* we assume symmetry.
|
||||
*/
|
||||
static int c0_kscratch(void)
|
||||
{
|
||||
switch (boot_cpu_type()) {
|
||||
case CPU_XLP:
|
||||
case CPU_XLR:
|
||||
return 22;
|
||||
default:
|
||||
return 31;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_mips_entry_setup() - Perform global setup for entry code.
|
||||
*
|
||||
|
@ -103,18 +123,21 @@ int kvm_mips_entry_setup(void)
|
|||
* We prefer to use KScratchN registers if they are available over the
|
||||
* defaults above, which may not work on all cores.
|
||||
*/
|
||||
unsigned int kscratch_mask = cpu_data[0].kscratch_mask & 0xfc;
|
||||
unsigned int kscratch_mask = cpu_data[0].kscratch_mask;
|
||||
|
||||
if (pgd_reg != -1)
|
||||
kscratch_mask &= ~BIT(pgd_reg);
|
||||
|
||||
/* Pick a scratch register for storing VCPU */
|
||||
if (kscratch_mask) {
|
||||
scratch_vcpu[0] = 31;
|
||||
scratch_vcpu[0] = c0_kscratch();
|
||||
scratch_vcpu[1] = ffs(kscratch_mask) - 1;
|
||||
kscratch_mask &= ~BIT(scratch_vcpu[1]);
|
||||
}
|
||||
|
||||
/* Pick a scratch register to use as a temp for saving state */
|
||||
if (kscratch_mask) {
|
||||
scratch_tmp[0] = 31;
|
||||
scratch_tmp[0] = c0_kscratch();
|
||||
scratch_tmp[1] = ffs(kscratch_mask) - 1;
|
||||
kscratch_mask &= ~BIT(scratch_tmp[1]);
|
||||
}
|
||||
|
@ -130,7 +153,7 @@ static void kvm_mips_build_save_scratch(u32 **p, unsigned int tmp,
|
|||
UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame);
|
||||
|
||||
/* Save the temp scratch register value in cp0_cause of stack frame */
|
||||
if (scratch_tmp[0] == 31) {
|
||||
if (scratch_tmp[0] == c0_kscratch()) {
|
||||
UASM_i_MFC0(p, tmp, scratch_tmp[0], scratch_tmp[1]);
|
||||
UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame);
|
||||
}
|
||||
|
@ -146,7 +169,7 @@ static void kvm_mips_build_restore_scratch(u32 **p, unsigned int tmp,
|
|||
UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame);
|
||||
UASM_i_MTC0(p, tmp, scratch_vcpu[0], scratch_vcpu[1]);
|
||||
|
||||
if (scratch_tmp[0] == 31) {
|
||||
if (scratch_tmp[0] == c0_kscratch()) {
|
||||
UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame);
|
||||
UASM_i_MTC0(p, tmp, scratch_tmp[0], scratch_tmp[1]);
|
||||
}
|
||||
|
@ -286,23 +309,26 @@ static void *kvm_mips_build_enter_guest(void *addr)
|
|||
uasm_i_andi(&p, T0, T0, KSU_USER | ST0_ERL | ST0_EXL);
|
||||
uasm_i_xori(&p, T0, T0, KSU_USER);
|
||||
uasm_il_bnez(&p, &r, T0, label_kernel_asid);
|
||||
UASM_i_ADDIU(&p, T1, K1,
|
||||
offsetof(struct kvm_vcpu_arch, guest_kernel_asid));
|
||||
UASM_i_ADDIU(&p, T1, K1, offsetof(struct kvm_vcpu_arch,
|
||||
guest_kernel_mm.context.asid));
|
||||
/* else user */
|
||||
UASM_i_ADDIU(&p, T1, K1,
|
||||
offsetof(struct kvm_vcpu_arch, guest_user_asid));
|
||||
UASM_i_ADDIU(&p, T1, K1, offsetof(struct kvm_vcpu_arch,
|
||||
guest_user_mm.context.asid));
|
||||
uasm_l_kernel_asid(&l, p);
|
||||
|
||||
/* t1: contains the base of the ASID array, need to get the cpu id */
|
||||
/* smp_processor_id */
|
||||
uasm_i_lw(&p, T2, offsetof(struct thread_info, cpu), GP);
|
||||
/* x4 */
|
||||
uasm_i_sll(&p, T2, T2, 2);
|
||||
/* index the ASID array */
|
||||
uasm_i_sll(&p, T2, T2, ilog2(sizeof(long)));
|
||||
UASM_i_ADDU(&p, T3, T1, T2);
|
||||
uasm_i_lw(&p, K0, 0, T3);
|
||||
UASM_i_LW(&p, K0, 0, T3);
|
||||
#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
|
||||
/* x sizeof(struct cpuinfo_mips)/4 */
|
||||
uasm_i_addiu(&p, T3, ZERO, sizeof(struct cpuinfo_mips)/4);
|
||||
/*
|
||||
* reuse ASID array offset
|
||||
* cpuinfo_mips is a multiple of sizeof(long)
|
||||
*/
|
||||
uasm_i_addiu(&p, T3, ZERO, sizeof(struct cpuinfo_mips)/sizeof(long));
|
||||
uasm_i_mul(&p, T2, T2, T3);
|
||||
|
||||
UASM_i_LA_mostly(&p, AT, (long)&cpu_data[0].asid_mask);
|
||||
|
@ -312,7 +338,20 @@ static void *kvm_mips_build_enter_guest(void *addr)
|
|||
#else
|
||||
uasm_i_andi(&p, K0, K0, MIPS_ENTRYHI_ASID);
|
||||
#endif
|
||||
uasm_i_mtc0(&p, K0, C0_ENTRYHI);
|
||||
|
||||
/*
|
||||
* Set up KVM T&E GVA pgd.
|
||||
* This does roughly the same as TLBMISS_HANDLER_SETUP_PGD():
|
||||
* - call tlbmiss_handler_setup_pgd(mm->pgd)
|
||||
* - but skips write into CP0_PWBase for now
|
||||
*/
|
||||
UASM_i_LW(&p, A0, (int)offsetof(struct mm_struct, pgd) -
|
||||
(int)offsetof(struct mm_struct, context.asid), T1);
|
||||
|
||||
UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd);
|
||||
uasm_i_jalr(&p, RA, T9);
|
||||
uasm_i_mtc0(&p, K0, C0_ENTRYHI);
|
||||
|
||||
uasm_i_ehb(&p);
|
||||
|
||||
/* Disable RDHWR access */
|
||||
|
@ -347,6 +386,80 @@ static void *kvm_mips_build_enter_guest(void *addr)
|
|||
return p;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_mips_build_tlb_refill_exception() - Assemble TLB refill handler.
|
||||
* @addr: Address to start writing code.
|
||||
* @handler: Address of common handler (within range of @addr).
|
||||
*
|
||||
* Assemble TLB refill exception fast path handler for guest execution.
|
||||
*
|
||||
* Returns: Next address after end of written function.
|
||||
*/
|
||||
void *kvm_mips_build_tlb_refill_exception(void *addr, void *handler)
|
||||
{
|
||||
u32 *p = addr;
|
||||
struct uasm_label labels[2];
|
||||
struct uasm_reloc relocs[2];
|
||||
struct uasm_label *l = labels;
|
||||
struct uasm_reloc *r = relocs;
|
||||
|
||||
memset(labels, 0, sizeof(labels));
|
||||
memset(relocs, 0, sizeof(relocs));
|
||||
|
||||
/* Save guest k1 into scratch register */
|
||||
UASM_i_MTC0(&p, K1, scratch_tmp[0], scratch_tmp[1]);
|
||||
|
||||
/* Get the VCPU pointer from the VCPU scratch register */
|
||||
UASM_i_MFC0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]);
|
||||
|
||||
/* Save guest k0 into VCPU structure */
|
||||
UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu, arch.gprs[K0]), K1);
|
||||
|
||||
/*
|
||||
* Some of the common tlbex code uses current_cpu_type(). For KVM we
|
||||
* assume symmetry and just disable preemption to silence the warning.
|
||||
*/
|
||||
preempt_disable();
|
||||
|
||||
/*
|
||||
* Now for the actual refill bit. A lot of this can be common with the
|
||||
* Linux TLB refill handler, however we don't need to handle so many
|
||||
* cases. We only need to handle user mode refills, and user mode runs
|
||||
* with 32-bit addressing.
|
||||
*
|
||||
* Therefore the branch to label_vmalloc generated by build_get_pmde64()
|
||||
* that isn't resolved should never actually get taken and is harmless
|
||||
* to leave in place for now.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */
|
||||
#else
|
||||
build_get_pgde32(&p, K0, K1); /* get pgd in K1 */
|
||||
#endif
|
||||
|
||||
/* we don't support huge pages yet */
|
||||
|
||||
build_get_ptep(&p, K0, K1);
|
||||
build_update_entries(&p, K0, K1);
|
||||
build_tlb_write_entry(&p, &l, &r, tlb_random);
|
||||
|
||||
preempt_enable();
|
||||
|
||||
/* Get the VCPU pointer from the VCPU scratch register again */
|
||||
UASM_i_MFC0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]);
|
||||
|
||||
/* Restore the guest's k0/k1 registers */
|
||||
UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu, arch.gprs[K0]), K1);
|
||||
uasm_i_ehb(&p);
|
||||
UASM_i_MFC0(&p, K1, scratch_tmp[0], scratch_tmp[1]);
|
||||
|
||||
/* Jump to guest */
|
||||
uasm_i_eret(&p);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_mips_build_exception() - Assemble first level guest exception handler.
|
||||
* @addr: Address to start writing code.
|
||||
|
@ -468,6 +581,18 @@ void *kvm_mips_build_exit(void *addr)
|
|||
uasm_i_mfc0(&p, K0, C0_CAUSE);
|
||||
uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch, host_cp0_cause), K1);
|
||||
|
||||
if (cpu_has_badinstr) {
|
||||
uasm_i_mfc0(&p, K0, C0_BADINSTR);
|
||||
uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch,
|
||||
host_cp0_badinstr), K1);
|
||||
}
|
||||
|
||||
if (cpu_has_badinstrp) {
|
||||
uasm_i_mfc0(&p, K0, C0_BADINSTRP);
|
||||
uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch,
|
||||
host_cp0_badinstrp), K1);
|
||||
}
|
||||
|
||||
/* Now restore the host state just enough to run the handlers */
|
||||
|
||||
/* Switch EBASE to the one used by Linux */
|
||||
|
|
|
@ -183,10 +183,11 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
|
|||
(exccode << CAUSEB_EXCCODE));
|
||||
|
||||
/* XXXSL Set PC to the interrupt exception entry point */
|
||||
arch->pc = kvm_mips_guest_exception_base(vcpu);
|
||||
if (kvm_read_c0_guest_cause(cop0) & CAUSEF_IV)
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x200;
|
||||
arch->pc += 0x200;
|
||||
else
|
||||
arch->pc = KVM_GUEST_KSEG0 + 0x180;
|
||||
arch->pc += 0x180;
|
||||
|
||||
clear_bit(priority, &vcpu->arch.pending_exceptions);
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include <asm/page.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/pgtable.h>
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
|
@ -63,18 +64,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
|||
{NULL}
|
||||
};
|
||||
|
||||
static int kvm_mips_reset_vcpu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int i;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
vcpu->arch.guest_kernel_asid[i] = 0;
|
||||
vcpu->arch.guest_user_asid[i] = 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* XXXKYMA: We are simulatoring a processor that has the WII bit set in
|
||||
* Config7, so we are "runnable" if interrupts are pending
|
||||
|
@ -104,39 +93,12 @@ void kvm_arch_check_processor_compat(void *rtn)
|
|||
*(int *)rtn = 0;
|
||||
}
|
||||
|
||||
static void kvm_mips_init_tlbs(struct kvm *kvm)
|
||||
{
|
||||
unsigned long wired;
|
||||
|
||||
/*
|
||||
* Add a wired entry to the TLB, it is used to map the commpage to
|
||||
* the Guest kernel
|
||||
*/
|
||||
wired = read_c0_wired();
|
||||
write_c0_wired(wired + 1);
|
||||
mtc0_tlbw_hazard();
|
||||
kvm->arch.commpage_tlb = wired;
|
||||
|
||||
kvm_debug("[%d] commpage TLB: %d\n", smp_processor_id(),
|
||||
kvm->arch.commpage_tlb);
|
||||
}
|
||||
|
||||
static void kvm_mips_init_vm_percpu(void *arg)
|
||||
{
|
||||
struct kvm *kvm = (struct kvm *)arg;
|
||||
|
||||
kvm_mips_init_tlbs(kvm);
|
||||
kvm_mips_callbacks->vm_init(kvm);
|
||||
|
||||
}
|
||||
|
||||
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
{
|
||||
if (atomic_inc_return(&kvm_mips_instance) == 1) {
|
||||
kvm_debug("%s: 1st KVM instance, setup host TLB parameters\n",
|
||||
__func__);
|
||||
on_each_cpu(kvm_mips_init_vm_percpu, kvm, 1);
|
||||
}
|
||||
/* Allocate page table to map GPA -> RPA */
|
||||
kvm->arch.gpa_mm.pgd = kvm_pgd_alloc();
|
||||
if (!kvm->arch.gpa_mm.pgd)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -156,13 +118,6 @@ void kvm_mips_free_vcpus(struct kvm *kvm)
|
|||
unsigned int i;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
/* Put the pages we reserved for the guest pmap */
|
||||
for (i = 0; i < kvm->arch.guest_pmap_npages; i++) {
|
||||
if (kvm->arch.guest_pmap[i] != KVM_INVALID_PAGE)
|
||||
kvm_release_pfn_clean(kvm->arch.guest_pmap[i]);
|
||||
}
|
||||
kfree(kvm->arch.guest_pmap);
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
kvm_arch_vcpu_free(vcpu);
|
||||
}
|
||||
|
@ -177,25 +132,17 @@ void kvm_mips_free_vcpus(struct kvm *kvm)
|
|||
mutex_unlock(&kvm->lock);
|
||||
}
|
||||
|
||||
static void kvm_mips_uninit_tlbs(void *arg)
|
||||
static void kvm_mips_free_gpa_pt(struct kvm *kvm)
|
||||
{
|
||||
/* Restore wired count */
|
||||
write_c0_wired(0);
|
||||
mtc0_tlbw_hazard();
|
||||
/* Clear out all the TLBs */
|
||||
kvm_local_flush_tlb_all();
|
||||
/* It should always be safe to remove after flushing the whole range */
|
||||
WARN_ON(!kvm_mips_flush_gpa_pt(kvm, 0, ~0));
|
||||
pgd_free(NULL, kvm->arch.gpa_mm.pgd);
|
||||
}
|
||||
|
||||
void kvm_arch_destroy_vm(struct kvm *kvm)
|
||||
{
|
||||
kvm_mips_free_vcpus(kvm);
|
||||
|
||||
/* If this is the last instance, restore wired count */
|
||||
if (atomic_dec_return(&kvm_mips_instance) == 0) {
|
||||
kvm_debug("%s: last KVM instance, restoring TLB parameters\n",
|
||||
__func__);
|
||||
on_each_cpu(kvm_mips_uninit_tlbs, NULL, 1);
|
||||
}
|
||||
kvm_mips_free_gpa_pt(kvm);
|
||||
}
|
||||
|
||||
long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl,
|
||||
|
@ -210,6 +157,32 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
|
|||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_flush_shadow_all(struct kvm *kvm)
|
||||
{
|
||||
/* Flush whole GPA */
|
||||
kvm_mips_flush_gpa_pt(kvm, 0, ~0);
|
||||
|
||||
/* Let implementation do the rest */
|
||||
kvm_mips_callbacks->flush_shadow_all(kvm);
|
||||
}
|
||||
|
||||
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot)
|
||||
{
|
||||
/*
|
||||
* The slot has been made invalid (ready for moving or deletion), so we
|
||||
* need to ensure that it can no longer be accessed by any guest VCPUs.
|
||||
*/
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
/* Flush slot from GPA */
|
||||
kvm_mips_flush_gpa_pt(kvm, slot->base_gfn,
|
||||
slot->base_gfn + slot->npages - 1);
|
||||
/* Let implementation do the rest */
|
||||
kvm_mips_callbacks->flush_shadow_memslot(kvm, slot);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
|
@ -224,35 +197,32 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
|
|||
const struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
unsigned long npages = 0;
|
||||
int i;
|
||||
int needs_flush;
|
||||
|
||||
kvm_debug("%s: kvm: %p slot: %d, GPA: %llx, size: %llx, QVA: %llx\n",
|
||||
__func__, kvm, mem->slot, mem->guest_phys_addr,
|
||||
mem->memory_size, mem->userspace_addr);
|
||||
|
||||
/* Setup Guest PMAP table */
|
||||
if (!kvm->arch.guest_pmap) {
|
||||
if (mem->slot == 0)
|
||||
npages = mem->memory_size >> PAGE_SHIFT;
|
||||
|
||||
if (npages) {
|
||||
kvm->arch.guest_pmap_npages = npages;
|
||||
kvm->arch.guest_pmap =
|
||||
kzalloc(npages * sizeof(unsigned long), GFP_KERNEL);
|
||||
|
||||
if (!kvm->arch.guest_pmap) {
|
||||
kvm_err("Failed to allocate guest PMAP\n");
|
||||
return;
|
||||
}
|
||||
|
||||
kvm_debug("Allocated space for Guest PMAP Table (%ld pages) @ %p\n",
|
||||
npages, kvm->arch.guest_pmap);
|
||||
|
||||
/* Now setup the page table */
|
||||
for (i = 0; i < npages; i++)
|
||||
kvm->arch.guest_pmap[i] = KVM_INVALID_PAGE;
|
||||
}
|
||||
/*
|
||||
* If dirty page logging is enabled, write protect all pages in the slot
|
||||
* ready for dirty logging.
|
||||
*
|
||||
* There is no need to do this in any of the following cases:
|
||||
* CREATE: No dirty mappings will already exist.
|
||||
* MOVE/DELETE: The old mappings will already have been cleaned up by
|
||||
* kvm_arch_flush_shadow_memslot()
|
||||
*/
|
||||
if (change == KVM_MR_FLAGS_ONLY &&
|
||||
(!(old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
|
||||
new->flags & KVM_MEM_LOG_DIRTY_PAGES)) {
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
/* Write protect GPA page table entries */
|
||||
needs_flush = kvm_mips_mkclean_gpa_pt(kvm, new->base_gfn,
|
||||
new->base_gfn + new->npages - 1);
|
||||
/* Let implementation do the rest */
|
||||
if (needs_flush)
|
||||
kvm_mips_callbacks->flush_shadow_memslot(kvm, new);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
}
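For reference, the KVM_MR_FLAGS_ONLY branch above is reached when userspace toggles KVM_MEM_LOG_DIRTY_PAGES on an already-registered slot. A minimal userspace sketch of that call, not part of this patch; the helper name and the vm_fd/slot parameters are illustrative:

/* Illustrative userspace helper, not part of this patch. */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/*
 * Re-register an existing memslot with dirty logging enabled. The slot,
 * guest physical base, size and host address must match the original
 * registration; only the flags change (the KVM_MR_FLAGS_ONLY case above).
 */
static int enable_dirty_logging(int vm_fd, __u32 slot, __u64 gpa,
				__u64 size, void *host_addr)
{
	struct kvm_userspace_memory_region region;

	memset(&region, 0, sizeof(region));
	region.slot = slot;
	region.flags = KVM_MEM_LOG_DIRTY_PAGES;
	region.guest_phys_addr = gpa;
	region.memory_size = size;
	region.userspace_addr = (unsigned long)host_addr;

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}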
|
||||
|
||||
|
@ -276,7 +246,7 @@ static inline void dump_handler(const char *symbol, void *start, void *end)
|
|||
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
|
||||
{
|
||||
int err, size;
|
||||
void *gebase, *p, *handler;
|
||||
void *gebase, *p, *handler, *refill_start, *refill_end;
|
||||
int i;
|
||||
|
||||
struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
|
||||
|
@ -329,8 +299,9 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
|
|||
/* Build guest exception vectors dynamically in unmapped memory */
|
||||
handler = gebase + 0x2000;
|
||||
|
||||
/* TLB Refill, EXL = 0 */
|
||||
kvm_mips_build_exception(gebase, handler);
|
||||
/* TLB refill */
|
||||
refill_start = gebase;
|
||||
refill_end = kvm_mips_build_tlb_refill_exception(refill_start, handler);
|
||||
|
||||
/* General Exception Entry point */
|
||||
kvm_mips_build_exception(gebase + 0x180, handler);
|
||||
|
@ -356,6 +327,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
|
|||
pr_debug("#include <asm/regdef.h>\n");
|
||||
pr_debug("\n");
|
||||
dump_handler("kvm_vcpu_run", vcpu->arch.vcpu_run, p);
|
||||
dump_handler("kvm_tlb_refill", refill_start, refill_end);
|
||||
dump_handler("kvm_gen_exc", gebase + 0x180, gebase + 0x200);
|
||||
dump_handler("kvm_exit", gebase + 0x2000, vcpu->arch.vcpu_run);
|
||||
|
||||
|
@ -406,6 +378,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
|
|||
|
||||
kvm_mips_dump_stats(vcpu);
|
||||
|
||||
kvm_mmu_free_memory_caches(vcpu);
|
||||
kfree(vcpu->arch.guest_ebase);
|
||||
kfree(vcpu->arch.kseg0_commpage);
|
||||
kfree(vcpu);
|
||||
|
@ -422,37 +395,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
|
|||
return -ENOIOCTLCMD;
|
||||
}
|
||||
|
||||
/* Must be called with preemption disabled, just before entering guest */
|
||||
static void kvm_mips_check_asids(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct mips_coproc *cop0 = vcpu->arch.cop0;
|
||||
int i, cpu = smp_processor_id();
|
||||
unsigned int gasid;
|
||||
|
||||
/*
|
||||
* Lazy host ASID regeneration for guest user mode.
|
||||
* If the guest ASID has changed since the last guest usermode
|
||||
* execution, regenerate the host ASID so as to invalidate stale TLB
|
||||
* entries.
|
||||
*/
|
||||
if (!KVM_GUEST_KERNEL_MODE(vcpu)) {
|
||||
gasid = kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID;
|
||||
if (gasid != vcpu->arch.last_user_gasid) {
|
||||
kvm_get_new_mmu_context(&vcpu->arch.guest_user_mm, cpu,
|
||||
vcpu);
|
||||
vcpu->arch.guest_user_asid[cpu] =
|
||||
vcpu->arch.guest_user_mm.context.asid[cpu];
|
||||
for_each_possible_cpu(i)
|
||||
if (i != cpu)
|
||||
vcpu->arch.guest_user_asid[cpu] = 0;
|
||||
vcpu->arch.last_user_gasid = gasid;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
{
|
||||
int r = 0;
|
||||
int r = -EINTR;
|
||||
sigset_t sigsaved;
|
||||
|
||||
if (vcpu->sigset_active)
|
||||
|
@ -464,31 +409,30 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
|||
vcpu->mmio_needed = 0;
|
||||
}
|
||||
|
||||
if (run->immediate_exit)
|
||||
goto out;
|
||||
|
||||
lose_fpu(1);
|
||||
|
||||
local_irq_disable();
|
||||
/* Check if we have any exceptions/interrupts pending */
|
||||
kvm_mips_deliver_interrupts(vcpu,
|
||||
kvm_read_c0_guest_cause(vcpu->arch.cop0));
|
||||
|
||||
guest_enter_irqoff();
|
||||
|
||||
/* Disable hardware page table walking while in guest */
|
||||
htw_stop();
|
||||
|
||||
trace_kvm_enter(vcpu);
|
||||
|
||||
kvm_mips_check_asids(vcpu);
|
||||
/*
|
||||
* Make sure the read of VCPU requests in vcpu_run() callback is not
|
||||
* reordered ahead of the write to vcpu->mode, or we could miss a TLB
|
||||
* flush request while the requester sees the VCPU as outside of guest
|
||||
* mode and not needing an IPI.
|
||||
*/
|
||||
smp_store_mb(vcpu->mode, IN_GUEST_MODE);
|
||||
|
||||
r = kvm_mips_callbacks->vcpu_run(run, vcpu);
|
||||
|
||||
r = vcpu->arch.vcpu_run(run, vcpu);
|
||||
trace_kvm_out(vcpu);
|
||||
|
||||
/* Re-enable HTW before enabling interrupts */
|
||||
htw_start();
|
||||
|
||||
guest_exit_irqoff();
|
||||
local_irq_enable();
|
||||
|
||||
out:
|
||||
if (vcpu->sigset_active)
|
||||
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
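The r = -EINTR default and the run->immediate_exit check above are the MIPS side of the new signal-free exit path (KVM_CAP_IMMEDIATE_EXIT). A sketch of how userspace is expected to use it, not part of this patch; the helper is illustrative and 'run' is the mmap()ed struct kvm_run for the vCPU:

/* Illustrative userspace pattern, not part of this patch. */
#include <errno.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>

/*
 * A signal handler (or another thread, before kicking the vCPU thread)
 * sets run->immediate_exit; the next KVM_RUN then returns -EINTR without
 * entering the guest, so no sigmask juggling is needed.
 */
static int run_vcpu_once(int vcpu_fd, struct kvm_run *run)
{
	int ret = ioctl(vcpu_fd, KVM_RUN, 0);

	if (ret < 0 && errno == EINTR && run->immediate_exit) {
		run->immediate_exit = 0;	/* acknowledge, retry later */
		return 0;
	}
	return ret;
}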
|
||||
|
||||
|
@ -580,33 +524,6 @@ static u64 kvm_mips_get_one_regs[] = {
|
|||
KVM_REG_MIPS_LO,
|
||||
#endif
|
||||
KVM_REG_MIPS_PC,
|
||||
|
||||
KVM_REG_MIPS_CP0_INDEX,
|
||||
KVM_REG_MIPS_CP0_CONTEXT,
|
||||
KVM_REG_MIPS_CP0_USERLOCAL,
|
||||
KVM_REG_MIPS_CP0_PAGEMASK,
|
||||
KVM_REG_MIPS_CP0_WIRED,
|
||||
KVM_REG_MIPS_CP0_HWRENA,
|
||||
KVM_REG_MIPS_CP0_BADVADDR,
|
||||
KVM_REG_MIPS_CP0_COUNT,
|
||||
KVM_REG_MIPS_CP0_ENTRYHI,
|
||||
KVM_REG_MIPS_CP0_COMPARE,
|
||||
KVM_REG_MIPS_CP0_STATUS,
|
||||
KVM_REG_MIPS_CP0_CAUSE,
|
||||
KVM_REG_MIPS_CP0_EPC,
|
||||
KVM_REG_MIPS_CP0_PRID,
|
||||
KVM_REG_MIPS_CP0_CONFIG,
|
||||
KVM_REG_MIPS_CP0_CONFIG1,
|
||||
KVM_REG_MIPS_CP0_CONFIG2,
|
||||
KVM_REG_MIPS_CP0_CONFIG3,
|
||||
KVM_REG_MIPS_CP0_CONFIG4,
|
||||
KVM_REG_MIPS_CP0_CONFIG5,
|
||||
KVM_REG_MIPS_CP0_CONFIG7,
|
||||
KVM_REG_MIPS_CP0_ERROREPC,
|
||||
|
||||
KVM_REG_MIPS_COUNT_CTL,
|
||||
KVM_REG_MIPS_COUNT_RESUME,
|
||||
KVM_REG_MIPS_COUNT_HZ,
|
||||
};
|
||||
|
||||
static u64 kvm_mips_get_one_regs_fpu[] = {
|
||||
|
@ -619,15 +536,6 @@ static u64 kvm_mips_get_one_regs_msa[] = {
|
|||
KVM_REG_MIPS_MSA_CSR,
|
||||
};
|
||||
|
||||
static u64 kvm_mips_get_one_regs_kscratch[] = {
|
||||
KVM_REG_MIPS_CP0_KSCRATCH1,
|
||||
KVM_REG_MIPS_CP0_KSCRATCH2,
|
||||
KVM_REG_MIPS_CP0_KSCRATCH3,
|
||||
KVM_REG_MIPS_CP0_KSCRATCH4,
|
||||
KVM_REG_MIPS_CP0_KSCRATCH5,
|
||||
KVM_REG_MIPS_CP0_KSCRATCH6,
|
||||
};
|
||||
|
||||
static unsigned long kvm_mips_num_regs(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long ret;
|
||||
|
@ -641,7 +549,6 @@ static unsigned long kvm_mips_num_regs(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
if (kvm_mips_guest_can_have_msa(&vcpu->arch))
|
||||
ret += ARRAY_SIZE(kvm_mips_get_one_regs_msa) + 32;
|
||||
ret += __arch_hweight8(vcpu->arch.kscratch_enabled);
|
||||
ret += kvm_mips_callbacks->num_regs(vcpu);
|
||||
|
||||
return ret;
|
||||
|
@ -694,16 +601,6 @@ static int kvm_mips_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices)
|
|||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 6; ++i) {
|
||||
if (!(vcpu->arch.kscratch_enabled & BIT(i + 2)))
|
||||
continue;
|
||||
|
||||
if (copy_to_user(indices, &kvm_mips_get_one_regs_kscratch[i],
|
||||
sizeof(kvm_mips_get_one_regs_kscratch[i])))
|
||||
return -EFAULT;
|
||||
++indices;
|
||||
}
|
||||
|
||||
return kvm_mips_callbacks->copy_reg_indices(vcpu, indices);
|
||||
}
|
||||
|
||||
|
@ -794,95 +691,6 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
|
|||
v = fpu->msacsr;
|
||||
break;
|
||||
|
||||
/* Co-processor 0 registers */
|
||||
case KVM_REG_MIPS_CP0_INDEX:
|
||||
v = (long)kvm_read_c0_guest_index(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CONTEXT:
|
||||
v = (long)kvm_read_c0_guest_context(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_USERLOCAL:
|
||||
v = (long)kvm_read_c0_guest_userlocal(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_PAGEMASK:
|
||||
v = (long)kvm_read_c0_guest_pagemask(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_WIRED:
|
||||
v = (long)kvm_read_c0_guest_wired(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_HWRENA:
|
||||
v = (long)kvm_read_c0_guest_hwrena(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_BADVADDR:
|
||||
v = (long)kvm_read_c0_guest_badvaddr(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_ENTRYHI:
|
||||
v = (long)kvm_read_c0_guest_entryhi(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_COMPARE:
|
||||
v = (long)kvm_read_c0_guest_compare(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_STATUS:
|
||||
v = (long)kvm_read_c0_guest_status(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CAUSE:
|
||||
v = (long)kvm_read_c0_guest_cause(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_EPC:
|
||||
v = (long)kvm_read_c0_guest_epc(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_PRID:
|
||||
v = (long)kvm_read_c0_guest_prid(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CONFIG:
|
||||
v = (long)kvm_read_c0_guest_config(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CONFIG1:
|
||||
v = (long)kvm_read_c0_guest_config1(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CONFIG2:
|
||||
v = (long)kvm_read_c0_guest_config2(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CONFIG3:
|
||||
v = (long)kvm_read_c0_guest_config3(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CONFIG4:
|
||||
v = (long)kvm_read_c0_guest_config4(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CONFIG5:
|
||||
v = (long)kvm_read_c0_guest_config5(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CONFIG7:
|
||||
v = (long)kvm_read_c0_guest_config7(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_ERROREPC:
|
||||
v = (long)kvm_read_c0_guest_errorepc(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6:
|
||||
idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2;
|
||||
if (!(vcpu->arch.kscratch_enabled & BIT(idx)))
|
||||
return -EINVAL;
|
||||
switch (idx) {
|
||||
case 2:
|
||||
v = (long)kvm_read_c0_guest_kscratch1(cop0);
|
||||
break;
|
||||
case 3:
|
||||
v = (long)kvm_read_c0_guest_kscratch2(cop0);
|
||||
break;
|
||||
case 4:
|
||||
v = (long)kvm_read_c0_guest_kscratch3(cop0);
|
||||
break;
|
||||
case 5:
|
||||
v = (long)kvm_read_c0_guest_kscratch4(cop0);
|
||||
break;
|
||||
case 6:
|
||||
v = (long)kvm_read_c0_guest_kscratch5(cop0);
|
||||
break;
|
||||
case 7:
|
||||
v = (long)kvm_read_c0_guest_kscratch6(cop0);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
/* registers to be handled specially */
|
||||
default:
|
||||
ret = kvm_mips_callbacks->get_one_reg(vcpu, reg, &v);
|
||||
|
@ -1014,68 +822,6 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
|
|||
fpu->msacsr = v;
|
||||
break;
|
||||
|
||||
/* Co-processor 0 registers */
|
||||
case KVM_REG_MIPS_CP0_INDEX:
|
||||
kvm_write_c0_guest_index(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CONTEXT:
|
||||
kvm_write_c0_guest_context(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_USERLOCAL:
|
||||
kvm_write_c0_guest_userlocal(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_PAGEMASK:
|
||||
kvm_write_c0_guest_pagemask(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_WIRED:
|
||||
kvm_write_c0_guest_wired(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_HWRENA:
|
||||
kvm_write_c0_guest_hwrena(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_BADVADDR:
|
||||
kvm_write_c0_guest_badvaddr(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_ENTRYHI:
|
||||
kvm_write_c0_guest_entryhi(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_STATUS:
|
||||
kvm_write_c0_guest_status(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_EPC:
|
||||
kvm_write_c0_guest_epc(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_PRID:
|
||||
kvm_write_c0_guest_prid(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_ERROREPC:
|
||||
kvm_write_c0_guest_errorepc(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6:
|
||||
idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2;
|
||||
if (!(vcpu->arch.kscratch_enabled & BIT(idx)))
|
||||
return -EINVAL;
|
||||
switch (idx) {
|
||||
case 2:
|
||||
kvm_write_c0_guest_kscratch1(cop0, v);
|
||||
break;
|
||||
case 3:
|
||||
kvm_write_c0_guest_kscratch2(cop0, v);
|
||||
break;
|
||||
case 4:
|
||||
kvm_write_c0_guest_kscratch3(cop0, v);
|
||||
break;
|
||||
case 5:
|
||||
kvm_write_c0_guest_kscratch4(cop0, v);
|
||||
break;
|
||||
case 6:
|
||||
kvm_write_c0_guest_kscratch5(cop0, v);
|
||||
break;
|
||||
case 7:
|
||||
kvm_write_c0_guest_kscratch6(cop0, v);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
/* registers to be handled specially */
|
||||
default:
|
||||
return kvm_mips_callbacks->set_one_reg(vcpu, reg, v);
|
||||
|
@ -1144,18 +890,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
|
|||
return -E2BIG;
|
||||
return kvm_mips_copy_reg_indices(vcpu, user_list->reg);
|
||||
}
|
||||
case KVM_NMI:
|
||||
/* Treat the NMI as a CPU reset */
|
||||
r = kvm_mips_reset_vcpu(vcpu);
|
||||
break;
|
||||
case KVM_INTERRUPT:
|
||||
{
|
||||
struct kvm_mips_interrupt irq;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&irq, argp, sizeof(irq)))
|
||||
goto out;
|
||||
|
||||
return -EFAULT;
|
||||
kvm_debug("[%d] %s: irq: %d\n", vcpu->vcpu_id, __func__,
|
||||
irq.irq);
|
||||
|
||||
|
@ -1165,56 +905,57 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
|
|||
case KVM_ENABLE_CAP: {
|
||||
struct kvm_enable_cap cap;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&cap, argp, sizeof(cap)))
|
||||
goto out;
|
||||
return -EFAULT;
|
||||
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
r = -ENOIOCTLCMD;
|
||||
}
|
||||
|
||||
out:
|
||||
return r;
|
||||
}
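The KVM_ENABLE_CAP case above is how userspace opts in to optional vCPU features; on MIPS this is the path used for capabilities such as KVM_CAP_MIPS_FPU. A hedged userspace sketch, not part of this patch (the helper name is illustrative):

/* Illustrative userspace sketch, not part of this patch. */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* Expose the FPU to the guest on this vCPU; check the capability with
 * KVM_CHECK_EXTENSION before relying on it. */
static int enable_guest_fpu(int vcpu_fd)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_MIPS_FPU;

	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}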
|
||||
|
||||
/* Get (and clear) the dirty memory log for a memory slot. */
/**
 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
 * @kvm: kvm instance
 * @log: slot id and address to which we copy the log
 *
 * Steps 1-4 below provide a general overview of dirty page logging. See the
 * kvm_get_dirty_log_protect() function description for additional details.
 *
 * We call kvm_get_dirty_log_protect() to handle steps 1-3; upon return we
 * always flush the TLB (step 4) even if the previous step failed and the dirty
 * bitmap may be corrupt. Regardless of the previous outcome, the KVM logging
 * API does not preclude a subsequent dirty log read by user space. Flushing
 * the TLB ensures writes will be marked dirty for the next log read.
 *
 * 1. Take a snapshot of the bit and clear it if needed.
 * 2. Write protect the corresponding page.
 * 3. Copy the snapshot to userspace.
 * 4. Flush TLBs if needed.
 */
|
||||
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
|
||||
{
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
unsigned long ga, ga_end;
|
||||
int is_dirty = 0;
|
||||
bool is_dirty = false;
|
||||
int r;
|
||||
unsigned long n;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
|
||||
r = kvm_get_dirty_log(kvm, log, &is_dirty);
|
||||
if (r)
|
||||
goto out;
|
||||
r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);
|
||||
|
||||
/* If nothing is dirty, don't bother messing with page tables. */
|
||||
if (is_dirty) {
|
||||
slots = kvm_memslots(kvm);
|
||||
memslot = id_to_memslot(slots, log->slot);
|
||||
|
||||
ga = memslot->base_gfn << PAGE_SHIFT;
|
||||
ga_end = ga + (memslot->npages << PAGE_SHIFT);
|
||||
|
||||
kvm_info("%s: dirty, ga: %#lx, ga_end %#lx\n", __func__, ga,
|
||||
ga_end);
|
||||
|
||||
n = kvm_dirty_bitmap_bytes(memslot);
|
||||
memset(memslot->dirty_bitmap, 0, n);
|
||||
/* Let implementation handle TLB/GVA invalidation */
|
||||
kvm_mips_callbacks->flush_shadow_memslot(kvm, memslot);
|
||||
}
|
||||
|
||||
r = 0;
|
||||
out:
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
return r;
|
||||
|
||||
}
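From the other side of the ABI, the function above services the KVM_GET_DIRTY_LOG vm ioctl. A minimal userspace sketch, not part of this patch (the caller must size the bitmap to the slot):

/* Illustrative userspace sketch, not part of this patch. */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/*
 * Fetch (and clear) the dirty bitmap of one memslot. 'bitmap' must hold at
 * least one bit per page in the slot, rounded up to a multiple of 64 bits.
 */
static int get_dirty_log(int vm_fd, __u32 slot, void *bitmap)
{
	struct kvm_dirty_log log;

	memset(&log, 0, sizeof(log));
	log.slot = slot;
	log.dirty_bitmap = bitmap;

	return ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
}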
|
||||
|
||||
long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
|
||||
|
@ -1282,11 +1023,20 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
switch (ext) {
|
||||
case KVM_CAP_ONE_REG:
|
||||
case KVM_CAP_ENABLE_CAP:
|
||||
case KVM_CAP_READONLY_MEM:
|
||||
case KVM_CAP_SYNC_MMU:
|
||||
case KVM_CAP_IMMEDIATE_EXIT:
|
||||
r = 1;
|
||||
break;
|
||||
case KVM_CAP_COALESCED_MMIO:
|
||||
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
|
||||
break;
|
||||
case KVM_CAP_NR_VCPUS:
|
||||
r = num_online_cpus();
|
||||
break;
|
||||
case KVM_CAP_MAX_VCPUS:
|
||||
r = KVM_MAX_VCPUS;
|
||||
break;
|
||||
case KVM_CAP_MIPS_FPU:
|
||||
/* We don't handle systems with inconsistent cpu_has_fpu */
|
||||
r = !!raw_cpu_has_fpu;
|
||||
|
@ -1400,13 +1150,23 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
|
|||
|
||||
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_mips_callbacks->vcpu_init(vcpu);
|
||||
int err;
|
||||
|
||||
err = kvm_mips_callbacks->vcpu_init(vcpu);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
hrtimer_init(&vcpu->arch.comparecount_timer, CLOCK_MONOTONIC,
|
||||
HRTIMER_MODE_REL);
|
||||
vcpu->arch.comparecount_timer.function = kvm_mips_comparecount_wakeup;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_mips_callbacks->vcpu_uninit(vcpu);
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
|
||||
struct kvm_translation *tr)
|
||||
{
|
||||
|
@ -1440,8 +1200,11 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
|||
u32 __user *opc = (u32 __user *) vcpu->arch.pc;
|
||||
unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
|
||||
enum emulation_result er = EMULATE_DONE;
|
||||
u32 inst;
|
||||
int ret = RESUME_GUEST;
|
||||
|
||||
vcpu->mode = OUTSIDE_GUEST_MODE;
|
||||
|
||||
/* re-enable HTW before enabling interrupts */
|
||||
htw_start();
|
||||
|
||||
|
@ -1564,8 +1327,12 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
|||
break;
|
||||
|
||||
default:
|
||||
if (cause & CAUSEF_BD)
|
||||
opc += 1;
|
||||
inst = 0;
|
||||
kvm_get_badinstr(opc, vcpu, &inst);
|
||||
kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n",
|
||||
exccode, opc, kvm_get_inst(opc, vcpu), badvaddr,
|
||||
exccode, opc, inst, badvaddr,
|
||||
kvm_read_c0_guest_status(vcpu->arch.cop0));
|
||||
kvm_arch_vcpu_dump_regs(vcpu);
|
||||
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
|
@ -1593,7 +1360,15 @@ skip_emul:
|
|||
if (ret == RESUME_GUEST) {
|
||||
trace_kvm_reenter(vcpu);
|
||||
|
||||
kvm_mips_check_asids(vcpu);
|
||||
/*
|
||||
* Make sure the read of VCPU requests in vcpu_reenter()
|
||||
* callback is not reordered ahead of the write to vcpu->mode,
|
||||
* or we could miss a TLB flush request while the requester sees
|
||||
* the VCPU as outside of guest mode and not needing an IPI.
|
||||
*/
|
||||
smp_store_mb(vcpu->mode, IN_GUEST_MODE);
|
||||
|
||||
kvm_mips_callbacks->vcpu_reenter(run, vcpu);
|
||||
|
||||
/*
|
||||
* If FPU / MSA are enabled (i.e. the guest's FPU / MSA context
|
||||
[arch/mips/kvm/mmu.c: 1339-line diff suppressed by the viewer (too large to display)]
@ -33,28 +33,20 @@
|
|||
#define KVM_GUEST_PC_TLB 0
|
||||
#define KVM_GUEST_SP_TLB 1
|
||||
|
||||
atomic_t kvm_mips_instance;
|
||||
EXPORT_SYMBOL_GPL(kvm_mips_instance);
|
||||
|
||||
static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
return vcpu->arch.guest_kernel_asid[cpu] &
|
||||
cpu_asid_mask(&cpu_data[cpu]);
|
||||
return cpu_asid(cpu, kern_mm);
|
||||
}
|
||||
|
||||
static u32 kvm_mips_get_user_asid(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct mm_struct *user_mm = &vcpu->arch.guest_user_mm;
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
return vcpu->arch.guest_user_asid[cpu] &
|
||||
cpu_asid_mask(&cpu_data[cpu]);
|
||||
}
|
||||
|
||||
inline u32 kvm_mips_get_commpage_asid(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->kvm->arch.commpage_tlb;
|
||||
return cpu_asid(cpu, user_mm);
|
||||
}
|
||||
|
||||
/* Structure defining a TLB entry data set. */
|
||||
|
@ -104,109 +96,6 @@ void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mips_dump_guest_tlbs);
|
||||
|
||||
/* XXXKYMA: Must be called with interrupts disabled */
|
||||
/* set flush_dcache_mask == 0 if no dcache flush required */
|
||||
int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi,
|
||||
unsigned long entrylo0, unsigned long entrylo1,
|
||||
int flush_dcache_mask)
|
||||
{
|
||||
unsigned long flags;
|
||||
unsigned long old_entryhi;
|
||||
int idx;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
old_entryhi = read_c0_entryhi();
|
||||
write_c0_entryhi(entryhi);
|
||||
mtc0_tlbw_hazard();
|
||||
|
||||
tlb_probe();
|
||||
tlb_probe_hazard();
|
||||
idx = read_c0_index();
|
||||
|
||||
if (idx > current_cpu_data.tlbsize) {
|
||||
kvm_err("%s: Invalid Index: %d\n", __func__, idx);
|
||||
kvm_mips_dump_host_tlbs();
|
||||
local_irq_restore(flags);
|
||||
return -1;
|
||||
}
|
||||
|
||||
write_c0_entrylo0(entrylo0);
|
||||
write_c0_entrylo1(entrylo1);
|
||||
mtc0_tlbw_hazard();
|
||||
|
||||
if (idx < 0)
|
||||
tlb_write_random();
|
||||
else
|
||||
tlb_write_indexed();
|
||||
tlbw_use_hazard();
|
||||
|
||||
kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0(R): 0x%08lx, entrylo1(R): 0x%08lx\n",
|
||||
vcpu->arch.pc, idx, read_c0_entryhi(),
|
||||
read_c0_entrylo0(), read_c0_entrylo1());
|
||||
|
||||
/* Flush D-cache */
|
||||
if (flush_dcache_mask) {
|
||||
if (entrylo0 & ENTRYLO_V) {
|
||||
++vcpu->stat.flush_dcache_exits;
|
||||
flush_data_cache_page((entryhi & VPN2_MASK) &
|
||||
~flush_dcache_mask);
|
||||
}
|
||||
if (entrylo1 & ENTRYLO_V) {
|
||||
++vcpu->stat.flush_dcache_exits;
|
||||
flush_data_cache_page(((entryhi & VPN2_MASK) &
|
||||
~flush_dcache_mask) |
|
||||
(0x1 << PAGE_SHIFT));
|
||||
}
|
||||
}
|
||||
|
||||
/* Restore old ASID */
|
||||
write_c0_entryhi(old_entryhi);
|
||||
mtc0_tlbw_hazard();
|
||||
local_irq_restore(flags);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_write);
|
||||
|
||||
int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
|
||||
struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_pfn_t pfn;
|
||||
unsigned long flags, old_entryhi = 0, vaddr = 0;
|
||||
unsigned long entrylo[2] = { 0, 0 };
|
||||
unsigned int pair_idx;
|
||||
|
||||
pfn = PFN_DOWN(virt_to_phys(vcpu->arch.kseg0_commpage));
|
||||
pair_idx = (badvaddr >> PAGE_SHIFT) & 1;
|
||||
entrylo[pair_idx] = mips3_paddr_to_tlbpfn(pfn << PAGE_SHIFT) |
|
||||
((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
|
||||
ENTRYLO_D | ENTRYLO_V;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
old_entryhi = read_c0_entryhi();
|
||||
vaddr = badvaddr & (PAGE_MASK << 1);
|
||||
write_c0_entryhi(vaddr | kvm_mips_get_kernel_asid(vcpu));
|
||||
write_c0_entrylo0(entrylo[0]);
|
||||
write_c0_entrylo1(entrylo[1]);
|
||||
write_c0_index(kvm_mips_get_commpage_asid(vcpu));
|
||||
mtc0_tlbw_hazard();
|
||||
tlb_write_indexed();
|
||||
tlbw_use_hazard();
|
||||
|
||||
kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0 (R): 0x%08lx, entrylo1(R): 0x%08lx\n",
|
||||
vcpu->arch.pc, read_c0_index(), read_c0_entryhi(),
|
||||
read_c0_entrylo0(), read_c0_entrylo1());
|
||||
|
||||
/* Restore old ASID */
|
||||
write_c0_entryhi(old_entryhi);
|
||||
mtc0_tlbw_hazard();
|
||||
local_irq_restore(flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mips_handle_commpage_tlb_fault);
|
||||
|
||||
int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi)
|
||||
{
|
||||
int i;
|
||||
|
@ -228,51 +117,11 @@ int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mips_guest_tlb_lookup);
|
||||
|
||||
int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr)
|
||||
{
|
||||
unsigned long old_entryhi, flags;
|
||||
int idx;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
old_entryhi = read_c0_entryhi();
|
||||
|
||||
if (KVM_GUEST_KERNEL_MODE(vcpu))
|
||||
write_c0_entryhi((vaddr & VPN2_MASK) |
|
||||
kvm_mips_get_kernel_asid(vcpu));
|
||||
else {
|
||||
write_c0_entryhi((vaddr & VPN2_MASK) |
|
||||
kvm_mips_get_user_asid(vcpu));
|
||||
}
|
||||
|
||||
mtc0_tlbw_hazard();
|
||||
|
||||
tlb_probe();
|
||||
tlb_probe_hazard();
|
||||
idx = read_c0_index();
|
||||
|
||||
/* Restore old ASID */
|
||||
write_c0_entryhi(old_entryhi);
|
||||
mtc0_tlbw_hazard();
|
||||
|
||||
local_irq_restore(flags);
|
||||
|
||||
kvm_debug("Host TLB lookup, %#lx, idx: %2d\n", vaddr, idx);
|
||||
|
||||
return idx;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_lookup);
|
||||
|
||||
int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
|
||||
static int _kvm_mips_host_tlb_inv(unsigned long entryhi)
|
||||
{
|
||||
int idx;
|
||||
unsigned long flags, old_entryhi;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
old_entryhi = read_c0_entryhi();
|
||||
|
||||
write_c0_entryhi((va & VPN2_MASK) | kvm_mips_get_user_asid(vcpu));
|
||||
write_c0_entryhi(entryhi);
|
||||
mtc0_tlbw_hazard();
|
||||
|
||||
tlb_probe();
|
||||
|
@ -282,7 +131,7 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
|
|||
if (idx >= current_cpu_data.tlbsize)
|
||||
BUG();
|
||||
|
||||
if (idx > 0) {
|
||||
if (idx >= 0) {
|
||||
write_c0_entryhi(UNIQUE_ENTRYHI(idx));
|
||||
write_c0_entrylo0(0);
|
||||
write_c0_entrylo1(0);
|
||||
|
@ -292,93 +141,75 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
|
|||
tlbw_use_hazard();
|
||||
}
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va,
|
||||
bool user, bool kernel)
|
||||
{
|
||||
int idx_user, idx_kernel;
|
||||
unsigned long flags, old_entryhi;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
old_entryhi = read_c0_entryhi();
|
||||
|
||||
if (user)
|
||||
idx_user = _kvm_mips_host_tlb_inv((va & VPN2_MASK) |
|
||||
kvm_mips_get_user_asid(vcpu));
|
||||
if (kernel)
|
||||
idx_kernel = _kvm_mips_host_tlb_inv((va & VPN2_MASK) |
|
||||
kvm_mips_get_kernel_asid(vcpu));
|
||||
|
||||
write_c0_entryhi(old_entryhi);
|
||||
mtc0_tlbw_hazard();
|
||||
|
||||
local_irq_restore(flags);
|
||||
|
||||
if (idx > 0)
|
||||
kvm_debug("%s: Invalidated entryhi %#lx @ idx %d\n", __func__,
|
||||
(va & VPN2_MASK) | kvm_mips_get_user_asid(vcpu), idx);
|
||||
if (user && idx_user >= 0)
|
||||
kvm_debug("%s: Invalidated guest user entryhi %#lx @ idx %d\n",
|
||||
__func__, (va & VPN2_MASK) |
|
||||
kvm_mips_get_user_asid(vcpu), idx_user);
|
||||
if (kernel && idx_kernel >= 0)
|
||||
kvm_debug("%s: Invalidated guest kernel entryhi %#lx @ idx %d\n",
|
||||
__func__, (va & VPN2_MASK) |
|
||||
kvm_mips_get_kernel_asid(vcpu), idx_kernel);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_inv);
|
||||
|
||||
void kvm_mips_flush_host_tlb(int skip_kseg0)
|
||||
/**
 * kvm_mips_suspend_mm() - Suspend the active mm.
 * @cpu:	The CPU we're running on.
 *
 * Suspend the active_mm, ready for a switch to a KVM guest virtual address
 * space. This is left active for the duration of guest context, including time
 * with interrupts enabled, so we need to be careful not to confuse e.g. cache
 * management IPIs.
 *
 * kvm_mips_resume_mm() should be called before context switching to a different
 * process so we don't need to worry about reference counting.
 *
 * This needs to be in static kernel code to avoid exporting init_mm.
 */
void kvm_mips_suspend_mm(int cpu)
{
|
||||
unsigned long flags;
|
||||
unsigned long old_entryhi, entryhi;
|
||||
unsigned long old_pagemask;
|
||||
int entry = 0;
|
||||
int maxentry = current_cpu_data.tlbsize;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
old_entryhi = read_c0_entryhi();
|
||||
old_pagemask = read_c0_pagemask();
|
||||
|
||||
/* Blast 'em all away. */
|
||||
for (entry = 0; entry < maxentry; entry++) {
|
||||
write_c0_index(entry);
|
||||
|
||||
if (skip_kseg0) {
|
||||
mtc0_tlbr_hazard();
|
||||
tlb_read();
|
||||
tlb_read_hazard();
|
||||
|
||||
entryhi = read_c0_entryhi();
|
||||
|
||||
/* Don't blow away guest kernel entries */
|
||||
if (KVM_GUEST_KSEGX(entryhi) == KVM_GUEST_KSEG0)
|
||||
continue;
|
||||
|
||||
write_c0_pagemask(old_pagemask);
|
||||
}
|
||||
|
||||
/* Make sure all entries differ. */
|
||||
write_c0_entryhi(UNIQUE_ENTRYHI(entry));
|
||||
write_c0_entrylo0(0);
|
||||
write_c0_entrylo1(0);
|
||||
mtc0_tlbw_hazard();
|
||||
|
||||
tlb_write_indexed();
|
||||
tlbw_use_hazard();
|
||||
}
|
||||
|
||||
write_c0_entryhi(old_entryhi);
|
||||
write_c0_pagemask(old_pagemask);
|
||||
mtc0_tlbw_hazard();
|
||||
|
||||
local_irq_restore(flags);
|
||||
cpumask_clear_cpu(cpu, mm_cpumask(current->active_mm));
|
||||
current->active_mm = &init_mm;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mips_flush_host_tlb);
|
||||
EXPORT_SYMBOL_GPL(kvm_mips_suspend_mm);
|
||||
|
||||
void kvm_local_flush_tlb_all(void)
|
||||
/**
 * kvm_mips_resume_mm() - Resume the current process mm.
 * @cpu:	The CPU we're running on.
 *
 * Resume the mm of the current process, after a switch back from a KVM guest
 * virtual address space (see kvm_mips_suspend_mm()).
 */
void kvm_mips_resume_mm(int cpu)
{
|
||||
unsigned long flags;
|
||||
unsigned long old_ctx;
|
||||
int entry = 0;
|
||||
|
||||
local_irq_save(flags);
|
||||
/* Save old context and create impossible VPN2 value */
|
||||
old_ctx = read_c0_entryhi();
|
||||
write_c0_entrylo0(0);
|
||||
write_c0_entrylo1(0);
|
||||
|
||||
/* Blast 'em all away. */
|
||||
while (entry < current_cpu_data.tlbsize) {
|
||||
/* Make sure all entries differ. */
|
||||
write_c0_entryhi(UNIQUE_ENTRYHI(entry));
|
||||
write_c0_index(entry);
|
||||
mtc0_tlbw_hazard();
|
||||
tlb_write_indexed();
|
||||
tlbw_use_hazard();
|
||||
entry++;
|
||||
}
|
||||
write_c0_entryhi(old_ctx);
|
||||
mtc0_tlbw_hazard();
|
||||
|
||||
local_irq_restore(flags);
|
||||
cpumask_set_cpu(cpu, mm_cpumask(current->mm));
|
||||
current->active_mm = current->mm;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_local_flush_tlb_all);
|
||||
EXPORT_SYMBOL_GPL(kvm_mips_resume_mm);
|
||||
|
|
|
@ -11,9 +11,11 @@
|
|||
|
||||
#include <linux/errno.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/pgalloc.h>
|
||||
|
||||
#include "interrupt.h"
|
||||
|
||||
|
@ -21,9 +23,12 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva)
|
|||
{
|
||||
gpa_t gpa;
|
||||
gva_t kseg = KSEGX(gva);
|
||||
gva_t gkseg = KVM_GUEST_KSEGX(gva);
|
||||
|
||||
if ((kseg == CKSEG0) || (kseg == CKSEG1))
|
||||
gpa = CPHYSADDR(gva);
|
||||
else if (gkseg == KVM_GUEST_KSEG0)
|
||||
gpa = KVM_GUEST_CPHYSADDR(gva);
|
||||
else {
|
||||
kvm_err("%s: cannot find GPA for GVA: %#lx\n", __func__, gva);
|
||||
kvm_mips_dump_host_tlbs();
|
||||
|
@ -83,48 +88,134 @@ static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_mips_bad_load(u32 cause, u32 *opc, struct kvm_run *run,
|
||||
struct kvm_vcpu *vcpu)
|
||||
{
|
||||
enum emulation_result er;
|
||||
union mips_instruction inst;
|
||||
int err;
|
||||
|
||||
/* A code fetch fault doesn't count as an MMIO */
|
||||
if (kvm_is_ifetch_fault(&vcpu->arch)) {
|
||||
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
return RESUME_HOST;
|
||||
}
|
||||
|
||||
/* Fetch the instruction. */
|
||||
if (cause & CAUSEF_BD)
|
||||
opc += 1;
|
||||
err = kvm_get_badinstr(opc, vcpu, &inst.word);
|
||||
if (err) {
|
||||
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
return RESUME_HOST;
|
||||
}
|
||||
|
||||
/* Emulate the load */
|
||||
er = kvm_mips_emulate_load(inst, cause, run, vcpu);
|
||||
if (er == EMULATE_FAIL) {
|
||||
kvm_err("Emulate load from MMIO space failed\n");
|
||||
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
} else {
|
||||
run->exit_reason = KVM_EXIT_MMIO;
|
||||
}
|
||||
return RESUME_HOST;
|
||||
}
|
||||
|
||||
static int kvm_mips_bad_store(u32 cause, u32 *opc, struct kvm_run *run,
|
||||
struct kvm_vcpu *vcpu)
|
||||
{
|
||||
enum emulation_result er;
|
||||
union mips_instruction inst;
|
||||
int err;
|
||||
|
||||
/* Fetch the instruction. */
|
||||
if (cause & CAUSEF_BD)
|
||||
opc += 1;
|
||||
err = kvm_get_badinstr(opc, vcpu, &inst.word);
|
||||
if (err) {
|
||||
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
return RESUME_HOST;
|
||||
}
|
||||
|
||||
/* Emulate the store */
|
||||
er = kvm_mips_emulate_store(inst, cause, run, vcpu);
|
||||
if (er == EMULATE_FAIL) {
|
||||
kvm_err("Emulate store to MMIO space failed\n");
|
||||
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
} else {
|
||||
run->exit_reason = KVM_EXIT_MMIO;
|
||||
}
|
||||
return RESUME_HOST;
|
||||
}
|
||||
|
||||
static int kvm_mips_bad_access(u32 cause, u32 *opc, struct kvm_run *run,
|
||||
struct kvm_vcpu *vcpu, bool store)
|
||||
{
|
||||
if (store)
|
||||
return kvm_mips_bad_store(cause, opc, run, vcpu);
|
||||
else
|
||||
return kvm_mips_bad_load(cause, opc, run, vcpu);
|
||||
}
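When these helpers return RESUME_HOST with exit_reason == KVM_EXIT_MMIO, the access is completed by userspace. A hedged sketch of the userspace half, not part of this patch (the device read/write callbacks are placeholders):

/* Illustrative userspace sketch, not part of this patch. */
#include <linux/kvm.h>
#include <string.h>

/*
 * After KVM_RUN returns with KVM_EXIT_MMIO, run->mmio describes the access.
 * For writes, userspace consumes run->mmio.data; for reads it must fill
 * run->mmio.data before the next KVM_RUN so the guest sees the result.
 */
static void handle_mmio_exit(struct kvm_run *run)
{
	if (run->exit_reason != KVM_EXIT_MMIO)
		return;

	if (run->mmio.is_write) {
		/* device_write(run->mmio.phys_addr, run->mmio.data, run->mmio.len); */
	} else {
		/* device_read() placeholder: return zeroes to the guest */
		memset(run->mmio.data, 0, run->mmio.len);
	}
}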
|
||||
|
||||
static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct mips_coproc *cop0 = vcpu->arch.cop0;
|
||||
struct kvm_run *run = vcpu->run;
|
||||
u32 __user *opc = (u32 __user *) vcpu->arch.pc;
|
||||
unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
|
||||
u32 cause = vcpu->arch.host_cp0_cause;
|
||||
enum emulation_result er = EMULATE_DONE;
|
||||
int ret = RESUME_GUEST;
|
||||
struct kvm_mips_tlb *tlb;
|
||||
unsigned long entryhi;
|
||||
int index;
|
||||
|
||||
if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0
|
||||
|| KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) {
|
||||
kvm_debug("USER/KSEG23 ADDR TLB MOD fault: cause %#x, PC: %p, BadVaddr: %#lx\n",
|
||||
cause, opc, badvaddr);
|
||||
er = kvm_mips_handle_tlbmod(cause, opc, run, vcpu);
|
||||
|
||||
if (er == EMULATE_DONE)
|
||||
ret = RESUME_GUEST;
|
||||
else {
|
||||
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
ret = RESUME_HOST;
|
||||
}
|
||||
} else if (KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG0) {
|
||||
/*
|
||||
* XXXKYMA: The guest kernel does not expect to get this fault
|
||||
* when we are not using HIGHMEM. Need to address this in a
|
||||
* HIGHMEM kernel
|
||||
* First find the mapping in the guest TLB. If the failure to
|
||||
* write was due to the guest TLB, it should be up to the guest
|
||||
* to handle it.
|
||||
*/
|
||||
kvm_err("TLB MOD fault not handled, cause %#x, PC: %p, BadVaddr: %#lx\n",
|
||||
cause, opc, badvaddr);
|
||||
kvm_mips_dump_host_tlbs();
|
||||
kvm_arch_vcpu_dump_regs(vcpu);
|
||||
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
ret = RESUME_HOST;
|
||||
entryhi = (badvaddr & VPN2_MASK) |
|
||||
(kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID);
|
||||
index = kvm_mips_guest_tlb_lookup(vcpu, entryhi);
|
||||
|
||||
/*
|
||||
* These should never happen.
|
||||
* They would indicate stale host TLB entries.
|
||||
*/
|
||||
if (unlikely(index < 0)) {
|
||||
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
return RESUME_HOST;
|
||||
}
|
||||
tlb = vcpu->arch.guest_tlb + index;
|
||||
if (unlikely(!TLB_IS_VALID(*tlb, badvaddr))) {
|
||||
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
return RESUME_HOST;
|
||||
}
|
||||
|
||||
/*
|
||||
* Guest entry not dirty? That would explain the TLB modified
|
||||
* exception. Relay that on to the guest so it can handle it.
|
||||
*/
|
||||
if (!TLB_IS_DIRTY(*tlb, badvaddr)) {
|
||||
kvm_mips_emulate_tlbmod(cause, opc, run, vcpu);
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
|
||||
if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, badvaddr,
|
||||
true))
|
||||
/* Not writable, needs handling as MMIO */
|
||||
return kvm_mips_bad_store(cause, opc, run, vcpu);
|
||||
return RESUME_GUEST;
|
||||
} else if (KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG0) {
|
||||
if (kvm_mips_handle_kseg0_tlb_fault(badvaddr, vcpu, true) < 0)
|
||||
/* Not writable, needs handling as MMIO */
|
||||
return kvm_mips_bad_store(cause, opc, run, vcpu);
|
||||
return RESUME_GUEST;
|
||||
} else {
|
||||
kvm_err("Illegal TLB Mod fault address , cause %#x, PC: %p, BadVaddr: %#lx\n",
|
||||
cause, opc, badvaddr);
|
||||
kvm_mips_dump_host_tlbs();
|
||||
kvm_arch_vcpu_dump_regs(vcpu);
|
||||
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
ret = RESUME_HOST;
|
||||
/* host kernel addresses are all handled as MMIO */
|
||||
return kvm_mips_bad_store(cause, opc, run, vcpu);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_trap_emul_handle_tlb_miss(struct kvm_vcpu *vcpu, bool store)
|
||||
|
@ -157,7 +248,7 @@ static int kvm_trap_emul_handle_tlb_miss(struct kvm_vcpu *vcpu, bool store)
|
|||
* into the shadow host TLB
|
||||
*/
|
||||
|
||||
er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu);
|
||||
er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu, store);
|
||||
if (er == EMULATE_DONE)
|
||||
ret = RESUME_GUEST;
|
||||
else {
|
||||
|
@ -169,29 +260,15 @@ static int kvm_trap_emul_handle_tlb_miss(struct kvm_vcpu *vcpu, bool store)
|
|||
* All KSEG0 faults are handled by KVM, as the guest kernel does
|
||||
* not expect to ever get them
|
||||
*/
|
||||
if (kvm_mips_handle_kseg0_tlb_fault
|
||||
(vcpu->arch.host_cp0_badvaddr, vcpu) < 0) {
|
||||
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
ret = RESUME_HOST;
|
||||
}
|
||||
if (kvm_mips_handle_kseg0_tlb_fault(badvaddr, vcpu, store) < 0)
|
||||
ret = kvm_mips_bad_access(cause, opc, run, vcpu, store);
|
||||
} else if (KVM_GUEST_KERNEL_MODE(vcpu)
|
||||
&& (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1)) {
|
||||
/*
|
||||
* With EVA we may get a TLB exception instead of an address
|
||||
* error when the guest performs MMIO to KSeg1 addresses.
|
||||
*/
|
||||
kvm_debug("Emulate %s MMIO space\n",
|
||||
store ? "Store to" : "Load from");
|
||||
er = kvm_mips_emulate_inst(cause, opc, run, vcpu);
|
||||
if (er == EMULATE_FAIL) {
|
||||
kvm_err("Emulate %s MMIO space failed\n",
|
||||
store ? "Store to" : "Load from");
|
||||
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
ret = RESUME_HOST;
|
||||
} else {
|
||||
run->exit_reason = KVM_EXIT_MMIO;
|
||||
ret = RESUME_HOST;
|
||||
}
|
||||
ret = kvm_mips_bad_access(cause, opc, run, vcpu, store);
|
||||
} else {
|
||||
kvm_err("Illegal TLB %s fault address , cause %#x, PC: %p, BadVaddr: %#lx\n",
|
||||
store ? "ST" : "LD", cause, opc, badvaddr);
|
||||
|
@ -219,21 +296,11 @@ static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu)
|
|||
u32 __user *opc = (u32 __user *) vcpu->arch.pc;
|
||||
unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
|
||||
u32 cause = vcpu->arch.host_cp0_cause;
|
||||
enum emulation_result er = EMULATE_DONE;
|
||||
int ret = RESUME_GUEST;
|
||||
|
||||
if (KVM_GUEST_KERNEL_MODE(vcpu)
|
||||
&& (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1)) {
|
||||
kvm_debug("Emulate Store to MMIO space\n");
|
||||
er = kvm_mips_emulate_inst(cause, opc, run, vcpu);
|
||||
if (er == EMULATE_FAIL) {
|
||||
kvm_err("Emulate Store to MMIO space failed\n");
|
||||
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
ret = RESUME_HOST;
|
||||
} else {
|
||||
run->exit_reason = KVM_EXIT_MMIO;
|
||||
ret = RESUME_HOST;
|
||||
}
|
||||
ret = kvm_mips_bad_store(cause, opc, run, vcpu);
|
||||
} else {
|
||||
kvm_err("Address Error (STORE): cause %#x, PC: %p, BadVaddr: %#lx\n",
|
||||
cause, opc, badvaddr);
|
||||
|
@ -249,26 +316,15 @@ static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu)
|
|||
u32 __user *opc = (u32 __user *) vcpu->arch.pc;
|
||||
unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
|
||||
u32 cause = vcpu->arch.host_cp0_cause;
|
||||
enum emulation_result er = EMULATE_DONE;
|
||||
int ret = RESUME_GUEST;
|
||||
|
||||
if (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1) {
|
||||
kvm_debug("Emulate Load from MMIO space @ %#lx\n", badvaddr);
|
||||
er = kvm_mips_emulate_inst(cause, opc, run, vcpu);
|
||||
if (er == EMULATE_FAIL) {
|
||||
kvm_err("Emulate Load from MMIO space failed\n");
|
||||
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
ret = RESUME_HOST;
|
||||
} else {
|
||||
run->exit_reason = KVM_EXIT_MMIO;
|
||||
ret = RESUME_HOST;
|
||||
}
|
||||
ret = kvm_mips_bad_load(cause, opc, run, vcpu);
|
||||
} else {
|
||||
kvm_err("Address Error (LOAD): cause %#x, PC: %p, BadVaddr: %#lx\n",
|
||||
cause, opc, badvaddr);
|
||||
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
ret = RESUME_HOST;
|
||||
er = EMULATE_FAIL;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
@ -428,16 +484,75 @@ static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_trap_emul_vm_init(struct kvm *kvm)
|
||||
static int kvm_trap_emul_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
|
||||
struct mm_struct *user_mm = &vcpu->arch.guest_user_mm;
|
||||
|
||||
/*
|
||||
* Allocate GVA -> HPA page tables.
|
||||
* MIPS doesn't use the mm_struct pointer argument.
|
||||
*/
|
||||
kern_mm->pgd = pgd_alloc(kern_mm);
|
||||
if (!kern_mm->pgd)
|
||||
return -ENOMEM;
|
||||
|
||||
user_mm->pgd = pgd_alloc(user_mm);
|
||||
if (!user_mm->pgd) {
|
||||
pgd_free(kern_mm, kern_mm->pgd);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_trap_emul_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
static void kvm_mips_emul_free_gva_pt(pgd_t *pgd)
|
||||
{
|
||||
vcpu->arch.kscratch_enabled = 0xfc;
|
||||
/* Don't free host kernel page tables copied from init_mm.pgd */
|
||||
const unsigned long end = 0x80000000;
|
||||
unsigned long pgd_va, pud_va, pmd_va;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
int i, j, k;
|
||||
|
||||
return 0;
|
||||
for (i = 0; i < USER_PTRS_PER_PGD; i++) {
|
||||
if (pgd_none(pgd[i]))
|
||||
continue;
|
||||
|
||||
pgd_va = (unsigned long)i << PGDIR_SHIFT;
|
||||
if (pgd_va >= end)
|
||||
break;
|
||||
pud = pud_offset(pgd + i, 0);
|
||||
for (j = 0; j < PTRS_PER_PUD; j++) {
|
||||
if (pud_none(pud[j]))
|
||||
continue;
|
||||
|
||||
pud_va = pgd_va | ((unsigned long)j << PUD_SHIFT);
|
||||
if (pud_va >= end)
|
||||
break;
|
||||
pmd = pmd_offset(pud + j, 0);
|
||||
for (k = 0; k < PTRS_PER_PMD; k++) {
|
||||
if (pmd_none(pmd[k]))
|
||||
continue;
|
||||
|
||||
pmd_va = pud_va | (k << PMD_SHIFT);
|
||||
if (pmd_va >= end)
|
||||
break;
|
||||
pte = pte_offset(pmd + k, 0);
|
||||
pte_free_kernel(NULL, pte);
|
||||
}
|
||||
pmd_free(NULL, pmd);
|
||||
}
|
||||
pud_free(NULL, pud);
|
||||
}
|
||||
pgd_free(NULL, pgd);
|
||||
}
|
||||
|
||||
static void kvm_trap_emul_vcpu_uninit(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_mips_emul_free_gva_pt(vcpu->arch.guest_kernel_mm.pgd);
|
||||
kvm_mips_emul_free_gva_pt(vcpu->arch.guest_user_mm.pgd);
|
||||
}
|
||||
|
||||
static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
|
@ -499,6 +614,9 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
|
|||
/* Set Wait IE/IXMT Ignore in Config7, IAR, AR */
|
||||
kvm_write_c0_guest_config7(cop0, (MIPS_CONF7_WII) | (1 << 10));
|
||||
|
||||
/* Status */
|
||||
kvm_write_c0_guest_status(cop0, ST0_BEV | ST0_ERL);
|
||||
|
||||
/*
|
||||
* Setup IntCtl defaults, compatibility mode for timer interrupts (HW5)
|
||||
*/
|
||||
|
@ -508,17 +626,76 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
|
|||
kvm_write_c0_guest_ebase(cop0, KVM_GUEST_KSEG0 |
|
||||
(vcpu_id & MIPS_EBASE_CPUNUM));
|
||||
|
||||
/* Put PC at guest reset vector */
|
||||
vcpu->arch.pc = KVM_GUEST_CKSEG1ADDR(0x1fc00000);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_trap_emul_flush_shadow_all(struct kvm *kvm)
|
||||
{
|
||||
/* Flush GVA page tables and invalidate GVA ASIDs on all VCPUs */
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
|
||||
static void kvm_trap_emul_flush_shadow_memslot(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *slot)
|
||||
{
|
||||
kvm_trap_emul_flush_shadow_all(kvm);
|
||||
}
|
||||
|
||||
static u64 kvm_trap_emul_get_one_regs[] = {
|
||||
KVM_REG_MIPS_CP0_INDEX,
|
||||
KVM_REG_MIPS_CP0_ENTRYLO0,
|
||||
KVM_REG_MIPS_CP0_ENTRYLO1,
|
||||
KVM_REG_MIPS_CP0_CONTEXT,
|
||||
KVM_REG_MIPS_CP0_USERLOCAL,
|
||||
KVM_REG_MIPS_CP0_PAGEMASK,
|
||||
KVM_REG_MIPS_CP0_WIRED,
|
||||
KVM_REG_MIPS_CP0_HWRENA,
|
||||
KVM_REG_MIPS_CP0_BADVADDR,
|
||||
KVM_REG_MIPS_CP0_COUNT,
|
||||
KVM_REG_MIPS_CP0_ENTRYHI,
|
||||
KVM_REG_MIPS_CP0_COMPARE,
|
||||
KVM_REG_MIPS_CP0_STATUS,
|
||||
KVM_REG_MIPS_CP0_INTCTL,
|
||||
KVM_REG_MIPS_CP0_CAUSE,
|
||||
KVM_REG_MIPS_CP0_EPC,
|
||||
KVM_REG_MIPS_CP0_PRID,
|
||||
KVM_REG_MIPS_CP0_EBASE,
|
||||
KVM_REG_MIPS_CP0_CONFIG,
|
||||
KVM_REG_MIPS_CP0_CONFIG1,
|
||||
KVM_REG_MIPS_CP0_CONFIG2,
|
||||
KVM_REG_MIPS_CP0_CONFIG3,
|
||||
KVM_REG_MIPS_CP0_CONFIG4,
|
||||
KVM_REG_MIPS_CP0_CONFIG5,
|
||||
KVM_REG_MIPS_CP0_CONFIG7,
|
||||
KVM_REG_MIPS_CP0_ERROREPC,
|
||||
KVM_REG_MIPS_CP0_KSCRATCH1,
|
||||
KVM_REG_MIPS_CP0_KSCRATCH2,
|
||||
KVM_REG_MIPS_CP0_KSCRATCH3,
|
||||
KVM_REG_MIPS_CP0_KSCRATCH4,
|
||||
KVM_REG_MIPS_CP0_KSCRATCH5,
|
||||
KVM_REG_MIPS_CP0_KSCRATCH6,
|
||||
|
||||
KVM_REG_MIPS_COUNT_CTL,
|
||||
KVM_REG_MIPS_COUNT_RESUME,
|
||||
KVM_REG_MIPS_COUNT_HZ,
|
||||
};
|
||||
|
||||
static unsigned long kvm_trap_emul_num_regs(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return 0;
|
||||
return ARRAY_SIZE(kvm_trap_emul_get_one_regs);
|
||||
}
|
||||
|
||||
static int kvm_trap_emul_copy_reg_indices(struct kvm_vcpu *vcpu,
|
||||
u64 __user *indices)
|
||||
{
|
||||
if (copy_to_user(indices, kvm_trap_emul_get_one_regs,
|
||||
sizeof(kvm_trap_emul_get_one_regs)))
|
||||
return -EFAULT;
|
||||
indices += ARRAY_SIZE(kvm_trap_emul_get_one_regs);
|
||||
|
||||
return 0;
|
||||
}
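These two callbacks feed the generic KVM_GET_REG_LIST ioctl, and each listed ID is then accessible through KVM_GET_ONE_REG/KVM_SET_ONE_REG. A minimal userspace sketch for reading one register, not part of this patch:

/* Illustrative userspace sketch, not part of this patch. */
#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

/* Read one register by ID, e.g. KVM_REG_MIPS_CP0_ENTRYLO0 now that the
 * trap & emulate implementation lists it. */
static int get_one_reg(int vcpu_fd, uint64_t id, uint64_t *value)
{
	struct kvm_one_reg reg = {
		.id = id,
		.addr = (uint64_t)(uintptr_t)value,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}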
|
||||
|
||||
|
@ -526,7 +703,81 @@ static int kvm_trap_emul_get_one_reg(struct kvm_vcpu *vcpu,
|
|||
const struct kvm_one_reg *reg,
|
||||
s64 *v)
|
||||
{
|
||||
struct mips_coproc *cop0 = vcpu->arch.cop0;
|
||||
|
||||
switch (reg->id) {
|
||||
case KVM_REG_MIPS_CP0_INDEX:
|
||||
*v = (long)kvm_read_c0_guest_index(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_ENTRYLO0:
|
||||
*v = kvm_read_c0_guest_entrylo0(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_ENTRYLO1:
|
||||
*v = kvm_read_c0_guest_entrylo1(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CONTEXT:
|
||||
*v = (long)kvm_read_c0_guest_context(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_USERLOCAL:
|
||||
*v = (long)kvm_read_c0_guest_userlocal(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_PAGEMASK:
|
||||
*v = (long)kvm_read_c0_guest_pagemask(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_WIRED:
|
||||
*v = (long)kvm_read_c0_guest_wired(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_HWRENA:
|
||||
*v = (long)kvm_read_c0_guest_hwrena(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_BADVADDR:
|
||||
*v = (long)kvm_read_c0_guest_badvaddr(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_ENTRYHI:
|
||||
*v = (long)kvm_read_c0_guest_entryhi(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_COMPARE:
|
||||
*v = (long)kvm_read_c0_guest_compare(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_STATUS:
|
||||
*v = (long)kvm_read_c0_guest_status(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_INTCTL:
|
||||
*v = (long)kvm_read_c0_guest_intctl(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CAUSE:
|
||||
*v = (long)kvm_read_c0_guest_cause(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_EPC:
|
||||
*v = (long)kvm_read_c0_guest_epc(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_PRID:
|
||||
*v = (long)kvm_read_c0_guest_prid(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_EBASE:
|
||||
*v = (long)kvm_read_c0_guest_ebase(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CONFIG:
|
||||
*v = (long)kvm_read_c0_guest_config(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CONFIG1:
|
||||
*v = (long)kvm_read_c0_guest_config1(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CONFIG2:
|
||||
*v = (long)kvm_read_c0_guest_config2(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CONFIG3:
|
||||
*v = (long)kvm_read_c0_guest_config3(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CONFIG4:
|
||||
*v = (long)kvm_read_c0_guest_config4(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CONFIG5:
|
||||
*v = (long)kvm_read_c0_guest_config5(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CONFIG7:
|
||||
*v = (long)kvm_read_c0_guest_config7(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_COUNT:
|
||||
*v = kvm_mips_read_count(vcpu);
|
||||
break;
|
||||
|
@ -539,6 +790,27 @@ static int kvm_trap_emul_get_one_reg(struct kvm_vcpu *vcpu,
|
|||
case KVM_REG_MIPS_COUNT_HZ:
|
||||
*v = vcpu->arch.count_hz;
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_ERROREPC:
|
||||
*v = (long)kvm_read_c0_guest_errorepc(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_KSCRATCH1:
|
||||
*v = (long)kvm_read_c0_guest_kscratch1(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_KSCRATCH2:
|
||||
*v = (long)kvm_read_c0_guest_kscratch2(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_KSCRATCH3:
|
||||
*v = (long)kvm_read_c0_guest_kscratch3(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_KSCRATCH4:
|
||||
*v = (long)kvm_read_c0_guest_kscratch4(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_KSCRATCH5:
|
||||
*v = (long)kvm_read_c0_guest_kscratch5(cop0);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_KSCRATCH6:
|
||||
*v = (long)kvm_read_c0_guest_kscratch6(cop0);
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -554,6 +826,56 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
|
|||
unsigned int cur, change;
|
||||
|
||||
switch (reg->id) {
|
||||
case KVM_REG_MIPS_CP0_INDEX:
|
||||
kvm_write_c0_guest_index(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_ENTRYLO0:
|
||||
kvm_write_c0_guest_entrylo0(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_ENTRYLO1:
|
||||
kvm_write_c0_guest_entrylo1(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CONTEXT:
|
||||
kvm_write_c0_guest_context(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_USERLOCAL:
|
||||
kvm_write_c0_guest_userlocal(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_PAGEMASK:
|
||||
kvm_write_c0_guest_pagemask(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_WIRED:
|
||||
kvm_write_c0_guest_wired(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_HWRENA:
|
||||
kvm_write_c0_guest_hwrena(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_BADVADDR:
|
||||
kvm_write_c0_guest_badvaddr(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_ENTRYHI:
|
||||
kvm_write_c0_guest_entryhi(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_STATUS:
|
||||
kvm_write_c0_guest_status(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_INTCTL:
|
||||
/* No VInt, so no VS, read-only for now */
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_EPC:
|
||||
kvm_write_c0_guest_epc(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_PRID:
|
||||
kvm_write_c0_guest_prid(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_EBASE:
|
||||
/*
|
||||
* Allow core number to be written, but the exception base must
|
||||
* remain in guest KSeg0.
|
||||
*/
|
||||
kvm_change_c0_guest_ebase(cop0, 0x1ffff000 | MIPS_EBASE_CPUNUM,
|
||||
v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_COUNT:
|
||||
kvm_mips_write_count(vcpu, v);
|
||||
break;
|
||||
|
@ -618,6 +940,9 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
|
|||
kvm_write_c0_guest_config5(cop0, v);
|
||||
}
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_CONFIG7:
|
||||
/* writes ignored */
|
||||
break;
|
||||
case KVM_REG_MIPS_COUNT_CTL:
|
||||
ret = kvm_mips_set_count_ctl(vcpu, v);
|
||||
break;
|
||||
|
@ -627,24 +952,269 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
|
|||
case KVM_REG_MIPS_COUNT_HZ:
|
||||
ret = kvm_mips_set_count_hz(vcpu, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_ERROREPC:
|
||||
kvm_write_c0_guest_errorepc(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_KSCRATCH1:
|
||||
kvm_write_c0_guest_kscratch1(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_KSCRATCH2:
|
||||
kvm_write_c0_guest_kscratch2(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_KSCRATCH3:
|
||||
kvm_write_c0_guest_kscratch3(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_KSCRATCH4:
|
||||
kvm_write_c0_guest_kscratch4(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_KSCRATCH5:
|
||||
kvm_write_c0_guest_kscratch5(cop0, v);
|
||||
break;
|
||||
case KVM_REG_MIPS_CP0_KSCRATCH6:
|
||||
kvm_write_c0_guest_kscratch6(cop0, v);
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_trap_emul_vcpu_get_regs(struct kvm_vcpu *vcpu)
|
||||
static int kvm_trap_emul_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
{
|
||||
kvm_lose_fpu(vcpu);
|
||||
struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
|
||||
struct mm_struct *user_mm = &vcpu->arch.guest_user_mm;
|
||||
struct mm_struct *mm;
|
||||
|
||||
/*
|
||||
* Were we in guest context? If so, restore the appropriate ASID based
|
||||
* on the mode of the Guest (Kernel/User).
|
||||
*/
|
||||
if (current->flags & PF_VCPU) {
|
||||
mm = KVM_GUEST_KERNEL_MODE(vcpu) ? kern_mm : user_mm;
|
||||
if ((cpu_context(cpu, mm) ^ asid_cache(cpu)) &
|
||||
asid_version_mask(cpu))
|
||||
get_new_mmu_context(mm, cpu);
|
||||
write_c0_entryhi(cpu_asid(cpu, mm));
|
||||
TLBMISS_HANDLER_SETUP_PGD(mm->pgd);
|
||||
kvm_mips_suspend_mm(cpu);
|
||||
ehb();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_trap_emul_vcpu_set_regs(struct kvm_vcpu *vcpu)
|
||||
static int kvm_trap_emul_vcpu_put(struct kvm_vcpu *vcpu, int cpu)
|
||||
{
|
||||
kvm_lose_fpu(vcpu);
|
||||
|
||||
if (current->flags & PF_VCPU) {
|
||||
/* Restore normal Linux process memory map */
|
||||
if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) &
|
||||
asid_version_mask(cpu)))
|
||||
get_new_mmu_context(current->mm, cpu);
|
||||
write_c0_entryhi(cpu_asid(cpu, current->mm));
|
||||
TLBMISS_HANDLER_SETUP_PGD(current->mm->pgd);
|
||||
kvm_mips_resume_mm(cpu);
|
||||
ehb();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_trap_emul_check_requests(struct kvm_vcpu *vcpu, int cpu,
|
||||
bool reload_asid)
|
||||
{
|
||||
struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
|
||||
struct mm_struct *user_mm = &vcpu->arch.guest_user_mm;
|
||||
struct mm_struct *mm;
|
||||
int i;
|
||||
|
||||
if (likely(!vcpu->requests))
|
||||
return;
|
||||
|
||||
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
|
||||
/*
|
||||
* Both kernel & user GVA mappings must be invalidated. The
|
||||
* caller is just about to check whether the ASID is stale
|
||||
* anyway so no need to reload it here.
|
||||
*/
|
||||
kvm_mips_flush_gva_pt(kern_mm->pgd, KMF_GPA | KMF_KERN);
|
||||
kvm_mips_flush_gva_pt(user_mm->pgd, KMF_GPA | KMF_USER);
|
||||
for_each_possible_cpu(i) {
|
||||
cpu_context(i, kern_mm) = 0;
|
||||
cpu_context(i, user_mm) = 0;
|
||||
}
|
||||
|
||||
/* Generate new ASID for current mode */
|
||||
if (reload_asid) {
|
||||
mm = KVM_GUEST_KERNEL_MODE(vcpu) ? kern_mm : user_mm;
|
||||
get_new_mmu_context(mm, cpu);
|
||||
htw_stop();
|
||||
write_c0_entryhi(cpu_asid(cpu, mm));
|
||||
TLBMISS_HANDLER_SETUP_PGD(mm->pgd);
|
||||
htw_start();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * kvm_trap_emul_gva_lockless_begin() - Begin lockless access to GVA space.
 * @vcpu:	VCPU pointer.
 *
 * Call before a GVA space access outside of guest mode, to ensure that
 * asynchronous TLB flush requests are handled or delayed until completion of
 * the GVA access (as indicated by a matching kvm_trap_emul_gva_lockless_end()).
 *
 * Should be called with IRQs already enabled.
 */
void kvm_trap_emul_gva_lockless_begin(struct kvm_vcpu *vcpu)
{
|
||||
/* We re-enable IRQs in kvm_trap_emul_gva_lockless_end() */
|
||||
WARN_ON_ONCE(irqs_disabled());
|
||||
|
||||
/*
|
||||
* The caller is about to access the GVA space, so we set the mode to
|
||||
* force TLB flush requests to send an IPI, and also disable IRQs to
|
||||
* delay IPI handling until kvm_trap_emul_gva_lockless_end().
|
||||
*/
|
||||
local_irq_disable();
|
||||
|
||||
/*
|
||||
* Make sure the read of VCPU requests is not reordered ahead of the
|
||||
* write to vcpu->mode, or we could miss a TLB flush request while
|
||||
* the requester sees the VCPU as outside of guest mode and not needing
|
||||
* an IPI.
|
||||
*/
|
||||
smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES);
|
||||
|
||||
/*
|
||||
* If a TLB flush has been requested (potentially while
|
||||
* OUTSIDE_GUEST_MODE and assumed immediately effective), perform it
|
||||
* before accessing the GVA space, and be sure to reload the ASID if
|
||||
* necessary as it'll be immediately used.
|
||||
*
|
||||
* TLB flush requests after this check will trigger an IPI due to the
|
||||
* mode change above, which will be delayed due to IRQs disabled.
|
||||
*/
|
||||
kvm_trap_emul_check_requests(vcpu, smp_processor_id(), true);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_trap_emul_gva_lockless_end() - End lockless access to GVA space.
|
||||
* @vcpu: VCPU pointer.
|
||||
*
|
||||
* Called after a GVA space access outside of guest mode. Should have a matching
|
||||
* call to kvm_trap_emul_gva_lockless_begin().
|
||||
*/
|
||||
void kvm_trap_emul_gva_lockless_end(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* Make sure the write to vcpu->mode is not reordered in front of GVA
|
||||
* accesses, or a TLB flush requester may not think it necessary to send
|
||||
* an IPI.
|
||||
*/
|
||||
smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE);
|
||||
|
||||
/*
|
||||
* Now that the access to GVA space is complete, its safe for pending
|
||||
* TLB flush request IPIs to be handled (which indicates completion).
|
||||
*/
|
||||
local_irq_enable();
|
||||
}
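
The comments above describe a small protocol rather than a lock: publish READING_SHADOW_PAGE_TABLES, keep IRQs off so a remote flush request's IPI is deferred, and re-check requests before touching guest virtual addresses. As a rough illustration of how a caller wraps a GVA access in this pair (a hedged sketch only; kvm_mips_read_guest_word() is a made-up helper, not part of this series):

static int kvm_mips_read_guest_word(struct kvm_vcpu *vcpu,
                                    u32 __user *gva, u32 *out)
{
        int err;

        kvm_trap_emul_gva_lockless_begin(vcpu);
        /* User accessor; faults on an unmapped GVA are handled by the caller. */
        err = get_user(*out, gva);
        kvm_trap_emul_gva_lockless_end(vcpu);

        return err;
}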
|
||||
|
||||
static void kvm_trap_emul_vcpu_reenter(struct kvm_run *run,
|
||||
struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
|
||||
struct mm_struct *user_mm = &vcpu->arch.guest_user_mm;
|
||||
struct mm_struct *mm;
|
||||
struct mips_coproc *cop0 = vcpu->arch.cop0;
|
||||
int i, cpu = smp_processor_id();
|
||||
unsigned int gasid;
|
||||
|
||||
/*
|
||||
* No need to reload ASID, IRQs are disabled already so there's no rush,
|
||||
* and we'll check if we need to regenerate below anyway before
|
||||
* re-entering the guest.
|
||||
*/
|
||||
kvm_trap_emul_check_requests(vcpu, cpu, false);
|
||||
|
||||
if (KVM_GUEST_KERNEL_MODE(vcpu)) {
|
||||
mm = kern_mm;
|
||||
} else {
|
||||
mm = user_mm;
|
||||
|
||||
/*
|
||||
* Lazy host ASID regeneration / PT flush for guest user mode.
|
||||
* If the guest ASID has changed since the last guest usermode
|
||||
* execution, invalidate the stale TLB entries and flush GVA PT
|
||||
* entries too.
|
||||
*/
|
||||
gasid = kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID;
|
||||
if (gasid != vcpu->arch.last_user_gasid) {
|
||||
kvm_mips_flush_gva_pt(user_mm->pgd, KMF_USER);
|
||||
for_each_possible_cpu(i)
|
||||
cpu_context(i, user_mm) = 0;
|
||||
vcpu->arch.last_user_gasid = gasid;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if ASID is stale. This may happen due to a TLB flush request or
|
||||
* a lazy user MM invalidation.
|
||||
*/
|
||||
if ((cpu_context(cpu, mm) ^ asid_cache(cpu)) &
|
||||
asid_version_mask(cpu))
|
||||
get_new_mmu_context(mm, cpu);
|
||||
}
|
||||
|
||||
static int kvm_trap_emul_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
int r;
|
||||
|
||||
/* Check if we have any exceptions/interrupts pending */
|
||||
kvm_mips_deliver_interrupts(vcpu,
|
||||
kvm_read_c0_guest_cause(vcpu->arch.cop0));
|
||||
|
||||
kvm_trap_emul_vcpu_reenter(run, vcpu);
|
||||
|
||||
/*
|
||||
* We use user accessors to access guest memory, but we don't want to
|
||||
* invoke Linux page faulting.
|
||||
*/
|
||||
pagefault_disable();
|
||||
|
||||
/* Disable hardware page table walking while in guest */
|
||||
htw_stop();
|
||||
|
||||
/*
|
||||
* While in guest context we're in the guest's address space, not the
|
||||
* host process address space, so we need to be careful not to confuse
|
||||
* e.g. cache management IPIs.
|
||||
*/
|
||||
kvm_mips_suspend_mm(cpu);
|
||||
|
||||
r = vcpu->arch.vcpu_run(run, vcpu);
|
||||
|
||||
/* We may have migrated while handling guest exits */
|
||||
cpu = smp_processor_id();
|
||||
|
||||
/* Restore normal Linux process memory map */
|
||||
if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) &
|
||||
asid_version_mask(cpu)))
|
||||
get_new_mmu_context(current->mm, cpu);
|
||||
write_c0_entryhi(cpu_asid(cpu, current->mm));
|
||||
TLBMISS_HANDLER_SETUP_PGD(current->mm->pgd);
|
||||
kvm_mips_resume_mm(cpu);
|
||||
|
||||
htw_start();
|
||||
|
||||
pagefault_enable();
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
        /* exit handlers */
        .handle_cop_unusable = kvm_trap_emul_handle_cop_unusable,

@@ -661,9 +1231,11 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
        .handle_fpe = kvm_trap_emul_handle_fpe,
        .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled,

        .vm_init = kvm_trap_emul_vm_init,
        .vcpu_init = kvm_trap_emul_vcpu_init,
        .vcpu_uninit = kvm_trap_emul_vcpu_uninit,
        .vcpu_setup = kvm_trap_emul_vcpu_setup,
        .flush_shadow_all = kvm_trap_emul_flush_shadow_all,
        .flush_shadow_memslot = kvm_trap_emul_flush_shadow_memslot,
        .gva_to_gpa = kvm_trap_emul_gva_to_gpa_cb,
        .queue_timer_int = kvm_mips_queue_timer_int_cb,
        .dequeue_timer_int = kvm_mips_dequeue_timer_int_cb,

@@ -675,8 +1247,10 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
        .copy_reg_indices = kvm_trap_emul_copy_reg_indices,
        .get_one_reg = kvm_trap_emul_get_one_reg,
        .set_one_reg = kvm_trap_emul_set_one_reg,
        .vcpu_get_regs = kvm_trap_emul_vcpu_get_regs,
        .vcpu_set_regs = kvm_trap_emul_vcpu_set_regs,
        .vcpu_load = kvm_trap_emul_vcpu_load,
        .vcpu_put = kvm_trap_emul_vcpu_put,
        .vcpu_run = kvm_trap_emul_vcpu_run,
        .vcpu_reenter = kvm_trap_emul_vcpu_reenter,
};

int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks)
|
||||
|
|
|
@@ -22,6 +22,10 @@
 #include <asm/book3s/64/mmu-hash.h>

/* Power architecture requires HPT is at least 256kiB, at most 64TiB */
#define PPC_MIN_HPT_ORDER 18
#define PPC_MAX_HPT_ORDER 46

#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
{

@@ -356,6 +360,18 @@ extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
 extern void kvmhv_rm_send_ipi(int cpu);

static inline unsigned long kvmppc_hpt_npte(struct kvm_hpt_info *hpt)
{
        /* HPTEs are 2**4 bytes long */
        return 1UL << (hpt->order - 4);
}

static inline unsigned long kvmppc_hpt_mask(struct kvm_hpt_info *hpt)
{
        /* 128 (2**7) bytes in each HPTEG */
        return (1UL << (hpt->order - 7)) - 1;
}

#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

#endif /* __ASM_KVM_BOOK3S_64_H__ */
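
Taken together with PPC_MIN_HPT_ORDER/PPC_MAX_HPT_ORDER above, these helpers make the HPT geometry a pure function of the order. A stand-alone userspace check of that arithmetic (illustration only, not kernel code; order 24 is the usual 16 MiB default):

#include <stdio.h>

/* Same arithmetic as kvmppc_hpt_npte()/kvmppc_hpt_mask() above. */
static unsigned long hpt_npte(unsigned int order) { return 1UL << (order - 4); }
static unsigned long hpt_mask(unsigned int order) { return (1UL << (order - 7)) - 1; }

int main(void)
{
        unsigned int order = 24;                        /* a 16 MiB HPT */

        printf("bytes = %lu\n", 1UL << order);          /* 16777216 */
        printf("npte  = %lu\n", hpt_npte(order));       /* 1048576: 16-byte HPTEs */
        printf("mask  = 0x%lx\n", hpt_mask(order));     /* 0x1ffff: 131072 HPTEGs of 128 bytes */
        return 0;
}

Plugging in the bounds, order 18 gives a 256 KiB table and order 46 gives 64 TiB, matching the comment above.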
|
||||
|
|
|
@ -241,12 +241,24 @@ struct kvm_arch_memory_slot {
|
|||
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
|
||||
};
|
||||
|
||||
struct kvm_hpt_info {
|
||||
/* Host virtual (linear mapping) address of guest HPT */
|
||||
unsigned long virt;
|
||||
/* Array of reverse mapping entries for each guest HPTE */
|
||||
struct revmap_entry *rev;
|
||||
/* Guest HPT size is 2**(order) bytes */
|
||||
u32 order;
|
||||
/* 1 if HPT allocated with CMA, 0 otherwise */
|
||||
int cma;
|
||||
};
|
||||
|
||||
struct kvm_resize_hpt;
|
||||
|
||||
struct kvm_arch {
|
||||
unsigned int lpid;
|
||||
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||
unsigned int tlb_sets;
|
||||
unsigned long hpt_virt;
|
||||
struct revmap_entry *revmap;
|
||||
struct kvm_hpt_info hpt;
|
||||
atomic64_t mmio_update;
|
||||
unsigned int host_lpid;
|
||||
unsigned long host_lpcr;
|
||||
|
@ -256,20 +268,17 @@ struct kvm_arch {
|
|||
unsigned long lpcr;
|
||||
unsigned long vrma_slb_v;
|
||||
int hpte_setup_done;
|
||||
u32 hpt_order;
|
||||
atomic_t vcpus_running;
|
||||
u32 online_vcores;
|
||||
unsigned long hpt_npte;
|
||||
unsigned long hpt_mask;
|
||||
atomic_t hpte_mod_interest;
|
||||
cpumask_t need_tlb_flush;
|
||||
cpumask_t cpu_in_guest;
|
||||
int hpt_cma_alloc;
|
||||
u8 radix;
|
||||
pgd_t *pgtable;
|
||||
u64 process_table;
|
||||
struct dentry *debugfs_dir;
|
||||
struct dentry *htab_dentry;
|
||||
struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */
|
||||
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
|
||||
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
|
||||
struct mutex hpt_mutex;
|
||||
|
|
|
@ -155,9 +155,10 @@ extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
|
|||
extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu);
|
||||
extern void kvmppc_map_magic(struct kvm_vcpu *vcpu);
|
||||
|
||||
extern long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp);
|
||||
extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp);
|
||||
extern void kvmppc_free_hpt(struct kvm *kvm);
|
||||
extern int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order);
|
||||
extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info);
|
||||
extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order);
|
||||
extern void kvmppc_free_hpt(struct kvm_hpt_info *info);
|
||||
extern long kvmppc_prepare_vrma(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem);
|
||||
extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
|
||||
|
@ -186,8 +187,8 @@ extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
|
|||
unsigned long tce_value, unsigned long npages);
|
||||
extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
|
||||
unsigned long ioba);
|
||||
extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
|
||||
extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
|
||||
extern struct page *kvm_alloc_hpt_cma(unsigned long nr_pages);
|
||||
extern void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages);
|
||||
extern int kvmppc_core_init_vm(struct kvm *kvm);
|
||||
extern void kvmppc_core_destroy_vm(struct kvm *kvm);
|
||||
extern void kvmppc_core_free_memslot(struct kvm *kvm,
|
||||
|
@ -214,6 +215,10 @@ extern void kvmppc_bookehv_exit(void);
|
|||
extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
|
||||
|
||||
extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
|
||||
extern long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
|
||||
struct kvm_ppc_resize_hpt *rhpt);
|
||||
extern long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
|
||||
struct kvm_ppc_resize_hpt *rhpt);
|
||||
|
||||
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
|
||||
|
||||
|
|
|
@ -633,5 +633,7 @@ struct kvm_ppc_rmmu_info {
|
|||
#define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40)
|
||||
#define KVM_XICS_MASKED (1ULL << 41)
|
||||
#define KVM_XICS_PENDING (1ULL << 42)
|
||||
#define KVM_XICS_PRESENTED (1ULL << 43)
|
||||
#define KVM_XICS_QUEUED (1ULL << 44)
|
||||
|
||||
#endif /* __LINUX_KVM_POWERPC_H */
|
||||
|
|
|
@ -224,7 +224,8 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
|
|||
ptem = kvmppc_mmu_book3s_32_get_ptem(sre, eaddr, primary);
|
||||
|
||||
if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) {
|
||||
printk(KERN_ERR "KVM: Can't copy data from 0x%lx!\n", ptegp);
|
||||
printk_ratelimited(KERN_ERR
|
||||
"KVM: Can't copy data from 0x%lx!\n", ptegp);
|
||||
goto no_page_found;
|
||||
}
|
||||
|
||||
|
|
|
@ -265,7 +265,8 @@ do_second:
|
|||
goto no_page_found;
|
||||
|
||||
if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) {
|
||||
printk(KERN_ERR "KVM can't copy data from 0x%lx!\n", ptegp);
|
||||
printk_ratelimited(KERN_ERR
|
||||
"KVM: Can't copy data from 0x%lx!\n", ptegp);
|
||||
goto no_page_found;
|
||||
}
|
||||
|
||||
|
|
|
@ -40,84 +40,101 @@
|
|||
|
||||
#include "trace_hv.h"
|
||||
|
||||
/* Power architecture requires HPT is at least 256kB */
|
||||
#define PPC_MIN_HPT_ORDER 18
|
||||
//#define DEBUG_RESIZE_HPT 1
|
||||
|
||||
#ifdef DEBUG_RESIZE_HPT
|
||||
#define resize_hpt_debug(resize, ...) \
|
||||
do { \
|
||||
printk(KERN_DEBUG "RESIZE HPT %p: ", resize); \
|
||||
printk(__VA_ARGS__); \
|
||||
} while (0)
|
||||
#else
|
||||
#define resize_hpt_debug(resize, ...) \
|
||||
do { } while (0)
|
||||
#endif
|
||||
|
||||
static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
|
||||
long pte_index, unsigned long pteh,
|
||||
unsigned long ptel, unsigned long *pte_idx_ret);
|
||||
|
||||
struct kvm_resize_hpt {
|
||||
/* These fields read-only after init */
|
||||
struct kvm *kvm;
|
||||
struct work_struct work;
|
||||
u32 order;
|
||||
|
||||
/* These fields protected by kvm->lock */
|
||||
int error;
|
||||
bool prepare_done;
|
||||
|
||||
/* Private to the work thread, until prepare_done is true,
|
||||
* then protected by kvm->resize_hpt_sem */
|
||||
struct kvm_hpt_info hpt;
|
||||
};
|
||||
|
||||
static void kvmppc_rmap_reset(struct kvm *kvm);
|
||||
|
||||
long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
|
||||
int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
|
||||
{
|
||||
unsigned long hpt = 0;
|
||||
struct revmap_entry *rev;
|
||||
int cma = 0;
|
||||
struct page *page = NULL;
|
||||
long order = KVM_DEFAULT_HPT_ORDER;
|
||||
struct revmap_entry *rev;
|
||||
unsigned long npte;
|
||||
|
||||
if (htab_orderp) {
|
||||
order = *htab_orderp;
|
||||
if (order < PPC_MIN_HPT_ORDER)
|
||||
order = PPC_MIN_HPT_ORDER;
|
||||
}
|
||||
if ((order < PPC_MIN_HPT_ORDER) || (order > PPC_MAX_HPT_ORDER))
|
||||
return -EINVAL;
|
||||
|
||||
kvm->arch.hpt_cma_alloc = 0;
|
||||
page = kvm_alloc_hpt(1ul << (order - PAGE_SHIFT));
|
||||
page = kvm_alloc_hpt_cma(1ul << (order - PAGE_SHIFT));
|
||||
if (page) {
|
||||
hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
|
||||
memset((void *)hpt, 0, (1ul << order));
|
||||
kvm->arch.hpt_cma_alloc = 1;
|
||||
cma = 1;
|
||||
}
|
||||
|
||||
/* Lastly try successively smaller sizes from the page allocator */
|
||||
/* Only do this if userspace didn't specify a size via ioctl */
|
||||
while (!hpt && order > PPC_MIN_HPT_ORDER && !htab_orderp) {
|
||||
hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
|
||||
__GFP_NOWARN, order - PAGE_SHIFT);
|
||||
if (!hpt)
|
||||
--order;
|
||||
}
|
||||
if (!hpt)
|
||||
hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT
|
||||
|__GFP_NOWARN, order - PAGE_SHIFT);
|
||||
|
||||
if (!hpt)
|
||||
return -ENOMEM;
|
||||
|
||||
kvm->arch.hpt_virt = hpt;
|
||||
kvm->arch.hpt_order = order;
|
||||
/* HPTEs are 2**4 bytes long */
|
||||
kvm->arch.hpt_npte = 1ul << (order - 4);
|
||||
/* 128 (2**7) bytes in each HPTEG */
|
||||
kvm->arch.hpt_mask = (1ul << (order - 7)) - 1;
|
||||
|
||||
atomic64_set(&kvm->arch.mmio_update, 0);
|
||||
npte = 1ul << (order - 4);
|
||||
|
||||
/* Allocate reverse map array */
|
||||
rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte);
|
||||
rev = vmalloc(sizeof(struct revmap_entry) * npte);
|
||||
if (!rev) {
|
||||
pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
|
||||
goto out_freehpt;
|
||||
pr_err("kvmppc_allocate_hpt: Couldn't alloc reverse map array\n");
|
||||
if (cma)
|
||||
kvm_free_hpt_cma(page, 1 << (order - PAGE_SHIFT));
|
||||
else
|
||||
free_pages(hpt, order - PAGE_SHIFT);
|
||||
return -ENOMEM;
|
||||
}
|
||||
kvm->arch.revmap = rev;
|
||||
kvm->arch.sdr1 = __pa(hpt) | (order - 18);
|
||||
|
||||
pr_info("KVM guest htab at %lx (order %ld), LPID %x\n",
|
||||
hpt, order, kvm->arch.lpid);
|
||||
info->order = order;
|
||||
info->virt = hpt;
|
||||
info->cma = cma;
|
||||
info->rev = rev;
|
||||
|
||||
if (htab_orderp)
|
||||
*htab_orderp = order;
|
||||
return 0;
|
||||
|
||||
out_freehpt:
|
||||
if (kvm->arch.hpt_cma_alloc)
|
||||
kvm_release_hpt(page, 1 << (order - PAGE_SHIFT));
|
||||
else
|
||||
free_pages(hpt, order - PAGE_SHIFT);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
|
||||
void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info)
|
||||
{
|
||||
atomic64_set(&kvm->arch.mmio_update, 0);
|
||||
kvm->arch.hpt = *info;
|
||||
kvm->arch.sdr1 = __pa(info->virt) | (info->order - 18);
|
||||
|
||||
pr_debug("KVM guest htab at %lx (order %ld), LPID %x\n",
|
||||
info->virt, (long)info->order, kvm->arch.lpid);
|
||||
}
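
The value or'd into the low bits of SDR1 here is the HTABSIZE field: the table occupies 2^(18 + HTABSIZE) bytes, so the encoding is simply order - 18 and the minimum order of 18 encodes as zero. A tiny stand-alone check of that encoding (userspace illustration, not kernel code):

#include <assert.h>

/* HTABSIZE encoding used when kvmppc_set_hpt() builds SDR1 above. */
static unsigned long htabsize(unsigned int order) { return order - 18; }

int main(void)
{
        assert(htabsize(18) == 0);      /* 256 KiB minimum HPT */
        assert(htabsize(24) == 6);      /* 16 MiB default HPT */
        return 0;
}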
|
||||
|
||||
long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
|
||||
{
|
||||
long err = -EBUSY;
|
||||
long order;
|
||||
struct kvm_hpt_info info;
|
||||
|
||||
if (kvm_is_radix(kvm))
|
||||
return -EINVAL;
|
||||
|
@ -132,36 +149,44 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
|
|||
goto out;
|
||||
}
|
||||
}
|
||||
if (kvm->arch.hpt_virt) {
|
||||
order = kvm->arch.hpt_order;
|
||||
if (kvm->arch.hpt.order == order) {
|
||||
/* We already have a suitable HPT */
|
||||
|
||||
/* Set the entire HPT to 0, i.e. invalid HPTEs */
|
||||
memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
|
||||
memset((void *)kvm->arch.hpt.virt, 0, 1ul << order);
|
||||
/*
|
||||
* Reset all the reverse-mapping chains for all memslots
|
||||
*/
|
||||
kvmppc_rmap_reset(kvm);
|
||||
/* Ensure that each vcpu will flush its TLB on next entry. */
|
||||
cpumask_setall(&kvm->arch.need_tlb_flush);
|
||||
*htab_orderp = order;
|
||||
err = 0;
|
||||
} else {
|
||||
err = kvmppc_alloc_hpt(kvm, htab_orderp);
|
||||
order = *htab_orderp;
|
||||
goto out;
|
||||
}
|
||||
out:
|
||||
|
||||
if (kvm->arch.hpt.virt)
|
||||
kvmppc_free_hpt(&kvm->arch.hpt);
|
||||
|
||||
err = kvmppc_allocate_hpt(&info, order);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
kvmppc_set_hpt(kvm, &info);
|
||||
|
||||
out:
|
||||
mutex_unlock(&kvm->lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
void kvmppc_free_hpt(struct kvm *kvm)
void kvmppc_free_hpt(struct kvm_hpt_info *info)
{
        vfree(kvm->arch.revmap);
        if (kvm->arch.hpt_cma_alloc)
                kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt),
                                1 << (kvm->arch.hpt_order - PAGE_SHIFT));
        else if (kvm->arch.hpt_virt)
                free_pages(kvm->arch.hpt_virt,
                           kvm->arch.hpt_order - PAGE_SHIFT);
        vfree(info->rev);
        if (info->cma)
                kvm_free_hpt_cma(virt_to_page(info->virt),
                                 1 << (info->order - PAGE_SHIFT));
        else if (info->virt)
                free_pages(info->virt, info->order - PAGE_SHIFT);
        info->virt = 0;
        info->order = 0;
}
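
With the old per-kvm fields replaced by struct kvm_hpt_info, allocation, installation and teardown of the guest HPT are now decoupled, which is what lets the resize code below build a second table on the side. A condensed sketch of the new calling convention (illustration only; the real callers are kvmppc_alloc_reset_hpt() above and kvmppc_hv_setup_htab_rma() further down):

/* Illustration of the kvm_hpt_info-based API introduced in this hunk. */
static int example_install_hpt(struct kvm *kvm, u32 order)
{
        struct kvm_hpt_info info;
        int err;

        err = kvmppc_allocate_hpt(&info, order);        /* fills virt/rev/order/cma */
        if (err < 0)
                return err;

        /* Publish it: kvm->arch.hpt now owns the allocation and SDR1 is set. */
        kvmppc_set_hpt(kvm, &info);
        return 0;
}

/* On VM teardown the owner frees it: kvmppc_free_hpt(&kvm->arch.hpt); */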
|
||||
|
||||
/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
|
||||
|
@ -196,8 +221,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
|
|||
if (npages > 1ul << (40 - porder))
|
||||
npages = 1ul << (40 - porder);
|
||||
/* Can't use more than 1 HPTE per HPTEG */
|
||||
if (npages > kvm->arch.hpt_mask + 1)
|
||||
npages = kvm->arch.hpt_mask + 1;
|
||||
if (npages > kvmppc_hpt_mask(&kvm->arch.hpt) + 1)
|
||||
npages = kvmppc_hpt_mask(&kvm->arch.hpt) + 1;
|
||||
|
||||
hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
|
||||
HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
|
||||
|
@ -207,7 +232,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
|
|||
for (i = 0; i < npages; ++i) {
|
||||
addr = i << porder;
|
||||
/* can't use hpt_hash since va > 64 bits */
|
||||
hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & kvm->arch.hpt_mask;
|
||||
hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25)))
|
||||
& kvmppc_hpt_mask(&kvm->arch.hpt);
|
||||
/*
|
||||
* We assume that the hash table is empty and no
|
||||
* vcpus are using it at this stage. Since we create
|
||||
|
@ -340,11 +366,11 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
|||
preempt_enable();
|
||||
return -ENOENT;
|
||||
}
|
||||
hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
|
||||
hptep = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
|
||||
v = orig_v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_300))
|
||||
v = hpte_new_to_old_v(v, be64_to_cpu(hptep[1]));
|
||||
gr = kvm->arch.revmap[index].guest_rpte;
|
||||
gr = kvm->arch.hpt.rev[index].guest_rpte;
|
||||
|
||||
unlock_hpte(hptep, orig_v);
|
||||
preempt_enable();
|
||||
|
@ -485,8 +511,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
|||
}
|
||||
}
|
||||
index = vcpu->arch.pgfault_index;
|
||||
hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
|
||||
rev = &kvm->arch.revmap[index];
|
||||
hptep = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
|
||||
rev = &kvm->arch.hpt.rev[index];
|
||||
preempt_disable();
|
||||
while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
|
||||
cpu_relax();
|
||||
|
@ -745,13 +771,53 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
|
|||
return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
|
||||
}
|
||||
|
||||
/* Must be called with both HPTE and rmap locked */
|
||||
static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
|
||||
unsigned long *rmapp, unsigned long gfn)
|
||||
{
|
||||
__be64 *hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
|
||||
struct revmap_entry *rev = kvm->arch.hpt.rev;
|
||||
unsigned long j, h;
|
||||
unsigned long ptel, psize, rcbits;
|
||||
|
||||
j = rev[i].forw;
|
||||
if (j == i) {
|
||||
/* chain is now empty */
|
||||
*rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
|
||||
} else {
|
||||
/* remove i from chain */
|
||||
h = rev[i].back;
|
||||
rev[h].forw = j;
|
||||
rev[j].back = h;
|
||||
rev[i].forw = rev[i].back = i;
|
||||
*rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
|
||||
}
|
||||
|
||||
/* Now check and modify the HPTE */
|
||||
ptel = rev[i].guest_rpte;
|
||||
psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel);
|
||||
if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
|
||||
hpte_rpn(ptel, psize) == gfn) {
|
||||
hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
|
||||
kvmppc_invalidate_hpte(kvm, hptep, i);
|
||||
hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
|
||||
/* Harvest R and C */
|
||||
rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
|
||||
*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
|
||||
if (rcbits & HPTE_R_C)
|
||||
kvmppc_update_rmap_change(rmapp, psize);
|
||||
if (rcbits & ~rev[i].guest_rpte) {
|
||||
rev[i].guest_rpte = ptel | rcbits;
|
||||
note_hpte_modification(kvm, &rev[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn)
|
||||
{
|
||||
struct revmap_entry *rev = kvm->arch.revmap;
|
||||
unsigned long h, i, j;
|
||||
unsigned long i;
|
||||
__be64 *hptep;
|
||||
unsigned long ptel, psize, rcbits;
|
||||
unsigned long *rmapp;
|
||||
|
||||
rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
|
||||
|
@ -768,7 +834,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
|||
* rmap chain lock.
|
||||
*/
|
||||
i = *rmapp & KVMPPC_RMAP_INDEX;
|
||||
hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4));
|
||||
hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
|
||||
if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
|
||||
/* unlock rmap before spinning on the HPTE lock */
|
||||
unlock_rmap(rmapp);
|
||||
|
@ -776,37 +842,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
|||
cpu_relax();
|
||||
continue;
|
||||
}
|
||||
j = rev[i].forw;
|
||||
if (j == i) {
|
||||
/* chain is now empty */
|
||||
*rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
|
||||
} else {
|
||||
/* remove i from chain */
|
||||
h = rev[i].back;
|
||||
rev[h].forw = j;
|
||||
rev[j].back = h;
|
||||
rev[i].forw = rev[i].back = i;
|
||||
*rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
|
||||
}
|
||||
|
||||
/* Now check and modify the HPTE */
|
||||
ptel = rev[i].guest_rpte;
|
||||
psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel);
|
||||
if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
|
||||
hpte_rpn(ptel, psize) == gfn) {
|
||||
hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
|
||||
kvmppc_invalidate_hpte(kvm, hptep, i);
|
||||
hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
|
||||
/* Harvest R and C */
|
||||
rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
|
||||
*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
|
||||
if (rcbits & HPTE_R_C)
|
||||
kvmppc_update_rmap_change(rmapp, psize);
|
||||
if (rcbits & ~rev[i].guest_rpte) {
|
||||
rev[i].guest_rpte = ptel | rcbits;
|
||||
note_hpte_modification(kvm, &rev[i]);
|
||||
}
|
||||
}
|
||||
kvmppc_unmap_hpte(kvm, i, rmapp, gfn);
|
||||
unlock_rmap(rmapp);
|
||||
__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
|
||||
}
|
||||
|
@ -860,7 +897,7 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
|
|||
static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn)
|
||||
{
|
||||
struct revmap_entry *rev = kvm->arch.revmap;
|
||||
struct revmap_entry *rev = kvm->arch.hpt.rev;
|
||||
unsigned long head, i, j;
|
||||
__be64 *hptep;
|
||||
int ret = 0;
|
||||
|
@ -880,7 +917,7 @@ static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
|||
|
||||
i = head = *rmapp & KVMPPC_RMAP_INDEX;
|
||||
do {
|
||||
hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4));
|
||||
hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
|
||||
j = rev[i].forw;
|
||||
|
||||
/* If this HPTE isn't referenced, ignore it */
|
||||
|
@ -923,7 +960,7 @@ int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
|
|||
static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn)
|
||||
{
|
||||
struct revmap_entry *rev = kvm->arch.revmap;
|
||||
struct revmap_entry *rev = kvm->arch.hpt.rev;
|
||||
unsigned long head, i, j;
|
||||
unsigned long *hp;
|
||||
int ret = 1;
|
||||
|
@ -940,7 +977,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
|||
if (*rmapp & KVMPPC_RMAP_PRESENT) {
|
||||
i = head = *rmapp & KVMPPC_RMAP_INDEX;
|
||||
do {
|
||||
hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4));
|
||||
hp = (unsigned long *)(kvm->arch.hpt.virt + (i << 4));
|
||||
j = rev[i].forw;
|
||||
if (be64_to_cpu(hp[1]) & HPTE_R_R)
|
||||
goto out;
|
||||
|
@ -980,7 +1017,7 @@ static int vcpus_running(struct kvm *kvm)
|
|||
*/
|
||||
static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
|
||||
{
|
||||
struct revmap_entry *rev = kvm->arch.revmap;
|
||||
struct revmap_entry *rev = kvm->arch.hpt.rev;
|
||||
unsigned long head, i, j;
|
||||
unsigned long n;
|
||||
unsigned long v, r;
|
||||
|
@ -1005,7 +1042,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
|
|||
i = head = *rmapp & KVMPPC_RMAP_INDEX;
|
||||
do {
|
||||
unsigned long hptep1;
|
||||
hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4));
|
||||
hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
|
||||
j = rev[i].forw;
|
||||
|
||||
/*
|
||||
|
@ -1171,6 +1208,363 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
|
|||
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
||||
}
|
||||
|
||||
/*
|
||||
* HPT resizing
|
||||
*/
|
||||
static int resize_hpt_allocate(struct kvm_resize_hpt *resize)
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = kvmppc_allocate_hpt(&resize->hpt, resize->order);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
|
||||
resize_hpt_debug(resize, "resize_hpt_allocate(): HPT @ 0x%lx\n",
|
||||
resize->hpt.virt);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
|
||||
unsigned long idx)
|
||||
{
|
||||
struct kvm *kvm = resize->kvm;
|
||||
struct kvm_hpt_info *old = &kvm->arch.hpt;
|
||||
struct kvm_hpt_info *new = &resize->hpt;
|
||||
unsigned long old_hash_mask = (1ULL << (old->order - 7)) - 1;
|
||||
unsigned long new_hash_mask = (1ULL << (new->order - 7)) - 1;
|
||||
__be64 *hptep, *new_hptep;
|
||||
unsigned long vpte, rpte, guest_rpte;
|
||||
int ret;
|
||||
struct revmap_entry *rev;
|
||||
unsigned long apsize, psize, avpn, pteg, hash;
|
||||
unsigned long new_idx, new_pteg, replace_vpte;
|
||||
|
||||
hptep = (__be64 *)(old->virt + (idx << 4));
|
||||
|
||||
/* Guest is stopped, so new HPTEs can't be added or faulted
|
||||
* in, only unmapped or altered by host actions. So, it's
|
||||
* safe to check this before we take the HPTE lock */
|
||||
vpte = be64_to_cpu(hptep[0]);
|
||||
if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT))
|
||||
return 0; /* nothing to do */
|
||||
|
||||
while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
|
||||
cpu_relax();
|
||||
|
||||
vpte = be64_to_cpu(hptep[0]);
|
||||
|
||||
ret = 0;
|
||||
if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT))
|
||||
/* Nothing to do */
|
||||
goto out;
|
||||
|
||||
/* Unmap */
|
||||
rev = &old->rev[idx];
|
||||
guest_rpte = rev->guest_rpte;
|
||||
|
||||
ret = -EIO;
|
||||
apsize = hpte_page_size(vpte, guest_rpte);
|
||||
if (!apsize)
|
||||
goto out;
|
||||
|
||||
if (vpte & HPTE_V_VALID) {
|
||||
unsigned long gfn = hpte_rpn(guest_rpte, apsize);
|
||||
int srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
struct kvm_memory_slot *memslot =
|
||||
__gfn_to_memslot(kvm_memslots(kvm), gfn);
|
||||
|
||||
if (memslot) {
|
||||
unsigned long *rmapp;
|
||||
rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
|
||||
|
||||
lock_rmap(rmapp);
|
||||
kvmppc_unmap_hpte(kvm, idx, rmapp, gfn);
|
||||
unlock_rmap(rmapp);
|
||||
}
|
||||
|
||||
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
||||
}
|
||||
|
||||
/* Reload PTE after unmap */
|
||||
vpte = be64_to_cpu(hptep[0]);
|
||||
|
||||
BUG_ON(vpte & HPTE_V_VALID);
|
||||
BUG_ON(!(vpte & HPTE_V_ABSENT));
|
||||
|
||||
ret = 0;
|
||||
if (!(vpte & HPTE_V_BOLTED))
|
||||
goto out;
|
||||
|
||||
rpte = be64_to_cpu(hptep[1]);
|
||||
psize = hpte_base_page_size(vpte, rpte);
|
||||
avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23);
|
||||
pteg = idx / HPTES_PER_GROUP;
|
||||
if (vpte & HPTE_V_SECONDARY)
|
||||
pteg = ~pteg;
|
||||
|
||||
if (!(vpte & HPTE_V_1TB_SEG)) {
|
||||
unsigned long offset, vsid;
|
||||
|
||||
/* We only have 28 - 23 bits of offset in avpn */
|
||||
offset = (avpn & 0x1f) << 23;
|
||||
vsid = avpn >> 5;
|
||||
/* We can find more bits from the pteg value */
|
||||
if (psize < (1ULL << 23))
|
||||
offset |= ((vsid ^ pteg) & old_hash_mask) * psize;
|
||||
|
||||
hash = vsid ^ (offset / psize);
|
||||
} else {
|
||||
unsigned long offset, vsid;
|
||||
|
||||
/* We only have 40 - 23 bits of seg_off in avpn */
|
||||
offset = (avpn & 0x1ffff) << 23;
|
||||
vsid = avpn >> 17;
|
||||
if (psize < (1ULL << 23))
|
||||
offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) * psize;
|
||||
|
||||
hash = vsid ^ (vsid << 25) ^ (offset / psize);
|
||||
}
|
||||
|
||||
new_pteg = hash & new_hash_mask;
|
||||
if (vpte & HPTE_V_SECONDARY) {
|
||||
BUG_ON(~pteg != (hash & old_hash_mask));
|
||||
new_pteg = ~new_pteg;
|
||||
} else {
|
||||
BUG_ON(pteg != (hash & old_hash_mask));
|
||||
}
|
||||
|
||||
new_idx = new_pteg * HPTES_PER_GROUP + (idx % HPTES_PER_GROUP);
|
||||
new_hptep = (__be64 *)(new->virt + (new_idx << 4));
|
||||
|
||||
replace_vpte = be64_to_cpu(new_hptep[0]);
|
||||
|
||||
if (replace_vpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
|
||||
BUG_ON(new->order >= old->order);
|
||||
|
||||
if (replace_vpte & HPTE_V_BOLTED) {
|
||||
if (vpte & HPTE_V_BOLTED)
|
||||
/* Bolted collision, nothing we can do */
|
||||
ret = -ENOSPC;
|
||||
/* Discard the new HPTE */
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Discard the previous HPTE */
|
||||
}
|
||||
|
||||
new_hptep[1] = cpu_to_be64(rpte);
|
||||
new->rev[new_idx].guest_rpte = guest_rpte;
|
||||
/* No need for a barrier, since new HPT isn't active */
|
||||
new_hptep[0] = cpu_to_be64(vpte);
|
||||
unlock_hpte(new_hptep, vpte);
|
||||
|
||||
out:
|
||||
unlock_hpte(hptep, vpte);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int resize_hpt_rehash(struct kvm_resize_hpt *resize)
|
||||
{
|
||||
struct kvm *kvm = resize->kvm;
|
||||
unsigned long i;
|
||||
int rc;
|
||||
|
||||
/*
|
||||
* resize_hpt_rehash_hpte() doesn't handle the new-format HPTEs
|
||||
* that POWER9 uses, and could well hit a BUG_ON on POWER9.
|
||||
*/
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_300))
|
||||
return -EIO;
|
||||
for (i = 0; i < kvmppc_hpt_npte(&kvm->arch.hpt); i++) {
|
||||
rc = resize_hpt_rehash_hpte(resize, i);
|
||||
if (rc != 0)
|
||||
return rc;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
|
||||
{
|
||||
struct kvm *kvm = resize->kvm;
|
||||
struct kvm_hpt_info hpt_tmp;
|
||||
|
||||
/* Exchange the pending tables in the resize structure with
|
||||
* the active tables */
|
||||
|
||||
resize_hpt_debug(resize, "resize_hpt_pivot()\n");
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
|
||||
hpt_tmp = kvm->arch.hpt;
|
||||
kvmppc_set_hpt(kvm, &resize->hpt);
|
||||
resize->hpt = hpt_tmp;
|
||||
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
|
||||
synchronize_srcu_expedited(&kvm->srcu);
|
||||
|
||||
resize_hpt_debug(resize, "resize_hpt_pivot() done\n");
|
||||
}
|
||||
|
||||
static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
|
||||
{
|
||||
BUG_ON(kvm->arch.resize_hpt != resize);
|
||||
|
||||
if (!resize)
|
||||
return;
|
||||
|
||||
if (resize->hpt.virt)
|
||||
kvmppc_free_hpt(&resize->hpt);
|
||||
|
||||
kvm->arch.resize_hpt = NULL;
|
||||
kfree(resize);
|
||||
}
|
||||
|
||||
static void resize_hpt_prepare_work(struct work_struct *work)
|
||||
{
|
||||
struct kvm_resize_hpt *resize = container_of(work,
|
||||
struct kvm_resize_hpt,
|
||||
work);
|
||||
struct kvm *kvm = resize->kvm;
|
||||
int err;
|
||||
|
||||
resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
|
||||
resize->order);
|
||||
|
||||
err = resize_hpt_allocate(resize);
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
resize->error = err;
|
||||
resize->prepare_done = true;
|
||||
|
||||
mutex_unlock(&kvm->lock);
|
||||
}
|
||||
|
||||
long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
|
||||
struct kvm_ppc_resize_hpt *rhpt)
|
||||
{
|
||||
unsigned long flags = rhpt->flags;
|
||||
unsigned long shift = rhpt->shift;
|
||||
struct kvm_resize_hpt *resize;
|
||||
int ret;
|
||||
|
||||
if (flags != 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (shift && ((shift < 18) || (shift > 46)))
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
resize = kvm->arch.resize_hpt;
|
||||
|
||||
if (resize) {
|
||||
if (resize->order == shift) {
|
||||
/* Suitable resize in progress */
|
||||
if (resize->prepare_done) {
|
||||
ret = resize->error;
|
||||
if (ret != 0)
|
||||
resize_hpt_release(kvm, resize);
|
||||
} else {
|
||||
ret = 100; /* estimated time in ms */
|
||||
}
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* not suitable, cancel it */
|
||||
resize_hpt_release(kvm, resize);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
if (!shift)
|
||||
goto out; /* nothing to do */
|
||||
|
||||
/* start new resize */
|
||||
|
||||
resize = kzalloc(sizeof(*resize), GFP_KERNEL);
|
||||
resize->order = shift;
|
||||
resize->kvm = kvm;
|
||||
INIT_WORK(&resize->work, resize_hpt_prepare_work);
|
||||
kvm->arch.resize_hpt = resize;
|
||||
|
||||
schedule_work(&resize->work);
|
||||
|
||||
ret = 100; /* estimated time in ms */
|
||||
|
||||
out:
|
||||
mutex_unlock(&kvm->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void resize_hpt_boot_vcpu(void *opaque)
|
||||
{
|
||||
/* Nothing to do, just force a KVM exit */
|
||||
}
|
||||
|
||||
long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
|
||||
struct kvm_ppc_resize_hpt *rhpt)
|
||||
{
|
||||
unsigned long flags = rhpt->flags;
|
||||
unsigned long shift = rhpt->shift;
|
||||
struct kvm_resize_hpt *resize;
|
||||
long ret;
|
||||
|
||||
if (flags != 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (shift && ((shift < 18) || (shift > 46)))
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
resize = kvm->arch.resize_hpt;
|
||||
|
||||
/* This shouldn't be possible */
|
||||
ret = -EIO;
|
||||
if (WARN_ON(!kvm->arch.hpte_setup_done))
|
||||
goto out_no_hpt;
|
||||
|
||||
/* Stop VCPUs from running while we mess with the HPT */
|
||||
kvm->arch.hpte_setup_done = 0;
|
||||
smp_mb();
|
||||
|
||||
/* Boot all CPUs out of the guest so they re-read
|
||||
* hpte_setup_done */
|
||||
on_each_cpu(resize_hpt_boot_vcpu, NULL, 1);
|
||||
|
||||
ret = -ENXIO;
|
||||
if (!resize || (resize->order != shift))
|
||||
goto out;
|
||||
|
||||
ret = -EBUSY;
|
||||
if (!resize->prepare_done)
|
||||
goto out;
|
||||
|
||||
ret = resize->error;
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
|
||||
ret = resize_hpt_rehash(resize);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
|
||||
resize_hpt_pivot(resize);
|
||||
|
||||
out:
|
||||
/* Let VCPUs run again */
|
||||
kvm->arch.hpte_setup_done = 1;
|
||||
smp_mb();
|
||||
out_no_hpt:
|
||||
resize_hpt_release(kvm, resize);
|
||||
mutex_unlock(&kvm->lock);
|
||||
return ret;
|
||||
}
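
Between them, KVM_PPC_RESIZE_HPT_PREPARE and KVM_PPC_RESIZE_HPT_COMMIT give userspace a two-phase protocol: PREPARE kicks off (or polls) the background allocation and returns an estimate in milliseconds while it is still running, zero once the new table is ready, or an error; COMMIT then performs the rehash and pivot. A hedged userspace sketch of how a VM manager might drive it (not part of this series; assumes a KVM VM file descriptor and a uapi <linux/kvm.h> new enough to define the ioctls and struct kvm_ppc_resize_hpt):

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int resize_guest_hpt(int vm_fd, unsigned int shift)
{
        struct kvm_ppc_resize_hpt rhpt = { .flags = 0, .shift = shift };
        int ret;

        do {
                ret = ioctl(vm_fd, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
                if (ret > 0)
                        usleep(ret * 1000);     /* positive return is an estimate in ms */
        } while (ret > 0);
        if (ret < 0)
                return ret;                     /* -1 with errno set: preparation failed */

        /* New HPT is ready; rehash into it and pivot. */
        return ioctl(vm_fd, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
}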
|
||||
|
||||
/*
|
||||
* Functions for reading and writing the hash table via reads and
|
||||
* writes on a file descriptor.
|
||||
|
@ -1311,8 +1705,8 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
|
|||
flags = ctx->flags;
|
||||
|
||||
i = ctx->index;
|
||||
hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
|
||||
revp = kvm->arch.revmap + i;
|
||||
hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
|
||||
revp = kvm->arch.hpt.rev + i;
|
||||
lbuf = (unsigned long __user *)buf;
|
||||
|
||||
nb = 0;
|
||||
|
@ -1327,7 +1721,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
|
|||
|
||||
/* Skip uninteresting entries, i.e. clean on not-first pass */
|
||||
if (!first_pass) {
|
||||
while (i < kvm->arch.hpt_npte &&
|
||||
while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
|
||||
!hpte_dirty(revp, hptp)) {
|
||||
++i;
|
||||
hptp += 2;
|
||||
|
@ -1337,7 +1731,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
|
|||
hdr.index = i;
|
||||
|
||||
/* Grab a series of valid entries */
|
||||
while (i < kvm->arch.hpt_npte &&
|
||||
while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
|
||||
hdr.n_valid < 0xffff &&
|
||||
nb + HPTE_SIZE < count &&
|
||||
record_hpte(flags, hptp, hpte, revp, 1, first_pass)) {
|
||||
|
@ -1353,7 +1747,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
|
|||
++revp;
|
||||
}
|
||||
/* Now skip invalid entries while we can */
|
||||
while (i < kvm->arch.hpt_npte &&
|
||||
while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
|
||||
hdr.n_invalid < 0xffff &&
|
||||
record_hpte(flags, hptp, hpte, revp, 0, first_pass)) {
|
||||
/* found an invalid entry */
|
||||
|
@ -1374,7 +1768,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
|
|||
}
|
||||
|
||||
/* Check if we've wrapped around the hash table */
|
||||
if (i >= kvm->arch.hpt_npte) {
|
||||
if (i >= kvmppc_hpt_npte(&kvm->arch.hpt)) {
|
||||
i = 0;
|
||||
ctx->first_pass = 0;
|
||||
break;
|
||||
|
@ -1433,11 +1827,11 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
|
|||
|
||||
err = -EINVAL;
|
||||
i = hdr.index;
|
||||
if (i >= kvm->arch.hpt_npte ||
|
||||
i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte)
|
||||
if (i >= kvmppc_hpt_npte(&kvm->arch.hpt) ||
|
||||
i + hdr.n_valid + hdr.n_invalid > kvmppc_hpt_npte(&kvm->arch.hpt))
|
||||
break;
|
||||
|
||||
hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
|
||||
hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
|
||||
lbuf = (unsigned long __user *)buf;
|
||||
for (j = 0; j < hdr.n_valid; ++j) {
|
||||
__be64 hpte_v;
|
||||
|
@ -1624,8 +2018,9 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
|
|||
|
||||
kvm = p->kvm;
|
||||
i = p->hpt_index;
|
||||
hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
|
||||
for (; len != 0 && i < kvm->arch.hpt_npte; ++i, hptp += 2) {
|
||||
hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
|
||||
for (; len != 0 && i < kvmppc_hpt_npte(&kvm->arch.hpt);
|
||||
++i, hptp += 2) {
|
||||
if (!(be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)))
|
||||
continue;
|
||||
|
||||
|
@ -1635,7 +2030,7 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
|
|||
cpu_relax();
|
||||
v = be64_to_cpu(hptp[0]) & ~HPTE_V_HVLOCK;
|
||||
hr = be64_to_cpu(hptp[1]);
|
||||
gr = kvm->arch.revmap[i].guest_rpte;
|
||||
gr = kvm->arch.hpt.rev[i].guest_rpte;
|
||||
unlock_hpte(hptp, v);
|
||||
preempt_enable();
|
||||
|
||||
|
|
|
@ -171,6 +171,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
|
|||
goto fail;
|
||||
}
|
||||
|
||||
ret = -ENOMEM;
|
||||
stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
|
||||
GFP_KERNEL);
|
||||
if (!stt)
|
||||
|
|
|
@ -182,7 +182,8 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
|
|||
++vcpu->stat.halt_wakeup;
|
||||
}
|
||||
|
||||
if (kvmppc_ipi_thread(vcpu->arch.thread_cpu))
|
||||
cpu = READ_ONCE(vcpu->arch.thread_cpu);
|
||||
if (cpu >= 0 && kvmppc_ipi_thread(cpu))
|
||||
return;
|
||||
|
||||
/* CPU points to the first thread of the core */
|
||||
|
@ -773,12 +774,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
tvcpu->arch.prodded = 1;
|
||||
smp_mb();
|
||||
if (vcpu->arch.ceded) {
|
||||
if (swait_active(&vcpu->wq)) {
|
||||
swake_up(&vcpu->wq);
|
||||
vcpu->stat.halt_wakeup++;
|
||||
}
|
||||
}
|
||||
if (tvcpu->arch.ceded)
|
||||
kvmppc_fast_vcpu_kick_hv(tvcpu);
|
||||
break;
|
||||
case H_CONFER:
|
||||
target = kvmppc_get_gpr(vcpu, 4);
|
||||
|
@ -2665,7 +2662,8 @@ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
|
|||
int i;
|
||||
|
||||
for_each_runnable_thread(i, vcpu, vc) {
|
||||
if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded)
|
||||
if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded ||
|
||||
vcpu->arch.prodded)
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -2851,7 +2849,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
|||
break;
|
||||
n_ceded = 0;
|
||||
for_each_runnable_thread(i, v, vc) {
|
||||
if (!v->arch.pending_exceptions)
|
||||
if (!v->arch.pending_exceptions && !v->arch.prodded)
|
||||
n_ceded += v->arch.ceded;
|
||||
else
|
||||
v->arch.ceded = 0;
|
||||
|
@ -3199,12 +3197,23 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
|
|||
goto out; /* another vcpu beat us to it */
|
||||
|
||||
/* Allocate hashed page table (if not done already) and reset it */
|
||||
if (!kvm->arch.hpt_virt) {
|
||||
err = kvmppc_alloc_hpt(kvm, NULL);
|
||||
if (err) {
|
||||
if (!kvm->arch.hpt.virt) {
|
||||
int order = KVM_DEFAULT_HPT_ORDER;
|
||||
struct kvm_hpt_info info;
|
||||
|
||||
err = kvmppc_allocate_hpt(&info, order);
|
||||
/* If we get here, it means userspace didn't specify a
|
||||
* size explicitly. So, try successively smaller
|
||||
* sizes if the default failed. */
|
||||
while ((err == -ENOMEM) && --order >= PPC_MIN_HPT_ORDER)
|
||||
err = kvmppc_allocate_hpt(&info, order);
|
||||
|
||||
if (err < 0) {
|
||||
pr_err("KVM: Couldn't alloc HPT\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
kvmppc_set_hpt(kvm, &info);
|
||||
}
|
||||
|
||||
/* Look up the memslot for guest physical address 0 */
|
||||
|
@ -3413,6 +3422,9 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
|
|||
|
||||
kvm->arch.lpcr = lpcr;
|
||||
|
||||
/* Initialization for future HPT resizes */
|
||||
kvm->arch.resize_hpt = NULL;
|
||||
|
||||
/*
|
||||
* Work out how many sets the TLB has, for the use of
|
||||
* the TLB invalidation loop in book3s_hv_rmhandlers.S.
|
||||
|
@ -3469,7 +3481,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
|
|||
if (kvm_is_radix(kvm))
|
||||
kvmppc_free_radix(kvm);
|
||||
else
|
||||
kvmppc_free_hpt(kvm);
|
||||
kvmppc_free_hpt(&kvm->arch.hpt);
|
||||
|
||||
kvmppc_free_pimap(kvm);
|
||||
}
|
||||
|
@ -3695,12 +3707,9 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
|
|||
r = -EFAULT;
|
||||
if (get_user(htab_order, (u32 __user *)argp))
|
||||
break;
|
||||
r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
|
||||
r = kvmppc_alloc_reset_hpt(kvm, htab_order);
|
||||
if (r)
|
||||
break;
|
||||
r = -EFAULT;
|
||||
if (put_user(htab_order, (u32 __user *)argp))
|
||||
break;
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
|
@ -3715,6 +3724,28 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
|
|||
break;
|
||||
}
|
||||
|
||||
case KVM_PPC_RESIZE_HPT_PREPARE: {
|
||||
struct kvm_ppc_resize_hpt rhpt;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&rhpt, argp, sizeof(rhpt)))
|
||||
break;
|
||||
|
||||
r = kvm_vm_ioctl_resize_hpt_prepare(kvm, &rhpt);
|
||||
break;
|
||||
}
|
||||
|
||||
case KVM_PPC_RESIZE_HPT_COMMIT: {
|
||||
struct kvm_ppc_resize_hpt rhpt;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&rhpt, argp, sizeof(rhpt)))
|
||||
break;
|
||||
|
||||
r = kvm_vm_ioctl_resize_hpt_commit(kvm, &rhpt);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
r = -ENOTTY;
|
||||
}
|
||||
|
|
|
@ -52,19 +52,19 @@ static int __init early_parse_kvm_cma_resv(char *p)
|
|||
}
|
||||
early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
|
||||
|
||||
struct page *kvm_alloc_hpt(unsigned long nr_pages)
|
||||
struct page *kvm_alloc_hpt_cma(unsigned long nr_pages)
|
||||
{
|
||||
VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
|
||||
|
||||
return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
|
||||
EXPORT_SYMBOL_GPL(kvm_alloc_hpt_cma);
|
||||
|
||||
void kvm_release_hpt(struct page *page, unsigned long nr_pages)
|
||||
void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages)
|
||||
{
|
||||
cma_release(kvm_cma, page, nr_pages);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_release_hpt);
|
||||
EXPORT_SYMBOL_GPL(kvm_free_hpt_cma);
|
||||
|
||||
/**
|
||||
* kvm_cma_reserve() - reserve area for kvm hash pagetable
|
||||
|
|
|
@ -86,10 +86,10 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
|
|||
|
||||
if (*rmap & KVMPPC_RMAP_PRESENT) {
|
||||
i = *rmap & KVMPPC_RMAP_INDEX;
|
||||
head = &kvm->arch.revmap[i];
|
||||
head = &kvm->arch.hpt.rev[i];
|
||||
if (realmode)
|
||||
head = real_vmalloc_addr(head);
|
||||
tail = &kvm->arch.revmap[head->back];
|
||||
tail = &kvm->arch.hpt.rev[head->back];
|
||||
if (realmode)
|
||||
tail = real_vmalloc_addr(tail);
|
||||
rev->forw = i;
|
||||
|
@ -154,8 +154,8 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
|
|||
lock_rmap(rmap);
|
||||
|
||||
head = *rmap & KVMPPC_RMAP_INDEX;
|
||||
next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]);
|
||||
prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]);
|
||||
next = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->forw]);
|
||||
prev = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->back]);
|
||||
next->back = rev->back;
|
||||
prev->forw = rev->forw;
|
||||
if (head == pte_index) {
|
||||
|
@ -292,11 +292,11 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
|
|||
|
||||
/* Find and lock the HPTEG slot to use */
|
||||
do_insert:
|
||||
if (pte_index >= kvm->arch.hpt_npte)
|
||||
if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
|
||||
return H_PARAMETER;
|
||||
if (likely((flags & H_EXACT) == 0)) {
|
||||
pte_index &= ~7UL;
|
||||
hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
|
||||
hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
|
||||
for (i = 0; i < 8; ++i) {
|
||||
if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 &&
|
||||
try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
|
||||
|
@ -327,7 +327,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
|
|||
}
|
||||
pte_index += i;
|
||||
} else {
|
||||
hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
|
||||
hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
|
||||
if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
|
||||
HPTE_V_ABSENT)) {
|
||||
/* Lock the slot and check again */
|
||||
|
@ -344,7 +344,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
|
|||
}
|
||||
|
||||
/* Save away the guest's idea of the second HPTE dword */
|
||||
rev = &kvm->arch.revmap[pte_index];
|
||||
rev = &kvm->arch.hpt.rev[pte_index];
|
||||
if (realmode)
|
||||
rev = real_vmalloc_addr(rev);
|
||||
if (rev) {
|
||||
|
@ -469,9 +469,9 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
|
|||
|
||||
if (kvm_is_radix(kvm))
|
||||
return H_FUNCTION;
|
||||
if (pte_index >= kvm->arch.hpt_npte)
|
||||
if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
|
||||
return H_PARAMETER;
|
||||
hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
|
||||
hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
|
||||
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
|
||||
cpu_relax();
|
||||
pte = orig_pte = be64_to_cpu(hpte[0]);
|
||||
|
@ -487,7 +487,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
|
|||
return H_NOT_FOUND;
|
||||
}
|
||||
|
||||
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
|
||||
rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
|
||||
v = pte & ~HPTE_V_HVLOCK;
|
||||
if (v & HPTE_V_VALID) {
|
||||
hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
|
||||
|
@ -557,13 +557,13 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
|
|||
break;
|
||||
}
|
||||
if (req != 1 || flags == 3 ||
|
||||
pte_index >= kvm->arch.hpt_npte) {
|
||||
pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) {
|
||||
/* parameter error */
|
||||
args[j] = ((0xa0 | flags) << 56) + pte_index;
|
||||
ret = H_PARAMETER;
|
||||
break;
|
||||
}
|
||||
hp = (__be64 *) (kvm->arch.hpt_virt + (pte_index << 4));
|
||||
hp = (__be64 *) (kvm->arch.hpt.virt + (pte_index << 4));
|
||||
/* to avoid deadlock, don't spin except for first */
|
||||
if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
|
||||
if (n)
|
||||
|
@ -600,7 +600,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
args[j] = ((0x80 | flags) << 56) + pte_index;
|
||||
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
|
||||
rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
|
||||
note_hpte_modification(kvm, rev);
|
||||
|
||||
if (!(hp0 & HPTE_V_VALID)) {
|
||||
|
@ -657,10 +657,10 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
|
|||
|
||||
if (kvm_is_radix(kvm))
|
||||
return H_FUNCTION;
|
||||
if (pte_index >= kvm->arch.hpt_npte)
|
||||
if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
|
||||
return H_PARAMETER;
|
||||
|
||||
hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
|
||||
hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
|
||||
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
|
||||
cpu_relax();
|
||||
v = pte_v = be64_to_cpu(hpte[0]);
|
||||
|
@ -680,7 +680,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
|
|||
/* Update guest view of 2nd HPTE dword */
|
||||
mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
|
||||
HPTE_R_KEY_HI | HPTE_R_KEY_LO;
|
||||
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
|
||||
rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
|
||||
if (rev) {
|
||||
r = (rev->guest_rpte & ~mask) | bits;
|
||||
rev->guest_rpte = r;
|
||||
|
@ -728,15 +728,15 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
|
|||
|
||||
if (kvm_is_radix(kvm))
|
||||
return H_FUNCTION;
|
||||
if (pte_index >= kvm->arch.hpt_npte)
|
||||
if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
|
||||
return H_PARAMETER;
|
||||
if (flags & H_READ_4) {
|
||||
pte_index &= ~3;
|
||||
n = 4;
|
||||
}
|
||||
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
|
||||
rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
|
||||
for (i = 0; i < n; ++i, ++pte_index) {
|
||||
hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
|
||||
hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
|
||||
v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
|
||||
r = be64_to_cpu(hpte[1]);
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
|
||||
|
@ -769,11 +769,11 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
|
|||
|
||||
if (kvm_is_radix(kvm))
|
||||
return H_FUNCTION;
|
||||
if (pte_index >= kvm->arch.hpt_npte)
|
||||
if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
|
||||
return H_PARAMETER;
|
||||
|
||||
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
|
||||
hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
|
||||
rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
|
||||
hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
|
||||
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
|
||||
cpu_relax();
|
||||
v = be64_to_cpu(hpte[0]);
|
||||
|
@ -817,11 +817,11 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
|
|||
|
||||
if (kvm_is_radix(kvm))
|
||||
return H_FUNCTION;
|
||||
if (pte_index >= kvm->arch.hpt_npte)
|
||||
if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
|
||||
return H_PARAMETER;
|
||||
|
||||
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
|
||||
hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
|
||||
rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
|
||||
hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
|
||||
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
|
||||
cpu_relax();
|
||||
v = be64_to_cpu(hpte[0]);
|
||||
|
@ -970,7 +970,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
|
|||
somask = (1UL << 28) - 1;
|
||||
vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
|
||||
}
|
||||
hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask;
|
||||
hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvmppc_hpt_mask(&kvm->arch.hpt);
|
||||
avpn = slb_v & ~(somask >> 16); /* also includes B */
|
||||
avpn |= (eaddr & somask) >> 16;
|
||||
|
||||
|
@ -981,7 +981,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
|
|||
val |= avpn;
|
||||
|
||||
for (;;) {
|
||||
hpte = (__be64 *)(kvm->arch.hpt_virt + (hash << 7));
|
||||
hpte = (__be64 *)(kvm->arch.hpt.virt + (hash << 7));
|
||||
|
||||
for (i = 0; i < 16; i += 2) {
|
||||
/* Read the PTE racily */
|
||||
|
@ -1017,7 +1017,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
|
|||
if (val & HPTE_V_SECONDARY)
|
||||
break;
|
||||
val |= HPTE_V_SECONDARY;
|
||||
hash = hash ^ kvm->arch.hpt_mask;
|
||||
hash = hash ^ kvmppc_hpt_mask(&kvm->arch.hpt);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
@ -1066,14 +1066,14 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
|
|||
return status; /* there really was no HPTE */
|
||||
return 0; /* for prot fault, HPTE disappeared */
|
||||
}
|
||||
hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
|
||||
hpte = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
|
||||
v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
|
||||
r = be64_to_cpu(hpte[1]);
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
|
||||
v = hpte_new_to_old_v(v, r);
|
||||
r = hpte_new_to_old_r(r);
|
||||
}
|
||||
rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
|
||||
rev = real_vmalloc_addr(&kvm->arch.hpt.rev[index]);
|
||||
gr = rev->guest_rpte;
|
||||
|
||||
unlock_hpte(hpte, orig_v);
|
||||
|
|
|
@ -35,7 +35,7 @@ int kvm_irq_bypass = 1;
|
|||
EXPORT_SYMBOL(kvm_irq_bypass);
|
||||
|
||||
static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
|
||||
u32 new_irq);
|
||||
u32 new_irq, bool check_resend);
|
||||
static int xics_opal_set_server(unsigned int hw_irq, int server_cpu);
|
||||
|
||||
/* -- ICS routines -- */
|
||||
|
@ -44,20 +44,12 @@ static void ics_rm_check_resend(struct kvmppc_xics *xics,
|
|||
{
|
||||
int i;
|
||||
|
||||
arch_spin_lock(&ics->lock);
|
||||
|
||||
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
|
||||
struct ics_irq_state *state = &ics->irq_state[i];
|
||||
|
||||
if (!state->resend)
|
||||
continue;
|
||||
|
||||
arch_spin_unlock(&ics->lock);
|
||||
icp_rm_deliver_irq(xics, icp, state->number);
|
||||
arch_spin_lock(&ics->lock);
|
||||
if (state->resend)
|
||||
icp_rm_deliver_irq(xics, icp, state->number, true);
|
||||
}
|
||||
|
||||
arch_spin_unlock(&ics->lock);
|
||||
}
|
||||
|
||||
/* -- ICP routines -- */
|
||||
|
@ -288,7 +280,7 @@ static bool icp_rm_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
|
|||
}
|
||||
|
||||
static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
|
||||
u32 new_irq)
|
||||
u32 new_irq, bool check_resend)
|
||||
{
|
||||
struct ics_irq_state *state;
|
||||
struct kvmppc_ics *ics;
|
||||
|
@ -333,6 +325,10 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
|
|||
}
|
||||
}
|
||||
|
||||
if (check_resend)
|
||||
if (!state->resend)
|
||||
goto out;
|
||||
|
||||
/* Clear the resend bit of that interrupt */
|
||||
state->resend = 0;
|
||||
|
||||
|
@ -378,7 +374,9 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
|
|||
*/
|
||||
if (reject && reject != XICS_IPI) {
|
||||
arch_spin_unlock(&ics->lock);
|
||||
icp->n_reject++;
|
||||
new_irq = reject;
|
||||
check_resend = 0;
|
||||
goto again;
|
||||
}
|
||||
} else {
|
||||
|
@ -386,9 +384,15 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
|
|||
* We failed to deliver the interrupt we need to set the
|
||||
* resend map bit and mark the ICS state as needing a resend
|
||||
*/
|
||||
set_bit(ics->icsid, icp->resend_map);
|
||||
state->resend = 1;
|
||||
|
||||
/*
|
||||
* Make sure when checking resend, we don't miss the resend
|
||||
* if resend_map bit is seen and cleared.
|
||||
*/
|
||||
smp_wmb();
|
||||
set_bit(ics->icsid, icp->resend_map);
|
||||
|
||||
/*
|
||||
* If the need_resend flag got cleared in the ICP some time
|
||||
* between icp_rm_try_to_deliver() atomic update and now, then
|
||||
|
@ -397,7 +401,9 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
|
|||
*/
|
||||
smp_mb();
|
||||
if (!icp->state.need_resend) {
|
||||
state->resend = 0;
|
||||
arch_spin_unlock(&ics->lock);
|
||||
check_resend = 0;
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
|
@ -592,7 +598,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
|
|||
/* Handle reject in real mode */
|
||||
if (reject && reject != XICS_IPI) {
|
||||
this_icp->n_reject++;
|
||||
icp_rm_deliver_irq(xics, icp, reject);
|
||||
icp_rm_deliver_irq(xics, icp, reject, false);
|
||||
}
|
||||
|
||||
/* Handle resends in real mode */
|
||||
|
@ -660,59 +666,45 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
|
|||
*/
|
||||
if (reject && reject != XICS_IPI) {
|
||||
icp->n_reject++;
|
||||
icp_rm_deliver_irq(xics, icp, reject);
|
||||
icp_rm_deliver_irq(xics, icp, reject, false);
|
||||
}
|
||||
bail:
|
||||
return check_too_hard(xics, icp);
|
||||
}
|
||||
|
||||
int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
|
||||
static int ics_rm_eoi(struct kvm_vcpu *vcpu, u32 irq)
|
||||
{
|
||||
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
|
||||
struct kvmppc_icp *icp = vcpu->arch.icp;
|
||||
struct kvmppc_ics *ics;
|
||||
struct ics_irq_state *state;
|
||||
u32 irq = xirr & 0x00ffffff;
|
||||
u16 src;
|
||||
|
||||
if (!xics || !xics->real_mode)
|
||||
return H_TOO_HARD;
|
||||
u32 pq_old, pq_new;
|
||||
|
||||
/*
|
||||
* ICP State: EOI
|
||||
* ICS EOI handling: For LSI, if P bit is still set, we need to
|
||||
* resend it.
|
||||
*
|
||||
* Note: If EOI is incorrectly used by SW to lower the CPPR
|
||||
* value (ie more favored), we do not check for rejection of
|
||||
* a pending interrupt, this is a SW error and PAPR sepcifies
|
||||
* that we don't have to deal with it.
|
||||
*
|
||||
* The sending of an EOI to the ICS is handled after the
|
||||
* CPPR update
|
||||
*
|
||||
* ICP State: Down_CPPR which we handle
|
||||
* in a separate function as it's shared with H_CPPR.
|
||||
* For MSI, we move Q bit into P (and clear Q). If it is set,
|
||||
* resend it.
|
||||
*/
|
||||
icp_rm_down_cppr(xics, icp, xirr >> 24);
|
||||
|
||||
/* IPIs have no EOI */
|
||||
if (irq == XICS_IPI)
|
||||
goto bail;
|
||||
/*
|
||||
* EOI handling: If the interrupt is still asserted, we need to
|
||||
* resend it. We can take a lockless "peek" at the ICS state here.
|
||||
*
|
||||
* "Message" interrupts will never have "asserted" set
|
||||
*/
|
||||
ics = kvmppc_xics_find_ics(xics, irq, &src);
|
||||
if (!ics)
|
||||
goto bail;
|
||||
|
||||
state = &ics->irq_state[src];
|
||||
|
||||
/* Still asserted, resend it */
|
||||
if (state->asserted) {
|
||||
icp->n_reject++;
|
||||
icp_rm_deliver_irq(xics, icp, irq);
|
||||
}
|
||||
if (state->lsi)
|
||||
pq_new = state->pq_state;
|
||||
else
|
||||
do {
|
||||
pq_old = state->pq_state;
|
||||
pq_new = pq_old >> 1;
|
||||
} while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
|
||||
|
||||
if (pq_new & PQ_PRESENTED)
|
||||
icp_rm_deliver_irq(xics, NULL, irq, false);
|
||||
|
||||
if (!hlist_empty(&vcpu->kvm->irq_ack_notifier_list)) {
|
||||
icp->rm_action |= XICS_RM_NOTIFY_EOI;
|
||||
|
@ -733,10 +725,43 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
|
|||
state->intr_cpu = -1;
|
||||
}
|
||||
}
|
||||
|
||||
bail:
|
||||
return check_too_hard(xics, icp);
|
||||
}
|
||||
|
||||
int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
|
||||
{
|
||||
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
|
||||
struct kvmppc_icp *icp = vcpu->arch.icp;
|
||||
u32 irq = xirr & 0x00ffffff;
|
||||
|
||||
if (!xics || !xics->real_mode)
|
||||
return H_TOO_HARD;
|
||||
|
||||
/*
|
||||
* ICP State: EOI
|
||||
*
|
||||
* Note: If EOI is incorrectly used by SW to lower the CPPR
|
||||
* value (ie more favored), we do not check for rejection of
|
||||
* a pending interrupt, this is a SW error and PAPR specifies
|
||||
* that we don't have to deal with it.
|
||||
*
|
||||
* The sending of an EOI to the ICS is handled after the
|
||||
* CPPR update
|
||||
*
|
||||
* ICP State: Down_CPPR which we handle
|
||||
* in a separate function as it's shared with H_CPPR.
|
||||
*/
|
||||
icp_rm_down_cppr(xics, icp, xirr >> 24);
|
||||
|
||||
/* IPIs have no EOI */
|
||||
if (irq == XICS_IPI)
|
||||
return check_too_hard(xics, icp);
|
||||
|
||||
return ics_rm_eoi(vcpu, irq);
|
||||
}
|
||||
|
||||
unsigned long eoi_rc;
|
||||
|
||||
static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
|
||||
|
@ -823,14 +848,33 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
|
|||
{
|
||||
struct kvmppc_xics *xics;
|
||||
struct kvmppc_icp *icp;
|
||||
struct kvmppc_ics *ics;
|
||||
struct ics_irq_state *state;
|
||||
u32 irq;
|
||||
u16 src;
|
||||
u32 pq_old, pq_new;
|
||||
|
||||
irq = irq_map->v_hwirq;
|
||||
xics = vcpu->kvm->arch.xics;
|
||||
icp = vcpu->arch.icp;
|
||||
|
||||
kvmppc_rm_handle_irq_desc(irq_map->desc);
|
||||
icp_rm_deliver_irq(xics, icp, irq);
|
||||
|
||||
ics = kvmppc_xics_find_ics(xics, irq, &src);
|
||||
if (!ics)
|
||||
return 2;
|
||||
|
||||
state = &ics->irq_state[src];
|
||||
|
||||
/* only MSIs register bypass producers, so it must be MSI here */
|
||||
do {
|
||||
pq_old = state->pq_state;
|
||||
pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED;
|
||||
} while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
|
||||
|
||||
/* Test P=1, Q=0, this is the only case where we present */
|
||||
if (pq_new == PQ_PRESENTED)
|
||||
icp_rm_deliver_irq(xics, icp, irq, false);
|
||||
|
||||
/* EOI the interrupt */
|
||||
icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr,
|
||||
|
|
|
@ -902,6 +902,69 @@ static void kvmppc_clear_debug(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
}
|
||||
|
||||
static int kvmppc_exit_pr_progint(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
unsigned int exit_nr)
|
||||
{
|
||||
enum emulation_result er;
|
||||
ulong flags;
|
||||
u32 last_inst;
|
||||
int emul, r;
|
||||
|
||||
/*
|
||||
* shadow_srr1 only contains valid flags if we came here via a program
|
||||
* exception. The other exceptions (emulation assist, FP unavailable,
|
||||
* etc.) do not provide flags in SRR1, so use an illegal-instruction
|
||||
* exception when injecting a program interrupt into the guest.
|
||||
*/
|
||||
if (exit_nr == BOOK3S_INTERRUPT_PROGRAM)
|
||||
flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
|
||||
else
|
||||
flags = SRR1_PROGILL;
|
||||
|
||||
emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
|
||||
if (emul != EMULATE_DONE)
|
||||
return RESUME_GUEST;
|
||||
|
||||
if (kvmppc_get_msr(vcpu) & MSR_PR) {
|
||||
#ifdef EXIT_DEBUG
|
||||
pr_info("Userspace triggered 0x700 exception at\n 0x%lx (0x%x)\n",
|
||||
kvmppc_get_pc(vcpu), last_inst);
|
||||
#endif
|
||||
if ((last_inst & 0xff0007ff) != (INS_DCBZ & 0xfffffff7)) {
|
||||
kvmppc_core_queue_program(vcpu, flags);
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
}
|
||||
|
||||
vcpu->stat.emulated_inst_exits++;
|
||||
er = kvmppc_emulate_instruction(run, vcpu);
|
||||
switch (er) {
|
||||
case EMULATE_DONE:
|
||||
r = RESUME_GUEST_NV;
|
||||
break;
|
||||
case EMULATE_AGAIN:
|
||||
r = RESUME_GUEST;
|
||||
break;
|
||||
case EMULATE_FAIL:
|
||||
pr_crit("%s: emulation at %lx failed (%08x)\n",
|
||||
__func__, kvmppc_get_pc(vcpu), last_inst);
|
||||
kvmppc_core_queue_program(vcpu, flags);
|
||||
r = RESUME_GUEST;
|
||||
break;
|
||||
case EMULATE_DO_MMIO:
|
||||
run->exit_reason = KVM_EXIT_MMIO;
|
||||
r = RESUME_HOST_NV;
|
||||
break;
|
||||
case EMULATE_EXIT_USER:
|
||||
r = RESUME_HOST_NV;
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
unsigned int exit_nr)
|
||||
{
|
||||
|
@ -1044,71 +1107,8 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
|||
break;
|
||||
case BOOK3S_INTERRUPT_PROGRAM:
|
||||
case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
|
||||
{
|
||||
enum emulation_result er;
|
||||
ulong flags;
|
||||
u32 last_inst;
|
||||
int emul;
|
||||
|
||||
program_interrupt:
|
||||
/*
|
||||
* shadow_srr1 only contains valid flags if we came here via
|
||||
* a program exception. The other exceptions (emulation assist,
|
||||
* FP unavailable, etc.) do not provide flags in SRR1, so use
|
||||
* an illegal-instruction exception when injecting a program
|
||||
* interrupt into the guest.
|
||||
*/
|
||||
if (exit_nr == BOOK3S_INTERRUPT_PROGRAM)
|
||||
flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
|
||||
else
|
||||
flags = SRR1_PROGILL;
|
||||
|
||||
emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
|
||||
if (emul != EMULATE_DONE) {
|
||||
r = RESUME_GUEST;
|
||||
break;
|
||||
}
|
||||
|
||||
if (kvmppc_get_msr(vcpu) & MSR_PR) {
|
||||
#ifdef EXIT_DEBUG
|
||||
pr_info("Userspace triggered 0x700 exception at\n 0x%lx (0x%x)\n",
|
||||
kvmppc_get_pc(vcpu), last_inst);
|
||||
#endif
|
||||
if ((last_inst & 0xff0007ff) !=
|
||||
(INS_DCBZ & 0xfffffff7)) {
|
||||
kvmppc_core_queue_program(vcpu, flags);
|
||||
r = RESUME_GUEST;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
vcpu->stat.emulated_inst_exits++;
|
||||
er = kvmppc_emulate_instruction(run, vcpu);
|
||||
switch (er) {
|
||||
case EMULATE_DONE:
|
||||
r = RESUME_GUEST_NV;
|
||||
break;
|
||||
case EMULATE_AGAIN:
|
||||
r = RESUME_GUEST;
|
||||
break;
|
||||
case EMULATE_FAIL:
|
||||
printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
|
||||
__func__, kvmppc_get_pc(vcpu), last_inst);
|
||||
kvmppc_core_queue_program(vcpu, flags);
|
||||
r = RESUME_GUEST;
|
||||
break;
|
||||
case EMULATE_DO_MMIO:
|
||||
run->exit_reason = KVM_EXIT_MMIO;
|
||||
r = RESUME_HOST_NV;
|
||||
break;
|
||||
case EMULATE_EXIT_USER:
|
||||
r = RESUME_HOST_NV;
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
r = kvmppc_exit_pr_progint(run, vcpu, exit_nr);
|
||||
break;
|
||||
}
|
||||
case BOOK3S_INTERRUPT_SYSCALL:
|
||||
{
|
||||
u32 last_sc;
|
||||
|
@ -1185,7 +1185,7 @@ program_interrupt:
|
|||
emul = kvmppc_get_last_inst(vcpu, INST_GENERIC,
|
||||
&last_inst);
|
||||
if (emul == EMULATE_DONE)
|
||||
goto program_interrupt;
|
||||
r = kvmppc_exit_pr_progint(run, vcpu, exit_nr);
|
||||
else
|
||||
r = RESUME_GUEST;
|
||||
|
||||
|
|
|
@ -63,7 +63,7 @@
|
|||
/* -- ICS routines -- */
|
||||
|
||||
static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
|
||||
u32 new_irq);
|
||||
u32 new_irq, bool check_resend);
|
||||
|
||||
/*
|
||||
* Return value ideally indicates how the interrupt was handled, but no
|
||||
|
@ -75,6 +75,7 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
|
|||
struct ics_irq_state *state;
|
||||
struct kvmppc_ics *ics;
|
||||
u16 src;
|
||||
u32 pq_old, pq_new;
|
||||
|
||||
XICS_DBG("ics deliver %#x (level: %d)\n", irq, level);
|
||||
|
||||
|
@ -87,25 +88,41 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
|
|||
if (!state->exists)
|
||||
return -EINVAL;
|
||||
|
||||
if (level == KVM_INTERRUPT_SET_LEVEL || level == KVM_INTERRUPT_SET)
|
||||
level = 1;
|
||||
else if (level == KVM_INTERRUPT_UNSET)
|
||||
level = 0;
|
||||
/*
|
||||
* We set state->asserted locklessly. This should be fine as
|
||||
* we are the only setter, thus concurrent access is undefined
|
||||
* to begin with.
|
||||
* Take other values the same as 1, consistent with original code.
|
||||
* maybe WARN here?
|
||||
*/
|
||||
if ((level == 1 && state->lsi) || level == KVM_INTERRUPT_SET_LEVEL)
|
||||
state->asserted = 1;
|
||||
else if (level == 0 || level == KVM_INTERRUPT_UNSET) {
|
||||
state->asserted = 0;
|
||||
|
||||
if (!state->lsi && level == 0) /* noop for MSI */
|
||||
return 0;
|
||||
}
|
||||
|
||||
do {
|
||||
pq_old = state->pq_state;
|
||||
if (state->lsi) {
|
||||
if (level) {
|
||||
if (pq_old & PQ_PRESENTED)
|
||||
/* Setting already set LSI ... */
|
||||
return 0;
|
||||
|
||||
pq_new = PQ_PRESENTED;
|
||||
} else
|
||||
pq_new = 0;
|
||||
} else
|
||||
pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED;
|
||||
} while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
|
||||
|
||||
/* Test P=1, Q=0, this is the only case where we present */
|
||||
if (pq_new == PQ_PRESENTED)
|
||||
icp_deliver_irq(xics, NULL, irq, false);
|
||||
|
||||
/* Record which CPU this arrived on for passed-through interrupts */
|
||||
if (state->host_irq)
|
||||
state->intr_cpu = raw_smp_processor_id();
|
||||
|
||||
/* Attempt delivery */
|
||||
icp_deliver_irq(xics, NULL, irq);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -114,29 +131,14 @@ static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
|
|||
{
|
||||
int i;
|
||||
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
arch_spin_lock(&ics->lock);
|
||||
|
||||
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
|
||||
struct ics_irq_state *state = &ics->irq_state[i];
|
||||
|
||||
if (!state->resend)
|
||||
continue;
|
||||
|
||||
XICS_DBG("resend %#x prio %#x\n", state->number,
|
||||
state->priority);
|
||||
|
||||
arch_spin_unlock(&ics->lock);
|
||||
local_irq_restore(flags);
|
||||
icp_deliver_irq(xics, icp, state->number);
|
||||
local_irq_save(flags);
|
||||
arch_spin_lock(&ics->lock);
|
||||
if (state->resend) {
|
||||
XICS_DBG("resend %#x prio %#x\n", state->number,
|
||||
state->priority);
|
||||
icp_deliver_irq(xics, icp, state->number, true);
|
||||
}
|
||||
}
|
||||
|
||||
arch_spin_unlock(&ics->lock);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
|
||||
|
@ -155,6 +157,7 @@ static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
|
|||
deliver = false;
|
||||
if ((state->masked_pending || state->resend) && priority != MASKED) {
|
||||
state->masked_pending = 0;
|
||||
state->resend = 0;
|
||||
deliver = true;
|
||||
}
|
||||
|
||||
|
@ -189,7 +192,7 @@ int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
|
|||
state->masked_pending, state->resend);
|
||||
|
||||
if (write_xive(xics, ics, state, server, priority, priority))
|
||||
icp_deliver_irq(xics, icp, irq);
|
||||
icp_deliver_irq(xics, icp, irq, false);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -242,7 +245,7 @@ int kvmppc_xics_int_on(struct kvm *kvm, u32 irq)
|
|||
|
||||
if (write_xive(xics, ics, state, state->server, state->saved_priority,
|
||||
state->saved_priority))
|
||||
icp_deliver_irq(xics, icp, irq);
|
||||
icp_deliver_irq(xics, icp, irq, false);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -376,7 +379,7 @@ static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
|
|||
}
|
||||
|
||||
static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
|
||||
u32 new_irq)
|
||||
u32 new_irq, bool check_resend)
|
||||
{
|
||||
struct ics_irq_state *state;
|
||||
struct kvmppc_ics *ics;
|
||||
|
@ -422,6 +425,10 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
|
|||
}
|
||||
}
|
||||
|
||||
if (check_resend)
|
||||
if (!state->resend)
|
||||
goto out;
|
||||
|
||||
/* Clear the resend bit of that interrupt */
|
||||
state->resend = 0;
|
||||
|
||||
|
@ -470,6 +477,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
|
|||
arch_spin_unlock(&ics->lock);
|
||||
local_irq_restore(flags);
|
||||
new_irq = reject;
|
||||
check_resend = 0;
|
||||
goto again;
|
||||
}
|
||||
} else {
|
||||
|
@ -477,9 +485,15 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
|
|||
* We failed to deliver the interrupt we need to set the
|
||||
* resend map bit and mark the ICS state as needing a resend
|
||||
*/
|
||||
set_bit(ics->icsid, icp->resend_map);
|
||||
state->resend = 1;
|
||||
|
||||
/*
|
||||
* Make sure when checking resend, we don't miss the resend
|
||||
* if resend_map bit is seen and cleared.
|
||||
*/
|
||||
smp_wmb();
|
||||
set_bit(ics->icsid, icp->resend_map);
|
||||
|
||||
/*
|
||||
* If the need_resend flag got cleared in the ICP some time
|
||||
* between icp_try_to_deliver() atomic update and now, then
|
||||
|
@ -488,8 +502,10 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
|
|||
*/
|
||||
smp_mb();
|
||||
if (!icp->state.need_resend) {
|
||||
state->resend = 0;
|
||||
arch_spin_unlock(&ics->lock);
|
||||
local_irq_restore(flags);
|
||||
check_resend = 0;
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
|
@ -681,7 +697,7 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
|
|||
|
||||
/* Handle reject */
|
||||
if (reject && reject != XICS_IPI)
|
||||
icp_deliver_irq(xics, icp, reject);
|
||||
icp_deliver_irq(xics, icp, reject, false);
|
||||
|
||||
/* Handle resend */
|
||||
if (resend)
|
||||
|
@ -761,17 +777,54 @@ static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
|
|||
* attempt (see comments in icp_deliver_irq).
|
||||
*/
|
||||
if (reject && reject != XICS_IPI)
|
||||
icp_deliver_irq(xics, icp, reject);
|
||||
icp_deliver_irq(xics, icp, reject, false);
|
||||
}
|
||||
|
||||
static int ics_eoi(struct kvm_vcpu *vcpu, u32 irq)
|
||||
{
|
||||
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
|
||||
struct kvmppc_icp *icp = vcpu->arch.icp;
|
||||
struct kvmppc_ics *ics;
|
||||
struct ics_irq_state *state;
|
||||
u16 src;
|
||||
u32 pq_old, pq_new;
|
||||
|
||||
/*
|
||||
* ICS EOI handling: For LSI, if P bit is still set, we need to
|
||||
* resend it.
|
||||
*
|
||||
* For MSI, we move Q bit into P (and clear Q). If it is set,
|
||||
* resend it.
|
||||
*/
|
||||
|
||||
ics = kvmppc_xics_find_ics(xics, irq, &src);
|
||||
if (!ics) {
|
||||
XICS_DBG("ios_eoi: IRQ 0x%06x not found !\n", irq);
|
||||
return H_PARAMETER;
|
||||
}
|
||||
state = &ics->irq_state[src];
|
||||
|
||||
if (state->lsi)
|
||||
pq_new = state->pq_state;
|
||||
else
|
||||
do {
|
||||
pq_old = state->pq_state;
|
||||
pq_new = pq_old >> 1;
|
||||
} while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
|
||||
|
||||
if (pq_new & PQ_PRESENTED)
|
||||
icp_deliver_irq(xics, icp, irq, false);
|
||||
|
||||
kvm_notify_acked_irq(vcpu->kvm, 0, irq);
|
||||
|
||||
return H_SUCCESS;
|
||||
}
|
||||
|
||||
static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
|
||||
{
|
||||
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
|
||||
struct kvmppc_icp *icp = vcpu->arch.icp;
|
||||
struct kvmppc_ics *ics;
|
||||
struct ics_irq_state *state;
|
||||
u32 irq = xirr & 0x00ffffff;
|
||||
u16 src;
|
||||
|
||||
XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);
|
||||
|
||||
|
@ -794,26 +847,8 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
|
|||
/* IPIs have no EOI */
|
||||
if (irq == XICS_IPI)
|
||||
return H_SUCCESS;
|
||||
/*
|
||||
* EOI handling: If the interrupt is still asserted, we need to
|
||||
* resend it. We can take a lockless "peek" at the ICS state here.
|
||||
*
|
||||
* "Message" interrupts will never have "asserted" set
|
||||
*/
|
||||
ics = kvmppc_xics_find_ics(xics, irq, &src);
|
||||
if (!ics) {
|
||||
XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq);
|
||||
return H_PARAMETER;
|
||||
}
|
||||
state = &ics->irq_state[src];
|
||||
|
||||
/* Still asserted, resend it */
|
||||
if (state->asserted)
|
||||
icp_deliver_irq(xics, icp, irq);
|
||||
|
||||
kvm_notify_acked_irq(vcpu->kvm, 0, irq);
|
||||
|
||||
return H_SUCCESS;
|
||||
return ics_eoi(vcpu, irq);
|
||||
}
|
||||
|
||||
int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
|
||||
|
@ -832,10 +867,6 @@ int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
|
|||
icp->n_rm_check_resend++;
|
||||
icp_check_resend(xics, icp->rm_resend_icp);
|
||||
}
|
||||
if (icp->rm_action & XICS_RM_REJECT) {
|
||||
icp->n_rm_reject++;
|
||||
icp_deliver_irq(xics, icp, icp->rm_reject);
|
||||
}
|
||||
if (icp->rm_action & XICS_RM_NOTIFY_EOI) {
|
||||
icp->n_rm_notify_eoi++;
|
||||
kvm_notify_acked_irq(vcpu->kvm, 0, icp->rm_eoied_irq);
|
||||
|
@ -920,7 +951,7 @@ static int xics_debug_show(struct seq_file *m, void *private)
|
|||
int icsid, i;
|
||||
unsigned long flags;
|
||||
unsigned long t_rm_kick_vcpu, t_rm_check_resend;
|
||||
unsigned long t_rm_reject, t_rm_notify_eoi;
|
||||
unsigned long t_rm_notify_eoi;
|
||||
unsigned long t_reject, t_check_resend;
|
||||
|
||||
if (!kvm)
|
||||
|
@ -929,7 +960,6 @@ static int xics_debug_show(struct seq_file *m, void *private)
|
|||
t_rm_kick_vcpu = 0;
|
||||
t_rm_notify_eoi = 0;
|
||||
t_rm_check_resend = 0;
|
||||
t_rm_reject = 0;
|
||||
t_check_resend = 0;
|
||||
t_reject = 0;
|
||||
|
||||
|
@ -952,14 +982,13 @@ static int xics_debug_show(struct seq_file *m, void *private)
|
|||
t_rm_kick_vcpu += icp->n_rm_kick_vcpu;
|
||||
t_rm_notify_eoi += icp->n_rm_notify_eoi;
|
||||
t_rm_check_resend += icp->n_rm_check_resend;
|
||||
t_rm_reject += icp->n_rm_reject;
|
||||
t_check_resend += icp->n_check_resend;
|
||||
t_reject += icp->n_reject;
|
||||
}
|
||||
|
||||
seq_printf(m, "ICP Guest->Host totals: kick_vcpu=%lu check_resend=%lu reject=%lu notify_eoi=%lu\n",
|
||||
seq_printf(m, "ICP Guest->Host totals: kick_vcpu=%lu check_resend=%lu notify_eoi=%lu\n",
|
||||
t_rm_kick_vcpu, t_rm_check_resend,
|
||||
t_rm_reject, t_rm_notify_eoi);
|
||||
t_rm_notify_eoi);
|
||||
seq_printf(m, "ICP Real Mode totals: check_resend=%lu resend=%lu\n",
|
||||
t_check_resend, t_reject);
|
||||
for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) {
|
||||
|
@ -977,9 +1006,9 @@ static int xics_debug_show(struct seq_file *m, void *private)
|
|||
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
|
||||
struct ics_irq_state *irq = &ics->irq_state[i];
|
||||
|
||||
seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n",
|
||||
seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x pq_state %d resend %d masked pending %d\n",
|
||||
irq->number, irq->server, irq->priority,
|
||||
irq->saved_priority, irq->asserted,
|
||||
irq->saved_priority, irq->pq_state,
|
||||
irq->resend, irq->masked_pending);
|
||||
|
||||
}
|
||||
|
@ -1198,10 +1227,17 @@ static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
|
|||
val |= prio << KVM_XICS_PRIORITY_SHIFT;
|
||||
if (irqp->lsi) {
|
||||
val |= KVM_XICS_LEVEL_SENSITIVE;
|
||||
if (irqp->asserted)
|
||||
if (irqp->pq_state & PQ_PRESENTED)
|
||||
val |= KVM_XICS_PENDING;
|
||||
} else if (irqp->masked_pending || irqp->resend)
|
||||
val |= KVM_XICS_PENDING;
|
||||
|
||||
if (irqp->pq_state & PQ_PRESENTED)
|
||||
val |= KVM_XICS_PRESENTED;
|
||||
|
||||
if (irqp->pq_state & PQ_QUEUED)
|
||||
val |= KVM_XICS_QUEUED;
|
||||
|
||||
ret = 0;
|
||||
}
|
||||
arch_spin_unlock(&ics->lock);
|
||||
|
@ -1253,18 +1289,20 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
|
|||
irqp->resend = 0;
|
||||
irqp->masked_pending = 0;
|
||||
irqp->lsi = 0;
|
||||
irqp->asserted = 0;
|
||||
if (val & KVM_XICS_LEVEL_SENSITIVE) {
|
||||
irqp->pq_state = 0;
|
||||
if (val & KVM_XICS_LEVEL_SENSITIVE)
|
||||
irqp->lsi = 1;
|
||||
if (val & KVM_XICS_PENDING)
|
||||
irqp->asserted = 1;
|
||||
}
|
||||
/* If PENDING, set P in case P is not saved because of old code */
|
||||
if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
|
||||
irqp->pq_state |= PQ_PRESENTED;
|
||||
if (val & KVM_XICS_QUEUED)
|
||||
irqp->pq_state |= PQ_QUEUED;
|
||||
irqp->exists = 1;
|
||||
arch_spin_unlock(&ics->lock);
|
||||
local_irq_restore(flags);
|
||||
|
||||
if (val & KVM_XICS_PENDING)
|
||||
icp_deliver_irq(xics, NULL, irqp->number);
|
||||
icp_deliver_irq(xics, NULL, irqp->number, false);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -31,16 +31,19 @@
|
|||
/* Priority value to use for disabling an interrupt */
|
||||
#define MASKED 0xff
|
||||
|
||||
#define PQ_PRESENTED 1
|
||||
#define PQ_QUEUED 2
|
||||
|
||||
/* State for one irq source */
|
||||
struct ics_irq_state {
|
||||
u32 number;
|
||||
u32 server;
|
||||
u32 pq_state;
|
||||
u8 priority;
|
||||
u8 saved_priority;
|
||||
u8 resend;
|
||||
u8 masked_pending;
|
||||
u8 lsi; /* level-sensitive interrupt */
|
||||
u8 asserted; /* Only for LSI */
|
||||
u8 exists;
|
||||
int intr_cpu;
|
||||
u32 host_irq;
|
||||
|
@ -73,7 +76,6 @@ struct kvmppc_icp {
|
|||
*/
|
||||
#define XICS_RM_KICK_VCPU 0x1
|
||||
#define XICS_RM_CHECK_RESEND 0x2
|
||||
#define XICS_RM_REJECT 0x4
|
||||
#define XICS_RM_NOTIFY_EOI 0x8
|
||||
u32 rm_action;
|
||||
struct kvm_vcpu *rm_kick_target;
|
||||
|
@ -84,7 +86,6 @@ struct kvmppc_icp {
|
|||
/* Counters for each reason we exited real mode */
|
||||
unsigned long n_rm_kick_vcpu;
|
||||
unsigned long n_rm_check_resend;
|
||||
unsigned long n_rm_reject;
|
||||
unsigned long n_rm_notify_eoi;
|
||||
/* Counters for handling ICP processing in real mode */
|
||||
unsigned long n_check_resend;
|
||||
|
|
|
@ -511,6 +511,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
case KVM_CAP_ONE_REG:
|
||||
case KVM_CAP_IOEVENTFD:
|
||||
case KVM_CAP_DEVICE_CTRL:
|
||||
case KVM_CAP_IMMEDIATE_EXIT:
|
||||
r = 1;
|
||||
break;
|
||||
case KVM_CAP_PPC_PAIRED_SINGLES:
|
||||
|
@ -612,6 +613,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
case KVM_CAP_SPAPR_MULTITCE:
|
||||
r = 1;
|
||||
break;
|
||||
case KVM_CAP_SPAPR_RESIZE_HPT:
|
||||
/* Disable this on POWER9 until code handles new HPTE format */
|
||||
r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300);
|
||||
break;
|
||||
#endif
|
||||
case KVM_CAP_PPC_HTM:
|
||||
r = cpu_has_feature(CPU_FTR_TM_COMP) &&
|
||||
|
@ -1114,7 +1119,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
|||
#endif
|
||||
}
|
||||
|
||||
r = kvmppc_vcpu_run(run, vcpu);
|
||||
if (run->immediate_exit)
|
||||
r = -EINTR;
|
||||
else
|
||||
r = kvmppc_vcpu_run(run, vcpu);
|
||||
|
||||
if (vcpu->sigset_active)
|
||||
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
|
||||
|
|
|
@ -373,7 +373,7 @@ void ipte_unlock(struct kvm_vcpu *vcpu)
|
|||
ipte_unlock_simple(vcpu);
|
||||
}
|
||||
|
||||
static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar,
|
||||
static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
|
||||
enum gacc_mode mode)
|
||||
{
|
||||
union alet alet;
|
||||
|
@ -465,7 +465,9 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar,
|
|||
struct trans_exc_code_bits {
|
||||
unsigned long addr : 52; /* Translation-exception Address */
|
||||
unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */
|
||||
unsigned long : 6;
|
||||
unsigned long : 2;
|
||||
unsigned long b56 : 1;
|
||||
unsigned long : 3;
|
||||
unsigned long b60 : 1;
|
||||
unsigned long b61 : 1;
|
||||
unsigned long as : 2; /* ASCE Identifier */
|
||||
|
@ -485,7 +487,7 @@ enum prot_type {
|
|||
};
|
||||
|
||||
static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
|
||||
ar_t ar, enum gacc_mode mode, enum prot_type prot)
|
||||
u8 ar, enum gacc_mode mode, enum prot_type prot)
|
||||
{
|
||||
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
|
||||
struct trans_exc_code_bits *tec;
|
||||
|
@ -497,14 +499,18 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
|
|||
switch (code) {
|
||||
case PGM_PROTECTION:
|
||||
switch (prot) {
|
||||
case PROT_TYPE_LA:
|
||||
tec->b56 = 1;
|
||||
break;
|
||||
case PROT_TYPE_KEYC:
|
||||
tec->b60 = 1;
|
||||
break;
|
||||
case PROT_TYPE_ALC:
|
||||
tec->b60 = 1;
|
||||
/* FALL THROUGH */
|
||||
case PROT_TYPE_DAT:
|
||||
tec->b61 = 1;
|
||||
break;
|
||||
default: /* LA and KEYC set b61 to 0, other params undefined */
|
||||
return code;
|
||||
}
|
||||
/* FALL THROUGH */
|
||||
case PGM_ASCE_TYPE:
|
||||
|
@ -539,7 +545,7 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
|
|||
}
|
||||
|
||||
static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
|
||||
unsigned long ga, ar_t ar, enum gacc_mode mode)
|
||||
unsigned long ga, u8 ar, enum gacc_mode mode)
|
||||
{
|
||||
int rc;
|
||||
struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
|
||||
|
@ -771,7 +777,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
|
|||
return 1;
|
||||
}
|
||||
|
||||
static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar,
|
||||
static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
|
||||
unsigned long *pages, unsigned long nr_pages,
|
||||
const union asce asce, enum gacc_mode mode)
|
||||
{
|
||||
|
@ -803,7 +809,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar,
|
|||
return 0;
|
||||
}
|
||||
|
||||
int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
|
||||
int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
|
||||
unsigned long len, enum gacc_mode mode)
|
||||
{
|
||||
psw_t *psw = &vcpu->arch.sie_block->gpsw;
|
||||
|
@ -877,7 +883,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
|
|||
* Note: The IPTE lock is not taken during this function, so the caller
|
||||
* has to take care of this.
|
||||
*/
|
||||
int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
|
||||
int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
|
||||
unsigned long *gpa, enum gacc_mode mode)
|
||||
{
|
||||
psw_t *psw = &vcpu->arch.sie_block->gpsw;
|
||||
|
@ -910,7 +916,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
|
|||
/**
|
||||
* check_gva_range - test a range of guest virtual addresses for accessibility
|
||||
*/
|
||||
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
|
||||
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
|
||||
unsigned long length, enum gacc_mode mode)
|
||||
{
|
||||
unsigned long gpa;
|
||||
|
|
|
@ -162,11 +162,11 @@ enum gacc_mode {
|
|||
};
|
||||
|
||||
int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
|
||||
ar_t ar, unsigned long *gpa, enum gacc_mode mode);
|
||||
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
|
||||
u8 ar, unsigned long *gpa, enum gacc_mode mode);
|
||||
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
|
||||
unsigned long length, enum gacc_mode mode);
|
||||
|
||||
int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
|
||||
int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
|
||||
unsigned long len, enum gacc_mode mode);
|
||||
|
||||
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
|
||||
|
@ -218,7 +218,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
|
|||
* if data has been changed in guest space in case of an exception.
|
||||
*/
|
||||
static inline __must_check
|
||||
int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
|
||||
int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
|
||||
unsigned long len)
|
||||
{
|
||||
return access_guest(vcpu, ga, ar, data, len, GACC_STORE);
|
||||
|
@ -238,7 +238,7 @@ int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
|
|||
* data will be copied from guest space to kernel space.
|
||||
*/
|
||||
static inline __must_check
|
||||
int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
|
||||
int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
|
||||
unsigned long len)
|
||||
{
|
||||
return access_guest(vcpu, ga, ar, data, len, GACC_FETCH);
|
||||
|
@ -247,10 +247,11 @@ int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
|
|||
/**
|
||||
* read_guest_instr - copy instruction data from guest space to kernel space
|
||||
* @vcpu: virtual cpu
|
||||
* @ga: guest address
|
||||
* @data: destination address in kernel space
|
||||
* @len: number of bytes to copy
|
||||
*
|
||||
* Copy @len bytes from the current psw address (guest space) to @data (kernel
|
||||
* Copy @len bytes from the given address (guest space) to @data (kernel
|
||||
* space).
|
||||
*
|
||||
* The behaviour of read_guest_instr is identical to read_guest, except that
|
||||
|
@ -258,10 +259,10 @@ int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
|
|||
* address-space mode.
|
||||
*/
|
||||
static inline __must_check
|
||||
int read_guest_instr(struct kvm_vcpu *vcpu, void *data, unsigned long len)
|
||||
int read_guest_instr(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
|
||||
unsigned long len)
|
||||
{
|
||||
return access_guest(vcpu, vcpu->arch.sie_block->gpsw.addr, 0, data, len,
|
||||
GACC_IFETCH);
|
||||
return access_guest(vcpu, ga, 0, data, len, GACC_IFETCH);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -388,14 +388,13 @@ void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu)
|
|||
#define per_write_wp_event(code) \
|
||||
(code & (PER_CODE_STORE | PER_CODE_STORE_REAL))
|
||||
|
||||
static int debug_exit_required(struct kvm_vcpu *vcpu)
|
||||
static int debug_exit_required(struct kvm_vcpu *vcpu, u8 perc,
|
||||
unsigned long peraddr)
|
||||
{
|
||||
u8 perc = vcpu->arch.sie_block->perc;
|
||||
struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
|
||||
struct kvm_hw_wp_info_arch *wp_info = NULL;
|
||||
struct kvm_hw_bp_info_arch *bp_info = NULL;
|
||||
unsigned long addr = vcpu->arch.sie_block->gpsw.addr;
|
||||
unsigned long peraddr = vcpu->arch.sie_block->peraddr;
|
||||
|
||||
if (guestdbg_hw_bp_enabled(vcpu)) {
|
||||
if (per_write_wp_event(perc) &&
|
||||
|
@ -437,36 +436,118 @@ exit_required:
|
|||
return 1;
|
||||
}
|
||||
|
||||
static int per_fetched_addr(struct kvm_vcpu *vcpu, unsigned long *addr)
|
||||
{
|
||||
u8 exec_ilen = 0;
|
||||
u16 opcode[3];
|
||||
int rc;
|
||||
|
||||
if (vcpu->arch.sie_block->icptcode == ICPT_PROGI) {
|
||||
/* PER address references the fetched or the execute instr */
|
||||
*addr = vcpu->arch.sie_block->peraddr;
|
||||
/*
|
||||
* Manually detect if we have an EXECUTE instruction. As
|
||||
* instructions are always 2 byte aligned we can read the
|
||||
* first two bytes unconditionally
|
||||
*/
|
||||
rc = read_guest_instr(vcpu, *addr, &opcode, 2);
|
||||
if (rc)
|
||||
return rc;
|
||||
if (opcode[0] >> 8 == 0x44)
|
||||
exec_ilen = 4;
|
||||
if ((opcode[0] & 0xff0f) == 0xc600)
|
||||
exec_ilen = 6;
|
||||
} else {
|
||||
/* instr was suppressed, calculate the responsible instr */
|
||||
*addr = __rewind_psw(vcpu->arch.sie_block->gpsw,
|
||||
kvm_s390_get_ilen(vcpu));
|
||||
if (vcpu->arch.sie_block->icptstatus & 0x01) {
|
||||
exec_ilen = (vcpu->arch.sie_block->icptstatus & 0x60) >> 4;
|
||||
if (!exec_ilen)
|
||||
exec_ilen = 4;
|
||||
}
|
||||
}
|
||||
|
||||
if (exec_ilen) {
|
||||
/* read the complete EXECUTE instr to detect the fetched addr */
|
||||
rc = read_guest_instr(vcpu, *addr, &opcode, exec_ilen);
|
||||
if (rc)
|
||||
return rc;
|
||||
if (exec_ilen == 6) {
|
||||
/* EXECUTE RELATIVE LONG - RIL-b format */
|
||||
s32 rl = *((s32 *) (opcode + 1));
|
||||
|
||||
/* rl is a _signed_ 32 bit value specifying halfwords */
|
||||
*addr += (u64)(s64) rl * 2;
|
||||
} else {
|
||||
/* EXECUTE - RX-a format */
|
||||
u32 base = (opcode[1] & 0xf000) >> 12;
|
||||
u32 disp = opcode[1] & 0x0fff;
|
||||
u32 index = opcode[0] & 0x000f;
|
||||
|
||||
*addr = base ? vcpu->run->s.regs.gprs[base] : 0;
|
||||
*addr += index ? vcpu->run->s.regs.gprs[index] : 0;
|
||||
*addr += disp;
|
||||
}
|
||||
*addr = kvm_s390_logical_to_effective(vcpu, *addr);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define guest_per_enabled(vcpu) \
|
||||
(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER)
|
||||
|
||||
int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
const u64 cr10 = vcpu->arch.sie_block->gcr[10];
|
||||
const u64 cr11 = vcpu->arch.sie_block->gcr[11];
|
||||
const u8 ilen = kvm_s390_get_ilen(vcpu);
|
||||
struct kvm_s390_pgm_info pgm_info = {
|
||||
.code = PGM_PER,
|
||||
.per_code = PER_CODE_IFETCH,
|
||||
.per_address = __rewind_psw(vcpu->arch.sie_block->gpsw, ilen),
|
||||
};
|
||||
unsigned long fetched_addr;
|
||||
int rc;
|
||||
|
||||
/*
|
||||
* The PSW points to the next instruction, therefore the intercepted
|
||||
* instruction generated a PER i-fetch event. PER address therefore
|
||||
* points at the previous PSW address (could be an EXECUTE function).
|
||||
*/
|
||||
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
|
||||
if (!guestdbg_enabled(vcpu))
|
||||
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
|
||||
|
||||
if (debug_exit_required(vcpu, pgm_info.per_code, pgm_info.per_address))
|
||||
vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
|
||||
|
||||
if (!guest_per_enabled(vcpu) ||
|
||||
!(vcpu->arch.sie_block->gcr[9] & PER_EVENT_IFETCH))
|
||||
return 0;
|
||||
|
||||
rc = per_fetched_addr(vcpu, &fetched_addr);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
if (rc)
|
||||
/* instruction-fetching exceptions */
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
|
||||
if (in_addr_range(fetched_addr, cr10, cr11))
|
||||
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void filter_guest_per_event(struct kvm_vcpu *vcpu)
|
||||
static int filter_guest_per_event(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
const u8 perc = vcpu->arch.sie_block->perc;
|
||||
u64 peraddr = vcpu->arch.sie_block->peraddr;
|
||||
u64 addr = vcpu->arch.sie_block->gpsw.addr;
|
||||
u64 cr9 = vcpu->arch.sie_block->gcr[9];
|
||||
u64 cr10 = vcpu->arch.sie_block->gcr[10];
|
||||
u64 cr11 = vcpu->arch.sie_block->gcr[11];
|
||||
/* filter all events, demanded by the guest */
|
||||
u8 guest_perc = perc & (cr9 >> 24) & PER_CODE_MASK;
|
||||
unsigned long fetched_addr;
|
||||
int rc;
|
||||
|
||||
if (!guest_per_enabled(vcpu))
|
||||
guest_perc = 0;
|
||||
|
@ -478,9 +559,17 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu)
|
|||
guest_perc &= ~PER_CODE_BRANCH;
|
||||
|
||||
/* filter "instruction-fetching" events */
|
||||
if (guest_perc & PER_CODE_IFETCH &&
|
||||
!in_addr_range(peraddr, cr10, cr11))
|
||||
guest_perc &= ~PER_CODE_IFETCH;
|
||||
if (guest_perc & PER_CODE_IFETCH) {
|
||||
rc = per_fetched_addr(vcpu, &fetched_addr);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
/*
|
||||
* Don't inject an irq on exceptions. This would make handling
|
||||
* on icpt code 8 very complex (as PSW was already rewound).
|
||||
*/
|
||||
if (rc || !in_addr_range(fetched_addr, cr10, cr11))
|
||||
guest_perc &= ~PER_CODE_IFETCH;
|
||||
}
|
||||
|
||||
/* All other PER events will be given to the guest */
|
||||
/* TODO: Check altered address/address space */
|
||||
|
@ -489,6 +578,7 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu)
|
|||
|
||||
if (!guest_perc)
|
||||
vcpu->arch.sie_block->iprcc &= ~PGM_PER;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define pssec(vcpu) (vcpu->arch.sie_block->gcr[1] & _ASCE_SPACE_SWITCH)
|
||||
|
@ -496,14 +586,17 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu)
|
|||
#define old_ssec(vcpu) ((vcpu->arch.sie_block->tecmc >> 31) & 0x1)
|
||||
#define old_as_is_home(vcpu) !(vcpu->arch.sie_block->tecmc & 0xffff)
|
||||
|
||||
void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
|
||||
int kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int new_as;
|
||||
int rc, new_as;
|
||||
|
||||
if (debug_exit_required(vcpu))
|
||||
if (debug_exit_required(vcpu, vcpu->arch.sie_block->perc,
|
||||
vcpu->arch.sie_block->peraddr))
|
||||
vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
|
||||
|
||||
filter_guest_per_event(vcpu);
|
||||
rc = filter_guest_per_event(vcpu);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
/*
|
||||
* Only RP, SAC, SACF, PT, PTI, PR, PC instructions can trigger
|
||||
|
@ -532,4 +625,5 @@ void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
|
|||
(pssec(vcpu) || old_ssec(vcpu)))
|
||||
vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -238,7 +238,9 @@ static int handle_prog(struct kvm_vcpu *vcpu)
|
|||
vcpu->stat.exit_program_interruption++;
|
||||
|
||||
if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
|
||||
kvm_s390_handle_per_event(vcpu);
|
||||
rc = kvm_s390_handle_per_event(vcpu);
|
||||
if (rc)
|
||||
return rc;
|
||||
/* the interrupt might have been filtered out completely */
|
||||
if (vcpu->arch.sie_block->iprcc == 0)
|
||||
return 0;
|
||||
|
@ -359,6 +361,9 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
|
|||
|
||||
static int handle_operexc(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
psw_t oldpsw, newpsw;
|
||||
int rc;
|
||||
|
||||
vcpu->stat.exit_operation_exception++;
|
||||
trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa,
|
||||
vcpu->arch.sie_block->ipb);
|
||||
|
@ -369,6 +374,24 @@ static int handle_operexc(struct kvm_vcpu *vcpu)
|
|||
|
||||
if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0)
|
||||
return -EOPNOTSUPP;
|
||||
rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &newpsw, sizeof(psw_t));
|
||||
if (rc)
|
||||
return rc;
|
||||
/*
|
||||
* Avoid endless loops of operation exceptions, if the pgm new
|
||||
* PSW will cause a new operation exception.
|
||||
* The heuristic checks if the pgm new psw is within 6 bytes before
|
||||
* the faulting psw address (with same DAT, AS settings) and the
|
||||
* new psw is not a wait psw and the fault was not triggered by
|
||||
* problem state.
|
||||
*/
|
||||
oldpsw = vcpu->arch.sie_block->gpsw;
|
||||
if (oldpsw.addr - newpsw.addr <= 6 &&
|
||||
!(newpsw.mask & PSW_MASK_WAIT) &&
|
||||
!(oldpsw.mask & PSW_MASK_PSTATE) &&
|
||||
(newpsw.mask & PSW_MASK_ASC) == (oldpsw.mask & PSW_MASK_ASC) &&
|
||||
(newpsw.mask & PSW_MASK_DAT) == (oldpsw.mask & PSW_MASK_DAT))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
|
||||
}
|
||||
|
|
|
@ -218,7 +218,7 @@ static void allow_cpu_feat(unsigned long nr)
|
|||
static inline int plo_test_bit(unsigned char nr)
|
||||
{
|
||||
register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
|
||||
int cc = 3; /* subfunction not available */
|
||||
int cc;
|
||||
|
||||
asm volatile(
|
||||
/* Parameter registers are ignored for "test bit" */
|
||||
|
@ -371,6 +371,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
case KVM_CAP_S390_IRQCHIP:
|
||||
case KVM_CAP_VM_ATTRIBUTES:
|
||||
case KVM_CAP_MP_STATE:
|
||||
case KVM_CAP_IMMEDIATE_EXIT:
|
||||
case KVM_CAP_S390_INJECT_IRQ:
|
||||
case KVM_CAP_S390_USER_SIGP:
|
||||
case KVM_CAP_S390_USER_STSI:
|
||||
|
@ -443,6 +444,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
|
|||
struct kvm_memory_slot *memslot;
|
||||
int is_dirty = 0;
|
||||
|
||||
if (kvm_is_ucontrol(kvm))
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
|
||||
r = -EINVAL;
|
||||
|
@ -506,6 +510,14 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
|
|||
} else if (MACHINE_HAS_VX) {
|
||||
set_kvm_facility(kvm->arch.model.fac_mask, 129);
|
||||
set_kvm_facility(kvm->arch.model.fac_list, 129);
|
||||
if (test_facility(134)) {
|
||||
set_kvm_facility(kvm->arch.model.fac_mask, 134);
|
||||
set_kvm_facility(kvm->arch.model.fac_list, 134);
|
||||
}
|
||||
if (test_facility(135)) {
|
||||
set_kvm_facility(kvm->arch.model.fac_mask, 135);
|
||||
set_kvm_facility(kvm->arch.model.fac_list, 135);
|
||||
}
|
||||
r = 0;
|
||||
} else
|
||||
r = -EINVAL;
|
||||
|
@ -822,6 +834,13 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
|
|||
}
|
||||
memcpy(kvm->arch.model.fac_list, proc->fac_list,
|
||||
S390_ARCH_FAC_LIST_SIZE_BYTE);
|
||||
VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
|
||||
kvm->arch.model.ibc,
|
||||
kvm->arch.model.cpuid);
|
||||
VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
|
||||
kvm->arch.model.fac_list[0],
|
||||
kvm->arch.model.fac_list[1],
|
||||
kvm->arch.model.fac_list[2]);
|
||||
} else
|
||||
ret = -EFAULT;
|
||||
kfree(proc);
|
||||
|
@ -895,6 +914,13 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
|
|||
proc->ibc = kvm->arch.model.ibc;
|
||||
memcpy(&proc->fac_list, kvm->arch.model.fac_list,
|
||||
S390_ARCH_FAC_LIST_SIZE_BYTE);
|
||||
VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
|
||||
kvm->arch.model.ibc,
|
||||
kvm->arch.model.cpuid);
|
||||
VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
|
||||
kvm->arch.model.fac_list[0],
|
||||
kvm->arch.model.fac_list[1],
|
||||
kvm->arch.model.fac_list[2]);
|
||||
if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
|
||||
ret = -EFAULT;
|
||||
kfree(proc);
|
||||
|
@ -918,6 +944,17 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
|
|||
S390_ARCH_FAC_LIST_SIZE_BYTE);
|
||||
memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
|
||||
sizeof(S390_lowcore.stfle_fac_list));
|
||||
VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
|
||||
kvm->arch.model.ibc,
|
||||
kvm->arch.model.cpuid);
|
||||
VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
|
||||
mach->fac_mask[0],
|
||||
mach->fac_mask[1],
|
||||
mach->fac_mask[2]);
|
||||
VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
|
||||
mach->fac_list[0],
|
||||
mach->fac_list[1],
|
||||
mach->fac_list[2]);
|
||||
if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
|
||||
ret = -EFAULT;
|
||||
kfree(mach);
|
||||
|
@ -1939,6 +1976,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
|
|||
|
||||
if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
|
||||
vcpu->arch.sie_block->ecb2 |= 0x08;
|
||||
if (test_kvm_facility(vcpu->kvm, 130))
|
||||
vcpu->arch.sie_block->ecb2 |= 0x20;
|
||||
vcpu->arch.sie_block->eca = 0x1002000U;
|
||||
if (sclp.has_cei)
|
||||
vcpu->arch.sie_block->eca |= 0x80000000U;
|
||||
|
@ -2579,7 +2618,7 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
|
|||
* to look up the current opcode to get the length of the instruction
|
||||
* to be able to forward the PSW.
|
||||
*/
|
||||
rc = read_guest_instr(vcpu, &opcode, 1);
|
||||
rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
|
||||
ilen = insn_length(opcode);
|
||||
if (rc < 0) {
|
||||
return rc;
|
||||
|
@ -2761,6 +2800,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|||
int rc;
|
||||
sigset_t sigsaved;
|
||||
|
||||
if (kvm_run->immediate_exit)
|
||||
return -EINTR;
|
||||
|
||||
if (guestdbg_exit_pending(vcpu)) {
|
||||
kvm_s390_prepare_debug_exit(vcpu);
|
||||
return 0;
|
||||
|
|
|
@ -86,9 +86,7 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
|
|||
kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
|
||||
}
|
||||
|
||||
typedef u8 __bitwise ar_t;
|
||||
|
||||
static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, ar_t *ar)
|
||||
static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, u8 *ar)
|
||||
{
|
||||
u32 base2 = vcpu->arch.sie_block->ipb >> 28;
|
||||
u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
|
||||
|
@ -101,7 +99,7 @@ static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, ar_t *ar)
|
|||
|
||||
static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
|
||||
u64 *address1, u64 *address2,
|
||||
ar_t *ar_b1, ar_t *ar_b2)
|
||||
u8 *ar_b1, u8 *ar_b2)
|
||||
{
|
||||
u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
|
||||
u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
|
||||
|
@ -125,7 +123,7 @@ static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2
|
|||
*r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
|
||||
}
|
||||
|
||||
static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu, ar_t *ar)
|
||||
static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu, u8 *ar)
|
||||
{
|
||||
u32 base2 = vcpu->arch.sie_block->ipb >> 28;
|
||||
u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
|
||||
|
@ -140,7 +138,7 @@ static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu, ar_t *ar)
|
|||
return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2;
|
||||
}
|
||||
|
||||
static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu, ar_t *ar)
|
||||
static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu, u8 *ar)
|
||||
{
|
||||
u32 base2 = vcpu->arch.sie_block->ipb >> 28;
|
||||
u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
|
||||
|
@ -379,7 +377,7 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
|
|||
void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu);
|
||||
void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu);
|
||||
int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu);
|
||||
void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
|
||||
int kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
|
||||
|
||||
/* support for Basic/Extended SCA handling */
|
||||
static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm)
|
||||
|
|
|
@ -54,7 +54,7 @@ int kvm_s390_handle_aa(struct kvm_vcpu *vcpu)
|
|||
static int handle_set_clock(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int rc;
|
||||
ar_t ar;
|
||||
u8 ar;
|
||||
u64 op2, val;
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
|
@ -79,7 +79,7 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
|
|||
u64 operand2;
|
||||
u32 address;
|
||||
int rc;
|
||||
ar_t ar;
|
||||
u8 ar;
|
||||
|
||||
vcpu->stat.instruction_spx++;
|
||||
|
||||
|
@ -117,7 +117,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
|
|||
u64 operand2;
|
||||
u32 address;
|
||||
int rc;
|
||||
ar_t ar;
|
||||
u8 ar;
|
||||
|
||||
vcpu->stat.instruction_stpx++;
|
||||
|
||||
|
@ -147,7 +147,7 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
|
|||
u16 vcpu_id = vcpu->vcpu_id;
|
||||
u64 ga;
|
||||
int rc;
|
||||
ar_t ar;
|
||||
u8 ar;
|
||||
|
||||
vcpu->stat.instruction_stap++;
|
||||
|
||||
|
@ -380,7 +380,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
|
|||
u32 tpi_data[3];
|
||||
int rc;
|
||||
u64 addr;
|
||||
ar_t ar;
|
||||
u8 ar;
|
||||
|
||||
addr = kvm_s390_get_base_disp_s(vcpu, &ar);
|
||||
if (addr & 3)
|
||||
|
@ -548,7 +548,7 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
|
|||
psw_compat_t new_psw;
|
||||
u64 addr;
|
||||
int rc;
|
||||
ar_t ar;
|
||||
u8 ar;
|
||||
|
||||
if (gpsw->mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
@ -575,7 +575,7 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
|
|||
psw_t new_psw;
|
||||
u64 addr;
|
||||
int rc;
|
||||
ar_t ar;
|
||||
u8 ar;
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
@ -597,7 +597,7 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
|
|||
u64 stidp_data = vcpu->kvm->arch.model.cpuid;
|
||||
u64 operand2;
|
||||
int rc;
|
||||
ar_t ar;
|
||||
u8 ar;
|
||||
|
||||
vcpu->stat.instruction_stidp++;
|
||||
|
||||
|
@ -644,7 +644,7 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
|
|||
ASCEBC(mem->vm[0].cpi, 16);
|
||||
}
|
||||
|
||||
static void insert_stsi_usr_data(struct kvm_vcpu *vcpu, u64 addr, ar_t ar,
|
||||
static void insert_stsi_usr_data(struct kvm_vcpu *vcpu, u64 addr, u8 ar,
|
||||
u8 fc, u8 sel1, u16 sel2)
|
||||
{
|
||||
vcpu->run->exit_reason = KVM_EXIT_S390_STSI;
|
||||
|
@ -663,7 +663,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
|
|||
unsigned long mem = 0;
|
||||
u64 operand2;
|
||||
int rc = 0;
|
||||
ar_t ar;
|
||||
u8 ar;
|
||||
|
||||
vcpu->stat.instruction_stsi++;
|
||||
VCPU_EVENT(vcpu, 3, "STSI: fc: %u sel1: %u sel2: %u", fc, sel1, sel2);
|
||||
|
@ -970,7 +970,7 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
|
|||
int reg, rc, nr_regs;
|
||||
u32 ctl_array[16];
|
||||
u64 ga;
|
||||
ar_t ar;
|
||||
u8 ar;
|
||||
|
||||
vcpu->stat.instruction_lctl++;
|
||||
|
||||
|
@ -1009,7 +1009,7 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
|
|||
int reg, rc, nr_regs;
|
||||
u32 ctl_array[16];
|
||||
u64 ga;
|
||||
ar_t ar;
|
||||
u8 ar;
|
||||
|
||||
vcpu->stat.instruction_stctl++;
|
||||
|
||||
|
@ -1043,7 +1043,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
|
|||
int reg, rc, nr_regs;
|
||||
u64 ctl_array[16];
|
||||
u64 ga;
|
||||
ar_t ar;
|
||||
u8 ar;
|
||||
|
||||
vcpu->stat.instruction_lctlg++;
|
||||
|
||||
|
@ -1081,7 +1081,7 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
|
|||
int reg, rc, nr_regs;
|
||||
u64 ctl_array[16];
|
||||
u64 ga;
|
||||
ar_t ar;
|
||||
u8 ar;
|
||||
|
||||
vcpu->stat.instruction_stctg++;
|
||||
|
||||
|
@ -1132,7 +1132,7 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
|
|||
unsigned long hva, gpa;
|
||||
int ret = 0, cc = 0;
|
||||
bool writable;
|
||||
ar_t ar;
|
||||
u8 ar;
|
||||
|
||||
vcpu->stat.instruction_tprot++;
|
||||
|
||||
|
|
|
@ -324,6 +324,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
|||
/* Run-time-Instrumentation */
|
||||
if (test_kvm_facility(vcpu->kvm, 64))
|
||||
scb_s->ecb3 |= scb_o->ecb3 & 0x01U;
|
||||
/* Instruction Execution Prevention */
|
||||
if (test_kvm_facility(vcpu->kvm, 130))
|
||||
scb_s->ecb2 |= scb_o->ecb2 & 0x20U;
|
||||
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF))
|
||||
scb_s->eca |= scb_o->eca & 0x00000001U;
|
||||
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB))
|
||||
|
|
|
@ -744,7 +744,7 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
|
|||
|
||||
pgste_set_unlock(ptep, new);
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
return 0;
|
||||
return cc;
|
||||
}
|
||||
EXPORT_SYMBOL(reset_guest_reference_bit);
|
||||
|
||||
|
|
|
@ -80,6 +80,8 @@ static struct facility_def facility_defs[] = {
|
|||
76, /* msa extension 3 */
|
||||
77, /* msa extension 4 */
|
||||
78, /* enhanced-DAT 2 */
|
||||
130, /* instruction-execution-protection */
|
||||
131, /* enhanced-SOP 2 and side-effect */
|
||||
-1 /* END */
|
||||
}
|
||||
},
|
||||
|
|
|
@ -177,16 +177,8 @@ static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
|
|||
struct desc_struct *d = get_cpu_gdt_table(cpu);
|
||||
tss_desc tss;
|
||||
|
||||
/*
|
||||
* sizeof(unsigned long) coming from an extra "long" at the end
|
||||
* of the iobitmap. See tss_struct definition in processor.h
|
||||
*
|
||||
* -1? seg base+limit should be pointing to the address of the
|
||||
* last valid byte
|
||||
*/
|
||||
set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
|
||||
IO_BITMAP_OFFSET + IO_BITMAP_BYTES +
|
||||
sizeof(unsigned long) - 1);
|
||||
__KERNEL_TSS_LIMIT);
|
||||
write_gdt_entry(d, entry, &tss, DESC_TSS);
|
||||
}
|
||||
|
||||
|
@@ -213,6 +205,54 @@ static inline void native_load_tr_desc(void)
asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
}

static inline void force_reload_TR(void)
{
struct desc_struct *d = get_cpu_gdt_table(smp_processor_id());
tss_desc tss;

memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc));

/*
* LTR requires an available TSS, and the TSS is currently
* busy. Make it be available so that LTR will work.
*/
tss.type = DESC_TSS;
write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS);

load_TR_desc();
}

DECLARE_PER_CPU(bool, need_tr_refresh);

static inline void refresh_TR(void)
{
DEBUG_LOCKS_WARN_ON(preemptible());

if (unlikely(this_cpu_read(need_tr_refresh))) {
force_reload_TR();
this_cpu_write(need_tr_refresh, false);
}
}

/*
* If you do something evil that corrupts the cached TSS limit (I'm looking
* at you, VMX exits), call this function.
*
* The optimization here is that the TSS limit only matters for Linux if the
* IO bitmap is in use. If the TSS limit gets forced to its minimum value,
* everything works except that IO bitmap will be ignored and all CPL 3 IO
* instructions will #GP, which is exactly what we want for normal tasks.
*/
static inline void invalidate_tss_limit(void)
{
DEBUG_LOCKS_WARN_ON(preemptible());

if (unlikely(test_thread_flag(TIF_IO_BITMAP)))
force_reload_TR();
else
this_cpu_write(need_tr_refresh, true);
}

static inline void native_load_gdt(const struct desc_ptr *dtr)
{
asm volatile("lgdt %0"::"m" (*dtr));
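The force_reload_TR()/refresh_TR()/invalidate_tss_limit() hunk above is a lazy scheme: a VMX exit may clobber the cached TSS limit, but reloading TR is only urgent when the current task actually uses an I/O bitmap; otherwise a per-CPU flag defers the reload until refresh_TR() runs. A minimal user-space sketch of that control flow (the names and the simulated flag below are illustrative stand-ins, not kernel APIs):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the per-CPU flag and the expensive reload. */
static bool need_tr_refresh;          /* models DECLARE_PER_CPU(bool, need_tr_refresh) */

static void force_reload(void)        /* models force_reload_TR(): the expensive path */
{
	printf("reloading TR (expensive)\n");
}

static void invalidate_tss_limit(bool task_uses_io_bitmap)
{
	if (task_uses_io_bitmap)
		force_reload();           /* limit matters right now: fix it immediately */
	else
		need_tr_refresh = true;   /* defer until an I/O bitmap task is switched in */
}

static void refresh_tr(void)
{
	if (need_tr_refresh) {
		force_reload();
		need_tr_refresh = false;
	}
}

int main(void)
{
	invalidate_tss_limit(false);      /* e.g. after a VM exit, no I/O bitmap in use */
	refresh_tr();                     /* later, from the context-switch path */
	return 0;
}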
@@ -441,5 +441,6 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq);
void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt);
void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt);
bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt);

#endif /* _ASM_X86_KVM_X86_EMULATE_H */
@@ -55,7 +55,6 @@
#define KVM_REQ_TRIPLE_FAULT 10
#define KVM_REQ_MMU_SYNC 11
#define KVM_REQ_CLOCK_UPDATE 12
#define KVM_REQ_DEACTIVATE_FPU 13
#define KVM_REQ_EVENT 14
#define KVM_REQ_APF_HALT 15
#define KVM_REQ_STEAL_UPDATE 16

@@ -115,7 +114,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
#define KVM_PERMILLE_MMU_PAGES 20
#define KVM_MIN_ALLOC_MMU_PAGES 64
#define KVM_MMU_HASH_SHIFT 10
#define KVM_MMU_HASH_SHIFT 12
#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
@@ -208,6 +207,13 @@ enum {
PFERR_WRITE_MASK | \
PFERR_PRESENT_MASK)

/*
* The mask used to denote special SPTEs, which can be either MMIO SPTEs or
* Access Tracking SPTEs. We use bit 62 instead of bit 63 to avoid conflicting
* with the SVE bit in EPT PTEs.
*/
#define SPTE_SPECIAL_MASK (1ULL << 62)

/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC 0
/*
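As the comment above says, bit 62 tags "special" SPTEs (MMIO or access-tracking entries) while leaving bit 63 alone. A tiny stand-alone check of the tag-and-test logic; the example SPTE value is made up purely to exercise the mask, it is not the kernel's MMIO encoding:

#include <stdint.h>
#include <stdio.h>

#define SPTE_SPECIAL_MASK (1ULL << 62)   /* matches the definition above */

int main(void)
{
	uint64_t normal_spte  = 0x00000000feed0007ULL;            /* hypothetical, no tag */
	uint64_t special_spte = normal_spte | SPTE_SPECIAL_MASK;  /* tagged as special */

	printf("normal  is special? %d\n", (normal_spte  & SPTE_SPECIAL_MASK) != 0);
	printf("special is special? %d\n", (special_spte & SPTE_SPECIAL_MASK) != 0);
	printf("bit 63 untouched:    %d\n", (special_spte >> 63) == 0);
	return 0;
}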
@ -668,6 +674,9 @@ struct kvm_vcpu_arch {
|
|||
|
||||
int pending_ioapic_eoi;
|
||||
int pending_external_vector;
|
||||
|
||||
/* GPA available (AMD only) */
|
||||
bool gpa_available;
|
||||
};
|
||||
|
||||
struct kvm_lpage_info {
|
||||
|
@ -716,6 +725,12 @@ struct kvm_hv {
|
|||
HV_REFERENCE_TSC_PAGE tsc_ref;
|
||||
};
|
||||
|
||||
enum kvm_irqchip_mode {
|
||||
KVM_IRQCHIP_NONE,
|
||||
KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */
|
||||
KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
|
||||
};
|
||||
|
||||
struct kvm_arch {
|
||||
unsigned int n_used_mmu_pages;
|
||||
unsigned int n_requested_mmu_pages;
|
||||
|
@ -788,7 +803,7 @@ struct kvm_arch {
|
|||
|
||||
u64 disabled_quirks;
|
||||
|
||||
bool irqchip_split;
|
||||
enum kvm_irqchip_mode irqchip_mode;
|
||||
u8 nr_reserved_ioapic_pins;
|
||||
|
||||
bool disabled_lapic_found;
|
||||
|
@ -815,6 +830,7 @@ struct kvm_vm_stat {
|
|||
ulong mmu_unsync;
|
||||
ulong remote_tlb_flush;
|
||||
ulong lpages;
|
||||
ulong max_mmu_page_hash_collisions;
|
||||
};
|
||||
|
||||
struct kvm_vcpu_stat {
|
||||
|
@ -844,6 +860,7 @@ struct kvm_vcpu_stat {
|
|||
u64 hypercalls;
|
||||
u64 irq_injections;
|
||||
u64 nmi_injections;
|
||||
u64 req_event;
|
||||
};
|
||||
|
||||
struct x86_instruction_info;
|
||||
|
@ -918,8 +935,6 @@ struct kvm_x86_ops {
|
|||
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
|
||||
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
|
||||
u32 (*get_pkru)(struct kvm_vcpu *vcpu);
|
||||
void (*fpu_activate)(struct kvm_vcpu *vcpu);
|
||||
void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
|
||||
|
||||
void (*tlb_flush)(struct kvm_vcpu *vcpu);
|
||||
|
||||
|
@ -951,7 +966,7 @@ struct kvm_x86_ops {
|
|||
void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
|
||||
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
|
||||
void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
|
||||
void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
|
||||
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
|
||||
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
|
||||
int (*get_tdp_level)(void);
|
||||
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
|
||||
|
@ -1050,7 +1065,8 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu);
|
|||
void kvm_mmu_init_vm(struct kvm *kvm);
|
||||
void kvm_mmu_uninit_vm(struct kvm *kvm);
|
||||
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
|
||||
u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask);
|
||||
u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
|
||||
u64 acc_track_mask);
|
||||
|
||||
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
#ifndef _ASM_X86_KVM_CLOCK_H
|
||||
#define _ASM_X86_KVM_CLOCK_H
|
||||
|
||||
extern struct clocksource kvm_clock;
|
||||
|
||||
#endif /* _ASM_X86_KVM_CLOCK_H */
|
|
@@ -673,7 +673,7 @@ static __always_inline void pv_kick(int cpu)
PVOP_VCALL1(pv_lock_ops.kick, cpu);
}

static __always_inline bool pv_vcpu_is_preempted(int cpu)
static __always_inline bool pv_vcpu_is_preempted(long cpu)
{
return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu);
}
@@ -304,7 +304,7 @@ struct x86_hw_tss {
u16 reserved5;
u16 io_bitmap_base;

} __attribute__((packed)) ____cacheline_aligned;
} __attribute__((packed));
#endif

/*
@@ -342,6 +342,16 @@ struct tss_struct {
DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);

/*
* sizeof(unsigned long) coming from an extra "long" at the end
* of the iobitmap.
*
* -1? seg base+limit should be pointing to the address of the
* last valid byte
*/
#define __KERNEL_TSS_LIMIT \
(IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)

#ifdef CONFIG_X86_32
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
#endif
@@ -34,7 +34,7 @@ static inline void queued_spin_unlock(struct qspinlock *lock)
}

#define vcpu_is_preempted vcpu_is_preempted
static inline bool vcpu_is_preempted(int cpu)
static inline bool vcpu_is_preempted(long cpu)
{
return pv_vcpu_is_preempted(cpu);
}
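vcpu_is_preempted() lets lock-waiting code stop spinning when the lock holder's vCPU is not actually running. A rough user-space model of that idea, with a fake preemption flag standing in for the paravirt hook (nothing below is a real kernel or KVM API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Fake state: in the kernel this comes from the hypervisor via steal_time. */
static atomic_bool holder_preempted;

static bool vcpu_is_preempted(long cpu)
{
	(void)cpu;
	return atomic_load(&holder_preempted);
}

/* Spin on the lock, but give up early if the holder's vCPU was preempted. */
static void spin_wait(atomic_flag *lock, long holder_cpu)
{
	while (atomic_flag_test_and_set(lock)) {
		if (vcpu_is_preempted(holder_cpu)) {
			printf("holder preempted, yielding instead of spinning\n");
			return;  /* a real implementation would sleep or yield here */
		}
	}
	printf("got the lock\n");
	atomic_flag_clear(lock);
}

int main(void)
{
	atomic_flag lock = ATOMIC_FLAG_INIT;

	atomic_store(&holder_preempted, false);
	spin_wait(&lock, 1);

	/* Simulate a preempted holder: lock held, preempted flag set. */
	atomic_flag_test_and_set(&lock);
	atomic_store(&holder_preempted, true);
	spin_wait(&lock, 1);
	return 0;
}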
@ -467,8 +467,16 @@ enum vmcs_field {
|
|||
#define VMX_EPT_WRITABLE_MASK 0x2ull
|
||||
#define VMX_EPT_EXECUTABLE_MASK 0x4ull
|
||||
#define VMX_EPT_IPAT_BIT (1ull << 6)
|
||||
#define VMX_EPT_ACCESS_BIT (1ull << 8)
|
||||
#define VMX_EPT_DIRTY_BIT (1ull << 9)
|
||||
#define VMX_EPT_ACCESS_BIT (1ull << 8)
|
||||
#define VMX_EPT_DIRTY_BIT (1ull << 9)
|
||||
#define VMX_EPT_RWX_MASK (VMX_EPT_READABLE_MASK | \
|
||||
VMX_EPT_WRITABLE_MASK | \
|
||||
VMX_EPT_EXECUTABLE_MASK)
|
||||
#define VMX_EPT_MT_MASK (7ull << VMX_EPT_MT_EPTE_SHIFT)
|
||||
|
||||
/* The mask to use to trigger an EPT Misconfiguration in order to track MMIO */
|
||||
#define VMX_EPT_MISCONFIG_WX_VALUE (VMX_EPT_WRITABLE_MASK | \
|
||||
VMX_EPT_EXECUTABLE_MASK)
|
||||
|
||||
#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
|
||||
|
||||
|
@ -499,6 +507,22 @@ struct vmx_msr_entry {
|
|||
#define ENTRY_FAIL_NMI 3
|
||||
#define ENTRY_FAIL_VMCS_LINK_PTR 4
|
||||
|
||||
/*
|
||||
* Exit Qualifications for EPT Violations
|
||||
*/
|
||||
#define EPT_VIOLATION_ACC_READ_BIT 0
|
||||
#define EPT_VIOLATION_ACC_WRITE_BIT 1
|
||||
#define EPT_VIOLATION_ACC_INSTR_BIT 2
|
||||
#define EPT_VIOLATION_READABLE_BIT 3
|
||||
#define EPT_VIOLATION_WRITABLE_BIT 4
|
||||
#define EPT_VIOLATION_EXECUTABLE_BIT 5
|
||||
#define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT)
|
||||
#define EPT_VIOLATION_ACC_WRITE (1 << EPT_VIOLATION_ACC_WRITE_BIT)
|
||||
#define EPT_VIOLATION_ACC_INSTR (1 << EPT_VIOLATION_ACC_INSTR_BIT)
|
||||
#define EPT_VIOLATION_READABLE (1 << EPT_VIOLATION_READABLE_BIT)
|
||||
#define EPT_VIOLATION_WRITABLE (1 << EPT_VIOLATION_WRITABLE_BIT)
|
||||
#define EPT_VIOLATION_EXECUTABLE (1 << EPT_VIOLATION_EXECUTABLE_BIT)
|
||||
|
||||
/*
|
||||
* VM-instruction error numbers
|
||||
*/
|
||||
|
|
|
@@ -50,6 +50,15 @@ struct kvm_steal_time {
__u32 pad[11];
};

#define KVM_CLOCK_PAIRING_WALLCLOCK 0
struct kvm_clock_pairing {
__s64 sec;
__s64 nsec;
__u64 tsc;
__u32 flags;
__u32 pad[9];
};

#define KVM_STEAL_ALIGNMENT_BITS 5
#define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1)))
#define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1)
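struct kvm_clock_pairing is the payload behind the host-to-guest PTP support mentioned in the summary: the host returns a (sec, nsec) host timestamp together with the guest TSC value it corresponds to, so the guest can relate later TSC readings to host time. A hedged sketch of how a guest-side consumer might use such a sample once it has obtained one; the way the sample is fetched and the TSC frequency are assumptions, not shown by this diff:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the UAPI struct above (padding omitted for brevity). */
struct kvm_clock_pairing {
	int64_t  sec;
	int64_t  nsec;
	uint64_t tsc;
	uint32_t flags;
};

/*
 * Given a pairing sample and a later guest TSC reading, estimate host time
 * at that reading. tsc_khz is assumed to be known; overflow handling is
 * omitted in this sketch.
 */
static uint64_t host_time_ns(const struct kvm_clock_pairing *p,
			     uint64_t tsc_now, uint64_t tsc_khz)
{
	uint64_t base_ns  = (uint64_t)p->sec * 1000000000ULL + (uint64_t)p->nsec;
	uint64_t delta_ns = (tsc_now - p->tsc) * 1000000ULL / tsc_khz;

	return base_ns + delta_ns;
}

int main(void)
{
	/* Made-up sample: host said "1000 s corresponds to guest TSC 5,000,000". */
	struct kvm_clock_pairing p = { .sec = 1000, .nsec = 0, .tsc = 5000000, .flags = 0 };

	/* 2,600,000 cycles later at an assumed 2600000 kHz clock = 1 ms. */
	printf("%llu ns\n", (unsigned long long)host_time_ns(&p, 7600000, 2600000));
	return 0;
}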
@ -13,6 +13,10 @@ static char syscalls_ia32[] = {
|
|||
#include <asm/syscalls_32.h>
|
||||
};
|
||||
|
||||
#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS)
|
||||
#include <asm/kvm_para.h>
|
||||
#endif
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
|
@ -22,6 +26,11 @@ int main(void)
|
|||
BLANK();
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS)
|
||||
OFFSET(KVM_STEAL_TIME_preempted, kvm_steal_time, preempted);
|
||||
BLANK();
|
||||
#endif
|
||||
|
||||
#define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry)
|
||||
ENTRY(bx);
|
||||
ENTRY(cx);
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include <linux/syscalls.h>
|
||||
#include <linux/bitmap.h>
|
||||
#include <asm/syscalls.h>
|
||||
#include <asm/desc.h>
|
||||
|
||||
/*
|
||||
* this changes the io permissions bitmap in the current task.
|
||||
|
@ -45,6 +46,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
|
|||
memset(bitmap, 0xff, IO_BITMAP_BYTES);
|
||||
t->io_bitmap_ptr = bitmap;
|
||||
set_thread_flag(TIF_IO_BITMAP);
|
||||
|
||||
preempt_disable();
|
||||
refresh_TR();
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@@ -589,7 +589,8 @@ out:
local_irq_restore(flags);
}

__visible bool __kvm_vcpu_is_preempted(int cpu)
#ifdef CONFIG_X86_32
__visible bool __kvm_vcpu_is_preempted(long cpu)
{
struct kvm_steal_time *src = &per_cpu(steal_time, cpu);

@@ -597,6 +598,29 @@ __visible bool __kvm_vcpu_is_preempted(int cpu)
}
PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted);

#else

#include <asm/asm-offsets.h>

extern bool __raw_callee_save___kvm_vcpu_is_preempted(long);

/*
* Hand-optimize version for x86-64 to avoid 8 64-bit register saving and
* restoring to/from the stack.
*/
asm(
".pushsection .text;"
".global __raw_callee_save___kvm_vcpu_is_preempted;"
".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
"__raw_callee_save___kvm_vcpu_is_preempted:"
"movq __per_cpu_offset(,%rdi,8), %rax;"
"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
"setne %al;"
"ret;"
".popsection");

#endif

/*
* Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
*/
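The hand-written x86-64 thunk above boils down to "is per_cpu(steal_time, cpu).preempted non-zero?", with the usual callee-saved register spills avoided. A plain-C model of the same check; the array stands in for the real per-CPU area and the struct is reduced to the one field the asm reads:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

/* Reduced stand-in for struct kvm_steal_time. */
struct kvm_steal_time {
	uint8_t preempted;
};

/* Stand-in for the per-CPU steal_time area that the hypervisor updates. */
static struct kvm_steal_time steal_time[NR_CPUS];

/* What the thunk computes: "is this vCPU's preempted byte non-zero?" */
static bool kvm_vcpu_is_preempted(long cpu)
{
	return steal_time[cpu].preempted != 0;
}

int main(void)
{
	steal_time[2].preempted = 1;            /* pretend the host preempted vCPU 2 */
	printf("cpu 1: %d\n", kvm_vcpu_is_preempted(1));
	printf("cpu 2: %d\n", kvm_vcpu_is_preempted(2));
	return 0;
}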
@ -28,6 +28,7 @@
|
|||
|
||||
#include <asm/x86_init.h>
|
||||
#include <asm/reboot.h>
|
||||
#include <asm/kvmclock.h>
|
||||
|
||||
static int kvmclock __ro_after_init = 1;
|
||||
static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
|
||||
|
@ -49,6 +50,7 @@ struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
|
|||
{
|
||||
return hv_clock;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(pvclock_pvti_cpu0_va);
|
||||
|
||||
/*
|
||||
* The wallclock is the time of day when we booted. Since then, some time may
|
||||
|
@ -174,13 +176,14 @@ bool kvm_check_and_clear_guest_paused(void)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static struct clocksource kvm_clock = {
|
||||
struct clocksource kvm_clock = {
|
||||
.name = "kvm-clock",
|
||||
.read = kvm_clock_get_cycles,
|
||||
.rating = 400,
|
||||
.mask = CLOCKSOURCE_MASK(64),
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(kvm_clock);
|
||||
|
||||
int kvm_register_clock(char *txt)
|
||||
{
|
||||
|
|
|
@@ -20,7 +20,7 @@ bool pv_is_native_spin_unlock(void)
__raw_callee_save___native_queued_spin_unlock;
}

__visible bool __native_vcpu_is_preempted(int cpu)
__visible bool __native_vcpu_is_preempted(long cpu)
{
return false;
}
@ -32,6 +32,7 @@
|
|||
#include <asm/mce.h>
|
||||
#include <asm/vm86.h>
|
||||
#include <asm/switch_to.h>
|
||||
#include <asm/desc.h>
|
||||
|
||||
/*
|
||||
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
|
||||
|
@ -64,6 +65,9 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
|
|||
};
|
||||
EXPORT_PER_CPU_SYMBOL(cpu_tss);
|
||||
|
||||
DEFINE_PER_CPU(bool, need_tr_refresh);
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(need_tr_refresh);
|
||||
|
||||
/*
|
||||
* this gets called so that we can store lazy state into memory and copy the
|
||||
* current task into the new thread.
|
||||
|
@ -209,6 +213,12 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
|
|||
*/
|
||||
memcpy(tss->io_bitmap, next->io_bitmap_ptr,
|
||||
max(prev->io_bitmap_max, next->io_bitmap_max));
|
||||
|
||||
/*
|
||||
* Make sure that the TSS limit is correct for the CPU
|
||||
* to notice the IO bitmap.
|
||||
*/
|
||||
refresh_TR();
|
||||
} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
|
||||
/*
|
||||
* Clear any possible leftover bits:
|
||||
|
|
|
@ -123,8 +123,6 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
|
|||
if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
|
||||
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
|
||||
|
||||
kvm_x86_ops->fpu_activate(vcpu);
|
||||
|
||||
/*
|
||||
* The existing code assumes virtual address is 48-bit in the canonical
|
||||
* address checks; exit if it is ever changed.
|
||||
|
@ -383,7 +381,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
|||
|
||||
/* cpuid 7.0.ecx*/
|
||||
const u32 kvm_cpuid_7_0_ecx_x86_features =
|
||||
F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/;
|
||||
F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ);
|
||||
|
||||
/* cpuid 7.0.edx*/
|
||||
const u32 kvm_cpuid_7_0_edx_x86_features =
|
||||
|
@ -861,12 +859,6 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
|
|||
if (!best)
|
||||
best = check_cpuid_limit(vcpu, function, index);
|
||||
|
||||
/*
|
||||
* Perfmon not yet supported for L2 guest.
|
||||
*/
|
||||
if (is_guest_mode(vcpu) && function == 0xa)
|
||||
best = NULL;
|
||||
|
||||
if (best) {
|
||||
*eax = best->eax;
|
||||
*ebx = best->ebx;
|
||||
|
|
|
@ -173,6 +173,7 @@
|
|||
#define NearBranch ((u64)1 << 52) /* Near branches */
|
||||
#define No16 ((u64)1 << 53) /* No 16 bit operand */
|
||||
#define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */
|
||||
#define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operand */
|
||||
|
||||
#define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
|
||||
|
||||
|
@ -4298,7 +4299,7 @@ static const struct opcode group1[] = {
|
|||
};
|
||||
|
||||
static const struct opcode group1A[] = {
|
||||
I(DstMem | SrcNone | Mov | Stack | IncSP, em_pop), N, N, N, N, N, N, N,
|
||||
I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N,
|
||||
};
|
||||
|
||||
static const struct opcode group2[] = {
|
||||
|
@ -4336,7 +4337,7 @@ static const struct opcode group5[] = {
|
|||
I(SrcMemFAddr | ImplicitOps, em_call_far),
|
||||
I(SrcMem | NearBranch, em_jmp_abs),
|
||||
I(SrcMemFAddr | ImplicitOps, em_jmp_far),
|
||||
I(SrcMem | Stack, em_push), D(Undefined),
|
||||
I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined),
|
||||
};
|
||||
|
||||
static const struct opcode group6[] = {
|
||||
|
@ -4556,8 +4557,8 @@ static const struct opcode opcode_table[256] = {
|
|||
/* 0xA0 - 0xA7 */
|
||||
I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
|
||||
I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
|
||||
I2bv(SrcSI | DstDI | Mov | String, em_mov),
|
||||
F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r),
|
||||
I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
|
||||
F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
|
||||
/* 0xA8 - 0xAF */
|
||||
F2bv(DstAcc | SrcImm | NoWrite, em_test),
|
||||
I2bv(SrcAcc | DstDI | Mov | String, em_mov),
|
||||
|
@ -5671,3 +5672,14 @@ void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
|
|||
{
|
||||
writeback_registers(ctxt);
|
||||
}
|
||||
|
||||
bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
if (ctxt->rep_prefix && (ctxt->d & String))
|
||||
return false;
|
||||
|
||||
if (ctxt->d & TwoMemOp)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -305,13 +305,13 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
|
|||
return -ENOENT;
|
||||
|
||||
memset(&irq, 0, sizeof(irq));
|
||||
irq.dest_id = kvm_apic_id(vcpu->arch.apic);
|
||||
irq.shorthand = APIC_DEST_SELF;
|
||||
irq.dest_mode = APIC_DEST_PHYSICAL;
|
||||
irq.delivery_mode = APIC_DM_FIXED;
|
||||
irq.vector = vector;
|
||||
irq.level = 1;
|
||||
|
||||
ret = kvm_irq_delivery_to_apic(vcpu->kvm, NULL, &irq, NULL);
|
||||
ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL);
|
||||
trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -598,14 +598,14 @@ static const struct kvm_io_device_ops picdev_eclr_ops = {
|
|||
.write = picdev_eclr_write,
|
||||
};
|
||||
|
||||
struct kvm_pic *kvm_create_pic(struct kvm *kvm)
|
||||
int kvm_pic_init(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_pic *s;
|
||||
int ret;
|
||||
|
||||
s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
|
||||
if (!s)
|
||||
return NULL;
|
||||
return -ENOMEM;
|
||||
spin_lock_init(&s->lock);
|
||||
s->kvm = kvm;
|
||||
s->pics[0].elcr_mask = 0xf8;
|
||||
|
@ -635,7 +635,9 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
|
|||
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
|
||||
return s;
|
||||
kvm->arch.vpic = s;
|
||||
|
||||
return 0;
|
||||
|
||||
fail_unreg_1:
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_slave);
|
||||
|
@ -648,13 +650,17 @@ fail_unlock:
|
|||
|
||||
kfree(s);
|
||||
|
||||
return NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void kvm_destroy_pic(struct kvm_pic *vpic)
|
||||
void kvm_pic_destroy(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_pic *vpic = kvm->arch.vpic;
|
||||
|
||||
kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master);
|
||||
kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave);
|
||||
kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr);
|
||||
|
||||
kvm->arch.vpic = NULL;
|
||||
kfree(vpic);
|
||||
}
|
||||
|
|
|
@ -73,8 +73,8 @@ struct kvm_pic {
|
|||
unsigned long irq_states[PIC_NUM_PINS];
|
||||
};
|
||||
|
||||
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
|
||||
void kvm_destroy_pic(struct kvm_pic *vpic);
|
||||
int kvm_pic_init(struct kvm *kvm);
|
||||
void kvm_pic_destroy(struct kvm *kvm);
|
||||
int kvm_pic_read_irq(struct kvm *kvm);
|
||||
void kvm_pic_update_irq(struct kvm_pic *s);
|
||||
|
||||
|
@ -93,18 +93,19 @@ static inline int pic_in_kernel(struct kvm *kvm)
|
|||
|
||||
static inline int irqchip_split(struct kvm *kvm)
|
||||
{
|
||||
return kvm->arch.irqchip_split;
|
||||
return kvm->arch.irqchip_mode == KVM_IRQCHIP_SPLIT;
|
||||
}
|
||||
|
||||
static inline int irqchip_kernel(struct kvm *kvm)
|
||||
{
|
||||
return kvm->arch.irqchip_mode == KVM_IRQCHIP_KERNEL;
|
||||
}
|
||||
|
||||
static inline int irqchip_in_kernel(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_pic *vpic = pic_irqchip(kvm);
|
||||
bool ret;
|
||||
bool ret = kvm->arch.irqchip_mode != KVM_IRQCHIP_NONE;
|
||||
|
||||
ret = (vpic != NULL);
|
||||
ret |= irqchip_split(kvm);
|
||||
|
||||
/* Read vpic before kvm->irq_routing. */
|
||||
/* Matches with wmb after initializing kvm->irq_routing. */
|
||||
smp_rmb();
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -41,15 +41,6 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
|
|||
bool line_status)
|
||||
{
|
||||
struct kvm_pic *pic = pic_irqchip(kvm);
|
||||
|
||||
/*
|
||||
* XXX: rejecting pic routes when pic isn't in use would be better,
|
||||
* but the default routing table is installed while kvm->arch.vpic is
|
||||
* NULL and KVM_CREATE_IRQCHIP can race with KVM_IRQ_LINE.
|
||||
*/
|
||||
if (!pic)
|
||||
return -1;
|
||||
|
||||
return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
|
||||
}
|
||||
|
||||
|
@ -58,10 +49,6 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
|
|||
bool line_status)
|
||||
{
|
||||
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
|
||||
|
||||
if (!ioapic)
|
||||
return -1;
|
||||
|
||||
return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level,
|
||||
line_status);
|
||||
}
|
||||
|
@ -297,16 +284,20 @@ int kvm_set_routing_entry(struct kvm *kvm,
|
|||
case KVM_IRQ_ROUTING_IRQCHIP:
|
||||
delta = 0;
|
||||
switch (ue->u.irqchip.irqchip) {
|
||||
case KVM_IRQCHIP_PIC_MASTER:
|
||||
e->set = kvm_set_pic_irq;
|
||||
max_pin = PIC_NUM_PINS;
|
||||
break;
|
||||
case KVM_IRQCHIP_PIC_SLAVE:
|
||||
delta = 8;
|
||||
/* fall through */
|
||||
case KVM_IRQCHIP_PIC_MASTER:
|
||||
if (!pic_in_kernel(kvm))
|
||||
goto out;
|
||||
|
||||
e->set = kvm_set_pic_irq;
|
||||
max_pin = PIC_NUM_PINS;
|
||||
delta = 8;
|
||||
break;
|
||||
case KVM_IRQCHIP_IOAPIC:
|
||||
if (!ioapic_in_kernel(kvm))
|
||||
goto out;
|
||||
|
||||
max_pin = KVM_IOAPIC_NUM_PINS;
|
||||
e->set = kvm_set_ioapic_irq;
|
||||
break;
|
||||
|
@ -409,7 +400,7 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm)
|
|||
|
||||
void kvm_arch_post_irq_routing_update(struct kvm *kvm)
|
||||
{
|
||||
if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
|
||||
if (!irqchip_split(kvm))
|
||||
return;
|
||||
kvm_make_scan_ioapic_request(kvm);
|
||||
}
|
||||
|
|
|
@ -115,6 +115,16 @@ static inline int apic_enabled(struct kvm_lapic *apic)
|
|||
(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
|
||||
APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
|
||||
|
||||
static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
|
||||
{
|
||||
return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
|
||||
}
|
||||
|
||||
static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
|
||||
{
|
||||
return apic->vcpu->vcpu_id;
|
||||
}
|
||||
|
||||
static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
|
||||
u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
|
||||
switch (map->mode) {
|
||||
|
@ -159,13 +169,13 @@ static void recalculate_apic_map(struct kvm *kvm)
|
|||
struct kvm_apic_map *new, *old = NULL;
|
||||
struct kvm_vcpu *vcpu;
|
||||
int i;
|
||||
u32 max_id = 255;
|
||||
u32 max_id = 255; /* enough space for any xAPIC ID */
|
||||
|
||||
mutex_lock(&kvm->arch.apic_map_lock);
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm)
|
||||
if (kvm_apic_present(vcpu))
|
||||
max_id = max(max_id, kvm_apic_id(vcpu->arch.apic));
|
||||
max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
|
||||
|
||||
new = kvm_kvzalloc(sizeof(struct kvm_apic_map) +
|
||||
sizeof(struct kvm_lapic *) * ((u64)max_id + 1));
|
||||
|
@ -179,16 +189,28 @@ static void recalculate_apic_map(struct kvm *kvm)
|
|||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
struct kvm_lapic **cluster;
|
||||
u16 mask;
|
||||
u32 ldr, aid;
|
||||
u32 ldr;
|
||||
u8 xapic_id;
|
||||
u32 x2apic_id;
|
||||
|
||||
if (!kvm_apic_present(vcpu))
|
||||
continue;
|
||||
|
||||
aid = kvm_apic_id(apic);
|
||||
ldr = kvm_lapic_get_reg(apic, APIC_LDR);
|
||||
xapic_id = kvm_xapic_id(apic);
|
||||
x2apic_id = kvm_x2apic_id(apic);
|
||||
|
||||
if (aid <= new->max_apic_id)
|
||||
new->phys_map[aid] = apic;
|
||||
/* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
|
||||
if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
|
||||
x2apic_id <= new->max_apic_id)
|
||||
new->phys_map[x2apic_id] = apic;
|
||||
/*
|
||||
* ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around,
|
||||
* prevent them from masking VCPUs with APIC ID <= 0xff.
|
||||
*/
|
||||
if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
|
||||
new->phys_map[xapic_id] = apic;
|
||||
|
||||
ldr = kvm_lapic_get_reg(apic, APIC_LDR);
|
||||
|
||||
if (apic_x2apic_mode(apic)) {
|
||||
new->mode |= KVM_APIC_MODE_X2APIC;
|
||||
|
@ -250,6 +272,8 @@ static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
|
|||
{
|
||||
u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
|
||||
|
||||
WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
|
||||
|
||||
kvm_lapic_set_reg(apic, APIC_ID, id);
|
||||
kvm_lapic_set_reg(apic, APIC_LDR, ldr);
|
||||
recalculate_apic_map(apic->vcpu->kvm);
|
||||
|
@@ -317,7 +341,7 @@ static int find_highest_vector(void *bitmap)
vec >= 0; vec -= APIC_VECTORS_PER_REG) {
reg = bitmap + REG_POS(vec);
if (*reg)
return fls(*reg) - 1 + vec;
return __fls(*reg) + vec;
}

return -1;
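The change above relies on __fls(x) being fls(x) - 1 for any non-zero word, i.e. the zero-based index of the highest set bit; __fls skips the zero check that the preceding "if (*reg)" already performs. A quick stand-alone check of that equivalence using a compiler builtin; these helpers only mimic the kernel's semantics, they are not its implementation:

#include <assert.h>
#include <stdio.h>

/* fls() is 1-based (0 for x == 0); the 0-based variant requires x != 0. */
static int fls_1based(unsigned int x)
{
	return x ? 32 - __builtin_clz(x) : 0;
}

static int fls_0based(unsigned int x)
{
	return 31 - __builtin_clz(x);      /* caller guarantees x != 0 */
}

int main(void)
{
	unsigned int samples[] = { 1, 2, 0x80, 0x12345678, 0x80000000u };

	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		unsigned int x = samples[i];
		assert(fls_0based(x) == fls_1based(x) - 1);
		printf("x=%#010x highest set bit = %d\n", x, fls_0based(x));
	}
	return 0;
}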
@ -337,27 +361,32 @@ static u8 count_vectors(void *bitmap)
|
|||
return count;
|
||||
}
|
||||
|
||||
void __kvm_apic_update_irr(u32 *pir, void *regs)
|
||||
int __kvm_apic_update_irr(u32 *pir, void *regs)
|
||||
{
|
||||
u32 i, pir_val;
|
||||
u32 i, vec;
|
||||
u32 pir_val, irr_val;
|
||||
int max_irr = -1;
|
||||
|
||||
for (i = 0; i <= 7; i++) {
|
||||
for (i = vec = 0; i <= 7; i++, vec += 32) {
|
||||
pir_val = READ_ONCE(pir[i]);
|
||||
irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
|
||||
if (pir_val) {
|
||||
pir_val = xchg(&pir[i], 0);
|
||||
*((u32 *)(regs + APIC_IRR + i * 0x10)) |= pir_val;
|
||||
irr_val |= xchg(&pir[i], 0);
|
||||
*((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
|
||||
}
|
||||
if (irr_val)
|
||||
max_irr = __fls(irr_val) + vec;
|
||||
}
|
||||
|
||||
return max_irr;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
|
||||
|
||||
void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
|
||||
int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
|
||||
{
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
|
||||
__kvm_apic_update_irr(pir, apic->regs);
|
||||
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
return __kvm_apic_update_irr(pir, apic->regs);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
|
||||
|
||||
|
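With __kvm_apic_update_irr() and kvm_apic_update_irr() now returning the highest pending vector, callers such as sync_pir_to_irr can program RVI without rescanning the IRR. A simplified single-threaded model of the merge-and-track loop above; the real code uses xchg() on the posted-interrupt descriptor and writes the APIC page, both reduced to plain arrays here:

#include <stdint.h>
#include <stdio.h>

/* 256 vectors = 8 x 32-bit registers, matching the IRR/PIR layout. */
#define NR_REGS 8

static int highest_bit(uint32_t x)     /* zero-based index of the top set bit */
{
	return 31 - __builtin_clz(x);
}

/* Merge pending bits from pir[] into irr[], return the highest vector or -1. */
static int apic_update_irr(uint32_t *pir, uint32_t *irr)
{
	int max_irr = -1;

	for (int i = 0, vec = 0; i < NR_REGS; i++, vec += 32) {
		if (pir[i]) {
			irr[i] |= pir[i];      /* the kernel uses xchg(&pir[i], 0) here */
			pir[i] = 0;
		}
		if (irr[i])
			max_irr = highest_bit(irr[i]) + vec;
	}
	return max_irr;
}

int main(void)
{
	uint32_t pir[NR_REGS] = { 0 }, irr[NR_REGS] = { 0 };

	pir[1] = 1u << 5;                      /* vector 37 posted by another CPU */
	irr[0] = 1u << 20;                     /* vector 20 already pending */
	printf("max_irr = %d\n", apic_update_irr(pir, irr));   /* prints 37 */
	return 0;
}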
@ -377,8 +406,6 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
|
|||
if (!apic->irr_pending)
|
||||
return -1;
|
||||
|
||||
if (apic->vcpu->arch.apicv_active)
|
||||
kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
|
||||
result = apic_search_irr(apic);
|
||||
ASSERT(result == -1 || result >= 16);
|
||||
|
||||
|
@ -392,9 +419,10 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
|
|||
vcpu = apic->vcpu;
|
||||
|
||||
if (unlikely(vcpu->arch.apicv_active)) {
|
||||
/* try to update RVI */
|
||||
/* need to update RVI */
|
||||
apic_clear_vector(vec, apic->regs + APIC_IRR);
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
kvm_x86_ops->hwapic_irr_update(vcpu,
|
||||
apic_find_highest_irr(apic));
|
||||
} else {
|
||||
apic->irr_pending = false;
|
||||
apic_clear_vector(vec, apic->regs + APIC_IRR);
|
||||
|
@ -484,6 +512,7 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
|
|||
*/
|
||||
return apic_find_highest_irr(vcpu->arch.apic);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
|
||||
|
||||
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
|
||||
int vector, int level, int trig_mode,
|
||||
|
@ -500,16 +529,14 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
|
|||
|
||||
static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
|
||||
{
|
||||
|
||||
return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
|
||||
sizeof(val));
|
||||
return kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.pv_eoi.data, &val,
|
||||
sizeof(val));
|
||||
}
|
||||
|
||||
static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
|
||||
{
|
||||
|
||||
return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
|
||||
sizeof(*val));
|
||||
return kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.pv_eoi.data, val,
|
||||
sizeof(*val));
|
||||
}
|
||||
|
||||
static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
|
||||
|
@ -546,7 +573,19 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
|
|||
__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
|
||||
}
|
||||
|
||||
static void apic_update_ppr(struct kvm_lapic *apic)
|
||||
static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
|
||||
{
|
||||
int highest_irr;
|
||||
if (kvm_x86_ops->sync_pir_to_irr && apic->vcpu->arch.apicv_active)
|
||||
highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
|
||||
else
|
||||
highest_irr = apic_find_highest_irr(apic);
|
||||
if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
|
||||
return -1;
|
||||
return highest_irr;
|
||||
}
|
||||
|
||||
static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
|
||||
{
|
||||
u32 tpr, isrv, ppr, old_ppr;
|
||||
int isr;
|
||||
|
@ -564,13 +603,28 @@ static void apic_update_ppr(struct kvm_lapic *apic)
|
|||
apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
|
||||
apic, ppr, isr, isrv);
|
||||
|
||||
if (old_ppr != ppr) {
|
||||
*new_ppr = ppr;
|
||||
if (old_ppr != ppr)
|
||||
kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
|
||||
if (ppr < old_ppr)
|
||||
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
|
||||
}
|
||||
|
||||
return ppr < old_ppr;
|
||||
}
|
||||
|
||||
static void apic_update_ppr(struct kvm_lapic *apic)
|
||||
{
|
||||
u32 ppr;
|
||||
|
||||
if (__apic_update_ppr(apic, &ppr) &&
|
||||
apic_has_interrupt_for_ppr(apic, ppr) != -1)
|
||||
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
|
||||
}
|
||||
|
||||
void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
apic_update_ppr(vcpu->arch.apic);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);
|
||||
|
||||
static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
|
||||
{
|
||||
kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
|
||||
|
@ -579,10 +633,8 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
|
|||
|
||||
static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
|
||||
{
|
||||
if (apic_x2apic_mode(apic))
|
||||
return mda == X2APIC_BROADCAST;
|
||||
|
||||
return GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST;
|
||||
return mda == (apic_x2apic_mode(apic) ?
|
||||
X2APIC_BROADCAST : APIC_BROADCAST);
|
||||
}
|
||||
|
||||
static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
|
||||
|
@ -591,9 +643,18 @@ static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
|
|||
return true;
|
||||
|
||||
if (apic_x2apic_mode(apic))
|
||||
return mda == kvm_apic_id(apic);
|
||||
return mda == kvm_x2apic_id(apic);
|
||||
|
||||
return mda == SET_APIC_DEST_FIELD(kvm_apic_id(apic));
|
||||
/*
|
||||
* Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
|
||||
* it were in x2APIC mode. Hotplugged VCPUs start in xAPIC mode and
|
||||
* this allows unique addressing of VCPUs with APIC ID over 0xff.
|
||||
* The 0xff condition is needed because writeable xAPIC ID.
|
||||
*/
|
||||
if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
|
||||
return true;
|
||||
|
||||
return mda == kvm_xapic_id(apic);
|
||||
}
|
||||
|
||||
static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
|
||||
|
@ -610,7 +671,6 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
|
|||
&& (logical_id & mda & 0xffff) != 0;
|
||||
|
||||
logical_id = GET_APIC_LOGICAL_ID(logical_id);
|
||||
mda = GET_APIC_DEST_FIELD(mda);
|
||||
|
||||
switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
|
||||
case APIC_DFR_FLAT:
|
||||
|
@ -627,9 +687,9 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
|
|||
|
||||
/* The KVM local APIC implementation has two quirks:
|
||||
*
|
||||
* - the xAPIC MDA stores the destination at bits 24-31, while this
|
||||
* is not true of struct kvm_lapic_irq's dest_id field. This is
|
||||
* just a quirk in the API and is not problematic.
|
||||
* - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
|
||||
* in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
|
||||
* KVM doesn't do that aliasing.
|
||||
*
|
||||
* - in-kernel IOAPIC messages have to be delivered directly to
|
||||
* x2APIC, because the kernel does not support interrupt remapping.
|
||||
|
@ -645,13 +705,12 @@ static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
|
|||
struct kvm_lapic *source, struct kvm_lapic *target)
|
||||
{
|
||||
bool ipi = source != NULL;
|
||||
bool x2apic_mda = apic_x2apic_mode(ipi ? source : target);
|
||||
|
||||
if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
|
||||
!ipi && dest_id == APIC_BROADCAST && x2apic_mda)
|
||||
!ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
|
||||
return X2APIC_BROADCAST;
|
||||
|
||||
return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id);
|
||||
return dest_id;
|
||||
}
|
||||
|
||||
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
|
||||
|
@ -1907,9 +1966,9 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
|
|||
vcpu->arch.apic_arb_prio = 0;
|
||||
vcpu->arch.apic_attention = 0;
|
||||
|
||||
apic_debug("%s: vcpu=%p, id=%d, base_msr="
|
||||
apic_debug("%s: vcpu=%p, id=0x%x, base_msr="
|
||||
"0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
|
||||
vcpu, kvm_apic_id(apic),
|
||||
vcpu, kvm_lapic_get_reg(apic, APIC_ID),
|
||||
vcpu->arch.apic_base, apic->base_address);
|
||||
}
|
||||
|
||||
|
@ -2021,17 +2080,13 @@ nomem:
|
|||
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
int highest_irr;
|
||||
u32 ppr;
|
||||
|
||||
if (!apic_enabled(apic))
|
||||
return -1;
|
||||
|
||||
apic_update_ppr(apic);
|
||||
highest_irr = apic_find_highest_irr(apic);
|
||||
if ((highest_irr == -1) ||
|
||||
((highest_irr & 0xF0) <= kvm_lapic_get_reg(apic, APIC_PROCPRI)))
|
||||
return -1;
|
||||
return highest_irr;
|
||||
__apic_update_ppr(apic, &ppr);
|
||||
return apic_has_interrupt_for_ppr(apic, ppr);
|
||||
}
|
||||
|
||||
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
|
||||
|
@ -2067,6 +2122,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
|
|||
{
|
||||
int vector = kvm_apic_has_interrupt(vcpu);
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
u32 ppr;
|
||||
|
||||
if (vector == -1)
|
||||
return -1;
|
||||
|
@ -2078,13 +2134,23 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
|
|||
* because the process would deliver it through the IDT.
|
||||
*/
|
||||
|
||||
apic_set_isr(vector, apic);
|
||||
apic_update_ppr(apic);
|
||||
apic_clear_irr(vector, apic);
|
||||
|
||||
if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
|
||||
apic_clear_isr(vector, apic);
|
||||
/*
|
||||
* For auto-EOI interrupts, there might be another pending
|
||||
* interrupt above PPR, so check whether to raise another
|
||||
* KVM_REQ_EVENT.
|
||||
*/
|
||||
apic_update_ppr(apic);
|
||||
} else {
|
||||
/*
|
||||
* For normal interrupts, PPR has been raised and there cannot
|
||||
* be a higher-priority pending interrupt---except if there was
|
||||
* a concurrent interrupt injection, but that would have
|
||||
* triggered KVM_REQ_EVENT already.
|
||||
*/
|
||||
apic_set_isr(vector, apic);
|
||||
__apic_update_ppr(apic, &ppr);
|
||||
}
|
||||
|
||||
return vector;
|
||||
|
@ -2145,8 +2211,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
|
|||
1 : count_vectors(apic->regs + APIC_ISR);
|
||||
apic->highest_isr_cache = -1;
|
||||
if (vcpu->arch.apicv_active) {
|
||||
if (kvm_x86_ops->apicv_post_state_restore)
|
||||
kvm_x86_ops->apicv_post_state_restore(vcpu);
|
||||
kvm_x86_ops->apicv_post_state_restore(vcpu);
|
||||
kvm_x86_ops->hwapic_irr_update(vcpu,
|
||||
apic_find_highest_irr(apic));
|
||||
kvm_x86_ops->hwapic_isr_update(vcpu,
|
||||
|
@ -2220,8 +2285,8 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
|
|||
if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
|
||||
return;
|
||||
|
||||
if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
|
||||
sizeof(u32)))
|
||||
if (kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.apic->vapic_cache, &data,
|
||||
sizeof(u32)))
|
||||
return;
|
||||
|
||||
apic_set_tpr(vcpu->arch.apic, data & 0xff);
|
||||
|
@ -2273,14 +2338,14 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
|
|||
max_isr = 0;
|
||||
data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
|
||||
|
||||
kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
|
||||
sizeof(u32));
|
||||
kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.apic->vapic_cache, &data,
|
||||
sizeof(u32));
|
||||
}
|
||||
|
||||
int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
|
||||
{
|
||||
if (vapic_addr) {
|
||||
if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
|
||||
if (kvm_vcpu_gfn_to_hva_cache_init(vcpu,
|
||||
&vcpu->arch.apic->vapic_cache,
|
||||
vapic_addr, sizeof(u32)))
|
||||
return -EINVAL;
|
||||
|
@ -2374,7 +2439,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
|
|||
vcpu->arch.pv_eoi.msr_val = data;
|
||||
if (!pv_eoi_enabled(vcpu))
|
||||
return 0;
|
||||
return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
|
||||
return kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.pv_eoi.data,
|
||||
addr, sizeof(u8));
|
||||
}
|
||||
|
||||
|
|
|
@ -71,8 +71,9 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
|
|||
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
|
||||
int short_hand, unsigned int dest, int dest_mode);
|
||||
|
||||
void __kvm_apic_update_irr(u32 *pir, void *regs);
|
||||
void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
|
||||
int __kvm_apic_update_irr(u32 *pir, void *regs);
|
||||
int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
|
||||
void kvm_apic_update_ppr(struct kvm_vcpu *vcpu);
|
||||
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
|
||||
struct dest_map *dest_map);
|
||||
int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
|
||||
|
@ -203,17 +204,6 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
|
|||
return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
|
||||
}
|
||||
|
||||
static inline u32 kvm_apic_id(struct kvm_lapic *apic)
|
||||
{
|
||||
/* To avoid a race between apic_base and following APIC_ID update when
|
||||
* switching to x2apic_mode, the x2apic mode returns initial x2apic id.
|
||||
*/
|
||||
if (apic_x2apic_mode(apic))
|
||||
return apic->vcpu->vcpu_id;
|
||||
|
||||
return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
|
||||
}
|
||||
|
||||
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
|
||||
|
||||
void wait_lapic_expire(struct kvm_vcpu *vcpu);
|
||||
|
|
|
@ -37,6 +37,8 @@
|
|||
#include <linux/srcu.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/hash.h>
|
||||
#include <linux/kern_levels.h>
|
||||
|
||||
#include <asm/page.h>
|
||||
#include <asm/cmpxchg.h>
|
||||
|
@ -129,6 +131,10 @@ module_param(dbg, bool, 0644);
|
|||
#define ACC_USER_MASK PT_USER_MASK
|
||||
#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
|
||||
|
||||
/* The mask for the R/X bits in EPT PTEs */
|
||||
#define PT64_EPT_READABLE_MASK 0x1ull
|
||||
#define PT64_EPT_EXECUTABLE_MASK 0x4ull
|
||||
|
||||
#include <trace/events/kvm.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
|
@ -178,15 +184,40 @@ static u64 __read_mostly shadow_dirty_mask;
|
|||
static u64 __read_mostly shadow_mmio_mask;
|
||||
static u64 __read_mostly shadow_present_mask;
|
||||
|
||||
/*
|
||||
* The mask/value to distinguish a PTE that has been marked not-present for
|
||||
* access tracking purposes.
|
||||
* The mask would be either 0 if access tracking is disabled, or
|
||||
* SPTE_SPECIAL_MASK|VMX_EPT_RWX_MASK if access tracking is enabled.
|
||||
*/
|
||||
static u64 __read_mostly shadow_acc_track_mask;
|
||||
static const u64 shadow_acc_track_value = SPTE_SPECIAL_MASK;
|
||||
|
||||
/*
|
||||
* The mask/shift to use for saving the original R/X bits when marking the PTE
|
||||
* as not-present for access tracking purposes. We do not save the W bit as the
|
||||
* PTEs being access tracked also need to be dirty tracked, so the W bit will be
|
||||
* restored only when a write is attempted to the page.
|
||||
*/
|
||||
static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK |
|
||||
PT64_EPT_EXECUTABLE_MASK;
|
||||
static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT;
|
||||
|
||||
static void mmu_spte_set(u64 *sptep, u64 spte);
|
||||
static void mmu_free_roots(struct kvm_vcpu *vcpu);
|
||||
|
||||
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask)
|
||||
{
|
||||
shadow_mmio_mask = mmio_mask;
|
||||
shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
|
||||
|
||||
static inline bool is_access_track_spte(u64 spte)
|
||||
{
|
||||
/* Always false if shadow_acc_track_mask is zero. */
|
||||
return (spte & shadow_acc_track_mask) == shadow_acc_track_value;
|
||||
}
|
||||
|
||||
/*
|
||||
* the low bit of the generation number is always presumed to be zero.
|
||||
* This disables mmio caching during memslot updates. The concept is
|
||||
|
@ -284,17 +315,35 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
|
|||
}
|
||||
|
||||
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
|
||||
u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask)
|
||||
u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
|
||||
u64 acc_track_mask)
|
||||
{
|
||||
if (acc_track_mask != 0)
|
||||
acc_track_mask |= SPTE_SPECIAL_MASK;
|
||||
|
||||
shadow_user_mask = user_mask;
|
||||
shadow_accessed_mask = accessed_mask;
|
||||
shadow_dirty_mask = dirty_mask;
|
||||
shadow_nx_mask = nx_mask;
|
||||
shadow_x_mask = x_mask;
|
||||
shadow_present_mask = p_mask;
|
||||
shadow_acc_track_mask = acc_track_mask;
|
||||
WARN_ON(shadow_accessed_mask != 0 && shadow_acc_track_mask != 0);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
|
||||
|
||||
void kvm_mmu_clear_all_pte_masks(void)
|
||||
{
|
||||
shadow_user_mask = 0;
|
||||
shadow_accessed_mask = 0;
|
||||
shadow_dirty_mask = 0;
|
||||
shadow_nx_mask = 0;
|
||||
shadow_x_mask = 0;
|
||||
shadow_mmio_mask = 0;
|
||||
shadow_present_mask = 0;
|
||||
shadow_acc_track_mask = 0;
|
||||
}
|
||||
|
||||
static int is_cpuid_PSE36(void)
|
||||
{
|
||||
return 1;
|
||||
|
@ -307,7 +356,7 @@ static int is_nx(struct kvm_vcpu *vcpu)
|
|||
|
||||
static int is_shadow_present_pte(u64 pte)
|
||||
{
|
||||
return (pte & 0xFFFFFFFFull) && !is_mmio_spte(pte);
|
||||
return (pte != 0) && !is_mmio_spte(pte);
|
||||
}
|
||||
|
||||
static int is_large_pte(u64 pte)
|
||||
|
@ -324,6 +373,11 @@ static int is_last_spte(u64 pte, int level)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static bool is_executable_pte(u64 spte)
|
||||
{
|
||||
return (spte & (shadow_x_mask | shadow_nx_mask)) == shadow_x_mask;
|
||||
}
|
||||
|
||||
static kvm_pfn_t spte_to_pfn(u64 pte)
|
||||
{
|
||||
return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
|
||||
|
@ -473,7 +527,7 @@ retry:
|
|||
}
|
||||
#endif
|
||||
|
||||
static bool spte_is_locklessly_modifiable(u64 spte)
|
||||
static bool spte_can_locklessly_be_made_writable(u64 spte)
|
||||
{
|
||||
return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) ==
|
||||
(SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE);
|
||||
|
@ -481,36 +535,38 @@ static bool spte_is_locklessly_modifiable(u64 spte)
|
|||
|
||||
static bool spte_has_volatile_bits(u64 spte)
|
||||
{
|
||||
if (!is_shadow_present_pte(spte))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Always atomically update spte if it can be updated
|
||||
* out of mmu-lock, it can ensure dirty bit is not lost,
|
||||
* also, it can help us to get a stable is_writable_pte()
|
||||
* to ensure tlb flush is not missed.
|
||||
*/
|
||||
if (spte_is_locklessly_modifiable(spte))
|
||||
if (spte_can_locklessly_be_made_writable(spte) ||
|
||||
is_access_track_spte(spte))
|
||||
return true;
|
||||
|
||||
if (!shadow_accessed_mask)
|
||||
return false;
|
||||
if (shadow_accessed_mask) {
|
||||
if ((spte & shadow_accessed_mask) == 0 ||
|
||||
(is_writable_pte(spte) && (spte & shadow_dirty_mask) == 0))
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!is_shadow_present_pte(spte))
|
||||
return false;
|
||||
|
||||
if ((spte & shadow_accessed_mask) &&
|
||||
(!is_writable_pte(spte) || (spte & shadow_dirty_mask)))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool spte_is_bit_cleared(u64 old_spte, u64 new_spte, u64 bit_mask)
|
||||
static bool is_accessed_spte(u64 spte)
|
||||
{
|
||||
return (old_spte & bit_mask) && !(new_spte & bit_mask);
|
||||
return shadow_accessed_mask ? spte & shadow_accessed_mask
|
||||
: !is_access_track_spte(spte);
|
||||
}
|
||||
|
||||
static bool spte_is_bit_changed(u64 old_spte, u64 new_spte, u64 bit_mask)
|
||||
static bool is_dirty_spte(u64 spte)
|
||||
{
|
||||
return (old_spte & bit_mask) != (new_spte & bit_mask);
|
||||
return shadow_dirty_mask ? spte & shadow_dirty_mask
|
||||
: spte & PT_WRITABLE_MASK;
|
||||
}
|
||||
|
||||
/* Rules for using mmu_spte_set:
|
||||
|
@ -525,6 +581,31 @@ static void mmu_spte_set(u64 *sptep, u64 new_spte)
|
|||
__set_spte(sptep, new_spte);
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the SPTE (excluding the PFN), but do not track changes in its
|
||||
* accessed/dirty status.
|
||||
*/
|
||||
static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte)
|
||||
{
|
||||
u64 old_spte = *sptep;
|
||||
|
||||
WARN_ON(!is_shadow_present_pte(new_spte));
|
||||
|
||||
if (!is_shadow_present_pte(old_spte)) {
|
||||
mmu_spte_set(sptep, new_spte);
|
||||
return old_spte;
|
||||
}
|
||||
|
||||
if (!spte_has_volatile_bits(old_spte))
|
||||
__update_clear_spte_fast(sptep, new_spte);
|
||||
else
|
||||
old_spte = __update_clear_spte_slow(sptep, new_spte);
|
||||
|
||||
WARN_ON(spte_to_pfn(old_spte) != spte_to_pfn(new_spte));
|
||||
|
||||
return old_spte;
|
||||
}
|
||||
|
||||
/* Rules for using mmu_spte_update:
|
||||
* Update the state bits, it means the mapped pfn is not changed.
|
||||
*
|
||||
|
@ -533,63 +614,49 @@ static void mmu_spte_set(u64 *sptep, u64 new_spte)
|
|||
* will find a read-only spte, even though the writable spte
|
||||
* might be cached on a CPU's TLB, the return value indicates this
|
||||
* case.
|
||||
*
|
||||
* Returns true if the TLB needs to be flushed
|
||||
*/
|
||||
static bool mmu_spte_update(u64 *sptep, u64 new_spte)
|
||||
{
|
||||
u64 old_spte = *sptep;
|
||||
bool ret = false;
|
||||
bool flush = false;
|
||||
u64 old_spte = mmu_spte_update_no_track(sptep, new_spte);
|
||||
|
||||
WARN_ON(!is_shadow_present_pte(new_spte));
|
||||
|
||||
if (!is_shadow_present_pte(old_spte)) {
|
||||
mmu_spte_set(sptep, new_spte);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!spte_has_volatile_bits(old_spte))
|
||||
__update_clear_spte_fast(sptep, new_spte);
|
||||
else
|
||||
old_spte = __update_clear_spte_slow(sptep, new_spte);
|
||||
if (!is_shadow_present_pte(old_spte))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* For the spte updated out of mmu-lock is safe, since
|
||||
* we always atomically update it, see the comments in
|
||||
* spte_has_volatile_bits().
|
||||
*/
|
||||
if (spte_is_locklessly_modifiable(old_spte) &&
|
||||
if (spte_can_locklessly_be_made_writable(old_spte) &&
|
||||
!is_writable_pte(new_spte))
|
||||
ret = true;
|
||||
|
||||
if (!shadow_accessed_mask) {
|
||||
/*
|
||||
* We don't set page dirty when dropping non-writable spte.
|
||||
* So do it now if the new spte is becoming non-writable.
|
||||
*/
|
||||
if (ret)
|
||||
kvm_set_pfn_dirty(spte_to_pfn(old_spte));
|
||||
return ret;
|
||||
}
|
||||
flush = true;
|
||||
|
||||
/*
|
||||
* Flush TLB when accessed/dirty bits are changed in the page tables,
|
||||
* Flush TLB when accessed/dirty states are changed in the page tables,
|
||||
* to guarantee consistency between TLB and page tables.
|
||||
*/
|
||||
if (spte_is_bit_changed(old_spte, new_spte,
|
||||
shadow_accessed_mask | shadow_dirty_mask))
|
||||
ret = true;
|
||||
|
||||
if (spte_is_bit_cleared(old_spte, new_spte, shadow_accessed_mask))
|
||||
if (is_accessed_spte(old_spte) && !is_accessed_spte(new_spte)) {
|
||||
flush = true;
|
||||
kvm_set_pfn_accessed(spte_to_pfn(old_spte));
|
||||
if (spte_is_bit_cleared(old_spte, new_spte, shadow_dirty_mask))
|
||||
kvm_set_pfn_dirty(spte_to_pfn(old_spte));
|
||||
}
|
||||
|
||||
return ret;
|
||||
if (is_dirty_spte(old_spte) && !is_dirty_spte(new_spte)) {
|
||||
flush = true;
|
||||
kvm_set_pfn_dirty(spte_to_pfn(old_spte));
|
||||
}
|
||||
|
||||
return flush;
|
||||
}
|
||||
|
||||
/*
|
||||
* Rules for using mmu_spte_clear_track_bits:
|
||||
* It sets the sptep from present to nonpresent, and track the
|
||||
* state bits, it is used to clear the last level sptep.
|
||||
* Returns non-zero if the PTE was previously valid.
|
||||
*/
|
||||
static int mmu_spte_clear_track_bits(u64 *sptep)
|
||||
{
|
||||
|
@ -613,11 +680,12 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
|
|||
*/
|
||||
WARN_ON(!kvm_is_reserved_pfn(pfn) && !page_count(pfn_to_page(pfn)));
|
||||
|
||||
if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
|
||||
if (is_accessed_spte(old_spte))
|
||||
kvm_set_pfn_accessed(pfn);
|
||||
if (old_spte & (shadow_dirty_mask ? shadow_dirty_mask :
|
||||
PT_WRITABLE_MASK))
|
||||
|
||||
if (is_dirty_spte(old_spte))
|
||||
kvm_set_pfn_dirty(pfn);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -636,6 +704,78 @@ static u64 mmu_spte_get_lockless(u64 *sptep)
|
|||
return __get_spte_lockless(sptep);
|
||||
}
|
||||
|
||||
static u64 mark_spte_for_access_track(u64 spte)
|
||||
{
|
||||
if (shadow_accessed_mask != 0)
|
||||
return spte & ~shadow_accessed_mask;
|
||||
|
||||
if (shadow_acc_track_mask == 0 || is_access_track_spte(spte))
|
||||
return spte;
|
||||
|
||||
/*
|
||||
* Making an Access Tracking PTE will result in removal of write access
|
||||
* from the PTE. So, verify that we will be able to restore the write
|
||||
* access in the fast page fault path later on.
|
||||
*/
|
||||
WARN_ONCE((spte & PT_WRITABLE_MASK) &&
|
||||
!spte_can_locklessly_be_made_writable(spte),
|
||||
"kvm: Writable SPTE is not locklessly dirty-trackable\n");
|
||||
|
||||
WARN_ONCE(spte & (shadow_acc_track_saved_bits_mask <<
|
||||
shadow_acc_track_saved_bits_shift),
|
||||
"kvm: Access Tracking saved bit locations are not zero\n");
|
||||
|
||||
spte |= (spte & shadow_acc_track_saved_bits_mask) <<
|
||||
shadow_acc_track_saved_bits_shift;
|
||||
spte &= ~shadow_acc_track_mask;
|
||||
spte |= shadow_acc_track_value;
|
||||
|
||||
return spte;
|
||||
}
|
||||
|
||||
/* Restore an acc-track PTE back to a regular PTE */
|
||||
static u64 restore_acc_track_spte(u64 spte)
|
||||
{
|
||||
u64 new_spte = spte;
|
||||
u64 saved_bits = (spte >> shadow_acc_track_saved_bits_shift)
|
||||
& shadow_acc_track_saved_bits_mask;
|
||||
|
||||
WARN_ON_ONCE(!is_access_track_spte(spte));
|
||||
|
||||
new_spte &= ~shadow_acc_track_mask;
|
||||
new_spte &= ~(shadow_acc_track_saved_bits_mask <<
|
||||
shadow_acc_track_saved_bits_shift);
|
||||
new_spte |= saved_bits;
|
||||
|
||||
return new_spte;
|
||||
}
|
||||
|
||||
/* Returns the Accessed status of the PTE and resets it at the same time. */
|
||||
static bool mmu_spte_age(u64 *sptep)
|
||||
{
|
||||
u64 spte = mmu_spte_get_lockless(sptep);
|
||||
|
||||
if (!is_accessed_spte(spte))
|
||||
return false;
|
||||
|
||||
if (shadow_accessed_mask) {
|
||||
clear_bit((ffs(shadow_accessed_mask) - 1),
|
||||
(unsigned long *)sptep);
|
||||
} else {
|
||||
/*
|
||||
* Capture the dirty status of the page, so that it doesn't get
|
||||
* lost when the SPTE is marked for access tracking.
|
||||
*/
|
||||
if (is_writable_pte(spte))
|
||||
kvm_set_pfn_dirty(spte_to_pfn(spte));
|
||||
|
||||
spte = mark_spte_for_access_track(spte);
|
||||
mmu_spte_update_no_track(sptep, spte);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
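For EPT without A/D bits, mark_spte_for_access_track() above parks the R/X permission bits in a "saved bits" area of the PTE and clears them, so the next access faults and restore_acc_track_spte() can put them back. A compact model of that round-trip with assumed bit positions; the shift below stands in for PT64_SECOND_AVAIL_BITS_SHIFT and is illustrative, not the kernel's exact layout:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define RX_MASK            0x5ULL          /* EPT readable (bit 0) + executable (bit 2) */
#define SAVED_BITS_SHIFT   52              /* assumed stand-in for the saved-bits shift */
#define ACC_TRACK_VALUE    (1ULL << 62)    /* SPTE_SPECIAL_MASK marks the PTE special */

static uint64_t mark_for_access_track(uint64_t spte)
{
	spte |= (spte & RX_MASK) << SAVED_BITS_SHIFT;  /* save R/X out of the way */
	spte &= ~RX_MASK;                              /* make the PTE non-present */
	return spte | ACC_TRACK_VALUE;
}

static uint64_t restore_access_track(uint64_t spte)
{
	uint64_t saved = (spte >> SAVED_BITS_SHIFT) & RX_MASK;

	spte &= ~(ACC_TRACK_VALUE | (RX_MASK << SAVED_BITS_SHIFT));
	return spte | saved;                           /* bring R/X back */
}

int main(void)
{
	uint64_t spte = 0x123456000ULL | RX_MASK;      /* some PFN bits + R/X set */
	uint64_t tracked = mark_for_access_track(spte);

	assert((tracked & RX_MASK) == 0);              /* no longer accessible */
	assert(restore_access_track(tracked) == spte); /* round-trips exactly */
	printf("round-trip ok: %#llx\n", (unsigned long long)spte);
	return 0;
}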
static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
|
@ -1212,7 +1352,7 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect)
|
|||
u64 spte = *sptep;
|
||||
|
||||
if (!is_writable_pte(spte) &&
|
||||
!(pt_protect && spte_is_locklessly_modifiable(spte)))
|
||||
!(pt_protect && spte_can_locklessly_be_made_writable(spte)))
|
||||
return false;
|
||||
|
||||
rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep);
|
||||
|
@ -1420,7 +1560,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
|
|||
restart:
|
||||
for_each_rmap_spte(rmap_head, &iter, sptep) {
|
||||
rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n",
|
||||
sptep, *sptep, gfn, level);
|
||||
sptep, *sptep, gfn, level);
|
||||
|
||||
need_flush = 1;
|
||||
|
||||
|
@ -1433,7 +1573,8 @@ restart:
|
|||
|
||||
new_spte &= ~PT_WRITABLE_MASK;
|
||||
new_spte &= ~SPTE_HOST_WRITEABLE;
|
||||
new_spte &= ~shadow_accessed_mask;
|
||||
|
||||
new_spte = mark_spte_for_access_track(new_spte);
|
||||
|
||||
mmu_spte_clear_track_bits(sptep);
|
||||
mmu_spte_set(sptep, new_spte);
|
||||
|
@ -1595,15 +1736,8 @@ static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
|
|||
struct rmap_iterator uninitialized_var(iter);
|
||||
int young = 0;
|
||||
|
||||
BUG_ON(!shadow_accessed_mask);
|
||||
|
||||
for_each_rmap_spte(rmap_head, &iter, sptep) {
|
||||
if (*sptep & shadow_accessed_mask) {
|
||||
young = 1;
|
||||
clear_bit((ffs(shadow_accessed_mask) - 1),
|
||||
(unsigned long *)sptep);
|
||||
}
|
||||
}
|
||||
for_each_rmap_spte(rmap_head, &iter, sptep)
|
||||
young |= mmu_spte_age(sptep);
|
||||
|
||||
trace_kvm_age_page(gfn, level, slot, young);
|
||||
return young;
|
||||
|
@ -1615,24 +1749,20 @@ static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
|
|||
{
|
||||
u64 *sptep;
|
||||
struct rmap_iterator iter;
|
||||
int young = 0;
|
||||
|
||||
/*
|
||||
* If there's no access bit in the secondary pte set by the
|
||||
* hardware it's up to gup-fast/gup to set the access bit in
|
||||
* the primary pte or in the page structure.
|
||||
* If there's no access bit in the secondary pte set by the hardware and
|
||||
* fast access tracking is also not enabled, it's up to gup-fast/gup to
|
||||
* set the access bit in the primary pte or in the page structure.
|
||||
*/
|
||||
if (!shadow_accessed_mask)
|
||||
if (!shadow_accessed_mask && !shadow_acc_track_mask)
|
||||
goto out;
|
||||
|
||||
for_each_rmap_spte(rmap_head, &iter, sptep) {
|
||||
if (*sptep & shadow_accessed_mask) {
|
||||
young = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for_each_rmap_spte(rmap_head, &iter, sptep)
|
||||
if (is_accessed_spte(*sptep))
|
||||
return 1;
|
||||
out:
|
||||
return young;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define RMAP_RECYCLE_THRESHOLD 1000
|
||||
|
@ -1660,7 +1790,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
|
|||
* This has some overhead, but not as much as the cost of swapping
|
||||
* out actively used pages or breaking up actively used hugepages.
|
||||
*/
|
||||
if (!shadow_accessed_mask)
|
||||
if (!shadow_accessed_mask && !shadow_acc_track_mask)
|
||||
return kvm_handle_hva_range(kvm, start, end, 0,
|
||||
kvm_unmap_rmapp);
|
||||
|
||||
|
@ -1713,7 +1843,7 @@ static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
|
|||
|
||||
static unsigned kvm_page_table_hashfn(gfn_t gfn)
|
||||
{
|
||||
return gfn & ((1 << KVM_MMU_HASH_SHIFT) - 1);
|
||||
return hash_64(gfn, KVM_MMU_HASH_SHIFT);
|
||||
}
|
||||
|
||||
static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
|
||||
|
@ -1904,17 +2034,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
|
|||
* since it has been deleted from active_mmu_pages but still can be found
|
||||
* at hast list.
|
||||
*
|
||||
* for_each_gfn_valid_sp() has skipped that kind of pages.
|
||||
* for_each_valid_sp() has skipped that kind of pages.
|
||||
*/
|
||||
#define for_each_gfn_valid_sp(_kvm, _sp, _gfn) \
|
||||
#define for_each_valid_sp(_kvm, _sp, _gfn) \
|
||||
hlist_for_each_entry(_sp, \
|
||||
&(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
|
||||
if ((_sp)->gfn != (_gfn) || is_obsolete_sp((_kvm), (_sp)) \
|
||||
|| (_sp)->role.invalid) {} else
|
||||
if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) { \
|
||||
} else
|
||||
|
||||
#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \
|
||||
for_each_gfn_valid_sp(_kvm, _sp, _gfn) \
|
||||
if ((_sp)->role.direct) {} else
|
||||
for_each_valid_sp(_kvm, _sp, _gfn) \
|
||||
if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else
|
||||
|
||||
/* @sp->gfn should be write-protected at the call site */
|
||||
static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
||||
|
@@ -2116,6 +2246,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
	struct kvm_mmu_page *sp;
	bool need_sync = false;
	bool flush = false;
	int collisions = 0;
	LIST_HEAD(invalid_list);

	role = vcpu->arch.mmu.base_role;

@@ -2130,7 +2261,12 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
		role.quadrant = quadrant;
	}
	for_each_gfn_valid_sp(vcpu->kvm, sp, gfn) {
	for_each_valid_sp(vcpu->kvm, sp, gfn) {
		if (sp->gfn != gfn) {
			collisions++;
			continue;
		}

		if (!need_sync && sp->unsync)
			need_sync = true;

@@ -2153,7 +2289,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
		__clear_sp_write_flooding_count(sp);
		trace_kvm_mmu_get_page(sp, false);
		return sp;
		goto out;
	}

	++vcpu->kvm->stat.mmu_cache_miss;

@@ -2183,6 +2319,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
	trace_kvm_mmu_get_page(sp, true);

	kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
out:
	if (collisions > vcpu->kvm->stat.max_mmu_page_hash_collisions)
		vcpu->kvm->stat.max_mmu_page_hash_collisions = collisions;
	return sp;
}
@@ -2583,6 +2722,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
		spte |= shadow_dirty_mask;
	}

	if (speculative)
		spte = mark_spte_for_access_track(spte);

set_pte:
	if (mmu_spte_update(sptep, spte))
		kvm_flush_remote_tlbs(vcpu->kvm);

@@ -2636,7 +2778,7 @@ static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
	pgprintk("%s: setting spte %llx\n", __func__, *sptep);
	pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
		 is_large_pte(*sptep)? "2MB" : "4kB",
		 *sptep & PT_PRESENT_MASK ?"RW":"R", gfn,
		 *sptep & PT_WRITABLE_MASK ? "RW" : "R", gfn,
		 *sptep, sptep);
	if (!was_rmapped && is_large_pte(*sptep))
		++vcpu->kvm->stat.lpages;
@@ -2869,32 +3011,42 @@ static bool page_fault_can_be_fast(u32 error_code)
	if (unlikely(error_code & PFERR_RSVD_MASK))
		return false;

	/*
	 * #PF can be fast only if the shadow page table is present and it
	 * is caused by write-protect, that means we just need change the
	 * W bit of the spte which can be done out of mmu-lock.
	 */
	if (!(error_code & PFERR_PRESENT_MASK) ||
	      !(error_code & PFERR_WRITE_MASK))
	/* See if the page fault is due to an NX violation */
	if (unlikely(((error_code & (PFERR_FETCH_MASK | PFERR_PRESENT_MASK))
		      == (PFERR_FETCH_MASK | PFERR_PRESENT_MASK))))
		return false;

	return true;
	/*
	 * #PF can be fast if:
	 * 1. The shadow page table entry is not present, which could mean that
	 *    the fault is potentially caused by access tracking (if enabled).
	 * 2. The shadow page table entry is present and the fault
	 *    is caused by write-protect, that means we just need change the W
	 *    bit of the spte which can be done out of mmu-lock.
	 *
	 * However, if access tracking is disabled we know that a non-present
	 * page must be a genuine page fault where we have to create a new SPTE.
	 * So, if access tracking is disabled, we return true only for write
	 * accesses to a present page.
	 */

	return shadow_acc_track_mask != 0 ||
	       ((error_code & (PFERR_WRITE_MASK | PFERR_PRESENT_MASK))
		== (PFERR_WRITE_MASK | PFERR_PRESENT_MASK));
}
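
The rewritten comment above is the heart of the access-tracking scheme: on CPUs whose EPT lacks accessed/dirty bits, KVM can clear the present bit of an SPTE, stash the cleared bits in ignored PTE bits, and treat the resulting fast #PF as proof that the page was accessed. A stand-alone sketch of that idea, with illustrative masks, shift and helper names rather than the kernel's own:

#include <stdint.h>
#include <stdbool.h>

/* Illustrative bit layout, not the kernel's: present/writable in the low
 * bits, a free "ignored" region starting at bit 52 to park what we clear. */
#define PRESENT_MASK	(1ull << 0)
#define WRITABLE_MASK	(1ull << 1)
#define SAVED_SHIFT	52

/* Make the SPTE non-present so the next guest access faults, remembering
 * which bits were cleared. */
static uint64_t mark_for_access_track(uint64_t spte)
{
	uint64_t saved = spte & (PRESENT_MASK | WRITABLE_MASK);

	spte &= ~(PRESENT_MASK | WRITABLE_MASK);
	return spte | (saved << SAVED_SHIFT);
}

/* On the resulting fast #PF, restore the saved bits: the page has now
 * provably been accessed. */
static uint64_t restore_access_track(uint64_t spte)
{
	uint64_t saved = (spte >> SAVED_SHIFT) & (PRESENT_MASK | WRITABLE_MASK);

	spte &= ~((PRESENT_MASK | WRITABLE_MASK) << SAVED_SHIFT);
	return spte | saved;
}

static bool is_access_tracked(uint64_t spte)
{
	return !(spte & PRESENT_MASK) &&
	       ((spte >> SAVED_SHIFT) & PRESENT_MASK);
}
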
|
||||
|
||||
/*
|
||||
* Returns true if the SPTE was fixed successfully. Otherwise,
|
||||
* someone else modified the SPTE from its original value.
|
||||
*/
|
||||
static bool
|
||||
fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
||||
u64 *sptep, u64 spte)
|
||||
u64 *sptep, u64 old_spte, u64 new_spte)
|
||||
{
|
||||
gfn_t gfn;
|
||||
|
||||
WARN_ON(!sp->role.direct);
|
||||
|
||||
/*
|
||||
* The gfn of direct spte is stable since it is calculated
|
||||
* by sp->gfn.
|
||||
*/
|
||||
gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
|
||||
|
||||
/*
|
||||
* Theoretically we could also set dirty bit (and flush TLB) here in
|
||||
* order to eliminate unnecessary PML logging. See comments in
|
||||
|
@ -2907,12 +3059,33 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
|||
*
|
||||
* Compare with set_spte where instead shadow_dirty_mask is set.
|
||||
*/
|
||||
if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) == spte)
|
||||
if (cmpxchg64(sptep, old_spte, new_spte) != old_spte)
|
||||
return false;
|
||||
|
||||
if (is_writable_pte(new_spte) && !is_writable_pte(old_spte)) {
|
||||
/*
|
||||
* The gfn of direct spte is stable since it is
|
||||
* calculated by sp->gfn.
|
||||
*/
|
||||
gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
|
||||
kvm_vcpu_mark_page_dirty(vcpu, gfn);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool is_access_allowed(u32 fault_err_code, u64 spte)
|
||||
{
|
||||
if (fault_err_code & PFERR_FETCH_MASK)
|
||||
return is_executable_pte(spte);
|
||||
|
||||
if (fault_err_code & PFERR_WRITE_MASK)
|
||||
return is_writable_pte(spte);
|
||||
|
||||
/* Fault was on Read access */
|
||||
return spte & PT_PRESENT_MASK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return value:
|
||||
* - true: let the vcpu to access on the same address again.
|
||||
|
@ -2923,8 +3096,9 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
|
|||
{
|
||||
struct kvm_shadow_walk_iterator iterator;
|
||||
struct kvm_mmu_page *sp;
|
||||
bool ret = false;
|
||||
bool fault_handled = false;
|
||||
u64 spte = 0ull;
|
||||
uint retry_count = 0;
|
||||
|
||||
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
|
||||
return false;
|
||||
|
@ -2933,66 +3107,93 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
|
|||
return false;
|
||||
|
||||
walk_shadow_page_lockless_begin(vcpu);
|
||||
for_each_shadow_entry_lockless(vcpu, gva, iterator, spte)
|
||||
if (!is_shadow_present_pte(spte) || iterator.level < level)
|
||||
|
||||
do {
|
||||
u64 new_spte;
|
||||
|
||||
for_each_shadow_entry_lockless(vcpu, gva, iterator, spte)
|
||||
if (!is_shadow_present_pte(spte) ||
|
||||
iterator.level < level)
|
||||
break;
|
||||
|
||||
sp = page_header(__pa(iterator.sptep));
|
||||
if (!is_last_spte(spte, sp->role.level))
|
||||
break;
|
||||
|
||||
/*
|
||||
* If the mapping has been changed, let the vcpu fault on the
|
||||
* same address again.
|
||||
*/
|
||||
if (!is_shadow_present_pte(spte)) {
|
||||
ret = true;
|
||||
goto exit;
|
||||
}
|
||||
/*
|
||||
* Check whether the memory access that caused the fault would
|
||||
* still cause it if it were to be performed right now. If not,
|
||||
* then this is a spurious fault caused by TLB lazily flushed,
|
||||
* or some other CPU has already fixed the PTE after the
|
||||
* current CPU took the fault.
|
||||
*
|
||||
* Need not check the access of upper level table entries since
|
||||
* they are always ACC_ALL.
|
||||
*/
|
||||
if (is_access_allowed(error_code, spte)) {
|
||||
fault_handled = true;
|
||||
break;
|
||||
}
|
||||
|
||||
sp = page_header(__pa(iterator.sptep));
|
||||
if (!is_last_spte(spte, sp->role.level))
|
||||
goto exit;
|
||||
new_spte = spte;
|
||||
|
||||
/*
|
||||
* Check if it is a spurious fault caused by TLB lazily flushed.
|
||||
*
|
||||
* Need not check the access of upper level table entries since
|
||||
* they are always ACC_ALL.
|
||||
*/
|
||||
if (is_writable_pte(spte)) {
|
||||
ret = true;
|
||||
goto exit;
|
||||
}
|
||||
if (is_access_track_spte(spte))
|
||||
new_spte = restore_acc_track_spte(new_spte);
|
||||
|
||||
/*
|
||||
* Currently, to simplify the code, only the spte write-protected
|
||||
* by dirty-log can be fast fixed.
|
||||
*/
|
||||
if (!spte_is_locklessly_modifiable(spte))
|
||||
goto exit;
|
||||
/*
|
||||
* Currently, to simplify the code, write-protection can
|
||||
* be removed in the fast path only if the SPTE was
|
||||
* write-protected for dirty-logging or access tracking.
|
||||
*/
|
||||
if ((error_code & PFERR_WRITE_MASK) &&
|
||||
spte_can_locklessly_be_made_writable(spte))
|
||||
{
|
||||
new_spte |= PT_WRITABLE_MASK;
|
||||
|
||||
/*
|
||||
* Do not fix write-permission on the large spte since we only dirty
|
||||
* the first page into the dirty-bitmap in fast_pf_fix_direct_spte()
|
||||
* that means other pages are missed if its slot is dirty-logged.
|
||||
*
|
||||
* Instead, we let the slow page fault path create a normal spte to
|
||||
* fix the access.
|
||||
*
|
||||
* See the comments in kvm_arch_commit_memory_region().
|
||||
*/
|
||||
if (sp->role.level > PT_PAGE_TABLE_LEVEL)
|
||||
goto exit;
|
||||
/*
|
||||
* Do not fix write-permission on the large spte. Since
|
||||
* we only dirty the first page into the dirty-bitmap in
|
||||
* fast_pf_fix_direct_spte(), other pages are missed
|
||||
* if its slot has dirty logging enabled.
|
||||
*
|
||||
* Instead, we let the slow page fault path create a
|
||||
* normal spte to fix the access.
|
||||
*
|
||||
* See the comments in kvm_arch_commit_memory_region().
|
||||
*/
|
||||
if (sp->role.level > PT_PAGE_TABLE_LEVEL)
|
||||
break;
|
||||
}
|
||||
|
||||
/* Verify that the fault can be handled in the fast path */
|
||||
if (new_spte == spte ||
|
||||
!is_access_allowed(error_code, new_spte))
|
||||
break;
|
||||
|
||||
/*
|
||||
* Currently, fast page fault only works for direct mapping
|
||||
* since the gfn is not stable for indirect shadow page. See
|
||||
* Documentation/virtual/kvm/locking.txt to get more detail.
|
||||
*/
|
||||
fault_handled = fast_pf_fix_direct_spte(vcpu, sp,
|
||||
iterator.sptep, spte,
|
||||
new_spte);
|
||||
if (fault_handled)
|
||||
break;
|
||||
|
||||
if (++retry_count > 4) {
|
||||
printk_once(KERN_WARNING
|
||||
"kvm: Fast #PF retrying more than 4 times.\n");
|
||||
break;
|
||||
}
|
||||
|
||||
} while (true);
|
||||
|
||||
/*
|
||||
* Currently, fast page fault only works for direct mapping since
|
||||
* the gfn is not stable for indirect shadow page.
|
||||
* See Documentation/virtual/kvm/locking.txt to get more detail.
|
||||
*/
|
||||
ret = fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte);
|
||||
exit:
|
||||
trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep,
|
||||
spte, ret);
|
||||
spte, fault_handled);
|
||||
walk_shadow_page_lockless_end(vcpu);
|
||||
|
||||
return ret;
|
||||
return fault_handled;
|
||||
}
|
||||
|
||||
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
|
||||
|
@ -5063,6 +5264,8 @@ static void mmu_destroy_caches(void)
|
|||
|
||||
int kvm_mmu_module_init(void)
|
||||
{
|
||||
kvm_mmu_clear_all_pte_masks();
|
||||
|
||||
pte_list_desc_cache = kmem_cache_create("pte_list_desc",
|
||||
sizeof(struct pte_list_desc),
|
||||
0, 0, NULL);
|
||||
|
|
|
@ -971,8 +971,8 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
|
|||
* a particular vCPU.
|
||||
*/
|
||||
#define SVM_VM_DATA_HASH_BITS 8
|
||||
DECLARE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
|
||||
static spinlock_t svm_vm_data_hash_lock;
|
||||
static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
|
||||
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
|
||||
|
||||
/* Note:
|
||||
* This function is called from IOMMU driver to notify
|
||||
|
@ -1077,8 +1077,6 @@ static __init int svm_hardware_setup(void)
|
|||
} else {
|
||||
pr_info("AVIC enabled\n");
|
||||
|
||||
hash_init(svm_vm_data_hash);
|
||||
spin_lock_init(&svm_vm_data_hash_lock);
|
||||
amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
|
||||
}
|
||||
}
|
||||
|
@ -1159,7 +1157,6 @@ static void init_vmcb(struct vcpu_svm *svm)
|
|||
struct vmcb_control_area *control = &svm->vmcb->control;
|
||||
struct vmcb_save_area *save = &svm->vmcb->save;
|
||||
|
||||
svm->vcpu.fpu_active = 1;
|
||||
svm->vcpu.arch.hflags = 0;
|
||||
|
||||
set_cr_intercept(svm, INTERCEPT_CR0_READ);
|
||||
|
@ -1901,15 +1898,12 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
|
|||
ulong gcr0 = svm->vcpu.arch.cr0;
|
||||
u64 *hcr0 = &svm->vmcb->save.cr0;
|
||||
|
||||
if (!svm->vcpu.fpu_active)
|
||||
*hcr0 |= SVM_CR0_SELECTIVE_MASK;
|
||||
else
|
||||
*hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
|
||||
| (gcr0 & SVM_CR0_SELECTIVE_MASK);
|
||||
*hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
|
||||
| (gcr0 & SVM_CR0_SELECTIVE_MASK);
|
||||
|
||||
mark_dirty(svm->vmcb, VMCB_CR);
|
||||
|
||||
if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
|
||||
if (gcr0 == *hcr0) {
|
||||
clr_cr_intercept(svm, INTERCEPT_CR0_READ);
|
||||
clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
|
||||
} else {
|
||||
|
@ -1940,8 +1934,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
|
|||
if (!npt_enabled)
|
||||
cr0 |= X86_CR0_PG | X86_CR0_WP;
|
||||
|
||||
if (!vcpu->fpu_active)
|
||||
cr0 |= X86_CR0_TS;
|
||||
/*
|
||||
* re-enable caching here because the QEMU bios
|
||||
* does not do it - this results in some delay at
|
||||
|
@ -2160,22 +2152,6 @@ static int ac_interception(struct vcpu_svm *svm)
|
|||
return 1;
|
||||
}
|
||||
|
||||
static void svm_fpu_activate(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
clr_exception_intercept(svm, NM_VECTOR);
|
||||
|
||||
svm->vcpu.fpu_active = 1;
|
||||
update_cr0_intercept(svm);
|
||||
}
|
||||
|
||||
static int nm_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
svm_fpu_activate(&svm->vcpu);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static bool is_erratum_383(void)
|
||||
{
|
||||
int err, i;
|
||||
|
@ -2573,9 +2549,6 @@ static int nested_svm_exit_special(struct vcpu_svm *svm)
|
|||
if (!npt_enabled && svm->apf_reason == 0)
|
||||
return NESTED_EXIT_HOST;
|
||||
break;
|
||||
case SVM_EXIT_EXCP_BASE + NM_VECTOR:
|
||||
nm_interception(svm);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -4020,7 +3993,6 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
|
|||
[SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
|
||||
[SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
|
||||
[SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
|
||||
[SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
|
||||
[SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
|
||||
[SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception,
|
||||
[SVM_EXIT_INTR] = intr_interception,
|
||||
|
@ -4182,6 +4154,8 @@ static int handle_exit(struct kvm_vcpu *vcpu)
|
|||
|
||||
trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
|
||||
|
||||
vcpu->arch.gpa_available = (exit_code == SVM_EXIT_NPF);
|
||||
|
||||
if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
|
||||
vcpu->arch.cr0 = svm->vmcb->save.cr0;
|
||||
if (npt_enabled)
|
||||
|
@ -4357,11 +4331,6 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
|
|||
return;
|
||||
}
|
||||
|
||||
static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
|
||||
{
|
||||
kvm_lapic_set_irr(vec, vcpu->arch.apic);
|
||||
|
@ -5077,14 +5046,6 @@ static bool svm_has_wbinvd_exit(void)
|
|||
return true;
|
||||
}
|
||||
|
||||
static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
set_exception_intercept(svm, NM_VECTOR);
|
||||
update_cr0_intercept(svm);
|
||||
}
|
||||
|
||||
#define PRE_EX(exit) { .exit_code = (exit), \
|
||||
.stage = X86_ICPT_PRE_EXCEPT, }
|
||||
#define POST_EX(exit) { .exit_code = (exit), \
|
||||
|
@ -5345,9 +5306,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
|
|||
|
||||
.get_pkru = svm_get_pkru,
|
||||
|
||||
.fpu_activate = svm_fpu_activate,
|
||||
.fpu_deactivate = svm_fpu_deactivate,
|
||||
|
||||
.tlb_flush = svm_flush_tlb,
|
||||
|
||||
.run = svm_vcpu_run,
|
||||
|
@ -5371,7 +5329,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
|
|||
.get_enable_apicv = svm_get_enable_apicv,
|
||||
.refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
|
||||
.load_eoi_exitmap = svm_load_eoi_exitmap,
|
||||
.sync_pir_to_irr = svm_sync_pir_to_irr,
|
||||
.hwapic_irr_update = svm_hwapic_irr_update,
|
||||
.hwapic_isr_update = svm_hwapic_isr_update,
|
||||
.apicv_post_state_restore = avic_post_state_restore,
|
||||
|
|
arch/x86/kvm/vmx.c: 1025 changed lines (file diff suppressed because it is too large)
|
@ -180,6 +180,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
|||
{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
|
||||
{ "irq_injections", VCPU_STAT(irq_injections) },
|
||||
{ "nmi_injections", VCPU_STAT(nmi_injections) },
|
||||
{ "req_event", VCPU_STAT(req_event) },
|
||||
{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
|
||||
{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
|
||||
{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
|
||||
|
@ -190,6 +191,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
|||
{ "mmu_unsync", VM_STAT(mmu_unsync) },
|
||||
{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
|
||||
{ "largepages", VM_STAT(lpages) },
|
||||
{ "max_mmu_page_hash_collisions",
|
||||
VM_STAT(max_mmu_page_hash_collisions) },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
|
@ -1139,6 +1142,7 @@ struct pvclock_gtod_data {
|
|||
|
||||
u64 boot_ns;
|
||||
u64 nsec_base;
|
||||
u64 wall_time_sec;
|
||||
};
|
||||
|
||||
static struct pvclock_gtod_data pvclock_gtod_data;
|
||||
|
@ -1162,6 +1166,8 @@ static void update_pvclock_gtod(struct timekeeper *tk)
|
|||
vdata->boot_ns = boot_ns;
|
||||
vdata->nsec_base = tk->tkr_mono.xtime_nsec;
|
||||
|
||||
vdata->wall_time_sec = tk->xtime_sec;
|
||||
|
||||
write_seqcount_end(&vdata->seq);
|
||||
}
|
||||
#endif
|
||||
|
@ -1623,6 +1629,28 @@ static int do_monotonic_boot(s64 *t, u64 *cycle_now)
|
|||
return mode;
|
||||
}
|
||||
|
||||
static int do_realtime(struct timespec *ts, u64 *cycle_now)
{
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
	unsigned long seq;
	int mode;
	u64 ns;

	do {
		seq = read_seqcount_begin(&gtod->seq);
		mode = gtod->clock.vclock_mode;
		ts->tv_sec = gtod->wall_time_sec;
		ns = gtod->nsec_base;
		ns += vgettsc(cycle_now);
		ns >>= gtod->clock.shift;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}
|
||||
|
||||
/* returns true if host is using tsc clocksource */
|
||||
static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now)
|
||||
{
|
||||
|
@ -1632,6 +1660,17 @@ static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now)
|
|||
|
||||
return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
|
||||
}
|
||||
|
||||
/* returns true if host is using tsc clocksource */
|
||||
static bool kvm_get_walltime_and_clockread(struct timespec *ts,
|
||||
u64 *cycle_now)
|
||||
{
|
||||
/* checked again under seqlock below */
|
||||
if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
|
||||
return false;
|
||||
|
||||
return do_realtime(ts, cycle_now) == VCLOCK_TSC;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -1772,7 +1811,7 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
|
|||
struct kvm_vcpu_arch *vcpu = &v->arch;
|
||||
struct pvclock_vcpu_time_info guest_hv_clock;
|
||||
|
||||
if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
|
||||
if (unlikely(kvm_vcpu_read_guest_cached(v, &vcpu->pv_time,
|
||||
&guest_hv_clock, sizeof(guest_hv_clock))))
|
||||
return;
|
||||
|
||||
|
@ -1793,9 +1832,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
|
|||
BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
|
||||
|
||||
vcpu->hv_clock.version = guest_hv_clock.version + 1;
|
||||
kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
|
||||
&vcpu->hv_clock,
|
||||
sizeof(vcpu->hv_clock.version));
|
||||
kvm_vcpu_write_guest_cached(v, &vcpu->pv_time,
|
||||
&vcpu->hv_clock,
|
||||
sizeof(vcpu->hv_clock.version));
|
||||
|
||||
smp_wmb();
|
||||
|
||||
|
@ -1809,16 +1848,16 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
|
|||
|
||||
trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
|
||||
|
||||
kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
|
||||
&vcpu->hv_clock,
|
||||
sizeof(vcpu->hv_clock));
|
||||
kvm_vcpu_write_guest_cached(v, &vcpu->pv_time,
|
||||
&vcpu->hv_clock,
|
||||
sizeof(vcpu->hv_clock));
|
||||
|
||||
smp_wmb();
|
||||
|
||||
vcpu->hv_clock.version++;
|
||||
kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
|
||||
&vcpu->hv_clock,
|
||||
sizeof(vcpu->hv_clock.version));
|
||||
kvm_vcpu_write_guest_cached(v, &vcpu->pv_time,
|
||||
&vcpu->hv_clock,
|
||||
sizeof(vcpu->hv_clock.version));
|
||||
}
|
||||
|
||||
static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
|
@ -2051,7 +2090,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
|
|||
return 0;
|
||||
}
|
||||
|
||||
if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
|
||||
if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.apf.data, gpa,
|
||||
sizeof(u32)))
|
||||
return 1;
|
||||
|
||||
|
@ -2070,7 +2109,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
|
|||
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
|
||||
return;
|
||||
|
||||
if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
|
||||
if (unlikely(kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.st.stime,
|
||||
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
|
||||
return;
|
||||
|
||||
|
@ -2081,7 +2120,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
|
|||
|
||||
vcpu->arch.st.steal.version += 1;
|
||||
|
||||
kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
|
||||
kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime,
|
||||
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
|
||||
|
||||
smp_wmb();
|
||||
|
@ -2090,14 +2129,14 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
|
|||
vcpu->arch.st.last_steal;
|
||||
vcpu->arch.st.last_steal = current->sched_info.run_delay;
|
||||
|
||||
kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
|
||||
kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime,
|
||||
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
|
||||
|
||||
smp_wmb();
|
||||
|
||||
vcpu->arch.st.steal.version += 1;
|
||||
|
||||
kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
|
||||
kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime,
|
||||
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
|
||||
}
|
||||
|
||||
|
@ -2202,7 +2241,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
|||
if (!(data & 1))
|
||||
break;
|
||||
|
||||
if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
|
||||
if (kvm_vcpu_gfn_to_hva_cache_init(vcpu,
|
||||
&vcpu->arch.pv_time, data & ~1ULL,
|
||||
sizeof(struct pvclock_vcpu_time_info)))
|
||||
vcpu->arch.pv_time_enabled = false;
|
||||
|
@ -2223,7 +2262,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
|||
if (data & KVM_STEAL_RESERVED_MASK)
|
||||
return 1;
|
||||
|
||||
if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
|
||||
if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.st.stime,
|
||||
data & KVM_STEAL_VALID_BITS,
|
||||
sizeof(struct kvm_steal_time)))
|
||||
return 1;
|
||||
|
@ -2633,6 +2672,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
case KVM_CAP_DISABLE_QUIRKS:
|
||||
case KVM_CAP_SET_BOOT_CPU_ID:
|
||||
case KVM_CAP_SPLIT_IRQCHIP:
|
||||
case KVM_CAP_IMMEDIATE_EXIT:
|
||||
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
|
||||
case KVM_CAP_ASSIGN_DEV_IRQ:
|
||||
case KVM_CAP_PCI_2_3:
|
||||
|
@ -2836,7 +2876,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
|
|||
|
||||
vcpu->arch.st.steal.preempted = 1;
|
||||
|
||||
kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
|
||||
kvm_vcpu_write_guest_offset_cached(vcpu, &vcpu->arch.st.stime,
|
||||
&vcpu->arch.st.steal.preempted,
|
||||
offsetof(struct kvm_steal_time, preempted),
|
||||
sizeof(vcpu->arch.st.steal.preempted));
|
||||
|
@ -2870,7 +2910,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
|||
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
|
||||
struct kvm_lapic_state *s)
|
||||
{
|
||||
if (vcpu->arch.apicv_active)
|
||||
if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
|
||||
kvm_x86_ops->sync_pir_to_irr(vcpu);
|
||||
|
||||
return kvm_apic_get_state(vcpu, s);
|
||||
|
@ -3897,7 +3937,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
|
|||
goto split_irqchip_unlock;
|
||||
/* Pairs with irqchip_in_kernel. */
|
||||
smp_wmb();
|
||||
kvm->arch.irqchip_split = true;
|
||||
kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
|
||||
kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
|
||||
r = 0;
|
||||
split_irqchip_unlock:
|
||||
|
@ -3960,40 +4000,41 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|||
r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
|
||||
break;
|
||||
case KVM_CREATE_IRQCHIP: {
|
||||
struct kvm_pic *vpic;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
r = -EEXIST;
|
||||
if (kvm->arch.vpic)
|
||||
if (irqchip_in_kernel(kvm))
|
||||
goto create_irqchip_unlock;
|
||||
|
||||
r = -EINVAL;
|
||||
if (kvm->created_vcpus)
|
||||
goto create_irqchip_unlock;
|
||||
r = -ENOMEM;
|
||||
vpic = kvm_create_pic(kvm);
|
||||
if (vpic) {
|
||||
r = kvm_ioapic_init(kvm);
|
||||
if (r) {
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
kvm_destroy_pic(vpic);
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
goto create_irqchip_unlock;
|
||||
}
|
||||
} else
|
||||
|
||||
r = kvm_pic_init(kvm);
|
||||
if (r)
|
||||
goto create_irqchip_unlock;
|
||||
|
||||
r = kvm_ioapic_init(kvm);
|
||||
if (r) {
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
kvm_pic_destroy(kvm);
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
goto create_irqchip_unlock;
|
||||
}
|
||||
|
||||
r = kvm_setup_default_irq_routing(kvm);
|
||||
if (r) {
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
kvm_ioapic_destroy(kvm);
|
||||
kvm_destroy_pic(vpic);
|
||||
kvm_pic_destroy(kvm);
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
goto create_irqchip_unlock;
|
||||
}
|
||||
/* Write kvm->irq_routing before kvm->arch.vpic. */
|
||||
/* Write kvm->irq_routing before enabling irqchip_in_kernel. */
|
||||
smp_wmb();
|
||||
kvm->arch.vpic = vpic;
|
||||
kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL;
|
||||
create_irqchip_unlock:
|
||||
mutex_unlock(&kvm->lock);
|
||||
break;
|
||||
|
@ -4029,7 +4070,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|||
}
|
||||
|
||||
r = -ENXIO;
|
||||
if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
|
||||
if (!irqchip_kernel(kvm))
|
||||
goto get_irqchip_out;
|
||||
r = kvm_vm_ioctl_get_irqchip(kvm, chip);
|
||||
if (r)
|
||||
|
@ -4053,7 +4094,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|||
}
|
||||
|
||||
r = -ENXIO;
|
||||
if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
|
||||
if (!irqchip_kernel(kvm))
|
||||
goto set_irqchip_out;
|
||||
r = kvm_vm_ioctl_set_irqchip(kvm, chip);
|
||||
if (r)
|
||||
|
@ -4462,6 +4503,21 @@ out:
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
|
||||
|
||||
static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
|
||||
gpa_t gpa, bool write)
|
||||
{
|
||||
/* For APIC access vmexit */
|
||||
if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
|
||||
return 1;
|
||||
|
||||
if (vcpu_match_mmio_gpa(vcpu, gpa)) {
|
||||
trace_vcpu_match_mmio(gva, gpa, write, true);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
|
||||
gpa_t *gpa, struct x86_exception *exception,
|
||||
bool write)
|
||||
|
@ -4488,16 +4544,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
|
|||
if (*gpa == UNMAPPED_GVA)
|
||||
return -1;
|
||||
|
||||
/* For APIC access vmexit */
|
||||
if ((*gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
|
||||
return 1;
|
||||
|
||||
if (vcpu_match_mmio_gpa(vcpu, *gpa)) {
|
||||
trace_vcpu_match_mmio(gva, *gpa, write, true);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write);
|
||||
}
|
||||
|
||||
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
|
@ -4594,6 +4641,22 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
|
|||
int handled, ret;
|
||||
bool write = ops->write;
|
||||
struct kvm_mmio_fragment *frag;
|
||||
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
|
||||
|
||||
/*
|
||||
* If the exit was due to a NPF we may already have a GPA.
|
||||
* If the GPA is present, use it to avoid the GVA to GPA table walk.
|
||||
* Note, this cannot be used on string operations since string
|
||||
* operation using rep will only have the initial GPA from the NPF
|
||||
* occurred.
|
||||
*/
|
||||
if (vcpu->arch.gpa_available &&
|
||||
emulator_can_use_gpa(ctxt) &&
|
||||
vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) &&
|
||||
(addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
|
||||
gpa = exception->address;
|
||||
goto mmio;
|
||||
}
|
||||
|
||||
ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
|
||||
|
||||
|
@ -5610,6 +5673,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
|
|||
}
|
||||
|
||||
restart:
|
||||
/* Save the faulting GPA (cr2) in the address field */
|
||||
ctxt->exception.address = cr2;
|
||||
|
||||
r = x86_emulate_insn(ctxt);
|
||||
|
||||
if (r == EMULATION_INTERCEPTED)
|
||||
|
@ -5924,9 +5990,6 @@ static void kvm_set_mmio_spte_mask(void)
|
|||
/* Mask the reserved physical address bits. */
|
||||
mask = rsvd_bits(maxphyaddr, 51);
|
||||
|
||||
/* Bit 62 is always reserved for 32bit host. */
|
||||
mask |= 0x3ull << 62;
|
||||
|
||||
/* Set the present bit. */
|
||||
mask |= 1ull;
|
||||
|
||||
|
@ -6025,7 +6088,7 @@ int kvm_arch_init(void *opaque)
|
|||
|
||||
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
|
||||
PT_DIRTY_MASK, PT64_NX_MASK, 0,
|
||||
PT_PRESENT_MASK);
|
||||
PT_PRESENT_MASK, 0);
|
||||
kvm_timer_init();
|
||||
|
||||
perf_register_guest_info_callbacks(&kvm_guest_cbs);
|
||||
|
@@ -6087,6 +6150,35 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);

#ifdef CONFIG_X86_64
static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
				unsigned long clock_type)
{
	struct kvm_clock_pairing clock_pairing;
	struct timespec ts;
	u64 cycle;
	int ret;

	if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK)
		return -KVM_EOPNOTSUPP;

	if (kvm_get_walltime_and_clockread(&ts, &cycle) == false)
		return -KVM_EOPNOTSUPP;

	clock_pairing.sec = ts.tv_sec;
	clock_pairing.nsec = ts.tv_nsec;
	clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
	clock_pairing.flags = 0;

	ret = 0;
	if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
			    sizeof(struct kvm_clock_pairing)))
		ret = -KVM_EFAULT;

	return ret;
}
#endif
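
For reference, the structure that kvm_pv_clock_pairing() copies into the guest-supplied page looks roughly as below; the layout is recalled from the uapi header added alongside this hypercall, so the exact padding width in particular should be treated as an assumption:

#include <linux/types.h>

/* Result of KVM_HC_CLOCK_PAIRING as seen by the guest (field order recalled
 * from arch/x86/include/uapi/asm/kvm_para.h in this series; padding assumed). */
struct kvm_clock_pairing {
	__s64 sec;	/* host wall-clock seconds at the sampling instant */
	__s64 nsec;	/* host wall-clock nanoseconds at the same instant */
	__u64 tsc;	/* guest TSC value at that instant (scaled for L1) */
	__u32 flags;
	__u32 pad[9];
};
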
|
||||
|
||||
/*
|
||||
* kvm_pv_kick_cpu_op: Kick a vcpu.
|
||||
*
|
||||
|
@ -6151,6 +6243,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
|
|||
kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
|
||||
ret = 0;
|
||||
break;
|
||||
#ifdef CONFIG_X86_64
|
||||
case KVM_HC_CLOCK_PAIRING:
|
||||
ret = kvm_pv_clock_pairing(vcpu, a0, a1);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
ret = -KVM_ENOSYS;
|
||||
break;
|
||||
|
@ -6564,7 +6661,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
|
|||
if (irqchip_split(vcpu->kvm))
|
||||
kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
|
||||
else {
|
||||
if (vcpu->arch.apicv_active)
|
||||
if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
|
||||
kvm_x86_ops->sync_pir_to_irr(vcpu);
|
||||
kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
|
||||
}
|
||||
|
@ -6655,10 +6752,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
|||
r = 0;
|
||||
goto out;
|
||||
}
|
||||
if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
|
||||
vcpu->fpu_active = 0;
|
||||
kvm_x86_ops->fpu_deactivate(vcpu);
|
||||
}
|
||||
if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
|
||||
/* Page is swapped out. Do synthetic halt */
|
||||
vcpu->arch.apf.halted = true;
|
||||
|
@ -6718,21 +6811,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
|||
kvm_hv_process_stimers(vcpu);
|
||||
}
|
||||
|
||||
/*
|
||||
* KVM_REQ_EVENT is not set when posted interrupts are set by
|
||||
* VT-d hardware, so we have to update RVI unconditionally.
|
||||
*/
|
||||
if (kvm_lapic_enabled(vcpu)) {
|
||||
/*
|
||||
* Update architecture specific hints for APIC
|
||||
* virtual interrupt delivery.
|
||||
*/
|
||||
if (vcpu->arch.apicv_active)
|
||||
kvm_x86_ops->hwapic_irr_update(vcpu,
|
||||
kvm_lapic_find_highest_irr(vcpu));
|
||||
}
|
||||
|
||||
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
|
||||
++vcpu->stat.req_event;
|
||||
kvm_apic_accept_events(vcpu);
|
||||
if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
|
||||
r = 1;
|
||||
|
@ -6773,22 +6853,40 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
|||
preempt_disable();
|
||||
|
||||
kvm_x86_ops->prepare_guest_switch(vcpu);
|
||||
if (vcpu->fpu_active)
|
||||
kvm_load_guest_fpu(vcpu);
|
||||
kvm_load_guest_fpu(vcpu);
|
||||
|
||||
/*
|
||||
* Disable IRQs before setting IN_GUEST_MODE. Posted interrupt
|
||||
* IPI are then delayed after guest entry, which ensures that they
|
||||
* result in virtual interrupt delivery.
|
||||
*/
|
||||
local_irq_disable();
|
||||
vcpu->mode = IN_GUEST_MODE;
|
||||
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
||||
|
||||
/*
|
||||
* We should set ->mode before check ->requests,
|
||||
* Please see the comment in kvm_make_all_cpus_request.
|
||||
* This also orders the write to mode from any reads
|
||||
* to the page tables done while the VCPU is running.
|
||||
* Please see the comment in kvm_flush_remote_tlbs.
|
||||
* 1) We should set ->mode before checking ->requests. Please see
|
||||
* the comment in kvm_make_all_cpus_request.
|
||||
*
|
||||
* 2) For APICv, we should set ->mode before checking PIR.ON. This
|
||||
* pairs with the memory barrier implicit in pi_test_and_set_on
|
||||
* (see vmx_deliver_posted_interrupt).
|
||||
*
|
||||
* 3) This also orders the write to mode from any reads to the page
|
||||
* tables done while the VCPU is running. Please see the comment
|
||||
* in kvm_flush_remote_tlbs.
|
||||
*/
|
||||
smp_mb__after_srcu_read_unlock();
|
||||
|
||||
local_irq_disable();
|
||||
/*
|
||||
* This handles the case where a posted interrupt was
|
||||
* notified with kvm_vcpu_kick.
|
||||
*/
|
||||
if (kvm_lapic_enabled(vcpu)) {
|
||||
if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
|
||||
kvm_x86_ops->sync_pir_to_irr(vcpu);
|
||||
}
|
||||
|
||||
if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
|
||||
|| need_resched() || signal_pending(current)) {
|
||||
|
@ -6927,6 +7025,9 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
|
|||
|
||||
static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
|
||||
kvm_x86_ops->check_nested_events(vcpu, false);
|
||||
|
||||
return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
|
||||
!vcpu->arch.apf.halted);
|
||||
}
|
||||
|
@@ -7098,7 +7199,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
	} else
		WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);

	r = vcpu_run(vcpu);
	if (kvm_run->immediate_exit)
		r = -EINTR;
	else
		r = vcpu_run(vcpu);

out:
	post_kvm_run_save(vcpu);
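
A minimal sketch of how user space could use the new immediate_exit field together with a kick signal (the SIGUSR1 choice, the helper names and the global run pointer are illustrative; error handling is trimmed). The flag closes the race where the kick lands just before the KVM_RUN ioctl: if the signal arrives first, the flag makes KVM_RUN return -EINTR at once, and if it arrives while the guest is running, the pending signal forces an exit as before.

#include <errno.h>
#include <signal.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static struct kvm_run *run;		/* mmap'ed struct kvm_run, set up elsewhere */

static void kick_handler(int sig)	/* installed with sigaction() elsewhere */
{
	(void)sig;
	run->immediate_exit = 1;
}

/* Another thread kicks the vCPU thread with pthread_kill(tid, SIGUSR1). */
static int run_vcpu(int vcpu_fd)
{
	int r = ioctl(vcpu_fd, KVM_RUN, 0);

	if (r < 0 && errno == EINTR) {
		run->immediate_exit = 0;	/* KVM never clears it; do so before re-entry */
		return 0;			/* we were kicked */
	}
	return r;
}
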
|
||||
|
@ -8293,9 +8397,6 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
|
|||
|
||||
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
|
||||
kvm_x86_ops->check_nested_events(vcpu, false);
|
||||
|
||||
return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
|
||||
}
|
||||
|
||||
|
@ -8432,9 +8533,8 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
|
|||
|
||||
static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
|
||||
{
|
||||
|
||||
return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
|
||||
sizeof(val));
|
||||
return kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.apf.data, &val,
|
||||
sizeof(val));
|
||||
}
|
||||
|
||||
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
|
||||
|
|
|
@@ -90,4 +90,16 @@ config PTP_1588_CLOCK_PCH
	  To compile this driver as a module, choose M here: the module
	  will be called ptp_pch.

config PTP_1588_CLOCK_KVM
	tristate "KVM virtual PTP clock"
	depends on PTP_1588_CLOCK
	depends on KVM_GUEST && X86
	default y
	help
	  This driver adds support for using kvm infrastructure as a PTP
	  clock. This clock is only useful if you are using KVM guests.

	  To compile this driver as a module, choose M here: the module
	  will be called ptp_kvm.

endmenu
||||
|
|
|
@@ -6,3 +6,4 @@ ptp-y := ptp_clock.o ptp_chardev.o ptp_sysfs.o
obj-$(CONFIG_PTP_1588_CLOCK)		+= ptp.o
obj-$(CONFIG_PTP_1588_CLOCK_IXP46X)	+= ptp_ixp46x.o
obj-$(CONFIG_PTP_1588_CLOCK_PCH)	+= ptp_pch.o
obj-$(CONFIG_PTP_1588_CLOCK_KVM)	+= ptp_kvm.o
|
||||
|
|
|
@@ -0,0 +1,207 @@
|
|||
/*
|
||||
* Virtual PTP 1588 clock for use with KVM guests
|
||||
*
|
||||
* Copyright (C) 2017 Red Hat Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
*/
|
||||
#include <linux/device.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <uapi/linux/kvm_para.h>
|
||||
#include <asm/kvm_para.h>
|
||||
#include <asm/pvclock.h>
|
||||
#include <asm/kvmclock.h>
|
||||
#include <uapi/asm/kvm_para.h>
|
||||
|
||||
#include <linux/ptp_clock_kernel.h>
|
||||
|
||||
struct kvm_ptp_clock {
|
||||
struct ptp_clock *ptp_clock;
|
||||
struct ptp_clock_info caps;
|
||||
};
|
||||
|
||||
DEFINE_SPINLOCK(kvm_ptp_lock);
|
||||
|
||||
static struct pvclock_vsyscall_time_info *hv_clock;
|
||||
|
||||
static struct kvm_clock_pairing clock_pair;
|
||||
static phys_addr_t clock_pair_gpa;
|
||||
|
||||
static int ptp_kvm_get_time_fn(ktime_t *device_time,
|
||||
struct system_counterval_t *system_counter,
|
||||
void *ctx)
|
||||
{
|
||||
unsigned long ret;
|
||||
struct timespec64 tspec;
|
||||
unsigned version;
|
||||
int cpu;
|
||||
struct pvclock_vcpu_time_info *src;
|
||||
|
||||
spin_lock(&kvm_ptp_lock);
|
||||
|
||||
preempt_disable_notrace();
|
||||
cpu = smp_processor_id();
|
||||
src = &hv_clock[cpu].pvti;
|
||||
|
||||
do {
|
||||
/*
|
||||
* We are using a TSC value read in the hosts
|
||||
* kvm_hc_clock_pairing handling.
|
||||
* So any changes to tsc_to_system_mul
|
||||
* and tsc_shift or any other pvclock
|
||||
* data invalidate that measurement.
|
||||
*/
|
||||
version = pvclock_read_begin(src);
|
||||
|
||||
ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
|
||||
clock_pair_gpa,
|
||||
KVM_CLOCK_PAIRING_WALLCLOCK);
|
||||
if (ret != 0) {
|
||||
pr_err_ratelimited("clock pairing hypercall ret %lu\n", ret);
|
||||
spin_unlock(&kvm_ptp_lock);
|
||||
preempt_enable_notrace();
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
tspec.tv_sec = clock_pair.sec;
|
||||
tspec.tv_nsec = clock_pair.nsec;
|
||||
ret = __pvclock_read_cycles(src, clock_pair.tsc);
|
||||
} while (pvclock_read_retry(src, version));
|
||||
|
||||
preempt_enable_notrace();
|
||||
|
||||
system_counter->cycles = ret;
|
||||
system_counter->cs = &kvm_clock;
|
||||
|
||||
*device_time = timespec64_to_ktime(tspec);
|
||||
|
||||
spin_unlock(&kvm_ptp_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ptp_kvm_getcrosststamp(struct ptp_clock_info *ptp,
|
||||
struct system_device_crosststamp *xtstamp)
|
||||
{
|
||||
return get_device_system_crosststamp(ptp_kvm_get_time_fn, NULL,
|
||||
NULL, xtstamp);
|
||||
}
|
||||
|
||||
/*
|
||||
* PTP clock operations
|
||||
*/
|
||||
|
||||
static int ptp_kvm_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static int ptp_kvm_adjtime(struct ptp_clock_info *ptp, s64 delta)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static int ptp_kvm_settime(struct ptp_clock_info *ptp,
|
||||
const struct timespec64 *ts)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static int ptp_kvm_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
|
||||
{
|
||||
unsigned long ret;
|
||||
struct timespec64 tspec;
|
||||
|
||||
spin_lock(&kvm_ptp_lock);
|
||||
|
||||
ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
|
||||
clock_pair_gpa,
|
||||
KVM_CLOCK_PAIRING_WALLCLOCK);
|
||||
if (ret != 0) {
|
||||
pr_err_ratelimited("clock offset hypercall ret %lu\n", ret);
|
||||
spin_unlock(&kvm_ptp_lock);
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
tspec.tv_sec = clock_pair.sec;
|
||||
tspec.tv_nsec = clock_pair.nsec;
|
||||
spin_unlock(&kvm_ptp_lock);
|
||||
|
||||
memcpy(ts, &tspec, sizeof(struct timespec64));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ptp_kvm_enable(struct ptp_clock_info *ptp,
|
||||
struct ptp_clock_request *rq, int on)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static struct ptp_clock_info ptp_kvm_caps = {
|
||||
.owner = THIS_MODULE,
|
||||
.name = "KVM virtual PTP",
|
||||
.max_adj = 0,
|
||||
.n_ext_ts = 0,
|
||||
.n_pins = 0,
|
||||
.pps = 0,
|
||||
.adjfreq = ptp_kvm_adjfreq,
|
||||
.adjtime = ptp_kvm_adjtime,
|
||||
.gettime64 = ptp_kvm_gettime,
|
||||
.settime64 = ptp_kvm_settime,
|
||||
.enable = ptp_kvm_enable,
|
||||
.getcrosststamp = ptp_kvm_getcrosststamp,
|
||||
};
|
||||
|
||||
/* module operations */
|
||||
|
||||
static struct kvm_ptp_clock kvm_ptp_clock;
|
||||
|
||||
static void __exit ptp_kvm_exit(void)
|
||||
{
|
||||
ptp_clock_unregister(kvm_ptp_clock.ptp_clock);
|
||||
}
|
||||
|
||||
static int __init ptp_kvm_init(void)
|
||||
{
|
||||
long ret;
|
||||
|
||||
clock_pair_gpa = slow_virt_to_phys(&clock_pair);
|
||||
hv_clock = pvclock_pvti_cpu0_va();
|
||||
|
||||
if (!hv_clock)
|
||||
return -ENODEV;
|
||||
|
||||
ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_pair_gpa,
|
||||
KVM_CLOCK_PAIRING_WALLCLOCK);
|
||||
if (ret == -KVM_ENOSYS || ret == -KVM_EOPNOTSUPP)
|
||||
return -ENODEV;
|
||||
|
||||
kvm_ptp_clock.caps = ptp_kvm_caps;
|
||||
|
||||
kvm_ptp_clock.ptp_clock = ptp_clock_register(&kvm_ptp_clock.caps, NULL);
|
||||
|
||||
if (IS_ERR(kvm_ptp_clock.ptp_clock))
|
||||
return PTR_ERR(kvm_ptp_clock.ptp_clock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
module_init(ptp_kvm_init);
|
||||
module_exit(ptp_kvm_exit);
|
||||
|
||||
MODULE_AUTHOR("Marcelo Tosatti <mtosatti@redhat.com>");
|
||||
MODULE_DESCRIPTION("PTP clock using KVMCLOCK");
|
||||
MODULE_LICENSE("GPL");
|
|
@ -23,20 +23,24 @@
|
|||
#include <linux/hrtimer.h>
|
||||
#include <linux/workqueue.h>
|
||||
|
||||
struct arch_timer_kvm {
|
||||
struct arch_timer_context {
|
||||
/* Registers: control register, timer value */
|
||||
u32 cnt_ctl;
|
||||
u64 cnt_cval;
|
||||
|
||||
/* Timer IRQ */
|
||||
struct kvm_irq_level irq;
|
||||
|
||||
/* Active IRQ state caching */
|
||||
bool active_cleared_last;
|
||||
|
||||
/* Virtual offset */
|
||||
u64 cntvoff;
|
||||
};
|
||||
|
||||
struct arch_timer_cpu {
|
||||
/* Registers: control register, timer value */
|
||||
u32 cntv_ctl; /* Saved/restored */
|
||||
u64 cntv_cval; /* Saved/restored */
|
||||
|
||||
/*
|
||||
* Anything that is not used directly from assembly code goes
|
||||
* here.
|
||||
*/
|
||||
struct arch_timer_context vtimer;
|
||||
struct arch_timer_context ptimer;
|
||||
|
||||
/* Background timer used when the guest is not running */
|
||||
struct hrtimer timer;
|
||||
|
@ -47,21 +51,15 @@ struct arch_timer_cpu {
|
|||
/* Background timer active */
|
||||
bool armed;
|
||||
|
||||
/* Timer IRQ */
|
||||
struct kvm_irq_level irq;
|
||||
|
||||
/* Active IRQ state caching */
|
||||
bool active_cleared_last;
|
||||
|
||||
/* Is the timer enabled */
|
||||
bool enabled;
|
||||
};
|
||||
|
||||
int kvm_timer_hyp_init(void);
|
||||
int kvm_timer_enable(struct kvm_vcpu *vcpu);
|
||||
void kvm_timer_init(struct kvm *kvm);
|
||||
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_irq_level *irq);
|
||||
const struct kvm_irq_level *virt_irq,
|
||||
const struct kvm_irq_level *phys_irq);
|
||||
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
|
||||
void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu);
|
||||
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);
|
||||
|
@ -70,11 +68,16 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
|
|||
u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
|
||||
int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
|
||||
|
||||
bool kvm_timer_should_fire(struct kvm_vcpu *vcpu);
|
||||
bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
|
||||
void kvm_timer_schedule(struct kvm_vcpu *vcpu);
|
||||
void kvm_timer_unschedule(struct kvm_vcpu *vcpu);
|
||||
|
||||
u64 kvm_phys_timer_read(void);
|
||||
|
||||
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu);
|
||||
|
||||
void kvm_timer_init_vhe(void);
|
||||
|
||||
#define vcpu_vtimer(v) (&(v)->arch.timer_cpu.vtimer)
|
||||
#define vcpu_ptimer(v) (&(v)->arch.timer_cpu.ptimer)
|
||||
#endif
|
||||
|
|
|
@ -71,6 +71,8 @@ struct vgic_global {
|
|||
|
||||
/* GIC system register CPU interface */
|
||||
struct static_key_false gicv3_cpuif;
|
||||
|
||||
u32 ich_vtr_el2;
|
||||
};
|
||||
|
||||
extern struct vgic_global kvm_vgic_global_state;
|
||||
|
@ -101,9 +103,10 @@ struct vgic_irq {
|
|||
*/
|
||||
|
||||
u32 intid; /* Guest visible INTID */
|
||||
bool pending;
|
||||
bool line_level; /* Level only */
|
||||
bool soft_pending; /* Level only */
|
||||
bool pending_latch; /* The pending latch state used to calculate
|
||||
* the pending state for both level
|
||||
* and edge triggered IRQs. */
|
||||
bool active; /* not used for LPIs */
|
||||
bool enabled;
|
||||
bool hw; /* Tied to HW IRQ */
|
||||
|
@ -165,6 +168,8 @@ struct vgic_its {
|
|||
struct list_head collection_list;
|
||||
};
|
||||
|
||||
struct vgic_state_iter;
|
||||
|
||||
struct vgic_dist {
|
||||
bool in_kernel;
|
||||
bool ready;
|
||||
|
@ -212,6 +217,9 @@ struct vgic_dist {
|
|||
spinlock_t lpi_list_lock;
|
||||
struct list_head lpi_list_head;
|
||||
int lpi_list_count;
|
||||
|
||||
/* used by vgic-debug */
|
||||
struct vgic_state_iter *iter;
|
||||
};
|
||||
|
||||
struct vgic_v2_cpu_if {
|
||||
|
@ -269,6 +277,12 @@ struct vgic_cpu {
|
|||
u64 pendbaser;
|
||||
|
||||
bool lpis_enabled;
|
||||
|
||||
/* Cache guest priority bits */
|
||||
u32 num_pri_bits;
|
||||
|
||||
/* Cache guest interrupt ID bits */
|
||||
u32 num_id_bits;
|
||||
};
|
||||
|
||||
extern struct static_key_false vgic_v2_cpuif_trap;
|
||||
|
|
|
@ -349,8 +349,30 @@
|
|||
/*
|
||||
* CPU interface registers
|
||||
*/
|
||||
#define ICC_CTLR_EL1_EOImode_drop_dir (0U << 1)
|
||||
#define ICC_CTLR_EL1_EOImode_drop (1U << 1)
|
||||
#define ICC_CTLR_EL1_EOImode_SHIFT (1)
|
||||
#define ICC_CTLR_EL1_EOImode_drop_dir (0U << ICC_CTLR_EL1_EOImode_SHIFT)
|
||||
#define ICC_CTLR_EL1_EOImode_drop (1U << ICC_CTLR_EL1_EOImode_SHIFT)
|
||||
#define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT)
|
||||
#define ICC_CTLR_EL1_CBPR_SHIFT 0
|
||||
#define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT)
|
||||
#define ICC_CTLR_EL1_PRI_BITS_SHIFT 8
|
||||
#define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT)
|
||||
#define ICC_CTLR_EL1_ID_BITS_SHIFT 11
|
||||
#define ICC_CTLR_EL1_ID_BITS_MASK (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT)
|
||||
#define ICC_CTLR_EL1_SEIS_SHIFT 14
|
||||
#define ICC_CTLR_EL1_SEIS_MASK (0x1 << ICC_CTLR_EL1_SEIS_SHIFT)
|
||||
#define ICC_CTLR_EL1_A3V_SHIFT 15
|
||||
#define ICC_CTLR_EL1_A3V_MASK (0x1 << ICC_CTLR_EL1_A3V_SHIFT)
|
||||
#define ICC_PMR_EL1_SHIFT 0
|
||||
#define ICC_PMR_EL1_MASK (0xff << ICC_PMR_EL1_SHIFT)
|
||||
#define ICC_BPR0_EL1_SHIFT 0
|
||||
#define ICC_BPR0_EL1_MASK (0x7 << ICC_BPR0_EL1_SHIFT)
|
||||
#define ICC_BPR1_EL1_SHIFT 0
|
||||
#define ICC_BPR1_EL1_MASK (0x7 << ICC_BPR1_EL1_SHIFT)
|
||||
#define ICC_IGRPEN0_EL1_SHIFT 0
|
||||
#define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT)
|
||||
#define ICC_IGRPEN1_EL1_SHIFT 0
|
||||
#define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT)
|
||||
#define ICC_SRE_EL1_SRE (1U << 0)
|
||||
|
||||
/*
|
||||
|
@ -379,14 +401,29 @@
|
|||
#define ICH_HCR_EN (1 << 0)
|
||||
#define ICH_HCR_UIE (1 << 1)
|
||||
|
||||
#define ICH_VMCR_CTLR_SHIFT 0
|
||||
#define ICH_VMCR_CTLR_MASK (0x21f << ICH_VMCR_CTLR_SHIFT)
|
||||
#define ICH_VMCR_CBPR_SHIFT 4
|
||||
#define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT)
|
||||
#define ICH_VMCR_EOIM_SHIFT 9
|
||||
#define ICH_VMCR_EOIM_MASK (1 << ICH_VMCR_EOIM_SHIFT)
|
||||
#define ICH_VMCR_BPR1_SHIFT 18
|
||||
#define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT)
|
||||
#define ICH_VMCR_BPR0_SHIFT 21
|
||||
#define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT)
|
||||
#define ICH_VMCR_PMR_SHIFT 24
|
||||
#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT)
|
||||
#define ICH_VMCR_ENG0_SHIFT 0
|
||||
#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT)
|
||||
#define ICH_VMCR_ENG1_SHIFT 1
|
||||
#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT)
|
||||
|
||||
#define ICH_VTR_PRI_BITS_SHIFT 29
|
||||
#define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT)
|
||||
#define ICH_VTR_ID_BITS_SHIFT 23
|
||||
#define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT)
|
||||
#define ICH_VTR_SEIS_SHIFT 22
|
||||
#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT)
|
||||
#define ICH_VTR_A3V_SHIFT 21
|
||||
#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT)
|
||||
|
||||
#define ICC_IAR1_EL1_SPURIOUS 0x3ff
|
||||
|
||||
|
|
|
@ -45,7 +45,6 @@
|
|||
* include/linux/kvm_h.
|
||||
*/
|
||||
#define KVM_MEMSLOT_INVALID (1UL << 16)
|
||||
#define KVM_MEMSLOT_INCOHERENT (1UL << 17)
|
||||
|
||||
/* Two fragments for cross MMIO pages. */
|
||||
#define KVM_MAX_MMIO_FRAGMENTS 2
|
||||
|
@ -222,7 +221,6 @@ struct kvm_vcpu {
|
|||
struct mutex mutex;
|
||||
struct kvm_run *run;
|
||||
|
||||
int fpu_active;
|
||||
int guest_fpu_loaded, guest_xcr0_loaded;
|
||||
struct swait_queue_head wq;
|
||||
struct pid *pid;
|
||||
|
@ -642,18 +640,18 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
|
|||
int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
|
||||
unsigned long len);
|
||||
int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
|
||||
int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
|
||||
void *data, unsigned long len);
|
||||
int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc,
|
||||
void *data, unsigned long len);
|
||||
int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
|
||||
int offset, int len);
|
||||
int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
|
||||
unsigned long len);
|
||||
int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
|
||||
void *data, unsigned long len);
|
||||
int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
|
||||
void *data, int offset, unsigned long len);
|
||||
int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
|
||||
gpa_t gpa, unsigned long len);
|
||||
int kvm_vcpu_write_guest_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc,
|
||||
void *data, unsigned long len);
|
||||
int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc,
|
||||
void *data, int offset, unsigned long len);
|
||||
int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc,
|
||||
gpa_t gpa, unsigned long len);
|
||||
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
|
||||
int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
|
||||
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
|
||||
|
|
|
@@ -218,7 +218,8 @@ struct kvm_hyperv_exit {
struct kvm_run {
	/* in */
	__u8 request_interrupt_window;
	__u8 padding1[7];
	__u8 immediate_exit;
	__u8 padding1[6];

	/* out */
	__u32 exit_reason;
|
||||
|
@ -685,6 +686,13 @@ struct kvm_ppc_smmu_info {
|
|||
struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
|
||||
};
|
||||
|
||||
/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */
|
||||
struct kvm_ppc_resize_hpt {
|
||||
__u64 flags;
|
||||
__u32 shift;
|
||||
__u32 pad;
|
||||
};
|
||||
|
||||
#define KVMIO 0xAE
|
||||
|
||||
/* machine type bits, to be used as argument to KVM_CREATE_VM */
|
||||
|
@ -871,8 +879,10 @@ struct kvm_ppc_smmu_info {
|
|||
#define KVM_CAP_S390_USER_INSTR0 130
|
||||
#define KVM_CAP_MSI_DEVID 131
|
||||
#define KVM_CAP_PPC_HTM 132
|
||||
#define KVM_CAP_SPAPR_RESIZE_HPT 133
|
||||
#define KVM_CAP_PPC_MMU_RADIX 134
|
||||
#define KVM_CAP_PPC_MMU_HASH_V3 135
|
||||
#define KVM_CAP_IMMEDIATE_EXIT 136
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
|
@ -1189,6 +1199,9 @@ struct kvm_s390_ucas_mapping {
|
|||
#define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr)
|
||||
/* Available with KVM_CAP_PPC_RTAS */
|
||||
#define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args)
|
||||
/* Available with KVM_CAP_SPAPR_RESIZE_HPT */
|
||||
#define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt)
|
||||
#define KVM_PPC_RESIZE_HPT_COMMIT _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt)
|
||||
/* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 */
|
||||
#define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg)
|
||||
/* Available with KVM_CAP_PPC_RADIX_MMU */
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#define KVM_EFAULT EFAULT
|
||||
#define KVM_E2BIG E2BIG
|
||||
#define KVM_EPERM EPERM
|
||||
#define KVM_EOPNOTSUPP 95
|
||||
|
||||
#define KVM_HC_VAPIC_POLL_IRQ 1
|
||||
#define KVM_HC_MMU_OP 2
|
||||
|
@ -23,6 +24,7 @@
|
|||
#define KVM_HC_MIPS_GET_CLOCK_FREQ 6
|
||||
#define KVM_HC_MIPS_EXIT_VM 7
|
||||
#define KVM_HC_MIPS_CONSOLE_OUTPUT 8
|
||||
#define KVM_HC_CLOCK_PAIRING 9
|
||||
|
||||
/*
|
||||
* hypercalls use architecture specific
|
||||
|
|
|
@@ -37,10 +37,10 @@ static u32 host_vtimer_irq_flags;
 
 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.timer_cpu.active_cleared_last = false;
+	vcpu_vtimer(vcpu)->active_cleared_last = false;
 }
 
-static u64 kvm_phys_timer_read(void)
+u64 kvm_phys_timer_read(void)
 {
 	return timecounter->cc->read(timecounter->cc);
 }
@@ -98,12 +98,12 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
 	kvm_vcpu_kick(vcpu);
 }
 
-static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu)
+static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
 {
 	u64 cval, now;
 
-	cval = vcpu->arch.timer_cpu.cntv_cval;
-	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+	cval = timer_ctx->cnt_cval;
+	now = kvm_phys_timer_read() - timer_ctx->cntvoff;
 
 	if (now < cval) {
 		u64 ns;
 
@@ -118,6 +118,35 @@ static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
+{
+	return !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
+		(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE);
+}
+
+/*
+ * Returns the earliest expiration time in ns among guest timers.
+ * Note that it will return 0 if none of timers can fire.
+ */
+static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
+{
+	u64 min_virt = ULLONG_MAX, min_phys = ULLONG_MAX;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+
+	if (kvm_timer_irq_can_fire(vtimer))
+		min_virt = kvm_timer_compute_delta(vtimer);
+
+	if (kvm_timer_irq_can_fire(ptimer))
+		min_phys = kvm_timer_compute_delta(ptimer);
+
+	/* If none of timers can fire, then return 0 */
+	if ((min_virt == ULLONG_MAX) && (min_phys == ULLONG_MAX))
+		return 0;
+
+	return min(min_virt, min_phys);
+}
+
 static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
 {
 	struct arch_timer_cpu *timer;
@@ -132,7 +161,7 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
 	 * PoV (NTP on the host may have forced it to expire
 	 * early). If we should have slept longer, restart it.
 	 */
-	ns = kvm_timer_compute_delta(vcpu);
+	ns = kvm_timer_earliest_exp(vcpu);
 	if (unlikely(ns)) {
 		hrtimer_forward_now(hrt, ns_to_ktime(ns));
 		return HRTIMER_RESTART;
@@ -142,42 +171,33 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
 	return HRTIMER_NORESTART;
 }
 
-static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu)
+bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
-	return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
-		(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE);
-}
-
-bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
-{
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	u64 cval, now;
 
-	if (!kvm_timer_irq_can_fire(vcpu))
+	if (!kvm_timer_irq_can_fire(timer_ctx))
 		return false;
 
-	cval = timer->cntv_cval;
-	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+	cval = timer_ctx->cnt_cval;
+	now = kvm_phys_timer_read() - timer_ctx->cntvoff;
 
 	return cval <= now;
 }
 
-static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
+static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
+				 struct arch_timer_context *timer_ctx)
 {
 	int ret;
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
 	BUG_ON(!vgic_initialized(vcpu->kvm));
 
-	timer->active_cleared_last = false;
-	timer->irq.level = new_level;
-	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->irq.irq,
-				   timer->irq.level);
-	ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
-					 timer->irq.irq,
-					 timer->irq.level);
+	timer_ctx->active_cleared_last = false;
+	timer_ctx->irq.level = new_level;
+	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
+				   timer_ctx->irq.level);
+
+	ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq,
+				  timer_ctx->irq.level);
 	WARN_ON(ret);
 }
 
@@ -188,22 +208,43 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
 static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
 	/*
 	 * If userspace modified the timer registers via SET_ONE_REG before
-	 * the vgic was initialized, we mustn't set the timer->irq.level value
+	 * the vgic was initialized, we mustn't set the vtimer->irq.level value
 	 * because the guest would never see the interrupt. Instead wait
 	 * until we call this function from kvm_timer_flush_hwstate.
 	 */
 	if (!vgic_initialized(vcpu->kvm) || !timer->enabled)
 		return -ENODEV;
 
-	if (kvm_timer_should_fire(vcpu) != timer->irq.level)
-		kvm_timer_update_irq(vcpu, !timer->irq.level);
+	if (kvm_timer_should_fire(vtimer) != vtimer->irq.level)
+		kvm_timer_update_irq(vcpu, !vtimer->irq.level, vtimer);
+
+	if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
+		kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
 
 	return 0;
 }
 
+/* Schedule the background timer for the emulated timer. */
+static void kvm_timer_emulate(struct kvm_vcpu *vcpu,
+			      struct arch_timer_context *timer_ctx)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	if (kvm_timer_should_fire(timer_ctx))
+		return;
+
+	if (!kvm_timer_irq_can_fire(timer_ctx))
+		return;
+
+	/* The timer has not yet expired, schedule a background timer */
+	timer_arm(timer, kvm_timer_compute_delta(timer_ctx));
+}
+
 /*
  * Schedule the background timer before calling kvm_vcpu_block, so that this
  * thread is removed from its waitqueue and made runnable when there's a timer
@@ -212,26 +253,31 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
 void kvm_timer_schedule(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
 	BUG_ON(timer_is_armed(timer));
 
 	/*
-	 * No need to schedule a background timer if the guest timer has
+	 * No need to schedule a background timer if any guest timer has
 	 * already expired, because kvm_vcpu_block will return before putting
 	 * the thread to sleep.
 	 */
-	if (kvm_timer_should_fire(vcpu))
+	if (kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer))
 		return;
 
 	/*
-	 * If the timer is not capable of raising interrupts (disabled or
+	 * If both timers are not capable of raising interrupts (disabled or
 	 * masked), then there's no more work for us to do.
 	 */
-	if (!kvm_timer_irq_can_fire(vcpu))
+	if (!kvm_timer_irq_can_fire(vtimer) && !kvm_timer_irq_can_fire(ptimer))
 		return;
 
-	/* The timer has not yet expired, schedule a background timer */
-	timer_arm(timer, kvm_timer_compute_delta(vcpu));
+	/*
+	 * The guest timers have not yet expired, schedule a background timer.
+	 * Set the earliest expiration time among the guest timers.
+	 */
+	timer_arm(timer, kvm_timer_earliest_exp(vcpu));
 }
 
 void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
@@ -249,13 +295,16 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
  */
 void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	bool phys_active;
 	int ret;
 
 	if (kvm_timer_update_state(vcpu))
 		return;
 
+	/* Set the background timer for the physical timer emulation. */
+	kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu));
+
 	/*
 	 * If we enter the guest with the virtual input level to the VGIC
 	 * asserted, then we have already told the VGIC what we need to, and
@@ -273,8 +322,8 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 	 * to ensure that hardware interrupts from the timer triggers a guest
 	 * exit.
 	 */
-	phys_active = timer->irq.level ||
-			kvm_vgic_map_is_active(vcpu, timer->irq.irq);
+	phys_active = vtimer->irq.level ||
+			kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
 
 	/*
 	 * We want to avoid hitting the (re)distributor as much as
@@ -296,7 +345,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 	 * - cached value is "active clear"
 	 * - value to be programmed is "active clear"
 	 */
-	if (timer->active_cleared_last && !phys_active)
+	if (vtimer->active_cleared_last && !phys_active)
 		return;
 
 	ret = irq_set_irqchip_state(host_vtimer_irq,
@@ -304,7 +353,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 				    phys_active);
 	WARN_ON(ret);
 
-	timer->active_cleared_last = !phys_active;
+	vtimer->active_cleared_last = !phys_active;
 }
 
 /**
@@ -318,7 +367,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
-	BUG_ON(timer_is_armed(timer));
+	/*
+	 * This is to cancel the background timer for the physical timer
+	 * emulation if it is set.
+	 */
+	timer_disarm(timer);
 
 	/*
 	 * The guest could have modified the timer registers or the timer
@@ -328,9 +381,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 }
 
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
-			 const struct kvm_irq_level *irq)
+			 const struct kvm_irq_level *virt_irq,
+			 const struct kvm_irq_level *phys_irq)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
 	/*
 	 * The vcpu timer irq number cannot be determined in
@@ -338,7 +393,8 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
 	 * kvm_vcpu_set_target(). To handle this, we determine
 	 * vcpu timer irq number when the vcpu is reset.
 	 */
-	timer->irq.irq = irq->irq;
+	vtimer->irq.irq = virt_irq->irq;
+	ptimer->irq.irq = phys_irq->irq;
 
 	/*
 	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
@@ -346,16 +402,40 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
 	 * resets the timer to be disabled and unmasked and is compliant with
 	 * the ARMv7 architecture.
 	 */
-	timer->cntv_ctl = 0;
+	vtimer->cnt_ctl = 0;
+	ptimer->cnt_ctl = 0;
 	kvm_timer_update_state(vcpu);
 
 	return 0;
 }
 
+/* Make the updates of cntvoff for all vtimer contexts atomic */
+static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
+{
+	int i;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_vcpu *tmp;
+
+	mutex_lock(&kvm->lock);
+	kvm_for_each_vcpu(i, tmp, kvm)
+		vcpu_vtimer(tmp)->cntvoff = cntvoff;
+
+	/*
+	 * When called from the vcpu create path, the CPU being created is not
+	 * included in the loop above, so we just set it here as well.
+	 */
+	vcpu_vtimer(vcpu)->cntvoff = cntvoff;
+	mutex_unlock(&kvm->lock);
+}
+
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
+	/* Synchronize cntvoff across all vtimers of a VM. */
+	update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
+	vcpu_ptimer(vcpu)->cntvoff = 0;
+
 	INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
 	hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	timer->timer.function = kvm_timer_expire;
@@ -368,17 +448,17 @@ static void kvm_timer_init_interrupt(void *info)
 
 int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 
 	switch (regid) {
 	case KVM_REG_ARM_TIMER_CTL:
-		timer->cntv_ctl = value;
+		vtimer->cnt_ctl = value;
 		break;
 	case KVM_REG_ARM_TIMER_CNT:
-		vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value;
+		update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value);
 		break;
 	case KVM_REG_ARM_TIMER_CVAL:
-		timer->cntv_cval = value;
+		vtimer->cnt_cval = value;
 		break;
 	default:
 		return -1;
@@ -390,15 +470,15 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
 
 u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 
 	switch (regid) {
 	case KVM_REG_ARM_TIMER_CTL:
-		return timer->cntv_ctl;
+		return vtimer->cnt_ctl;
 	case KVM_REG_ARM_TIMER_CNT:
-		return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+		return kvm_phys_timer_read() - vtimer->cntvoff;
 	case KVM_REG_ARM_TIMER_CVAL:
-		return timer->cntv_cval;
+		return vtimer->cnt_cval;
 	}
 	return (u64)-1;
 }
@@ -462,14 +542,16 @@ int kvm_timer_hyp_init(void)
 void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 
 	timer_disarm(timer);
-	kvm_vgic_unmap_phys_irq(vcpu, timer->irq.irq);
+	kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq);
 }
 
 int kvm_timer_enable(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	struct irq_desc *desc;
 	struct irq_data *data;
 	int phys_irq;
@@ -497,7 +579,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 	 * Tell the VGIC that the virtual interrupt is tied to a
 	 * physical interrupt. We do that once per VCPU.
 	 */
-	ret = kvm_vgic_map_phys_irq(vcpu, timer->irq.irq, phys_irq);
+	ret = kvm_vgic_map_phys_irq(vcpu, vtimer->irq.irq, phys_irq);
 	if (ret)
 		return ret;
 
@@ -506,11 +588,6 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-void kvm_timer_init(struct kvm *kvm)
-{
-	kvm->arch.timer.cntvoff = kvm_phys_timer_read();
-}
-
 /*
  * On VHE system, we only need to configure trap on physical timer and counter
  * accesses in EL0 and EL1 once, not for every world switch.

@@ -25,11 +25,12 @@
 void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	u64 val;
 
 	if (timer->enabled) {
-		timer->cntv_ctl = read_sysreg_el0(cntv_ctl);
-		timer->cntv_cval = read_sysreg_el0(cntv_cval);
+		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+		vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
 	}
 
 	/* Disable the virtual timer */
@@ -52,8 +53,8 @@ void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu)
 
 void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu)
 {
-	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	u64 val;
 
 	/* Those bits are already configured at boot on VHE-system */
@@ -69,9 +70,9 @@ void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu)
 	}
 
 	if (timer->enabled) {
-		write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2);
-		write_sysreg_el0(timer->cntv_cval, cntv_cval);
+		write_sysreg(vtimer->cntvoff, cntvoff_el2);
+		write_sysreg_el0(vtimer->cnt_cval, cntv_cval);
 		isb();
-		write_sysreg_el0(timer->cntv_ctl, cntv_ctl);
+		write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl);
 	}
 }

@@ -0,0 +1,283 @@
+/*
+ * Copyright (C) 2016 Linaro
+ * Author: Christoffer Dall <christoffer.dall@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu.h>
+#include <linux/debugfs.h>
+#include <linux/interrupt.h>
+#include <linux/kvm_host.h>
+#include <linux/seq_file.h>
+#include <kvm/arm_vgic.h>
+#include <asm/kvm_mmu.h>
+#include "vgic.h"
+
+/*
+ * Structure to control looping through the entire vgic state. We start at
+ * zero for each field and move upwards. So, if dist_id is 0 we print the
+ * distributor info. When dist_id is 1, we have already printed it and move
+ * on.
+ *
+ * When vcpu_id < nr_cpus we print the vcpu info until vcpu_id == nr_cpus and
+ * so on.
+ */
+struct vgic_state_iter {
+	int nr_cpus;
+	int nr_spis;
+	int dist_id;
+	int vcpu_id;
+	int intid;
+};
+
+static void iter_next(struct vgic_state_iter *iter)
+{
+	if (iter->dist_id == 0) {
+		iter->dist_id++;
+		return;
+	}
+
+	iter->intid++;
+	if (iter->intid == VGIC_NR_PRIVATE_IRQS &&
+	    ++iter->vcpu_id < iter->nr_cpus)
+		iter->intid = 0;
+}
+
+static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter,
+		      loff_t pos)
+{
+	int nr_cpus = atomic_read(&kvm->online_vcpus);
+
+	memset(iter, 0, sizeof(*iter));
+
+	iter->nr_cpus = nr_cpus;
+	iter->nr_spis = kvm->arch.vgic.nr_spis;
+
+	/* Fast forward to the right position if needed */
+	while (pos--)
+		iter_next(iter);
+}
+
+static bool end_of_vgic(struct vgic_state_iter *iter)
+{
+	return iter->dist_id > 0 &&
+		iter->vcpu_id == iter->nr_cpus &&
+		(iter->intid - VGIC_NR_PRIVATE_IRQS) == iter->nr_spis;
+}
+
+static void *vgic_debug_start(struct seq_file *s, loff_t *pos)
+{
+	struct kvm *kvm = (struct kvm *)s->private;
+	struct vgic_state_iter *iter;
+
+	mutex_lock(&kvm->lock);
+	iter = kvm->arch.vgic.iter;
+	if (iter) {
+		iter = ERR_PTR(-EBUSY);
+		goto out;
+	}
+
+	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+	if (!iter) {
+		iter = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	iter_init(kvm, iter, *pos);
+	kvm->arch.vgic.iter = iter;
+
+	if (end_of_vgic(iter))
+		iter = NULL;
+out:
+	mutex_unlock(&kvm->lock);
+	return iter;
+}
+
+static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct kvm *kvm = (struct kvm *)s->private;
+	struct vgic_state_iter *iter = kvm->arch.vgic.iter;
+
+	++*pos;
+	iter_next(iter);
+	if (end_of_vgic(iter))
+		iter = NULL;
+	return iter;
+}
+
+static void vgic_debug_stop(struct seq_file *s, void *v)
+{
+	struct kvm *kvm = (struct kvm *)s->private;
+	struct vgic_state_iter *iter;
+
+	/*
+	 * If the seq file wasn't properly opened, there's nothing to clearn
+	 * up.
+	 */
+	if (IS_ERR(v))
+		return;
+
+	mutex_lock(&kvm->lock);
+	iter = kvm->arch.vgic.iter;
+	kfree(iter);
+	kvm->arch.vgic.iter = NULL;
+	mutex_unlock(&kvm->lock);
+}
+
+static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
+{
+	seq_printf(s, "Distributor\n");
+	seq_printf(s, "===========\n");
+	seq_printf(s, "vgic_model:\t%s\n",
+		   (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) ?
+		   "GICv3" : "GICv2");
+	seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis);
+	seq_printf(s, "enabled:\t%d\n", dist->enabled);
+	seq_printf(s, "\n");
+
+	seq_printf(s, "P=pending_latch, L=line_level, A=active\n");
+	seq_printf(s, "E=enabled, H=hw, C=config (level=1, edge=0)\n");
+}
+
+static void print_header(struct seq_file *s, struct vgic_irq *irq,
+			 struct kvm_vcpu *vcpu)
+{
+	int id = 0;
+	char *hdr = "SPI ";
+
+	if (vcpu) {
+		hdr = "VCPU";
+		id = vcpu->vcpu_id;
+	}
+
+	seq_printf(s, "\n");
+	seq_printf(s, "%s%2d TYP ID TGT_ID PLAEHC HWID TARGET SRC PRI VCPU_ID\n", hdr, id);
+	seq_printf(s, "---------------------------------------------------------------\n");
+}
+
+static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
+			    struct kvm_vcpu *vcpu)
+{
+	char *type;
+	if (irq->intid < VGIC_NR_SGIS)
+		type = "SGI";
+	else if (irq->intid < VGIC_NR_PRIVATE_IRQS)
+		type = "PPI";
+	else
+		type = "SPI";
+
+	if (irq->intid ==0 || irq->intid == VGIC_NR_PRIVATE_IRQS)
+		print_header(s, irq, vcpu);
+
+	seq_printf(s, " %s %4d "
+		      " %2d "
+		      "%d%d%d%d%d%d "
+		      "%8d "
+		      "%8x "
+		      " %2x "
+		      "%3d "
+		      " %2d "
+		      "\n",
+			type, irq->intid,
+			(irq->target_vcpu) ? irq->target_vcpu->vcpu_id : -1,
+			irq->pending_latch,
+			irq->line_level,
+			irq->active,
+			irq->enabled,
+			irq->hw,
+			irq->config == VGIC_CONFIG_LEVEL,
+			irq->hwintid,
+			irq->mpidr,
+			irq->source,
+			irq->priority,
+			(irq->vcpu) ? irq->vcpu->vcpu_id : -1);
+
+}
+
+static int vgic_debug_show(struct seq_file *s, void *v)
+{
+	struct kvm *kvm = (struct kvm *)s->private;
+	struct vgic_state_iter *iter = (struct vgic_state_iter *)v;
+	struct vgic_irq *irq;
+	struct kvm_vcpu *vcpu = NULL;
+
+	if (iter->dist_id == 0) {
+		print_dist_state(s, &kvm->arch.vgic);
+		return 0;
+	}
+
+	if (!kvm->arch.vgic.initialized)
+		return 0;
+
+	if (iter->vcpu_id < iter->nr_cpus) {
+		vcpu = kvm_get_vcpu(kvm, iter->vcpu_id);
+		irq = &vcpu->arch.vgic_cpu.private_irqs[iter->intid];
+	} else {
+		irq = &kvm->arch.vgic.spis[iter->intid - VGIC_NR_PRIVATE_IRQS];
+	}
+
+	spin_lock(&irq->irq_lock);
+	print_irq_state(s, irq, vcpu);
+	spin_unlock(&irq->irq_lock);
+
+	return 0;
+}
+
+static struct seq_operations vgic_debug_seq_ops = {
+	.start = vgic_debug_start,
+	.next = vgic_debug_next,
+	.stop = vgic_debug_stop,
+	.show = vgic_debug_show
+};
+
+static int debug_open(struct inode *inode, struct file *file)
+{
+	int ret;
+	ret = seq_open(file, &vgic_debug_seq_ops);
+	if (!ret) {
+		struct seq_file *seq;
+		/* seq_open will have modified file->private_data */
+		seq = file->private_data;
+		seq->private = inode->i_private;
+	}
+
+	return ret;
+};
+
+static struct file_operations vgic_debug_fops = {
+	.owner = THIS_MODULE,
+	.open = debug_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release
+};
+
+int vgic_debug_init(struct kvm *kvm)
+{
+	if (!kvm->debugfs_dentry)
+		return -ENOENT;
+
+	if (!debugfs_create_file("vgic-state", 0444,
+				 kvm->debugfs_dentry,
+				 kvm,
+				 &vgic_debug_fops))
+		return -ENOMEM;
+
+	return 0;
+}
+
+int vgic_debug_destroy(struct kvm *kvm)
+{
+	return 0;
+}

@@ -259,6 +259,8 @@ int vgic_init(struct kvm *kvm)
 	if (ret)
 		goto out;
 
+	vgic_debug_init(kvm);
+
 	dist->initialized = true;
 out:
 	return ret;
@@ -288,6 +290,8 @@ static void __kvm_vgic_destroy(struct kvm *kvm)
 	struct kvm_vcpu *vcpu;
 	int i;
 
+	vgic_debug_destroy(kvm);
+
 	kvm_vgic_dist_destroy(kvm);
 
 	kvm_for_each_vcpu(i, vcpu, kvm)

@@ -99,6 +99,9 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 	if (!vgic_has_its(kvm))
 		return -ENODEV;
 
+	if (!level)
+		return -1;
+
 	return vgic_its_inject_msi(kvm, &msi);
 }
 

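For context, a hedged sketch of the userspace path that ends up in vgic_its_inject_msi(): an MSI raised with the existing KVM_SIGNAL_MSI ioctl, carrying an ITS device ID in the devid field (valid when KVM_CAP_MSI_DEVID is advertised, see the capability list above). The doorbell address and IDs are made-up example values, and inject_its_msi() is a hypothetical helper name.

/*
 * Hedged sketch (not part of this patch): injecting an ITS-translated
 * MSI from userspace.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int inject_its_msi(int vm_fd, __u32 devid, __u32 event_id)
{
	struct kvm_msi msi = {
		.address_lo = 0x08020040,	/* GITS_TRANSLATER doorbell (example) */
		.address_hi = 0,
		.data = event_id,
		.flags = KVM_MSI_VALID_DEVID,
		.devid = devid,
	};

	/* > 0: delivered, 0: blocked by the guest, < 0: error */
	return ioctl(vm_fd, KVM_SIGNAL_MSI, &msi);
}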
@@ -350,7 +350,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
 
 		irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]);
 		spin_lock(&irq->irq_lock);
-		irq->pending = pendmask & (1U << bit_nr);
+		irq->pending_latch = pendmask & (1U << bit_nr);
 		vgic_queue_irq_unlock(vcpu->kvm, irq);
 		vgic_put_irq(vcpu->kvm, irq);
 	}
@@ -465,7 +465,7 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
 		return -EBUSY;
 
 	spin_lock(&itte->irq->irq_lock);
-	itte->irq->pending = true;
+	itte->irq->pending_latch = true;
 	vgic_queue_irq_unlock(kvm, itte->irq);
 
 	return 0;
@@ -913,7 +913,7 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its,
 	if (!itte)
 		return E_ITS_CLEAR_UNMAPPED_INTERRUPT;
 
-	itte->irq->pending = false;
+	itte->irq->pending_latch = false;
 
 	return 0;
 }