This pull request is slightly bigger than usual at this stage, but
 I swear I would have sent it the same to Linus!  The main cause for
 this is that I was on vacation until two weeks ago and it took a while
 to sort all the pending patches between 4.19 and 4.20, test them and
 so on.
 
 It's mostly small bugfixes and cleanups, mostly around x86 nested
 virtualization.  One important change, not related to nested
 virtualization, is that the ability for the guest kernel to trap CPUID
 instructions (in Linux that's the ARCH_SET_CPUID arch_prctl) is now
 masked by default.  This is because the feature is detected through an
 MSR; a very bad idea that Intel seems to like more and more.  Some
 applications choke if the other fields of that MSR are not initialized
 as on real hardware, hence we have to disable the whole MSR by default,
 as was the case before Linux 4.12.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.22 (GNU/Linux)
 
 iQEcBAABAgAGBQJbpPo1AAoJEL/70l94x66DdxgH/is0qe6ZBtzb6Qc0W+8mHHD7
 nxIkWAs2V5NsouJ750YwRQ+0Ym407+wlNt30acdBUEoXhrnA5/TvyGq999XvCL96
 upWEIxpIgbvTMX/e2nLhe4wQdhsboUK4r0/B9IFgVFYrdCt5uRXjB2G4ewxcqxL/
 GxxqrAKhaRsbQG9Xv0Fw5Vohh/Ls6fQDJcyuY1EBnbMpVenq2QDLI6cOAPXncyFb
 uLN6ov4GNCWIPckwxejri5XhZesUOsafrmn48sApShh4T6TrisrdtSYdzl+DGza+
 j5vhIEwdFO5kulZ3viuhqKJOnS2+F6wvfZ75IKT0tEKeU2bi+ifGDyGRefSF6Q0=
 =YXLw
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Paolo writes:
  "It's mostly small bugfixes and cleanups, mostly around x86 nested
   virtualization.  One important change, not related to nested
   virtualization, is that the ability for the guest kernel to trap
   CPUID instructions (in Linux that's the ARCH_SET_CPUID arch_prctl) is
   now masked by default.  This is because the feature is detected
   through an MSR; a very bad idea that Intel seems to like more and
   more.  Some applications choke if the other fields of that MSR are
   not initialized as on real hardware, hence we have to disable the
   whole MSR by default, as was the case before Linux 4.12."
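
For context, the trap being masked here is requested per task with the
ARCH_SET_CPUID arch_prctl, and the MSR in question is MSR_PLATFORM_INFO
(see the shortlog below).  The following is an illustrative userspace
sketch only, not code from this pull request; it assumes an x86-64
guest and copies the ARCH_* constants from asm/prctl.h in case the libc
headers do not provide them:

/*
 * Illustrative only -- not from this series.  Ask the kernel to trap
 * this task's CPUID instructions (they raise SIGSEGV until re-enabled).
 * The call fails when CPUID faulting is unavailable, which is what the
 * MSR_PLATFORM_INFO handling in this series controls for KVM guests.
 */
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef ARCH_SET_CPUID
#define ARCH_GET_CPUID 0x1011
#define ARCH_SET_CPUID 0x1012
#endif

int main(void)
{
        /* 0 = make CPUID fault, 1 = allow CPUID again */
        if (syscall(SYS_arch_prctl, ARCH_SET_CPUID, 0)) {
                perror("ARCH_SET_CPUID");
                return 1;
        }
        printf("CPUID is now trapped for this task\n");
        return syscall(SYS_arch_prctl, ARCH_SET_CPUID, 1) ? 1 : 0;
}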

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (23 commits)
  KVM: nVMX: Fix bad cleanup on error of get/set nested state IOCTLs
  kvm: selftests: Add platform_info_test
  KVM: x86: Control guest reads of MSR_PLATFORM_INFO
  KVM: x86: Turbo bits in MSR_PLATFORM_INFO
  nVMX x86: Check VPID value on vmentry of L2 guests
  nVMX x86: check posted-interrupt descriptor addresss on vmentry of L2
  KVM: nVMX: Wake blocked vCPU in guest-mode if pending interrupt in virtual APICv
  KVM: VMX: check nested state and CR4.VMXE against SMM
  kvm: x86: make kvm_{load|put}_guest_fpu() static
  x86/hyper-v: rename ipi_arg_{ex,non_ex} structures
  KVM: VMX: use preemption timer to force immediate VMExit
  KVM: VMX: modify preemption timer bit only when arming timer
  KVM: VMX: immediately mark preemption timer expired only for zero value
  KVM: SVM: Switch to bitmap_zalloc()
  KVM/MMU: Fix comment in walk_shadow_page_lockless_end()
  kvm: selftests: use -pthread instead of -lpthread
  KVM: x86: don't reset root in kvm_mmu_setup()
  kvm: mmu: Don't read PDPTEs when paging is not enabled
  x86/kvm/lapic: always disable MMIO interface in x2APIC mode
  KVM: s390: Make huge pages unavailable in ucontrol VMs
  ...
Commit a27fb6d983 by Greg Kroah-Hartman, 2018-09-21 16:21:42 +02:00
27 changed files with 537 additions and 244 deletions


@@ -4510,7 +4510,8 @@ Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
 Architectures: s390
 Parameters: none
 Returns: 0 on success, -EINVAL if hpage module parameter was not set
-         or cmma is enabled
+         or cmma is enabled, or the VM has the KVM_VM_S390_UCONTROL
+         flag set
 
 With this capability the KVM support for memory backing with 1m pages
 through hugetlbfs can be enabled for a VM. After the capability is
@@ -4521,6 +4522,15 @@ hpage module parameter is not set to 1, -EINVAL is returned.
 While it is generally possible to create a huge page backed VM without
 this capability, the VM will not be able to run.
 
+7.14 KVM_CAP_MSR_PLATFORM_INFO
+
+Architectures: x86
+Parameters: args[0] whether feature should be enabled or not
+
+With this capability, a guest may read the MSR_PLATFORM_INFO MSR. Otherwise,
+a #GP would be raised when the guest tries to access. Currently, this
+capability does not enable write permissions of this MSR for the guest.
+
 8. Other capabilities.
 ----------------------
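
As an aside (my sketch, not part of the documentation patch): enabling
the new capability follows the usual KVM_ENABLE_CAP pattern on the VM
file descriptor, assuming a vm_fd obtained from KVM_CREATE_VM and a
linux/kvm.h that defines KVM_CAP_MSR_PLATFORM_INFO:

/*
 * Sketch only: toggle guest visibility of MSR_PLATFORM_INFO on an
 * already-created VM file descriptor (vm_fd).
 */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int set_platform_info_visible(int vm_fd, int enable)
{
        struct kvm_enable_cap cap;

        memset(&cap, 0, sizeof(cap));
        cap.cap = KVM_CAP_MSR_PLATFORM_INFO;
        cap.args[0] = enable;   /* args[0]: 1 = readable, 0 = #GP on read */
        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}

The platform_info_test selftest added at the end of this series drives
the same toggle through its new vm_enable_cap() helper.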


@@ -1051,7 +1051,6 @@ static inline void vmemmap_remove_mapping(unsigned long start,
        return hash__vmemmap_remove_mapping(start, page_size);
 }
 #endif
-struct page *realmode_pfn_to_page(unsigned long pfn);
 
 static inline pte_t pmd_pte(pmd_t pmd)
 {


@@ -220,8 +220,6 @@ extern void iommu_del_device(struct device *dev);
 extern int __init tce_iommu_bus_notifier_init(void);
 extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
                unsigned long *hpa, enum dma_data_direction *direction);
-extern long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
-               unsigned long *hpa, enum dma_data_direction *direction);
 #else
 static inline void iommu_register_group(struct iommu_table_group *table_group,
                                int pci_domain_number,


@@ -38,6 +38,7 @@ extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
                unsigned long ua, unsigned int pageshift, unsigned long *hpa);
 extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
                unsigned long ua, unsigned int pageshift, unsigned long *hpa);
+extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua);
 extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
 extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
 #endif


@@ -1013,31 +1013,6 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
 }
 EXPORT_SYMBOL_GPL(iommu_tce_xchg);
 
-#ifdef CONFIG_PPC_BOOK3S_64
-long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
-               unsigned long *hpa, enum dma_data_direction *direction)
-{
-       long ret;
-
-       ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
-
-       if (!ret && ((*direction == DMA_FROM_DEVICE) ||
-                       (*direction == DMA_BIDIRECTIONAL))) {
-               struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);
-
-               if (likely(pg)) {
-                       SetPageDirty(pg);
-               } else {
-                       tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
-                       ret = -EFAULT;
-               }
-       }
-
-       return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
-#endif
-
 int iommu_take_ownership(struct iommu_table *tbl)
 {
        unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;


@ -525,8 +525,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned long ea, unsigned long dsisr) unsigned long ea, unsigned long dsisr)
{ {
struct kvm *kvm = vcpu->kvm; struct kvm *kvm = vcpu->kvm;
unsigned long mmu_seq, pte_size; unsigned long mmu_seq;
unsigned long gpa, gfn, hva, pfn; unsigned long gpa, gfn, hva;
struct kvm_memory_slot *memslot; struct kvm_memory_slot *memslot;
struct page *page = NULL; struct page *page = NULL;
long ret; long ret;
@ -623,9 +623,10 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/ */
hva = gfn_to_hva_memslot(memslot, gfn); hva = gfn_to_hva_memslot(memslot, gfn);
if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) { if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
pfn = page_to_pfn(page);
upgrade_write = true; upgrade_write = true;
} else { } else {
unsigned long pfn;
/* Call KVM generic code to do the slow-path check */ /* Call KVM generic code to do the slow-path check */
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
writing, upgrade_p); writing, upgrade_p);
@ -639,61 +640,43 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
} }
} }
/* See if we can insert a 1GB or 2MB large PTE here */ /*
level = 0; * Read the PTE from the process' radix tree and use that
if (page && PageCompound(page)) { * so we get the shift and attribute bits.
pte_size = PAGE_SIZE << compound_order(compound_head(page)); */
if (pte_size >= PUD_SIZE && local_irq_disable();
(gpa & (PUD_SIZE - PAGE_SIZE)) == ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
(hva & (PUD_SIZE - PAGE_SIZE))) { pte = *ptep;
level = 2; local_irq_enable();
pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
} else if (pte_size >= PMD_SIZE && /* Get pte level from shift/size */
(gpa & (PMD_SIZE - PAGE_SIZE)) == if (shift == PUD_SHIFT &&
(hva & (PMD_SIZE - PAGE_SIZE))) { (gpa & (PUD_SIZE - PAGE_SIZE)) ==
level = 1; (hva & (PUD_SIZE - PAGE_SIZE))) {
pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1); level = 2;
} else if (shift == PMD_SHIFT &&
(gpa & (PMD_SIZE - PAGE_SIZE)) ==
(hva & (PMD_SIZE - PAGE_SIZE))) {
level = 1;
} else {
level = 0;
if (shift > PAGE_SHIFT) {
/*
* If the pte maps more than one page, bring over
* bits from the virtual address to get the real
* address of the specific single page we want.
*/
unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
pte = __pte(pte_val(pte) | (hva & rpnmask));
} }
} }
/* pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
* Compute the PTE value that we need to insert. if (writing || upgrade_write) {
*/ if (pte_val(pte) & _PAGE_WRITE)
if (page) { pte = __pte(pte_val(pte) | _PAGE_DIRTY);
pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE |
_PAGE_ACCESSED;
if (writing || upgrade_write)
pgflags |= _PAGE_WRITE | _PAGE_DIRTY;
pte = pfn_pte(pfn, __pgprot(pgflags));
} else { } else {
/* pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
* Read the PTE from the process' radix tree and use that
* so we get the attribute bits.
*/
local_irq_disable();
ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
pte = *ptep;
local_irq_enable();
if (shift == PUD_SHIFT &&
(gpa & (PUD_SIZE - PAGE_SIZE)) ==
(hva & (PUD_SIZE - PAGE_SIZE))) {
level = 2;
} else if (shift == PMD_SHIFT &&
(gpa & (PMD_SIZE - PAGE_SIZE)) ==
(hva & (PMD_SIZE - PAGE_SIZE))) {
level = 1;
} else if (shift && shift != PAGE_SHIFT) {
/* Adjust PFN */
unsigned long mask = (1ul << shift) - PAGE_SIZE;
pte = __pte(pte_val(pte) | (hva & mask));
}
pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
if (writing || upgrade_write) {
if (pte_val(pte) & _PAGE_WRITE)
pte = __pte(pte_val(pte) | _PAGE_DIRTY);
} else {
pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
}
} }
/* Allocate space in the tree and write the PTE */ /* Allocate space in the tree and write the PTE */


@ -187,12 +187,35 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua); EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry) static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction)
{
long ret;
ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
(*direction == DMA_BIDIRECTIONAL))) {
__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
/*
* kvmppc_rm_tce_iommu_do_map() updates the UA cache after
* calling this so we still get here a valid UA.
*/
if (pua && *pua)
mm_iommu_ua_mark_dirty_rm(mm, be64_to_cpu(*pua));
}
return ret;
}
static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl,
unsigned long entry)
{ {
unsigned long hpa = 0; unsigned long hpa = 0;
enum dma_data_direction dir = DMA_NONE; enum dma_data_direction dir = DMA_NONE;
iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
} }
static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
@ -224,7 +247,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
unsigned long hpa = 0; unsigned long hpa = 0;
long ret; long ret;
if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir)) if (iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir))
/* /*
* real mode xchg can fail if struct page crosses * real mode xchg can fail if struct page crosses
* a page boundary * a page boundary
@ -236,7 +259,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry); ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
if (ret) if (ret)
iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
return ret; return ret;
} }
@ -282,7 +305,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem))) if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
return H_CLOSED; return H_CLOSED;
ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
if (ret) { if (ret) {
mm_iommu_mapped_dec(mem); mm_iommu_mapped_dec(mem);
/* /*
@ -371,7 +394,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
return ret; return ret;
WARN_ON_ONCE_RM(1); WARN_ON_ONCE_RM(1);
kvmppc_rm_clear_tce(stit->tbl, entry); kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
} }
kvmppc_tce_put(stt, entry, tce); kvmppc_tce_put(stt, entry, tce);
@ -520,7 +543,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
goto unlock_exit; goto unlock_exit;
WARN_ON_ONCE_RM(1); WARN_ON_ONCE_RM(1);
kvmppc_rm_clear_tce(stit->tbl, entry); kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
} }
kvmppc_tce_put(stt, entry + i, tce); kvmppc_tce_put(stt, entry + i, tce);
@ -571,7 +594,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
return ret; return ret;
WARN_ON_ONCE_RM(1); WARN_ON_ONCE_RM(1);
kvmppc_rm_clear_tce(stit->tbl, entry); kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
} }
} }


@ -308,55 +308,6 @@ void register_page_bootmem_memmap(unsigned long section_nr,
{ {
} }
/*
* We do not have access to the sparsemem vmemmap, so we fallback to
* walking the list of sparsemem blocks which we already maintain for
* the sake of crashdump. In the long run, we might want to maintain
* a tree if performance of that linear walk becomes a problem.
*
* realmode_pfn_to_page functions can fail due to:
* 1) As real sparsemem blocks do not lay in RAM continously (they
* are in virtual address space which is not available in the real mode),
* the requested page struct can be split between blocks so get_page/put_page
* may fail.
* 2) When huge pages are used, the get_page/put_page API will fail
* in real mode as the linked addresses in the page struct are virtual
* too.
*/
struct page *realmode_pfn_to_page(unsigned long pfn)
{
struct vmemmap_backing *vmem_back;
struct page *page;
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
unsigned long pg_va = (unsigned long) pfn_to_page(pfn);
for (vmem_back = vmemmap_list; vmem_back; vmem_back = vmem_back->list) {
if (pg_va < vmem_back->virt_addr)
continue;
/* After vmemmap_list entry free is possible, need check all */
if ((pg_va + sizeof(struct page)) <=
(vmem_back->virt_addr + page_size)) {
page = (struct page *) (vmem_back->phys + pg_va -
vmem_back->virt_addr);
return page;
}
}
/* Probably that page struct is split between real pages */
return NULL;
}
EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
#else
struct page *realmode_pfn_to_page(unsigned long pfn)
{
struct page *page = pfn_to_page(pfn);
return page;
}
EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
#endif /* CONFIG_SPARSEMEM_VMEMMAP */ #endif /* CONFIG_SPARSEMEM_VMEMMAP */
#ifdef CONFIG_PPC_BOOK3S_64 #ifdef CONFIG_PPC_BOOK3S_64


@ -18,11 +18,15 @@
#include <linux/migrate.h> #include <linux/migrate.h>
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
#include <linux/swap.h> #include <linux/swap.h>
#include <linux/sizes.h>
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
#include <asm/pte-walk.h> #include <asm/pte-walk.h>
static DEFINE_MUTEX(mem_list_mutex); static DEFINE_MUTEX(mem_list_mutex);
#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY 0x1
#define MM_IOMMU_TABLE_GROUP_PAGE_MASK ~(SZ_4K - 1)
struct mm_iommu_table_group_mem_t { struct mm_iommu_table_group_mem_t {
struct list_head next; struct list_head next;
struct rcu_head rcu; struct rcu_head rcu;
@ -263,6 +267,9 @@ static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
if (!page) if (!page)
continue; continue;
if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
SetPageDirty(page);
put_page(page); put_page(page);
mem->hpas[i] = 0; mem->hpas[i] = 0;
} }
@ -360,7 +367,6 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(mm_iommu_lookup_rm);
struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
unsigned long ua, unsigned long entries) unsigned long ua, unsigned long entries)
@ -390,7 +396,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
if (pageshift > mem->pageshift) if (pageshift > mem->pageshift)
return -EFAULT; return -EFAULT;
*hpa = *va | (ua & ~PAGE_MASK); *hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
return 0; return 0;
} }
@ -413,11 +419,31 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
if (!pa) if (!pa)
return -EFAULT; return -EFAULT;
*hpa = *pa | (ua & ~PAGE_MASK); *hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa_rm);
extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
{
struct mm_iommu_table_group_mem_t *mem;
long entry;
void *va;
unsigned long *pa;
mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE);
if (!mem)
return;
entry = (ua - mem->ua) >> PAGE_SHIFT;
va = &mem->hpas[entry];
pa = (void *) vmalloc_to_phys(va);
if (!pa)
return;
*pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
}
long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem) long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
{ {


@@ -481,7 +481,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                break;
        case KVM_CAP_S390_HPAGE_1M:
                r = 0;
-               if (hpage)
+               if (hpage && !kvm_is_ucontrol(kvm))
                        r = 1;
                break;
        case KVM_CAP_S390_MEM_OP:
@@ -691,7 +691,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        r = -EBUSY;
-               else if (!hpage || kvm->arch.use_cmma)
+               else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
                        r = -EINVAL;
                else {
                        r = 0;


@@ -708,11 +708,13 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
                vmaddr |= gaddr & ~PMD_MASK;
                /* Find vma in the parent mm */
                vma = find_vma(gmap->mm, vmaddr);
+               if (!vma)
+                       continue;
                /*
                 * We do not discard pages that are backed by
                 * hugetlbfs, so we don't have to refault them.
                 */
-               if (vma && is_vm_hugetlb_page(vma))
+               if (is_vm_hugetlb_page(vma))
                        continue;
                size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
                zap_page_range(vma, vmaddr, size);


@@ -95,8 +95,8 @@ static void hv_apic_eoi_write(u32 reg, u32 val)
  */
 static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
 {
-       struct ipi_arg_ex **arg;
-       struct ipi_arg_ex *ipi_arg;
+       struct hv_send_ipi_ex **arg;
+       struct hv_send_ipi_ex *ipi_arg;
        unsigned long flags;
        int nr_bank = 0;
        int ret = 1;
@@ -105,7 +105,7 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
                return false;
 
        local_irq_save(flags);
-       arg = (struct ipi_arg_ex **)this_cpu_ptr(hyperv_pcpu_input_arg);
+       arg = (struct hv_send_ipi_ex **)this_cpu_ptr(hyperv_pcpu_input_arg);
 
        ipi_arg = *arg;
        if (unlikely(!ipi_arg))
@@ -135,7 +135,7 @@ ipi_mask_ex_done:
 static bool __send_ipi_mask(const struct cpumask *mask, int vector)
 {
        int cur_cpu, vcpu;
-       struct ipi_arg_non_ex ipi_arg;
+       struct hv_send_ipi ipi_arg;
        int ret = 1;
 
        trace_hyperv_send_ipi_mask(mask, vector);


@@ -726,19 +726,21 @@ struct hv_enlightened_vmcs {
 #define HV_STIMER_AUTOENABLE           (1ULL << 3)
 #define HV_STIMER_SINT(config)         (__u8)(((config) >> 16) & 0x0F)
 
-struct ipi_arg_non_ex {
-       u32 vector;
-       u32 reserved;
-       u64 cpu_mask;
-};
-
 struct hv_vpset {
        u64 format;
        u64 valid_bank_mask;
        u64 bank_contents[];
 };
 
-struct ipi_arg_ex {
+/* HvCallSendSyntheticClusterIpi hypercall */
+struct hv_send_ipi {
+       u32 vector;
+       u32 reserved;
+       u64 cpu_mask;
+};
+
+/* HvCallSendSyntheticClusterIpiEx hypercall */
+struct hv_send_ipi_ex {
        u32 vector;
        u32 reserved;
        struct hv_vpset vp_set;


@@ -869,6 +869,8 @@ struct kvm_arch {
        bool x2apic_format;
        bool x2apic_broadcast_quirk_disabled;
 
+       bool guest_can_read_msr_platform_info;
+
 };
 
 struct kvm_vm_stat {
@@ -1022,6 +1024,7 @@ struct kvm_x86_ops {
        void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
        void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
        void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
+       bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu);
        void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
        void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
        void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
@@ -1055,6 +1058,7 @@ struct kvm_x86_ops {
        bool (*umip_emulated)(void);
 
        int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
+       void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
 
        void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
@@ -1482,6 +1486,7 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
 int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
+void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
 
 int kvm_is_in_guest(void);


@@ -377,6 +377,7 @@ struct kvm_sync_regs {
 #define KVM_X86_QUIRK_LINT0_REENABLED  (1 << 0)
 #define KVM_X86_QUIRK_CD_NW_CLEARED    (1 << 1)
+#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE  (1 << 2)
 
 #define KVM_STATE_NESTED_GUEST_MODE    0x00000001
 #define KVM_STATE_NESTED_RUN_PENDING   0x00000002
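
For illustration only (not code from this series): a VMM that prefers
such APIC MMIO accesses to reach userspace as KVM_EXIT_MMIO, rather
than reading as 0xff, disables the new quirk through
KVM_CAP_DISABLE_QUIRKS, which the apic_mmio_read()/apic_mmio_write()
hunks below consult via kvm_check_has_quirk().  A minimal sketch,
assuming a vm_fd and headers that already define the quirk bit:

/*
 * Sketch only: opt out of KVM_X86_QUIRK_LAPIC_MMIO_HOLE so that APIC
 * MMIO accesses with the APIC hardware-disabled, or in x2APIC mode,
 * are forwarded to userspace instead of being treated as a hole.
 */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int disable_lapic_mmio_hole(int vm_fd)
{
        struct kvm_enable_cap cap;

        memset(&cap, 0, sizeof(cap));
        cap.cap = KVM_CAP_DISABLE_QUIRKS;
        cap.args[0] = KVM_X86_QUIRK_LAPIC_MMIO_HOLE;    /* bitmask of quirks */
        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}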


@@ -1344,9 +1344,8 @@ EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
 
 static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
 {
-       return kvm_apic_hw_enabled(apic) &&
-           addr >= apic->base_address &&
-           addr < apic->base_address + LAPIC_MMIO_LENGTH;
+       return addr >= apic->base_address &&
+               addr < apic->base_address + LAPIC_MMIO_LENGTH;
 }
 
 static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
@@ -1358,6 +1357,15 @@ static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
        if (!apic_mmio_in_range(apic, address))
                return -EOPNOTSUPP;
 
+       if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
+               if (!kvm_check_has_quirk(vcpu->kvm,
+                                        KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
+                       return -EOPNOTSUPP;
+
+               memset(data, 0xff, len);
+               return 0;
+       }
+
        kvm_lapic_reg_read(apic, offset, len, data);
 
        return 0;
@@ -1917,6 +1925,14 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
        if (!apic_mmio_in_range(apic, address))
                return -EOPNOTSUPP;
 
+       if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
+               if (!kvm_check_has_quirk(vcpu->kvm,
+                                        KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
+                       return -EOPNOTSUPP;
+
+               return 0;
+       }
+
        /*
         * APIC register must be aligned on 128-bits boundary.
         * 32/64/128 bits registers must be accessed thru 32 bits.


@@ -899,7 +899,7 @@ static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
 {
        /*
         * Make sure the write to vcpu->mode is not reordered in front of
-        * reads to sptes.  If it does, kvm_commit_zap_page() can see us
+        * reads to sptes.  If it does, kvm_mmu_commit_zap_page() can see us
         * OUTSIDE_GUEST_MODE and proceed to free the shadow page table.
         */
        smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE);
@@ -5417,7 +5417,12 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu)
 {
        MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
-       kvm_init_mmu(vcpu, true);
+       /*
+        * kvm_mmu_setup() is called only on vCPU initialization.
+        * Therefore, no need to reset mmu roots as they are not yet
+        * initialized.
+        */
+       kvm_init_mmu(vcpu, false);
 }
 
 static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm, static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,


@@ -1226,8 +1226,7 @@ static __init int sev_hardware_setup(void)
        min_sev_asid = cpuid_edx(0x8000001F);
 
        /* Initialize SEV ASID bitmap */
-       sev_asid_bitmap = kcalloc(BITS_TO_LONGS(max_sev_asid),
-                                 sizeof(unsigned long), GFP_KERNEL);
+       sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
        if (!sev_asid_bitmap)
                return 1;
 
@@ -1405,7 +1404,7 @@ static __exit void svm_hardware_unsetup(void)
        int cpu;
 
        if (svm_sev_enabled())
-               kfree(sev_asid_bitmap);
+               bitmap_free(sev_asid_bitmap);
 
        for_each_possible_cpu(cpu)
                svm_cpu_uninit(cpu);
@@ -7149,6 +7148,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
        .check_intercept = svm_check_intercept,
        .handle_external_intr = svm_handle_external_intr,
 
+       .request_immediate_exit = __kvm_request_immediate_exit,
+
        .sched_in = svm_sched_in,
 
        .pmu_ops = &amd_pmu_ops,


@ -397,6 +397,7 @@ struct loaded_vmcs {
int cpu; int cpu;
bool launched; bool launched;
bool nmi_known_unmasked; bool nmi_known_unmasked;
bool hv_timer_armed;
/* Support for vnmi-less CPUs */ /* Support for vnmi-less CPUs */
int soft_vnmi_blocked; int soft_vnmi_blocked;
ktime_t entry_time; ktime_t entry_time;
@ -1019,6 +1020,8 @@ struct vcpu_vmx {
int ple_window; int ple_window;
bool ple_window_dirty; bool ple_window_dirty;
bool req_immediate_exit;
/* Support for PML */ /* Support for PML */
#define PML_ENTITY_NUM 512 #define PML_ENTITY_NUM 512
struct page *pml_pg; struct page *pml_pg;
@ -2864,6 +2867,8 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
u16 fs_sel, gs_sel; u16 fs_sel, gs_sel;
int i; int i;
vmx->req_immediate_exit = false;
if (vmx->loaded_cpu_state) if (vmx->loaded_cpu_state)
return; return;
@ -5393,9 +5398,10 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
* To use VMXON (and later other VMX instructions), a guest * To use VMXON (and later other VMX instructions), a guest
* must first be able to turn on cr4.VMXE (see handle_vmon()). * must first be able to turn on cr4.VMXE (see handle_vmon()).
* So basically the check on whether to allow nested VMX * So basically the check on whether to allow nested VMX
* is here. * is here. We operate under the default treatment of SMM,
* so VMX cannot be enabled under SMM.
*/ */
if (!nested_vmx_allowed(vcpu)) if (!nested_vmx_allowed(vcpu) || is_smm(vcpu))
return 1; return 1;
} }
@ -6183,6 +6189,27 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
nested_mark_vmcs12_pages_dirty(vcpu); nested_mark_vmcs12_pages_dirty(vcpu);
} }
static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
void *vapic_page;
u32 vppr;
int rvi;
if (WARN_ON_ONCE(!is_guest_mode(vcpu)) ||
!nested_cpu_has_vid(get_vmcs12(vcpu)) ||
WARN_ON_ONCE(!vmx->nested.virtual_apic_page))
return false;
rvi = vmcs_read16(GUEST_INTR_STATUS) & 0xff;
vapic_page = kmap(vmx->nested.virtual_apic_page);
vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
kunmap(vmx->nested.virtual_apic_page);
return ((rvi & 0xf0) > (vppr & 0xf0));
}
static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
bool nested) bool nested)
{ {
@ -7966,6 +7993,9 @@ static __init int hardware_setup(void)
kvm_x86_ops->enable_log_dirty_pt_masked = NULL; kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
} }
if (!cpu_has_vmx_preemption_timer())
kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) { if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
u64 vmx_msr; u64 vmx_msr;
@ -9208,7 +9238,8 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
static int handle_preemption_timer(struct kvm_vcpu *vcpu) static int handle_preemption_timer(struct kvm_vcpu *vcpu)
{ {
kvm_lapic_expired_hv_timer(vcpu); if (!to_vmx(vcpu)->req_immediate_exit)
kvm_lapic_expired_hv_timer(vcpu);
return 1; return 1;
} }
@ -10595,24 +10626,43 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
msrs[i].host, false); msrs[i].host, false);
} }
static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu) static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val)
{
vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val);
if (!vmx->loaded_vmcs->hv_timer_armed)
vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
PIN_BASED_VMX_PREEMPTION_TIMER);
vmx->loaded_vmcs->hv_timer_armed = true;
}
static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
{ {
struct vcpu_vmx *vmx = to_vmx(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu);
u64 tscl; u64 tscl;
u32 delta_tsc; u32 delta_tsc;
if (vmx->hv_deadline_tsc == -1) if (vmx->req_immediate_exit) {
vmx_arm_hv_timer(vmx, 0);
return; return;
}
tscl = rdtsc(); if (vmx->hv_deadline_tsc != -1) {
if (vmx->hv_deadline_tsc > tscl) tscl = rdtsc();
/* sure to be 32 bit only because checked on set_hv_timer */ if (vmx->hv_deadline_tsc > tscl)
delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >> /* set_hv_timer ensures the delta fits in 32-bits */
cpu_preemption_timer_multi); delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
else cpu_preemption_timer_multi);
delta_tsc = 0; else
delta_tsc = 0;
vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc); vmx_arm_hv_timer(vmx, delta_tsc);
return;
}
if (vmx->loaded_vmcs->hv_timer_armed)
vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
PIN_BASED_VMX_PREEMPTION_TIMER);
vmx->loaded_vmcs->hv_timer_armed = false;
} }
static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
@ -10672,7 +10722,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
atomic_switch_perf_msrs(vmx); atomic_switch_perf_msrs(vmx);
vmx_arm_hv_timer(vcpu); vmx_update_hv_timer(vcpu);
/* /*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if * If this vCPU has touched SPEC_CTRL, restore the guest's value if
@ -11427,16 +11477,18 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value; u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
struct vcpu_vmx *vmx = to_vmx(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu);
if (vcpu->arch.virtual_tsc_khz == 0) /*
return; * A timer value of zero is architecturally guaranteed to cause
* a VMExit prior to executing any instructions in the guest.
/* Make sure short timeouts reliably trigger an immediate vmexit. */
* hrtimer_start does not guarantee this. */ if (preemption_timeout == 0) {
if (preemption_timeout <= 1) {
vmx_preemption_timer_fn(&vmx->nested.preemption_timer); vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
return; return;
} }
if (vcpu->arch.virtual_tsc_khz == 0)
return;
preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
preemption_timeout *= 1000000; preemption_timeout *= 1000000;
do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz); do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
@ -11646,11 +11698,15 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
* bits 15:8 should be zero in posted_intr_nv, * bits 15:8 should be zero in posted_intr_nv,
* the descriptor address has been already checked * the descriptor address has been already checked
* in nested_get_vmcs12_pages. * in nested_get_vmcs12_pages.
*
* bits 5:0 of posted_intr_desc_addr should be zero.
*/ */
if (nested_cpu_has_posted_intr(vmcs12) && if (nested_cpu_has_posted_intr(vmcs12) &&
(!nested_cpu_has_vid(vmcs12) || (!nested_cpu_has_vid(vmcs12) ||
!nested_exit_intr_ack_set(vcpu) || !nested_exit_intr_ack_set(vcpu) ||
vmcs12->posted_intr_nv & 0xff00)) (vmcs12->posted_intr_nv & 0xff00) ||
(vmcs12->posted_intr_desc_addr & 0x3f) ||
(!page_address_valid(vcpu, vmcs12->posted_intr_desc_addr))))
return -EINVAL; return -EINVAL;
/* tpr shadow is needed by all apicv features. */ /* tpr shadow is needed by all apicv features. */
@ -12076,11 +12132,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
exec_control = vmcs12->pin_based_vm_exec_control; exec_control = vmcs12->pin_based_vm_exec_control;
/* Preemption timer setting is only taken from vmcs01. */ /* Preemption timer setting is computed directly in vmx_vcpu_run. */
exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
exec_control |= vmcs_config.pin_based_exec_ctrl; exec_control |= vmcs_config.pin_based_exec_ctrl;
if (vmx->hv_deadline_tsc == -1) exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; vmx->loaded_vmcs->hv_timer_armed = false;
/* Posted interrupts setting is only taken from vmcs12. */ /* Posted interrupts setting is only taken from vmcs12. */
if (nested_cpu_has_posted_intr(vmcs12)) { if (nested_cpu_has_posted_intr(vmcs12)) {
@ -12318,6 +12373,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
return VMXERR_ENTRY_INVALID_CONTROL_FIELD; return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
if (nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id)
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12)) if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD; return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
@ -12863,6 +12921,11 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
return 0; return 0;
} }
static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
{
to_vmx(vcpu)->req_immediate_exit = true;
}
static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
{ {
ktime_t remaining = ktime_t remaining =
@ -13253,12 +13316,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
if (vmx->hv_deadline_tsc == -1)
vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
PIN_BASED_VMX_PREEMPTION_TIMER);
else
vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
PIN_BASED_VMX_PREEMPTION_TIMER);
if (kvm_has_tsc_control) if (kvm_has_tsc_control)
decache_tsc_multiplier(vmx); decache_tsc_multiplier(vmx);
@ -13462,18 +13520,12 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
return -ERANGE; return -ERANGE;
vmx->hv_deadline_tsc = tscl + delta_tsc; vmx->hv_deadline_tsc = tscl + delta_tsc;
vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
PIN_BASED_VMX_PREEMPTION_TIMER);
return delta_tsc == 0; return delta_tsc == 0;
} }
static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
{ {
struct vcpu_vmx *vmx = to_vmx(vcpu); to_vmx(vcpu)->hv_deadline_tsc = -1;
vmx->hv_deadline_tsc = -1;
vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
PIN_BASED_VMX_PREEMPTION_TIMER);
} }
#endif #endif
@ -13954,6 +14006,14 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON)) ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
return -EINVAL; return -EINVAL;
/*
* SMM temporarily disables VMX, so we cannot be in guest mode,
* nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags
* must be zero.
*/
if (is_smm(vcpu) ? kvm_state->flags : kvm_state->vmx.smm.flags)
return -EINVAL;
if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
!(kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)) !(kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
return -EINVAL; return -EINVAL;
@ -14097,6 +14157,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.apicv_post_state_restore = vmx_apicv_post_state_restore, .apicv_post_state_restore = vmx_apicv_post_state_restore,
.hwapic_irr_update = vmx_hwapic_irr_update, .hwapic_irr_update = vmx_hwapic_irr_update,
.hwapic_isr_update = vmx_hwapic_isr_update, .hwapic_isr_update = vmx_hwapic_isr_update,
.guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
.sync_pir_to_irr = vmx_sync_pir_to_irr, .sync_pir_to_irr = vmx_sync_pir_to_irr,
.deliver_posted_interrupt = vmx_deliver_posted_interrupt, .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
@ -14130,6 +14191,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.umip_emulated = vmx_umip_emulated, .umip_emulated = vmx_umip_emulated,
.check_nested_events = vmx_check_nested_events, .check_nested_events = vmx_check_nested_events,
.request_immediate_exit = vmx_request_immediate_exit,
.sched_in = vmx_sched_in, .sched_in = vmx_sched_in,


@ -628,7 +628,7 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu)
gfn_t gfn; gfn_t gfn;
int r; int r;
if (is_long_mode(vcpu) || !is_pae(vcpu)) if (is_long_mode(vcpu) || !is_pae(vcpu) || !is_paging(vcpu))
return false; return false;
if (!test_bit(VCPU_EXREG_PDPTR, if (!test_bit(VCPU_EXREG_PDPTR,
@ -2537,7 +2537,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break; break;
case MSR_PLATFORM_INFO: case MSR_PLATFORM_INFO:
if (!msr_info->host_initiated || if (!msr_info->host_initiated ||
data & ~MSR_PLATFORM_INFO_CPUID_FAULT ||
(!(data & MSR_PLATFORM_INFO_CPUID_FAULT) && (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
cpuid_fault_enabled(vcpu))) cpuid_fault_enabled(vcpu)))
return 1; return 1;
@ -2780,6 +2779,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.osvw.status; msr_info->data = vcpu->arch.osvw.status;
break; break;
case MSR_PLATFORM_INFO: case MSR_PLATFORM_INFO:
if (!msr_info->host_initiated &&
!vcpu->kvm->arch.guest_can_read_msr_platform_info)
return 1;
msr_info->data = vcpu->arch.msr_platform_info; msr_info->data = vcpu->arch.msr_platform_info;
break; break;
case MSR_MISC_FEATURES_ENABLES: case MSR_MISC_FEATURES_ENABLES:
@ -2927,6 +2929,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SPLIT_IRQCHIP: case KVM_CAP_SPLIT_IRQCHIP:
case KVM_CAP_IMMEDIATE_EXIT: case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_GET_MSR_FEATURES: case KVM_CAP_GET_MSR_FEATURES:
case KVM_CAP_MSR_PLATFORM_INFO:
r = 1; r = 1;
break; break;
case KVM_CAP_SYNC_REGS: case KVM_CAP_SYNC_REGS:
@ -4007,19 +4010,23 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break; break;
BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size)); BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
r = -EFAULT;
if (get_user(user_data_size, &user_kvm_nested_state->size)) if (get_user(user_data_size, &user_kvm_nested_state->size))
return -EFAULT; break;
r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state, r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state,
user_data_size); user_data_size);
if (r < 0) if (r < 0)
return r; break;
if (r > user_data_size) { if (r > user_data_size) {
if (put_user(r, &user_kvm_nested_state->size)) if (put_user(r, &user_kvm_nested_state->size))
return -EFAULT; r = -EFAULT;
return -E2BIG; else
r = -E2BIG;
break;
} }
r = 0; r = 0;
break; break;
} }
@ -4031,19 +4038,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (!kvm_x86_ops->set_nested_state) if (!kvm_x86_ops->set_nested_state)
break; break;
r = -EFAULT;
if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state))) if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state)))
return -EFAULT; break;
r = -EINVAL;
if (kvm_state.size < sizeof(kvm_state)) if (kvm_state.size < sizeof(kvm_state))
return -EINVAL; break;
if (kvm_state.flags & if (kvm_state.flags &
~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE)) ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE))
return -EINVAL; break;
/* nested_run_pending implies guest_mode. */ /* nested_run_pending implies guest_mode. */
if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING) if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING)
return -EINVAL; break;
r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state); r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
break; break;
@ -4350,6 +4359,10 @@ split_irqchip_unlock:
kvm->arch.pause_in_guest = true; kvm->arch.pause_in_guest = true;
r = 0; r = 0;
break; break;
case KVM_CAP_MSR_PLATFORM_INFO:
kvm->arch.guest_can_read_msr_platform_info = cap->args[0];
r = 0;
break;
default: default:
r = -EINVAL; r = -EINVAL;
break; break;
@ -7361,6 +7374,12 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
} }
EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page); EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
{
smp_send_reschedule(vcpu->cpu);
}
EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
/* /*
* Returns 1 to let vcpu_run() continue the guest execution loop without * Returns 1 to let vcpu_run() continue the guest execution loop without
* exiting to the userspace. Otherwise, the value will be returned to the * exiting to the userspace. Otherwise, the value will be returned to the
@ -7565,7 +7584,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (req_immediate_exit) { if (req_immediate_exit) {
kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_make_request(KVM_REQ_EVENT, vcpu);
smp_send_reschedule(vcpu->cpu); kvm_x86_ops->request_immediate_exit(vcpu);
} }
trace_kvm_entry(vcpu->vcpu_id); trace_kvm_entry(vcpu->vcpu_id);
@ -7829,6 +7848,29 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
return 0; return 0;
} }
/* Swap (qemu) user FPU context for the guest FPU context. */
static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
preempt_disable();
copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
/* PKRU is separately restored in kvm_x86_ops->run. */
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
~XFEATURE_MASK_PKRU);
preempt_enable();
trace_kvm_fpu(1);
}
/* When vcpu_run ends, restore user space FPU context. */
static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
preempt_disable();
copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
preempt_enable();
++vcpu->stat.fpu_reload;
trace_kvm_fpu(0);
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{ {
int r; int r;
@ -8177,7 +8219,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
kvm_update_cpuid(vcpu); kvm_update_cpuid(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu); idx = srcu_read_lock(&vcpu->kvm->srcu);
if (!is_long_mode(vcpu) && is_pae(vcpu)) { if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu)) {
load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
mmu_reset_needed = 1; mmu_reset_needed = 1;
} }
@ -8406,29 +8448,6 @@ static void fx_init(struct kvm_vcpu *vcpu)
vcpu->arch.cr0 |= X86_CR0_ET; vcpu->arch.cr0 |= X86_CR0_ET;
} }
/* Swap (qemu) user FPU context for the guest FPU context. */
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
preempt_disable();
copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
/* PKRU is separately restored in kvm_x86_ops->run. */
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
~XFEATURE_MASK_PKRU);
preempt_enable();
trace_kvm_fpu(1);
}
/* When vcpu_run ends, restore user space FPU context. */
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
preempt_disable();
copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
preempt_enable();
++vcpu->stat.fpu_reload;
trace_kvm_fpu(0);
}
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{ {
void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask; void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask;
@ -8852,6 +8871,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
pvclock_update_vm_gtod_copy(kvm); pvclock_update_vm_gtod_copy(kvm);
kvm->arch.guest_can_read_msr_platform_info = true;
INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn); INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
@ -9200,6 +9221,13 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
kvm_page_track_flush_slot(kvm, slot); kvm_page_track_flush_slot(kvm, slot);
} }
static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
return (is_guest_mode(vcpu) &&
kvm_x86_ops->guest_apic_has_interrupt &&
kvm_x86_ops->guest_apic_has_interrupt(vcpu));
}
static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
{ {
if (!list_empty_careful(&vcpu->async_pf.done)) if (!list_empty_careful(&vcpu->async_pf.done))
@ -9224,7 +9252,8 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
return true; return true;
if (kvm_arch_interrupt_allowed(vcpu) && if (kvm_arch_interrupt_allowed(vcpu) &&
kvm_cpu_has_interrupt(vcpu)) (kvm_cpu_has_interrupt(vcpu) ||
kvm_guest_apic_has_interrupt(vcpu)))
return true; return true;
if (kvm_hv_has_stimer_pending(vcpu)) if (kvm_hv_has_stimer_pending(vcpu))
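
The first hunks of this file clean up the error paths of the
KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE ioctls.  For readers who have
not used them, here is a hedged userspace sketch (mine, not from the
series) of the read side, using struct kvm_nested_state from the 4.19
uapi headers; bufsz must be at least sizeof(struct kvm_nested_state):

/*
 * Sketch only: read a vCPU's nested virtualization state.  The caller
 * advertises its buffer size in ->size; the kernel either fills the
 * buffer or reports the size it needs and fails with E2BIG (the error
 * path tidied up earlier in this file).
 */
#include <errno.h>
#include <linux/kvm.h>
#include <stdlib.h>
#include <sys/ioctl.h>

static struct kvm_nested_state *get_nested_state(int vcpu_fd, size_t bufsz)
{
        struct kvm_nested_state *state = calloc(1, bufsz);

        if (!state)
                return NULL;
        state->size = bufsz;
        if (ioctl(vcpu_fd, KVM_GET_NESTED_STATE, state)) {
                /* On E2BIG, state->size now holds the size KVM requires. */
                free(state);
                return NULL;
        }
        return state;
}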


@@ -733,8 +733,6 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible);
-void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
-void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
 
 void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);


@@ -952,6 +952,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_HPAGE_1M 156
 #define KVM_CAP_NESTED_STATE 157
 #define KVM_CAP_ARM_INJECT_SERROR_ESR 158
+#define KVM_CAP_MSR_PLATFORM_INFO 159
 
 #ifdef KVM_CAP_IRQ_ROUTING


@@ -1,4 +1,5 @@
 cr4_cpuid_sync_test
+platform_info_test
 set_sregs_test
 sync_regs_test
 vmx_tsc_adjust_test


@@ -6,7 +6,8 @@ UNAME_M := $(shell uname -m)
 LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c
 LIBKVM_x86_64 = lib/x86.c lib/vmx.c
 
-TEST_GEN_PROGS_x86_64 = set_sregs_test
+TEST_GEN_PROGS_x86_64 = platform_info_test
+TEST_GEN_PROGS_x86_64 += set_sregs_test
 TEST_GEN_PROGS_x86_64 += sync_regs_test
 TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test
 TEST_GEN_PROGS_x86_64 += cr4_cpuid_sync_test
@@ -20,7 +21,7 @@ INSTALL_HDR_PATH = $(top_srcdir)/usr
 LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
 LINUX_TOOL_INCLUDE = $(top_srcdir)tools/include
 CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I..
-LDFLAGS += -lpthread
+LDFLAGS += -pthread
 
 # After inclusion, $(OUTPUT) is defined and
 # $(TEST_GEN_PROGS) starts with $(OUTPUT)/


@@ -50,6 +50,7 @@ enum vm_mem_backing_src_type {
 };
 
 int kvm_check_cap(long cap);
+int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
 
 struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
 void kvm_vm_free(struct kvm_vm *vmp);
@@ -108,6 +109,9 @@ void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
                          struct kvm_vcpu_events *events);
 void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
                          struct kvm_vcpu_events *events);
+uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+                 uint64_t msr_value);
 
 const char *exit_reason_str(unsigned int exit_reason);


@ -63,6 +63,29 @@ int kvm_check_cap(long cap)
return ret; return ret;
} }
/* VM Enable Capability
*
* Input Args:
* vm - Virtual Machine
* cap - Capability
*
* Output Args: None
*
* Return: On success, 0. On failure a TEST_ASSERT failure is produced.
*
* Enables a capability (KVM_CAP_*) on the VM.
*/
int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
{
int ret;
ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
" rc: %i errno: %i", ret, errno);
return ret;
}
static void vm_open(struct kvm_vm *vm, int perm) static void vm_open(struct kvm_vm *vm, int perm)
{ {
vm->kvm_fd = open(KVM_DEV_PATH, perm); vm->kvm_fd = open(KVM_DEV_PATH, perm);
@ -1220,6 +1243,72 @@ void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
ret, errno); ret, errno);
} }
/* VCPU Get MSR
*
* Input Args:
* vm - Virtual Machine
* vcpuid - VCPU ID
* msr_index - Index of MSR
*
* Output Args: None
*
* Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
*
* Get value of MSR for VCPU.
*/
uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
struct {
struct kvm_msrs header;
struct kvm_msr_entry entry;
} buffer = {};
int r;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
buffer.header.nmsrs = 1;
buffer.entry.index = msr_index;
r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
" rc: %i errno: %i", r, errno);
return buffer.entry.data;
}
/* VCPU Set MSR
*
* Input Args:
* vm - Virtual Machine
* vcpuid - VCPU ID
* msr_index - Index of MSR
* msr_value - New value of MSR
*
* Output Args: None
*
* Return: On success, nothing. On failure a TEST_ASSERT is produced.
*
* Set value of MSR for VCPU.
*/
void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
uint64_t msr_value)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
struct {
struct kvm_msrs header;
struct kvm_msr_entry entry;
} buffer = {};
int r;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
memset(&buffer, 0, sizeof(buffer));
buffer.header.nmsrs = 1;
buffer.entry.index = msr_index;
buffer.entry.data = msr_value;
r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
" rc: %i errno: %i", r, errno);
}
/* VM VCPU Args Set /* VM VCPU Args Set
* *
* Input Args: * Input Args:


@ -0,0 +1,110 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Test for x86 KVM_CAP_MSR_PLATFORM_INFO
*
* Copyright (C) 2018, Google LLC.
*
* This work is licensed under the terms of the GNU GPL, version 2.
*
* Verifies expected behavior of controlling guest access to
* MSR_PLATFORM_INFO.
*/
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include "test_util.h"
#include "kvm_util.h"
#include "x86.h"
#define VCPU_ID 0
#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
static void guest_code(void)
{
uint64_t msr_platform_info;
for (;;) {
msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
GUEST_SYNC(msr_platform_info);
asm volatile ("inc %r11");
}
}
static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable)
{
struct kvm_enable_cap cap = {};
cap.cap = KVM_CAP_MSR_PLATFORM_INFO;
cap.flags = 0;
cap.args[0] = (int)enable;
vm_enable_cap(vm, &cap);
}
static void test_msr_platform_info_enabled(struct kvm_vm *vm)
{
struct kvm_run *run = vcpu_state(vm, VCPU_ID);
struct guest_args args;
set_msr_platform_info_enabled(vm, true);
vcpu_run(vm, VCPU_ID);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Exit_reason other than KVM_EXIT_IO: %u (%s),\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
guest_args_read(vm, VCPU_ID, &args);
TEST_ASSERT(args.port == GUEST_PORT_SYNC,
"Received IO from port other than PORT_HOST_SYNC: %u\n",
run->io.port);
TEST_ASSERT((args.arg1 & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
"Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
}
static void test_msr_platform_info_disabled(struct kvm_vm *vm)
{
struct kvm_run *run = vcpu_state(vm, VCPU_ID);
set_msr_platform_info_enabled(vm, false);
vcpu_run(vm, VCPU_ID);
TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
"Exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
}
int main(int argc, char *argv[])
{
struct kvm_vm *vm;
struct kvm_run *state;
int rv;
uint64_t msr_platform_info;
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
rv = kvm_check_cap(KVM_CAP_MSR_PLATFORM_INFO);
if (!rv) {
fprintf(stderr,
"KVM_CAP_MSR_PLATFORM_INFO not supported, skip test\n");
exit(KSFT_SKIP);
}
vm = vm_create_default(VCPU_ID, 0, guest_code);
msr_platform_info = vcpu_get_msr(vm, VCPU_ID, MSR_PLATFORM_INFO);
vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO,
msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
test_msr_platform_info_disabled(vm);
test_msr_platform_info_enabled(vm);
vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info);
kvm_vm_free(vm);
return 0;
}