diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 9a128357fd15..2f6ccb065c4a 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -55,6 +55,13 @@ static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) return 1; } +static u64 int_word_to_isc_bits(u32 int_word) +{ + u8 isc = (int_word & 0x38000000) >> 27; + + return (0x80 >> isc) << 24; +} + static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { @@ -96,7 +103,8 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: if (psw_ioint_disabled(vcpu)) return 0; - if (vcpu->arch.sie_block->gcr[6] & inti->io.io_int_word) + if (vcpu->arch.sie_block->gcr[6] & + int_word_to_isc_bits(inti->io.io_int_word)) return 1; return 0; default: @@ -724,7 +732,8 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, list_for_each_entry(iter, &fi->list, list) { if (!is_ioint(iter->type)) continue; - if (cr6 && ((cr6 & iter->io.io_int_word) == 0)) + if (cr6 && + ((cr6 & int_word_to_isc_bits(iter->io.io_int_word)) == 0)) continue; if (schid) { if (((schid & 0x00000000ffff0000) >> 16) != @@ -811,11 +820,14 @@ int kvm_s390_inject_vm(struct kvm *kvm, if (!is_ioint(inti->type)) list_add_tail(&inti->list, &fi->list); else { + u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word); + /* Keep I/O interrupts sorted in isc order. */ list_for_each_entry(iter, &fi->list, list) { if (!is_ioint(iter->type)) continue; - if (iter->io.io_int_word <= inti->io.io_int_word) + if (int_word_to_isc_bits(iter->io.io_int_word) + <= isc_bits) continue; break; } diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2b11318151a4..a335cc6cde72 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2995,14 +2995,11 @@ static int em_aad(struct x86_emulate_ctxt *ctxt) ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al; - ctxt->eflags &= ~(X86_EFLAGS_PF | X86_EFLAGS_SF | X86_EFLAGS_ZF); - - if (!al) - ctxt->eflags |= X86_EFLAGS_ZF; - if (!(al & 1)) - ctxt->eflags |= X86_EFLAGS_PF; - if (al & 0x80) - ctxt->eflags |= X86_EFLAGS_SF; + /* Set PF, ZF, SF */ + ctxt->src.type = OP_IMM; + ctxt->src.val = 0; + ctxt->src.bytes = 1; + fastop(ctxt, em_or); return X86EMUL_CONTINUE; } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 0242a8a1b2e2..1cda1f332654 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -832,8 +832,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) if (host_level == PT_PAGE_TABLE_LEVEL) return host_level; - max_level = kvm_x86_ops->get_lpage_level() < host_level ? - kvm_x86_ops->get_lpage_level() : host_level; + max_level = min(kvm_x86_ops->get_lpage_level(), host_level); for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) @@ -1106,8 +1105,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) /* * Write-protect on the specified @sptep, @pt_protect indicates whether - * spte writ-protection is caused by protecting shadow page table. - * @flush indicates whether tlb need be flushed. + * spte write-protection is caused by protecting shadow page table. * * Note: write protection is difference between drity logging and spte * protection: @@ -1116,10 +1114,9 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) * - for spte protection, the spte can be writable only after unsync-ing * shadow page. * - * Return true if the spte is dropped. + * Return true if tlb need be flushed. */ -static bool -spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect) +static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect) { u64 spte = *sptep; @@ -1129,17 +1126,11 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect) rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep); - if (__drop_large_spte(kvm, sptep)) { - *flush |= true; - return true; - } - if (pt_protect) spte &= ~SPTE_MMU_WRITEABLE; spte = spte & ~PT_WRITABLE_MASK; - *flush |= mmu_spte_update(sptep, spte); - return false; + return mmu_spte_update(sptep, spte); } static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, @@ -1151,11 +1142,8 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { BUG_ON(!(*sptep & PT_PRESENT_MASK)); - if (spte_write_protect(kvm, sptep, &flush, pt_protect)) { - sptep = rmap_get_first(*rmapp, &iter); - continue; - } + flush |= spte_write_protect(kvm, sptep, pt_protect); sptep = rmap_get_next(&iter); } @@ -1959,9 +1947,9 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) { u64 spte; - spte = __pa(sp->spt) - | PT_PRESENT_MASK | PT_ACCESSED_MASK - | PT_WRITABLE_MASK | PT_USER_MASK; + spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | + shadow_user_mask | shadow_x_mask | shadow_accessed_mask; + mmu_spte_set(sptep, spte); } @@ -2400,16 +2388,15 @@ done: } static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, - unsigned pt_access, unsigned pte_access, - int write_fault, int *emulate, int level, gfn_t gfn, - pfn_t pfn, bool speculative, bool host_writable) + unsigned pte_access, int write_fault, int *emulate, + int level, gfn_t gfn, pfn_t pfn, bool speculative, + bool host_writable) { int was_rmapped = 0; int rmap_count; - pgprintk("%s: spte %llx access %x write_fault %d gfn %llx\n", - __func__, *sptep, pt_access, - write_fault, gfn); + pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__, + *sptep, write_fault, gfn); if (is_rmap_spte(*sptep)) { /* @@ -2525,7 +2512,7 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, return -1; for (i = 0; i < ret; i++, gfn++, start++) - mmu_set_spte(vcpu, start, ACC_ALL, access, 0, NULL, + mmu_set_spte(vcpu, start, access, 0, NULL, sp->role.level, gfn, page_to_pfn(pages[i]), true, true); @@ -2586,9 +2573,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { if (iterator.level == level) { - unsigned pte_access = ACC_ALL; - - mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access, + mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, write, &emulate, level, gfn, pfn, prefault, map_writable); direct_pte_prefetch(vcpu, iterator.sptep); @@ -2596,6 +2581,8 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, break; } + drop_large_spte(vcpu, iterator.sptep); + if (!is_shadow_present_pte(*iterator.sptep)) { u64 base_addr = iterator.addr; @@ -2605,11 +2592,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, iterator.level - 1, 1, ACC_ALL, iterator.sptep); - mmu_spte_set(iterator.sptep, - __pa(sp->spt) - | PT_PRESENT_MASK | PT_WRITABLE_MASK - | shadow_user_mask | shadow_x_mask - | shadow_accessed_mask); + link_shadow_page(iterator.sptep, sp); } } return emulate; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 34c5c99323f4..105dd5bd550e 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -326,8 +326,8 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, * we call mmu_set_spte() with host_writable = true because * pte_prefetch_gfn_to_pfn always gets a writable pfn. */ - mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, - NULL, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true); + mmu_set_spte(vcpu, spte, pte_access, 0, NULL, PT_PAGE_TABLE_LEVEL, + gfn, pfn, true, true); return true; } @@ -470,9 +470,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, } clear_sp_write_flooding_count(it.sptep); - mmu_set_spte(vcpu, it.sptep, access, gw->pte_access, - write_fault, &emulate, it.level, - gw->gfn, pfn, prefault, map_writable); + mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, &emulate, + it.level, gw->gfn, pfn, prefault, map_writable); FNAME(pte_prefetch)(vcpu, gw, it.sptep); return emulate; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fe9a9cfadbd6..6667042714cc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -84,8 +84,7 @@ module_param(vmm_exclusive, bool, S_IRUGO); static bool __read_mostly fasteoi = 1; module_param(fasteoi, bool, S_IRUGO); -static bool __read_mostly enable_apicv_reg_vid = 1; -module_param(enable_apicv_reg_vid, bool, S_IRUGO); +static bool __read_mostly enable_apicv_reg_vid; /* * If nested=1, nested virtualization is supported, i.e., guests may use @@ -95,12 +94,8 @@ module_param(enable_apicv_reg_vid, bool, S_IRUGO); static bool __read_mostly nested = 0; module_param(nested, bool, S_IRUGO); -#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ - (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) -#define KVM_GUEST_CR0_MASK \ - (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) -#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \ - (X86_CR0_WP | X86_CR0_NE) +#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) +#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) #define KVM_VM_CR0_ALWAYS_ON \ (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) #define KVM_CR4_GUEST_OWNED_BITS \ @@ -3137,11 +3132,11 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long hw_cr0; + hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK); if (enable_unrestricted_guest) - hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST) - | KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; + hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; else { - hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON; + hw_cr0 |= KVM_VM_CR0_ALWAYS_ON; if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) enter_pmode(vcpu); @@ -5925,7 +5920,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX]; gpa_t bitmap; - if (!nested_cpu_has(get_vmcs12(vcpu), CPU_BASED_USE_MSR_BITMAPS)) + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) return 1; /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 373e17a0d398..3c5bb6fe5280 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6897,33 +6897,28 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, bool user_alloc) { int npages = memslot->npages; - int map_flags = MAP_PRIVATE | MAP_ANONYMOUS; - /* Prevent internal slot pages from being moved by fork()/COW. */ - if (memslot->id >= KVM_USER_MEM_SLOTS) - map_flags = MAP_SHARED | MAP_ANONYMOUS; - - /*To keep backward compatibility with older userspace, - *x86 needs to handle !user_alloc case. + /* + * Only private memory slots need to be mapped here since + * KVM_SET_MEMORY_REGION ioctl is no longer supported. */ - if (!user_alloc) { - if (npages && !old.npages) { - unsigned long userspace_addr; + if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) { + unsigned long userspace_addr; - userspace_addr = vm_mmap(NULL, 0, - npages * PAGE_SIZE, - PROT_READ | PROT_WRITE, - map_flags, - 0); + /* + * MAP_SHARED to prevent internal slot pages from being moved + * by fork()/COW. + */ + userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, 0); - if (IS_ERR((void *)userspace_addr)) - return PTR_ERR((void *)userspace_addr); + if (IS_ERR((void *)userspace_addr)) + return PTR_ERR((void *)userspace_addr); - memslot->userspace_addr = userspace_addr; - } + memslot->userspace_addr = userspace_addr; } - return 0; } @@ -6935,7 +6930,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; - if (!user_alloc && !old.user_alloc && old.npages && !npages) { + if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) { int ret; ret = vm_munmap(old.userspace_addr, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0350e0d5e031..722cae78bbc4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -273,7 +273,6 @@ struct kvm_memory_slot { unsigned long userspace_addr; u32 flags; short id; - bool user_alloc; }; static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2e93630b4add..adc68feb5c5a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -839,7 +839,6 @@ int __kvm_set_memory_region(struct kvm *kvm, r = -ENOMEM; if (change == KVM_MR_CREATE) { - new.user_alloc = user_alloc; new.userspace_addr = mem->userspace_addr; if (kvm_arch_create_memslot(&new, npages))