diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 19693b8add93..e6b5bb012ccb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -99,6 +99,8 @@ struct kvm_nested_guest; struct kvm_vm_stat { ulong remote_tlb_flush; + ulong num_2M_pages; + ulong num_1G_pages; }; struct kvm_vcpu_stat { @@ -377,6 +379,7 @@ struct kvmppc_mmu { void (*slbmte)(struct kvm_vcpu *vcpu, u64 rb, u64 rs); u64 (*slbmfee)(struct kvm_vcpu *vcpu, u64 slb_nr); u64 (*slbmfev)(struct kvm_vcpu *vcpu, u64 slb_nr); + int (*slbfee)(struct kvm_vcpu *vcpu, gva_t eaddr, ulong *ret_slb); void (*slbie)(struct kvm_vcpu *vcpu, u64 slb_nr); void (*slbia)(struct kvm_vcpu *vcpu); /* book3s */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index eb0d79f0ca45..d283d3179fbc 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -36,6 +36,8 @@ #endif #ifdef CONFIG_KVM_BOOK3S_64_HANDLER #include +#include +#include #endif /* @@ -141,6 +143,7 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu); extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags); extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags); extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu); @@ -616,6 +619,18 @@ static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 ir static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { } #endif /* CONFIG_KVM_XIVE */ +#ifdef CONFIG_PPC_POWERNV +static inline bool xics_on_xive(void) +{ + return xive_enabled() && cpu_has_feature(CPU_FTR_HVMODE); +} +#else +static inline bool xics_on_xive(void) +{ + return false; +} +#endif + /* * Prototypes for functions called only from assembler code. * Having prototypes reduces sparse errors. @@ -632,7 +647,7 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, unsigned int yield_count); long kvmppc_h_random(struct kvm_vcpu *vcpu); void kvmhv_commence_exit(int trap); -long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu); +void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu); void kvmppc_subcore_enter_guest(void); void kvmppc_subcore_exit_guest(void); long kvmppc_realmode_hmi_handler(void); diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index a8b8903e1844..17996bc9382b 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -209,7 +209,7 @@ extern int get_mce_event(struct machine_check_event *mce, bool release); extern void release_mce_event(void); extern void machine_check_queue_event(void); extern void machine_check_print_event_info(struct machine_check_event *evt, - bool user_mode); + bool user_mode, bool in_guest); #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index bd933a75f0bc..d501b48f287e 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -301,13 +301,13 @@ static void machine_check_process_queued_event(struct irq_work *work) while (__this_cpu_read(mce_queue_count) > 0) { index = __this_cpu_read(mce_queue_count) - 1; evt = this_cpu_ptr(&mce_event_queue[index]); - machine_check_print_event_info(evt, false); + machine_check_print_event_info(evt, false, false); __this_cpu_dec(mce_queue_count); } } void machine_check_print_event_info(struct machine_check_event *evt, - bool user_mode) + bool user_mode, bool in_guest) { const char *level, *sevstr, *subtype; static const char *mc_ue_types[] = { @@ -387,7 +387,9 @@ void machine_check_print_event_info(struct machine_check_event *evt, evt->disposition == MCE_DISPOSITION_RECOVERED ? "Recovered" : "Not recovered"); - if (user_mode) { + if (in_guest) { + printk("%s Guest NIP: %016llx\n", level, evt->srr0); + } else if (user_mode) { printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level, evt->srr0, current->pid, current->comm); } else { diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 64f1135e7732..3223aec88b2c 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -10,11 +10,6 @@ common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o common-objs-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o -CFLAGS_e500_mmu.o := -I. -CFLAGS_e500_mmu_host.o := -I. -CFLAGS_emulate.o := -I. -CFLAGS_emulate_loadstore.o := -I. - common-objs-y += powerpc.o emulate_loadstore.o obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index bd1a677dd9e4..10c5579d20ce 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -39,6 +39,7 @@ #include "book3s.h" #include "trace.h" +#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU /* #define EXIT_DEBUG */ @@ -71,6 +72,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "pthru_all", VCPU_STAT(pthru_all) }, { "pthru_host", VCPU_STAT(pthru_host) }, { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) }, + { "largepages_2M", VM_STAT(num_2M_pages) }, + { "largepages_1G", VM_STAT(num_1G_pages) }, { NULL } }; @@ -192,6 +195,13 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) } EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio); +void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags) +{ + /* might as well deliver this straight away */ + kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_MACHINE_CHECK, flags); +} +EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check); + void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags) { /* might as well deliver this straight away */ @@ -635,7 +645,7 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, r = -ENXIO; break; } - if (xive_enabled()) + if (xics_on_xive()) *val = get_reg_val(id, kvmppc_xive_get_icp(vcpu)); else *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); @@ -708,7 +718,7 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, r = -ENXIO; break; } - if (xive_enabled()) + if (xics_on_xive()) r = kvmppc_xive_set_icp(vcpu, set_reg_val(id, *val)); else r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val)); @@ -984,7 +994,7 @@ int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall) int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status) { - if (xive_enabled()) + if (xics_on_xive()) return kvmppc_xive_set_irq(kvm, irq_source_id, irq, level, line_status); else @@ -1037,7 +1047,7 @@ static int kvmppc_book3s_init(void) #ifdef CONFIG_KVM_XICS #ifdef CONFIG_KVM_XIVE - if (xive_enabled()) { + if (xics_on_xive()) { kvmppc_xive_init_module(); kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS); } else @@ -1050,7 +1060,7 @@ static int kvmppc_book3s_init(void) static void kvmppc_book3s_exit(void) { #ifdef CONFIG_KVM_XICS - if (xive_enabled()) + if (xics_on_xive()) kvmppc_xive_exit_module(); #endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 612169988a3d..6f789f674048 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -425,6 +425,7 @@ void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu) mmu->slbmte = NULL; mmu->slbmfee = NULL; mmu->slbmfev = NULL; + mmu->slbfee = NULL; mmu->slbie = NULL; mmu->slbia = NULL; } diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index c92dd25bed23..d4b967f0e8d4 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -435,6 +435,19 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb) kvmppc_mmu_map_segment(vcpu, esid << SID_SHIFT); } +static int kvmppc_mmu_book3s_64_slbfee(struct kvm_vcpu *vcpu, gva_t eaddr, + ulong *ret_slb) +{ + struct kvmppc_slb *slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr); + + if (slbe) { + *ret_slb = slbe->origv; + return 0; + } + *ret_slb = 0; + return -ENOENT; +} + static u64 kvmppc_mmu_book3s_64_slbmfee(struct kvm_vcpu *vcpu, u64 slb_nr) { struct kvmppc_slb *slbe; @@ -670,6 +683,7 @@ void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu) mmu->slbmte = kvmppc_mmu_book3s_64_slbmte; mmu->slbmfee = kvmppc_mmu_book3s_64_slbmfee; mmu->slbmfev = kvmppc_mmu_book3s_64_slbmfev; + mmu->slbfee = kvmppc_mmu_book3s_64_slbfee; mmu->slbie = kvmppc_mmu_book3s_64_slbie; mmu->slbia = kvmppc_mmu_book3s_64_slbia; mmu->xlate = kvmppc_mmu_book3s_64_xlate; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index bd2dcfbf00cd..be7bc070eae5 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -441,6 +441,24 @@ int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, { u32 last_inst; + /* + * Fast path - check if the guest physical address corresponds to a + * device on the FAST_MMIO_BUS, if so we can avoid loading the + * instruction all together, then we can just handle it and return. + */ + if (is_store) { + int idx, ret; + + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, (gpa_t) gpa, 0, + NULL); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (!ret) { + kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); + return RESUME_GUEST; + } + } + /* * If we fail, we just return to the guest and try executing it again. */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 1b821c6efdef..f55ef071883f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -403,8 +403,13 @@ void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, if (!memslot) return; } - if (shift) + if (shift) { /* 1GB or 2MB page */ page_size = 1ul << shift; + if (shift == PMD_SHIFT) + kvm->stat.num_2M_pages--; + else if (shift == PUD_SHIFT) + kvm->stat.num_1G_pages--; + } gpa &= ~(page_size - 1); hpa = old & PTE_RPN_MASK; @@ -878,6 +883,14 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, put_page(page); } + /* Increment number of large pages if we (successfully) inserted one */ + if (!ret) { + if (level == 1) + kvm->stat.num_2M_pages++; + else if (level == 2) + kvm->stat.num_1G_pages++; + } + return ret; } diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 532ab79734c7..f02b04973710 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -133,7 +133,6 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, continue; kref_put(&stit->kref, kvm_spapr_tce_liobn_put); - return; } } } @@ -338,14 +337,15 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, } } + kvm_get_kvm(kvm); if (!ret) ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, stt, O_RDWR | O_CLOEXEC); - if (ret >= 0) { + if (ret >= 0) list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); - kvm_get_kvm(kvm); - } + else + kvm_put_kvm(kvm); mutex_unlock(&kvm->lock); diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 8c7e933e942e..6ef7c5f00a49 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -47,6 +47,7 @@ #define OP_31_XOP_SLBMFEV 851 #define OP_31_XOP_EIOIO 854 #define OP_31_XOP_SLBMFEE 915 +#define OP_31_XOP_SLBFEE 979 #define OP_31_XOP_TBEGIN 654 #define OP_31_XOP_TABORT 910 @@ -416,6 +417,23 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->arch.mmu.slbia(vcpu); break; + case OP_31_XOP_SLBFEE: + if (!(inst & 1) || !vcpu->arch.mmu.slbfee) { + return EMULATE_FAIL; + } else { + ulong b, t; + ulong cr = kvmppc_get_cr(vcpu) & ~CR0_MASK; + + b = kvmppc_get_gpr(vcpu, rb); + if (!vcpu->arch.mmu.slbfee(vcpu, b, &t)) + cr |= 2 << CR0_SHIFT; + kvmppc_set_gpr(vcpu, rt, t); + /* copy XER[SO] bit to CR0[SO] */ + cr |= (vcpu->arch.regs.xer & 0x80000000) >> + (31 - CR0_SHIFT); + kvmppc_set_cr(vcpu, cr); + } + break; case OP_31_XOP_SLBMFEE: if (!vcpu->arch.mmu.slbmfee) { emulated = EMULATE_FAIL; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 062f3c92c871..06964350b97a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -922,7 +922,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) case H_IPOLL: case H_XIRR_X: if (kvmppc_xics_enabled(vcpu)) { - if (xive_enabled()) { + if (xics_on_xive()) { ret = H_NOT_AVAILABLE; return RESUME_GUEST; } @@ -937,6 +937,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) ret = kvmppc_h_set_xdabr(vcpu, kvmppc_get_gpr(vcpu, 4), kvmppc_get_gpr(vcpu, 5)); break; +#ifdef CONFIG_SPAPR_TCE_IOMMU case H_GET_TCE: ret = kvmppc_h_get_tce(vcpu, kvmppc_get_gpr(vcpu, 4), kvmppc_get_gpr(vcpu, 5)); @@ -966,6 +967,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) if (ret == H_TOO_HARD) return RESUME_HOST; break; +#endif case H_RANDOM: if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4])) ret = H_HARDWARE; @@ -1215,6 +1217,22 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; case BOOK3S_INTERRUPT_MACHINE_CHECK: + /* Print the MCE event to host console. */ + machine_check_print_event_info(&vcpu->arch.mce_evt, false, true); + + /* + * If the guest can do FWNMI, exit to userspace so it can + * deliver a FWNMI to the guest. + * Otherwise we synthesize a machine check for the guest + * so that it knows that the machine check occurred. + */ + if (!vcpu->kvm->arch.fwnmi_enabled) { + ulong flags = vcpu->arch.shregs.msr & 0x083c0000; + kvmppc_core_queue_machine_check(vcpu, flags); + r = RESUME_GUEST; + break; + } + /* Exit to guest with KVM_EXIT_NMI as exit reason */ run->exit_reason = KVM_EXIT_NMI; run->hw.hardware_exit_reason = vcpu->arch.trap; @@ -1227,8 +1245,6 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV; r = RESUME_HOST; - /* Print the MCE event to host console. */ - machine_check_print_event_info(&vcpu->arch.mce_evt, false); break; case BOOK3S_INTERRUPT_PROGRAM: { @@ -1392,7 +1408,7 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) /* Pass the machine check to the L1 guest */ r = RESUME_HOST; /* Print the MCE event to host console. */ - machine_check_print_event_info(&vcpu->arch.mce_evt, false); + machine_check_print_event_info(&vcpu->arch.mce_evt, false, true); break; /* * We get these next two if the guest accesses a page which it thinks @@ -1431,7 +1447,7 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) case BOOK3S_INTERRUPT_HV_RM_HARD: vcpu->arch.trap = 0; r = RESUME_GUEST; - if (!xive_enabled()) + if (!xics_on_xive()) kvmppc_xics_rm_complete(vcpu, 0); break; default: @@ -3455,6 +3471,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long host_dscr = mfspr(SPRN_DSCR); unsigned long host_tidr = mfspr(SPRN_TIDR); unsigned long host_iamr = mfspr(SPRN_IAMR); + unsigned long host_amr = mfspr(SPRN_AMR); s64 dec; u64 tb; int trap, save_pmu; @@ -3571,13 +3588,15 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, mtspr(SPRN_PSPB, 0); mtspr(SPRN_WORT, 0); - mtspr(SPRN_AMR, 0); mtspr(SPRN_UAMOR, 0); mtspr(SPRN_DSCR, host_dscr); mtspr(SPRN_TIDR, host_tidr); mtspr(SPRN_IAMR, host_iamr); mtspr(SPRN_PSPB, 0); + if (host_amr != vcpu->arch.amr) + mtspr(SPRN_AMR, host_amr); + msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX); store_fp_state(&vcpu->arch.fp); #ifdef CONFIG_ALTIVEC @@ -3650,7 +3669,7 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) #ifdef CONFIG_KVM_XICS static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu) { - if (!xive_enabled()) + if (!xics_on_xive()) return false; return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr < vcpu->arch.xive_saved_state.cppr; @@ -4210,7 +4229,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); srcu_read_unlock(&kvm->srcu, srcu_idx); } else if (r == RESUME_PASSTHROUGH) { - if (WARN_ON(xive_enabled())) + if (WARN_ON(xics_on_xive())) r = H_SUCCESS; else r = kvmppc_xics_rm_complete(vcpu, 0); @@ -4734,7 +4753,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) * If xive is enabled, we route 0x500 interrupts directly * to the guest. */ - if (xive_enabled()) + if (xics_on_xive()) lpcr |= LPCR_LPES; } @@ -4970,7 +4989,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) if (i == pimap->n_mapped) pimap->n_mapped++; - if (xive_enabled()) + if (xics_on_xive()) rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc); else kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq); @@ -5011,7 +5030,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) return -ENODEV; } - if (xive_enabled()) + if (xics_on_xive()) rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc); else kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq); @@ -5343,13 +5362,11 @@ static int kvm_init_subcore_bitmap(void) continue; sibling_subcore_state = - kmalloc_node(sizeof(struct sibling_subcore_state), + kzalloc_node(sizeof(struct sibling_subcore_state), GFP_KERNEL, node); if (!sibling_subcore_state) return -ENOMEM; - memset(sibling_subcore_state, 0, - sizeof(struct sibling_subcore_state)); for (j = 0; j < threads_per_core; j++) { int cpu = first_cpu + j; @@ -5390,7 +5407,7 @@ static int kvmppc_book3s_init_hv(void) * indirectly, via OPAL. */ #ifdef CONFIG_SMP - if (!xive_enabled() && !kvmhv_on_pseries() && + if (!xics_on_xive() && !kvmhv_on_pseries() && !local_paca->kvm_hstate.xics_phys) { struct device_node *np; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index a71e2fc00a4e..b0cf22477e87 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -257,7 +257,7 @@ void kvmhv_rm_send_ipi(int cpu) } /* We should never reach this */ - if (WARN_ON_ONCE(xive_enabled())) + if (WARN_ON_ONCE(xics_on_xive())) return; /* Else poke the target with an IPI */ @@ -577,7 +577,7 @@ unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu) { if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; - if (xive_enabled()) { + if (xics_on_xive()) { if (is_rm()) return xive_rm_h_xirr(vcpu); if (unlikely(!__xive_vm_h_xirr)) @@ -592,7 +592,7 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu) if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; vcpu->arch.regs.gpr[5] = get_tb(); - if (xive_enabled()) { + if (xics_on_xive()) { if (is_rm()) return xive_rm_h_xirr(vcpu); if (unlikely(!__xive_vm_h_xirr)) @@ -606,7 +606,7 @@ unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server) { if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; - if (xive_enabled()) { + if (xics_on_xive()) { if (is_rm()) return xive_rm_h_ipoll(vcpu, server); if (unlikely(!__xive_vm_h_ipoll)) @@ -621,7 +621,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, { if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; - if (xive_enabled()) { + if (xics_on_xive()) { if (is_rm()) return xive_rm_h_ipi(vcpu, server, mfrr); if (unlikely(!__xive_vm_h_ipi)) @@ -635,7 +635,7 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) { if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; - if (xive_enabled()) { + if (xics_on_xive()) { if (is_rm()) return xive_rm_h_cppr(vcpu, cppr); if (unlikely(!__xive_vm_h_cppr)) @@ -649,7 +649,7 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) { if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; - if (xive_enabled()) { + if (xics_on_xive()) { if (is_rm()) return xive_rm_h_eoi(vcpu, xirr); if (unlikely(!__xive_vm_h_eoi)) diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 0787f12c1a1b..8c24c3bea0bf 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -66,10 +66,8 @@ static void reload_slb(struct kvm_vcpu *vcpu) /* * On POWER7, see if we can handle a machine check that occurred inside * the guest in real mode, without switching to the host partition. - * - * Returns: 0 => exit guest, 1 => deliver machine check to guest */ -static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) +static void kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) { unsigned long srr1 = vcpu->arch.shregs.msr; struct machine_check_event mce_evt; @@ -111,52 +109,24 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) } /* - * See if we have already handled the condition in the linux host. - * We assume that if the condition is recovered then linux host - * will have generated an error log event that we will pick - * up and log later. - * Don't release mce event now. We will queue up the event so that - * we can log the MCE event info on host console. + * Now get the event and stash it in the vcpu struct so it can + * be handled by the primary thread in virtual mode. We can't + * call machine_check_queue_event() here if we are running on + * an offline secondary thread. */ - if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE)) - goto out; + if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) { + if (handled && mce_evt.version == MCE_V1) + mce_evt.disposition = MCE_DISPOSITION_RECOVERED; + } else { + memset(&mce_evt, 0, sizeof(mce_evt)); + } - if (mce_evt.version == MCE_V1 && - (mce_evt.severity == MCE_SEV_NO_ERROR || - mce_evt.disposition == MCE_DISPOSITION_RECOVERED)) - handled = 1; - -out: - /* - * For guest that supports FWNMI capability, hook the MCE event into - * vcpu structure. We are going to exit the guest with KVM_EXIT_NMI - * exit reason. On our way to exit we will pull this event from vcpu - * structure and print it from thread 0 of the core/subcore. - * - * For guest that does not support FWNMI capability (old QEMU): - * We are now going enter guest either through machine check - * interrupt (for unhandled errors) or will continue from - * current HSRR0 (for handled errors) in guest. Hence - * queue up the event so that we can log it from host console later. - */ - if (vcpu->kvm->arch.fwnmi_enabled) { - /* - * Hook up the mce event on to vcpu structure. - * First clear the old event. - */ - memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt)); - if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) { - vcpu->arch.mce_evt = mce_evt; - } - } else - machine_check_queue_event(); - - return handled; + vcpu->arch.mce_evt = mce_evt; } -long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) +void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) { - return kvmppc_realmode_mc_power7(vcpu); + kvmppc_realmode_mc_power7(vcpu); } /* Check if dynamic split is in force and return subcore size accordingly. */ diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index b3f5786b20dc..3b9662a4207e 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -144,6 +144,13 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, return; } + if (xive_enabled() && kvmhv_on_pseries()) { + /* No XICS access or hypercalls available, too hard */ + this_icp->rm_action |= XICS_RM_KICK_VCPU; + this_icp->rm_kick_target = vcpu; + return; + } + /* * Check if the core is loaded, * if not, find an available host core to post to wake the VCPU, diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 9b8d50a7cbaf..3a5e719ef032 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -58,6 +58,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) #define STACK_SLOT_DAWR (SFS-56) #define STACK_SLOT_DAWRX (SFS-64) #define STACK_SLOT_HFSCR (SFS-72) +#define STACK_SLOT_AMR (SFS-80) +#define STACK_SLOT_UAMOR (SFS-88) /* the following is used by the P9 short path */ #define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */ @@ -726,11 +728,9 @@ BEGIN_FTR_SECTION mfspr r5, SPRN_TIDR mfspr r6, SPRN_PSSCR mfspr r7, SPRN_PID - mfspr r8, SPRN_IAMR std r5, STACK_SLOT_TID(r1) std r6, STACK_SLOT_PSSCR(r1) std r7, STACK_SLOT_PID(r1) - std r8, STACK_SLOT_IAMR(r1) mfspr r5, SPRN_HFSCR std r5, STACK_SLOT_HFSCR(r1) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) @@ -738,11 +738,18 @@ BEGIN_FTR_SECTION mfspr r5, SPRN_CIABR mfspr r6, SPRN_DAWR mfspr r7, SPRN_DAWRX + mfspr r8, SPRN_IAMR std r5, STACK_SLOT_CIABR(r1) std r6, STACK_SLOT_DAWR(r1) std r7, STACK_SLOT_DAWRX(r1) + std r8, STACK_SLOT_IAMR(r1) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + mfspr r5, SPRN_AMR + std r5, STACK_SLOT_AMR(r1) + mfspr r6, SPRN_UAMOR + std r6, STACK_SLOT_UAMOR(r1) + BEGIN_FTR_SECTION /* Set partition DABR */ /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ @@ -1631,22 +1638,25 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_PSPB, r0 mtspr SPRN_WORT, r0 BEGIN_FTR_SECTION - mtspr SPRN_IAMR, r0 mtspr SPRN_TCSCR, r0 /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */ li r0, 1 sldi r0, r0, 31 mtspr SPRN_MMCRS, r0 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) -8: - /* Save and reset AMR and UAMOR before turning on the MMU */ + /* Save and restore AMR, IAMR and UAMOR before turning on the MMU */ + ld r8, STACK_SLOT_IAMR(r1) + mtspr SPRN_IAMR, r8 + +8: /* Power7 jumps back in here */ mfspr r5,SPRN_AMR mfspr r6,SPRN_UAMOR std r5,VCPU_AMR(r9) std r6,VCPU_UAMOR(r9) - li r6,0 - mtspr SPRN_AMR,r6 + ld r5,STACK_SLOT_AMR(r1) + ld r6,STACK_SLOT_UAMOR(r1) + mtspr SPRN_AMR, r5 mtspr SPRN_UAMOR, r6 /* Switch DSCR back to host value */ @@ -1746,11 +1756,9 @@ BEGIN_FTR_SECTION ld r5, STACK_SLOT_TID(r1) ld r6, STACK_SLOT_PSSCR(r1) ld r7, STACK_SLOT_PID(r1) - ld r8, STACK_SLOT_IAMR(r1) mtspr SPRN_TIDR, r5 mtspr SPRN_PSSCR, r6 mtspr SPRN_PID, r7 - mtspr SPRN_IAMR, r8 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) #ifdef CONFIG_PPC_RADIX_MMU @@ -2264,8 +2272,13 @@ hcall_real_table: .long DOTSYM(kvmppc_h_clear_mod) - hcall_real_table .long DOTSYM(kvmppc_h_clear_ref) - hcall_real_table .long DOTSYM(kvmppc_h_protect) - hcall_real_table +#ifdef CONFIG_SPAPR_TCE_IOMMU .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table .long DOTSYM(kvmppc_rm_h_put_tce) - hcall_real_table +#else + .long 0 /* 0x1c */ + .long 0 /* 0x20 */ +#endif .long 0 /* 0x24 - H_SET_SPRG0 */ .long DOTSYM(kvmppc_h_set_dabr) - hcall_real_table .long 0 /* 0x2c */ @@ -2343,8 +2356,13 @@ hcall_real_table: .long 0 /* 0x12c */ .long 0 /* 0x130 */ .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table +#ifdef CONFIG_SPAPR_TCE_IOMMU .long DOTSYM(kvmppc_rm_h_stuff_tce) - hcall_real_table .long DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table +#else + .long 0 /* 0x138 */ + .long 0 /* 0x13c */ +#endif .long 0 /* 0x140 */ .long 0 /* 0x144 */ .long 0 /* 0x148 */ @@ -2826,49 +2844,15 @@ kvm_cede_exit: #endif /* CONFIG_KVM_XICS */ 3: b guest_exit_cont - /* Try to handle a machine check in real mode */ + /* Try to do machine check recovery in real mode */ machine_check_realmode: mr r3, r9 /* get vcpu pointer */ bl kvmppc_realmode_machine_check nop + /* all machine checks go to virtual mode for further handling */ ld r9, HSTATE_KVM_VCPU(r13) li r12, BOOK3S_INTERRUPT_MACHINE_CHECK - /* - * For the guest that is FWNMI capable, deliver all the MCE errors - * (handled/unhandled) by exiting the guest with KVM_EXIT_NMI exit - * reason. This new approach injects machine check errors in guest - * address space to guest with additional information in the form - * of RTAS event, thus enabling guest kernel to suitably handle - * such errors. - * - * For the guest that is not FWNMI capable (old QEMU) fallback - * to old behaviour for backward compatibility: - * Deliver unhandled/fatal (e.g. UE) MCE errors to guest either - * through machine check interrupt (set HSRR0 to 0x200). - * For handled errors (no-fatal), just go back to guest execution - * with current HSRR0. - * if we receive machine check with MSR(RI=0) then deliver it to - * guest as machine check causing guest to crash. - */ - ld r11, VCPU_MSR(r9) - rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */ - bne guest_exit_cont /* if so, exit to host */ - /* Check if guest is capable of handling NMI exit */ - ld r10, VCPU_KVM(r9) - lbz r10, KVM_FWNMI(r10) - cmpdi r10, 1 /* FWNMI capable? */ - beq guest_exit_cont /* if so, exit with KVM_EXIT_NMI. */ - - /* if not, fall through for backward compatibility. */ - andi. r10, r11, MSR_RI /* check for unrecoverable exception */ - beq 1f /* Deliver a machine check to guest */ - ld r10, VCPU_PC(r9) - cmpdi r3, 0 /* Did we handle MCE ? */ - bne 2f /* Continue guest execution. */ - /* If not, deliver a machine check. SRR0/1 are already set */ -1: li r10, BOOK3S_INTERRUPT_MACHINE_CHECK - bl kvmppc_msr_interrupt -2: b fast_interrupt_c_return + b guest_exit_cont /* * Call C code to handle a HMI in real mode. diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 2d3b2b1cc272..4e178c4c1ea5 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -33,7 +33,7 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) server = be32_to_cpu(args->args[1]); priority = be32_to_cpu(args->args[2]); - if (xive_enabled()) + if (xics_on_xive()) rc = kvmppc_xive_set_xive(vcpu->kvm, irq, server, priority); else rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); @@ -56,7 +56,7 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) irq = be32_to_cpu(args->args[0]); server = priority = 0; - if (xive_enabled()) + if (xics_on_xive()) rc = kvmppc_xive_get_xive(vcpu->kvm, irq, &server, &priority); else rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); @@ -83,7 +83,7 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) irq = be32_to_cpu(args->args[0]); - if (xive_enabled()) + if (xics_on_xive()) rc = kvmppc_xive_int_off(vcpu->kvm, irq); else rc = kvmppc_xics_int_off(vcpu->kvm, irq); @@ -105,7 +105,7 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) irq = be32_to_cpu(args->args[0]); - if (xive_enabled()) + if (xics_on_xive()) rc = kvmppc_xive_int_on(vcpu->kvm, irq); else rc = kvmppc_xics_int_on(vcpu->kvm, irq); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b90a7d154180..8c69af10f91d 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -748,7 +748,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); break; case KVMPPC_IRQ_XICS: - if (xive_enabled()) + if (xics_on_xive()) kvmppc_xive_cleanup_vcpu(vcpu); else kvmppc_xics_free_icp(vcpu); @@ -1931,7 +1931,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = -EPERM; dev = kvm_device_from_filp(f.file); if (dev) { - if (xive_enabled()) + if (xics_on_xive()) r = kvmppc_xive_connect_vcpu(dev, vcpu, cap->args[1]); else r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 79586f127521..05c85be0370f 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -587,7 +587,7 @@ int opal_machine_check(struct pt_regs *regs) evt.version); return 0; } - machine_check_print_event_info(&evt, user_mode(regs)); + machine_check_print_event_info(&evt, user_mode(regs), false); if (opal_recover_mce(regs, &evt)) return 1;