Merge branch 'kvm-ppc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc into HEAD

- Better machine check handling for HV KVM
- Ability to support guests with threads=2, 4 or 8 on POWER9
- Fix for a race that could cause delayed recognition of signals
- Fix for a bug where POWER9 guests could sleep with interrupts pending

commit 8a53e7e572
@@ -4131,6 +4131,34 @@ Parameters: none

Allow use of adapter-interruption suppression.
Returns: 0 on success; -EBUSY if a VCPU has already been created.

7.11 KVM_CAP_PPC_SMT

Architectures: ppc
Parameters: vsmt_mode, flags

Enabling this capability on a VM provides userspace with a way to set
the desired virtual SMT mode (i.e. the number of virtual CPUs per
virtual core). The virtual SMT mode, vsmt_mode, must be a power of 2
between 1 and 8. On POWER8, vsmt_mode must also be no greater than
the number of threads per subcore for the host. Currently flags must
be 0. A successful call to enable this capability will result in
vsmt_mode being returned when the KVM_CAP_PPC_SMT capability is
subsequently queried for the VM. This capability is only supported by
HV KVM, and can only be set before any VCPUs have been created.
The KVM_CAP_PPC_SMT_POSSIBLE capability indicates which virtual SMT
modes are available.

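As an illustration of the userspace side (a sketch added here for clarity, not
part of the patch or of the kernel documentation): the capability is enabled
with the KVM_ENABLE_CAP ioctl on the VM file descriptor, passing vsmt_mode in
args[0] and flags in args[1]. The helper name and error handling below are
assumptions.

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Ask for a virtual SMT mode of vsmt_mode; must run before any vCPU exists. */
    static int set_vsmt_mode(int vm_fd, unsigned long vsmt_mode)
    {
            struct kvm_enable_cap cap = {
                    .cap  = KVM_CAP_PPC_SMT,
                    .args = { vsmt_mode, 0 },   /* args[1] = flags, currently 0 */
            };

            /* ioctl() fails with errno == EBUSY once a vCPU has been created. */
            return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
    }

A subsequent KVM_CHECK_EXTENSION query for KVM_CAP_PPC_SMT on the same VM then
reports the mode that was set.
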
7.12 KVM_CAP_PPC_FWNMI

Architectures: ppc
Parameters: none

With this capability a machine check exception in the guest address
space will cause KVM to exit the guest with NMI exit reason. This
enables QEMU to build an error log and branch to the guest kernel's
registered machine check handling routine. Without this capability
KVM will branch to the guest's 0x200 interrupt vector.

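For illustration only (not from the patch; the helper names and control flow
are assumptions): userspace enables the capability per vCPU, and a KVM_EXIT_NMI
exit then carries the machine check disposition in kvm_run.flags, using the
KVM_RUN_PPC_NMI_DISP_* values this series adds to the uapi header.

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Opt in to FWNMI-style machine check delivery for one vCPU (HV KVM only). */
    static int enable_fwnmi(int vcpu_fd)
    {
            struct kvm_enable_cap cap = { .cap = KVM_CAP_PPC_FWNMI };

            return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
    }

    /* Called when KVM_RUN returns and run->exit_reason == KVM_EXIT_NMI. */
    static void handle_nmi_exit(struct kvm_run *run)
    {
            switch (run->flags & KVM_RUN_PPC_NMI_DISP_MASK) {
            case KVM_RUN_PPC_NMI_DISP_FULLY_RECOV:
                    /* Error was fully recovered; log it and re-enter the guest. */
                    break;
            case KVM_RUN_PPC_NMI_DISP_LIMITED_RECOV:
            case KVM_RUN_PPC_NMI_DISP_NOT_RECOV:
            default:
                    /* Build an error log and deliver the event to the guest. */
                    break;
            }
    }
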

8. Other capabilities.
----------------------

@@ -4292,3 +4320,12 @@ Currently the following bits are defined for the device_irq_level bitmap:

Future versions of kvm may implement additional events. These will get
indicated by returning a higher number from KVM_CHECK_EXTENSION and will be
listed above.

8.10 KVM_CAP_PPC_SMT_POSSIBLE

Architectures: ppc

Querying this capability returns a bitmap indicating the possible
virtual SMT modes that can be set using KVM_CAP_PPC_SMT. If bit N
(counting from the right) is set, then a virtual SMT mode of 2^N is
available.

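Purely as an illustration (an assumed helper, not part of the patch): the
bitmap can be read with KVM_CHECK_EXTENSION on the /dev/kvm or VM file
descriptor and walked to pick a mode to pass to KVM_CAP_PPC_SMT.

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Return the largest virtual SMT mode the host reports, or 0 if none/error. */
    static int max_vsmt_mode(int kvm_fd)
    {
            int bitmap, mode;

            bitmap = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_SMT_POSSIBLE);
            if (bitmap <= 0)
                    return 0;

            /* Bit N set means a virtual SMT mode of 2^N is available. */
            for (mode = 8; mode >= 1; mode >>= 1)
                    if (bitmap & mode)
                            return mode;
            return 0;
    }
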
@ -86,7 +86,6 @@ struct kvmppc_vcore {
|
|||
u16 last_cpu;
|
||||
u8 vcore_state;
|
||||
u8 in_guest;
|
||||
struct kvmppc_vcore *master_vcore;
|
||||
struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
|
||||
struct list_head preempt_list;
|
||||
spinlock_t lock;
|
||||
|
|
|
@ -81,7 +81,7 @@ struct kvm_split_mode {
|
|||
u8 subcore_size;
|
||||
u8 do_nap;
|
||||
u8 napped[MAX_SMT_THREADS];
|
||||
struct kvmppc_vcore *master_vcs[MAX_SUBCORES];
|
||||
struct kvmppc_vcore *vc[MAX_SUBCORES];
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include <asm/page.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/hvcall.h>
|
||||
#include <asm/mce.h>
|
||||
|
||||
#define KVM_MAX_VCPUS NR_CPUS
|
||||
#define KVM_MAX_VCORES NR_CPUS
|
||||
|
@ -267,6 +268,8 @@ struct kvm_resize_hpt;
|
|||
|
||||
struct kvm_arch {
|
||||
unsigned int lpid;
|
||||
unsigned int smt_mode; /* # vcpus per virtual core */
|
||||
unsigned int emul_smt_mode; /* emulated SMT mode, on P9 */
|
||||
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||
unsigned int tlb_sets;
|
||||
struct kvm_hpt_info hpt;
|
||||
|
@ -285,6 +288,7 @@ struct kvm_arch {
|
|||
cpumask_t need_tlb_flush;
|
||||
cpumask_t cpu_in_guest;
|
||||
u8 radix;
|
||||
u8 fwnmi_enabled;
|
||||
pgd_t *pgtable;
|
||||
u64 process_table;
|
||||
struct dentry *debugfs_dir;
|
||||
|
@ -566,6 +570,7 @@ struct kvm_vcpu_arch {
|
|||
ulong wort;
|
||||
ulong tid;
|
||||
ulong psscr;
|
||||
ulong hfscr;
|
||||
ulong shadow_srr1;
|
||||
#endif
|
||||
u32 vrsave; /* also USPRG0 */
|
||||
|
@ -579,7 +584,7 @@ struct kvm_vcpu_arch {
|
|||
ulong mcsrr0;
|
||||
ulong mcsrr1;
|
||||
ulong mcsr;
|
||||
u32 dec;
|
||||
ulong dec;
|
||||
#ifdef CONFIG_BOOKE
|
||||
u32 decar;
|
||||
#endif
|
||||
|
@ -710,6 +715,7 @@ struct kvm_vcpu_arch {
|
|||
unsigned long pending_exceptions;
|
||||
u8 ceded;
|
||||
u8 prodded;
|
||||
u8 doorbell_request;
|
||||
u32 last_inst;
|
||||
|
||||
struct swait_queue_head *wqp;
|
||||
|
@ -722,6 +728,7 @@ struct kvm_vcpu_arch {
|
|||
int prev_cpu;
|
||||
bool timer_running;
|
||||
wait_queue_head_t cpu_run;
|
||||
struct machine_check_event mce_evt; /* Valid if trap == 0x200 */
|
||||
|
||||
struct kvm_vcpu_arch_shared *shared;
|
||||
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
|
||||
|
|
|
@ -315,6 +315,8 @@ struct kvmppc_ops {
|
|||
struct irq_bypass_producer *);
|
||||
int (*configure_mmu)(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg);
|
||||
int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
|
||||
int (*set_smt_mode)(struct kvm *kvm, unsigned long mode,
|
||||
unsigned long flags);
|
||||
};
|
||||
|
||||
extern struct kvmppc_ops *kvmppc_hv_ops;
|
||||
|
|
|
@ -103,6 +103,8 @@
|
|||
#define OP_31_XOP_STBUX 247
|
||||
#define OP_31_XOP_LHZX 279
|
||||
#define OP_31_XOP_LHZUX 311
|
||||
#define OP_31_XOP_MSGSNDP 142
|
||||
#define OP_31_XOP_MSGCLRP 174
|
||||
#define OP_31_XOP_MFSPR 339
|
||||
#define OP_31_XOP_LWAX 341
|
||||
#define OP_31_XOP_LHAX 343
|
||||
|
|
|
@ -60,6 +60,12 @@ struct kvm_regs {
|
|||
|
||||
#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */
|
||||
|
||||
/* flags for kvm_run.flags */
|
||||
#define KVM_RUN_PPC_NMI_DISP_MASK (3 << 0)
|
||||
#define KVM_RUN_PPC_NMI_DISP_FULLY_RECOV (1 << 0)
|
||||
#define KVM_RUN_PPC_NMI_DISP_LIMITED_RECOV (2 << 0)
|
||||
#define KVM_RUN_PPC_NMI_DISP_NOT_RECOV (3 << 0)
|
||||
|
||||
/*
|
||||
* Feature bits indicate which sections of the sregs struct are valid,
|
||||
* both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers
|
||||
|
|
|
@ -485,6 +485,7 @@ int main(void)
|
|||
OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls);
|
||||
OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
|
||||
OFFSET(KVM_RADIX, kvm, arch.radix);
|
||||
OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled);
|
||||
OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
|
||||
OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
|
||||
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
|
||||
|
@ -513,6 +514,7 @@ int main(void)
|
|||
OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
|
||||
OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
|
||||
OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
|
||||
OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
|
||||
OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
|
||||
OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
|
||||
OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc);
|
||||
|
@ -542,6 +544,7 @@ int main(void)
|
|||
OFFSET(VCPU_WORT, kvm_vcpu, arch.wort);
|
||||
OFFSET(VCPU_TID, kvm_vcpu, arch.tid);
|
||||
OFFSET(VCPU_PSSCR, kvm_vcpu, arch.psscr);
|
||||
OFFSET(VCPU_HFSCR, kvm_vcpu, arch.hfscr);
|
||||
OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map);
|
||||
OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest);
|
||||
OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads);
|
||||
|
|
|
@ -405,6 +405,7 @@ void machine_check_print_event_info(struct machine_check_event *evt,
|
|||
break;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
|
||||
|
||||
uint64_t get_mce_fault_addr(struct machine_check_event *evt)
|
||||
{
|
||||
|
|
|
@ -46,6 +46,8 @@
|
|||
#include <linux/of.h>
|
||||
|
||||
#include <asm/reg.h>
|
||||
#include <asm/ppc-opcode.h>
|
||||
#include <asm/disassemble.h>
|
||||
#include <asm/cputable.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
@ -645,6 +647,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
|
|||
unsigned long stolen;
|
||||
unsigned long core_stolen;
|
||||
u64 now;
|
||||
unsigned long flags;
|
||||
|
||||
dt = vcpu->arch.dtl_ptr;
|
||||
vpa = vcpu->arch.vpa.pinned_addr;
|
||||
|
@ -652,10 +655,10 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
|
|||
core_stolen = vcore_stolen_time(vc, now);
|
||||
stolen = core_stolen - vcpu->arch.stolen_logged;
|
||||
vcpu->arch.stolen_logged = core_stolen;
|
||||
spin_lock_irq(&vcpu->arch.tbacct_lock);
|
||||
spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
|
||||
stolen += vcpu->arch.busy_stolen;
|
||||
vcpu->arch.busy_stolen = 0;
|
||||
spin_unlock_irq(&vcpu->arch.tbacct_lock);
|
||||
spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
|
||||
if (!dt || !vpa)
|
||||
return;
|
||||
memset(dt, 0, sizeof(struct dtl_entry));
|
||||
|
@ -675,6 +678,26 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
|
|||
vcpu->arch.dtl.dirty = true;
|
||||
}
|
||||
|
||||
/* See if there is a doorbell interrupt pending for a vcpu */
|
||||
static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int thr;
|
||||
struct kvmppc_vcore *vc;
|
||||
|
||||
if (vcpu->arch.doorbell_request)
|
||||
return true;
|
||||
/*
|
||||
* Ensure that the read of vcore->dpdes comes after the read
|
||||
* of vcpu->doorbell_request. This barrier matches the
|
||||
* lwsync in book3s_hv_rmhandlers.S just before the
|
||||
* fast_guest_return label.
|
||||
*/
|
||||
smp_rmb();
|
||||
vc = vcpu->arch.vcore;
|
||||
thr = vcpu->vcpu_id - vc->first_vcpuid;
|
||||
return !!(vc->dpdes & (1 << thr));
|
||||
}
|
||||
|
||||
static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207)
|
||||
|
@ -926,6 +949,101 @@ static int kvmppc_emulate_debug_inst(struct kvm_run *run,
|
|||
}
|
||||
}
|
||||
|
||||
static void do_nothing(void *x)
|
||||
{
|
||||
}
|
||||
|
||||
static unsigned long kvmppc_read_dpdes(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int thr, cpu, pcpu, nthreads;
|
||||
struct kvm_vcpu *v;
|
||||
unsigned long dpdes;
|
||||
|
||||
nthreads = vcpu->kvm->arch.emul_smt_mode;
|
||||
dpdes = 0;
|
||||
cpu = vcpu->vcpu_id & ~(nthreads - 1);
|
||||
for (thr = 0; thr < nthreads; ++thr, ++cpu) {
|
||||
v = kvmppc_find_vcpu(vcpu->kvm, cpu);
|
||||
if (!v)
|
||||
continue;
|
||||
/*
|
||||
* If the vcpu is currently running on a physical cpu thread,
|
||||
* interrupt it in order to pull it out of the guest briefly,
|
||||
* which will update its vcore->dpdes value.
|
||||
*/
|
||||
pcpu = READ_ONCE(v->cpu);
|
||||
if (pcpu >= 0)
|
||||
smp_call_function_single(pcpu, do_nothing, NULL, 1);
|
||||
if (kvmppc_doorbell_pending(v))
|
||||
dpdes |= 1 << thr;
|
||||
}
|
||||
return dpdes;
|
||||
}
|
||||
|
||||
/*
|
||||
* On POWER9, emulate doorbell-related instructions in order to
|
||||
* give the guest the illusion of running on a multi-threaded core.
|
||||
* The instructions emulated are msgsndp, msgclrp, mfspr TIR,
|
||||
* and mfspr DPDES.
|
||||
*/
|
||||
static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u32 inst, rb, thr;
|
||||
unsigned long arg;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct kvm_vcpu *tvcpu;
|
||||
|
||||
if (!cpu_has_feature(CPU_FTR_ARCH_300))
|
||||
return EMULATE_FAIL;
|
||||
if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE)
|
||||
return RESUME_GUEST;
|
||||
if (get_op(inst) != 31)
|
||||
return EMULATE_FAIL;
|
||||
rb = get_rb(inst);
|
||||
thr = vcpu->vcpu_id & (kvm->arch.emul_smt_mode - 1);
|
||||
switch (get_xop(inst)) {
|
||||
case OP_31_XOP_MSGSNDP:
|
||||
arg = kvmppc_get_gpr(vcpu, rb);
|
||||
if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
|
||||
break;
|
||||
arg &= 0x3f;
|
||||
if (arg >= kvm->arch.emul_smt_mode)
|
||||
break;
|
||||
tvcpu = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - thr + arg);
|
||||
if (!tvcpu)
|
||||
break;
|
||||
if (!tvcpu->arch.doorbell_request) {
|
||||
tvcpu->arch.doorbell_request = 1;
|
||||
kvmppc_fast_vcpu_kick_hv(tvcpu);
|
||||
}
|
||||
break;
|
||||
case OP_31_XOP_MSGCLRP:
|
||||
arg = kvmppc_get_gpr(vcpu, rb);
|
||||
if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
|
||||
break;
|
||||
vcpu->arch.vcore->dpdes = 0;
|
||||
vcpu->arch.doorbell_request = 0;
|
||||
break;
|
||||
case OP_31_XOP_MFSPR:
|
||||
switch (get_sprn(inst)) {
|
||||
case SPRN_TIR:
|
||||
arg = thr;
|
||||
break;
|
||||
case SPRN_DPDES:
|
||||
arg = kvmppc_read_dpdes(vcpu);
|
||||
break;
|
||||
default:
|
||||
return EMULATE_FAIL;
|
||||
}
|
||||
kvmppc_set_gpr(vcpu, get_rt(inst), arg);
|
||||
break;
|
||||
default:
|
||||
return EMULATE_FAIL;
|
||||
}
|
||||
kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
|
||||
static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
|
@ -971,15 +1089,20 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
|||
r = RESUME_GUEST;
|
||||
break;
|
||||
case BOOK3S_INTERRUPT_MACHINE_CHECK:
|
||||
/*
|
||||
* Deliver a machine check interrupt to the guest.
|
||||
* We have to do this, even if the host has handled the
|
||||
* machine check, because machine checks use SRR0/1 and
|
||||
* the interrupt might have trashed guest state in them.
|
||||
*/
|
||||
kvmppc_book3s_queue_irqprio(vcpu,
|
||||
BOOK3S_INTERRUPT_MACHINE_CHECK);
|
||||
r = RESUME_GUEST;
|
||||
/* Exit to guest with KVM_EXIT_NMI as exit reason */
|
||||
run->exit_reason = KVM_EXIT_NMI;
|
||||
run->hw.hardware_exit_reason = vcpu->arch.trap;
|
||||
/* Clear out the old NMI status from run->flags */
|
||||
run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK;
|
||||
/* Now set the NMI status */
|
||||
if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED)
|
||||
run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
|
||||
else
|
||||
run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
|
||||
|
||||
r = RESUME_HOST;
|
||||
/* Print the MCE event to host console. */
|
||||
machine_check_print_event_info(&vcpu->arch.mce_evt, false);
|
||||
break;
|
||||
case BOOK3S_INTERRUPT_PROGRAM:
|
||||
{
|
||||
|
@ -1048,12 +1171,19 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
|||
break;
|
||||
/*
|
||||
* This occurs if the guest (kernel or userspace), does something that
|
||||
* is prohibited by HFSCR. We just generate a program interrupt to
|
||||
* the guest.
|
||||
* is prohibited by HFSCR.
|
||||
* On POWER9, this could be a doorbell instruction that we need
|
||||
* to emulate.
|
||||
* Otherwise, we just generate a program interrupt to the guest.
|
||||
*/
|
||||
case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
|
||||
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
|
||||
r = RESUME_GUEST;
|
||||
r = EMULATE_FAIL;
|
||||
if ((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG)
|
||||
r = kvmppc_emulate_doorbell_instr(vcpu);
|
||||
if (r == EMULATE_FAIL) {
|
||||
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
|
||||
r = RESUME_GUEST;
|
||||
}
|
||||
break;
|
||||
case BOOK3S_INTERRUPT_HV_RM_HARD:
|
||||
r = RESUME_PASSTHROUGH;
|
||||
|
@ -1143,6 +1273,12 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
|
|||
mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_207S))
|
||||
mask |= LPCR_AIL;
|
||||
/*
|
||||
* On POWER9, allow userspace to enable large decrementer for the
|
||||
* guest, whether or not the host has it enabled.
|
||||
*/
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_300))
|
||||
mask |= LPCR_LD;
|
||||
|
||||
/* Broken 32-bit version of LPCR must not clear top bits */
|
||||
if (preserve_top32)
|
||||
|
@ -1486,6 +1622,14 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
|
|||
r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
|
||||
break;
|
||||
case KVM_REG_PPC_TB_OFFSET:
|
||||
/*
|
||||
* POWER9 DD1 has an erratum where writing TBU40 causes
|
||||
* the timebase to lose ticks. So we don't let the
|
||||
* timebase offset be changed on P9 DD1. (It is
|
||||
* initialized to zero.)
|
||||
*/
|
||||
if (cpu_has_feature(CPU_FTR_POWER9_DD1))
|
||||
break;
|
||||
/* round up to multiple of 2^24 */
|
||||
vcpu->arch.vcore->tb_offset =
|
||||
ALIGN(set_reg_val(id, *val), 1UL << 24);
|
||||
|
@ -1603,7 +1747,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
|
|||
init_swait_queue_head(&vcore->wq);
|
||||
vcore->preempt_tb = TB_NIL;
|
||||
vcore->lpcr = kvm->arch.lpcr;
|
||||
vcore->first_vcpuid = core * threads_per_vcore();
|
||||
vcore->first_vcpuid = core * kvm->arch.smt_mode;
|
||||
vcore->kvm = kvm;
|
||||
INIT_LIST_HEAD(&vcore->preempt_list);
|
||||
|
||||
|
@ -1762,14 +1906,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
|
|||
unsigned int id)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
int err = -EINVAL;
|
||||
int err;
|
||||
int core;
|
||||
struct kvmppc_vcore *vcore;
|
||||
|
||||
core = id / threads_per_vcore();
|
||||
if (core >= KVM_MAX_VCORES)
|
||||
goto out;
|
||||
|
||||
err = -ENOMEM;
|
||||
vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
|
||||
if (!vcpu)
|
||||
|
@ -1800,6 +1940,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
|
|||
vcpu->arch.busy_preempt = TB_NIL;
|
||||
vcpu->arch.intr_msr = MSR_SF | MSR_ME;
|
||||
|
||||
/*
|
||||
* Set the default HFSCR for the guest from the host value.
|
||||
* This value is only used on POWER9.
|
||||
* On POWER9 DD1, TM doesn't work, so we make sure to
|
||||
* prevent the guest from using it.
|
||||
* On POWER9, we want to virtualize the doorbell facility, so we
|
||||
* turn off the HFSCR bit, which causes those instructions to trap.
|
||||
*/
|
||||
vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
|
||||
if (!cpu_has_feature(CPU_FTR_TM))
|
||||
vcpu->arch.hfscr &= ~HFSCR_TM;
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_300))
|
||||
vcpu->arch.hfscr &= ~HFSCR_MSGP;
|
||||
|
||||
kvmppc_mmu_book3s_hv_init(vcpu);
|
||||
|
||||
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
|
||||
|
@ -1807,11 +1961,17 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
|
|||
init_waitqueue_head(&vcpu->arch.cpu_run);
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
vcore = kvm->arch.vcores[core];
|
||||
if (!vcore) {
|
||||
vcore = kvmppc_vcore_create(kvm, core);
|
||||
kvm->arch.vcores[core] = vcore;
|
||||
kvm->arch.online_vcores++;
|
||||
vcore = NULL;
|
||||
err = -EINVAL;
|
||||
core = id / kvm->arch.smt_mode;
|
||||
if (core < KVM_MAX_VCORES) {
|
||||
vcore = kvm->arch.vcores[core];
|
||||
if (!vcore) {
|
||||
err = -ENOMEM;
|
||||
vcore = kvmppc_vcore_create(kvm, core);
|
||||
kvm->arch.vcores[core] = vcore;
|
||||
kvm->arch.online_vcores++;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&kvm->lock);
|
||||
|
||||
|
@ -1839,6 +1999,43 @@ out:
|
|||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
|
||||
unsigned long flags)
|
||||
{
|
||||
int err;
|
||||
int esmt = 0;
|
||||
|
||||
if (flags)
|
||||
return -EINVAL;
|
||||
if (smt_mode > MAX_SMT_THREADS || !is_power_of_2(smt_mode))
|
||||
return -EINVAL;
|
||||
if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
|
||||
/*
|
||||
* On POWER8 (or POWER7), the threading mode is "strict",
|
||||
* so we pack smt_mode vcpus per vcore.
|
||||
*/
|
||||
if (smt_mode > threads_per_subcore)
|
||||
return -EINVAL;
|
||||
} else {
|
||||
/*
|
||||
* On POWER9, the threading mode is "loose",
|
||||
* so each vcpu gets its own vcore.
|
||||
*/
|
||||
esmt = smt_mode;
|
||||
smt_mode = 1;
|
||||
}
|
||||
mutex_lock(&kvm->lock);
|
||||
err = -EBUSY;
|
||||
if (!kvm->arch.online_vcores) {
|
||||
kvm->arch.smt_mode = smt_mode;
|
||||
kvm->arch.emul_smt_mode = esmt;
|
||||
err = 0;
|
||||
}
|
||||
mutex_unlock(&kvm->lock);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
|
||||
{
|
||||
if (vpa->pinned_addr)
|
||||
|
@ -1889,7 +2086,7 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
}
|
||||
|
||||
extern void __kvmppc_vcore_entry(void);
|
||||
extern int __kvmppc_vcore_entry(void);
|
||||
|
||||
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
|
||||
struct kvm_vcpu *vcpu)
|
||||
|
@ -1954,10 +2151,6 @@ static void kvmppc_release_hwthread(int cpu)
|
|||
tpaca->kvm_hstate.kvm_split_mode = NULL;
|
||||
}
|
||||
|
||||
static void do_nothing(void *x)
|
||||
{
|
||||
}
|
||||
|
||||
static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int i;
|
||||
|
@ -1975,11 +2168,35 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
|
|||
smp_call_function_single(cpu + i, do_nothing, NULL, 1);
|
||||
}
|
||||
|
||||
static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
|
||||
/*
|
||||
* With radix, the guest can do TLB invalidations itself,
|
||||
* and it could choose to use the local form (tlbiel) if
|
||||
* it is invalidating a translation that has only ever been
|
||||
* used on one vcpu. However, that doesn't mean it has
|
||||
* only ever been used on one physical cpu, since vcpus
|
||||
* can move around between pcpus. To cope with this, when
|
||||
* a vcpu moves from one pcpu to another, we need to tell
|
||||
* any vcpus running on the same core as this vcpu previously
|
||||
* ran to flush the TLB. The TLB is shared between threads,
|
||||
* so we use a single bit in .need_tlb_flush for all 4 threads.
|
||||
*/
|
||||
if (vcpu->arch.prev_cpu != pcpu) {
|
||||
if (vcpu->arch.prev_cpu >= 0 &&
|
||||
cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
|
||||
cpu_first_thread_sibling(pcpu))
|
||||
radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
|
||||
vcpu->arch.prev_cpu = pcpu;
|
||||
}
|
||||
}
|
||||
|
||||
static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
|
||||
{
|
||||
int cpu;
|
||||
struct paca_struct *tpaca;
|
||||
struct kvmppc_vcore *mvc = vc->master_vcore;
|
||||
struct kvm *kvm = vc->kvm;
|
||||
|
||||
cpu = vc->pcpu;
|
||||
|
@ -1989,36 +2206,16 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
|
|||
vcpu->arch.timer_running = 0;
|
||||
}
|
||||
cpu += vcpu->arch.ptid;
|
||||
vcpu->cpu = mvc->pcpu;
|
||||
vcpu->cpu = vc->pcpu;
|
||||
vcpu->arch.thread_cpu = cpu;
|
||||
|
||||
/*
|
||||
* With radix, the guest can do TLB invalidations itself,
|
||||
* and it could choose to use the local form (tlbiel) if
|
||||
* it is invalidating a translation that has only ever been
|
||||
* used on one vcpu. However, that doesn't mean it has
|
||||
* only ever been used on one physical cpu, since vcpus
|
||||
* can move around between pcpus. To cope with this, when
|
||||
* a vcpu moves from one pcpu to another, we need to tell
|
||||
* any vcpus running on the same core as this vcpu previously
|
||||
* ran to flush the TLB. The TLB is shared between threads,
|
||||
* so we use a single bit in .need_tlb_flush for all 4 threads.
|
||||
*/
|
||||
if (kvm_is_radix(kvm) && vcpu->arch.prev_cpu != cpu) {
|
||||
if (vcpu->arch.prev_cpu >= 0 &&
|
||||
cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
|
||||
cpu_first_thread_sibling(cpu))
|
||||
radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
|
||||
vcpu->arch.prev_cpu = cpu;
|
||||
}
|
||||
cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
|
||||
}
|
||||
tpaca = &paca[cpu];
|
||||
tpaca->kvm_hstate.kvm_vcpu = vcpu;
|
||||
tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
|
||||
tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
|
||||
/* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
|
||||
smp_wmb();
|
||||
tpaca->kvm_hstate.kvm_vcore = mvc;
|
||||
tpaca->kvm_hstate.kvm_vcore = vc;
|
||||
if (cpu != smp_processor_id())
|
||||
kvmppc_ipi_thread(cpu);
|
||||
}
|
||||
|
@ -2147,8 +2344,7 @@ struct core_info {
|
|||
int max_subcore_threads;
|
||||
int total_threads;
|
||||
int subcore_threads[MAX_SUBCORES];
|
||||
struct kvm *subcore_vm[MAX_SUBCORES];
|
||||
struct list_head vcs[MAX_SUBCORES];
|
||||
struct kvmppc_vcore *vc[MAX_SUBCORES];
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -2159,17 +2355,12 @@ static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
|
|||
|
||||
static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
|
||||
{
|
||||
int sub;
|
||||
|
||||
memset(cip, 0, sizeof(*cip));
|
||||
cip->n_subcores = 1;
|
||||
cip->max_subcore_threads = vc->num_threads;
|
||||
cip->total_threads = vc->num_threads;
|
||||
cip->subcore_threads[0] = vc->num_threads;
|
||||
cip->subcore_vm[0] = vc->kvm;
|
||||
for (sub = 0; sub < MAX_SUBCORES; ++sub)
|
||||
INIT_LIST_HEAD(&cip->vcs[sub]);
|
||||
list_add_tail(&vc->preempt_list, &cip->vcs[0]);
|
||||
cip->vc[0] = vc;
|
||||
}
|
||||
|
||||
static bool subcore_config_ok(int n_subcores, int n_threads)
|
||||
|
@ -2189,9 +2380,8 @@ static bool subcore_config_ok(int n_subcores, int n_threads)
|
|||
return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
|
||||
}
|
||||
|
||||
static void init_master_vcore(struct kvmppc_vcore *vc)
|
||||
static void init_vcore_to_run(struct kvmppc_vcore *vc)
|
||||
{
|
||||
vc->master_vcore = vc;
|
||||
vc->entry_exit_map = 0;
|
||||
vc->in_guest = 0;
|
||||
vc->napping_threads = 0;
|
||||
|
@ -2216,9 +2406,9 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
|
|||
++cip->n_subcores;
|
||||
cip->total_threads += vc->num_threads;
|
||||
cip->subcore_threads[sub] = vc->num_threads;
|
||||
cip->subcore_vm[sub] = vc->kvm;
|
||||
init_master_vcore(vc);
|
||||
list_move_tail(&vc->preempt_list, &cip->vcs[sub]);
|
||||
cip->vc[sub] = vc;
|
||||
init_vcore_to_run(vc);
|
||||
list_del_init(&vc->preempt_list);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -2286,6 +2476,18 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
|
|||
spin_unlock(&lp->lock);
|
||||
}
|
||||
|
||||
static bool recheck_signals(struct core_info *cip)
|
||||
{
|
||||
int sub, i;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
for (sub = 0; sub < cip->n_subcores; ++sub)
|
||||
for_each_runnable_thread(i, vcpu, cip->vc[sub])
|
||||
if (signal_pending(vcpu->arch.run_task))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
|
||||
{
|
||||
int still_running = 0, i;
|
||||
|
@ -2323,7 +2525,6 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
|
|||
wake_up(&vcpu->arch.cpu_run);
|
||||
}
|
||||
}
|
||||
list_del_init(&vc->preempt_list);
|
||||
if (!is_master) {
|
||||
if (still_running > 0) {
|
||||
kvmppc_vcore_preempt(vc);
|
||||
|
@ -2385,6 +2586,21 @@ static inline int kvmppc_set_host_core(unsigned int cpu)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void set_irq_happened(int trap)
|
||||
{
|
||||
switch (trap) {
|
||||
case BOOK3S_INTERRUPT_EXTERNAL:
|
||||
local_paca->irq_happened |= PACA_IRQ_EE;
|
||||
break;
|
||||
case BOOK3S_INTERRUPT_H_DOORBELL:
|
||||
local_paca->irq_happened |= PACA_IRQ_DBELL;
|
||||
break;
|
||||
case BOOK3S_INTERRUPT_HMI:
|
||||
local_paca->irq_happened |= PACA_IRQ_HMI;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Run a set of guest threads on a physical core.
|
||||
* Called with vc->lock held.
|
||||
|
@ -2395,7 +2611,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
|||
int i;
|
||||
int srcu_idx;
|
||||
struct core_info core_info;
|
||||
struct kvmppc_vcore *pvc, *vcnext;
|
||||
struct kvmppc_vcore *pvc;
|
||||
struct kvm_split_mode split_info, *sip;
|
||||
int split, subcore_size, active;
|
||||
int sub;
|
||||
|
@ -2404,6 +2620,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
|||
int pcpu, thr;
|
||||
int target_threads;
|
||||
int controlled_threads;
|
||||
int trap;
|
||||
|
||||
/*
|
||||
* Remove from the list any threads that have a signal pending
|
||||
|
@ -2418,7 +2635,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
|||
/*
|
||||
* Initialize *vc.
|
||||
*/
|
||||
init_master_vcore(vc);
|
||||
init_vcore_to_run(vc);
|
||||
vc->preempt_tb = TB_NIL;
|
||||
|
||||
/*
|
||||
|
@ -2455,6 +2672,43 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
|||
if (vc->num_threads < target_threads)
|
||||
collect_piggybacks(&core_info, target_threads);
|
||||
|
||||
/*
|
||||
* On radix, arrange for TLB flushing if necessary.
|
||||
* This has to be done before disabling interrupts since
|
||||
* it uses smp_call_function().
|
||||
*/
|
||||
pcpu = smp_processor_id();
|
||||
if (kvm_is_radix(vc->kvm)) {
|
||||
for (sub = 0; sub < core_info.n_subcores; ++sub)
|
||||
for_each_runnable_thread(i, vcpu, core_info.vc[sub])
|
||||
kvmppc_prepare_radix_vcpu(vcpu, pcpu);
|
||||
}
|
||||
|
||||
/*
|
||||
* Hard-disable interrupts, and check resched flag and signals.
|
||||
* If we need to reschedule or deliver a signal, clean up
|
||||
* and return without going into the guest(s).
|
||||
*/
|
||||
local_irq_disable();
|
||||
hard_irq_disable();
|
||||
if (lazy_irq_pending() || need_resched() ||
|
||||
recheck_signals(&core_info)) {
|
||||
local_irq_enable();
|
||||
vc->vcore_state = VCORE_INACTIVE;
|
||||
/* Unlock all except the primary vcore */
|
||||
for (sub = 1; sub < core_info.n_subcores; ++sub) {
|
||||
pvc = core_info.vc[sub];
|
||||
/* Put back on to the preempted vcores list */
|
||||
kvmppc_vcore_preempt(pvc);
|
||||
spin_unlock(&pvc->lock);
|
||||
}
|
||||
for (i = 0; i < controlled_threads; ++i)
|
||||
kvmppc_release_hwthread(pcpu + i);
|
||||
return;
|
||||
}
|
||||
|
||||
kvmppc_clear_host_core(pcpu);
|
||||
|
||||
/* Decide on micro-threading (split-core) mode */
|
||||
subcore_size = threads_per_subcore;
|
||||
cmd_bit = stat_bit = 0;
|
||||
|
@ -2478,13 +2732,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
|||
split_info.ldbar = mfspr(SPRN_LDBAR);
|
||||
split_info.subcore_size = subcore_size;
|
||||
for (sub = 0; sub < core_info.n_subcores; ++sub)
|
||||
split_info.master_vcs[sub] =
|
||||
list_first_entry(&core_info.vcs[sub],
|
||||
struct kvmppc_vcore, preempt_list);
|
||||
split_info.vc[sub] = core_info.vc[sub];
|
||||
/* order writes to split_info before kvm_split_mode pointer */
|
||||
smp_wmb();
|
||||
}
|
||||
pcpu = smp_processor_id();
|
||||
for (thr = 0; thr < controlled_threads; ++thr)
|
||||
paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
|
||||
|
||||
|
@ -2504,32 +2755,29 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
|||
}
|
||||
}
|
||||
|
||||
kvmppc_clear_host_core(pcpu);
|
||||
|
||||
/* Start all the threads */
|
||||
active = 0;
|
||||
for (sub = 0; sub < core_info.n_subcores; ++sub) {
|
||||
thr = subcore_thread_map[sub];
|
||||
thr0_done = false;
|
||||
active |= 1 << thr;
|
||||
list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
|
||||
pvc->pcpu = pcpu + thr;
|
||||
for_each_runnable_thread(i, vcpu, pvc) {
|
||||
kvmppc_start_thread(vcpu, pvc);
|
||||
kvmppc_create_dtl_entry(vcpu, pvc);
|
||||
trace_kvm_guest_enter(vcpu);
|
||||
if (!vcpu->arch.ptid)
|
||||
thr0_done = true;
|
||||
active |= 1 << (thr + vcpu->arch.ptid);
|
||||
}
|
||||
/*
|
||||
* We need to start the first thread of each subcore
|
||||
* even if it doesn't have a vcpu.
|
||||
*/
|
||||
if (pvc->master_vcore == pvc && !thr0_done)
|
||||
kvmppc_start_thread(NULL, pvc);
|
||||
thr += pvc->num_threads;
|
||||
pvc = core_info.vc[sub];
|
||||
pvc->pcpu = pcpu + thr;
|
||||
for_each_runnable_thread(i, vcpu, pvc) {
|
||||
kvmppc_start_thread(vcpu, pvc);
|
||||
kvmppc_create_dtl_entry(vcpu, pvc);
|
||||
trace_kvm_guest_enter(vcpu);
|
||||
if (!vcpu->arch.ptid)
|
||||
thr0_done = true;
|
||||
active |= 1 << (thr + vcpu->arch.ptid);
|
||||
}
|
||||
/*
|
||||
* We need to start the first thread of each subcore
|
||||
* even if it doesn't have a vcpu.
|
||||
*/
|
||||
if (!thr0_done)
|
||||
kvmppc_start_thread(NULL, pvc);
|
||||
thr += pvc->num_threads;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2556,17 +2804,27 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
|||
trace_kvmppc_run_core(vc, 0);
|
||||
|
||||
for (sub = 0; sub < core_info.n_subcores; ++sub)
|
||||
list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
|
||||
spin_unlock(&pvc->lock);
|
||||
spin_unlock(&core_info.vc[sub]->lock);
|
||||
|
||||
/*
|
||||
* Interrupts will be enabled once we get into the guest,
|
||||
* so tell lockdep that we're about to enable interrupts.
|
||||
*/
|
||||
trace_hardirqs_on();
|
||||
|
||||
guest_enter();
|
||||
|
||||
srcu_idx = srcu_read_lock(&vc->kvm->srcu);
|
||||
|
||||
__kvmppc_vcore_entry();
|
||||
trap = __kvmppc_vcore_entry();
|
||||
|
||||
srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
|
||||
|
||||
guest_exit();
|
||||
|
||||
trace_hardirqs_off();
|
||||
set_irq_happened(trap);
|
||||
|
||||
spin_lock(&vc->lock);
|
||||
/* prevent other vcpu threads from doing kvmppc_start_thread() now */
|
||||
vc->vcore_state = VCORE_EXITING;
|
||||
|
@ -2594,6 +2852,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
|||
split_info.do_nap = 0;
|
||||
}
|
||||
|
||||
kvmppc_set_host_core(pcpu);
|
||||
|
||||
local_irq_enable();
|
||||
|
||||
/* Let secondaries go back to the offline loop */
|
||||
for (i = 0; i < controlled_threads; ++i) {
|
||||
kvmppc_release_hwthread(pcpu + i);
|
||||
|
@ -2602,18 +2864,15 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
|||
cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
|
||||
}
|
||||
|
||||
kvmppc_set_host_core(pcpu);
|
||||
|
||||
spin_unlock(&vc->lock);
|
||||
|
||||
/* make sure updates to secondary vcpu structs are visible now */
|
||||
smp_mb();
|
||||
guest_exit();
|
||||
|
||||
for (sub = 0; sub < core_info.n_subcores; ++sub)
|
||||
list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
|
||||
preempt_list)
|
||||
post_guest_process(pvc, pvc == vc);
|
||||
for (sub = 0; sub < core_info.n_subcores; ++sub) {
|
||||
pvc = core_info.vc[sub];
|
||||
post_guest_process(pvc, pvc == vc);
|
||||
}
|
||||
|
||||
spin_lock(&vc->lock);
|
||||
preempt_enable();
|
||||
|
@ -2658,6 +2917,30 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
|
|||
vc->halt_poll_ns /= halt_poll_ns_shrink;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KVM_XICS
|
||||
static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!xive_enabled())
|
||||
return false;
|
||||
return vcpu->arch.xive_saved_state.pipr <
|
||||
vcpu->arch.xive_saved_state.cppr;
|
||||
}
|
||||
#else
|
||||
static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif /* CONFIG_KVM_XICS */
|
||||
|
||||
static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vcpu->arch.pending_exceptions || vcpu->arch.prodded ||
|
||||
kvmppc_doorbell_pending(vcpu) || xive_interrupt_pending(vcpu))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if any of the runnable vcpus on the vcore have pending
|
||||
* exceptions or are no longer ceded
|
||||
|
@ -2668,8 +2951,7 @@ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
|
|||
int i;
|
||||
|
||||
for_each_runnable_thread(i, vcpu, vc) {
|
||||
if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded ||
|
||||
vcpu->arch.prodded)
|
||||
if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu))
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -2811,15 +3093,14 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
|||
*/
|
||||
if (!signal_pending(current)) {
|
||||
if (vc->vcore_state == VCORE_PIGGYBACK) {
|
||||
struct kvmppc_vcore *mvc = vc->master_vcore;
|
||||
if (spin_trylock(&mvc->lock)) {
|
||||
if (mvc->vcore_state == VCORE_RUNNING &&
|
||||
!VCORE_IS_EXITING(mvc)) {
|
||||
if (spin_trylock(&vc->lock)) {
|
||||
if (vc->vcore_state == VCORE_RUNNING &&
|
||||
!VCORE_IS_EXITING(vc)) {
|
||||
kvmppc_create_dtl_entry(vcpu, vc);
|
||||
kvmppc_start_thread(vcpu, vc);
|
||||
trace_kvm_guest_enter(vcpu);
|
||||
}
|
||||
spin_unlock(&mvc->lock);
|
||||
spin_unlock(&vc->lock);
|
||||
}
|
||||
} else if (vc->vcore_state == VCORE_RUNNING &&
|
||||
!VCORE_IS_EXITING(vc)) {
|
||||
|
@ -2855,7 +3136,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
|||
break;
|
||||
n_ceded = 0;
|
||||
for_each_runnable_thread(i, v, vc) {
|
||||
if (!v->arch.pending_exceptions && !v->arch.prodded)
|
||||
if (!kvmppc_vcpu_woken(v))
|
||||
n_ceded += v->arch.ceded;
|
||||
else
|
||||
v->arch.ceded = 0;
|
||||
|
@ -2907,12 +3188,36 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
|||
{
|
||||
int r;
|
||||
int srcu_idx;
|
||||
unsigned long ebb_regs[3] = {}; /* shut up GCC */
|
||||
unsigned long user_tar = 0;
|
||||
unsigned int user_vrsave;
|
||||
|
||||
if (!vcpu->arch.sane) {
|
||||
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't allow entry with a suspended transaction, because
|
||||
* the guest entry/exit code will lose it.
|
||||
* If the guest has TM enabled, save away their TM-related SPRs
|
||||
* (they will get restored by the TM unavailable interrupt).
|
||||
*/
|
||||
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
||||
if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
|
||||
(current->thread.regs->msr & MSR_TM)) {
|
||||
if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
|
||||
run->exit_reason = KVM_EXIT_FAIL_ENTRY;
|
||||
run->fail_entry.hardware_entry_failure_reason = 0;
|
||||
return -EINVAL;
|
||||
}
|
||||
current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
|
||||
current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
|
||||
current->thread.tm_texasr = mfspr(SPRN_TEXASR);
|
||||
current->thread.regs->msr &= ~MSR_TM;
|
||||
}
|
||||
#endif
|
||||
|
||||
kvmppc_core_prepare_to_enter(vcpu);
|
||||
|
||||
/* No need to go into the guest when all we'll do is come back out */
|
||||
|
@ -2934,6 +3239,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
|||
|
||||
flush_all_to_thread(current);
|
||||
|
||||
/* Save userspace EBB and other register values */
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
|
||||
ebb_regs[0] = mfspr(SPRN_EBBHR);
|
||||
ebb_regs[1] = mfspr(SPRN_EBBRR);
|
||||
ebb_regs[2] = mfspr(SPRN_BESCR);
|
||||
user_tar = mfspr(SPRN_TAR);
|
||||
}
|
||||
user_vrsave = mfspr(SPRN_VRSAVE);
|
||||
|
||||
vcpu->arch.wqp = &vcpu->arch.vcore->wq;
|
||||
vcpu->arch.pgdir = current->mm->pgd;
|
||||
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
|
||||
|
@ -2960,6 +3274,16 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
|||
}
|
||||
} while (is_kvmppc_resume_guest(r));
|
||||
|
||||
/* Restore userspace EBB and other register values */
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
|
||||
mtspr(SPRN_EBBHR, ebb_regs[0]);
|
||||
mtspr(SPRN_EBBRR, ebb_regs[1]);
|
||||
mtspr(SPRN_BESCR, ebb_regs[2]);
|
||||
mtspr(SPRN_TAR, user_tar);
|
||||
mtspr(SPRN_FSCR, current->thread.fscr);
|
||||
}
|
||||
mtspr(SPRN_VRSAVE, user_vrsave);
|
||||
|
||||
out:
|
||||
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
|
||||
atomic_dec(&vcpu->kvm->arch.vcpus_running);
|
||||
|
@ -3467,6 +3791,19 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
|
|||
if (!cpu_has_feature(CPU_FTR_ARCH_300))
|
||||
kvm_hv_vm_activated();
|
||||
|
||||
/*
|
||||
* Initialize smt_mode depending on processor.
|
||||
* POWER8 and earlier have to use "strict" threading, where
|
||||
* all vCPUs in a vcore have to run on the same (sub)core,
|
||||
* whereas on POWER9 the threads can each run a different
|
||||
* guest.
|
||||
*/
|
||||
if (!cpu_has_feature(CPU_FTR_ARCH_300))
|
||||
kvm->arch.smt_mode = threads_per_subcore;
|
||||
else
|
||||
kvm->arch.smt_mode = 1;
|
||||
kvm->arch.emul_smt_mode = 1;
|
||||
|
||||
/*
|
||||
* Create a debugfs directory for the VM
|
||||
*/
|
||||
|
@ -3896,6 +4233,7 @@ static struct kvmppc_ops kvm_ops_hv = {
|
|||
#endif
|
||||
.configure_mmu = kvmhv_configure_mmu,
|
||||
.get_rmmu_info = kvmhv_get_rmmu_info,
|
||||
.set_smt_mode = kvmhv_set_smt_mode,
|
||||
};
|
||||
|
||||
static int kvm_init_subcore_bitmap(void)
|
||||
|
|
|
@ -307,7 +307,7 @@ void kvmhv_commence_exit(int trap)
|
|||
return;
|
||||
|
||||
for (i = 0; i < MAX_SUBCORES; ++i) {
|
||||
vc = sip->master_vcs[i];
|
||||
vc = sip->vc[i];
|
||||
if (!vc)
|
||||
break;
|
||||
do {
|
||||
|
|
|
@ -61,13 +61,6 @@ BEGIN_FTR_SECTION
|
|||
std r3, HSTATE_DABR(r13)
|
||||
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
|
||||
|
||||
/* Hard-disable interrupts */
|
||||
mfmsr r10
|
||||
std r10, HSTATE_HOST_MSR(r13)
|
||||
rldicl r10,r10,48,1
|
||||
rotldi r10,r10,16
|
||||
mtmsrd r10,1
|
||||
|
||||
/* Save host PMU registers */
|
||||
BEGIN_FTR_SECTION
|
||||
/* Work around P8 PMAE bug */
|
||||
|
@ -121,10 +114,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
|||
* Put whatever is in the decrementer into the
|
||||
* hypervisor decrementer.
|
||||
*/
|
||||
BEGIN_FTR_SECTION
|
||||
ld r5, HSTATE_KVM_VCORE(r13)
|
||||
ld r6, VCORE_KVM(r5)
|
||||
ld r9, KVM_HOST_LPCR(r6)
|
||||
andis. r9, r9, LPCR_LD@h
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
mfspr r8,SPRN_DEC
|
||||
mftb r7
|
||||
mtspr SPRN_HDEC,r8
|
||||
BEGIN_FTR_SECTION
|
||||
/* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
|
||||
bne 32f
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
extsw r8,r8
|
||||
32: mtspr SPRN_HDEC,r8
|
||||
add r8,r8,r7
|
||||
std r8,HSTATE_DECEXP(r13)
|
||||
|
||||
|
@ -143,6 +146,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
|||
*
|
||||
* R1 = host R1
|
||||
* R2 = host R2
|
||||
* R3 = trap number on this thread
|
||||
* R12 = exit handler id
|
||||
* R13 = PACA
|
||||
*/
|
||||
|
|
|
@ -130,12 +130,28 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
|
|||
|
||||
out:
|
||||
/*
|
||||
* For guest that supports FWNMI capability, hook the MCE event into
|
||||
* vcpu structure. We are going to exit the guest with KVM_EXIT_NMI
|
||||
* exit reason. On our way to exit we will pull this event from vcpu
|
||||
* structure and print it from thread 0 of the core/subcore.
|
||||
*
|
||||
* For guest that does not support FWNMI capability (old QEMU):
|
||||
* We are now going to enter the guest either through machine check
|
||||
* interrupt (for unhandled errors) or will continue from
|
||||
* current HSRR0 (for handled errors) in guest. Hence
|
||||
* queue up the event so that we can log it from host console later.
|
||||
*/
|
||||
machine_check_queue_event();
|
||||
if (vcpu->kvm->arch.fwnmi_enabled) {
|
||||
/*
|
||||
* Hook up the mce event on to vcpu structure.
|
||||
* First clear the old event.
|
||||
*/
|
||||
memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt));
|
||||
if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
|
||||
vcpu->arch.mce_evt = mce_evt;
|
||||
}
|
||||
} else
|
||||
machine_check_queue_event();
|
||||
|
||||
return handled;
|
||||
}
|
||||
|
|
|
@ -32,12 +32,30 @@
|
|||
#include <asm/opal.h>
|
||||
#include <asm/xive-regs.h>
|
||||
|
||||
/* Sign-extend HDEC if not on POWER9 */
|
||||
#define EXTEND_HDEC(reg) \
|
||||
BEGIN_FTR_SECTION; \
|
||||
extsw reg, reg; \
|
||||
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
|
||||
|
||||
#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
|
||||
|
||||
/* Values in HSTATE_NAPPING(r13) */
|
||||
#define NAPPING_CEDE 1
|
||||
#define NAPPING_NOVCPU 2
|
||||
|
||||
/* Stack frame offsets for kvmppc_hv_entry */
|
||||
#define SFS 160
|
||||
#define STACK_SLOT_TRAP (SFS-4)
|
||||
#define STACK_SLOT_TID (SFS-16)
|
||||
#define STACK_SLOT_PSSCR (SFS-24)
|
||||
#define STACK_SLOT_PID (SFS-32)
|
||||
#define STACK_SLOT_IAMR (SFS-40)
|
||||
#define STACK_SLOT_CIABR (SFS-48)
|
||||
#define STACK_SLOT_DAWR (SFS-56)
|
||||
#define STACK_SLOT_DAWRX (SFS-64)
|
||||
#define STACK_SLOT_HFSCR (SFS-72)
|
||||
|
||||
/*
|
||||
* Call kvmppc_hv_entry in real mode.
|
||||
* Must be called with interrupts hard-disabled.
|
||||
|
@ -51,6 +69,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
|
|||
std r0, PPC_LR_STKOFF(r1)
|
||||
stdu r1, -112(r1)
|
||||
mfmsr r10
|
||||
std r10, HSTATE_HOST_MSR(r13)
|
||||
LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
|
||||
li r0,MSR_RI
|
||||
andc r0,r10,r0
|
||||
|
@ -135,20 +154,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
|||
stb r0, HSTATE_HWTHREAD_REQ(r13)
|
||||
|
||||
/*
|
||||
* For external and machine check interrupts, we need
|
||||
* to call the Linux handler to process the interrupt.
|
||||
* We do that by jumping to absolute address 0x500 for
|
||||
* external interrupts, or the machine_check_fwnmi label
|
||||
* for machine checks (since firmware might have patched
|
||||
* the vector area at 0x200). The [h]rfid at the end of the
|
||||
* handler will return to the book3s_hv_interrupts.S code.
|
||||
* For other interrupts we do the rfid to get back
|
||||
* to the book3s_hv_interrupts.S code here.
|
||||
* For external interrupts we need to call the Linux
|
||||
* handler to process the interrupt. We do that by jumping
|
||||
* to absolute address 0x500 for external interrupts.
|
||||
* The [h]rfid at the end of the handler will return to
|
||||
* the book3s_hv_interrupts.S code. For other interrupts
|
||||
* we do the rfid to get back to the book3s_hv_interrupts.S
|
||||
* code here.
|
||||
*/
|
||||
ld r8, 112+PPC_LR_STKOFF(r1)
|
||||
addi r1, r1, 112
|
||||
ld r7, HSTATE_HOST_MSR(r13)
|
||||
|
||||
/* Return the trap number on this thread as the return value */
|
||||
mr r3, r12
|
||||
|
||||
/*
|
||||
* If we came back from the guest via a relocation-on interrupt,
|
||||
* we will be in virtual mode at this point, which makes it a
|
||||
|
@ -158,62 +178,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
|||
andi. r0, r0, MSR_IR /* in real mode? */
|
||||
bne .Lvirt_return
|
||||
|
||||
cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
|
||||
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
|
||||
beq 11f
|
||||
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
|
||||
beq 15f /* Invoke the H_DOORBELL handler */
|
||||
cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI
|
||||
beq cr2, 14f /* HMI check */
|
||||
|
||||
/* RFI into the highmem handler, or branch to interrupt handler */
|
||||
/* RFI into the highmem handler */
|
||||
mfmsr r6
|
||||
li r0, MSR_RI
|
||||
andc r6, r6, r0
|
||||
mtmsrd r6, 1 /* Clear RI in MSR */
|
||||
mtsrr0 r8
|
||||
mtsrr1 r7
|
||||
beq cr1, 13f /* machine check */
|
||||
RFI
|
||||
|
||||
/* On POWER7, we have external interrupts set to use HSRR0/1 */
|
||||
11: mtspr SPRN_HSRR0, r8
|
||||
mtspr SPRN_HSRR1, r7
|
||||
ba 0x500
|
||||
|
||||
13: b machine_check_fwnmi
|
||||
|
||||
14: mtspr SPRN_HSRR0, r8
|
||||
mtspr SPRN_HSRR1, r7
|
||||
b hmi_exception_after_realmode
|
||||
|
||||
15: mtspr SPRN_HSRR0, r8
|
||||
mtspr SPRN_HSRR1, r7
|
||||
ba 0xe80
|
||||
|
||||
/* Virtual-mode return - can't get here for HMI or machine check */
|
||||
/* Virtual-mode return */
|
||||
.Lvirt_return:
|
||||
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
|
||||
beq 16f
|
||||
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
|
||||
beq 17f
|
||||
andi. r0, r7, MSR_EE /* were interrupts hard-enabled? */
|
||||
beq 18f
|
||||
mtmsrd r7, 1 /* if so then re-enable them */
|
||||
18: mtlr r8
|
||||
mtlr r8
|
||||
blr
|
||||
|
||||
16: mtspr SPRN_HSRR0, r8 /* jump to reloc-on external vector */
|
||||
mtspr SPRN_HSRR1, r7
|
||||
b exc_virt_0x4500_hardware_interrupt
|
||||
|
||||
17: mtspr SPRN_HSRR0, r8
|
||||
mtspr SPRN_HSRR1, r7
|
||||
b exc_virt_0x4e80_h_doorbell
|
||||
|
||||
kvmppc_primary_no_guest:
|
||||
/* We handle this much like a ceded vcpu */
|
||||
/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
|
||||
/* HDEC may be larger than DEC for arch >= v3.00, but since the */
|
||||
/* HDEC value came from DEC in the first place, it will fit */
|
||||
mfspr r3, SPRN_HDEC
|
||||
mtspr SPRN_DEC, r3
|
||||
/*
|
||||
|
@ -295,8 +278,9 @@ kvm_novcpu_wakeup:
|
|||
|
||||
/* See if our timeslice has expired (HDEC is negative) */
|
||||
mfspr r0, SPRN_HDEC
|
||||
EXTEND_HDEC(r0)
|
||||
li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
|
||||
cmpwi r0, 0
|
||||
cmpdi r0, 0
|
||||
blt kvm_novcpu_exit
|
||||
|
||||
/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
|
||||
|
@ -319,10 +303,10 @@ kvm_novcpu_exit:
|
|||
bl kvmhv_accumulate_time
|
||||
#endif
|
||||
13: mr r3, r12
|
||||
stw r12, 112-4(r1)
|
||||
stw r12, STACK_SLOT_TRAP(r1)
|
||||
bl kvmhv_commence_exit
|
||||
nop
|
||||
lwz r12, 112-4(r1)
|
||||
lwz r12, STACK_SLOT_TRAP(r1)
|
||||
b kvmhv_switch_to_host
|
||||
|
||||
/*
|
||||
|
@ -390,8 +374,8 @@ kvm_secondary_got_guest:
|
|||
lbz r4, HSTATE_PTID(r13)
|
||||
cmpwi r4, 0
|
||||
bne 63f
|
||||
lis r6, 0x7fff
|
||||
ori r6, r6, 0xffff
|
||||
LOAD_REG_ADDR(r6, decrementer_max)
|
||||
ld r6, 0(r6)
|
||||
mtspr SPRN_HDEC, r6
|
||||
/* and set per-LPAR registers, if doing dynamic micro-threading */
|
||||
ld r6, HSTATE_SPLIT_MODE(r13)
|
||||
|
@ -545,11 +529,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
|||
* *
|
||||
*****************************************************************************/
|
||||
|
||||
/* Stack frame offsets */
|
||||
#define STACK_SLOT_TID (112-16)
|
||||
#define STACK_SLOT_PSSCR (112-24)
|
||||
#define STACK_SLOT_PID (112-32)
|
||||
|
||||
.global kvmppc_hv_entry
|
||||
kvmppc_hv_entry:
|
||||
|
||||
|
@ -565,7 +544,7 @@ kvmppc_hv_entry:
|
|||
*/
|
||||
mflr r0
|
||||
std r0, PPC_LR_STKOFF(r1)
|
||||
stdu r1, -112(r1)
|
||||
stdu r1, -SFS(r1)
|
||||
|
||||
/* Save R1 in the PACA */
|
||||
std r1, HSTATE_HOST_R1(r13)
|
||||
|
@ -749,10 +728,22 @@ BEGIN_FTR_SECTION
|
|||
mfspr r5, SPRN_TIDR
|
||||
mfspr r6, SPRN_PSSCR
|
||||
mfspr r7, SPRN_PID
|
||||
mfspr r8, SPRN_IAMR
|
||||
std r5, STACK_SLOT_TID(r1)
|
||||
std r6, STACK_SLOT_PSSCR(r1)
|
||||
std r7, STACK_SLOT_PID(r1)
|
||||
std r8, STACK_SLOT_IAMR(r1)
|
||||
mfspr r5, SPRN_HFSCR
|
||||
std r5, STACK_SLOT_HFSCR(r1)
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
BEGIN_FTR_SECTION
|
||||
mfspr r5, SPRN_CIABR
|
||||
mfspr r6, SPRN_DAWR
|
||||
mfspr r7, SPRN_DAWRX
|
||||
std r5, STACK_SLOT_CIABR(r1)
|
||||
std r6, STACK_SLOT_DAWR(r1)
|
||||
std r7, STACK_SLOT_DAWRX(r1)
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
||||
|
||||
BEGIN_FTR_SECTION
|
||||
/* Set partition DABR */
|
||||
|
@ -895,8 +886,10 @@ FTR_SECTION_ELSE
|
|||
ld r5, VCPU_TID(r4)
|
||||
ld r6, VCPU_PSSCR(r4)
|
||||
oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */
|
||||
ld r7, VCPU_HFSCR(r4)
|
||||
mtspr SPRN_TIDR, r5
|
||||
mtspr SPRN_PSSCR, r6
|
||||
mtspr SPRN_HFSCR, r7
|
||||
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
|
||||
8:
|
||||
|
||||
|
@ -911,7 +904,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
|
|||
mftb r7
|
||||
subf r3,r7,r8
|
||||
mtspr SPRN_DEC,r3
|
||||
stw r3,VCPU_DEC(r4)
|
||||
std r3,VCPU_DEC(r4)
|
||||
|
||||
ld r5, VCPU_SPRG0(r4)
|
||||
ld r6, VCPU_SPRG1(r4)
|
||||
|
@ -968,7 +961,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
|
|||
|
||||
/* Check if HDEC expires soon */
|
||||
mfspr r3, SPRN_HDEC
|
||||
cmpwi r3, 512 /* 1 microsecond */
|
||||
EXTEND_HDEC(r3)
|
||||
cmpdi r3, 512 /* 1 microsecond */
|
||||
blt hdec_soon
|
||||
|
||||
#ifdef CONFIG_KVM_XICS
|
||||
|
@ -1022,7 +1016,13 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
|
|||
li r0, BOOK3S_INTERRUPT_EXTERNAL
|
||||
bne cr1, 12f
|
||||
mfspr r0, SPRN_DEC
|
||||
cmpwi r0, 0
|
||||
BEGIN_FTR_SECTION
|
||||
/* On POWER9 check whether the guest has large decrementer enabled */
|
||||
andis. r8, r8, LPCR_LD@h
|
||||
bne 15f
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
extsw r0, r0
|
||||
15: cmpdi r0, 0
|
||||
li r0, BOOK3S_INTERRUPT_DECREMENTER
|
||||
bge 5f
|
||||
|
||||
|
@ -1032,6 +1032,23 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
|
|||
mr r9, r4
|
||||
bl kvmppc_msr_interrupt
|
||||
5:
|
||||
BEGIN_FTR_SECTION
|
||||
b fast_guest_return
|
||||
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
|
||||
/* On POWER9, check for pending doorbell requests */
|
||||
lbz r0, VCPU_DBELL_REQ(r4)
|
||||
cmpwi r0, 0
|
||||
beq fast_guest_return
|
||||
ld r5, HSTATE_KVM_VCORE(r13)
|
||||
/* Set DPDES register so the CPU will take a doorbell interrupt */
|
||||
li r0, 1
|
||||
mtspr SPRN_DPDES, r0
|
||||
std r0, VCORE_DPDES(r5)
|
||||
/* Make sure other cpus see vcore->dpdes set before dbell req clear */
|
||||
lwsync
|
||||
/* Clear the pending doorbell request */
|
||||
li r0, 0
|
||||
stb r0, VCPU_DBELL_REQ(r4)
|
||||
|
||||
/*
|
||||
* Required state:
|
||||
|
@ -1206,6 +1223,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
|
|||
|
||||
stw r12,VCPU_TRAP(r9)
|
||||
|
||||
/*
|
||||
* Now that we have saved away SRR0/1 and HSRR0/1,
|
||||
* interrupts are recoverable in principle, so set MSR_RI.
|
||||
* This becomes important for relocation-on interrupts from
|
||||
* the guest, which we can get in radix mode on POWER9.
|
||||
*/
|
||||
li r0, MSR_RI
|
||||
mtmsrd r0, 1
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
|
||||
addi r3, r9, VCPU_TB_RMINTR
|
||||
mr r4, r9
|
||||
|
@ -1262,6 +1288,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
|
|||
beq 4f
|
||||
b guest_exit_cont
|
||||
3:
|
||||
/* If it's a hypervisor facility unavailable interrupt, save HFSCR */
|
||||
cmpwi r12, BOOK3S_INTERRUPT_H_FAC_UNAVAIL
|
||||
bne 14f
|
||||
mfspr r3, SPRN_HFSCR
|
||||
std r3, VCPU_HFSCR(r9)
|
||||
b guest_exit_cont
|
||||
14:
|
||||
/* External interrupt ? */
|
||||
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
|
||||
bne+ guest_exit_cont
|
||||
|
@ -1449,12 +1482,18 @@ mc_cont:
|
|||
mtspr SPRN_SPURR,r4
|
||||
|
||||
/* Save DEC */
|
||||
ld r3, HSTATE_KVM_VCORE(r13)
|
||||
mfspr r5,SPRN_DEC
|
||||
mftb r6
|
||||
/* On P9, if the guest has large decr enabled, don't sign extend */
|
||||
BEGIN_FTR_SECTION
|
||||
ld r4, VCORE_LPCR(r3)
|
||||
andis. r4, r4, LPCR_LD@h
|
||||
bne 16f
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
extsw r5,r5
|
||||
add r5,r5,r6
|
||||
16: add r5,r5,r6
|
||||
/* r5 is a guest timebase value here, convert to host TB */
|
||||
ld r3,HSTATE_KVM_VCORE(r13)
|
||||
ld r4,VCORE_TB_OFFSET(r3)
|
||||
subf r5,r4,r5
|
||||
std r5,VCPU_DEC_EXPIRES(r9)
|
||||
|
@ -1499,17 +1538,19 @@ FTR_SECTION_ELSE
|
|||
rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */
|
||||
rotldi r6, r6, 60
|
||||
std r6, VCPU_PSSCR(r9)
|
||||
/* Restore host HFSCR value */
|
||||
ld r7, STACK_SLOT_HFSCR(r1)
|
||||
mtspr SPRN_HFSCR, r7
|
||||
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
|
||||
/*
|
||||
* Restore various registers to 0, where non-zero values
|
||||
* set by the guest could disrupt the host.
|
||||
*/
|
||||
li r0, 0
|
||||
mtspr SPRN_IAMR, r0
|
||||
mtspr SPRN_CIABR, r0
|
||||
mtspr SPRN_DAWRX, r0
|
||||
mtspr SPRN_PSPB, r0
|
||||
mtspr SPRN_WORT, r0
|
||||
BEGIN_FTR_SECTION
|
||||
mtspr SPRN_IAMR, r0
|
||||
mtspr SPRN_TCSCR, r0
|
||||
/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
|
||||
li r0, 1
|
||||
|
@ -1525,6 +1566,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
|
|||
std r6,VCPU_UAMOR(r9)
|
||||
li r6,0
|
||||
mtspr SPRN_AMR,r6
|
||||
mtspr SPRN_UAMOR, r6
|
||||
|
||||
/* Switch DSCR back to host value */
|
||||
mfspr r8, SPRN_DSCR
|
||||
|
@ -1669,13 +1711,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
|||
ptesync
|
||||
|
||||
/* Restore host values of some registers */
|
||||
BEGIN_FTR_SECTION
|
||||
ld r5, STACK_SLOT_CIABR(r1)
|
||||
ld r6, STACK_SLOT_DAWR(r1)
|
||||
ld r7, STACK_SLOT_DAWRX(r1)
|
||||
mtspr SPRN_CIABR, r5
|
||||
mtspr SPRN_DAWR, r6
|
||||
mtspr SPRN_DAWRX, r7
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
||||
BEGIN_FTR_SECTION
|
||||
ld r5, STACK_SLOT_TID(r1)
|
||||
ld r6, STACK_SLOT_PSSCR(r1)
|
||||
ld r7, STACK_SLOT_PID(r1)
|
||||
ld r8, STACK_SLOT_IAMR(r1)
|
||||
mtspr SPRN_TIDR, r5
|
||||
mtspr SPRN_PSSCR, r6
|
||||
mtspr SPRN_PID, r7
|
||||
mtspr SPRN_IAMR, r8
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
BEGIN_FTR_SECTION
|
||||
PPC_INVALIDATE_ERAT
|
||||
|
@ -1819,8 +1871,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
|
|||
li r0, KVM_GUEST_MODE_NONE
|
||||
stb r0, HSTATE_IN_GUEST(r13)
|
||||
|
||||
ld r0, 112+PPC_LR_STKOFF(r1)
|
||||
addi r1, r1, 112
|
||||
ld r0, SFS+PPC_LR_STKOFF(r1)
|
||||
addi r1, r1, SFS
|
||||
mtlr r0
|
||||
blr
|
||||
|
||||
|
@ -2366,12 +2418,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
|
|||
mfspr r3, SPRN_DEC
|
||||
mfspr r4, SPRN_HDEC
|
||||
mftb r5
|
||||
cmpw r3, r4
|
||||
BEGIN_FTR_SECTION
|
||||
/* On P9 check whether the guest has large decrementer mode enabled */
|
||||
ld r6, HSTATE_KVM_VCORE(r13)
|
||||
ld r6, VCORE_LPCR(r6)
|
||||
andis. r6, r6, LPCR_LD@h
|
||||
bne 68f
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
extsw r3, r3
|
||||
68: EXTEND_HDEC(r4)
|
||||
cmpd r3, r4
|
||||
ble 67f
|
||||
mtspr SPRN_DEC, r4
|
||||
67:
|
||||
/* save expiry time of guest decrementer */
|
||||
extsw r3, r3
|
||||
add r3, r3, r5
|
||||
ld r4, HSTATE_KVM_VCPU(r13)
|
||||
ld r5, HSTATE_KVM_VCORE(r13)
|
||||
|
@ -2552,22 +2612,32 @@ machine_check_realmode:
|
|||
ld r9, HSTATE_KVM_VCPU(r13)
|
||||
li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
|
||||
/*
|
||||
* Deliver unhandled/fatal (e.g. UE) MCE errors to guest through
|
||||
* machine check interrupt (set HSRR0 to 0x200). And for handled
|
||||
* errors (no-fatal), just go back to guest execution with current
|
||||
* HSRR0 instead of exiting guest. This new approach will inject
|
||||
* machine check to guest for fatal error causing guest to crash.
|
||||
*
|
||||
* The old code used to return to host for unhandled errors which
|
||||
* was causing guest to hang with soft lockups inside guest and
|
||||
* makes it difficult to recover guest instance.
|
||||
* For the guest that is FWNMI capable, deliver all the MCE errors
|
||||
* (handled/unhandled) by exiting the guest with KVM_EXIT_NMI exit
|
||||
* reason. This new approach injects machine check errors in guest
|
||||
* address space to guest with additional information in the form
|
||||
* of RTAS event, thus enabling guest kernel to suitably handle
|
||||
* such errors.
|
||||
*
|
||||
* For the guest that is not FWNMI capable (old QEMU) fallback
|
||||
* to old behaviour for backward compatibility:
|
||||
* Deliver unhandled/fatal (e.g. UE) MCE errors to guest either
|
||||
* through machine check interrupt (set HSRR0 to 0x200).
|
||||
* For handled errors (no-fatal), just go back to guest execution
|
||||
* with current HSRR0.
|
||||
* if we receive machine check with MSR(RI=0) then deliver it to
|
||||
* guest as machine check causing guest to crash.
|
||||
*/
|
||||
ld r11, VCPU_MSR(r9)
|
||||
rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
|
||||
bne mc_cont /* if so, exit to host */
|
||||
/* Check if guest is capable of handling NMI exit */
|
||||
ld r10, VCPU_KVM(r9)
|
||||
lbz r10, KVM_FWNMI(r10)
|
||||
cmpdi r10, 1 /* FWNMI capable? */
|
||||
beq mc_cont /* if so, exit with KVM_EXIT_NMI. */
|
||||
|
||||
/* if not, fall through for backward compatibility. */
|
||||
andi. r10, r11, MSR_RI /* check for unrecoverable exception */
|
||||
beq 1f /* Deliver a machine check to guest */
|
||||
ld r10, VCPU_PC(r9)
|
||||
|
|
|
@ -39,7 +39,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
|
|||
unsigned long dec_nsec;
|
||||
unsigned long long dec_time;
|
||||
|
||||
pr_debug("mtDEC: %x\n", vcpu->arch.dec);
|
||||
pr_debug("mtDEC: %lx\n", vcpu->arch.dec);
|
||||
hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S
|
||||
|
@ -109,7 +109,7 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
|
|||
case SPRN_TBWU: break;
|
||||
|
||||
case SPRN_DEC:
|
||||
vcpu->arch.dec = spr_val;
|
||||
vcpu->arch.dec = (u32) spr_val;
|
||||
kvmppc_emulate_dec(vcpu);
|
||||
break;
|
||||
|
||||
|
|
|
@ -553,13 +553,28 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||
case KVM_CAP_PPC_SMT:
|
||||
r = 0;
|
||||
if (hv_enabled) {
|
||||
if (kvm) {
|
||||
if (kvm->arch.emul_smt_mode > 1)
|
||||
r = kvm->arch.emul_smt_mode;
|
||||
else
|
||||
r = kvm->arch.smt_mode;
|
||||
} else if (hv_enabled) {
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_300))
|
||||
r = 1;
|
||||
else
|
||||
r = threads_per_subcore;
|
||||
}
|
||||
break;
|
||||
case KVM_CAP_PPC_SMT_POSSIBLE:
|
||||
r = 1;
|
||||
if (hv_enabled) {
|
||||
if (!cpu_has_feature(CPU_FTR_ARCH_300))
|
||||
r = ((threads_per_subcore << 1) - 1);
|
||||
else
|
||||
/* P9 can emulate dbells, so allow any mode */
|
||||
r = 8 | 4 | 2 | 1;
|
||||
}
|
||||
break;
|
||||
case KVM_CAP_PPC_RMA:
|
||||
r = 0;
|
||||
break;
|
||||
|
@ -617,6 +632,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
/* Disable this on POWER9 until code handles new HPTE format */
|
||||
r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300);
|
||||
break;
|
||||
#endif
|
||||
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||
case KVM_CAP_PPC_FWNMI:
|
||||
r = hv_enabled;
|
||||
break;
|
||||
#endif
|
||||
case KVM_CAP_PPC_HTM:
|
||||
r = cpu_has_feature(CPU_FTR_TM_COMP) &&
|
||||
|
@ -1537,6 +1557,15 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
|
|||
break;
|
||||
}
|
||||
#endif /* CONFIG_KVM_XICS */
|
||||
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||
case KVM_CAP_PPC_FWNMI:
|
||||
r = -EINVAL;
|
||||
if (!is_kvmppc_hv_enabled(vcpu->kvm))
|
||||
break;
|
||||
r = 0;
|
||||
vcpu->kvm->arch.fwnmi_enabled = true;
|
||||
break;
|
||||
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
|
||||
default:
|
||||
r = -EINVAL;
|
||||
break;
|
||||
|
@ -1711,6 +1740,15 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
|
|||
r = 0;
|
||||
break;
|
||||
}
|
||||
case KVM_CAP_PPC_SMT: {
|
||||
unsigned long mode = cap->args[0];
|
||||
unsigned long flags = cap->args[1];
|
||||
|
||||
r = -EINVAL;
|
||||
if (kvm->arch.kvm_ops->set_smt_mode)
|
||||
r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
default:
|
||||
r = -EINVAL;
|
||||
|
|
|
@ -925,6 +925,8 @@ struct kvm_ppc_resize_hpt {
|
|||
#define KVM_CAP_X86_GUEST_MWAIT 143
|
||||
#define KVM_CAP_ARM_USER_IRQ 144
|
||||
#define KVM_CAP_S390_CMMA_MIGRATION 145
|
||||
#define KVM_CAP_PPC_FWNMI 146
|
||||
#define KVM_CAP_PPC_SMT_POSSIBLE 147
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
|
|