KVM: PPC: Book3S HV: Allow for running POWER9 host in single-threaded mode

This patch allows for a mode on POWER9 hosts where we control all the
threads of a core, much as we do on POWER8.  The mode is controlled by
a module parameter on the kvm_hv module, called "indep_threads_mode".
The normal mode on POWER9 is the "independent threads" mode, with
indep_threads_mode=Y, where the host is in SMT4 mode (or in fact any
desired SMT mode) and each thread independently enters and exits from
KVM guests without reference to what other threads in the core are
doing.
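
A rough standalone sketch of that split, for illustration only: the userspace
wrapper, the printed output and the SMT4 value are assumptions of this sketch,
and the names simply mirror threads_per_vcore()/threads_indep from the patch
below.

    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative stand-ins for the per-VM flag and the host SMT width. */
    static bool threads_indep = true;    /* indep_threads_mode=Y */
    static int threads_per_subcore = 4;  /* assumption: a POWER9 host in SMT4 */

    /* How many threads of a core KVM must control for one virtual core. */
    static int threads_per_vcore(void)
    {
        if (threads_indep)
            return 1;                    /* each thread runs guests on its own */
        return threads_per_subcore;      /* whole core, as on POWER8 */
    }

    int main(void)
    {
        printf("indep_threads_mode=Y -> control %d thread(s)\n",
               threads_per_vcore());
        threads_indep = false;           /* indep_threads_mode=N */
        printf("indep_threads_mode=N -> control %d thread(s)\n",
               threads_per_vcore());
        return 0;
    }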

If indep_threads_mode is set to N at the point when a VM is started,
KVM will expect every core that the guest runs on to be in
single-threaded mode (that is, threads 1, 2 and 3 offline), and will set the
flag that prevents secondary threads from coming online.  We can still
use all four threads; the code that implements dynamic micro-threading
on POWER8 will become active in over-commit situations and will allow
up to three other VCPUs to be run on the secondary threads of the core
whenever a VCPU is run.
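
A standalone sketch of that packing rule, modelling the POWER9 check this
patch adds to subcore_config_ok(); p9_subcore_config_ok() and the demo output
are hypothetical helpers of this sketch, not kernel code.

    #include <stdbool.h>
    #include <stdio.h>

    #define MAX_SUBCORES 4  /* a POWER9 core: four single-thread subcores */

    /*
     * With indep_threads_mode=N, dynamic micro-threading may pack up to four
     * single-threaded virtual cores onto one physical core, i.e. up to three
     * extra VCPUs on the otherwise-idle secondary threads.
     */
    static bool p9_subcore_config_ok(int n_subcores, int n_threads)
    {
        return n_subcores <= MAX_SUBCORES && n_threads == 1;
    }

    int main(void)
    {
        printf("4 subcores x 1 thread:  %s\n",
               p9_subcore_config_ok(4, 1) ? "ok" : "rejected");
        printf("2 subcores x 2 threads: %s\n",
               p9_subcore_config_ok(2, 2) ? "ok" : "rejected");
        return 0;
    }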

The reason for wanting this mode is that this will allow us to run HPT
guests on a radix host on a POWER9 machine that does not support
"mixed mode", that is, having some threads in a core be in HPT mode
while other threads are in radix mode.  It will also make it possible
to implement a "strict threads" mode in future, if desired.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Author: Paul Mackerras <paulus@ozlabs.org>
Date:   2017-10-16 16:11:57 +11:00
commit 516f7898ae
parent 18c3640cef
3 changed files with 57 additions and 33 deletions

arch/powerpc/include/asm/kvm_host.h

@@ -281,6 +281,7 @@ struct kvm_arch {
         cpumask_t cpu_in_guest;
         u8 radix;
         u8 fwnmi_enabled;
+        bool threads_indep;
         pgd_t *pgtable;
         u64 process_table;
         struct dentry *debugfs_dir;

arch/powerpc/kvm/book3s_hv.c

@@ -98,6 +98,10 @@ static int target_smt_mode;
 module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
 
+static bool indep_threads_mode = true;
+module_param(indep_threads_mode, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(indep_threads_mode, "Independent-threads mode (only on POWER9)");
+
 #ifdef CONFIG_KVM_XICS
 static struct kernel_param_ops module_param_ops = {
         .set = param_set_int,
@@ -1734,9 +1738,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
  * MMU mode (radix or HPT), unfortunately, but since we only support
  * HPT guests on a HPT host so far, that isn't an impediment yet.
  */
-static int threads_per_vcore(void)
+static int threads_per_vcore(struct kvm *kvm)
 {
-        if (cpu_has_feature(CPU_FTR_ARCH_300))
+        if (kvm->arch.threads_indep)
                 return 1;
         return threads_per_subcore;
 }
@@ -2228,11 +2232,10 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
         kvmppc_ipi_thread(cpu);
 }
 
-static void kvmppc_wait_for_nap(void)
+static void kvmppc_wait_for_nap(int n_threads)
 {
         int cpu = smp_processor_id();
         int i, loops;
-        int n_threads = threads_per_vcore();
 
         if (n_threads <= 1)
                 return;
@@ -2319,7 +2322,7 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
         vc->vcore_state = VCORE_PREEMPT;
         vc->pcpu = smp_processor_id();
-        if (vc->num_threads < threads_per_vcore()) {
+        if (vc->num_threads < threads_per_vcore(vc->kvm)) {
                 spin_lock(&lp->lock);
                 list_add_tail(&vc->preempt_list, &lp->list);
                 spin_unlock(&lp->lock);
@@ -2357,7 +2360,7 @@ struct core_info {
 
 /*
  * This mapping means subcores 0 and 1 can use threads 0-3 and 4-7
- * respectively in 2-way micro-threading (split-core) mode.
+ * respectively in 2-way micro-threading (split-core) mode on POWER8.
  */
 static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
@@ -2373,7 +2376,14 @@ static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
 
 static bool subcore_config_ok(int n_subcores, int n_threads)
 {
-        /* Can only dynamically split if unsplit to begin with */
+        /*
+         * POWER9 "SMT4" cores are permanently in what is effectively a 4-way
+         * split-core mode, with one thread per subcore.
+         */
+        if (cpu_has_feature(CPU_FTR_ARCH_300))
+                return n_subcores <= 4 && n_threads == 1;
+
+        /* On POWER8, can only dynamically split if unsplit to begin with */
         if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS)
                 return false;
         if (n_subcores > MAX_SUBCORES)
@@ -2632,6 +2642,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
         int target_threads;
         int controlled_threads;
         int trap;
+        bool is_power8;
 
         /*
          * Remove from the list any threads that have a signal pending
@@ -2654,7 +2665,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
          * the number of threads per subcore, except on POWER9,
          * where it's 1 because the threads are (mostly) independent.
          */
-        controlled_threads = threads_per_vcore();
+        controlled_threads = threads_per_vcore(vc->kvm);
 
         /*
          * Make sure we are running on primary threads, and that secondary
@@ -2725,8 +2736,16 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
         cmd_bit = stat_bit = 0;
         split = core_info.n_subcores;
         sip = NULL;
+        is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S)
+                && !cpu_has_feature(CPU_FTR_ARCH_300);
+
         if (split > 1) {
-                /* threads_per_subcore must be MAX_SMT_THREADS (8) here */
+                sip = &split_info;
+                memset(&split_info, 0, sizeof(split_info));
+                for (sub = 0; sub < core_info.n_subcores; ++sub)
+                        split_info.vc[sub] = core_info.vc[sub];
+
+                if (is_power8) {
                 if (split == 2 && (dynamic_mt_modes & 2)) {
                         cmd_bit = HID0_POWER8_1TO2LPAR;
                         stat_bit = HID0_POWER8_2LPARMODE;
@@ -2736,21 +2755,21 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
                         stat_bit = HID0_POWER8_4LPARMODE;
                 }
                 subcore_size = MAX_SMT_THREADS / split;
-                sip = &split_info;
-                memset(&split_info, 0, sizeof(split_info));
                 split_info.rpr = mfspr(SPRN_RPR);
                 split_info.pmmar = mfspr(SPRN_PMMAR);
                 split_info.ldbar = mfspr(SPRN_LDBAR);
                 split_info.subcore_size = subcore_size;
-                for (sub = 0; sub < core_info.n_subcores; ++sub)
-                        split_info.vc[sub] = core_info.vc[sub];
+                } else {
+                        split_info.subcore_size = 1;
+                }
+
                 /* order writes to split_info before kvm_split_mode pointer */
                 smp_wmb();
         }
 
         for (thr = 0; thr < controlled_threads; ++thr)
                 paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
 
-        /* Initiate micro-threading (split-core) if required */
+        /* Initiate micro-threading (split-core) on POWER8 if required */
         if (cmd_bit) {
                 unsigned long hid0 = mfspr(SPRN_HID0);
@@ -2769,7 +2788,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
         /* Start all the threads */
         active = 0;
         for (sub = 0; sub < core_info.n_subcores; ++sub) {
-                thr = subcore_thread_map[sub];
+                thr = is_power8 ? subcore_thread_map[sub] : sub;
                 thr0_done = false;
                 active |= 1 << thr;
                 pvc = core_info.vc[sub];
@@ -2796,18 +2815,18 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
          * the vcore pointer in the PACA of the secondaries.
          */
         smp_mb();
-        if (cmd_bit)
-                split_info.do_nap = 1;  /* ask secondaries to nap when done */
 
         /*
          * When doing micro-threading, poke the inactive threads as well.
          * This gets them to the nap instruction after kvm_do_nap,
          * which reduces the time taken to unsplit later.
          */
-        if (split > 1)
+        if (cmd_bit) {
+                split_info.do_nap = 1;  /* ask secondaries to nap when done */
                 for (thr = 1; thr < threads_per_subcore; ++thr)
                         if (!(active & (1 << thr)))
                                 kvmppc_ipi_thread(pcpu + thr);
+        }
 
         vc->vcore_state = VCORE_RUNNING;
         preempt_disable();
@@ -2841,10 +2860,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
         vc->vcore_state = VCORE_EXITING;
 
         /* wait for secondary threads to finish writing their state to memory */
-        kvmppc_wait_for_nap();
+        kvmppc_wait_for_nap(controlled_threads);
 
         /* Return to whole-core mode if we split the core earlier */
-        if (split > 1) {
+        if (cmd_bit) {
                 unsigned long hid0 = mfspr(SPRN_HID0);
                 unsigned long loops = 0;
@@ -3822,10 +3841,12 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
         /*
          * Track that we now have a HV mode VM active. This blocks secondary
          * CPU threads from coming online.
-         * On POWER9, we only need to do this for HPT guests on a radix
-         * host, which is not yet supported.
+         * On POWER9, we only need to do this if the "indep_threads_mode"
+         * module parameter has been set to N.
          */
-        if (!cpu_has_feature(CPU_FTR_ARCH_300))
+        if (cpu_has_feature(CPU_FTR_ARCH_300))
+                kvm->arch.threads_indep = indep_threads_mode;
+        if (!kvm->arch.threads_indep)
                 kvm_hv_vm_activated();
 
         /*
@@ -3865,7 +3886,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 {
         debugfs_remove_recursive(kvm->arch.debugfs_dir);
 
-        if (!cpu_has_feature(CPU_FTR_ARCH_300))
+        if (!kvm->arch.threads_indep)
                 kvm_hv_vm_deactivated();
 
         kvmppc_free_vcores(kvm);

arch/powerpc/kvm/book3s_hv_rmhandlers.S

@@ -385,6 +385,7 @@ kvm_secondary_got_guest:
         ld      r6, 0(r6)
         mtspr   SPRN_HDEC, r6
         /* and set per-LPAR registers, if doing dynamic micro-threading */
+BEGIN_FTR_SECTION
         ld      r6, HSTATE_SPLIT_MODE(r13)
         cmpdi   r6, 0
         beq     63f
@@ -395,6 +396,7 @@ kvm_secondary_got_guest:
         ld      r0, KVM_SPLIT_LDBAR(r6)
         mtspr   SPRN_LDBAR, r0
         isync
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 63:
         /* Order load of vcpu after load of vcore */
         lwsync