powerpc/powernv/idle: add a basic stop 0-3 driver for POWER10

This driver does not restore the state lost by stop states deeper than
stop 3, so it limits itself to states which do not lose full state or
the timebase (TB).

The POWER10 SPRs are sufficiently different from P9 that it seems
easier to split out the P10 code. The POWER10 deep sleep code
(e.g., the BHRB restore) has been taken out, but it can be re-added
when stop > 3 support is added.
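
In code terms the split is a runtime dispatch on the ISA v3.1 feature
bit: the renamed arch300_* wrappers pick the POWER10 or POWER9 stop
implementation on each call. A condensed sketch of that pattern (the
helper name arch300_stop is illustrative only; the patch open-codes
this if/else inside each wrapper):

    /* Sketch of the dispatch pattern used by the arch300_* wrappers. */
    static unsigned long arch300_stop(unsigned long psscr, bool mmu_on)
    {
    	if (cpu_has_feature(CPU_FTR_ARCH_31))		/* ISA v3.1, i.e. POWER10 */
    		return power10_idle_stop(psscr, mmu_on);	/* stop 0-3 only */
    	return power9_idle_stop(psscr, mmu_on);		/* POWER9, incl. deep states */
    }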

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Tested-by: Pratik Rajesh Sampat <psampat@linux.ibm.com>
Tested-by: Vaidyanathan Srinivasan <svaidy@linux.ibm.com>
Reviewed-by: Pratik Rajesh Sampat <psampat@linux.ibm.com>
Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200819094700.493399-1-npiggin@gmail.com
commit ffd2961bb4 (parent 79b123cdf9)
Author:    Nicholas Piggin, 2020-08-19 19:47:00 +10:00
Committer: Michael Ellerman
5 changed files with 212 additions and 97 deletions

arch/powerpc/include/asm/machdep.h

@@ -222,8 +222,6 @@ struct machdep_calls {
extern void e500_idle(void);
extern void power4_idle(void);
-extern void power7_idle(void);
-extern void power9_idle(void);
extern void ppc6xx_idle(void);
extern void book3e_idle(void);

arch/powerpc/include/asm/processor.h

@@ -432,7 +432,7 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
extern int powersave_nap; /* set if nap mode can be used in idle loop */
extern void power7_idle_type(unsigned long type);
-extern void power9_idle_type(unsigned long stop_psscr_val,
+extern void arch300_idle_type(unsigned long stop_psscr_val,
unsigned long stop_psscr_mask);
extern int fix_alignment(struct pt_regs *);

arch/powerpc/include/asm/reg.h

@@ -1353,6 +1353,7 @@
#define PVR_POWER8NVL 0x004C
#define PVR_POWER8 0x004D
#define PVR_POWER9 0x004E
+#define PVR_POWER10 0x0080
#define PVR_BE 0x0070
#define PVR_PA6T 0x0090

arch/powerpc/platforms/powernv/idle.c

@@ -565,7 +565,7 @@ void power7_idle_type(unsigned long type)
irq_set_pending_from_srr1(srr1);
}
-void power7_idle(void)
+static void power7_idle(void)
{
if (!powersave_nap)
return;
@@ -659,20 +659,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
mmcr0 = mfspr(SPRN_MMCR0);
}
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
/*
* POWER10 uses MMCRA (BHRBRD) as BHRB disable bit.
* If the user hasn't asked for the BHRB to be
* written, the value of MMCRA[BHRBRD] is 1.
* On wakeup from stop, MMCRA[BHRBRD] will be 0,
* since it is a privileged resource and will be lost.
* Thus, if we do not save and restore MMCRA[BHRBRD],
* hardware will be needlessly writing to the BHRB
* in problem mode.
*/
mmcra = mfspr(SPRN_MMCRA);
}
if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
sprs.lpcr = mfspr(SPRN_LPCR);
sprs.hfscr = mfspr(SPRN_HFSCR);
@@ -735,10 +721,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
mtspr(SPRN_MMCR0, mmcr0);
}
/* Reload MMCRA to restore BHRB disable bit for POWER10 */
if (cpu_has_feature(CPU_FTR_ARCH_31))
mtspr(SPRN_MMCRA, mmcra);
/*
* DD2.2 and earlier need to set then clear bit 60 in MMCRA
* to ensure the PMU starts running.
@@ -823,73 +805,6 @@ out:
return srr1;
}
#ifdef CONFIG_HOTPLUG_CPU
static unsigned long power9_offline_stop(unsigned long psscr)
{
unsigned long srr1;
#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
__ppc64_runlatch_off();
srr1 = power9_idle_stop(psscr, true);
__ppc64_runlatch_on();
#else
/*
* Tell KVM we're entering idle.
* This does not have to be done in real mode because the P9 MMU
* is independent per-thread. Some steppings share radix/hash mode
* between threads, but in that case KVM has a barrier sync in real
* mode before and after switching between radix and hash.
*
* kvm_start_guest must still be called in real mode though, hence
* the false argument.
*/
local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
__ppc64_runlatch_off();
srr1 = power9_idle_stop(psscr, false);
__ppc64_runlatch_on();
local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
/* Order setting hwthread_state vs. testing hwthread_req */
smp_mb();
if (local_paca->kvm_hstate.hwthread_req)
srr1 = idle_kvm_start_guest(srr1);
mtmsr(MSR_KERNEL);
#endif
return srr1;
}
#endif
void power9_idle_type(unsigned long stop_psscr_val,
unsigned long stop_psscr_mask)
{
unsigned long psscr;
unsigned long srr1;
if (!prep_irq_for_idle_irqsoff())
return;
psscr = mfspr(SPRN_PSSCR);
psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
__ppc64_runlatch_off();
srr1 = power9_idle_stop(psscr, true);
__ppc64_runlatch_on();
fini_irq_for_idle_irqsoff();
irq_set_pending_from_srr1(srr1);
}
/*
* Used for ppc_md.power_save which needs a function with no parameters
*/
void power9_idle(void)
{
power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
}
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
* This is used in working around bugs in thread reconfiguration
@@ -962,6 +877,198 @@ void pnv_power9_force_smt4_release(void)
EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
struct p10_sprs {
/*
* SPRs that get lost in shallow states:
*
* P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1
* isa300 idle routines restore CR, LR.
* CTR is volatile
* idle thread doesn't use FP or VEC
* kernel doesn't use TAR
* HSPRG1 is only live in HV interrupt entry
* SPRG2 is only live in KVM guests, KVM handles it.
*/
};
static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
{
int cpu = raw_smp_processor_id();
int first = cpu_first_thread_sibling(cpu);
unsigned long *state = &paca_ptrs[first]->idle_state;
unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
unsigned long srr1;
unsigned long pls;
// struct p10_sprs sprs = {}; /* avoid false used-uninitialised */
bool sprs_saved = false;
if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
/* EC=ESL=0 case */
BUG_ON(!mmu_on);
/*
* Wake synchronously. SRESET via xscom may still cause
* a 0x100 powersave wakeup with SRR1 reason!
*/
srr1 = isa300_idle_stop_noloss(psscr); /* go idle */
if (likely(!srr1))
return 0;
/*
* Registers not saved, can't recover!
* This would be a hardware bug
*/
BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
goto out;
}
/* EC=ESL=1 case */
if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
/* XXX: save SPRs for deep state loss here. */
sprs_saved = true;
atomic_start_thread_idle();
}
srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */
psscr = mfspr(SPRN_PSSCR);
WARN_ON_ONCE(!srr1);
WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
hmi_exception_realmode(NULL);
/*
* On POWER10, SRR1 bits do not match exactly as expected.
* SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
* just always test PSSCR for SPR/TB state loss.
*/
pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
if (likely(pls < deep_spr_loss_state)) {
if (sprs_saved)
atomic_stop_thread_idle();
goto out;
}
/* HV state loss */
BUG_ON(!sprs_saved);
atomic_lock_thread_idle();
if ((*state & core_thread_mask) != 0)
goto core_woken;
/* XXX: restore per-core SPRs here */
if (pls >= pnv_first_tb_loss_level) {
/* TB loss */
if (opal_resync_timebase() != OPAL_SUCCESS)
BUG();
}
/*
* isync after restoring shared SPRs and before unlocking. Unlock
* only contains hwsync which does not necessarily do the right
* thing for SPRs.
*/
isync();
core_woken:
atomic_unlock_and_stop_thread_idle();
/* XXX: restore per-thread SPRs here */
if (!radix_enabled())
__slb_restore_bolted_realmode();
out:
if (mmu_on)
mtmsr(MSR_KERNEL);
return srr1;
}
#ifdef CONFIG_HOTPLUG_CPU
static unsigned long arch300_offline_stop(unsigned long psscr)
{
unsigned long srr1;
#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
__ppc64_runlatch_off();
if (cpu_has_feature(CPU_FTR_ARCH_31))
srr1 = power10_idle_stop(psscr, true);
else
srr1 = power9_idle_stop(psscr, true);
__ppc64_runlatch_on();
#else
/*
* Tell KVM we're entering idle.
* This does not have to be done in real mode because the P9 MMU
* is independent per-thread. Some steppings share radix/hash mode
* between threads, but in that case KVM has a barrier sync in real
* mode before and after switching between radix and hash.
*
* kvm_start_guest must still be called in real mode though, hence
* the false argument.
*/
local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
__ppc64_runlatch_off();
if (cpu_has_feature(CPU_FTR_ARCH_31))
srr1 = power10_idle_stop(psscr, false);
else
srr1 = power9_idle_stop(psscr, false);
__ppc64_runlatch_on();
local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
/* Order setting hwthread_state vs. testing hwthread_req */
smp_mb();
if (local_paca->kvm_hstate.hwthread_req)
srr1 = idle_kvm_start_guest(srr1);
mtmsr(MSR_KERNEL);
#endif
return srr1;
}
#endif
void arch300_idle_type(unsigned long stop_psscr_val,
unsigned long stop_psscr_mask)
{
unsigned long psscr;
unsigned long srr1;
if (!prep_irq_for_idle_irqsoff())
return;
psscr = mfspr(SPRN_PSSCR);
psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
__ppc64_runlatch_off();
if (cpu_has_feature(CPU_FTR_ARCH_31))
srr1 = power10_idle_stop(psscr, true);
else
srr1 = power9_idle_stop(psscr, true);
__ppc64_runlatch_on();
fini_irq_for_idle_irqsoff();
irq_set_pending_from_srr1(srr1);
}
/*
* Used for ppc_md.power_save which needs a function with no parameters
*/
static void arch300_idle(void)
{
arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
}
#ifdef CONFIG_HOTPLUG_CPU
void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
@@ -995,7 +1102,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
psscr = mfspr(SPRN_PSSCR);
psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
pnv_deepest_stop_psscr_val;
-srr1 = power9_offline_stop(psscr);
+srr1 = arch300_offline_stop(psscr);
} else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
srr1 = power7_offline();
} else {
@@ -1093,11 +1200,15 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
* @dt_idle_states: Number of idle state entries
* Returns 0 on success
*/
-static void __init pnv_power9_idle_init(void)
+static void __init pnv_arch300_idle_init(void)
{
u64 max_residency_ns = 0;
int i;
/* stop is not really architected, we only have p9,p10 drivers */
if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9))
return;
/*
* pnv_deepest_stop_{val,mask} should be set to values corresponding to
* the deepest stop state.
@@ -1112,6 +1223,11 @@ static void __init pnv_power9_idle_init(void)
struct pnv_idle_states_t *state = &pnv_idle_states[i];
u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;
/* No deep loss driver implemented for POWER10 yet */
if (pvr_version_is(PVR_POWER10) &&
state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT))
continue;
if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
(pnv_first_tb_loss_level > psscr_rl))
pnv_first_tb_loss_level = psscr_rl;
@@ -1162,7 +1278,7 @@ static void __init pnv_power9_idle_init(void)
if (unlikely(!default_stop_found)) {
pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
} else {
-ppc_md.power_save = power9_idle;
+ppc_md.power_save = arch300_idle;
pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
pnv_default_stop_val, pnv_default_stop_mask);
}
@@ -1224,7 +1340,7 @@ static void __init pnv_probe_idle_states(void)
}
if (cpu_has_feature(CPU_FTR_ARCH_300))
-pnv_power9_idle_init();
+pnv_arch300_idle_init();
for (i = 0; i < nr_pnv_idle_states; i++)
supported_cpuidle_states |= pnv_idle_states[i].flags;
@@ -1295,7 +1411,7 @@ static int pnv_parse_cpuidle_dt(void)
for (i = 0; i < nr_idle_states; i++)
pnv_idle_states[i].residency_ns = temp_u32[i];
-/* For power9 */
+/* For power9 and later */
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
/* Read pm_crtl_val */
if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr",
@@ -1358,8 +1474,8 @@ static int __init pnv_init_idle_states(void)
if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
/* P7/P8 nap */
p->thread_idle_state = PNV_THREAD_RUNNING;
-} else {
-/* P9 stop */
+} else if (pvr_version_is(PVR_POWER9)) {
+/* P9 stop workarounds */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
p->requested_psscr = 0;
atomic_set(&p->dont_stop, 0);

drivers/cpuidle/cpuidle-powernv.c

@@ -141,7 +141,7 @@ static int stop_loop(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
{
-power9_idle_type(stop_psscr_table[index].val,
+arch300_idle_type(stop_psscr_table[index].val,
stop_psscr_table[index].mask);
return index;
}
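
To make the end-to-end flow concrete, here is a worked sketch of the PSSCR
value the wrapper builds before entering stop. This is illustrative only:
the real per-state val/mask pairs come from stop_psscr_table[], which is
filled from the device tree, so the RL number below is hypothetical. A
shallow request such as stop 2 with EC=ESL=1 sits below deep_spr_loss_state
on typical firmware, so on POWER10 it takes the isa300_idle_stop_mayloss()
path in power10_idle_stop() and never reaches the unimplemented deep-loss
handling.

    unsigned long psscr = mfspr(SPRN_PSSCR);

    /* Equivalent of arch300_idle_type()'s (psscr & ~mask) | val composition */
    psscr &= ~PSSCR_RL_MASK;        /* clear the requested level (RL) field */
    psscr |= 2;                     /* RL = 2: request stop 2 (shallow, hypothetical) */
    psscr |= PSSCR_EC | PSSCR_ESL;  /* EC=ESL=1: the "mayloss" entry path */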