2011-01-24 15:42:41 +08:00
|
|
|
/*
|
powerpc: Rework lazy-interrupt handling
The current implementation of lazy interrupts handling has some
issues that this tries to address.
We don't do the various workarounds we need to do when re-enabling
interrupts in some cases such as when returning from an interrupt
and thus we may still lose or get delayed decrementer or doorbell
interrupts.
The current scheme also makes it much harder to handle the external
"edge" interrupts provided by some BookE processors when using the
EPR facility (External Proxy) and the Freescale Hypervisor.
Additionally, we tend to keep interrupts hard disabled in a number
of cases, such as decrementer interrupts, external interrupts, or
when a masked decrementer interrupt is pending. This is sub-optimal.
This is an attempt at fixing it all in one go by reworking the way
we do the lazy interrupt disabling from the ground up.
The base idea is to replace the "hard_enabled" field with a
"irq_happened" field in which we store a bit mask of what interrupt
occurred while soft-disabled.
When re-enabling, either via arch_local_irq_restore() or when returning
from an interrupt, we can now decide what to do by testing bits in that
field.
We then implement replaying of the missed interrupts either by
re-using the existing exception frame (in exception exit case) or via
the creation of a new one from an assembly trampoline (in the
arch_local_irq_enable case).
This removes the need to play with the decrementer to try to create
fake interrupts, among others.
In addition, this adds a few refinements:
- We no longer hard disable decrementer interrupts that occur
while soft-disabled. We now simply bump the decrementer back to max
(on BookS) or leave it stopped (on BookE) and continue with hard interrupts
enabled, which means that we'll potentially get better sample quality from
performance monitor interrupts.
- Timer, decrementer and doorbell interrupts now hard-enable
shortly after removing the source of the interrupt, which means
they no longer run entirely hard disabled. Again, this will improve
perf sample quality.
- On Book3E 64-bit, we now make the performance monitor interrupt
act as an NMI like Book3S (the necessary C code for that to work
appear to already be present in the FSL perf code, notably calling
nmi_enter instead of irq_enter). (This also fixes a bug where BookE
perfmon interrupts could clobber r14 ... oops)
- We could make "masked" decrementer interrupts act as NMIs when doing
timer-based perf sampling to improve the sample quality.
Signed-off-by-yet: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
v2:
- Add hard-enable to decrementer, timer and doorbells
- Fix CR clobber in masked irq handling on BookE
- Make embedded perf interrupt act as an NMI
- Add a PACA_HAPPENED_EE_EDGE for use by FSL if they want
to retrigger an interrupt without preventing hard-enable
v3:
- Fix or vs. ori bug on Book3E
- Fix enabling of interrupts for some exceptions on Book3E
v4:
- Fix resend of doorbells on return from interrupt on Book3E
v5:
- Rebased on top of my latest series, which involves some significant
rework of some aspects of the patch.
v6:
- 32-bit compile fix
- more compile fixes with various .config combos
- factor out the asm code to soft-disable interrupts
- remove the C wrapper around preempt_schedule_irq
v7:
- Fix a bug with hard irq state tracking on native power7
2012-03-06 15:27:59 +08:00
|
|
|
* This file contains the power_save function for Power7 CPUs.
|
2011-01-24 15:42:41 +08:00
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/threads.h>
|
|
|
|
#include <asm/processor.h>
|
|
|
|
#include <asm/page.h>
|
|
|
|
#include <asm/cputable.h>
|
|
|
|
#include <asm/thread_info.h>
|
|
|
|
#include <asm/ppc_asm.h>
|
|
|
|
#include <asm/asm-offsets.h>
|
|
|
|
#include <asm/ppc-opcode.h>
|
powerpc: Rework lazy-interrupt handling
The current implementation of lazy interrupts handling has some
issues that this tries to address.
We don't do the various workarounds we need to do when re-enabling
interrupts in some cases such as when returning from an interrupt
and thus we may still lose or get delayed decrementer or doorbell
interrupts.
The current scheme also makes it much harder to handle the external
"edge" interrupts provided by some BookE processors when using the
EPR facility (External Proxy) and the Freescale Hypervisor.
Additionally, we tend to keep interrupts hard disabled in a number
of cases, such as decrementer interrupts, external interrupts, or
when a masked decrementer interrupt is pending. This is sub-optimal.
This is an attempt at fixing it all in one go by reworking the way
we do the lazy interrupt disabling from the ground up.
The base idea is to replace the "hard_enabled" field with a
"irq_happened" field in which we store a bit mask of what interrupt
occurred while soft-disabled.
When re-enabling, either via arch_local_irq_restore() or when returning
from an interrupt, we can now decide what to do by testing bits in that
field.
We then implement replaying of the missed interrupts either by
re-using the existing exception frame (in exception exit case) or via
the creation of a new one from an assembly trampoline (in the
arch_local_irq_enable case).
This removes the need to play with the decrementer to try to create
fake interrupts, among others.
In addition, this adds a few refinements:
- We no longer hard disable decrementer interrupts that occur
while soft-disabled. We now simply bump the decrementer back to max
(on BookS) or leave it stopped (on BookE) and continue with hard interrupts
enabled, which means that we'll potentially get better sample quality from
performance monitor interrupts.
- Timer, decrementer and doorbell interrupts now hard-enable
shortly after removing the source of the interrupt, which means
they no longer run entirely hard disabled. Again, this will improve
perf sample quality.
- On Book3E 64-bit, we now make the performance monitor interrupt
act as an NMI like Book3S (the necessary C code for that to work
appear to already be present in the FSL perf code, notably calling
nmi_enter instead of irq_enter). (This also fixes a bug where BookE
perfmon interrupts could clobber r14 ... oops)
- We could make "masked" decrementer interrupts act as NMIs when doing
timer-based perf sampling to improve the sample quality.
Signed-off-by-yet: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
v2:
- Add hard-enable to decrementer, timer and doorbells
- Fix CR clobber in masked irq handling on BookE
- Make embedded perf interrupt act as an NMI
- Add a PACA_HAPPENED_EE_EDGE for use by FSL if they want
to retrigger an interrupt without preventing hard-enable
v3:
- Fix or vs. ori bug on Book3E
- Fix enabling of interrupts for some exceptions on Book3E
v4:
- Fix resend of doorbells on return from interrupt on Book3E
v5:
- Rebased on top of my latest series, which involves some significant
rework of some aspects of the patch.
v6:
- 32-bit compile fix
- more compile fixes with various .config combos
- factor out the asm code to soft-disable interrupts
- remove the C wrapper around preempt_schedule_irq
v7:
- Fix a bug with hard irq state tracking on native power7
2012-03-06 15:27:59 +08:00
|
|
|
#include <asm/hw_irq.h>
|
2012-02-03 08:54:17 +08:00
|
|
|
#include <asm/kvm_book3s_asm.h>
|
2014-02-26 08:08:43 +08:00
|
|
|
#include <asm/opal.h>
|
2011-01-24 15:42:41 +08:00
|
|
|
|
|
|
|
#undef DEBUG
|
|
|
|
|
2014-02-26 08:08:25 +08:00
|
|
|
/* Idle state entry routines */
|
2011-01-24 15:42:41 +08:00
|
|
|
|
2014-02-26 08:08:25 +08:00
|
|
|
/*
 * IDLE_STATE_ENTER_SEQ(IDLE_INST) - emit the idle-state entry sequence.
 *
 * IDLE_INST is the instruction (or instruction macro) that actually
 * enters the idle state; this file passes PPC_NAP and PPC_SLEEP.
 *
 * The sequence stores r0 to 0(r1), issues ptesync, reloads r0 from the
 * same slot, spins on a compare of r0 with itself, then executes
 * IDLE_INST followed by a branch-to-self. cr0 is modified.
 *
 * NOTE(review): the store/reload/compare presumably exists to make sure
 * the preceding storage accesses have completed before IDLE_INST runs;
 * confirm against the processor documentation.
 */
#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
|
|
|
|
/* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
|
|
|
|
std r0,0(r1); \
|
|
|
|
ptesync; \
|
|
|
|
ld r0,0(r1); \
|
|
|
|
1: cmp cr0,r0,r0; \
|
|
|
|
bne 1b; \
|
|
|
|
IDLE_INST; \
|
|
|
|
b .
|
2011-01-24 15:42:41 +08:00
|
|
|
|
2014-02-26 08:08:25 +08:00
|
|
|
.text
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Pass requested state in r3:
|
|
|
|
* 0 - nap
|
|
|
|
* 1 - sleep
|
|
|
|
*
* Builds an INT_FRAME_SIZE stack frame holding LR (in both _LINK and
* _NIP), r2, the non-volatile GPRs, CR and the MSR value kept in r9,
* then publishes the frame pointer in PACAR1(r13).
*
* If PACAIRQHAPPENED(r13) is non-zero after the MSR update, the frame
* is popped and the routine returns to its caller without entering the
* idle state. Otherwise execution continues straight into
* power7_enter_nap_mode, which follows this routine.
*/
|
|
|
|
_GLOBAL(power7_powersave_common)
|
|
|
|
/* r3 carries the requested state (0 = nap, 1 = sleep); it is tested in power7_enter_nap_mode */
|
2011-01-24 15:42:41 +08:00
|
|
|
/* NAP is a state loss, we create a regs frame on the
|
|
|
|
* stack, fill it up with the state we care about and
|
|
|
|
* stick a pointer to it in PACAR1. We really only
|
|
|
|
* need to save PC, some CR bits and the NV GPRs,
|
|
|
|
* but for now an interrupt frame will do.
|
|
|
|
*/
|
|
|
|
/* LR goes to 16(r1) of the caller's frame (reloaded on the early-return
 * path below) and to _LINK/_NIP of the new frame (the wakeup routines
 * load _NIP into SRR0 before rfid).
 */
mflr r0
|
|
|
|
std r0,16(r1)
|
|
|
|
stdu r1,-INT_FRAME_SIZE(r1)
|
|
|
|
std r0,_LINK(r1)
|
|
|
|
std r0,_NIP(r1)
|
|
|
|
|
|
|
|
#ifndef CONFIG_SMP
|
|
|
|
/* Make sure FPU, VSX etc... are flushed as we may lose
|
|
|
|
* state when going to nap mode
|
|
|
|
*/
|
|
|
|
/* NOTE(review): r3 (the requested state) is not saved around this call;
 * confirm discard_lazy_cpu_state leaves it intact on !SMP builds.
 */
bl .discard_lazy_cpu_state
|
|
|
|
#endif /* CONFIG_SMP */
|
|
|
|
|
|
|
|
/* Hard disable interrupts */
|
|
|
|
mfmsr r9
|
|
|
|
/* Rotate left 48, clear the top bit, rotate left 16: this clears one
 * bit of the MSR copy in r9 (the interrupt-enable bit, going by the
 * surrounding comments).
 */
rldicl r9,r9,48,1
|
|
|
|
rotldi r9,r9,16
|
|
|
|
mtmsrd r9,1 /* hard-disable interrupts */
|
powerpc: Rework lazy-interrupt handling
The current implementation of lazy interrupts handling has some
issues that this tries to address.
We don't do the various workarounds we need to do when re-enabling
interrupts in some cases such as when returning from an interrupt
and thus we may still lose or get delayed decrementer or doorbell
interrupts.
The current scheme also makes it much harder to handle the external
"edge" interrupts provided by some BookE processors when using the
EPR facility (External Proxy) and the Freescale Hypervisor.
Additionally, we tend to keep interrupts hard disabled in a number
of cases, such as decrementer interrupts, external interrupts, or
when a masked decrementer interrupt is pending. This is sub-optimal.
This is an attempt at fixing it all in one go by reworking the way
we do the lazy interrupt disabling from the ground up.
The base idea is to replace the "hard_enabled" field with a
"irq_happened" field in which we store a bit mask of what interrupt
occurred while soft-disabled.
When re-enabling, either via arch_local_irq_restore() or when returning
from an interrupt, we can now decide what to do by testing bits in that
field.
We then implement replaying of the missed interrupts either by
re-using the existing exception frame (in exception exit case) or via
the creation of a new one from an assembly trampoline (in the
arch_local_irq_enable case).
This removes the need to play with the decrementer to try to create
fake interrupts, among others.
In addition, this adds a few refinements:
- We no longer hard disable decrementer interrupts that occur
while soft-disabled. We now simply bump the decrementer back to max
(on BookS) or leave it stopped (on BookE) and continue with hard interrupts
enabled, which means that we'll potentially get better sample quality from
performance monitor interrupts.
- Timer, decrementer and doorbell interrupts now hard-enable
shortly after removing the source of the interrupt, which means
they no longer run entirely hard disabled. Again, this will improve
perf sample quality.
- On Book3E 64-bit, we now make the performance monitor interrupt
act as an NMI like Book3S (the necessary C code for that to work
appear to already be present in the FSL perf code, notably calling
nmi_enter instead of irq_enter). (This also fixes a bug where BookE
perfmon interrupts could clobber r14 ... oops)
- We could make "masked" decrementer interrupts act as NMIs when doing
timer-based perf sampling to improve the sample quality.
Signed-off-by-yet: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
v2:
- Add hard-enable to decrementer, timer and doorbells
- Fix CR clobber in masked irq handling on BookE
- Make embedded perf interrupt act as an NMI
- Add a PACA_HAPPENED_EE_EDGE for use by FSL if they want
to retrigger an interrupt without preventing hard-enable
v3:
- Fix or vs. ori bug on Book3E
- Fix enabling of interrupts for some exceptions on Book3E
v4:
- Fix resend of doorbells on return from interrupt on Book3E
v5:
- Rebased on top of my latest series, which involves some significant
rework of some aspects of the patch.
v6:
- 32-bit compile fix
- more compile fixes with various .config combos
- factor out the asm code to soft-disable interrupts
- remove the C wrapper around preempt_schedule_irq
v7:
- Fix a bug with hard irq state tracking on native power7
2012-03-06 15:27:59 +08:00
|
|
|
|
|
|
|
/* Check if something happened while soft-disabled */
|
|
|
|
lbz r0,PACAIRQHAPPENED(r13)
|
|
|
|
cmpwi cr0,r0,0
|
|
|
|
beq 1f
|
|
|
|
/* Something is pending: pop the frame, reload LR from the caller's
 * frame and return without entering the idle state. The MSR is not
 * written again on this path.
 */
addi r1,r1,INT_FRAME_SIZE
|
|
|
|
ld r0,16(r1)
|
|
|
|
mtlr r0
|
|
|
|
blr
|
|
|
|
|
|
|
|
1: /* We mark irqs hard disabled as this is the state we'll
|
|
|
|
* be in when returning and we need to tell arch_local_irq_restore()
|
|
|
|
* about it
|
|
|
|
*/
|
|
|
|
li r0,PACA_IRQ_HARD_DIS
|
|
|
|
stb r0,PACAIRQHAPPENED(r13)
|
|
|
|
|
|
|
|
/* We haven't lost state ... yet */
|
2011-01-24 15:42:41 +08:00
|
|
|
li r0,0
|
2011-12-06 03:47:26 +08:00
|
|
|
stb r0,PACA_NAPSTATELOST(r13)
|
2011-01-24 15:42:41 +08:00
|
|
|
|
|
|
|
/* Continue saving state */
|
|
|
|
SAVE_GPR(2, r1)
|
|
|
|
SAVE_NVGPRS(r1)
|
2014-02-26 08:08:25 +08:00
|
|
|
mfcr r4
|
|
|
|
std r4,_CCR(r1)
|
2011-01-24 15:42:41 +08:00
|
|
|
/* r9 still holds the MSR value written by mtmsrd above; the wakeup
 * routines load this slot into SRR1 before rfid.
 */
std r9,_MSR(r1)
|
|
|
|
/* Publish the frame so the wakeup routines can reload r1 from PACAR1 */
std r1,PACAR1(r13)
/* No branch or return here: execution continues into power7_enter_nap_mode */
|
|
|
|
|
2013-10-30 22:34:31 +08:00
|
|
|
/*
 * Final step of idle entry. Expects the requested state in r3:
 * 1 selects PPC_SLEEP, any other value selects PPC_NAP.
 *
 * When CONFIG_KVM_BOOK3S_HV_POSSIBLE is set, KVM_HWTHREAD_IN_NAP is
 * first stored to HSTATE_HWTHREAD_STATE(r13); this happens for both
 * the nap and the sleep request. Modifies cr0, and r4 in the KVM case.
 */
_GLOBAL(power7_enter_nap_mode)
|
2013-10-08 00:47:52 +08:00
|
|
|
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
2012-02-03 08:54:17 +08:00
|
|
|
/* Tell KVM we're napping */
|
|
|
|
li r4,KVM_HWTHREAD_IN_NAP
|
|
|
|
stb r4,HSTATE_HWTHREAD_STATE(r13)
|
|
|
|
#endif
|
2014-02-26 08:08:25 +08:00
|
|
|
/* r3 == 1 means sleep; everything else takes the nap sequence */
cmpwi cr0,r3,1
|
|
|
|
beq 2f
|
|
|
|
IDLE_STATE_ENTER_SEQ(PPC_NAP)
|
|
|
|
/* No return */
|
|
|
|
2: IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
|
|
|
|
/* No return */
|
2012-02-03 08:54:17 +08:00
|
|
|
|
2014-02-26 08:08:25 +08:00
|
|
|
/*
 * Idle entry point gated on powersave_nap: loads the 32-bit
 * powersave_nap variable and returns to the caller immediately if it
 * is zero. Otherwise execution continues into power7_nap, which
 * follows this routine. Uses r3, r4 and cr0.
 */
_GLOBAL(power7_idle)
|
|
|
|
/* Now check if user or arch enabled NAP mode */
|
|
|
|
LOAD_REG_ADDRBASE(r3,powersave_nap)
|
|
|
|
lwz r4,ADDROFF(powersave_nap)(r3)
|
|
|
|
cmpwi 0,r4,0
|
|
|
|
/* powersave_nap == 0: return without napping */
beqlr
|
|
|
|
/* fall through */
|
|
|
|
|
|
|
|
/*
 * Request the nap state: sets r3 to 0 and branches (without link) to
 * power7_powersave_common.
 */
_GLOBAL(power7_nap)
|
|
|
|
li r3,0
|
|
|
|
b power7_powersave_common
|
|
|
|
/* No return: the branch does not set LR, so control never comes back here */
|
|
|
|
|
|
|
|
/*
 * Request the sleep state: sets r3 to 1 and branches (without link) to
 * power7_powersave_common.
 */
_GLOBAL(power7_sleep)
|
|
|
|
li r3,1
|
|
|
|
b power7_powersave_common
|
|
|
|
/* No return: the branch does not set LR, so control never comes back here */
|
2011-01-24 15:42:41 +08:00
|
|
|
|
2014-02-26 08:08:43 +08:00
|
|
|
/*
 * Wakeup path that issues an OPAL_RESYNC_TIMEBASE call and then
 * restores the context saved in the frame pointed to by PACAR1(r13).
 *
 * Restores the non-volatile GPRs, r2 and CR from the frame, pops the
 * frame, and uses rfid to resume at the saved _NIP with the saved _MSR.
 * r3 holds the SRR1 value read just before SRR1 is overwritten.
 */
_GLOBAL(power7_wakeup_tb_loss)
|
|
|
|
ld r2,PACATOC(r13);
|
|
|
|
ld r1,PACAR1(r13)
|
|
|
|
|
|
|
|
/* Time base re-sync */
|
|
|
|
/* r0 = call token; the branch target comes from 8(opal) and r2 from
 * 0(opal). NOTE(review): presumably the OPAL entry point and base
 * address; confirm against the definition of the opal structure.
 */
li r0,OPAL_RESYNC_TIMEBASE
|
|
|
|
LOAD_REG_ADDR(r11,opal);
|
|
|
|
ld r12,8(r11);
|
|
|
|
ld r2,0(r11);
|
|
|
|
mtctr r12
|
|
|
|
bctrl
|
|
|
|
|
|
|
|
/* TODO: Check r3 for failure */
|
|
|
|
|
|
|
|
REST_NVGPRS(r1)
|
|
|
|
/* r2 was overwritten for the call above; reload it from the frame */
REST_GPR(2, r1)
|
|
|
|
ld r3,_CCR(r1)
|
|
|
|
ld r4,_MSR(r1)
|
|
|
|
ld r5,_NIP(r1)
|
|
|
|
addi r1,r1,INT_FRAME_SIZE
|
|
|
|
mtcr r3
|
|
|
|
/* NOTE(review): SRR1 is read only after the bctrl call above; whether
 * that call preserves SRR1 is not visible here. Confirm.
 */
mfspr r3,SPRN_SRR1 /* Return SRR1 */
|
|
|
|
mtspr SPRN_SRR1,r4
|
|
|
|
mtspr SPRN_SRR0,r5
|
|
|
|
rfid
|
|
|
|
|
2011-01-24 15:42:41 +08:00
|
|
|
/*
 * Wakeup path with full register restore: reloads r1 from PACAR1(r13),
 * restores the non-volatile GPRs, r2 and CR from the saved frame, pops
 * the frame, and uses rfid to resume at the saved _NIP with the saved
 * _MSR. Uses r3, r4 and r5 as scratch.
 */
_GLOBAL(power7_wakeup_loss)
|
|
|
|
ld r1,PACAR1(r13)
|
|
|
|
REST_NVGPRS(r1)
|
|
|
|
REST_GPR(2, r1)
|
|
|
|
ld r3,_CCR(r1)
|
|
|
|
ld r4,_MSR(r1)
|
|
|
|
ld r5,_NIP(r1)
|
|
|
|
addi r1,r1,INT_FRAME_SIZE
|
|
|
|
mtcr r3
|
|
|
|
mtspr SPRN_SRR1,r4
|
|
|
|
mtspr SPRN_SRR0,r5
|
|
|
|
rfid
|
|
|
|
|
|
|
|
/*
 * Wakeup path for the case where register state was kept. If
 * PACA_NAPSTATELOST(r13) is non-zero, control is handed to
 * power7_wakeup_loss for a full restore instead.
 *
 * Otherwise only r1 (from PACAR1), the saved _MSR and the saved _NIP
 * are recovered; the GPRs, r2 and CR are not reloaded from the frame.
 * Resumes via rfid. Uses r0, r4, r5 and cr0.
 */
_GLOBAL(power7_wakeup_noloss)
|
2011-12-06 03:47:26 +08:00
|
|
|
lbz r0,PACA_NAPSTATELOST(r13)
|
|
|
|
cmpwi r0,0
|
|
|
|
/* NOTE(review): this branch targets the dot-prefixed symbol, while
 * power7_nap/power7_sleep branch to power7_powersave_common without a
 * dot; confirm which form the build expects.
 */
bne .power7_wakeup_loss
|
2011-01-24 15:42:41 +08:00
|
|
|
ld r1,PACAR1(r13)
|
|
|
|
ld r4,_MSR(r1)
|
|
|
|
ld r5,_NIP(r1)
|
|
|
|
addi r1,r1,INT_FRAME_SIZE
|
|
|
|
mtspr SPRN_SRR1,r4
|
|
|
|
mtspr SPRN_SRR0,r5
|
|
|
|
rfid
|