linux-sg2042/arch/ia64/kernel/minstate.h

251 lines
8.0 KiB
C
Raw Normal View History

#include <asm/cache.h>
#include "entry.h"
#include "paravirt_inst.h"
cputime: Generic on-demand virtual cputime accounting If we want to stop the tick further idle, we need to be able to account the cputime without using the tick. Virtual based cputime accounting solves that problem by hooking into kernel/user boundaries. However implementing CONFIG_VIRT_CPU_ACCOUNTING require low level hooks and involves more overhead. But we already have a generic context tracking subsystem that is required for RCU needs by archs which plan to shut down the tick outside idle. This patch implements a generic virtual based cputime accounting that relies on these generic kernel/user hooks. There are some upsides of doing this: - This requires no arch code to implement CONFIG_VIRT_CPU_ACCOUNTING if context tracking is already built (already necessary for RCU in full tickless mode). - We can rely on the generic context tracking subsystem to dynamically (de)activate the hooks, so that we can switch anytime between virtual and tick based accounting. This way we don't have the overhead of the virtual accounting when the tick is running periodically. And one downside: - There is probably more overhead than a native virtual based cputime accounting. But this relies on hooks that are already set anyway. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Ingo Molnar <mingo@kernel.org> Cc: Li Zhong <zhong@linux.vnet.ibm.com> Cc: Namhyung Kim <namhyung.kim@lge.com> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Paul Gortmaker <paul.gortmaker@windriver.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Thomas Gleixner <tglx@linutronix.de>
2012-07-25 13:56:04 +08:00
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/* read ar.itc in advance, and use it before leaving bank 0 */
#define ACCOUNT_GET_STAMP \
(pUStk) mov.m r20=ar.itc;
#define ACCOUNT_SYS_ENTER \
(pUStk) br.call.spnt rp=account_sys_enter \
;;
#else
#define ACCOUNT_GET_STAMP
#define ACCOUNT_SYS_ENTER
#endif
.section ".data..patch.rse", "a"
[IA64] Workaround for RSE issue Problem: An application violating the architectural rules regarding operation dependencies and having specific Register Stack Engine (RSE) state at the time of the violation, may result in an illegal operation fault and invalid RSE state. Such faults may initiate a cascade of repeated illegal operation faults within OS interruption handlers. The specific behavior is OS dependent. Implication: An application causing an illegal operation fault with specific RSE state may result in a series of illegal operation faults and an eventual OS stack overflow condition. Workaround: OS interruption handlers that switch to kernel backing store implement a check for invalid RSE state to avoid the series of illegal operation faults. The core of the workaround is the RSE_WORKAROUND code sequence inserted into each invocation of the SAVE_MIN_WITH_COVER and SAVE_MIN_WITH_COVER_R19 macros. This sequence includes hard-coded constants that depend on the number of stacked physical registers being 96. The rest of this patch consists of code to disable this workaround should this not be the case (with the presumption that if a future Itanium processor increases the number of registers, it would also remove the need for this patch). Move the start of the RBS up to a mod32 boundary to avoid some corner cases. The dispatch_illegal_op_fault code outgrew the spot it was squatting in when built with this patch and CONFIG_VIRT_CPU_ACCOUNTING=y Move it out to the end of the ivt. Signed-off-by: Tony Luck <tony.luck@intel.com>
2008-05-28 04:23:16 +08:00
.previous
/*
* DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
* the minimum state necessary that allows us to turn psr.ic back
* on.
*
* Assumed state upon entry:
* psr.ic: off
* r31: contains saved predicates (pr)
*
* Upon exit, the state is as follows:
* psr.ic: off
* r2 = points to &pt_regs.r16
* r8 = contents of ar.ccv
* r9 = contents of ar.csd
* r10 = contents of ar.ssd
* r11 = FPSR_DEFAULT
* r12 = kernel sp (kernel virtual address)
* r13 = points to current task_struct (kernel virtual address)
* p15 = TRUE if psr.i is set in cr.ipsr
* predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
* preserved
*
* Note that psr.ic is NOT turned on by this macro. This is so that
* we can pass interruption state as arguments to a handler.
*/
#define IA64_NATIVE_DO_SAVE_MIN(__COVER,SAVE_IFS,EXTRA,WORKAROUND) \
mov r16=IA64_KR(CURRENT); /* M */ \
mov r27=ar.rsc; /* M */ \
mov r20=r1; /* A */ \
mov r25=ar.unat; /* M */ \
MOV_FROM_IPSR(p0,r29); /* M */ \
mov r26=ar.pfs; /* I */ \
MOV_FROM_IIP(r28); /* M */ \
mov r21=ar.fpsr; /* M */ \
__COVER; /* B;; (or nothing) */ \
;; \
adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \
;; \
ld1 r17=[r16]; /* load current->thread.on_ustack flag */ \
st1 [r16]=r0; /* clear current->thread.on_ustack flag */ \
adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 \
/* switch from user to kernel RBS: */ \
;; \
invala; /* M */ \
SAVE_IFS; \
cmp.eq pKStk,pUStk=r0,r17; /* are we in kernel mode already? */ \
;; \
(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
;; \
(pUStk) mov.m r24=ar.rnat; \
(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \
(pKStk) mov r1=sp; /* get sp */ \
;; \
(pUStk) lfetch.fault.excl.nt1 [r22]; \
(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \
;; \
(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \
(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
;; \
(pUStk) mov r18=ar.bsp; \
(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \
adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \
adds r16=PT(CR_IPSR),r1; \
;; \
lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \
st8 [r16]=r29; /* save cr.ipsr */ \
;; \
lfetch.fault.excl.nt1 [r17]; \
tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \
mov r29=b0 \
;; \
[IA64] Workaround for RSE issue Problem: An application violating the architectural rules regarding operation dependencies and having specific Register Stack Engine (RSE) state at the time of the violation, may result in an illegal operation fault and invalid RSE state. Such faults may initiate a cascade of repeated illegal operation faults within OS interruption handlers. The specific behavior is OS dependent. Implication: An application causing an illegal operation fault with specific RSE state may result in a series of illegal operation faults and an eventual OS stack overflow condition. Workaround: OS interruption handlers that switch to kernel backing store implement a check for invalid RSE state to avoid the series of illegal operation faults. The core of the workaround is the RSE_WORKAROUND code sequence inserted into each invocation of the SAVE_MIN_WITH_COVER and SAVE_MIN_WITH_COVER_R19 macros. This sequence includes hard-coded constants that depend on the number of stacked physical registers being 96. The rest of this patch consists of code to disable this workaround should this not be the case (with the presumption that if a future Itanium processor increases the number of registers, it would also remove the need for this patch). Move the start of the RBS up to a mod32 boundary to avoid some corner cases. The dispatch_illegal_op_fault code outgrew the spot it was squatting in when built with this patch and CONFIG_VIRT_CPU_ACCOUNTING=y Move it out to the end of the ivt. Signed-off-by: Tony Luck <tony.luck@intel.com>
2008-05-28 04:23:16 +08:00
WORKAROUND; \
adds r16=PT(R8),r1; /* initialize first base pointer */ \
adds r17=PT(R9),r1; /* initialize second base pointer */ \
(pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \
;; \
.mem.offset 0,0; st8.spill [r16]=r8,16; \
.mem.offset 8,0; st8.spill [r17]=r9,16; \
;; \
.mem.offset 0,0; st8.spill [r16]=r10,24; \
.mem.offset 8,0; st8.spill [r17]=r11,24; \
;; \
st8 [r16]=r28,16; /* save cr.iip */ \
st8 [r17]=r30,16; /* save cr.ifs */ \
(pUStk) sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \
mov r8=ar.ccv; \
mov r9=ar.csd; \
mov r10=ar.ssd; \
movl r11=FPSR_DEFAULT; /* L-unit */ \
;; \
st8 [r16]=r25,16; /* save ar.unat */ \
st8 [r17]=r26,16; /* save ar.pfs */ \
shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \
;; \
st8 [r16]=r27,16; /* save ar.rsc */ \
(pUStk) st8 [r17]=r24,16; /* save ar.rnat */ \
(pKStk) adds r17=16,r17; /* skip over ar_rnat field */ \
;; /* avoid RAW on r16 & r17 */ \
(pUStk) st8 [r16]=r23,16; /* save ar.bspstore */ \
st8 [r17]=r31,16; /* save predicates */ \
(pKStk) adds r16=16,r16; /* skip over ar_bspstore field */ \
;; \
st8 [r16]=r29,16; /* save b0 */ \
st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \
cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \
;; \
.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \
.mem.offset 8,0; st8.spill [r17]=r12,16; \
adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \
;; \
.mem.offset 0,0; st8.spill [r16]=r13,16; \
.mem.offset 8,0; st8.spill [r17]=r21,16; /* save ar.fpsr */ \
mov r13=IA64_KR(CURRENT); /* establish `current' */ \
;; \
.mem.offset 0,0; st8.spill [r16]=r15,16; \
.mem.offset 8,0; st8.spill [r17]=r14,16; \
;; \
.mem.offset 0,0; st8.spill [r16]=r2,16; \
.mem.offset 8,0; st8.spill [r17]=r3,16; \
ACCOUNT_GET_STAMP \
adds r2=IA64_PT_REGS_R16_OFFSET,r1; \
;; \
EXTRA; \
movl r1=__gp; /* establish kernel global pointer */ \
;; \
ACCOUNT_SYS_ENTER \
bsw.1; /* switch back to bank 1 (must be last in insn group) */ \
;;
/*
* SAVE_REST saves the remainder of pt_regs (with psr.ic on).
*
* Assumed state upon entry:
* psr.ic: on
* r2: points to &pt_regs.r16
* r3: points to &pt_regs.r17
* r8: contents of ar.ccv
* r9: contents of ar.csd
* r10: contents of ar.ssd
* r11: FPSR_DEFAULT
*
* Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
*/
#define SAVE_REST \
.mem.offset 0,0; st8.spill [r2]=r16,16; \
.mem.offset 8,0; st8.spill [r3]=r17,16; \
;; \
.mem.offset 0,0; st8.spill [r2]=r18,16; \
.mem.offset 8,0; st8.spill [r3]=r19,16; \
;; \
.mem.offset 0,0; st8.spill [r2]=r20,16; \
.mem.offset 8,0; st8.spill [r3]=r21,16; \
mov r18=b6; \
;; \
.mem.offset 0,0; st8.spill [r2]=r22,16; \
.mem.offset 8,0; st8.spill [r3]=r23,16; \
mov r19=b7; \
;; \
.mem.offset 0,0; st8.spill [r2]=r24,16; \
.mem.offset 8,0; st8.spill [r3]=r25,16; \
;; \
.mem.offset 0,0; st8.spill [r2]=r26,16; \
.mem.offset 8,0; st8.spill [r3]=r27,16; \
;; \
.mem.offset 0,0; st8.spill [r2]=r28,16; \
.mem.offset 8,0; st8.spill [r3]=r29,16; \
;; \
.mem.offset 0,0; st8.spill [r2]=r30,16; \
.mem.offset 8,0; st8.spill [r3]=r31,32; \
;; \
mov ar.fpsr=r11; /* M-unit */ \
st8 [r2]=r8,8; /* ar.ccv */ \
adds r24=PT(B6)-PT(F7),r3; \
;; \
stf.spill [r2]=f6,32; \
stf.spill [r3]=f7,32; \
;; \
stf.spill [r2]=f8,32; \
stf.spill [r3]=f9,32; \
;; \
stf.spill [r2]=f10; \
stf.spill [r3]=f11; \
adds r25=PT(B7)-PT(F11),r3; \
;; \
st8 [r24]=r18,16; /* b6 */ \
st8 [r25]=r19,16; /* b7 */ \
;; \
st8 [r24]=r9; /* ar.csd */ \
st8 [r25]=r10; /* ar.ssd */ \
;;
[IA64] Workaround for RSE issue Problem: An application violating the architectural rules regarding operation dependencies and having specific Register Stack Engine (RSE) state at the time of the violation, may result in an illegal operation fault and invalid RSE state. Such faults may initiate a cascade of repeated illegal operation faults within OS interruption handlers. The specific behavior is OS dependent. Implication: An application causing an illegal operation fault with specific RSE state may result in a series of illegal operation faults and an eventual OS stack overflow condition. Workaround: OS interruption handlers that switch to kernel backing store implement a check for invalid RSE state to avoid the series of illegal operation faults. The core of the workaround is the RSE_WORKAROUND code sequence inserted into each invocation of the SAVE_MIN_WITH_COVER and SAVE_MIN_WITH_COVER_R19 macros. This sequence includes hard-coded constants that depend on the number of stacked physical registers being 96. The rest of this patch consists of code to disable this workaround should this not be the case (with the presumption that if a future Itanium processor increases the number of registers, it would also remove the need for this patch). Move the start of the RBS up to a mod32 boundary to avoid some corner cases. The dispatch_illegal_op_fault code outgrew the spot it was squatting in when built with this patch and CONFIG_VIRT_CPU_ACCOUNTING=y Move it out to the end of the ivt. Signed-off-by: Tony Luck <tony.luck@intel.com>
2008-05-28 04:23:16 +08:00
#define RSE_WORKAROUND \
(pUStk) extr.u r17=r18,3,6; \
(pUStk) sub r16=r18,r22; \
[1:](pKStk) br.cond.sptk.many 1f; \
.xdata4 ".data..patch.rse",1b-. \
[IA64] Workaround for RSE issue Problem: An application violating the architectural rules regarding operation dependencies and having specific Register Stack Engine (RSE) state at the time of the violation, may result in an illegal operation fault and invalid RSE state. Such faults may initiate a cascade of repeated illegal operation faults within OS interruption handlers. The specific behavior is OS dependent. Implication: An application causing an illegal operation fault with specific RSE state may result in a series of illegal operation faults and an eventual OS stack overflow condition. Workaround: OS interruption handlers that switch to kernel backing store implement a check for invalid RSE state to avoid the series of illegal operation faults. The core of the workaround is the RSE_WORKAROUND code sequence inserted into each invocation of the SAVE_MIN_WITH_COVER and SAVE_MIN_WITH_COVER_R19 macros. This sequence includes hard-coded constants that depend on the number of stacked physical registers being 96. The rest of this patch consists of code to disable this workaround should this not be the case (with the presumption that if a future Itanium processor increases the number of registers, it would also remove the need for this patch). Move the start of the RBS up to a mod32 boundary to avoid some corner cases. The dispatch_illegal_op_fault code outgrew the spot it was squatting in when built with this patch and CONFIG_VIRT_CPU_ACCOUNTING=y Move it out to the end of the ivt. Signed-off-by: Tony Luck <tony.luck@intel.com>
2008-05-28 04:23:16 +08:00
;; \
cmp.ge p6,p7 = 33,r17; \
;; \
(p6) mov r17=0x310; \
(p7) mov r17=0x308; \
;; \
cmp.leu p1,p0=r16,r17; \
(p1) br.cond.sptk.many 1f; \
dep.z r17=r26,0,62; \
movl r16=2f; \
;; \
mov ar.pfs=r17; \
dep r27=r0,r27,16,14; \
mov b0=r16; \
;; \
br.ret.sptk b0; \
;; \
2: \
mov ar.rsc=r0 \
;; \
flushrs; \
;; \
mov ar.bspstore=r22 \
;; \
mov r18=ar.bsp; \
;; \
1: \
.pred.rel "mutex", pKStk, pUStk
#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(COVER, mov r30=cr.ifs, , RSE_WORKAROUND)
#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(COVER, mov r30=cr.ifs, mov r15=r19, RSE_WORKAROUND)
[IA64] Workaround for RSE issue Problem: An application violating the architectural rules regarding operation dependencies and having specific Register Stack Engine (RSE) state at the time of the violation, may result in an illegal operation fault and invalid RSE state. Such faults may initiate a cascade of repeated illegal operation faults within OS interruption handlers. The specific behavior is OS dependent. Implication: An application causing an illegal operation fault with specific RSE state may result in a series of illegal operation faults and an eventual OS stack overflow condition. Workaround: OS interruption handlers that switch to kernel backing store implement a check for invalid RSE state to avoid the series of illegal operation faults. The core of the workaround is the RSE_WORKAROUND code sequence inserted into each invocation of the SAVE_MIN_WITH_COVER and SAVE_MIN_WITH_COVER_R19 macros. This sequence includes hard-coded constants that depend on the number of stacked physical registers being 96. The rest of this patch consists of code to disable this workaround should this not be the case (with the presumption that if a future Itanium processor increases the number of registers, it would also remove the need for this patch). Move the start of the RBS up to a mod32 boundary to avoid some corner cases. The dispatch_illegal_op_fault code outgrew the spot it was squatting in when built with this patch and CONFIG_VIRT_CPU_ACCOUNTING=y Move it out to the end of the ivt. Signed-off-by: Tony Luck <tony.luck@intel.com>
2008-05-28 04:23:16 +08:00
#define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, , )