xtensa: new fast_alloca handler
Instead of emulating the movsp instruction in the kernel, use the window underflow handler to load the missing register window and retry the failed movsp. Signed-off-by: Max Filippov <jcmvbkbc@gmail.com> Signed-off-by: Chris Zankel <chris@zankel.net>
This commit is contained in:
parent
99d5040ebc
commit
fff96d69f2
|
@ -31,7 +31,6 @@
|
|||
/* Unimplemented features. */
|
||||
|
||||
/* Make sure the macro is not defined; no code guarded by it is visible in this excerpt. */
#undef KERNEL_STACK_OVERFLOW_CHECK
|
||||
/*
 * Make sure the macro is not defined. Where this excerpt tests it with
 * "#ifdef ALLOCA_EXCEPTION_IN_IRAM", the defined branch is only
 * "#error iram not supported", so the #else branch (a plain l8ui load of the
 * instruction byte) is the one that gets assembled.
 */
#undef ALLOCA_EXCEPTION_IN_IRAM
|
||||
|
||||
/* Not well tested.
|
||||
*
|
||||
|
@ -819,11 +818,27 @@ ENDPROC(unrecoverable_exception)
|
|||
*
|
||||
* The ALLOCA handler is entered when user code executes the MOVSP
|
||||
* instruction and the caller's frame is not in the register file.
|
||||
* In this case, the caller frame's a0..a3 are on the stack just
|
||||
* below sp (a1), and this handler moves them.
|
||||
*
|
||||
* For "MOVSP <ar>,<as>" without destination register a1, this routine
|
||||
* simply moves the value from <as> to <ar> without moving the save area.
|
||||
* This algorithm was taken from Ross Morley's RTOS Porting Layer:
|
||||
*
|
||||
* /home/ross/rtos/porting/XtensaRTOS-PortingLayer-20090507/xtensa_vectors.S
|
||||
*
|
||||
* It leverages the existing window spill/fill routines and their support for
|
||||
* double exceptions. The 'movsp' instruction will only cause an exception if
|
||||
* the next window needs to be loaded. In fact this ALLOCA exception may be
|
||||
* replaced at some point by changing the hardware to do an underflow exception
|
||||
* of the proper size instead.
|
||||
*
|
||||
* This algorithm simply backs out the register changes started by the user
|
||||
* exception handler, makes it appear that we have started a window underflow
|
||||
* by rotating the window back and then setting the old window base (OWB) in
|
||||
* the 'ps' register with the rolled back window base. The 'movsp' instruction
|
||||
* will be re-executed and this time since the next window frame is in the
|
||||
* active AR registers it won't cause an exception.
|
||||
*
|
||||
* If the WindowUnderflow code gets a TLB miss the page will get mapped
|
||||
* and the partial WindowUnderflow will be handled in the double exception
|
||||
* handler.
|
||||
*
|
||||
* Entry condition:
|
||||
*
|
||||
|
@ -838,155 +853,28 @@ ENDPROC(unrecoverable_exception)
|
|||
* < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception
|
||||
*/
|
||||
|
||||
/*
 * Helpers that reduce a byte of the MOVSP instruction, already loaded into
 * register "ar", to a 4-bit register number in place (extui with width 4).
 * _EXTUI_MOVSP_SRC is applied to instruction byte 1 to get the source
 * register number, _EXTUI_MOVSP_DST to instruction byte 0 to get the
 * destination register number. The two macros always select opposite
 * nibbles (shift 0 vs. shift 4), and which one selects which is swapped
 * when XCHAL_HAVE_BE is non-zero.
 */
#if XCHAL_HAVE_BE
|
||||
#define _EXTUI_MOVSP_SRC(ar) extui ar, ar, 4, 4
|
||||
#define _EXTUI_MOVSP_DST(ar) extui ar, ar, 0, 4
|
||||
#else
|
||||
#define _EXTUI_MOVSP_SRC(ar) extui ar, ar, 0, 4
|
||||
#define _EXTUI_MOVSP_DST(ar) extui ar, ar, 4, 4
|
||||
#endif
|
||||
|
||||
/*
 * fast_alloca: entry point for the ALLOCA exception raised by MOVSP.
 *
 * NOTE(review): this listing is a rendered diff hunk (-838,155 +853,28)
 * whose +/- markers were lost, so instructions from two different
 * implementations sit interleaved between ENTRY and ENDPROC. Going by the
 * commit message and the header comment, the windowbase/ps sequence directly
 * below together with the _WindowUnderflow dispatch just before ENDPROC
 * looks like the new body, and the MOVSP emulation in between (from the
 * double-exception check through the second rfe) like the removed one --
 * confirm against the applied file.
 */
ENTRY(fast_alloca)
|
||||
/* Read the WINDOWBASE special register, then rotate the window back by one. */
rsr a0, windowbase
|
||||
rotw -1
|
||||
/*
 * Update the OWB field of ps: extract the current field into a3, XOR it
 * with a4, and (further down) XOR the shifted difference back into ps.
 * That replaces the field with the corresponding bits of a4, assuming a4
 * has no bits set above PS_OWB_WIDTH. Presumably a4 here is the windowbase
 * value that was read into a0 before the rotate -- confirm.
 */
rsr a2, ps
|
||||
extui a3, a2, PS_OWB_SHIFT, PS_OWB_WIDTH
|
||||
xor a3, a3, a4
|
||||
/*
 * Using the frame addressed by a6: reload a4 from its PT_AREG0 slot and
 * swap depc with its PT_DEPC slot value (a6 receives the old depc, depc
 * receives the saved value).
 */
l32i a4, a6, PT_AREG0
|
||||
l32i a1, a6, PT_DEPC
|
||||
rsr a6, depc
|
||||
wsr a1, depc
|
||||
/* Fold the OWB difference computed above back into ps and write it. */
slli a3, a3, PS_OWB_SHIFT
|
||||
xor a2, a2, a3
|
||||
wsr a2, ps
|
||||
/* Synchronize after the ps write. */
rsync
|
||||
|
||||
/* We shouldn't be in a double exception. */
|
||||
|
||||
/*
 * A saved DEPC at or above VALID_DOUBLE_EXCEPTION_ADDRESS is treated as a
 * double exception and sent to .Lunhandled_double.
 */
l32i a0, a2, PT_DEPC
|
||||
_bgeui a0, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lunhandled_double
|
||||
|
||||
/*
 * Spill a4, a3 and the original a2 (read back from depc) into the frame
 * addressed by a2 so they can be used as scratch registers.
 */
rsr a0, depc # get a2
|
||||
s32i a4, a2, PT_AREG4 # save a4 and
|
||||
s32i a3, a2, PT_AREG3
|
||||
s32i a0, a2, PT_AREG2 # a2 to stack
|
||||
|
||||
/* Exit critical section. */
|
||||
|
||||
/* Store 0 into the EXC_TABLE_FIXUP slot of the table addressed by excsave1. */
movi a0, 0
|
||||
rsr a3, excsave1
|
||||
s32i a0, a3, EXC_TABLE_FIXUP
|
||||
|
||||
rsr a4, epc1 # get exception address
|
||||
|
||||
#ifdef ALLOCA_EXCEPTION_IN_IRAM
|
||||
#error iram not supported
|
||||
#else
|
||||
/* Note: l8ui not allowed in IRAM/IROM!! */
|
||||
l8ui a0, a4, 1 # read as(src) from MOVSP instruction
|
||||
#endif
|
||||
/*
 * Computed jump into the .Lmovsp_src table: target is
 * .Lmovsp_src + 8 * (source register number). Every table entry is padded
 * to 8 bytes with ".align 8" to match the addx8 scaling.
 */
movi a3, .Lmovsp_src
|
||||
_EXTUI_MOVSP_SRC(a0) # extract source register number
|
||||
addx8 a3, a0, a3
|
||||
jx a3
|
||||
|
||||
/* Double-exception path: stash a0 in excsave1 and call unrecoverable_exception. */
.Lunhandled_double:
|
||||
wsr a0, excsave1
|
||||
movi a0, unrecoverable_exception
|
||||
callx0 a0
|
||||
|
||||
.align 8
|
||||
/*
 * One 8-byte entry per source register a0..a15; each leaves the source
 * value in a3 and jumps to the next "1:" label. a0, a2, a3 and a4 are read
 * from their PT_AREGn slots in the frame addressed by a2; a1 and a5..a15
 * are read directly from the live registers.
 */
.Lmovsp_src:
|
||||
l32i a3, a2, PT_AREG0; _j 1f; .align 8
|
||||
mov a3, a1; _j 1f; .align 8
|
||||
l32i a3, a2, PT_AREG2; _j 1f; .align 8
|
||||
l32i a3, a2, PT_AREG3; _j 1f; .align 8
|
||||
l32i a3, a2, PT_AREG4; _j 1f; .align 8
|
||||
mov a3, a5; _j 1f; .align 8
|
||||
mov a3, a6; _j 1f; .align 8
|
||||
mov a3, a7; _j 1f; .align 8
|
||||
mov a3, a8; _j 1f; .align 8
|
||||
mov a3, a9; _j 1f; .align 8
|
||||
mov a3, a10; _j 1f; .align 8
|
||||
mov a3, a11; _j 1f; .align 8
|
||||
mov a3, a12; _j 1f; .align 8
|
||||
mov a3, a13; _j 1f; .align 8
|
||||
mov a3, a14; _j 1f; .align 8
|
||||
mov a3, a15; _j 1f; .align 8
|
||||
|
||||
/* a3 = value of the MOVSP source register, a4 = address of the MOVSP. */
1:
|
||||
|
||||
#ifdef ALLOCA_EXCEPTION_IN_IRAM
|
||||
#error iram not supported
|
||||
#else
|
||||
l8ui a0, a4, 0 # read ar(dst) from MOVSP instruction
|
||||
#endif
|
||||
addi a4, a4, 3 # step over movsp
|
||||
_EXTUI_MOVSP_DST(a0) # extract destination register
|
||||
wsr a4, epc1 # save new epc_1
|
||||
|
||||
_bnei a0, 1, 1f # no 'movsp a1, ax': jump
|
||||
|
||||
/* Move the save area. This implies the use of the L32E
|
||||
* and S32E instructions, because this move must be done with
|
||||
* the user's PS.RING privilege levels, not with ring 0
|
||||
* (kernel's) privileges currently active with PS.EXCM
|
||||
* set. Note that we have still registered a fixup routine with the
|
||||
* double exception vector in case a double exception occurs.
|
||||
*/
|
||||
|
||||
/* a0,a4:avail a1:old user stack a2:exc. stack a3:new user stack. */
|
||||
|
||||
/*
 * Copy the four words at offsets -16, -12, -8 and -4 from the old stack
 * pointer (a1) to the same offsets from the new stack pointer (a3), two
 * words at a time through a0 and a4.
 */
l32e a0, a1, -16
|
||||
l32e a4, a1, -12
|
||||
s32e a0, a3, -16
|
||||
s32e a4, a3, -12
|
||||
l32e a0, a1, -8
|
||||
l32e a4, a1, -4
|
||||
s32e a0, a3, -8
|
||||
s32e a4, a3, -4
|
||||
|
||||
/* Restore stack-pointer and all the other saved registers. */
|
||||
|
||||
mov a1, a3
|
||||
|
||||
/* a2 is reloaded last because it is the base register for the other loads. */
l32i a4, a2, PT_AREG4
|
||||
l32i a3, a2, PT_AREG3
|
||||
l32i a0, a2, PT_AREG0
|
||||
l32i a2, a2, PT_AREG2
|
||||
rfe
|
||||
|
||||
/* MOVSP <at>,<as> was invoked with <at> != a1.
|
||||
* Because the stack pointer is not being modified,
|
||||
* we should be able to just modify the pointer
|
||||
* without moving any save area.
|
||||
* The processor only traps these occurrences if the
|
||||
* caller window isn't live, so unfortunately we can't
|
||||
* use this as an alternate trap mechanism.
|
||||
* So we just do the move. This requires that we
|
||||
* resolve the destination register, not just the source,
|
||||
* so there's some extra work.
|
||||
* (PERHAPS NOT REALLY NEEDED, BUT CLEANER...)
|
||||
*/
|
||||
|
||||
/* a0 dst-reg, a1 user-stack, a2 stack, a3 value of src reg. */
|
||||
|
||||
/*
 * Computed jump into the .Lmovsp_dst table: target is
 * .Lmovsp_dst + 8 * (destination register number), 8 bytes per entry.
 */
1: movi a4, .Lmovsp_dst
|
||||
addx8 a4, a0, a4
|
||||
jx a4
|
||||
|
||||
.align 8
|
||||
/*
 * One 8-byte entry per destination register a0..a15; each writes a3 to the
 * destination and jumps to the next "1:" label. For a0, a2, a3 and a4 the
 * value is stored into the PT_AREGn slot of the frame addressed by a2, from
 * which the exit sequence below reloads a0, a2, a3 and a4; a1 and a5..a15
 * are written directly.
 */
.Lmovsp_dst:
|
||||
s32i a3, a2, PT_AREG0; _j 1f; .align 8
|
||||
mov a1, a3; _j 1f; .align 8
|
||||
s32i a3, a2, PT_AREG2; _j 1f; .align 8
|
||||
s32i a3, a2, PT_AREG3; _j 1f; .align 8
|
||||
s32i a3, a2, PT_AREG4; _j 1f; .align 8
|
||||
mov a5, a3; _j 1f; .align 8
|
||||
mov a6, a3; _j 1f; .align 8
|
||||
mov a7, a3; _j 1f; .align 8
|
||||
mov a8, a3; _j 1f; .align 8
|
||||
mov a9, a3; _j 1f; .align 8
|
||||
mov a10, a3; _j 1f; .align 8
|
||||
mov a11, a3; _j 1f; .align 8
|
||||
mov a12, a3; _j 1f; .align 8
|
||||
mov a13, a3; _j 1f; .align 8
|
||||
mov a14, a3; _j 1f; .align 8
|
||||
mov a15, a3; _j 1f; .align 8
|
||||
|
||||
/* Exit: reload a4, a3, a0 and finally a2 from the frame, then rfe. */
1: l32i a4, a2, PT_AREG4
|
||||
l32i a3, a2, PT_AREG3
|
||||
l32i a0, a2, PT_AREG0
|
||||
l32i a2, a2, PT_AREG2
|
||||
rfe
|
||||
|
||||
/*
 * Dispatch to a window underflow handler on two bits of one value:
 *   bit 31 of a4 clear            -> _WindowUnderflow4
 *   else rotate back once more;
 *   bit 30 (now seen as a8) clear -> _WindowUnderflow8
 *   else rotate back again        -> _WindowUnderflow12
 * In the windowbase/ps sequence at the top, a4 was loaded from PT_AREG0;
 * presumably these are the call-size bits of the saved return address --
 * confirm. NOTE(review): as listed this code follows an rfe and is not
 * reachable by fall-through, which is consistent with the preceding
 * emulation code being the removed side of the diff.
 */
_bbci.l a4, 31, 4f
|
||||
rotw -1
|
||||
_bbci.l a8, 30, 8f
|
||||
rotw -1
|
||||
j _WindowUnderflow12
|
||||
8: j _WindowUnderflow8
|
||||
4: j _WindowUnderflow4
|
||||
ENDPROC(fast_alloca)
|
||||
|
||||
/*
|
||||
|
|
Loading…
Reference in New Issue