forked from OSchip/llvm-project
103 lines
2.5 KiB
ArmAsm
103 lines
2.5 KiB
ArmAsm
//===----------------------Hexagon builtin routine ------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Functions that implement common function prologue and epilogue sequences.
// They are shared, out-of-line helpers that exist to save code size.
|
|
|
|
// FUNCTION_BEGIN name
//   Opens the definition of a global function called \name: selects the
//   .text section, exports the symbol, marks it as a function, applies the
//   .falign alignment directive and finally places the \name label.
//   The definition is closed later with FUNCTION_END (or handed over to a
//   second symbol with FALLTHROUGH_TAIL_CALL).
.macro FUNCTION_BEGIN name
        .text
        .globl  \name
        .type   \name, @function
        .falign
\name:
.endm
|
|
|
|
// FUNCTION_END name
//   Closes the definition of \name by recording its size: the distance from
//   the \name label to the current location.
.macro FUNCTION_END name
        .size   \name, . - \name
.endm
|
|
|
|
// FALLTHROUGH_TAIL_CALL name0 name1
//   Ends \name0 at the current location (recording its size) and starts
//   \name1 right here as a new global function symbol. No branch is emitted
//   by this macro, so code placed before it in \name0 simply continues into
//   the code that follows under \name1.
.macro FALLTHROUGH_TAIL_CALL name0 name1
        .size   \name0, . - \name0
        .globl  \name1
        .type   \name1, @function
        .falign
\name1:
.endm
|
|
|
|
|
|
|
|
|
|
// Save r25:24 at fp+#-8 and r27:26 at fp+#-16.
|
|
|
|
|
|
|
|
|
|
// The compiler is aware that the __save_* functions clobber LR. Any other
// register must not be used here unless the compiler is told about it.
//
// Only one store can be issued per packet, so nothing is lost by entering
// this chain of stores part-way through instead of having separate routines.
//
//   __save_r24_through_r27  stores r27:26 at fp+#-16, then continues into
//                           __save_r24_through_r25.
//   __save_r24_through_r25  stores r25:24 at fp+#-8 and returns through lr.
FUNCTION_BEGIN __save_r24_through_r27
                memd(fp+#-16) = r27:26
FALLTHROUGH_TAIL_CALL __save_r24_through_r27 __save_r24_through_r25
        {
                memd(fp+#-8) = r25:24
                jumpr lr
        }
FUNCTION_END __save_r24_through_r25
|
|
|
|
|
|
|
|
|
|
// In every *_before_tailcall function, jumpr lr shares a packet with
// deallocframe. The return therefore uses the old value of lr (the address
// these helpers must return to), while lr simultaneously receives the value
// it needs to hold when the tail call is made.
//
//   __restore_r24_through_r27_and_deallocframe_before_tailcall
//       reloads r27:26 from fp+#-16, then continues into the r24..r25 entry.
//   __restore_r24_through_r25_and_deallocframe_before_tailcall
//       reloads r25:24 from fp+#-8, deallocates the frame and returns,
//       all in a single packet.
FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe_before_tailcall
                r27:26 = memd(fp+#-16)
FALLTHROUGH_TAIL_CALL __restore_r24_through_r27_and_deallocframe_before_tailcall __restore_r24_through_r25_and_deallocframe_before_tailcall
        {
                r25:24 = memd(fp+#-8)
                deallocframe
                jumpr lr
        }
FUNCTION_END __restore_r24_through_r25_and_deallocframe_before_tailcall
|
|
|
|
|
|
|
|
|
|
// __restore_r24_through_r27_and_deallocframe
//   The first packet has load bandwidth to spare, so LR is reloaded early
//   (from fp+#4) together with r27:26. With LR already restored, the return
//   in the second packet can happen in parallel with deallocframe and the
//   reload of r25:24.
FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe
        {
                lr = memw(fp+#4)
                r27:26 = memd(fp+#-16)
        }
        {
                r25:24 = memd(fp+#-8)
                deallocframe
                jumpr lr
        }
FUNCTION_END __restore_r24_through_r27_and_deallocframe
|
|
|
|
|
|
|
|
|
|
// Here the load bandwidth is maximized.
|
|
|
|
// __restore_r24_through_r25_and_deallocframe
//   Reloads r25:24 from fp+#-8 in the same packet as deallocframe, then
//   returns with a separate jumpr lr issued after that packet.
FUNCTION_BEGIN __restore_r24_through_r25_and_deallocframe
        {
                r25:24 = memd(fp+#-8)
                deallocframe
        }
                jumpr lr
FUNCTION_END __restore_r24_through_r25_and_deallocframe
|