forked from OSchip/llvm-project
[AArch64][SVE] SVE functions should use the SVE calling convention for fast calls
When an SVE function calls another SVE function using the C calling convention we use the more efficient SVE VectorCall PCS. However, for the Fast calling convention we're incorrectly falling back to the generic AArch64 PCS. This patch adds the same "can use SVE vector calling convention" detection used by CallingConv::C to CallingConv::Fast. Co-authored-by: Paul Walker <paul.walker@arm.com> Differential Revision: https://reviews.llvm.org/D99657
This commit is contained in:
parent
65c8bfb509
commit
0934fa4f5d
|
@ -5138,11 +5138,11 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
|
|||
const Function &CallerF = MF.getFunction();
|
||||
CallingConv::ID CallerCC = CallerF.getCallingConv();
|
||||
|
||||
// If this function uses the C calling convention but has an SVE signature,
|
||||
// then it preserves more registers and should assume the SVE_VectorCall CC.
|
||||
// Functions using the C or Fast calling convention that have an SVE signature
|
||||
// preserve more registers and should assume the SVE_VectorCall CC.
|
||||
// The check for matching callee-saved regs will determine whether it is
|
||||
// eligible for TCO.
|
||||
if (CallerCC == CallingConv::C &&
|
||||
if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) &&
|
||||
AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
|
||||
CallerCC = CallingConv::AArch64_SVE_VectorCall;
|
||||
|
||||
|
@ -5335,7 +5335,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
|
|||
|
||||
// Check callee args/returns for SVE registers and set calling convention
|
||||
// accordingly.
|
||||
if (CallConv == CallingConv::C) {
|
||||
if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) {
|
||||
bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
|
||||
return Out.VT.isScalableVector();
|
||||
});
|
||||
|
|
|
@ -37,6 +37,13 @@ define i32 @caller_nosve_signature() nounwind {
|
|||
ret i32 %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: name: caller_nosve_signature_fastcc
|
||||
; CHECK: BL @nosve_signature, csr_aarch64_aapcs
|
||||
define i32 @caller_nosve_signature_fastcc() nounwind {
|
||||
%res = call fastcc i32 @nosve_signature()
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: name: sve_signature_ret_vec_caller
|
||||
; CHECK: BL @sve_signature_ret_vec, csr_aarch64_sve_aapcs
|
||||
define <vscale x 4 x i32> @sve_signature_ret_vec_caller() nounwind {
|
||||
|
@ -44,6 +51,13 @@ define <vscale x 4 x i32> @sve_signature_ret_vec_caller() nounwind {
|
|||
ret <vscale x 4 x i32> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: name: sve_signature_ret_vec_caller_fastcc
|
||||
; CHECK: BL @sve_signature_ret_vec, csr_aarch64_sve_aapcs
|
||||
define <vscale x 4 x i32> @sve_signature_ret_vec_caller_fastcc() nounwind {
|
||||
%res = call fastcc <vscale x 4 x i32> @sve_signature_ret_vec()
|
||||
ret <vscale x 4 x i32> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: name: sve_signature_ret_pred_caller
|
||||
; CHECK: BL @sve_signature_ret_pred, csr_aarch64_sve_aapcs
|
||||
define <vscale x 4 x i1> @sve_signature_ret_pred_caller() nounwind {
|
||||
|
@ -51,6 +65,13 @@ define <vscale x 4 x i1> @sve_signature_ret_pred_caller() nounwind {
|
|||
ret <vscale x 4 x i1> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: name: sve_signature_ret_pred_caller_fastcc
|
||||
; CHECK: BL @sve_signature_ret_pred, csr_aarch64_sve_aapcs
|
||||
define <vscale x 4 x i1> @sve_signature_ret_pred_caller_fastcc() nounwind {
|
||||
%res = call fastcc <vscale x 4 x i1> @sve_signature_ret_pred()
|
||||
ret <vscale x 4 x i1> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: name: sve_signature_arg_vec_caller
|
||||
; CHECK: BL @sve_signature_arg_vec, csr_aarch64_sve_aapcs
|
||||
define void @sve_signature_arg_vec_caller(<vscale x 4 x i32> %arg) nounwind {
|
||||
|
@ -58,6 +79,13 @@ define void @sve_signature_arg_vec_caller(<vscale x 4 x i32> %arg) nounwind {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: name: sve_signature_arg_vec_caller_fastcc
|
||||
; CHECK: BL @sve_signature_arg_vec, csr_aarch64_sve_aapcs
|
||||
define void @sve_signature_arg_vec_caller_fastcc(<vscale x 4 x i32> %arg) nounwind {
|
||||
call fastcc void @sve_signature_arg_vec(<vscale x 4 x i32> %arg)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: name: sve_signature_arg_pred_caller
|
||||
; CHECK: BL @sve_signature_arg_pred, csr_aarch64_sve_aapcs
|
||||
define void @sve_signature_arg_pred_caller(<vscale x 4 x i1> %arg) nounwind {
|
||||
|
@ -65,6 +93,13 @@ define void @sve_signature_arg_pred_caller(<vscale x 4 x i1> %arg) nounwind {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: name: sve_signature_arg_pred_caller_fastcc
|
||||
; CHECK: BL @sve_signature_arg_pred, csr_aarch64_sve_aapcs
|
||||
define void @sve_signature_arg_pred_caller_fastcc(<vscale x 4 x i1> %arg) nounwind {
|
||||
call fastcc void @sve_signature_arg_pred(<vscale x 4 x i1> %arg)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: name: sve_signature_many_arg_vec
|
||||
; CHECK: [[RES:%[0-9]+]]:zpr = COPY $z7
|
||||
; CHECK: $z0 = COPY [[RES]]
|
||||
|
|
|
@ -27,6 +27,25 @@ define <vscale x 4 x i32> @sve_caller_sve_callee() nounwind {
|
|||
ret <vscale x 4 x i32> %call
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @sve_caller_sve_callee_fastcc() nounwind {
|
||||
; CHECK-LABEL: sve_caller_sve_callee_fastcc:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: addvl sp, sp, #-2
|
||||
; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: //APP
|
||||
; CHECK-NEXT: //NO_APP
|
||||
; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: b sve_callee
|
||||
tail call void asm sideeffect "", "~{z9},~{z10}"()
|
||||
%call = tail call fastcc <vscale x 4 x i32> @sve_callee()
|
||||
ret <vscale x 4 x i32> %call
|
||||
}
|
||||
|
||||
declare <vscale x 4 x i32> @sve_callee()
|
||||
|
||||
; Check that a tail call from an SVE function to a non-SVE function
|
||||
|
@ -104,4 +123,79 @@ define i32 @sve_caller_non_sve_callee(<vscale x 4 x i32> %arg) nounwind {
|
|||
ret i32 %call
|
||||
}
|
||||
|
||||
; Check that a tail call from an SVE function to a non-SVE function
|
||||
; does not use a tail-call, because after the call many of the SVE
|
||||
; registers may be clobbered and need to be restored.
|
||||
define i32 @sve_caller_non_sve_callee_fastcc(<vscale x 4 x i32> %arg) nounwind {
|
||||
; CHECK-LABEL: sve_caller_non_sve_callee_fastcc:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
|
||||
; CHECK-NEXT: addvl sp, sp, #-18
|
||||
; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
|
||||
; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
|
||||
; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
|
||||
; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
|
||||
; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
|
||||
; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
|
||||
; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
|
||||
; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
|
||||
; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
|
||||
; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
|
||||
; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
|
||||
; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
|
||||
; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: //APP
|
||||
; CHECK-NEXT: //NO_APP
|
||||
; CHECK-NEXT: bl non_sve_callee
|
||||
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
|
||||
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
|
||||
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
|
||||
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
|
||||
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
|
||||
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
|
||||
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
|
||||
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
|
||||
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
|
||||
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
|
||||
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
|
||||
; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
|
||||
; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: addvl sp, sp, #18
|
||||
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
tail call void asm sideeffect "", "~{z9},~{z10}"()
|
||||
%call = tail call fastcc i32 @non_sve_callee()
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
declare i32 @non_sve_callee()
|
||||
|
|
Loading…
Reference in New Issue