llvm-project/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

78 lines
2.7 KiB
LLVM
Raw Normal View History

[ARM] Implement PAC return address signing mechanism for PACBTI-M This patch implements PAC return address signing for armv8-m. This patch roughly accomplishes the following things: - PAC and AUT instructions are generated. - They're part of the stack frame setup, so that shrink-wrapping can move them inwards to cover only part of a function - The auth code generated by PAC is saved across subroutine calls so that AUT can find it again to check - PAC is emitted before stacking registers (so that the SP it signs is the one on function entry). - The new pseudo-register ra_auth_code is mentioned in the DWARF frame data - With CMSE also in use: PAC is emitted before stacking FPCXTNS, and AUT validates the corresponding value of SP - Emit correct unwind information when PAC is replaced by PACBTI - Handle tail calls correctly Some notes: We make the assembler accept the `.save {ra_auth_code}` directive that is emitted by the compiler when it saves a register that contains a return address authentication code. For EHABI we need to have the `FrameSetup` flag on the instruction and handle the `t2PACBTI` opcode (identically to `t2PAC`), so we can emit `.save {ra_auth_code}`, instead of `.save {r12}`. For PACBTI-M, the instruction which computes return address PAC should use SP value before adjustment for the argument registers save area (used for variadic functions and when a parameter is split between stack and register), but at the same time it should be after the instruction that saves FPCXT when compiling a CMSE entry function. This patch moves the varargs SP adjustment after the FPCXT save (they are never enabled at the same time), so in a following patch handling of the `PAC` instruction can be placed between them. Epilogue emission code adjusted in a similar manner. PACBTI-M code generation should not emit any instructions for architectures v6-m, v8-m.base, and for A- and R-class cores. Diagnostic message for such cases is handled separately by a future ticket. 
Note on tail calls: If the called function has four arguments that occupy registers `r0`-`r3`, the only option for holding the function pointer itself is `r12`, but this register is used to keep the PAC during function prologue/epilogue and clobbers the function pointer. When we do the tail call we need the five registers (`r0`-`r3` and `r12`) to keep six values - the four function arguments, the function pointer and the PAC, which is obviously impossible. One option would be to authenticate the return address before all callee-saved registers are restored, so we have a scratch register to temporarily keep the value of `r12`. The issue with this approach is that it violates a fundamental invariant that PAC is computed using CFA as a modifier. It would also mean using separate instructions to pop `lr` and the rest of the callee-saved registers, which would offset the advantages of doing a tail call. Instead, this patch disables indirect tail calls when the called function takes four or more arguments and the return address signing and authentication is enabled for the caller function, conservatively assuming the caller function would spill LR. This patch is part of a series that adds support for the PACBTI-M extension of the Armv8.1-M architecture, as detailed here: https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/armv8-1-m-pointer-authentication-and-branch-target-identification-extension The PACBTI-M specification can be found in the Armv8-M Architecture Reference Manual: https://developer.arm.com/documentation/ddi0553/latest The following people contributed to this patch: - Momchil Velikov - Ties Stuij Reviewed By: danielkiss Differential Revision: https://reviews.llvm.org/D112429
2021-12-07 18:13:17 +08:00
; Codegen test for PAC return-address signing in a variadic function.
; The lit command below compiles this module with llc (forcing emission of a
; DWARF frame section) and pipes the assembly into FileCheck, which matches it
; against the expectations that follow the function body.
; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
; 32-bit little-endian data layout for the target.
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
; Thumb target, Armv8.1-M Mainline, bare-metal EABI.
target triple = "thumbv8.1m.main-arm-none-eabi"
; va_list representation used by this module: a struct wrapping a single i8*.
%"struct.std::__va_list" = type { i8* }
; _Z1fiz is the Itanium-mangled name of f(int, ...).
; Sums %n 32-bit values read from the variadic argument area through a va_list
; and returns the total; returns 0 without reading anything when %n <= 0.
; Being variadic, the function exercises the prologue path where the incoming
; argument registers are saved to the stack.
define hidden i32 @_Z1fiz(i32 %n, ...) local_unnamed_addr #0 {
entry:
; %ap is the va_list object; %0 is the same storage viewed as i8* for the
; va_start / va_end intrinsics.
%ap = alloca %"struct.std::__va_list", align 4
%0 = bitcast %"struct.std::__va_list"* %ap to i8*
call void @llvm.va_start(i8* nonnull %0)
; Skip the loop entirely when there is nothing to sum.
%cmp7 = icmp sgt i32 %n, 0
br i1 %cmp7, label %for.body.lr.ph, label %for.cond.cleanup
for.body.lr.ph: ; preds = %entry
; Loop preheader: %1 addresses the pointer field inside the va_list, and
; %argp.cur.pre is the initial argument cursor loaded from it.
%1 = getelementptr inbounds %"struct.std::__va_list", %"struct.std::__va_list"* %ap, i32 0, i32 0
%argp.cur.pre = load i8*, i8** %1, align 4
br label %for.body
for.cond.cleanup: ; preds = %for.body, %entry
; Result is 0 when coming straight from entry, otherwise the final running sum.
%s.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
call void @llvm.va_end(i8* nonnull %0)
ret i32 %s.0.lcssa
for.body: ; preds = %for.body.lr.ph, %for.body
; Loop-carried state: argument cursor, iteration counter and running sum.
%argp.cur = phi i8* [ %argp.cur.pre, %for.body.lr.ph ], [ %argp.next, %for.body ]
%i.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
%s.08 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
; Advance the cursor by one 4-byte slot and write it back into the va_list
; before reading the slot the cursor previously pointed at.
%argp.next = getelementptr inbounds i8, i8* %argp.cur, i32 4
store i8* %argp.next, i8** %1, align 4
%2 = bitcast i8* %argp.cur to i32*
%3 = load i32, i32* %2, align 4
%add = add nsw i32 %3, %s.08
; Loop until the counter reaches %n (known to be > 0 on this path).
%inc = add nuw nsw i32 %i.09, 1
%exitcond.not = icmp eq i32 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
; Expected assembly for _Z1fiz. The comments below describe what each group of
; directives verifies; the directive lines themselves must not be altered.
; CHECK-LABEL: _Z1fiz:
; The return address is signed into r12 first, using SP as it was on function
; entry, i.e. before any stack adjustment.
; CHECK: pac r12, lr, sp
; 12 bytes are reserved next; the stm.w expectation further down stores
; r1-r3 at sp+16 of the final 28-byte frame, which is exactly this area.
; CHECK-NEXT: .pad #12
; CHECK-NEXT: sub sp, #12
; CHECK-NEXT: .cfi_def_cfa_offset 12
; r7 and lr are pushed (8 bytes), with matching CFI.
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 20
; CHECK-NEXT: .cfi_offset lr, -16
; CHECK-NEXT: .cfi_offset r7, -20
; The authentication code in r12 is spilled and described to the unwinder as
; the ra_auth_code pseudo-register rather than as r12.
; CHECK-NEXT: .save {ra_auth_code}
; CHECK-NEXT: str r12, [sp, #-4]!
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: .cfi_offset ra_auth_code, -24
; 4 more bytes of locals (presumably the 4-byte va_list alloca %ap).
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .cfi_def_cfa_offset 28
; ...
; The variadic registers r1-r3 are stored into the area reserved at the top of
; the frame.
; CHECK: add.w r[[N:[0-9]*]], sp, #16
; CHECK: stm.w r[[N]], {r1, r2, r3}
; ...
; Epilogue mirrors the prologue in reverse: drop locals, reload the auth code,
; pop r7/lr, release the 12-byte area, and only then authenticate, so that SP
; is back at its entry value when aut runs.
; CHECK: add sp, #4
; CHECK-NEXT: ldr r12, [sp], #4
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: add sp, #12
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
; Variadic-argument intrinsics used by _Z1fiz.
declare void @llvm.va_start(i8*) #1
declare void @llvm.va_end(i8*) #1
; #0 applies to _Z1fiz (no unwinding, optimise for size); #1 applies to the
; intrinsic declarations.
attributes #0 = { nounwind optsize}
attributes #1 = { nounwind }
; Module flags selecting the protection under test: return-address signing is
; switched on (value 1) while branch-target-enforcement and
; sign-return-address-all are left off (value 0).
!llvm.module.flags = !{!0, !1, !2}
!0 = !{i32 1, !"branch-target-enforcement", i32 0}
!1 = !{i32 1, !"sign-return-address", i32 1}
!2 = !{i32 1, !"sign-return-address-all", i32 0}