[AArch64] Ensure no tagged memory is left in the unallocated portion of the stack

This patch makes sure that if we tag some memory, we untag that memory before
the function returns or throws via any exit reachable from the tag operation.
To achieve that, we place the untag operation either:
a) at the lifetime end call for the alloca, if that call post-dominates the
   lifetime start call (where the tag operation is placed), or if it (the
   lifetime end call) dominates all reachable exits; or otherwise
b) at the reachable exits.

Differential Revision:
llvm-svn: 374182
@ -19,6 +19,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@ -491,6 +492,24 @@ void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
Info.AI = NewAI;
// Helper function to check for post-dominance between two intrinsic calls.
//
// Returns true if instruction A post-dominates instruction B.
//  - When A and B live in different basic blocks, the answer is delegated to
//    the block-level query PDT->dominates(ABB, BBB).
//  - When they share a block, the block is scanned from its first instruction:
//    meeting B first means A comes later in the block (returns true), meeting
//    A first means A precedes B (returns false).
//
// PDT is dereferenced without a null check, so callers must pass a valid tree
// whenever A and B may be in different blocks.
static bool postDominates(const PostDominatorTree *PDT, const IntrinsicInst *A,
const IntrinsicInst *B) {
const BasicBlock *ABB = A->getParent();
const BasicBlock *BBB = B->getParent();
// Different blocks: rely on the post-dominator tree.
if (ABB != BBB)
return PDT->dominates(ABB, BBB);
// Same block: whichever of the two instructions is encountered first in
// program order decides the result.
for (const Instruction &I : *ABB) {
if (&I == B)
return true;
if (&I == A)
return false;
// Both A and B claim ABB as their parent, so the scan above must have found
// one of them; falling through means the block's instruction list is broken.
llvm_unreachable("Corrupt instruction list");
// FIXME: check for MTE extension
bool AArch64StackTagging::runOnFunction(Function &Fn) {
if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
@ -565,23 +584,31 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
if (NumInterestingAllocas == 0)
return true;
std::unique_ptr<DominatorTree> DeleteDT;
DominatorTree *DT = nullptr;
if (auto *P = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
DT = &P->getDomTree();
if (DT == nullptr && (NumInterestingAllocas > 1 ||
!F->hasFnAttribute(Attribute::OptimizeNone))) {
DeleteDT = std::make_unique<DominatorTree>(*F);
DT = DeleteDT.get();
std::unique_ptr<PostDominatorTree> DeletePDT;
PostDominatorTree *PDT = nullptr;
if (auto *P = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>())
PDT = &P->getPostDomTree();
if (PDT == nullptr && !F->hasFnAttribute(Attribute::OptimizeNone)) {
DeletePDT = std::make_unique<PostDominatorTree>(*F);
PDT = DeletePDT.get();
SetTagFunc =
Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag);
// Compute DT only if the function has the attribute, there are more than 1
// interesting allocas, and it is not available for free.
Instruction *Base;
if (NumInterestingAllocas > 1) {
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
if (DTWP) {
Base = insertBaseTaggedPointer(Allocas, &DTWP->getDomTree());
} else {
DominatorTree DT(*F);
Base = insertBaseTaggedPointer(Allocas, &DT);
} else {
Base = insertBaseTaggedPointer(Allocas, nullptr);
Instruction *Base = insertBaseTaggedPointer(Allocas, DT);
for (auto &I : Allocas) {
const AllocaInfo &Info = I.second;
@ -604,11 +631,37 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
if (UnrecognizedLifetimes.empty() && Info.LifetimeStart.size() == 1 &&
Info.LifetimeEnd.size() == 1) {
IntrinsicInst *Start = Info.LifetimeStart[0];
IntrinsicInst *End = Info.LifetimeEnd[0];
uint64_t Size =
Size = alignTo(Size, kTagGranuleSize);
tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size);
untagAlloca(AI, Info.LifetimeEnd[0], Size);
// We need to ensure that if we tag some object, we certainly untag it
// before the function exits.
if (PDT != nullptr && postDominates(PDT, End, Start)) {
untagAlloca(AI, End, Size);
} else {
SmallVector<Instruction *, 8> ReachableRetVec;
unsigned NumCoveredExits = 0;
for (auto &RI : RetVec) {
if (!isPotentiallyReachable(Start, RI, nullptr, DT))
if (DT != nullptr && DT->dominates(End, RI))
// If there's a mix of covered and non-covered exits, just put the untag
// on exits, so we avoid the redundancy of untagging twice.
if (NumCoveredExits == ReachableRetVec.size()) {
untagAlloca(AI, End, Size);
} else {
for (auto &RI : ReachableRetVec)
untagAlloca(AI, RI, Size);
// We may have inserted untag outside of the lifetime interval.
// Remove the lifetime end call for this alloca.
} else {
uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy());
@ -0,0 +1,69 @@
; RUN: opt -S -stack-tagging %s -o - | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-arm-unknown-eabi"
; Exception-handling scenario for untag placement.
; Two 48-byte allocas (%a, %b) have their lifetimes started and are then passed
; to an invoke of @g. The lifetime.end calls appear only on the normal
; continuation (next0); the unwind path goes through lpad0 and may leave the
; function via the resume in lpad1.
; The FileCheck directives below expect:
;  - a tagging settag on %a.tag / %b.tag right after each lifetime.start;
;  - no settag in the rest of the first block, nor in next0, lpad0 or next1;
;  - settag calls on the plain %a / %b pointers (the untag) in lpad1 and in exit.
define void @f() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; CHECK-LABEL: start:
%a = alloca i8, i32 48, align 8
call void @llvm.lifetime.start.p0i8(i64 48, i8* nonnull %a) #2
; CHECK: call void @llvm.aarch64.settag(i8* %a.tag, i64 48)
%b = alloca i8, i32 48, align 8
call void @llvm.lifetime.start.p0i8(i64 48, i8* nonnull %b) #2
; CHECK: call void @llvm.aarch64.settag(i8* %b.tag, i64 48)
invoke void @g (i8 * nonnull %a, i8 * nonnull %b) to label %next0 unwind label %lpad0
; CHECK-NOT: settag
; CHECK-LABEL: next0:
call void @llvm.lifetime.end.p0i8(i64 40, i8* nonnull %a)
call void @llvm.lifetime.end.p0i8(i64 40, i8* nonnull %b)
br label %exit
; CHECK-NOT: settag
; CHECK-LABEL: lpad0:
%pad0v = landingpad { i8*, i32 } catch i8* null
%v = extractvalue { i8*, i32 } %pad0v, 0
%x = call i8* @__cxa_begin_catch(i8* %v) #2
invoke void @__cxa_end_catch() to label %next1 unwind label %lpad1
; CHECK-NOT: settag
; CHECK-LABEL: next1:
br label %exit
; CHECK-NOT: settag
; CHECK-LABEL: lpad1:
; CHECK-DAG: call void @llvm.aarch64.settag(i8* %a, i64 48)
; CHECK-DAG: call void @llvm.aarch64.settag(i8* %b, i64 48)
%pad1v = landingpad { i8*, i32 } cleanup
resume { i8*, i32 } %pad1v
; CHECK-LABEL: exit:
; CHECK-DAG: call void @llvm.aarch64.settag(i8* %a, i64 48)
; CHECK-DAG: call void @llvm.aarch64.settag(i8* %b, i64 48)
ret void
; CHECK: ret void
declare void @g(i8 *, i8 *) #0
declare dso_local i32 @__gxx_personality_v0(...)
declare dso_local i8* @__cxa_begin_catch(i8*) local_unnamed_addr
declare dso_local void @__cxa_end_catch() local_unnamed_addr
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
attributes #0 = { sanitize_memtag "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+mte,+neon,+v8.5a" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind willreturn }
attributes #2 = { nounwind }
@ -0,0 +1,183 @@
; clang -target aarch64-eabi -O2 -march=armv8.5-a+memtag -fsanitize=memtag -S -emit-llvm
; void bar() {
; throw 42;
; }
; void foo() {
; int A0;
; __asm volatile("" : : "r"(&A0));
; try {
; bar();
; } catch (int exc) {
; }
; throw 15532;
; }
; int main() {
; try {
; foo();
; } catch (int exc) {
; }
; return 0;
; }
; RUN: opt -S -stack-tagging %s -o - | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-unknown-eabi"
@_ZTIi = external dso_local constant i8*
; Function Attrs: noreturn sanitize_memtag
define dso_local void @_Z3barv() local_unnamed_addr #0 {
%exception = tail call i8* @__cxa_allocate_exception(i64 4) #4
%0 = bitcast i8* %exception to i32*
store i32 42, i32* %0, align 16, !tbaa !2
tail call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #5
declare dso_local i8* @__cxa_allocate_exception(i64) local_unnamed_addr
declare dso_local void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr
; Function Attrs: noreturn sanitize_memtag
define dso_local void @_Z3foov() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
%A0 = alloca i32, align 4
%0 = bitcast i32* %A0 to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4
call void asm sideeffect "", "r"(i32* nonnull %A0) #4, !srcloc !6
invoke void @_Z3barv()
to label %try.cont unwind label %lpad
lpad: ; preds = %entry
%1 = landingpad { i8*, i32 }
catch i8* bitcast (i8** @_ZTIi to i8*)
%2 = extractvalue { i8*, i32 } %1, 1
%3 = call i32* bitcast (i8** @_ZTIi to i8*)) #4
%matches = icmp eq i32 %2, %3
br i1 %matches, label %catch, label %ehcleanup
catch: ; preds = %lpad
%4 = extractvalue { i8*, i32 } %1, 0
%5 = call i8* @__cxa_begin_catch(i8* %4) #4
call void @__cxa_end_catch() #4
br label %try.cont
try.cont: ; preds = %entry, %catch
%exception = call i8* @__cxa_allocate_exception(i64 4) #4
%6 = bitcast i8* %exception to i32*
store i32 15532, i32* %6, align 16, !tbaa !2
call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #5
ehcleanup: ; preds = %lpad
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #4
resume { i8*, i32 } %1
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
declare dso_local i32 @__gxx_personality_v0(...)
; Function Attrs: nounwind readnone
declare i32*) #2
declare dso_local i8* @__cxa_begin_catch(i8*) local_unnamed_addr
declare dso_local void @__cxa_end_catch() local_unnamed_addr
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
; Function Attrs: norecurse sanitize_memtag
define dso_local i32 @main() local_unnamed_addr #3 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; CHECK-LABEL: entry:
%A0.i = alloca i32, align 4
%0 = bitcast i32* %A0.i to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4
call void asm sideeffect "", "r"(i32* nonnull %A0.i) #4, !srcloc !6
; CHECK: call void @llvm.aarch64.settag(i8* %1, i64 16)
; CHECK-NEXT: call void asm sideeffect
%exception.i6 = call i8* @__cxa_allocate_exception(i64 4) #4
%1 = bitcast i8* %exception.i6 to i32*
store i32 42, i32* %1, align 16, !tbaa !2
invoke void @__cxa_throw(i8* %exception.i6, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #5
to label %.noexc7 unwind label %lpad.i
.noexc7: ; preds = %entry
lpad.i: ; preds = %entry
%2 = landingpad { i8*, i32 }
catch i8* bitcast (i8** @_ZTIi to i8*)
%3 = extractvalue { i8*, i32 } %2, 1
%4 = call i32* bitcast (i8** @_ZTIi to i8*)) #4
%matches.i = icmp eq i32 %3, %4
br i1 %matches.i, label %catch.i, label %ehcleanup.i
catch.i: ; preds = %lpad.i
%5 = extractvalue { i8*, i32 } %2, 0
%6 = call i8* @__cxa_begin_catch(i8* %5) #4
call void @__cxa_end_catch() #4
%exception.i = call i8* @__cxa_allocate_exception(i64 4) #4
%7 = bitcast i8* %exception.i to i32*
store i32 15532, i32* %7, align 16, !tbaa !2
invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #5
to label %.noexc unwind label %lpad
.noexc: ; preds = %catch.i
ehcleanup.i: ; preds = %lpad.i
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #4
br label %lpad.body
lpad: ; preds = %catch.i
%8 = landingpad { i8*, i32 }
catch i8* bitcast (i8** @_ZTIi to i8*)
%.pre = extractvalue { i8*, i32 } %8, 1
br label %lpad.body
lpad.body: ; preds = %ehcleanup.i, %lpad
%.pre-phi = phi i32 [ %3, %ehcleanup.i ], [ %.pre, %lpad ]
%eh.lpad-body = phi { i8*, i32 } [ %2, %ehcleanup.i ], [ %8, %lpad ]
%matches = icmp eq i32 %.pre-phi, %4
br i1 %matches, label %catch, label %eh.resume
catch: ; preds = %lpad.body
%9 = extractvalue { i8*, i32 } %eh.lpad-body, 0
%10 = call i8* @__cxa_begin_catch(i8* %9) #4
call void @__cxa_end_catch() #4
ret i32 0
eh.resume: ; preds = %lpad.body
resume { i8*, i32 } %eh.lpad-body
attributes #0 = { noreturn sanitize_memtag "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+mte,+neon,+v8.5a" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind willreturn }
attributes #2 = { nounwind readnone }
attributes #3 = { norecurse sanitize_memtag "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+mte,+neon,+v8.5a" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #4 = { nounwind }
attributes #5 = { noreturn }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 10.0.0 ( c38188c5fe41751fda095edde1a878b2a051ae58)"}
!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C++ TBAA"}
!6 = !{i32 70}
@ -0,0 +1,82 @@
;; RUN: opt -S -stack-tagging %s -o - | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-arm-unknown-eabi"
; Untag placement with several function exits.
; Allocas %v and %w have their lifetimes started only after the conditional
; branch on the result of @g0 (the other successor is exit3). %w's lifetime.end
; directly follows the call to @g1; %v's lifetime.end appears later, on the
; path that calls @z1 and branches to exit2, while the path that calls @z0 and
; branches to exit1 has no lifetime.end for %v.
; The FileCheck directives below expect:
;  - tagging settag calls on %v.tag / %w.tag after the lifetime.start calls;
;  - the untag of %w (settag on plain %w) after the @g1 call and before %w's
;    lifetime.end, with no settag mentioning %v around it;
;  - no settag in the intermediate blocks;
;  - the untag of %v (settag on plain %v) in exit1 and in exit2;
;  - no settag in exit3.
define void @f() local_unnamed_addr #0 {
; CHECK: %basetag = call i8* @llvm.aarch64.irg.sp(i64 0)
%v = alloca i8, i32 48, align 8
; CHECK: %v.tag = call i8* @llvm.aarch64.tagp.p0i8(i8* %v, i8* %basetag, i64 0)
%w = alloca i8, i32 48, align 16
; CHECK: %w.tag = call i8* @llvm.aarch64.tagp.p0i8(i8* %w, i8* %basetag, i64 1)
%t0 = call i32 @g0() #1
%b0 = icmp eq i32 %t0, 0
br i1 %b0, label %S1, label %exit3
call void @llvm.lifetime.start.p0i8(i64 48, i8 * nonnull %v) #1
; CHECK: call void @llvm.aarch64.settag(i8* %v.tag, i64 48)
call void @llvm.lifetime.start.p0i8(i64 48, i8 * nonnull %w) #1
; CHECK: call void @llvm.aarch64.settag(i8* %w.tag, i64 48)
%t1 = call i32 @g1(i8 * nonnull %v, i8 * nonnull %w) #1
; CHECK: call i32 @g1
; CHECK-NOT: settag{{.*}}%v
; CHECK: call void @llvm.aarch64.settag(i8* %w, i64 48)
; CHECK-NOT: settag{{.*}}%v
call void @llvm.lifetime.end.p0i8(i64 48, i8 * nonnull %w) #1
; CHECK: call void @llvm.lifetime.end.p0i8(i64 48, i8* nonnull %w.tag)
%b1 = icmp eq i32 %t1, 0
br i1 %b1, label %S2, label %S3
; CHECK-NOT: settag
call void @z0() #1
br label %exit1
; CHECK-NOT: settag
call void @llvm.lifetime.end.p0i8(i64 48, i8 * nonnull %v) #1
tail call void @z1() #1
br label %exit2
; CHECK-NOT: settag
; CHECK-LABEL: exit1:
; CHECK: call void @llvm.aarch64.settag(i8* %v, i64 48)
ret void
; CHECK-LABEL: exit2:
; CHECK: call void @llvm.aarch64.settag(i8* %v, i64 48)
ret void
; CHECK-LABEL: exit3:
call void @z2() #1
; CHECK-NOT: settag
ret void
; CHECK: ret void
declare i32 @g0() #0
declare i32 @g1(i8 *, i8 *) #0
declare void @z0() #0
declare void @z1() #0
declare void @z2() #0
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8 * nocapture) #1
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8 * nocapture) #1
attributes #0 = { sanitize_memtag "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+mte,+neon,+v8.5a" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind }
