[Coroutine] Check indirect uses of alloca when checking lifetime info

In the existing logic, we look at the lifetime.start marker of each alloca, and check all uses of the alloca, to see if any pair of the lifetime marker and an use of alloca crosses suspension point.
This approach is unfortunately incorrect. An use of alloca does not need to be a direct use, but can be an indirect use through alias.
Only checking direct uses can miss cases where indirect uses are crossing suspension point.
This can be demonstrated in the newly added test case 007.
In the test case, both x and y are only directly used prior to suspend, but they are captured into an alias, merged through a PHINode (so they couldn't be materialized), and used after CoroSuspend.
If we only check whether the lifetime starts cross suspension points with direct uses, we will put the allocas to the stack, and then capture their addresses in the frame.

Instead of fixing it in D96441 and D96566, this patch takes a different approach which I think is better.
We still checks the lifetime info in the same way as before, but with two differences:
1. The collection of liftime.start is moved into AllocaUseVisitor to make the logic more concentrated.
2. When looking at lifetime.start and use pairs, we not only checks the direct uses as before, but in this patch we check all uses collected by AllocaUseVisitor, which would include all indirect uses through alias. This will make the analysis more accurate without throwing away the lifetime optimization.

Differential Revision: https://reviews.llvm.org/D96922
This commit is contained in:
Xun Li 2021-02-24 18:29:23 -08:00
parent b950de5c13
commit c38000a9fb
3 changed files with 220 additions and 40 deletions

View File

@ -857,7 +857,7 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
: PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker) {}
void visit(Instruction &I) {
UserBBs.insert(I.getParent());
Users.insert(&I);
Base::visit(I);
// If the pointer is escaped prior to CoroBegin, we have to assume it would
// be written into before CoroBegin as well.
@ -960,6 +960,12 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
handleAlias(GEPI);
}
void visitIntrinsicInst(IntrinsicInst &II) {
if (II.getIntrinsicID() != Intrinsic::lifetime_start)
return Base::visitIntrinsicInst(II);
LifetimeStarts.insert(&II);
}
void visitCallBase(CallBase &CB) {
for (unsigned Op = 0, OpCount = CB.getNumArgOperands(); Op < OpCount; ++Op)
if (U->get() == CB.getArgOperand(Op) && !CB.doesNotCapture(Op))
@ -993,18 +999,40 @@ private:
// after CoroBegin. Each entry contains the instruction and the offset in the
// original Alloca. They need to be recreated after CoroBegin off the frame.
DenseMap<Instruction *, llvm::Optional<APInt>> AliasOffetMap{};
SmallPtrSet<BasicBlock *, 2> UserBBs{};
SmallPtrSet<Instruction *, 4> Users{};
SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{};
bool MayWriteBeforeCoroBegin{false};
mutable llvm::Optional<bool> ShouldLiveOnFrame{};
bool computeShouldLiveOnFrame() const {
// If lifetime information is available, we check it first since it's
// more precise. We look at every pair of lifetime.start intrinsic and
// every basic block that uses the pointer to see if they cross suspension
// points. The uses cover both direct uses as well as indirect uses.
if (!LifetimeStarts.empty()) {
for (auto *I : Users)
for (auto *S : LifetimeStarts)
if (Checker.isDefinitionAcrossSuspend(*S, I))
return true;
return false;
}
// FIXME: Ideally the isEscaped check should come at the beginning.
// However there are a few loose ends that need to be fixed first before
// we can do that. We need to make sure we are not over-conservative, so
// that the data accessed in-between await_suspend and symmetric transfer
// is always put on the stack, and also data accessed after coro.end is
// always put on the stack (esp the return object). To fix that, we need
// to:
// 1) Potentially treat sret as nocapture in calls
// 2) Special handle the return object and put it on the stack
// 3) Utilize lifetime.end intrinsic
if (PI.isEscaped())
return true;
for (auto *BB1 : UserBBs)
for (auto *BB2 : UserBBs)
if (Checker.hasPathCrossingSuspendPoint(BB1, BB2))
for (auto *U1 : Users)
for (auto *U2 : Users)
if (Checker.isDefinitionAcrossSuspend(*U1, U2))
return true;
return false;
@ -2094,24 +2122,6 @@ static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
static void collectFrameAllocas(Function &F, coro::Shape &Shape,
const SuspendCrossingInfo &Checker,
SmallVectorImpl<AllocaInfo> &Allocas) {
// Collect lifetime.start info for each alloca.
using LifetimeStart = SmallPtrSet<Instruction *, 2>;
llvm::DenseMap<AllocaInst *, std::unique_ptr<LifetimeStart>> LifetimeMap;
for (Instruction &I : instructions(F)) {
auto *II = dyn_cast<IntrinsicInst>(&I);
if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start)
continue;
if (auto *OpInst = dyn_cast<Instruction>(II->getOperand(1))) {
if (auto *AI = dyn_cast<AllocaInst>(OpInst->stripPointerCasts())) {
if (LifetimeMap.find(AI) == LifetimeMap.end())
LifetimeMap[AI] = std::make_unique<LifetimeStart>();
LifetimeMap[AI]->insert(isa<AllocaInst>(OpInst) ? II : OpInst);
}
}
}
for (Instruction &I : instructions(F)) {
auto *AI = dyn_cast<AllocaInst>(&I);
if (!AI)
@ -2121,23 +2131,6 @@ static void collectFrameAllocas(Function &F, coro::Shape &Shape,
if (AI == Shape.SwitchLowering.PromiseAlloca) {
continue;
}
bool ShouldLiveOnFrame = false;
auto Iter = LifetimeMap.find(AI);
if (Iter != LifetimeMap.end()) {
// Check against lifetime.start if the instruction has the info.
for (User *U : I.users()) {
for (auto *S : *Iter->second)
if ((ShouldLiveOnFrame = Checker.isDefinitionAcrossSuspend(*S, U)))
break;
if (ShouldLiveOnFrame)
break;
}
if (!ShouldLiveOnFrame)
continue;
}
// At this point, either ShouldLiveOnFrame is true or we didn't have
// lifetime information. We will need to rely on more precise pointer
// tracking.
DominatorTree DT(F);
AllocaUseVisitor Visitor{F.getParent()->getDataLayout(), DT,
*Shape.CoroBegin, Checker};

View File

@ -0,0 +1,104 @@
; Tests that CoroSplit can succesfully determine allocas should live on the frame
; if their aliases are used across suspension points through PHINode.
; RUN: opt < %s -coro-split -S | FileCheck %s
; RUN: opt < %s -passes=coro-split -S | FileCheck %s
define i8* @f(i1 %n) "coroutine.presplit"="1" {
entry:
%x = alloca i64
%y = alloca i64
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
br i1 %n, label %flag_true, label %flag_false
flag_true:
%x.alias = bitcast i64* %x to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* %x.alias)
br label %merge
flag_false:
%y.alias = bitcast i64* %y to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* %y.alias)
br label %merge
merge:
%alias_phi = phi i8* [ %x.alias, %flag_true ], [ %y.alias, %flag_false ]
store i8 1, i8* %alias_phi
%sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %sp1, label %suspend [i8 0, label %resume
i8 1, label %cleanup]
resume:
call void @print(i8* %alias_phi)
br label %cleanup
cleanup:
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
call i1 @llvm.coro.end(i8* %hdl, i1 0)
ret i8* %hdl
}
declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i1 @llvm.coro.alloc(token)
declare i8* @llvm.coro.begin(token, i8*)
declare i1 @llvm.coro.end(i8*, i1)
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
declare void @print(i8*)
declare noalias i8* @malloc(i32)
declare void @free(i8*)
; Verify that both x and y are put in the frame.
; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i64, i8*, i1 }
; CHECK-LABEL: @f(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* bitcast ([3 x void (%f.Frame*)*]* @f.resumers to i8*))
; CHECK-NEXT: [[ALLOC:%.*]] = call i8* @malloc(i32 48)
; CHECK-NEXT: [[HDL:%.*]] = call noalias nonnull i8* @llvm.coro.begin(token [[ID]], i8* [[ALLOC]])
; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast i8* [[HDL]] to %f.Frame*
; CHECK-NEXT: [[RESUME_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 0
; CHECK-NEXT: store void (%f.Frame*)* @f.resume, void (%f.Frame*)** [[RESUME_ADDR]], align 8
; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 1
; CHECK-NEXT: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** [[DESTROY_ADDR]], align 8
; CHECK-NEXT: [[X_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 2
; CHECK-NEXT: [[Y_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 3
; CHECK-NEXT: br i1 [[N:%.*]], label [[FLAG_TRUE:%.*]], label [[FLAG_FALSE:%.*]]
; CHECK: flag_true:
; CHECK-NEXT: [[X_ALIAS:%.*]] = bitcast i64* [[X_RELOAD_ADDR]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[X_ALIAS]])
; CHECK-NEXT: br label [[MERGE:%.*]]
; CHECK: flag_false:
; CHECK-NEXT: [[Y_ALIAS:%.*]] = bitcast i64* [[Y_RELOAD_ADDR]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[Y_ALIAS]])
; CHECK-NEXT: br label [[MERGE]]
; CHECK: merge:
; CHECK-NEXT: [[ALIAS_PHI:%.*]] = phi i8* [ [[X_ALIAS]], [[FLAG_TRUE]] ], [ [[Y_ALIAS]], [[FLAG_FALSE]] ]
; CHECK-NEXT: [[ALIAS_PHI_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 4
; CHECK-NEXT: store i8* [[ALIAS_PHI]], i8** [[ALIAS_PHI_SPILL_ADDR]], align 8
; CHECK-NEXT: store i8 1, i8* [[ALIAS_PHI]], align 1
; CHECK-NEXT: [[INDEX_ADDR1:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 5
; CHECK-NEXT: store i1 false, i1* [[INDEX_ADDR1]], align 1
; CHECK-NEXT: ret i8* [[HDL]]
;
;
; CHECK-LABEL: @f.resume(
; CHECK-NEXT: entry.resume:
; CHECK-NEXT: [[VFRAME:%.*]] = bitcast %f.Frame* [[FRAMEPTR:%.*]] to i8*
; CHECK-NEXT: [[ALIAS_PHI_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 4
; CHECK-NEXT: [[ALIAS_PHI_RELOAD:%.*]] = load i8*, i8** [[ALIAS_PHI_RELOAD_ADDR]], align 8
; CHECK-NEXT: call void @print(i8* [[ALIAS_PHI_RELOAD]])
; CHECK-NEXT: call void @free(i8* [[VFRAME]])
; CHECK-NEXT: ret void

View File

@ -0,0 +1,83 @@
; RUN: opt < %s -coro-split -S | FileCheck %s
; RUN: opt < %s -passes=coro-split -S | FileCheck %s
%"struct.std::coroutine_handle" = type { i8* }
%"struct.std::coroutine_handle.0" = type { %"struct.std::coroutine_handle" }
%"struct.lean_future<int>::Awaiter" = type { i32, %"struct.std::coroutine_handle.0" }
declare i8* @malloc(i64)
%i8.array = type { [100 x i8] }
declare void @consume.i8.array(%i8.array*)
; The lifetime of testval starts and ends before coro.suspend. Even though consume.i8.array
; might capture it, we can safely say it won't live across suspension.
define void @foo() "coroutine.presplit"="1" {
entry:
%testval = alloca %i8.array
%cast = getelementptr inbounds %i8.array, %i8.array* %testval, i64 0, i32 0, i64 0
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%alloc = call i8* @malloc(i64 16) #3
%vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc)
call void @llvm.lifetime.start.p0i8(i64 100, i8* %cast)
call void @consume.i8.array(%i8.array* %testval)
call void @llvm.lifetime.end.p0i8(i64 100, i8* %cast)
%save = call token @llvm.coro.save(i8* null)
%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
switch i8 %suspend, label %exit [
i8 0, label %await.ready
i8 1, label %exit
]
await.ready:
%StrayCoroSave = call token @llvm.coro.save(i8* null)
br label %exit
exit:
call i1 @llvm.coro.end(i8* null, i1 false)
ret void
}
; The lifetime of testval starts after coro.suspend. So it will never live across suspension
; points.
define void @bar() "coroutine.presplit"="1" {
entry:
%testval = alloca %i8.array
%cast = getelementptr inbounds %i8.array, %i8.array* %testval, i64 0, i32 0, i64 0
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%alloc = call i8* @malloc(i64 16) #3
%vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc)
%save = call token @llvm.coro.save(i8* null)
%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
switch i8 %suspend, label %exit [
i8 0, label %await.ready
i8 1, label %exit
]
await.ready:
%StrayCoroSave = call token @llvm.coro.save(i8* null)
call void @llvm.lifetime.start.p0i8(i64 100, i8* %cast)
call void @consume.i8.array(%i8.array* %testval)
call void @llvm.lifetime.end.p0i8(i64 100, i8* %cast)
br label %exit
exit:
call i1 @llvm.coro.end(i8* null, i1 false)
ret void
}
; Verify that for both foo and bar, testval isn't put on the frame.
; CHECK: %foo.Frame = type { void (%foo.Frame*)*, void (%foo.Frame*)*, i1 }
; CHECK: %bar.Frame = type { void (%bar.Frame*)*, void (%bar.Frame*)*, i1 }
declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*)
declare i1 @llvm.coro.alloc(token) #3
declare i64 @llvm.coro.size.i64() #5
declare i8* @llvm.coro.begin(token, i8* writeonly) #3
declare token @llvm.coro.save(i8*) #3
declare i8* @llvm.coro.frame() #5
declare i8 @llvm.coro.suspend(token, i1) #3
declare i8* @llvm.coro.free(token, i8* nocapture readonly) #2
declare i1 @llvm.coro.end(i8*, i1) #3
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #4
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #4