[coroutines] Store an address of destroy OR cleanup part in the coroutine frame.

Summary:
If heap allocation of a coroutine is elided, we need to make sure that we will update an address stored in the coroutine frame from f.destroy to f.cleanup.
Before this change, CoroSplit synthesized these stores after coro.begin:

```
    store void (%f.Frame*)* @f.resume, void (%f.Frame*)** %resume.addr
    store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr

```

In those cases where we did heap elision, but were not able to devirtualize all indirect calls, destroy call will attempt to "free" the coroutine frame stored on the stack. Oops.

Now we use select to put an appropriate coroutine subfunction in the destroy slot. As bellow:

```
    store void (%f.Frame*)* @f.resume, void (%f.Frame*)** %resume.addr
    %0 = select i1 %need.alloc, void (%f.Frame*)* @f.destroy, void (%f.Frame*)* @f.cleanup
    store void (%f.Frame*)* %0, void (%f.Frame*)** %destroy.addr
```

Reviewers: majnemer

Subscribers: mehdi_amini, llvm-commits

Differential Revision: https://reviews.llvm.org/D25377

llvm-svn: 283625
This commit is contained in:
Gor Nishanov 2016-10-08 00:22:50 +00:00
parent 1023993a33
commit 1b6aec8e25
3 changed files with 56 additions and 18 deletions

View File

@ -80,11 +80,10 @@ class LLVM_LIBRARY_VISIBILITY CoroIdInst : public IntrinsicInst {
enum { AlignArg, PromiseArg, CoroutineArg, InfoArg }; enum { AlignArg, PromiseArg, CoroutineArg, InfoArg };
public: public:
IntrinsicInst *getCoroAlloc() { CoroAllocInst *getCoroAlloc() {
for (User *U : users()) for (User *U : users())
if (auto *II = dyn_cast<IntrinsicInst>(U)) if (auto *CA = dyn_cast<CoroAllocInst>(U))
if (II->getIntrinsicID() == Intrinsic::coro_alloc) return CA;
return II;
return nullptr; return nullptr;
} }

View File

@ -273,18 +273,6 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
// FIXME: coming in upcoming patches: // FIXME: coming in upcoming patches:
// replaceUnwindCoroEnds(Shape.CoroEnds, VMap); // replaceUnwindCoroEnds(Shape.CoroEnds, VMap);
// We only store resume(0) and destroy(1) addresses in the coroutine frame.
// The cleanup(2) clone is only used during devirtualization when coroutine is
// eligible for heap elision and thus does not participate in indirect calls
// and does not need its address to be stored in the coroutine frame.
if (FnIndex < 2) {
// Store the address of this clone in the coroutine frame.
Builder.SetInsertPoint(Shape.FramePtr->getNextNode());
auto *G = Builder.CreateConstInBoundsGEP2_32(Shape.FrameTy, Shape.FramePtr,
0, FnIndex, "fn.addr");
Builder.CreateStore(NewF, G);
}
// Eliminate coro.free from the clones, replacing it with 'null' in cleanup, // Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
// to suppress deallocation code. // to suppress deallocation code.
coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]), coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]),
@ -348,6 +336,31 @@ static void setCoroInfo(Function &F, CoroBeginInst *CoroBegin,
CoroBegin->getId()->setInfo(BC); CoroBegin->getId()->setInfo(BC);
} }
// Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame.
static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
Function *DestroyFn, Function *CleanupFn) {
IRBuilder<> Builder(Shape.FramePtr->getNextNode());
auto *ResumeAddr = Builder.CreateConstInBoundsGEP2_32(
Shape.FrameTy, Shape.FramePtr, 0, coro::Shape::ResumeField,
"resume.addr");
Builder.CreateStore(ResumeFn, ResumeAddr);
Value *DestroyOrCleanupFn = DestroyFn;
CoroIdInst *CoroId = Shape.CoroBegin->getId();
if (CoroAllocInst *CA = CoroId->getCoroAlloc()) {
// If there is a CoroAlloc and it returns false (meaning we elide the
// allocation, use CleanupFn instead of DestroyFn).
DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn);
}
auto *DestroyAddr = Builder.CreateConstInBoundsGEP2_32(
Shape.FrameTy, Shape.FramePtr, 0, coro::Shape::DestroyField,
"destroy.addr");
Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr);
}
static void postSplitCleanup(Function &F) { static void postSplitCleanup(Function &F) {
removeUnreachableBlocks(F); removeUnreachableBlocks(F);
llvm::legacy::FunctionPassManager FPM(F.getParent()); llvm::legacy::FunctionPassManager FPM(F.getParent());
@ -496,7 +509,15 @@ static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
postSplitCleanup(*DestroyClone); postSplitCleanup(*DestroyClone);
postSplitCleanup(*CleanupClone); postSplitCleanup(*CleanupClone);
// Store addresses resume/destroy/cleanup functions in the coroutine frame.
updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
// Create a constant array referring to resume/destroy/clone functions pointed
// by the last argument of @llvm.coro.info, so that CoroElide pass can
// determined correct function to call.
setCoroInfo(F, Shape.CoroBegin, {ResumeClone, DestroyClone, CleanupClone}); setCoroInfo(F, Shape.CoroBegin, {ResumeClone, DestroyClone, CleanupClone});
// Update call graph and add the functions we created to the SCC.
coro::updateCallGraph(F, {ResumeClone, DestroyClone, CleanupClone}, CG, SCC); coro::updateCallGraph(F, {ResumeClone, DestroyClone, CleanupClone}, CG, SCC);
} }

View File

@ -4,9 +4,17 @@
define i8* @f() "coroutine.presplit"="1" { define i8* @f() "coroutine.presplit"="1" {
entry: entry:
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%need.alloc = call i1 @llvm.coro.alloc(token %id)
br i1 %need.alloc, label %dyn.alloc, label %begin
dyn.alloc:
%size = call i32 @llvm.coro.size.i32() %size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size) %alloc = call i8* @malloc(i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) br label %begin
begin:
%phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
%hdl = call i8* @llvm.coro.begin(token %id, i8* %phi)
call void @print(i32 0) call void @print(i32 0)
%0 = call i8 @llvm.coro.suspend(token none, i1 false) %0 = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %0, label %suspend [i8 0, label %resume switch i8 %0, label %suspend [i8 0, label %resume
@ -26,6 +34,10 @@ suspend:
; CHECK-LABEL: @f( ; CHECK-LABEL: @f(
; CHECK: call i8* @malloc ; CHECK: call i8* @malloc
; CHECK: @llvm.coro.begin(token %id, i8* %phi)
; CHECK: store void (%f.Frame*)* @f.resume, void (%f.Frame*)** %resume.addr
; CHECK: %[[SEL:.+]] = select i1 %need.alloc, void (%f.Frame*)* @f.destroy, void (%f.Frame*)* @f.cleanup
; CHECK: store void (%f.Frame*)* %[[SEL]], void (%f.Frame*)** %destroy.addr
; CHECK: call void @print(i32 0) ; CHECK: call void @print(i32 0)
; CHECK-NOT: call void @print(i32 1) ; CHECK-NOT: call void @print(i32 1)
; CHECK-NOT: call void @free( ; CHECK-NOT: call void @free(
@ -45,6 +57,12 @@ suspend:
; CHECK: call void @free( ; CHECK: call void @free(
; CHECK: ret void ; CHECK: ret void
; CHECK-LABEL: @f.cleanup(
; CHECK-NOT: call i8* @malloc
; CHECK-NOT: call void @print(
; CHECK-NOT: call void @free(
; CHECK: ret void
declare i8* @llvm.coro.free(token, i8*) declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32() declare i32 @llvm.coro.size.i32()
declare i8 @llvm.coro.suspend(token, i1) declare i8 @llvm.coro.suspend(token, i1)
@ -52,7 +70,7 @@ declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*) declare void @llvm.coro.destroy(i8*)
declare token @llvm.coro.id(i32, i8*, i8*, i8*) declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i8* @llvm.coro.alloc(token) declare i1 @llvm.coro.alloc(token)
declare i8* @llvm.coro.begin(token, i8*) declare i8* @llvm.coro.begin(token, i8*)
declare void @llvm.coro.end(i8*, i1) declare void @llvm.coro.end(i8*, i1)