[Coroutines] Part 9: Add cleanup subfunction.

Summary:
[Coroutines] Part 9: Add cleanup subfunction.

This patch completes coroutine heap allocation elision. Now, the heap elision example from docs\Coroutines.rst compiles and produces expected result (see test/Transform/Coroutines/ex3.ll)

Intrinsic Changes:
* coro.free gets a token parameter tying it to coro.id to allow reliably discovering all coro.frees associated with a particular coroutine.
* coro.id gets an extra parameter that points back to a coroutine function. This allows to check whether a coro.id describes the enclosing function or it belongs to a different function that was later inlined.

CoroSplit now creates three subfunctions:
# f$resume - resume logic
# f$destroy - cleanup logic, followed by a deallocation code
# f$cleanup - just the cleanup code

CoroElide pass during devirtualization replaces coro.destroy with either f$destroy or f$cleanup depending whether heap elision is performed or not.

Other fixes, improvements:
* Fixed buglet in Shape::buildFrame that was not creating coro.save properly if coroutine has more than one suspend point.

* Switched to using variable width suspend index field (no longer limited to 32 bit index field can be as little as i1 or as large as i<whatever-size_t-is>)

Reviewers: majnemer

Subscribers: llvm-commits, mehdi_amini

Differential Revision: https://reviews.llvm.org/D23844

llvm-svn: 279971
This commit is contained in:
Gor Nishanov 2016-08-29 14:34:12 +00:00
parent b57d0a2fda
commit dce9b02677
19 changed files with 328 additions and 137 deletions

View File

@ -93,7 +93,7 @@ The LLVM IR for this coroutine looks like this:
define i8* @f(i32 %n) {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
%hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
@ -106,7 +106,7 @@ The LLVM IR for this coroutine looks like this:
switch i8 %0, label %suspend [i8 0, label %loop
i8 1, label %cleanup]
cleanup:
%mem = call i8* @llvm.coro.free(i8* %hdl)
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
@ -168,7 +168,7 @@ execution of the coroutine until a suspend point is reached:
define i8* @f(i32 %n) {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%alloc = call noalias i8* @malloc(i32 24)
%0 = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
%frame = bitcast i8* %0 to %f.frame*
@ -227,7 +227,7 @@ elided.
.. code-block:: none
entry:
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
dyn.alloc:
@ -245,7 +245,7 @@ thus skipping the deallocation code:
.. code-block:: llvm
cleanup:
%mem = call i8* @llvm.coro.free(i8* %hdl)
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
%need.dyn.free = icmp ne i8* %mem, null
br i1 %need.dyn.free, label %dyn.free, label %if.end
dyn.free:
@ -417,7 +417,7 @@ store the current value produced by a coroutine.
entry:
%promise = alloca i32
%pv = bitcast i32* %promise to i8*
%id = call token @llvm.coro.id(i32 0, i8* %pv, i8* null)
%id = call token @llvm.coro.id(i32 0, i8* %pv, i8* null, i8* null)
%need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
dyn.alloc:
@ -436,7 +436,7 @@ store the current value produced by a coroutine.
switch i8 %0, label %suspend [i8 0, label %loop
i8 1, label %cleanup]
cleanup:
%mem = call i8* @llvm.coro.free(i8* %hdl)
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
@ -699,7 +699,7 @@ Example:
%promise = alloca i32
%pv = bitcast i32* %promise to i8*
; the second argument to coro.id points to the coroutine promise.
%id = call token @llvm.coro.id(i32 0, i8* %pv, i8* null)
%id = call token @llvm.coro.id(i32 0, i8* %pv, i8* null, i8* null)
...
%hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
...
@ -791,7 +791,7 @@ A frontend should emit exactly one `coro.begin` intrinsic per coroutine.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
::
declare i8* @llvm.coro.free(i8* <frame>)
declare i8* @llvm.coro.free(token %id, i8* <frame>)
Overview:
"""""""""
@ -803,8 +803,11 @@ dynamically allocated memory for its coroutine frame.
Arguments:
""""""""""
A pointer to the coroutine frame. This should be the same pointer that was
returned by prior `coro.begin` call.
The first argument is a token returned by a call to '``llvm.coro.id``'
identifying the coroutine.
The second argument is a pointer to the coroutine frame. This should be the same
pointer that was returned by prior `coro.begin` call.
Example (custom deallocation function):
"""""""""""""""""""""""""""""""""""""""
@ -812,7 +815,7 @@ Example (custom deallocation function):
.. code-block:: llvm
cleanup:
%mem = call i8* @llvm.coro.free(i8* %frame)
%mem = call i8* @llvm.coro.free(token %id, i8* %frame)
%mem_not_null = icmp ne i8* %mem, null
br i1 %mem_not_null, label %if.then, label %if.end
if.then:
@ -827,7 +830,7 @@ Example (standard deallocation functions):
.. code-block:: llvm
cleanup:
%mem = call i8* @llvm.coro.free(i8* %frame)
%mem = call i8* @llvm.coro.free(token %id, i8* %frame)
call void @free(i8* %mem)
ret void
@ -864,7 +867,7 @@ Example:
.. code-block:: text
entry:
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%dyn.alloc.required = call i1 @llvm.coro.alloc(token %id)
br i1 %dyn.alloc.required, label %coro.alloc, label %coro.begin
@ -909,7 +912,8 @@ coroutine frame.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
::
declare token @llvm.coro.id(i32 <align>, i8* <promise>, i8* <fnaddr>)
declare token @llvm.coro.id(i32 <align>, i8* <promise>, i8* <coroaddr>,
i8* <fnaddrs>)
Overview:
"""""""""
@ -927,7 +931,10 @@ This argument only accepts constants.
The second argument, if not `null`, designates a particular alloca instruction
to be a `coroutine promise`_.
The third argument is `null` before coroutine is split, and later is replaced
The third argument is `null` coming out of the frontend. The CoroEarly pass sets
this argument to point to the function this coro.id belongs to.
The fourth argument is `null` before coroutine is split, and later is replaced
to point to a private global constant array containing function pointers to
outlined resume and destroy parts of the coroutine.
@ -1210,19 +1217,6 @@ CoroCleanup
This pass runs late to lower all coroutine related intrinsics not replaced by
earlier passes.
Upstreaming sequence (rough plan)
=================================
#. Add documentation.
#. Add coroutine intrinsics.
#. Add empty coroutine passes.
#. Add coroutine devirtualization + tests.
#. Add CGSCC restart trigger + tests.
#. Add coroutine heap elision + tests.
#. Add custom allocation heap elision + tests. <== we are here
#. Add coroutine splitting logic + tests.
#. Add simple coroutine frame builder + tests.
#. Add the rest of the logic + tests. (Maybe split further as needed).
Areas Requiring Attention
=========================
#. A coroutine frame is bigger than it could be. Adding stack packing and stack

View File

@ -603,15 +603,16 @@ def int_experimental_gc_relocate : Intrinsic<[llvm_any_ty],
// Coroutine Structure Intrinsics.
def int_coro_id : Intrinsic<[llvm_token_ty], [llvm_i32_ty, llvm_ptr_ty,
llvm_ptr_ty],
llvm_ptr_ty, llvm_ptr_ty],
[IntrArgMemOnly, IntrReadMem,
ReadNone<1>, ReadOnly<2>, NoCapture<2>]>;
def int_coro_alloc : Intrinsic<[llvm_i1_ty], [llvm_token_ty], []>;
def int_coro_begin : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty],
[WriteOnly<1>]>;
def int_coro_free : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
[IntrArgMemOnly, ReadOnly<0>, NoCapture<0>]>;
def int_coro_free : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly, ReadOnly<1>,
NoCapture<1>]>;
def int_coro_end : Intrinsic<[], [llvm_ptr_ty, llvm_i1_ty], []>;
def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;

View File

@ -3873,7 +3873,7 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
default:
break;
case Intrinsic::coro_id: {
auto *InfoArg = CS.getArgOperand(2)->stripPointerCasts();
auto *InfoArg = CS.getArgOperand(3)->stripPointerCasts();
if (isa<ConstantPointerNull>(InfoArg))
break;
auto *GV = dyn_cast<GlobalVariable>(InfoArg);

View File

@ -81,6 +81,7 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
if (CII->getInfo().isPreSplit()) {
F.addFnAttr(CORO_PRESPLIT_ATTR, UNPREPARED_FOR_SPLIT);
setCannotDuplicate(CII);
CII->setCoroutineSelf();
}
}
break;

View File

@ -35,6 +35,7 @@ struct Lowerer : coro::LowererBase {
Lowerer(Module &M) : LowererBase(M) {}
void elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA);
bool shouldElide() const;
bool processCoroId(CoroIdInst *, AAResults &AA);
};
} // end anonymous namespace
@ -122,14 +123,6 @@ void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) {
CA->eraseFromParent();
}
// To suppress deallocation code, we replace all llvm.coro.free intrinsics
// associated with this coro.begin with null constant.
auto *NullPtr = ConstantPointerNull::get(Type::getInt8PtrTy(C));
for (auto *CF : CoroFrees) {
CF->replaceAllUsesWith(NullPtr);
CF->eraseFromParent();
}
// FIXME: Design how to transmit alignment information for every alloca that
// is spilled into the coroutine frame and recreate the alignment information
// here. Possibly we will need to do a mini SROA here and break the coroutine
@ -148,9 +141,36 @@ void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) {
removeTailCallAttribute(Frame, AA);
}
bool Lowerer::shouldElide() const {
// If no CoroAllocs, we cannot suppress allocation, so elision is not
// possible.
if (CoroAllocs.empty())
return false;
// Check that for every coro.begin there is a coro.destroy directly
// referencing the SSA value of that coro.begin. If the value escaped, then
// coro.destroy would have been referencing a memory location storing that
// value and not the virtual register.
SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins;
for (CoroSubFnInst *DA : DestroyAddr) {
if (auto *CB = dyn_cast<CoroBeginInst>(DA->getFrame()))
ReferencedCoroBegins.insert(CB);
else
return false;
}
// If size of the set is the same as total number of CoroBegins, means we
// found a coro.free or coro.destroy mentioning a coro.begin and we can
// perform heap elision.
return ReferencedCoroBegins.size() == CoroBegins.size();
}
bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA) {
CoroBegins.clear();
CoroAllocs.clear();
CoroFrees.clear();
ResumeAddr.clear();
DestroyAddr.clear();
@ -160,6 +180,8 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA) {
CoroBegins.push_back(CB);
else if (auto *CA = dyn_cast<CoroAllocInst>(U))
CoroAllocs.push_back(CA);
else if (auto *CF = dyn_cast<CoroFreeInst>(U))
CoroFrees.push_back(CF);
}
// Collect all coro.subfn.addrs associated with coro.begin.
@ -191,27 +213,20 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA) {
replaceWithConstant(ResumeAddrConstant, ResumeAddr);
if (DestroyAddr.empty())
return true;
bool ShouldElide = shouldElide();
auto *DestroyAddrConstant =
ConstantExpr::getExtractValue(Resumers, CoroSubFnInst::DestroyIndex);
auto *DestroyAddrConstant = ConstantExpr::getExtractValue(
Resumers,
ShouldElide ? CoroSubFnInst::CleanupIndex : CoroSubFnInst::DestroyIndex);
replaceWithConstant(DestroyAddrConstant, DestroyAddr);
// If there is a coro.alloc that llvm.coro.id refers to, we have the ability
// to suppress dynamic allocation.
if (!CoroAllocs.empty()) {
// FIXME: The check above is overly lax. It only checks for whether we have
// an ability to elide heap allocations, not whether it is safe to do so.
// We need to do something like:
// If for every exit from the function where coro.begin is
// live, there is a coro.free or coro.destroy dominating that exit block,
// then it is safe to elide heap allocation, since the lifetime of coroutine
// is fully enclosed in its caller.
if (ShouldElide) {
auto *FrameTy = getFrameType(cast<Function>(ResumeAddrConstant));
elideHeapAllocations(CoroId->getFunction(), FrameTy, AA);
coro::replaceCoroFree(CoroId, /*Elide=*/true);
}
return true;
}
@ -262,21 +277,21 @@ struct CoroElide : FunctionPass {
Changed = replaceDevirtTrigger(F);
L->CoroIds.clear();
L->CoroFrees.clear();
// Collect all PostSplit coro.ids and all coro.free.
// Collect all PostSplit coro.ids.
for (auto &I : instructions(F))
if (auto *CF = dyn_cast<CoroFreeInst>(&I))
L->CoroFrees.push_back(CF);
else if (auto *CII = dyn_cast<CoroIdInst>(&I))
if (auto *CII = dyn_cast<CoroIdInst>(&I))
if (CII->getInfo().isPostSplit())
L->CoroIds.push_back(CII);
// If it is the coroutine itself, don't touch it.
if (CII->getCoroutine() != CII->getFunction())
L->CoroIds.push_back(CII);
// If we did not find any coro.id, there is nothing to do.
if (L->CoroIds.empty())
return Changed;
AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
for (auto *CII : L->CoroIds)
Changed |= L->processCoroId(CII, AA);
@ -284,7 +299,6 @@ struct CoroElide : FunctionPass {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AAResultsWrapperPass>();
AU.setPreservesCFG();
}
};
}

View File

@ -24,6 +24,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/circular_raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@ -182,8 +183,9 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
}
// Iterate propagating consumes and kills until they stop changing
int Iteration = 0; (void)Iteration;
int Iteration = 0;
(void)Iteration;
bool Changed;
do {
DEBUG(dbgs() << "iteration " << ++Iteration);
@ -307,10 +309,10 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
/*IsVarArgs=*/false);
auto *FnPtrTy = FnTy->getPointerTo();
if (Shape.CoroSuspends.size() > UINT32_MAX)
report_fatal_error("Cannot handle coroutine with this many suspend points");
// Figure out how wide should be an integer type storing the suspend index.
unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
SmallVector<Type *, 8> Types{FnPtrTy, FnPtrTy, Type::getInt32Ty(C)};
SmallVector<Type *, 8> Types{FnPtrTy, FnPtrTy, Type::getIntNTy(C, IndexBits)};
Value *CurrentDef = nullptr;
// Create an entry for every spilled value.
@ -333,13 +335,6 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
return FrameTy;
}
// Returns the index of the last non-spill field in the coroutine frame.
// 2 - if there is no coroutine promise specified or 3, if there is.
static unsigned getLastNonSpillIndex(coro::Shape &Shape) {
// TODO: Add support for coroutine promise.
return 2;
}
// Replace all alloca and SSA values that are accessed across suspend points
// with GetElementPointer from coroutine frame + loads and stores. Create an
// AllocaSpillBB that will become the new entry block for the resume parts of
@ -373,7 +368,7 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
Value *CurrentValue = nullptr;
BasicBlock *CurrentBlock = nullptr;
Value *CurrentReload = nullptr;
unsigned Index = getLastNonSpillIndex(Shape);
unsigned Index = coro::Shape::LastKnownField;
// We need to keep track of any allocas that need "spilling"
// since they will live in the coroutine frame now, all access to them
@ -622,6 +617,10 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
// frame.
if (isa<CoroBeginInst>(&I))
continue;
// A token returned CoroIdInst is used to tie together structural intrinsics
// in a coroutine. It should not be saved to the coroutine frame.
if (isa<CoroIdInst>(&I))
continue;
for (User *U : I.users())
if (Checker.isDefinitionAcrossSuspend(I, U)) {

View File

@ -11,7 +11,7 @@
// allows you to do things like:
//
// if (auto *SF = dyn_cast<CoroSubFnInst>(Inst))
// ... SF->getFrame() ...
// ... SF->getFrame() ...
//
// All intrinsic function calls are instances of the call instruction, so these
// are all subclasses of the CallInst class. Note that none of these classes
@ -37,6 +37,7 @@ public:
RestartTrigger = -1,
ResumeIndex,
DestroyIndex,
CleanupIndex,
IndexLast,
IndexFirst = RestartTrigger
};
@ -76,7 +77,8 @@ public:
/// This represents the llvm.coro.alloc instruction.
class LLVM_LIBRARY_VISIBILITY CoroIdInst : public IntrinsicInst {
enum { AlignArg, PromiseArg, InfoArg };
enum { AlignArg, PromiseArg, CoroutineArg, InfoArg };
public:
// Info argument of coro.id is
// fresh out of the frontend: null ;
@ -118,6 +120,16 @@ public:
void setInfo(Constant *C) { setArgOperand(InfoArg, C); }
Function *getCoroutine() const {
return cast<Function>(getArgOperand(CoroutineArg)->stripPointerCasts());
}
void setCoroutineSelf() {
assert(isa<ConstantPointerNull>(getArgOperand(CoroutineArg)) &&
"Coroutine argument is already assigned");
auto *const Int8PtrTy = Type::getInt8PtrTy(getContext());
setArgOperand(CoroutineArg,
ConstantExpr::getBitCast(getFunction(), Int8PtrTy));
}
// Methods to support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const IntrinsicInst *I) {
@ -142,7 +154,11 @@ public:
/// This represents the llvm.coro.free instruction.
class LLVM_LIBRARY_VISIBILITY CoroFreeInst : public IntrinsicInst {
enum { IdArg, FrameArg };
public:
Value *getFrame() const { return getArgOperand(FrameArg); }
// Methods to support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_free;
@ -157,9 +173,7 @@ class LLVM_LIBRARY_VISIBILITY CoroBeginInst : public IntrinsicInst {
enum { IdArg, MemArg };
public:
CoroIdInst *getId() const {
return cast<CoroIdInst>(getArgOperand(IdArg));
}
CoroIdInst *getId() const { return cast<CoroIdInst>(getArgOperand(IdArg)); }
Value *getMem() const { return getArgOperand(MemArg); }

View File

@ -46,6 +46,7 @@ namespace coro {
bool declaresIntrinsics(Module &M, std::initializer_list<StringRef>);
void replaceAllCoroAllocs(CoroBeginInst *CB, bool Replacement);
void replaceAllCoroFrees(CoroBeginInst *CB, Value *Replacement);
void replaceCoroFree(CoroIdInst *CoroId, bool Elide);
void updateCallGraph(Function &Caller, ArrayRef<Function *> Funcs,
CallGraph &CG, CallGraphSCC &SCC);
@ -71,9 +72,10 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
// Field Indexes for known coroutine frame fields.
enum {
ResumeField = 0,
DestroyField = 1,
IndexField = 2,
ResumeField,
DestroyField,
IndexField,
LastKnownField = IndexField
};
StructType *FrameTy;
@ -82,6 +84,14 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
SwitchInst* ResumeSwitch;
bool HasFinalSuspend;
IntegerType* getIndexType() const {
assert(FrameTy && "frame type not assigned");
return cast<IntegerType>(FrameTy->getElementType(IndexField));
}
ConstantInt *getIndex(uint64_t Value) const {
return ConstantInt::get(getIndexType(), Value);
}
Shape() = default;
explicit Shape(Function &F) { buildFrom(F); }
void buildFrom(Function &F);

View File

@ -64,7 +64,7 @@ static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) {
uint32_t SuspendIndex = 0;
for (auto S : Shape.CoroSuspends) {
ConstantInt *IndexVal = Builder.getInt32(SuspendIndex);
ConstantInt *IndexVal = Shape.getIndex(SuspendIndex);
// Replace CoroSave with a store to Index:
// %index.addr = getelementptr %f.frame... (index field number)
@ -140,7 +140,6 @@ static void replaceFallthroughCoroEnd(IntrinsicInst *End,
// resume or cleanup pass for every suspend point.
static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
BasicBlock *ResumeEntry, int8_t FnIndex) {
Module *M = F.getParent();
auto *FrameTy = Shape.FrameTy;
auto *FnPtrTy = cast<PointerType>(FrameTy->getElementType(0));
@ -207,7 +206,11 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
OldVFrame->replaceAllUsesWith(NewVFrame);
// Replace coro suspend with the appropriate resume index.
auto *NewValue = Builder.getInt8(FnIndex);
// Replacing coro.suspend with (0) will result in control flow proceeding to
// a resume label associated with a suspend point, replacing it with (1) will
// result in control flow proceeding to a cleanup label associated with this
// suspend point.
auto *NewValue = Builder.getInt8(FnIndex ? 1 : 0);
for (CoroSuspendInst *CS : Shape.CoroSuspends) {
auto *MappedCS = cast<CoroSuspendInst>(VMap[CS]);
MappedCS->replaceAllUsesWith(NewValue);
@ -219,11 +222,23 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
// FIXME: coming in upcoming patches:
// replaceUnwindCoroEnds(Shape.CoroEnds, VMap);
// Store the address of this clone in the coroutine frame.
Builder.SetInsertPoint(Shape.FramePtr->getNextNode());
auto *G = Builder.CreateConstInBoundsGEP2_32(Shape.FrameTy, Shape.FramePtr, 0,
FnIndex, "fn.addr");
Builder.CreateStore(NewF, G);
// We only store resume(0) and destroy(1) addresses in the coroutine frame.
// The cleanup(2) clone is only used during devirtualization when coroutine is
// eligible for heap elision and thus does not participate in indirect calls
// and does not need its address to be stored in the coroutine frame.
if (FnIndex < 2) {
// Store the address of this clone in the coroutine frame.
Builder.SetInsertPoint(Shape.FramePtr->getNextNode());
auto *G = Builder.CreateConstInBoundsGEP2_32(Shape.FrameTy, Shape.FramePtr,
0, FnIndex, "fn.addr");
Builder.CreateStore(NewF, G);
}
// Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
// to suppress deallocation code.
coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]),
/*Elide=*/FnIndex == 2);
NewF->setCallingConv(CallingConv::Fast);
return NewF;
@ -303,10 +318,12 @@ static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
return;
buildCoroutineFrame(F, Shape);
replaceFrameSize(Shape);
auto *ResumeEntry = createResumeEntryBlock(F, Shape);
auto *ResumeClone = createClone(F, ".resume", Shape, ResumeEntry, 0);
auto *DestroyClone = createClone(F, ".destroy", Shape, ResumeEntry, 1);
auto ResumeClone = createClone(F, ".resume", Shape, ResumeEntry, 0);
auto DestroyClone = createClone(F, ".destroy", Shape, ResumeEntry, 1);
auto CleanupClone = createClone(F, ".cleanup", Shape, ResumeEntry, 2);
// We no longer need coro.end in F.
removeCoroEnds(Shape);
@ -314,11 +331,10 @@ static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
postSplitCleanup(F);
postSplitCleanup(*ResumeClone);
postSplitCleanup(*DestroyClone);
postSplitCleanup(*CleanupClone);
replaceFrameSize(Shape);
setCoroInfo(F, Shape.CoroBegin, {ResumeClone, DestroyClone});
coro::updateCallGraph(F, {ResumeClone, DestroyClone}, CG, SCC);
setCoroInfo(F, Shape.CoroBegin, {ResumeClone, DestroyClone, CleanupClone});
coro::updateCallGraph(F, {ResumeClone, DestroyClone, CleanupClone}, CG, SCC);
}
// When we see the coroutine the first time, we insert an indirect call to a

View File

@ -127,6 +127,27 @@ bool coro::declaresIntrinsics(Module &M,
return false;
}
// Replace all coro.frees associated with the provided CoroId either with 'null'
// if Elide is true and with its frame parameter otherwise.
void coro::replaceCoroFree(CoroIdInst *CoroId, bool Elide) {
SmallVector<CoroFreeInst *, 4> CoroFrees;
for (User *U : CoroId->users())
if (auto CF = dyn_cast<CoroFreeInst>(U))
CoroFrees.push_back(CF);
if (CoroFrees.empty())
return;
Value *Replacement =
Elide ? ConstantPointerNull::get(Type::getInt8PtrTy(CoroId->getContext()))
: CoroFrees.front()->getFrame();
for (CoroFreeInst *CF : CoroFrees) {
CF->replaceAllUsesWith(Replacement);
CF->eraseFromParent();
}
}
// FIXME: This code is stolen from CallGraph::addToCallGraph(Function *F), which
// happens to be private. It is better for this functionality exposed by the
// CallGraph.
@ -286,5 +307,5 @@ void coro::Shape::buildFrom(Function &F) {
// Canonicalize coro.suspend by inserting a coro.save if needed.
for (CoroSuspendInst *CS : CoroSuspends)
if (!CS->getCoroSave())
createCoroSave(CoroBegin, CoroSuspends.back());
createCoroSave(CoroBegin, CS);
}

View File

@ -23,6 +23,7 @@ define fastcc void @f.destroy(i8*) {
define i8* @f() {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null,
i8* bitcast (i8*()* @f to i8*),
i8* bitcast ([2 x void (i8*)*]* @f.resumers to i8*))
%hdl = call i8* @llvm.coro.begin(token %id, i8* null)
ret i8* %hdl
@ -31,10 +32,9 @@ entry:
; CHECK-LABEL: @callResume(
define void @callResume() {
entry:
; CHECK: call i8* @llvm.coro.begin
%hdl = call i8* @f()
; CHECK-NEXT: call void @print(i32 0)
; CHECK: call void @print(i32 0)
%0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
%1 = bitcast i8* %0 to void (i8*)*
call fastcc void %1(i8* %hdl)
@ -51,10 +51,9 @@ entry:
; CHECK-LABEL: @eh(
define void @eh() personality i8* null {
entry:
; CHECK: call i8* @llvm.coro.begin
%hdl = call i8* @f()
; CHECK-NEXT: call void @print(i32 0)
; CHECK: call void @print(i32 0)
%0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
%1 = bitcast i8* %0 to void (i8*)*
invoke void %1(i8* %hdl)
@ -71,7 +70,7 @@ ehcleanup:
; no devirtualization here, since coro.begin info parameter is null
define void @no_devirt_info_null() {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%hdl = call i8* @llvm.coro.begin(token %id, i8* null)
; CHECK: call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
@ -107,7 +106,7 @@ entry:
ret void
}
declare token @llvm.coro.id(i32, i8*, i8*)
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i8* @llvm.coro.begin(token, i8*)
declare i8* @llvm.coro.frame()
declare i8* @llvm.coro.subfn.addr(i8*, i8)

View File

@ -11,19 +11,21 @@ declare void @bar(i8*)
declare fastcc void @f.resume(%f.frame*)
declare fastcc void @f.destroy(%f.frame*)
declare fastcc void @f.cleanup(%f.frame*)
declare void @may_throw()
declare i8* @CustomAlloc(i32)
declare void @CustomFree(i8*)
@f.resumers = internal constant
[2 x void (%f.frame*)*] [void (%f.frame*)* @f.resume, void (%f.frame*)* @f.destroy]
@f.resumers = internal constant [3 x void (%f.frame*)*]
[void (%f.frame*)* @f.resume, void (%f.frame*)* @f.destroy, void (%f.frame*)* @f.cleanup]
; a coroutine start function
define i8* @f() personality i8* null {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null,
i8* bitcast ([2 x void (%f.frame*)*]* @f.resumers to i8*))
i8* bitcast (i8*()* @f to i8*),
i8* bitcast ([3 x void (%f.frame*)*]* @f.resumers to i8*))
%need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
dyn.alloc:
@ -39,7 +41,7 @@ ret:
ehcleanup:
%tok = cleanuppad within none []
%mem = call i8* @llvm.coro.free(i8* %hdl)
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
%need.dyn.free = icmp ne i8* %mem, null
br i1 %need.dyn.free, label %dyn.free, label %if.end
dyn.free:
@ -62,7 +64,7 @@ entry:
; CHECK-NOT: tail call void @bar(
; CHECK: call void @bar(
tail call void @bar(i8* %hdl)
; CHECK: tail call void @bar(
; CHECK: tail call void @bar(
tail call void @bar(i8* null)
; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8* %vFrame)
@ -70,7 +72,7 @@ entry:
%1 = bitcast i8* %0 to void (i8*)*
call fastcc void %1(i8* %hdl)
; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.destroy to void (i8*)*)(i8* %vFrame)
; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.cleanup to void (i8*)*)(i8* %vFrame)
%2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
%3 = bitcast i8* %2 to void (i8*)*
call fastcc void %3(i8* %hdl)
@ -84,7 +86,8 @@ entry:
define i8* @f_no_elision() personality i8* null {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null,
i8* bitcast ([2 x void (%f.frame*)*]* @f.resumers to i8*))
i8* bitcast (i8*()* @f_no_elision to i8*),
i8* bitcast ([3 x void (%f.frame*)*]* @f.resumers to i8*))
%alloc = call i8* @CustomAlloc(i32 4)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
ret i8* %hdl
@ -116,9 +119,9 @@ entry:
ret void
}
declare token @llvm.coro.id(i32, i8*, i8*)
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i1 @llvm.coro.alloc(token)
declare i8* @llvm.coro.free(i8*)
declare i8* @llvm.coro.free(token, i8*)
declare i8* @llvm.coro.begin(token, i8*)
declare i8* @llvm.coro.frame(token)
declare i8* @llvm.coro.subfn.addr(i8*, i8)

View File

@ -3,7 +3,7 @@
define i8* @f() "coroutine.presplit"="1" {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
@ -16,7 +16,7 @@ resume:
br label %cleanup
cleanup:
%mem = call i8* @llvm.coro.free(i8* %hdl)
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
@ -45,13 +45,13 @@ suspend:
; CHECK: call void @free(
; CHECK: ret void
declare i8* @llvm.coro.free(i8*)
declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)
declare token @llvm.coro.id(i32, i8*, i8*)
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i8* @llvm.coro.alloc(token)
declare i8* @llvm.coro.begin(token, i8*)
declare void @llvm.coro.end(i8*, i1)

View File

@ -3,7 +3,7 @@
define i8* @f() {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
dyn.alloc:
@ -22,7 +22,7 @@ resume:
br label %cleanup
cleanup:
%mem = call i8* @llvm.coro.free(i8* %hdl)
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
@ -35,22 +35,18 @@ entry:
call void @llvm.coro.resume(i8* %hdl)
ret i32 0
; CHECK-LABEL: @main(
; CHECK: call i8* @malloc
; CHECK-NOT: call void @free
; CHECK: call void @print(i32 0)
; CHECK-NOT: call void @free
; CHECK: call void @print(i32 1)
; CHECK: call void @free
; CHECK: ret i32 0
}
declare i8* @llvm.coro.free(i8*)
declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)
declare token @llvm.coro.id(i32, i8*, i8*)
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i1 @llvm.coro.alloc(token)
declare i8* @llvm.coro.begin(token, i8*)
declare void @llvm.coro.end(i8*, i1)

View File

@ -3,7 +3,7 @@
define i8* @f(i32 %n) {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
@ -20,7 +20,7 @@ resume:
br label %loop
cleanup:
%mem = call i8* @llvm.coro.free(i8* %hdl)
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
@ -43,9 +43,9 @@ entry:
; CHECK: ret i32 0
}
declare token @llvm.coro.id(i32, i8*, i8*)
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i8* @llvm.coro.alloc(token)
declare i8* @llvm.coro.free(i8*)
declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)

View File

@ -3,7 +3,7 @@
define i8* @f(i32 %n) {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
%hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
@ -16,7 +16,7 @@ loop:
switch i8 %0, label %suspend [i8 0, label %loop
i8 1, label %cleanup]
cleanup:
%mem = call i8* @llvm.coro.free(i8* %hdl)
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
@ -43,11 +43,11 @@ declare i8* @malloc(i32)
declare void @free(i8*)
declare void @print(i32)
declare token @llvm.coro.id(i32, i8*, i8*)
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i32 @llvm.coro.size.i32()
declare i8* @llvm.coro.begin(token, i8*)
declare i8 @llvm.coro.suspend(token, i1)
declare i8* @llvm.coro.free(i8*)
declare i8* @llvm.coro.free(token, i8*)
declare void @llvm.coro.end(i8*, i1)
declare void @llvm.coro.resume(i8*)

View File

@ -0,0 +1,63 @@
; Second example from Doc/Coroutines.rst (custom alloc and free functions)
; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s
define i8* @f(i32 %n) {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
dyn.alloc:
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @CustomAlloc(i32 %size)
br label %coro.begin
coro.begin:
%phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
%hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi)
br label %loop
loop:
%n.val = phi i32 [ %n, %coro.begin ], [ %inc, %loop ]
%inc = add nsw i32 %n.val, 1
call void @print(i32 %n.val)
%0 = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %0, label %suspend [i8 0, label %loop
i8 1, label %cleanup]
cleanup:
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
%need.dyn.free = icmp ne i8* %mem, null
br i1 %need.dyn.free, label %dyn.free, label %suspend
dyn.free:
call void @CustomFree(i8* %mem)
br label %suspend
suspend:
call void @llvm.coro.end(i8* %hdl, i1 false)
ret i8* %hdl
}
; CHECK-LABEL: @main
define i32 @main() {
entry:
%hdl = call i8* @f(i32 4)
call void @llvm.coro.resume(i8* %hdl)
call void @llvm.coro.resume(i8* %hdl)
call void @llvm.coro.destroy(i8* %hdl)
ret i32 0
; CHECK: call void @print(i32 4)
; CHECK-NEXT: call void @print(i32 5)
; CHECK-NEXT: call void @print(i32 6)
; CHECK-NEXT: ret i32 0
}
declare i8* @CustomAlloc(i32)
declare void @CustomFree(i8*)
declare void @print(i32)
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i1 @llvm.coro.alloc(token)
declare i32 @llvm.coro.size.i32()
declare i8* @llvm.coro.begin(token, i8*)
declare i8 @llvm.coro.suspend(token, i1)
declare i8* @llvm.coro.free(token, i8*)
declare void @llvm.coro.end(i8*, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)

View File

@ -0,0 +1,60 @@
; Third example from Doc/Coroutines.rst (two suspend points)
; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s
define i8* @f(i32 %n) {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
%hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
br label %loop
loop:
%n.val = phi i32 [ %n, %entry ], [ %inc, %loop.resume ]
call void @print(i32 %n.val) #4
%0 = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %0, label %suspend [i8 0, label %loop.resume
i8 1, label %cleanup]
loop.resume:
%inc = add nsw i32 %n.val, 1
%sub = xor i32 %n.val, -1
call void @print(i32 %sub)
%1 = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %1, label %suspend [i8 0, label %loop
i8 1, label %cleanup]
cleanup:
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
call void @llvm.coro.end(i8* %hdl, i1 false)
ret i8* %hdl
}
; CHECK-LABEL: @main
define i32 @main() {
entry:
%hdl = call i8* @f(i32 4)
call void @llvm.coro.resume(i8* %hdl)
call void @llvm.coro.resume(i8* %hdl)
call void @llvm.coro.destroy(i8* %hdl)
ret i32 0
; CHECK: call void @print(i32 4)
; CHECK-NEXT: call void @print(i32 -5)
; CHECK-NEXT: call void @print(i32 5)
; CHECK: ret i32 0
}
declare i8* @malloc(i32)
declare void @free(i8*)
declare void @print(i32)
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i1 @llvm.coro.alloc(token)
declare i32 @llvm.coro.size.i32()
declare i8* @llvm.coro.begin(token, i8*)
declare i8 @llvm.coro.suspend(token, i1)
declare i8* @llvm.coro.free(token, i8*)
declare void @llvm.coro.end(i8*, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)

View File

@ -8,7 +8,7 @@
; CHECK-NEXT: CoroSplit: Processing coroutine 'f' state: 1
define void @f() {
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
@ -21,7 +21,7 @@ resume:
br label %cleanup
cleanup:
%mem = call i8* @llvm.coro.free(i8* %hdl)
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
call void @free(i8* %mem)
br label %suspend
suspend:
@ -29,9 +29,9 @@ suspend:
ret void
}
declare token @llvm.coro.id(i32, i8*, i8*)
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
declare i8* @llvm.coro.begin(token, i8*)
declare i8* @llvm.coro.free(i8*)
declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)