forked from OSchip/llvm-project
[Coroutines] Only do symmetric transfer if optimization is on
Symmetric transfer is not part of the C++ standard, so vendors are not required to implement it. Since symmetric transfer is effectively an optimization nowadays, it makes more sense to enable it only when optimization is enabled. This also helps compilation speed at O0.
This commit is contained in:
parent
8b68da2c7d
commit
7782e080e8
|
@ -22,14 +22,14 @@
|
|||
namespace llvm {
|
||||
|
||||
struct CoroSplitPass : PassInfoMixin<CoroSplitPass> {
|
||||
CoroSplitPass(bool OptimizeFrame = false) : OptimizeFrame(OptimizeFrame) {}
|
||||
CoroSplitPass(bool Optimizing = false) : Optimizing(Optimizing) {}
|
||||
|
||||
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
|
||||
LazyCallGraph &CG, CGSCCUpdateResult &UR);
|
||||
static bool isRequired() { return true; }
|
||||
|
||||
// True if the optimization level isn't O0.
|
||||
bool OptimizeFrame;
|
||||
bool Optimizing;
|
||||
};
|
||||
} // end namespace llvm
|
||||
|
||||
|
|
|
@ -601,7 +601,7 @@ Expected<bool> parseInlinerPassOptions(StringRef Params) {
|
|||
}
|
||||
|
||||
Expected<bool> parseCoroSplitPassOptions(StringRef Params) {
|
||||
return parseSinglePassOption(Params, "reuse-storage", "CoroSplitPass");
|
||||
return parseSinglePassOption(Params, "optimizing", "CoroSplitPass");
|
||||
}
|
||||
|
||||
Expected<bool> parseEarlyCSEPassOptions(StringRef Params) {
|
||||
|
|
|
@ -192,7 +192,7 @@ CGSCC_PASS_WITH_PARAMS("coro-split",
|
|||
return CoroSplitPass(OptimizeFrame);
|
||||
},
|
||||
parseCoroSplitPassOptions,
|
||||
"reuse-storage")
|
||||
"optimizing")
|
||||
#undef CGSCC_PASS_WITH_PARAMS
|
||||
|
||||
#ifndef FUNCTION_ANALYSIS
|
||||
|
|
|
@ -612,7 +612,7 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
|
|||
}
|
||||
});
|
||||
|
||||
if (!Shape.OptimizeFrame) {
|
||||
if (!Shape.Optimizing) {
|
||||
for (const auto &A : FrameData.Allocas) {
|
||||
AllocaInst *Alloca = A.Alloca;
|
||||
NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
|
||||
|
@ -1696,14 +1696,14 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
|
|||
&*Builder.GetInsertPoint());
|
||||
// This dbg.declare is for the main function entry point. It
|
||||
// will be deleted in all coro-split functions.
|
||||
coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.OptimizeFrame);
|
||||
coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.Optimizing);
|
||||
}
|
||||
}
|
||||
|
||||
// Salvage debug info on any dbg.addr that we see. We do not insert them
|
||||
// into each block where we have a use though.
|
||||
if (auto *DI = dyn_cast<DbgAddrIntrinsic>(U)) {
|
||||
coro::salvageDebugInfo(DbgPtrAllocaCache, DI, Shape.OptimizeFrame);
|
||||
coro::salvageDebugInfo(DbgPtrAllocaCache, DI, Shape.Optimizing);
|
||||
}
|
||||
|
||||
// If we have a single edge PHINode, remove it and replace it with a
|
||||
|
@ -2552,7 +2552,7 @@ static void collectFrameAllocas(Function &F, coro::Shape &Shape,
|
|||
|
||||
void coro::salvageDebugInfo(
|
||||
SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,
|
||||
DbgVariableIntrinsic *DVI, bool OptimizeFrame) {
|
||||
DbgVariableIntrinsic *DVI, bool Optimizing) {
|
||||
Function *F = DVI->getFunction();
|
||||
IRBuilder<> Builder(F->getContext());
|
||||
auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
|
||||
|
@ -2605,7 +2605,7 @@ void coro::salvageDebugInfo(
|
|||
//
|
||||
// Avoid creating an alloca that would be eliminated by optimization
|
||||
// passes, which would leave the corresponding dbg.declares invalid.
|
||||
if (!OptimizeFrame)
|
||||
if (!Optimizing)
|
||||
if (auto *Arg = dyn_cast<llvm::Argument>(Storage)) {
|
||||
auto &Cached = DbgPtrAllocaCache[Storage];
|
||||
if (!Cached) {
|
||||
|
|
|
@ -31,7 +31,7 @@ void replaceCoroFree(CoroIdInst *CoroId, bool Elide);
|
|||
/// holding a pointer to the coroutine frame.
|
||||
void salvageDebugInfo(
|
||||
SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,
|
||||
DbgVariableIntrinsic *DVI, bool OptimizeFrame);
|
||||
DbgVariableIntrinsic *DVI, bool Optimizing);
|
||||
|
||||
// Keeps data and helper functions for lowering coroutine intrinsics.
|
||||
struct LowererBase {
|
||||
|
@ -104,7 +104,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
|
|||
BasicBlock *AllocaSpillBlock;
|
||||
|
||||
/// This would only be true if optimizations are enabled.
|
||||
bool OptimizeFrame;
|
||||
bool Optimizing;
|
||||
|
||||
struct SwitchLoweringStorage {
|
||||
SwitchInst *ResumeSwitch;
|
||||
|
@ -255,8 +255,8 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
|
|||
void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const;
|
||||
|
||||
Shape() = default;
|
||||
explicit Shape(Function &F, bool OptimizeFrame = false)
|
||||
: OptimizeFrame(OptimizeFrame) {
|
||||
explicit Shape(Function &F, bool Optimizing = false)
|
||||
: Optimizing(Optimizing) {
|
||||
buildFrom(F);
|
||||
}
|
||||
void buildFrom(Function &F);
|
||||
|
|
|
@ -683,7 +683,7 @@ void CoroCloner::salvageDebugInfo() {
|
|||
if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
|
||||
Worklist.push_back(DVI);
|
||||
for (DbgVariableIntrinsic *DVI : Worklist)
|
||||
coro::salvageDebugInfo(DbgPtrAllocaCache, DVI, Shape.OptimizeFrame);
|
||||
coro::salvageDebugInfo(DbgPtrAllocaCache, DVI, Shape.Optimizing);
|
||||
|
||||
// Remove all salvaged dbg.declare intrinsics that became
|
||||
// either unreachable or stale due to the CoroSplit transformation.
|
||||
|
@ -1351,8 +1351,8 @@ static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
|
|||
}
|
||||
|
||||
// Add musttail to any resume instruction that is immediately followed by a
|
||||
// suspend (i.e. ret). We do this even in -O0 to support guaranteed tail call
|
||||
// for symmetrical coroutine control transfer (C++ Coroutines TS extension).
|
||||
// suspend (i.e. ret) to implement symmetric transfer. We don't do this at
|
||||
// O0 since symmetric transfer is not part of the standard now.
|
||||
// This transformation is done only in the resume part of the coroutine that has
|
||||
// identical signature and calling convention as the coro.resume call.
|
||||
static void addMustTailToCoroResumes(Function &F) {
|
||||
|
@ -1580,7 +1580,10 @@ static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
|
|||
postSplitCleanup(*DestroyClone);
|
||||
postSplitCleanup(*CleanupClone);
|
||||
|
||||
addMustTailToCoroResumes(*ResumeClone);
|
||||
// Prepare to do symmetric transfer. We only do this if optimization is
|
||||
// enabled since the symmetric transfer is not part of the C++ standard now.
|
||||
if (Shape.Optimizing)
|
||||
addMustTailToCoroResumes(*ResumeClone);
|
||||
|
||||
// Store addresses resume/destroy/cleanup functions in the coroutine frame.
|
||||
updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
|
||||
|
@ -1885,14 +1888,14 @@ namespace {
|
|||
|
||||
static coro::Shape splitCoroutine(Function &F,
|
||||
SmallVectorImpl<Function *> &Clones,
|
||||
bool OptimizeFrame) {
|
||||
bool Optimizing) {
|
||||
PrettyStackTraceFunction prettyStackTrace(F);
|
||||
|
||||
// The suspend-crossing algorithm in buildCoroutineFrame gets tripped
|
||||
// up by uses in unreachable blocks, so remove them as a first pass.
|
||||
removeUnreachableBlocks(F);
|
||||
|
||||
coro::Shape Shape(F, OptimizeFrame);
|
||||
coro::Shape Shape(F, Optimizing);
|
||||
if (!Shape.CoroBegin)
|
||||
return Shape;
|
||||
|
||||
|
@ -1941,7 +1944,7 @@ static coro::Shape splitCoroutine(Function &F,
|
|||
}
|
||||
}
|
||||
for (auto *DDI : Worklist)
|
||||
coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.OptimizeFrame);
|
||||
coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.Optimizing);
|
||||
|
||||
return Shape;
|
||||
}
|
||||
|
@ -2084,7 +2087,7 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
|
|||
F.setSplittedCoroutine();
|
||||
|
||||
SmallVector<Function *, 4> Clones;
|
||||
const coro::Shape Shape = splitCoroutine(F, Clones, OptimizeFrame);
|
||||
const coro::Shape Shape = splitCoroutine(F, Clones, Optimizing);
|
||||
updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
|
||||
|
||||
if (!Shape.CoroSuspends.empty()) {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split<reuse-storage>),function(sroa)' -S | FileCheck %s
|
||||
; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split<optimizing>),function(sroa)' -S | FileCheck %s
|
||||
|
||||
; Checks whether the dbg.declare for `__promise` remains valid under O2.
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; Check that we can handle spills of array allocas
|
||||
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s
|
||||
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
|
||||
|
||||
%struct.big_structure = type { [500 x i8] }
|
||||
declare void @consume(%struct.big_structure*)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; Tests that variables in a Coroutine whose lifetime range is not overlapping each other
|
||||
; re-use the same slot in Coroutine frame.
|
||||
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s
|
||||
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
|
||||
%"struct.task::promise_type" = type { i8 }
|
||||
%struct.awaitable = type { i8 }
|
||||
%struct.big_structure = type { [500 x i8] }
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; Tests that variables of different type in a Coroutine whose lifetime range is not overlapping each other
|
||||
; re-use the same slot in Coroutine frame.
|
||||
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s
|
||||
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
|
||||
%"struct.task::promise_type" = type { i8 }
|
||||
%struct.awaitable = type { i8 }
|
||||
%struct.big_structure = type { [500 x i8] }
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; Tests that variables of different type with incompatible alignment in a Coroutine whose lifetime
|
||||
; range is not overlapping each other should not re-use the same slot in Coroutine frame.
|
||||
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s
|
||||
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
|
||||
%"struct.task::promise_type" = type { i8 }
|
||||
%struct.awaitable = type { i8 }
|
||||
%struct.big_structure = type { [500 x i8] }
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; Tests that variables of different type with incompatible alignment in a Coroutine whose
|
||||
; lifetime range is not overlapping each other re-use the same slot in CoroutineFrame.
|
||||
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s
|
||||
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
|
||||
%"struct.task::promise_type" = type { i8 }
|
||||
%struct.awaitable = type { i8 }
|
||||
%struct.big_structure = type { [500 x i8] }
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; Tests that coro-split will convert coro.resume followed by a suspend to a
|
||||
; musttail call.
|
||||
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
|
||||
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
|
||||
|
||||
define void @f() #0 {
|
||||
entry:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; Tests that coro-split will convert coro.resume followed by a suspend to a
|
||||
; musttail call.
|
||||
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
|
||||
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
|
||||
|
||||
define void @f() #0 {
|
||||
entry:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; Tests that coro-split will convert coro.resume followed by a suspend to a
|
||||
; musttail call.
|
||||
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
|
||||
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
|
||||
|
||||
define void @fakeresume1(i8*) {
|
||||
entry:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; Tests that coro-split will convert coro.resume followed by a suspend to a
|
||||
; musttail call.
|
||||
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
|
||||
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
|
||||
|
||||
define void @f() #0 {
|
||||
entry:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; Tests that coro-split will convert a call before coro.suspend to a musttail call
|
||||
; while the user of the coro.suspend is an icmpinst.
|
||||
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
|
||||
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
|
||||
|
||||
define void @fakeresume1(i8*) {
|
||||
entry:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; Tests that sunk lifetime markers wouldn't prevent optimization
|
||||
; to convert a resuming call to a musttail call.
|
||||
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
|
||||
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
|
||||
|
||||
declare void @fakeresume1(i64* align 8)
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
; The difference between this and coro-split-musttail5.ll is that there is
|
||||
; an extra bitcast instruction in the path, which makes it harder to
|
||||
; optimize.
|
||||
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
|
||||
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
|
||||
|
||||
declare void @fakeresume1(i64* align 8)
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
; The difference between this and coro-split-musttail5.ll and coro-split-musttail6.ll
|
||||
; is that this contains dead instructions generated during the transformation,
|
||||
; which makes the optimization harder.
|
||||
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
|
||||
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
|
||||
|
||||
declare void @fakeresume1(i64* align 8)
|
||||
|
||||
|
|
Loading…
Reference in New Issue