[Coroutines] Only do symmetric transfer if optimization is on

Symmetric transfer is not a part of the C++ standard, so vendors are not
forced to implement it anyway. Given that symmetric transfer is nowadays
an optimization, it makes more sense to enable it only if
optimization is enabled. This also helps the compilation speed at
O0.
This commit is contained in:
Chuanqi Xu 2022-06-20 15:54:23 +08:00
parent 8b68da2c7d
commit 7782e080e8
20 changed files with 38 additions and 35 deletions

View File

@ -22,14 +22,14 @@
namespace llvm {
struct CoroSplitPass : PassInfoMixin<CoroSplitPass> {
CoroSplitPass(bool OptimizeFrame = false) : OptimizeFrame(OptimizeFrame) {}
CoroSplitPass(bool Optimizing = false) : Optimizing(Optimizing) {}
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR);
static bool isRequired() { return true; }
// Would be true if the Optimization level isn't O0.
bool OptimizeFrame;
bool Optimizing;
};
} // end namespace llvm

View File

@ -601,7 +601,7 @@ Expected<bool> parseInlinerPassOptions(StringRef Params) {
}
Expected<bool> parseCoroSplitPassOptions(StringRef Params) {
return parseSinglePassOption(Params, "reuse-storage", "CoroSplitPass");
return parseSinglePassOption(Params, "optimizing", "CoroSplitPass");
}
Expected<bool> parseEarlyCSEPassOptions(StringRef Params) {

View File

@ -192,7 +192,7 @@ CGSCC_PASS_WITH_PARAMS("coro-split",
return CoroSplitPass(OptimizeFrame);
},
parseCoroSplitPassOptions,
"reuse-storage")
"optimizing")
#undef CGSCC_PASS_WITH_PARAMS
#ifndef FUNCTION_ANALYSIS

View File

@ -612,7 +612,7 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
}
});
if (!Shape.OptimizeFrame) {
if (!Shape.Optimizing) {
for (const auto &A : FrameData.Allocas) {
AllocaInst *Alloca = A.Alloca;
NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
@ -1696,14 +1696,14 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
&*Builder.GetInsertPoint());
// This dbg.declare is for the main function entry point. It
// will be deleted in all coro-split functions.
coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.OptimizeFrame);
coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.Optimizing);
}
}
// Salvage debug info on any dbg.addr that we see. We do not insert them
// into each block where we have a use though.
if (auto *DI = dyn_cast<DbgAddrIntrinsic>(U)) {
coro::salvageDebugInfo(DbgPtrAllocaCache, DI, Shape.OptimizeFrame);
coro::salvageDebugInfo(DbgPtrAllocaCache, DI, Shape.Optimizing);
}
// If we have a single edge PHINode, remove it and replace it with a
@ -2552,7 +2552,7 @@ static void collectFrameAllocas(Function &F, coro::Shape &Shape,
void coro::salvageDebugInfo(
SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,
DbgVariableIntrinsic *DVI, bool OptimizeFrame) {
DbgVariableIntrinsic *DVI, bool Optimizing) {
Function *F = DVI->getFunction();
IRBuilder<> Builder(F->getContext());
auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
@ -2605,7 +2605,7 @@ void coro::salvageDebugInfo(
//
// Avoid creating an alloca that would be eliminated by optimization
// passes, which would leave the corresponding dbg.declares invalid.
if (!OptimizeFrame)
if (!Optimizing)
if (auto *Arg = dyn_cast<llvm::Argument>(Storage)) {
auto &Cached = DbgPtrAllocaCache[Storage];
if (!Cached) {

View File

@ -31,7 +31,7 @@ void replaceCoroFree(CoroIdInst *CoroId, bool Elide);
/// holding a pointer to the coroutine frame.
void salvageDebugInfo(
SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,
DbgVariableIntrinsic *DVI, bool OptimizeFrame);
DbgVariableIntrinsic *DVI, bool Optimizing);
// Keeps data and helper functions for lowering coroutine intrinsics.
struct LowererBase {
@ -104,7 +104,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
BasicBlock *AllocaSpillBlock;
/// This would only be true if optimizations are enabled.
bool OptimizeFrame;
bool Optimizing;
struct SwitchLoweringStorage {
SwitchInst *ResumeSwitch;
@ -255,8 +255,8 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const;
Shape() = default;
explicit Shape(Function &F, bool OptimizeFrame = false)
: OptimizeFrame(OptimizeFrame) {
explicit Shape(Function &F, bool Optimizing = false)
: Optimizing(Optimizing) {
buildFrom(F);
}
void buildFrom(Function &F);

View File

@ -683,7 +683,7 @@ void CoroCloner::salvageDebugInfo() {
if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
Worklist.push_back(DVI);
for (DbgVariableIntrinsic *DVI : Worklist)
coro::salvageDebugInfo(DbgPtrAllocaCache, DVI, Shape.OptimizeFrame);
coro::salvageDebugInfo(DbgPtrAllocaCache, DVI, Shape.Optimizing);
// Remove all salvaged dbg.declare intrinsics that became
// either unreachable or stale due to the CoroSplit transformation.
@ -1351,8 +1351,8 @@ static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
}
// Add musttail to any resume instructions that is immediately followed by a
// suspend (i.e. ret). We do this even in -O0 to support guaranteed tail call
// for symmetrical coroutine control transfer (C++ Coroutines TS extension).
// suspend (i.e. ret) to implement symmetric transfer. We don't do this at
// O0 since symmetric transfer is not part of the standard now.
// This transformation is done only in the resume part of the coroutine that has
// identical signature and calling convention as the coro.resume call.
static void addMustTailToCoroResumes(Function &F) {
@ -1580,7 +1580,10 @@ static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
postSplitCleanup(*DestroyClone);
postSplitCleanup(*CleanupClone);
addMustTailToCoroResumes(*ResumeClone);
// Prepare to do symmetric transfer. We only do this if optimization is
// enabled since the symmetric transfer is not part of the C++ standard now.
if (Shape.Optimizing)
addMustTailToCoroResumes(*ResumeClone);
// Store addresses resume/destroy/cleanup functions in the coroutine frame.
updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
@ -1885,14 +1888,14 @@ namespace {
static coro::Shape splitCoroutine(Function &F,
SmallVectorImpl<Function *> &Clones,
bool OptimizeFrame) {
bool Optimizing) {
PrettyStackTraceFunction prettyStackTrace(F);
// The suspend-crossing algorithm in buildCoroutineFrame get tripped
// up by uses in unreachable blocks, so remove them as a first pass.
removeUnreachableBlocks(F);
coro::Shape Shape(F, OptimizeFrame);
coro::Shape Shape(F, Optimizing);
if (!Shape.CoroBegin)
return Shape;
@ -1941,7 +1944,7 @@ static coro::Shape splitCoroutine(Function &F,
}
}
for (auto *DDI : Worklist)
coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.OptimizeFrame);
coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.Optimizing);
return Shape;
}
@ -2084,7 +2087,7 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
F.setSplittedCoroutine();
SmallVector<Function *, 4> Clones;
const coro::Shape Shape = splitCoroutine(F, Clones, OptimizeFrame);
const coro::Shape Shape = splitCoroutine(F, Clones, Optimizing);
updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
if (!Shape.CoroSuspends.empty()) {

View File

@ -1,4 +1,4 @@
; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split<reuse-storage>),function(sroa)' -S | FileCheck %s
; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split<optimizing>),function(sroa)' -S | FileCheck %s
; Checks whether the dbg.declare for `__promise` remains valid under O2.

View File

@ -1,5 +1,5 @@
; Check that we can handle spills of array allocas
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
%struct.big_structure = type { [500 x i8] }
declare void @consume(%struct.big_structure*)

View File

@ -1,6 +1,6 @@
; Tests that variables in a Coroutine whose lifetime range is not overlapping each other
; re-use the same slot in Coroutine frame.
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
%"struct.task::promise_type" = type { i8 }
%struct.awaitable = type { i8 }
%struct.big_structure = type { [500 x i8] }

View File

@ -1,6 +1,6 @@
; Tests that variables of different type in a Coroutine whose lifetime range is not overlapping each other
; re-use the same slot in Coroutine frame.
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
%"struct.task::promise_type" = type { i8 }
%struct.awaitable = type { i8 }
%struct.big_structure = type { [500 x i8] }

View File

@ -1,6 +1,6 @@
; Tests that variables of different type with incompatible alignment in a Coroutine whose lifetime
; range is not overlapping each other should not re-use the same slot in Coroutine frame.
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
%"struct.task::promise_type" = type { i8 }
%struct.awaitable = type { i8 }
%struct.big_structure = type { [500 x i8] }

View File

@ -1,6 +1,6 @@
; Tests that variables of different type with incompatible alignment in a Coroutine whose
; lifetime range is not overlapping each other re-use the same slot in CoroutineFrame.
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
%"struct.task::promise_type" = type { i8 }
%struct.awaitable = type { i8 }
%struct.big_structure = type { [500 x i8] }

View File

@ -1,6 +1,6 @@
; Tests that coro-split will convert coro.resume followed by a suspend to a
; musttail call.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
define void @f() #0 {
entry:

View File

@ -1,6 +1,6 @@
; Tests that coro-split will convert coro.resume followed by a suspend to a
; musttail call.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
define void @f() #0 {
entry:

View File

@ -1,6 +1,6 @@
; Tests that coro-split will convert coro.resume followed by a suspend to a
; musttail call.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
define void @fakeresume1(i8*) {
entry:

View File

@ -1,6 +1,6 @@
; Tests that coro-split will convert coro.resume followed by a suspend to a
; musttail call.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
define void @f() #0 {
entry:

View File

@ -1,6 +1,6 @@
; Tests that coro-split will convert a call before coro.suspend to a musttail call
; while the user of the coro.suspend is a icmpinst.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
define void @fakeresume1(i8*) {
entry:

View File

@ -1,6 +1,6 @@
; Tests that sunk lifetime markers wouldn't prevent optimization
; to convert a resuming call to a musttail call.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
declare void @fakeresume1(i64* align 8)

View File

@ -3,7 +3,7 @@
; The difference between this and coro-split-musttail5.ll is that there is
; an extra bitcast instruction in the path, which makes it harder to
; optimize.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
declare void @fakeresume1(i64* align 8)

View File

@ -3,7 +3,7 @@
; The difference between this and coro-split-musttail5.ll and coro-split-musttail5.ll
; is that this contains dead instruction generated during the transformation,
; which makes the optimization harder.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
declare void @fakeresume1(i64* align 8)