[Coroutines] Only do symmetric transfer if optimization is on

Symmetric transfer is not part of the C++ standard, so vendors are not
forced to implement it anyway. Given that symmetric transfer is nowadays
an optimization, it makes more sense to enable it only if optimization
is enabled. It is also helpful for compilation speed at
O0.
This commit is contained in:
Chuanqi Xu 2022-06-20 15:54:23 +08:00
parent 8b68da2c7d
commit 7782e080e8
20 changed files with 38 additions and 35 deletions

View File

@ -22,14 +22,14 @@
namespace llvm { namespace llvm {
struct CoroSplitPass : PassInfoMixin<CoroSplitPass> { struct CoroSplitPass : PassInfoMixin<CoroSplitPass> {
CoroSplitPass(bool OptimizeFrame = false) : OptimizeFrame(OptimizeFrame) {} CoroSplitPass(bool Optimizing = false) : Optimizing(Optimizing) {}
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR); LazyCallGraph &CG, CGSCCUpdateResult &UR);
static bool isRequired() { return true; } static bool isRequired() { return true; }
// Would be true if the Optimization level isn't O0. // Would be true if the Optimization level isn't O0.
bool OptimizeFrame; bool Optimizing;
}; };
} // end namespace llvm } // end namespace llvm

View File

@ -601,7 +601,7 @@ Expected<bool> parseInlinerPassOptions(StringRef Params) {
} }
Expected<bool> parseCoroSplitPassOptions(StringRef Params) { Expected<bool> parseCoroSplitPassOptions(StringRef Params) {
return parseSinglePassOption(Params, "reuse-storage", "CoroSplitPass"); return parseSinglePassOption(Params, "optimizing", "CoroSplitPass");
} }
Expected<bool> parseEarlyCSEPassOptions(StringRef Params) { Expected<bool> parseEarlyCSEPassOptions(StringRef Params) {

View File

@ -192,7 +192,7 @@ CGSCC_PASS_WITH_PARAMS("coro-split",
return CoroSplitPass(OptimizeFrame); return CoroSplitPass(OptimizeFrame);
}, },
parseCoroSplitPassOptions, parseCoroSplitPassOptions,
"reuse-storage") "optimizing")
#undef CGSCC_PASS_WITH_PARAMS #undef CGSCC_PASS_WITH_PARAMS
#ifndef FUNCTION_ANALYSIS #ifndef FUNCTION_ANALYSIS

View File

@ -612,7 +612,7 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
} }
}); });
if (!Shape.OptimizeFrame) { if (!Shape.Optimizing) {
for (const auto &A : FrameData.Allocas) { for (const auto &A : FrameData.Allocas) {
AllocaInst *Alloca = A.Alloca; AllocaInst *Alloca = A.Alloca;
NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca)); NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
@ -1696,14 +1696,14 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
&*Builder.GetInsertPoint()); &*Builder.GetInsertPoint());
// This dbg.declare is for the main function entry point. It // This dbg.declare is for the main function entry point. It
// will be deleted in all coro-split functions. // will be deleted in all coro-split functions.
coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.OptimizeFrame); coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.Optimizing);
} }
} }
// Salvage debug info on any dbg.addr that we see. We do not insert them // Salvage debug info on any dbg.addr that we see. We do not insert them
// into each block where we have a use though. // into each block where we have a use though.
if (auto *DI = dyn_cast<DbgAddrIntrinsic>(U)) { if (auto *DI = dyn_cast<DbgAddrIntrinsic>(U)) {
coro::salvageDebugInfo(DbgPtrAllocaCache, DI, Shape.OptimizeFrame); coro::salvageDebugInfo(DbgPtrAllocaCache, DI, Shape.Optimizing);
} }
// If we have a single edge PHINode, remove it and replace it with a // If we have a single edge PHINode, remove it and replace it with a
@ -2552,7 +2552,7 @@ static void collectFrameAllocas(Function &F, coro::Shape &Shape,
void coro::salvageDebugInfo( void coro::salvageDebugInfo(
SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache, SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,
DbgVariableIntrinsic *DVI, bool OptimizeFrame) { DbgVariableIntrinsic *DVI, bool Optimizing) {
Function *F = DVI->getFunction(); Function *F = DVI->getFunction();
IRBuilder<> Builder(F->getContext()); IRBuilder<> Builder(F->getContext());
auto InsertPt = F->getEntryBlock().getFirstInsertionPt(); auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
@ -2605,7 +2605,7 @@ void coro::salvageDebugInfo(
// //
// Avoid to create the alloca would be eliminated by optimization // Avoid to create the alloca would be eliminated by optimization
// passes and the corresponding dbg.declares would be invalid. // passes and the corresponding dbg.declares would be invalid.
if (!OptimizeFrame) if (!Optimizing)
if (auto *Arg = dyn_cast<llvm::Argument>(Storage)) { if (auto *Arg = dyn_cast<llvm::Argument>(Storage)) {
auto &Cached = DbgPtrAllocaCache[Storage]; auto &Cached = DbgPtrAllocaCache[Storage];
if (!Cached) { if (!Cached) {

View File

@ -31,7 +31,7 @@ void replaceCoroFree(CoroIdInst *CoroId, bool Elide);
/// holding a pointer to the coroutine frame. /// holding a pointer to the coroutine frame.
void salvageDebugInfo( void salvageDebugInfo(
SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache, SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,
DbgVariableIntrinsic *DVI, bool OptimizeFrame); DbgVariableIntrinsic *DVI, bool Optimizing);
// Keeps data and helper functions for lowering coroutine intrinsics. // Keeps data and helper functions for lowering coroutine intrinsics.
struct LowererBase { struct LowererBase {
@ -104,7 +104,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
BasicBlock *AllocaSpillBlock; BasicBlock *AllocaSpillBlock;
/// This would only be true if optimization are enabled. /// This would only be true if optimization are enabled.
bool OptimizeFrame; bool Optimizing;
struct SwitchLoweringStorage { struct SwitchLoweringStorage {
SwitchInst *ResumeSwitch; SwitchInst *ResumeSwitch;
@ -255,8 +255,8 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const; void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const;
Shape() = default; Shape() = default;
explicit Shape(Function &F, bool OptimizeFrame = false) explicit Shape(Function &F, bool Optimizing = false)
: OptimizeFrame(OptimizeFrame) { : Optimizing(Optimizing) {
buildFrom(F); buildFrom(F);
} }
void buildFrom(Function &F); void buildFrom(Function &F);

View File

@ -683,7 +683,7 @@ void CoroCloner::salvageDebugInfo() {
if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
Worklist.push_back(DVI); Worklist.push_back(DVI);
for (DbgVariableIntrinsic *DVI : Worklist) for (DbgVariableIntrinsic *DVI : Worklist)
coro::salvageDebugInfo(DbgPtrAllocaCache, DVI, Shape.OptimizeFrame); coro::salvageDebugInfo(DbgPtrAllocaCache, DVI, Shape.Optimizing);
// Remove all salvaged dbg.declare intrinsics that became // Remove all salvaged dbg.declare intrinsics that became
// either unreachable or stale due to the CoroSplit transformation. // either unreachable or stale due to the CoroSplit transformation.
@ -1351,8 +1351,8 @@ static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
} }
// Add musttail to any resume instructions that is immediately followed by a // Add musttail to any resume instructions that is immediately followed by a
// suspend (i.e. ret). We do this even in -O0 to support guaranteed tail call // suspend (i.e. ret) to implement symmetric transfer. We wouldn't do this in
// for symmetrical coroutine control transfer (C++ Coroutines TS extension). // O0 since symmetric transfer is not part of standard now.
// This transformation is done only in the resume part of the coroutine that has // This transformation is done only in the resume part of the coroutine that has
// identical signature and calling convention as the coro.resume call. // identical signature and calling convention as the coro.resume call.
static void addMustTailToCoroResumes(Function &F) { static void addMustTailToCoroResumes(Function &F) {
@ -1580,7 +1580,10 @@ static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
postSplitCleanup(*DestroyClone); postSplitCleanup(*DestroyClone);
postSplitCleanup(*CleanupClone); postSplitCleanup(*CleanupClone);
addMustTailToCoroResumes(*ResumeClone); // Prepare to do symmetric transfer. We only do this if optimization is
// enabled since the symmetric transfer is not part of the C++ standard now.
if (Shape.Optimizing)
addMustTailToCoroResumes(*ResumeClone);
// Store addresses resume/destroy/cleanup functions in the coroutine frame. // Store addresses resume/destroy/cleanup functions in the coroutine frame.
updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone); updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
@ -1885,14 +1888,14 @@ namespace {
static coro::Shape splitCoroutine(Function &F, static coro::Shape splitCoroutine(Function &F,
SmallVectorImpl<Function *> &Clones, SmallVectorImpl<Function *> &Clones,
bool OptimizeFrame) { bool Optimizing) {
PrettyStackTraceFunction prettyStackTrace(F); PrettyStackTraceFunction prettyStackTrace(F);
// The suspend-crossing algorithm in buildCoroutineFrame get tripped // The suspend-crossing algorithm in buildCoroutineFrame get tripped
// up by uses in unreachable blocks, so remove them as a first pass. // up by uses in unreachable blocks, so remove them as a first pass.
removeUnreachableBlocks(F); removeUnreachableBlocks(F);
coro::Shape Shape(F, OptimizeFrame); coro::Shape Shape(F, Optimizing);
if (!Shape.CoroBegin) if (!Shape.CoroBegin)
return Shape; return Shape;
@ -1941,7 +1944,7 @@ static coro::Shape splitCoroutine(Function &F,
} }
} }
for (auto *DDI : Worklist) for (auto *DDI : Worklist)
coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.OptimizeFrame); coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.Optimizing);
return Shape; return Shape;
} }
@ -2084,7 +2087,7 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
F.setSplittedCoroutine(); F.setSplittedCoroutine();
SmallVector<Function *, 4> Clones; SmallVector<Function *, 4> Clones;
const coro::Shape Shape = splitCoroutine(F, Clones, OptimizeFrame); const coro::Shape Shape = splitCoroutine(F, Clones, Optimizing);
updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM); updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
if (!Shape.CoroSuspends.empty()) { if (!Shape.CoroSuspends.empty()) {

View File

@ -1,4 +1,4 @@
; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split<reuse-storage>),function(sroa)' -S | FileCheck %s ; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split<optimizing>),function(sroa)' -S | FileCheck %s
; Checks whether the dbg.declare for `__promise` remains valid under O2. ; Checks whether the dbg.declare for `__promise` remains valid under O2.

View File

@ -1,5 +1,5 @@
; Check that we can handle spills of array allocas ; Check that we can handle spills of array allocas
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s ; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
%struct.big_structure = type { [500 x i8] } %struct.big_structure = type { [500 x i8] }
declare void @consume(%struct.big_structure*) declare void @consume(%struct.big_structure*)

View File

@ -1,6 +1,6 @@
; Tests that variables in a Corotuine whose lifetime range is not overlapping each other ; Tests that variables in a Corotuine whose lifetime range is not overlapping each other
; re-use the same slot in Coroutine frame. ; re-use the same slot in Coroutine frame.
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s ; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
%"struct.task::promise_type" = type { i8 } %"struct.task::promise_type" = type { i8 }
%struct.awaitable = type { i8 } %struct.awaitable = type { i8 }
%struct.big_structure = type { [500 x i8] } %struct.big_structure = type { [500 x i8] }

View File

@ -1,6 +1,6 @@
; Tests that variables of different type in a Corotuine whose lifetime range is not overlapping each other ; Tests that variables of different type in a Corotuine whose lifetime range is not overlapping each other
; re-use the same slot in Coroutine frame. ; re-use the same slot in Coroutine frame.
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s ; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
%"struct.task::promise_type" = type { i8 } %"struct.task::promise_type" = type { i8 }
%struct.awaitable = type { i8 } %struct.awaitable = type { i8 }
%struct.big_structure = type { [500 x i8] } %struct.big_structure = type { [500 x i8] }

View File

@ -1,6 +1,6 @@
; Tests that variables of different type with incompatible alignment in a Corotuine whose lifetime ; Tests that variables of different type with incompatible alignment in a Corotuine whose lifetime
; range is not overlapping each other should not re-use the same slot in Coroutine frame. ; range is not overlapping each other should not re-use the same slot in Coroutine frame.
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s ; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
%"struct.task::promise_type" = type { i8 } %"struct.task::promise_type" = type { i8 }
%struct.awaitable = type { i8 } %struct.awaitable = type { i8 }
%struct.big_structure = type { [500 x i8] } %struct.big_structure = type { [500 x i8] }

View File

@ -1,6 +1,6 @@
; Tests that variables of different type with incompatible alignment in a Corotuine whose ; Tests that variables of different type with incompatible alignment in a Corotuine whose
; lifetime range is not overlapping each other re-use the same slot in CorotuineFrame. ; lifetime range is not overlapping each other re-use the same slot in CorotuineFrame.
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s ; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
%"struct.task::promise_type" = type { i8 } %"struct.task::promise_type" = type { i8 }
%struct.awaitable = type { i8 } %struct.awaitable = type { i8 }
%struct.big_structure = type { [500 x i8] } %struct.big_structure = type { [500 x i8] }

View File

@ -1,6 +1,6 @@
; Tests that coro-split will convert coro.resume followed by a suspend to a ; Tests that coro-split will convert coro.resume followed by a suspend to a
; musttail call. ; musttail call.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s ; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
define void @f() #0 { define void @f() #0 {
entry: entry:

View File

@ -1,6 +1,6 @@
; Tests that coro-split will convert coro.resume followed by a suspend to a ; Tests that coro-split will convert coro.resume followed by a suspend to a
; musttail call. ; musttail call.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s ; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
define void @f() #0 { define void @f() #0 {
entry: entry:

View File

@ -1,6 +1,6 @@
; Tests that coro-split will convert coro.resume followed by a suspend to a ; Tests that coro-split will convert coro.resume followed by a suspend to a
; musttail call. ; musttail call.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s ; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
define void @fakeresume1(i8*) { define void @fakeresume1(i8*) {
entry: entry:

View File

@ -1,6 +1,6 @@
; Tests that coro-split will convert coro.resume followed by a suspend to a ; Tests that coro-split will convert coro.resume followed by a suspend to a
; musttail call. ; musttail call.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s ; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
define void @f() #0 { define void @f() #0 {
entry: entry:

View File

@ -1,6 +1,6 @@
; Tests that coro-split will convert a call before coro.suspend to a musttail call ; Tests that coro-split will convert a call before coro.suspend to a musttail call
; while the user of the coro.suspend is a icmpinst. ; while the user of the coro.suspend is a icmpinst.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s ; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
define void @fakeresume1(i8*) { define void @fakeresume1(i8*) {
entry: entry:

View File

@ -1,6 +1,6 @@
; Tests that sinked lifetime markers wouldn't provent optimization ; Tests that sinked lifetime markers wouldn't provent optimization
; to convert a resuming call to a musttail call. ; to convert a resuming call to a musttail call.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s ; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
declare void @fakeresume1(i64* align 8) declare void @fakeresume1(i64* align 8)

View File

@ -3,7 +3,7 @@
; The difference between this and coro-split-musttail5.ll is that there is ; The difference between this and coro-split-musttail5.ll is that there is
; an extra bitcast instruction in the path, which makes it harder to ; an extra bitcast instruction in the path, which makes it harder to
; optimize. ; optimize.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s ; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
declare void @fakeresume1(i64* align 8) declare void @fakeresume1(i64* align 8)

View File

@ -3,7 +3,7 @@
; The difference between this and coro-split-musttail5.ll and coro-split-musttail5.ll ; The difference between this and coro-split-musttail5.ll and coro-split-musttail5.ll
; is that this contains dead instruction generated during the transformation, ; is that this contains dead instruction generated during the transformation,
; which makes the optimization harder. ; which makes the optimization harder.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s ; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
declare void @fakeresume1(i64* align 8) declare void @fakeresume1(i64* align 8)