[Pipelines] Hoist CoroCleanup to avoid blocking optimizations

CoroCleanup is designed to lowering all the remaining coroutine
intrinsics. It is required to run after CoroSplit only. However, the
position of CoroCleanup now is far too late. The downside here is that
the unlowered coroutine instrincs might blocking other optimizations
too. So it should be a pure win to hoist the position of CoroCleanup.

Reviewed By: aeubanks

Differential Revision: https://reviews.llvm.org/D124360
This commit is contained in:
Chuanqi Xu 2022-04-21 17:35:56 +08:00
parent 6c10014f1d
commit 7d40f562e7
11 changed files with 23 additions and 61 deletions

View File

@ -989,6 +989,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
else
MPM.addPass(buildInlinerPipeline(Level, Phase));
MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
MPM.addPass(ModuleMemProfilerPass());
@ -1246,8 +1248,6 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
OptimizePM.addPass(
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
OptimizePM.addPass(CoroCleanupPass());
// Add the core optimizing pipeline.
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
PTO.EagerlyInvalidateAnalyses));
@ -1372,11 +1372,6 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
// Reduce the size of the IR as much as possible.
MPM.addPass(GlobalOptPass());
// Module simplification splits coroutines, but does not fully clean up
// coroutine intrinsics. To ensure ThinLTO optimization passes don't trip up
// on these, we schedule the cleanup here.
MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
PGOOpt->Action == PGOOptions::SampleUse)
MPM.addPass(PseudoProbeUpdatePass());

View File

@ -215,6 +215,7 @@
; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass
; CHECK-O-NEXT: Running pass: CoroSplitPass
; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
; CHECK-O-NEXT: Running pass: CoroCleanupPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running pass: GlobalDCEPass
; CHECK-DEFAULT-NEXT: Running pass: EliminateAvailableExternallyPass
@ -259,7 +260,6 @@
; CHECK-O-NEXT: Running pass: InstSimplifyPass
; CHECK-O-NEXT: Running pass: DivRemPairsPass
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: CoroCleanupPass
; CHECK-EP-OPTIMIZER-LAST: Running pass: NoOpModulePass
; CHECK-HOT-COLD-SPLIT-NEXT: Running pass: HotColdSplittingPass
; CHECK-IR-OUTLINER-NEXT: Running pass: IROutlinerPass

View File

@ -184,6 +184,7 @@
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O-NEXT: Running pass: CoroSplitPass
; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
; CHECK-O-NEXT: Running pass: CoroCleanupPass
; CHECK-PRELINK-O-NEXT: Running pass: GlobalOptPass
; CHECK-POSTLINK-O-NEXT: Running pass: GlobalOptPass
; CHECK-POSTLINK-O-NEXT: Running pass: GlobalDCEPass
@ -223,7 +224,6 @@
; CHECK-POSTLINK-O-NEXT: Running pass: InstSimplifyPass
; CHECK-POSTLINK-O-NEXT: Running pass: DivRemPairsPass
; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: CoroCleanupPass
; CHECK-POSTLINK-O-NEXT: Running pass: CGProfilePass
; CHECK-POSTLINK-O-NEXT: Running pass: GlobalDCEPass
; CHECK-POSTLINK-O-NEXT: Running pass: ConstantMergePass

View File

@ -156,6 +156,7 @@
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O-NEXT: Running pass: CoroSplitPass
; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
; CHECK-O-NEXT: Running pass: CoroCleanupPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running pass: GlobalDCEPass
; CHECK-O-NEXT: Running pass: EliminateAvailableExternallyPass
@ -192,7 +193,6 @@
; CHECK-O-NEXT: Running pass: InstSimplifyPass
; CHECK-O-NEXT: Running pass: DivRemPairsPass
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: CoroCleanupPass
; CHECK-O-NEXT: Running pass: CGProfilePass
; CHECK-O-NEXT: Running pass: GlobalDCEPass
; CHECK-O-NEXT: Running pass: ConstantMergePass

View File

@ -168,6 +168,7 @@
; CHECK-O3-NEXT: Running analysis: DominanceFrontierAnalysis on foo
; CHECK-O-NEXT: Running pass: CoroSplitPass
; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
; CHECK-O-NEXT: Running pass: CoroCleanupPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running pass: GlobalDCEPass
; CHECK-O-NEXT: Running pass: EliminateAvailableExternallyPass
@ -204,7 +205,6 @@
; CHECK-O-NEXT: Running pass: InstSimplifyPass
; CHECK-O-NEXT: Running pass: DivRemPairsPass
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: CoroCleanupPass
; CHECK-O-NEXT: Running pass: CGProfilePass
; CHECK-O-NEXT: Running pass: GlobalDCEPass
; CHECK-O-NEXT: Running pass: ConstantMergePass

View File

@ -197,10 +197,10 @@
; CHECK-O3-NEXT: Running analysis: DominanceFrontierAnalysis on foo
; CHECK-O-NEXT: Running pass: CoroSplitPass
; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
; CHECK-O-NEXT: Running pass: CoroCleanupPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis on bar
; CHECK-EXT: Running pass: {{.*}}::Bye
; CHECK-O-NEXT: Running pass: CoroCleanupPass
; CHECK-O-NEXT: Running pass: AnnotationRemarksPass on foo
; CHECK-O-NEXT: Running pass: CanonicalizeAliasesPass
; CHECK-O-NEXT: Running pass: NameAnonGlobalPass

View File

@ -162,8 +162,8 @@
; CHECK-O3-NEXT: Running analysis: DominanceFrontierAnalysis on foo
; CHECK-O-NEXT: Running pass: CoroSplitPass
; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running pass: CoroCleanupPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running pass: AnnotationRemarksPass on foo
; CHECK-O-NEXT: Running pass: CanonicalizeAliasesPass
; CHECK-O-NEXT: Running pass: NameAnonGlobalPass

View File

@ -33,18 +33,9 @@ cleanup: ; preds = %loop
define i32 @main() {
; CHECK-LABEL: @main(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = alloca [8 x i8], align 4
; CHECK-NEXT: store i32 4, ptr [[TMP0]], align 4
; CHECK-NEXT: call void @print(i32 4)
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]])
; CHECK-NEXT: [[N_VAL_RELOAD_I:%.*]] = load i32, ptr [[TMP0]], align 4, !alias.scope !0
; CHECK-NEXT: [[INC_I:%.*]] = add i32 [[N_VAL_RELOAD_I]], 1
; CHECK-NEXT: store i32 [[INC_I]], ptr [[TMP0]], align 4, !alias.scope !0
; CHECK-NEXT: call void @print(i32 [[INC_I]]), !noalias !0
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
; CHECK-NEXT: [[N_VAL_RELOAD_I1:%.*]] = load i32, ptr [[TMP0]], align 4, !alias.scope !3
; CHECK-NEXT: [[INC_I2:%.*]] = add i32 [[N_VAL_RELOAD_I1]], 1
; CHECK-NEXT: call void @print(i32 [[INC_I2]]), !noalias !3
; CHECK-NEXT: call void @print(i32 5), !noalias !0
; CHECK-NEXT: call void @print(i32 6), !noalias !3
; CHECK-NEXT: ret i32 0
;
entry:

View File

@ -39,23 +39,20 @@ define i32 @main() {
; CHECK-LABEL: @main(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = alloca i8*, align 8
; CHECK-NEXT: [[DOTSUB:%.*]] = bitcast i8** [[TMP0]] to i8*
; CHECK-NEXT: [[TMP1:%.*]] = tail call i8* @allocate(i32 12)
; CHECK-NEXT: store i8* [[TMP1]], i8** [[TMP0]], align 8
; CHECK-NEXT: [[N_SPILL_ADDR_I:%.*]] = bitcast i8* [[TMP1]] to i32*
; CHECK-NEXT: store i32 1, i32* [[N_SPILL_ADDR_I]], align 4
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]])
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8** [[TMP0]] to %f.Frame**
; CHECK-NEXT: [[FRAMEPTR_I:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8, !alias.scope !0
; CHECK-NEXT: [[N_RELOAD_ADDR_I:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR_I]], i64 0, i32 0
; CHECK-NEXT: [[N_RELOAD_I:%.*]] = load i32, i32* [[N_RELOAD_ADDR_I]], align 4, !noalias !0
; CHECK-NEXT: [[N_VAL3_SPILL_ADDR_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I]], i64 0, i32 1
; CHECK-NEXT: store i32 [[N_RELOAD_I]], i32* [[N_VAL3_SPILL_ADDR_I]], align 4, !noalias !0
; CHECK-NEXT: [[INPUT_SPILL_ADDR_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I]], i64 0, i32 2
; CHECK-NEXT: store i32 2, i32* [[INPUT_SPILL_ADDR_I]], align 4, !noalias !0
; CHECK-NEXT: [[N_VAL3_SPILL_ADDR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 4
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[N_VAL3_SPILL_ADDR_I]] to i32*
; CHECK-NEXT: store i32 1, i32* [[TMP3]], align 4, !noalias !0
; CHECK-NEXT: [[INPUT_SPILL_ADDR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 8
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[INPUT_SPILL_ADDR_I]] to i32*
; CHECK-NEXT: store i32 2, i32* [[TMP4]], align 4, !noalias !0
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
; CHECK-NEXT: [[FRAMEPTR_I1:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8, !alias.scope !3
; CHECK-NEXT: [[INPUT_RELOAD_ADDR13_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I1]], i64 0, i32 2
; CHECK-NEXT: [[INPUT_RELOAD_ADDR13_I:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR_I1]], i64 0, i32 2
; CHECK-NEXT: [[INPUT_RELOAD14_I:%.*]] = load i32, i32* [[INPUT_RELOAD_ADDR13_I]], align 4, !noalias !3
; CHECK-NEXT: [[N_VAL3_RELOAD_ADDR11_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I1]], i64 0, i32 1
; CHECK-NEXT: [[N_VAL3_RELOAD12_I:%.*]] = load i32, i32* [[N_VAL3_RELOAD_ADDR11_I]], align 4, !noalias !3
@ -70,8 +67,8 @@ define i32 @main() {
; CHECK-NEXT: [[N_VAL3_RELOAD12_I6:%.*]] = load i32, i32* [[N_VAL3_RELOAD_ADDR11_I5]], align 4, !noalias !6
; CHECK-NEXT: [[SUM7_I7:%.*]] = add i32 [[N_VAL3_RELOAD12_I6]], [[INPUT_RELOAD14_I4]]
; CHECK-NEXT: call void @print(i32 [[SUM7_I7]]), !noalias !6
; CHECK-NEXT: [[TMP3:%.*]] = bitcast %f.Frame* [[FRAMEPTR_I2]] to i8*
; CHECK-NEXT: call void @deallocate(i8* [[TMP3]]), !noalias !6
; CHECK-NEXT: [[TMP5:%.*]] = bitcast %f.Frame* [[FRAMEPTR_I2]] to i8*
; CHECK-NEXT: call void @deallocate(i8* [[TMP5]]), !noalias !6
; CHECK-NEXT: ret i32 0
;
entry:

View File

@ -35,19 +35,9 @@ cleanup:
define i32 @main() {
; CHECK-LABEL: @main(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = alloca [8 x i8], align 4
; CHECK-NEXT: [[DOTSUB:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[TMP0]], i64 0, i64 0
; CHECK-NEXT: [[N_VAL_SPILL_ADDR_I:%.*]] = bitcast [8 x i8]* [[TMP0]] to i32*
; CHECK-NEXT: store i32 4, i32* [[N_VAL_SPILL_ADDR_I]], align 4
; CHECK-NEXT: call void @print(i32 4)
; CHECK-NEXT: [[N_VAL_RELOAD_I:%.*]] = load i32, i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !0
; CHECK-NEXT: [[INC_I:%.*]] = add i32 [[N_VAL_RELOAD_I]], 1
; CHECK-NEXT: store i32 [[INC_I]], i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !0
; CHECK-NEXT: call void @print(i32 [[INC_I]])
; CHECK-NEXT: [[N_VAL_RELOAD_I3:%.*]] = load i32, i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !3
; CHECK-NEXT: [[INC_I4:%.*]] = add i32 [[N_VAL_RELOAD_I3]], 1
; CHECK-NEXT: store i32 [[INC_I4]], i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !3
; CHECK-NEXT: call void @print(i32 [[INC_I4]])
; CHECK-NEXT: call void @print(i32 5)
; CHECK-NEXT: call void @print(i32 6)
; CHECK-NEXT: ret i32 0
;
entry:

View File

@ -35,20 +35,9 @@ cleanup:
define i32 @main() {
; CHECK-LABEL: @main(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = alloca [8 x i8], align 4
; CHECK-NEXT: [[DOTSUB:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[TMP0]], i64 0, i64 0
; CHECK-NEXT: [[N_VAL_SPILL_ADDR_I:%.*]] = bitcast [8 x i8]* [[TMP0]] to i32*
; CHECK-NEXT: store i32 4, i32* [[N_VAL_SPILL_ADDR_I]], align 4
; CHECK-NEXT: call void @print(i32 4)
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]])
; CHECK-NEXT: [[N_VAL_RELOAD_I:%.*]] = load i32, i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !0
; CHECK-NEXT: [[INC_I:%.*]] = add i32 [[N_VAL_RELOAD_I]], 1
; CHECK-NEXT: store i32 [[INC_I]], i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !0
; CHECK-NEXT: call void @print(i32 [[INC_I]]), !noalias !0
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
; CHECK-NEXT: [[N_VAL_RELOAD_I3:%.*]] = load i32, i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !3
; CHECK-NEXT: [[INC_I4:%.*]] = add i32 [[N_VAL_RELOAD_I3]], 1
; CHECK-NEXT: call void @print(i32 [[INC_I4]]), !noalias !3
; CHECK-NEXT: call void @print(i32 5), !noalias !0
; CHECK-NEXT: call void @print(i32 6), !noalias !3
; CHECK-NEXT: ret i32 0
;
entry: