diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 6f97f3e93123..bc792ca3d8da 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -107,11 +107,6 @@ static cl::opt ControlFlowHoisting( "licm-control-flow-hoisting", cl::Hidden, cl::init(false), cl::desc("Enable control flow (and PHI) hoisting in LICM")); -static cl::opt HoistSinkColdnessThreshold( - "licm-coldness-threshold", cl::Hidden, cl::init(4), - cl::desc("Relative coldness Threshold of hoisting/sinking destination " - "block for LICM to be considered beneficial")); - static cl::opt MaxNumUsesTraversed( "licm-max-num-uses-traversed", cl::Hidden, cl::init(8), cl::desc("Max num uses visited for identifying load " @@ -819,35 +814,6 @@ public: }; } // namespace -// Hoisting/sinking instruction out of a loop isn't always beneficial. It's only -// only worthwhile if the destination block is actually colder than current -// block. -static bool worthSinkOrHoistInst(Instruction &I, BasicBlock *DstBlock, - OptimizationRemarkEmitter *ORE, - BlockFrequencyInfo *BFI) { - // Check block frequency only when runtime profile is available - // to avoid pathological cases. With static profile, lean towards - // hosting because it helps canonicalize the loop for vectorizer. - if (!DstBlock->getParent()->hasProfileData()) - return true; - - if (!HoistSinkColdnessThreshold || !BFI) - return true; - - BasicBlock *SrcBlock = I.getParent(); - if (BFI->getBlockFreq(DstBlock).getFrequency() / HoistSinkColdnessThreshold > - BFI->getBlockFreq(SrcBlock).getFrequency()) { - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "SinkHoistInst", &I) - << "failed to sink or hoist instruction because containing block " - "has lower frequency than destination block"; - }); - return false; - } - - return true; -} - /// Walk the specified region of the CFG (defined by all blocks dominated by /// the specified block, and that are in the current loop) in depth first /// order w.r.t the DominatorTree. This allows us to visit definitions before @@ -909,7 +875,6 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/ nullptr, MSSAU, true, &Flags, ORE) && - worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) && isSafeToExecuteUnconditionally( I, DT, TLI, CurLoop, SafetyInfo, ORE, CurLoop->getLoopPreheader()->getTerminator())) { @@ -1741,7 +1706,6 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, // First check if I is worth sinking for all uses. Sink only when it is worth // across all uses. SmallSetVector Users(I.user_begin(), I.user_end()); - SmallVector ExitPNs; for (auto *UI : Users) { auto *User = cast(UI); @@ -1751,14 +1715,6 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, PHINode *PN = cast(User); assert(ExitBlockSet.count(PN->getParent()) && "The LCSSA PHI is not in an exit block!"); - if (!worthSinkOrHoistInst(I, PN->getParent(), ORE, BFI)) { - return Changed; - } - - ExitPNs.push_back(PN); - } - - for (auto *PN : ExitPNs) { // The PHI must be trivially replaceable. Instruction *New = sinkThroughTriviallyReplaceablePHI( diff --git a/llvm/test/Transforms/LICM/no-hoist-prof.ll b/llvm/test/Transforms/LICM/no-hoist-prof.ll deleted file mode 100644 index 1775ecc21c4d..000000000000 --- a/llvm/test/Transforms/LICM/no-hoist-prof.ll +++ /dev/null @@ -1,88 +0,0 @@ -; RUN: opt -passes='sample-profile,function(loop-mssa(licm))' -aa-pipeline=basic-aa -S -sample-profile-file='%S/Inputs/no-hoist-prof.prof' < %s | FileCheck %s --check-prefix=CHECK-BFI-LICM -; RUN: opt -passes=licm -S < %s | FileCheck %s --check-prefix=CHECK-LICM - -; Original source code: -; -; int bar(int); -; int foo(int iter, int explode) { -; int base = bar(explode); -; for (int i = 0; i != iter; ++i) -; if (i == explode) -; iter = (base * base) + bar(iter); -; return iter; -; } - -; We need debug information in this .ll in order to leverage the pgo file, so: -; .ll generated by running `clang++ -O3 -g -S -emit-llvm`, then: -; - move hoisted mul back into cold section -; - give labels names -; - reindex variables -; - remove metadata calls, attributes, module header -; - remove unnecessary metadata - -; CHECK-LICM: .l.check.preheader:{{.*}} -; CHECK-LICM-NEXT: {{.*}} = mul {{.*}} -; CHECK-LICM-NEXT: br{{.*}} - -; CHECK-BFI-LICM: .l.cold:{{.*}} -; CHECK-BFI-LICM-NEXT: {{.*}} = mul {{.*}} - -define dso_local i32 @_Z3fooii(i32, i32) local_unnamed_addr #0 !dbg !7 { - %3 = tail call i32 @_Z3bari(i32 %1), !dbg !19 - %4 = icmp eq i32 %0, 0, !dbg !22 - br i1 %4, label %.l.ret, label %.l.check.preheader, !dbg !24 - -.l.check.preheader: - br label %.l.check, !dbg !24 - -.l.ret: - %5 = phi i32 [ 0, %2 ], [ %12, %.l.iterate ] - ret i32 %5, !dbg !25 - -.l.check: - %6 = phi i32 [ 0, %.l.check.preheader ], [ %13, %.l.iterate ] - %7 = phi i32 [ %0, %.l.check.preheader ], [ %12, %.l.iterate ] - %8 = icmp eq i32 %6, %1, !dbg !26 - br i1 %8, label %.l.cold, label %.l.iterate, !dbg !28 - -.l.cold: - %9 = mul nsw i32 %3, %3 - %10 = tail call i32 @_Z3bari(i32 %7), !dbg !29 - %11 = add nsw i32 %10, %9, !dbg !30 - br label %.l.iterate, !dbg !31 - -.l.iterate: - %12 = phi i32 [ %11, %.l.cold ], [ %7, %.l.check ] - %13 = add nuw nsw i32 %6, 1, !dbg !32 - %14 = icmp eq i32 %13, %12, !dbg !22 - br i1 %14, label %.l.ret, label %.l.check, !dbg !24, !llvm.loop !33 -} - -attributes #0 = { "use-sample-profile" } - -declare dso_local i32 @_Z3bari(i32) local_unnamed_addr #1 - -!llvm.module.flags = !{!4} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 8.0.20181009 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, nameTableKind: None) -!1 = !DIFile(filename: "foo.cpp", directory: "/tmp/gather_pgo") -!4 = !{i32 2, !"Debug Info Version", i32 3} -!7 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooii", scope: !1, file: !1, line: 2, type: !8, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!8 = !DISubroutineType(types: !9) -!9 = !{!10, !10, !10} -!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!16 = distinct !DILexicalBlock(scope: !7, file: !1, line: 4, column: 3) -!19 = !DILocation(line: 3, column: 14, scope: !7) -!22 = !DILocation(line: 4, column: 21, scope: !23) -!23 = distinct !DILexicalBlock(scope: !16, file: !1, line: 4, column: 3) -!24 = !DILocation(line: 4, column: 3, scope: !16) -!25 = !DILocation(line: 7, column: 3, scope: !7) -!26 = !DILocation(line: 5, column: 11, scope: !27) -!27 = distinct !DILexicalBlock(scope: !23, file: !1, line: 5, column: 9) -!28 = !DILocation(line: 5, column: 9, scope: !23) -!29 = !DILocation(line: 6, column: 30, scope: !27) -!30 = !DILocation(line: 6, column: 28, scope: !27) -!31 = !DILocation(line: 6, column: 7, scope: !27) -!32 = !DILocation(line: 4, column: 30, scope: !23) -!33 = distinct !{!33, !24, !34} -!34 = !DILocation(line: 6, column: 38, scope: !16) diff --git a/llvm/test/Transforms/LICM/sink.ll b/llvm/test/Transforms/LICM/sink.ll deleted file mode 100644 index d82168b147cc..000000000000 --- a/llvm/test/Transforms/LICM/sink.ll +++ /dev/null @@ -1,69 +0,0 @@ -; RUN: opt -S -licm -licm-coldness-threshold=0 < %s | FileCheck %s --check-prefix=CHECK-LICM -; RUN: opt -S -licm < %s | opt -S -loop-sink | FileCheck %s --check-prefix=CHECK-SINK -; RUN: opt -S < %s -passes='require,loop-mssa(licm),loop-sink' \ -; RUN: | FileCheck %s --check-prefix=CHECK-SINK -; RUN: opt -S -licm -licm-coldness-threshold=0 -verify-memoryssa < %s | FileCheck %s --check-prefix=CHECK-LICM -; RUN: opt -S -licm -verify-memoryssa < %s | FileCheck %s --check-prefix=CHECK-BFI-LICM - -; Original source code: -; int g; -; int foo(int p, int x) { -; for (int i = 0; i != x; i++) -; if (__builtin_expect(i == p, 0)) { -; x += g; x *= g; -; } -; return x; -; } -; -; Load of global value g should not be hoisted to preheader. - -@g = global i32 0, align 4 - -define i32 @foo(i32, i32) #0 !prof !2 { - %3 = icmp eq i32 %1, 0 - br i1 %3, label %._crit_edge, label %.lr.ph.preheader - -.lr.ph.preheader: - br label %.lr.ph - -; CHECK-LICM: .lr.ph.preheader: -; CHECK-LICM: load i32, i32* @g -; CHECK-LICM: br label %.lr.ph - -; CHECK-BFI-LICM: .lr.ph.preheader: -; CHECK-BFI-LICM-NOT: load i32, i32* @g -; CHECK-BFI-LICM: br label %.lr.ph - -.lr.ph: - %.03 = phi i32 [ %8, %.combine ], [ 0, %.lr.ph.preheader ] - %.012 = phi i32 [ %.1, %.combine ], [ %1, %.lr.ph.preheader ] - %4 = icmp eq i32 %.03, %0 - br i1 %4, label %.then, label %.combine, !prof !1 - -.then: - %5 = load i32, i32* @g, align 4 - %6 = add nsw i32 %5, %.012 - %7 = mul nsw i32 %6, %5 - br label %.combine - -; CHECK-SINK: .then: -; CHECK-SINK: load i32, i32* @g -; CHECK-SINK: br label %.combine - -.combine: - %.1 = phi i32 [ %7, %.then ], [ %.012, %.lr.ph ] - %8 = add nuw nsw i32 %.03, 1 - %9 = icmp eq i32 %8, %.1 - br i1 %9, label %._crit_edge.loopexit, label %.lr.ph - -._crit_edge.loopexit: - %.1.lcssa = phi i32 [ %.1, %.combine ] - br label %._crit_edge - -._crit_edge: - %.01.lcssa = phi i32 [ 0, %2 ], [ %.1.lcssa, %._crit_edge.loopexit ] - ret i32 %.01.lcssa -} - -!1 = !{!"branch_weights", i32 1, i32 2000} -!2 = !{!"function_entry_count", i64 1}