forked from OSchip/llvm-project
[SimpleLoopUnswitch] Skip non-trivial unswitching of cold loops
When profile data is available, non-trivial loop unswitching is now applied only to non-cold loops, because unswitching a cold loop brings little benefit while significantly increasing code size. Reviewed By: aeubanks, asbirlea. Differential Revision: https://reviews.llvm.org/D129599
This commit is contained in:
parent
6c52f82d77
commit
f756f06cc4
|
@ -1399,8 +1399,10 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
|
|||
return Err;
|
||||
// Add the nested pass manager with the appropriate adaptor.
|
||||
bool UseMemorySSA = (Name == "loop-mssa");
|
||||
bool UseBFI = llvm::any_of(
|
||||
InnerPipeline, [](auto Pipeline) { return Pipeline.Name == "licm"; });
|
||||
bool UseBFI = llvm::any_of(InnerPipeline, [](auto Pipeline) {
|
||||
return Pipeline.Name.contains("licm") ||
|
||||
Pipeline.Name.contains("simple-loop-unswitch");
|
||||
});
|
||||
bool UseBPI = llvm::any_of(InnerPipeline, [](auto Pipeline) {
|
||||
return Pipeline.Name == "loop-predication";
|
||||
});
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/CFG.h"
|
||||
#include "llvm/Analysis/CodeMetrics.h"
|
||||
#include "llvm/Analysis/GuardUtils.h"
|
||||
|
@ -26,6 +27,7 @@
|
|||
#include "llvm/Analysis/MemorySSA.h"
|
||||
#include "llvm/Analysis/MemorySSAUpdater.h"
|
||||
#include "llvm/Analysis/MustExecute.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
|
@ -3044,6 +3046,7 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
|
|||
bool NonTrivial,
|
||||
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
|
||||
ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
|
||||
ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
|
||||
function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
|
||||
assert(L.isRecursivelyLCSSAForm(DT, LI) &&
|
||||
"Loops must be in LCSSA form before unswitching.");
|
||||
|
@ -3080,6 +3083,14 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
|
|||
if (L.getHeader()->getParent()->hasOptSize())
|
||||
return false;
|
||||
|
||||
// Skip cold loops, as unswitching them brings little benefit
|
||||
// but increases the code size
|
||||
if (PSI && PSI->hasProfileSummary() && BFI &&
|
||||
PSI->isColdBlock(L.getHeader(), BFI)) {
|
||||
LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Skip non-trivial unswitching for loops that cannot be cloned.
|
||||
if (!L.isSafeToClone())
|
||||
return false;
|
||||
|
@ -3105,7 +3116,11 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
|
|||
LPMUpdater &U) {
|
||||
Function &F = *L.getHeader()->getParent();
|
||||
(void)F;
|
||||
|
||||
ProfileSummaryInfo *PSI = nullptr;
|
||||
if (auto OuterProxy =
|
||||
AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)
|
||||
.getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
|
||||
PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
|
||||
LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
|
||||
<< "\n");
|
||||
|
||||
|
@ -3152,7 +3167,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
|
|||
}
|
||||
if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
|
||||
UnswitchCB, &AR.SE, MSSAU ? MSSAU.getPointer() : nullptr,
|
||||
DestroyLoopCB))
|
||||
PSI, AR.BFI, DestroyLoopCB))
|
||||
return PreservedAnalyses::all();
|
||||
|
||||
if (AR.MSSA && VerifyMemorySSA)
|
||||
|
@ -3214,7 +3229,6 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
|
|||
|
||||
LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
|
||||
<< "\n");
|
||||
|
||||
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
||||
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
|
||||
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
|
||||
|
@ -3251,9 +3265,9 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
|
|||
|
||||
if (VerifyMemorySSA)
|
||||
MSSA->verifyMemorySSA();
|
||||
|
||||
bool Changed = unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial,
|
||||
UnswitchCB, SE, &MSSAU, DestroyLoopCB);
|
||||
bool Changed =
|
||||
unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
|
||||
&MSSAU, nullptr, nullptr, DestroyLoopCB);
|
||||
|
||||
if (VerifyMemorySSA)
|
||||
MSSA->verifyMemorySSA();
|
||||
|
|
|
@ -174,6 +174,7 @@
|
|||
; CHECK-O-NEXT: Running pass: LoopRotatePass
|
||||
; CHECK-O-NEXT: Running pass: LICM
|
||||
; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
|
||||
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
|
||||
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
|
||||
; CHECK-O-NEXT: Running pass: InstCombinePass
|
||||
; CHECK-O-NEXT: Running pass: LoopSimplifyPass
|
||||
|
|
|
@ -137,6 +137,7 @@
|
|||
; CHECK-O-NEXT: Running pass: LoopRotatePass
|
||||
; CHECK-O-NEXT: Running pass: LICM
|
||||
; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
|
||||
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
|
||||
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
|
||||
; CHECK-O-NEXT: Running pass: InstCombinePass
|
||||
; CHECK-O-NEXT: Running pass: LoopSimplifyPass
|
||||
|
|
|
@ -110,6 +110,7 @@
|
|||
; CHECK-O-NEXT: Running pass: LoopRotatePass
|
||||
; CHECK-O-NEXT: Running pass: LICM
|
||||
; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
|
||||
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
|
||||
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
|
||||
; CHECK-O-NEXT: Running pass: InstCombinePass
|
||||
; CHECK-O-NEXT: Running pass: LoopSimplifyPass
|
||||
|
|
|
@ -119,6 +119,7 @@
|
|||
; CHECK-O-NEXT: Running pass: LoopRotatePass
|
||||
; CHECK-O-NEXT: Running pass: LICM
|
||||
; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
|
||||
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
|
||||
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
|
||||
; CHECK-O-NEXT: Running pass: InstCombinePass
|
||||
; CHECK-O-NEXT: Running pass: LoopSimplifyPass
|
||||
|
|
|
@ -148,6 +148,7 @@
|
|||
; CHECK-O-NEXT: Running pass: LoopRotatePass
|
||||
; CHECK-O-NEXT: Running pass: LICM
|
||||
; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
|
||||
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
|
||||
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
|
||||
; CHECK-O-NEXT: Running pass: InstCombinePass
|
||||
; CHECK-O-NEXT: Running pass: LoopSimplifyPass
|
||||
|
|
|
@ -114,6 +114,7 @@
|
|||
; CHECK-O-NEXT: Running pass: LoopRotatePass
|
||||
; CHECK-O-NEXT: Running pass: LICM
|
||||
; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
|
||||
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
|
||||
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
|
||||
; CHECK-O-NEXT: Running pass: InstCombinePass
|
||||
; CHECK-O-NEXT: Running pass: LoopSimplifyPass
|
||||
|
|
|
@ -10,6 +10,7 @@ declare void @llvm.experimental.guard(i1, ...)
|
|||
; CHECK: Running pass: LoopPredicationPass on Loop at depth 1
|
||||
; CHECK-NEXT: Running pass: LICMPass on Loop at depth 1
|
||||
; CHECK-NEXT: Running pass: SimpleLoopUnswitchPass on Loop at depth 1
|
||||
; CHECK-NEXT: Running analysis: OuterAnalysisManagerProxy
|
||||
; CHECK-NEXT: Running pass: LoopPredicationPass on Loop at depth 1
|
||||
; CHECK-NEXT: Running pass: LICMPass on Loop at depth 1
|
||||
; CHECK-NEXT: Running pass: SimpleLoopUnswitchPass on Loop at depth 1
|
||||
|
|
|
@ -46,31 +46,18 @@ define void @f1(i32 %i, i1 %cond, i1 %hot_cond, i1 %cold_cond, i1* %ptr) !prof !
|
|||
; CHECK: entry_cold_loop:
|
||||
; CHECK-NEXT: br i1 [[COLD_COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER:%.*]], label [[COLD_LOOP_EXIT:%.*]], !prof [[PROF16:![0-9]+]]
|
||||
; CHECK: cold_loop_begin.preheader:
|
||||
; CHECK-NEXT: br i1 [[COND]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT_US:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT:%.*]]
|
||||
; CHECK: cold_loop_begin.preheader.split.us:
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_BEGIN_US:%.*]]
|
||||
; CHECK: cold_loop_begin.us:
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_A_US:%.*]]
|
||||
; CHECK: cold_loop_a.us:
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @a()
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_LATCH_US:%.*]]
|
||||
; CHECK: cold_loop_latch.us:
|
||||
; CHECK-NEXT: [[V2_US:%.*]] = load i1, i1* [[PTR]], align 1
|
||||
; CHECK-NEXT: br i1 [[V2_US]], label [[COLD_LOOP_BEGIN_US]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]]
|
||||
; CHECK: cold_loop_exit.loopexit.split.us:
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]]
|
||||
; CHECK: cold_loop_begin.preheader.split:
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_BEGIN:%.*]]
|
||||
; CHECK: cold_loop_begin:
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_B:%.*]]
|
||||
; CHECK-NEXT: br i1 [[COND]], label [[COLD_LOOP_A:%.*]], label [[COLD_LOOP_B:%.*]]
|
||||
; CHECK: cold_loop_a:
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @a()
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]]
|
||||
; CHECK: cold_loop_b:
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @b()
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]]
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_LATCH]]
|
||||
; CHECK: cold_loop_latch:
|
||||
; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1
|
||||
; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT:%.*]]
|
||||
; CHECK: cold_loop_exit.loopexit.split:
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_EXIT_LOOPEXIT]]
|
||||
; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]]
|
||||
; CHECK: cold_loop_exit.loopexit:
|
||||
; CHECK-NEXT: br label [[COLD_LOOP_EXIT]]
|
||||
; CHECK: cold_loop_exit:
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
; the analysis caches.
|
||||
;
|
||||
; CHECK: Running pass: SimpleLoopUnswitchPass on Loop at depth 1 containing: %loop_begin<header>,%loop_b,%loop_b_inner,%loop_b_inner_exit,%loop_a,%loop_a_inner,%loop_a_inner_exit,%latch<latch><exiting>
|
||||
; CHECK-NEXT: Running analysis: OuterAnalysisManagerProxy
|
||||
; CHECK-NEXT: Clearing all analysis results for: loop_a_inner
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue