[SimpleLoopUnswitch] Skip non-trivial unswitching of cold loops

When profile data is available, non-trivial loop unswitching is now applied only to non-cold loops, because unswitching a cold loop brings little performance benefit but can significantly increase code size.

Reviewed By: aeubanks, asbirlea

Differential Revision: https://reviews.llvm.org/D129599
This commit is contained in:
Ruobing Han 2022-08-01 17:22:45 +00:00
parent 6c52f82d77
commit f756f06cc4
11 changed files with 38 additions and 27 deletions

View File

@ -1399,8 +1399,10 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
return Err;
// Add the nested pass manager with the appropriate adaptor.
bool UseMemorySSA = (Name == "loop-mssa");
bool UseBFI = llvm::any_of(
InnerPipeline, [](auto Pipeline) { return Pipeline.Name == "licm"; });
bool UseBFI = llvm::any_of(InnerPipeline, [](auto Pipeline) {
return Pipeline.Name.contains("licm") ||
Pipeline.Name.contains("simple-loop-unswitch");
});
bool UseBPI = llvm::any_of(InnerPipeline, [](auto Pipeline) {
return Pipeline.Name == "loop-predication";
});

View File

@ -16,6 +16,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/GuardUtils.h"
@ -26,6 +27,7 @@
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@ -3044,6 +3046,7 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
bool NonTrivial,
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
assert(L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.");
@ -3080,6 +3083,14 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
if (L.getHeader()->getParent()->hasOptSize())
return false;
// Skip cold loops, as unswitching them brings little benefit
// but increases the code size
if (PSI && PSI->hasProfileSummary() && BFI &&
PSI->isColdBlock(L.getHeader(), BFI)) {
LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
return false;
}
// Skip non-trivial unswitching for loops that cannot be cloned.
if (!L.isSafeToClone())
return false;
@ -3105,7 +3116,11 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
LPMUpdater &U) {
Function &F = *L.getHeader()->getParent();
(void)F;
ProfileSummaryInfo *PSI = nullptr;
if (auto OuterProxy =
AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)
.getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
<< "\n");
@ -3152,7 +3167,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
}
if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
UnswitchCB, &AR.SE, MSSAU ? MSSAU.getPointer() : nullptr,
DestroyLoopCB))
PSI, AR.BFI, DestroyLoopCB))
return PreservedAnalyses::all();
if (AR.MSSA && VerifyMemorySSA)
@ -3214,7 +3229,6 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
<< "\n");
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@ -3251,9 +3265,9 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
bool Changed = unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial,
UnswitchCB, SE, &MSSAU, DestroyLoopCB);
bool Changed =
unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
&MSSAU, nullptr, nullptr, DestroyLoopCB);
if (VerifyMemorySSA)
MSSA->verifyMemorySSA();

View File

@ -174,6 +174,7 @@
; CHECK-O-NEXT: Running pass: LoopRotatePass
; CHECK-O-NEXT: Running pass: LICM
; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O-NEXT: Running pass: LoopSimplifyPass

View File

@ -137,6 +137,7 @@
; CHECK-O-NEXT: Running pass: LoopRotatePass
; CHECK-O-NEXT: Running pass: LICM
; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O-NEXT: Running pass: LoopSimplifyPass

View File

@ -110,6 +110,7 @@
; CHECK-O-NEXT: Running pass: LoopRotatePass
; CHECK-O-NEXT: Running pass: LICM
; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O-NEXT: Running pass: LoopSimplifyPass

View File

@ -119,6 +119,7 @@
; CHECK-O-NEXT: Running pass: LoopRotatePass
; CHECK-O-NEXT: Running pass: LICM
; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O-NEXT: Running pass: LoopSimplifyPass

View File

@ -148,6 +148,7 @@
; CHECK-O-NEXT: Running pass: LoopRotatePass
; CHECK-O-NEXT: Running pass: LICM
; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O-NEXT: Running pass: LoopSimplifyPass

View File

@ -114,6 +114,7 @@
; CHECK-O-NEXT: Running pass: LoopRotatePass
; CHECK-O-NEXT: Running pass: LICM
; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O-NEXT: Running pass: LoopSimplifyPass

View File

@ -10,6 +10,7 @@ declare void @llvm.experimental.guard(i1, ...)
; CHECK: Running pass: LoopPredicationPass on Loop at depth 1
; CHECK-NEXT: Running pass: LICMPass on Loop at depth 1
; CHECK-NEXT: Running pass: SimpleLoopUnswitchPass on Loop at depth 1
; CHECK-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-NEXT: Running pass: LoopPredicationPass on Loop at depth 1
; CHECK-NEXT: Running pass: LICMPass on Loop at depth 1
; CHECK-NEXT: Running pass: SimpleLoopUnswitchPass on Loop at depth 1

View File

@ -46,31 +46,18 @@ define void @f1(i32 %i, i1 %cond, i1 %hot_cond, i1 %cold_cond, i1* %ptr) !prof !
; CHECK: entry_cold_loop:
; CHECK-NEXT: br i1 [[COLD_COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER:%.*]], label [[COLD_LOOP_EXIT:%.*]], !prof [[PROF16:![0-9]+]]
; CHECK: cold_loop_begin.preheader:
; CHECK-NEXT: br i1 [[COND]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT_US:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT:%.*]]
; CHECK: cold_loop_begin.preheader.split.us:
; CHECK-NEXT: br label [[COLD_LOOP_BEGIN_US:%.*]]
; CHECK: cold_loop_begin.us:
; CHECK-NEXT: br label [[COLD_LOOP_A_US:%.*]]
; CHECK: cold_loop_a.us:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @a()
; CHECK-NEXT: br label [[COLD_LOOP_LATCH_US:%.*]]
; CHECK: cold_loop_latch.us:
; CHECK-NEXT: [[V2_US:%.*]] = load i1, i1* [[PTR]], align 1
; CHECK-NEXT: br i1 [[V2_US]], label [[COLD_LOOP_BEGIN_US]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]]
; CHECK: cold_loop_exit.loopexit.split.us:
; CHECK-NEXT: br label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]]
; CHECK: cold_loop_begin.preheader.split:
; CHECK-NEXT: br label [[COLD_LOOP_BEGIN:%.*]]
; CHECK: cold_loop_begin:
; CHECK-NEXT: br label [[COLD_LOOP_B:%.*]]
; CHECK-NEXT: br i1 [[COND]], label [[COLD_LOOP_A:%.*]], label [[COLD_LOOP_B:%.*]]
; CHECK: cold_loop_a:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @a()
; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]]
; CHECK: cold_loop_b:
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @b()
; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]]
; CHECK-NEXT: br label [[COLD_LOOP_LATCH]]
; CHECK: cold_loop_latch:
; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1
; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT:%.*]]
; CHECK: cold_loop_exit.loopexit.split:
; CHECK-NEXT: br label [[COLD_LOOP_EXIT_LOOPEXIT]]
; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]]
; CHECK: cold_loop_exit.loopexit:
; CHECK-NEXT: br label [[COLD_LOOP_EXIT]]
; CHECK: cold_loop_exit:

View File

@ -18,6 +18,7 @@
; the analysis caches.
;
; CHECK: Running pass: SimpleLoopUnswitchPass on Loop at depth 1 containing: %loop_begin<header>,%loop_b,%loop_b_inner,%loop_b_inner_exit,%loop_a,%loop_a_inner,%loop_a_inner_exit,%latch<latch><exiting>
; CHECK-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-NEXT: Clearing all analysis results for: loop_a_inner