forked from OSchip/llvm-project
[LICM] Create LoopNest Invariant Code Motion (LNICM) pass
This patch adds a new pass called LNICM, which is a LoopNest version of LICM, together with a test case that shows how LNICM works. Basically, LNICM hoists invariants only out of the loop nest as a whole (not out of an individual loop inside it), so that a perfect loop nest stays perfect. This enables later optimizations that require a perfect loop nest. Reviewed By: Whitney Differential Revision: https://reviews.llvm.org/D104180
This commit is contained in:
parent
9236125ec8
commit
74f0f9a455
|
@ -57,6 +57,22 @@ public:
|
|||
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
|
||||
LoopStandardAnalysisResults &AR, LPMUpdater &U);
|
||||
};
|
||||
|
||||
/// Performs LoopNest Invariant Code Motion Pass.
///
/// New-pass-manager wrapper whose run() takes a whole LoopNest, where
/// LICMPass::run takes a single Loop.
|
||||
class LNICMPass : public PassInfoMixin<LNICMPass> {
|
||||
// The two caps below are stored here and handed unchanged to
// LoopInvariantCodeMotion when run() is invoked.
unsigned LicmMssaOptCap;
|
||||
unsigned LicmMssaNoAccForPromotionCap;
|
||||
|
||||
public:
|
||||
/// Default construction: both caps are taken from SetLicmMssaOptCap and
/// SetLicmMssaNoAccForPromotionCap, which are declared outside this excerpt.
LNICMPass()
|
||||
: LicmMssaOptCap(SetLicmMssaOptCap),
|
||||
LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {}
|
||||
/// Construction with caller-supplied caps.
LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)
|
||||
: LicmMssaOptCap(LicmMssaOptCap),
|
||||
LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
|
||||
/// Runs the pass on a loop nest and reports which analyses are preserved.
/// NOTE(review): the out-of-line definition names the first parameter `LN`
/// rather than `L`.
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM,
|
||||
LoopStandardAnalysisResults &AR, LPMUpdater &U);
|
||||
};
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_TRANSFORMS_SCALAR_LICM_H
|
||||
|
|
|
@ -165,7 +165,7 @@ bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
|
|||
BlockFrequencyInfo *, TargetLibraryInfo *, Loop *,
|
||||
AliasSetTracker *, MemorySSAUpdater *, ScalarEvolution *,
|
||||
ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
|
||||
OptimizationRemarkEmitter *);
|
||||
OptimizationRemarkEmitter *, bool);
|
||||
|
||||
/// This function deletes dead loops. The caller of this function needs to
|
||||
/// guarantee that the loop is in fact dead.
|
||||
|
|
|
@ -415,6 +415,7 @@ LOOP_PASS("canon-freeze", CanonicalizeFreezeInLoopsPass())
|
|||
LOOP_PASS("dot-ddg", DDGDotPrinterPass())
|
||||
LOOP_PASS("invalidate<all>", InvalidateAllAnalysesPass())
|
||||
LOOP_PASS("licm", LICMPass())
|
||||
LOOP_PASS("lnicm", LNICMPass())
|
||||
LOOP_PASS("loop-flatten", LoopFlattenPass())
|
||||
LOOP_PASS("loop-idiom", LoopIdiomRecognizePass())
|
||||
LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass())
|
||||
|
|
|
@ -196,7 +196,7 @@ struct LoopInvariantCodeMotion {
|
|||
bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
|
||||
BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI,
|
||||
TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
|
||||
OptimizationRemarkEmitter *ORE);
|
||||
OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);
|
||||
|
||||
LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
|
||||
unsigned LicmMssaNoAccForPromotionCap)
|
||||
|
@ -295,6 +295,33 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
|
|||
return PA;
|
||||
}
|
||||
|
||||
/// Runs LICM over the loop nest \p LN with loop-nest mode enabled.
///
/// \p AM and the LPMUpdater argument are not used by this function.
/// \returns PreservedAnalyses::all() when nothing changed; otherwise the
/// loop-pass preserved set plus DominatorTreeAnalysis, LoopAnalysis and,
/// when \p AR.MSSA is non-null, MemorySSAAnalysis.
PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
|
||||
LoopStandardAnalysisResults &AR,
|
||||
LPMUpdater &) {
|
||||
// For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
|
||||
// pass. Function analyses need to be preserved across loop transformations
|
||||
// but ORE cannot be preserved (see comment before the pass definition).
|
||||
// A fresh emitter is therefore built locally from the nest's parent.
OptimizationRemarkEmitter ORE(LN.getParent());
|
||||
|
||||
// Forward the caps stored on this pass object to the shared LICM driver.
LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
|
||||
|
||||
// LICM is invoked once, on the outermost loop of the nest only.
Loop &OutermostLoop = LN.getOutermostLoop();
|
||||
// The trailing `true` is runOnLoop's LoopNestMode argument; in that mode
// hoistRegion does not skip blocks that belong to subloops.
bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI,
|
||||
&AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE, true);
|
||||
|
||||
// Nothing was modified, so every analysis remains valid.
if (!Changed)
|
||||
return PreservedAnalyses::all();
|
||||
|
||||
// Start from the set every loop pass preserves, then add the analyses
// listed explicitly below.
auto PA = getLoopPassPreservedAnalyses();
|
||||
|
||||
PA.preserve<DominatorTreeAnalysis>();
|
||||
PA.preserve<LoopAnalysis>();
|
||||
// MemorySSA is marked preserved only when it was supplied (non-null).
if (AR.MSSA)
|
||||
PA.preserve<MemorySSAAnalysis>();
|
||||
|
||||
return PA;
|
||||
}
|
||||
|
||||
char LegacyLICMPass::ID = 0;
|
||||
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
|
||||
false, false)
|
||||
|
@ -347,7 +374,8 @@ llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(
|
|||
bool LoopInvariantCodeMotion::runOnLoop(
|
||||
Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
|
||||
BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
|
||||
ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE) {
|
||||
ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE,
|
||||
bool LoopNestMode) {
|
||||
bool Changed = false;
|
||||
|
||||
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
|
||||
|
@ -414,7 +442,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
|
|||
if (Preheader)
|
||||
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
|
||||
CurAST.get(), MSSAU.get(), SE, &SafetyInfo,
|
||||
*Flags.get(), ORE);
|
||||
*Flags.get(), ORE, LoopNestMode);
|
||||
|
||||
// Now that all loop invariants have been removed from the loop, promote any
|
||||
// memory references to scalars that we can.
|
||||
|
@ -859,7 +887,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
|
|||
AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
|
||||
ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo,
|
||||
SinkAndHoistLICMFlags &Flags,
|
||||
OptimizationRemarkEmitter *ORE) {
|
||||
OptimizationRemarkEmitter *ORE, bool LoopNestMode) {
|
||||
// Verify inputs.
|
||||
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
|
||||
CurLoop != nullptr && SafetyInfo != nullptr &&
|
||||
|
@ -882,7 +910,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
|
|||
for (BasicBlock *BB : Worklist) {
|
||||
// Only need to process the contents of this block if it is not part of a
|
||||
// subloop (which would already have been processed).
|
||||
if (inSubLoop(BB, CurLoop, LI))
|
||||
if (!LoopNestMode && inSubLoop(BB, CurLoop, LI))
|
||||
continue;
|
||||
|
||||
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
; RUN: opt -aa-pipeline=basic-aa -passes='loop(loop-interchange)' -S %s | FileCheck %s --check-prefixes INTC
|
||||
; RUN: opt -aa-pipeline=basic-aa -passes='loop(lnicm,loop-interchange)' -S %s | FileCheck %s --check-prefixes LNICM,CHECK
|
||||
; RUN: opt -aa-pipeline=basic-aa -passes='loop(licm,loop-interchange)' -S %s | FileCheck %s --check-prefixes LICM,CHECK
|
||||
|
||||
; This test represents the following function:
|
||||
; void test(int x[10][10], int y[10], int *z) {
|
||||
; for (int k = 0; k < 10; k++) {
|
||||
; int tmp = *z;
|
||||
; for (int i = 0; i < 10; i++)
|
||||
; x[i][k] += y[k] + tmp;
|
||||
; }
|
||||
; }
|
||||
; We only want to hoist the load of z out of the loop nest.
|
||||
; LICM hoists the load of y[k] out of the i-loop, but LNICM doesn't do so
|
||||
; to keep the loop nest perfect. This enables optimizations that require
|
||||
; a perfect loop nest (e.g. loop-interchange) to run.
|
||||
|
||||
|
||||
define dso_local void @test([10 x i32]* noalias %x, i32* noalias readonly %y, i32* readonly %z) {
|
||||
; CHECK-LABEL: @test(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[Z:%.*]] = load i32, i32* %z, align 4
|
||||
; CHECK-NEXT: br label [[FOR_BODY3_PREHEADER:%.*]]
|
||||
; LNICM: for.body.preheader:
|
||||
; LICM-NOT: for.body.preheader:
|
||||
; INTC-NOT: for.body.preheader:
|
||||
; LNICM-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.body:
|
||||
; LNICM-NEXT: [[K:%.*]] = phi i32 [ [[INC10:%.*]], [[FOR_END:%.*]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
|
||||
; LNICM-NEXT: br label [[FOR_BODY3_SPLIT1:%.*]]
|
||||
; LICM: [[TMP:%.*]] = load i32, i32* [[ARRAYIDX:%.*]], align 4
|
||||
; LNICM: for.body3.preheader:
|
||||
; LICM-NOT: for.body3.preheader:
|
||||
; INTC-NOT: for.body3.preheader:
|
||||
; LNICM-NEXT: br label [[FOR_BODY3:%.*]]
|
||||
; CHECK: for.body3:
|
||||
; LNICM-NEXT: [[I:%.*]] = phi i32 [ [[TMP3:%.*]], [[FOR_BODY3_SPLIT:%.*]] ], [ 0, [[FOR_BODY3_PREHEADER:%.*]] ]
|
||||
; LNICM-NEXT: br label [[FOR_BODY_PREHEADER:%.*]]
|
||||
; LNICM: for.body3.split1:
|
||||
; LNICM-NEXT: [[IDXPROM:%.*]] = sext i32 [[K:%.*]] to i64
|
||||
; LNICM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %y, i64 [[IDXPROM:%.*]]
|
||||
; LNICM-NEXT: [[TMP:%.*]] = load i32, i32* [[ARRAYIDX:%.*]], align 4
|
||||
; LNICM-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP:%.*]], [[Z:%.*]]
|
||||
; LNICM-NEXT: [[IDXPROM4:%.*]] = sext i32 [[I:%.*]] to i64
|
||||
; LNICM-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %x, i64 [[IDXPROM4:%.*]]
|
||||
; LNICM-NEXT: [[IDXPROM6:%.*]] = sext i32 [[K:%.*]] to i64
|
||||
; LNICM-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX5:%.*]], i64 0, i64 [[IDXPROM6:%.*]]
|
||||
; LNICM-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX7:%.*]], align 4
|
||||
; LNICM-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP2:%.*]], [[ADD:%.*]]
|
||||
; LNICM-NEXT: store i32 [[ADD8:%.*]], i32* [[ARRAYIDX7:%.*]], align 4
|
||||
; LNICM-NEXT: [[INC:%.*]] = add nsw i32 [[I:%.*]], 1
|
||||
; LNICM-NEXT: [[CMP2:%.*]] = icmp slt i32 [[INC:%.*]], 10
|
||||
; LNICM-NEXT: br label [[FOR_END:%.*]]
|
||||
; LNICM: for.body3.split:
|
||||
; LICM-NOT: for.body3.split:
|
||||
; INTC-NOT: for.body3.split:
|
||||
; LNICM-NEXT: [[TMP3:%.*]] = add nsw i32 [[I:%.*]], 1
|
||||
; LNICM-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3:%.*]], 10
|
||||
; LNICM-NEXT: br i1 [[TMP4:%.*]], label [[FOR_BODY3:%.*]], label [[FOR_END11:%.*]], !llvm.loop !0
|
||||
; LNICM: for.end:
|
||||
; LNICM-NEXT: [[INC10:%.*]] = add nsw i32 [[K:%.*]], 1
|
||||
; LNICM-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC10:%.*]], 10
|
||||
; LNICM-NEXT: br i1 [[CMP:%.*]], label [[FOR_BODY:%.*]], label [[FOR_BODY3_SPLIT:%.*]], !llvm.loop !2
|
||||
; LNICM: for.end11:
|
||||
; LNICM-NEXT: ret void
|
||||
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%k.02 = phi i32 [ 0, %entry ], [ %inc10, %for.end ]
|
||||
%0 = load i32, i32* %z, align 4
|
||||
br label %for.body3
|
||||
|
||||
for.body3:
|
||||
%i.01 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ]
|
||||
%idxprom = sext i32 %k.02 to i64
|
||||
%arrayidx = getelementptr inbounds i32, i32* %y, i64 %idxprom
|
||||
%1 = load i32, i32* %arrayidx, align 4
|
||||
%add = add nsw i32 %1, %0
|
||||
%idxprom4 = sext i32 %i.01 to i64
|
||||
%arrayidx5 = getelementptr inbounds [10 x i32], [10 x i32]* %x, i64 %idxprom4
|
||||
%idxprom6 = sext i32 %k.02 to i64
|
||||
%arrayidx7 = getelementptr inbounds [10 x i32], [10 x i32]* %arrayidx5, i64 0, i64 %idxprom6
|
||||
%2 = load i32, i32* %arrayidx7, align 4
|
||||
%add8 = add nsw i32 %2, %add
|
||||
store i32 %add8, i32* %arrayidx7, align 4
|
||||
%inc = add nsw i32 %i.01, 1
|
||||
%cmp2 = icmp slt i32 %inc, 10
|
||||
br i1 %cmp2, label %for.body3, label %for.end, !llvm.loop !0
|
||||
|
||||
for.end:
|
||||
%inc10 = add nsw i32 %k.02, 1
|
||||
%cmp = icmp slt i32 %inc10, 10
|
||||
br i1 %cmp, label %for.body, label %for.end11, !llvm.loop !2
|
||||
|
||||
for.end11:
|
||||
ret void
|
||||
}
|
||||
|
||||
!0 = distinct !{!0, !1}
|
||||
!1 = !{!"llvm.loop.mustprogress"}
|
||||
!2 = distinct !{!2, !1}
|
Loading…
Reference in New Issue