Revert "[LoopInterchange] New cost model for loop interchange"

Reverting the commit due to numerous buildbot failures.

This reverts commit 006334470d.
This commit is contained in:
Daniil Suchkov 2022-06-03 00:52:08 +00:00
parent 48d6a6c9ad
commit f1940a5895
35 changed files with 169 additions and 301 deletions

View File

@ -18,7 +18,6 @@
#include "llvm/ADT/Statistic.h" #include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/LoopCacheAnalysis.h"
#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopNestAnalysis.h" #include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/LoopPass.h"
@ -359,10 +358,8 @@ public:
: OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
/// Check if the loop interchange is profitable. /// Check if the loop interchange is profitable.
bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop, bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix);
CharMatrix &DepMatrix,
const DenseMap<const Loop *, unsigned> &CostMap);
private: private:
int getInstrOrderCost(); int getInstrOrderCost();
@ -413,15 +410,13 @@ struct LoopInterchange {
LoopInfo *LI = nullptr; LoopInfo *LI = nullptr;
DependenceInfo *DI = nullptr; DependenceInfo *DI = nullptr;
DominatorTree *DT = nullptr; DominatorTree *DT = nullptr;
std::unique_ptr<CacheCost> CC = nullptr;
/// Interface to emit optimization remarks. /// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE; OptimizationRemarkEmitter *ORE;
LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI, LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
DominatorTree *DT, std::unique_ptr<CacheCost> &CC, DominatorTree *DT, OptimizationRemarkEmitter *ORE)
OptimizationRemarkEmitter *ORE) : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {}
: SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {}
bool run(Loop *L) { bool run(Loop *L) {
if (L->getParentLoop()) if (L->getParentLoop())
@ -504,21 +499,6 @@ struct LoopInterchange {
} }
unsigned SelecLoopId = selectLoopForInterchange(LoopList); unsigned SelecLoopId = selectLoopForInterchange(LoopList);
// Obtain the loop vector returned from loop cache analysis beforehand,
// and put each <Loop, index> pair into a map for constant time query
// later. Indices in loop vector represent the optimal order of the
// corresponding loop, e.g., given a loopnest with depth N, index 0
// indicates the loop should be placed as the outermost loop and index N
// indicates the loop should be placed as the innermost loop.
//
// For the old pass manager CacheCost would be null.
DenseMap<const Loop *, unsigned> CostMap;
if (CC != nullptr) {
const auto &LoopCosts = CC->getLoopCosts();
for (unsigned i = 0; i < LoopCosts.size(); i++) {
CostMap[LoopCosts[i].first] = i;
}
}
// We try to achieve the globally optimal memory access for the loopnest, // We try to achieve the globally optimal memory access for the loopnest,
// and do interchange based on a bubble-sort fashion. We start from // and do interchange based on a bubble-sort fashion. We start from
// the innermost loop, move it outwards to the best possible position // the innermost loop, move it outwards to the best possible position
@ -527,7 +507,7 @@ struct LoopInterchange {
bool ChangedPerIter = false; bool ChangedPerIter = false;
for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) { for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1, bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
DependencyMatrix, CostMap); DependencyMatrix);
if (!Interchanged) if (!Interchanged)
continue; continue;
// Loops interchanged, update LoopList accordingly. // Loops interchanged, update LoopList accordingly.
@ -551,8 +531,7 @@ struct LoopInterchange {
bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId, bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
unsigned OuterLoopId, unsigned OuterLoopId,
std::vector<std::vector<char>> &DependencyMatrix, std::vector<std::vector<char>> &DependencyMatrix) {
const DenseMap<const Loop *, unsigned> &CostMap) {
LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
<< " and OuterLoopId = " << OuterLoopId << "\n"); << " and OuterLoopId = " << OuterLoopId << "\n");
LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE); LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
@ -562,8 +541,7 @@ struct LoopInterchange {
} }
LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n"); LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE); LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId, if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
DependencyMatrix, CostMap)) {
LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n"); LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
return false; return false;
} }
@ -1157,33 +1135,21 @@ static bool isProfitableForVectorization(unsigned InnerLoopId,
return !DepMatrix.empty(); return !DepMatrix.empty();
} }
bool LoopInterchangeProfitability::isProfitable( bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId, unsigned OuterLoopId,
unsigned OuterLoopId, CharMatrix &DepMatrix, CharMatrix &DepMatrix) {
const DenseMap<const Loop *, unsigned> &CostMap) { // TODO: Add better profitability checks.
// TODO: Remove the legacy cost model. // e.g
// 1) Construct dependency matrix and move the one with no loop carried dep
// inside to enable vectorization.
// This is the new cost model returned from loop cache analysis. // This is rough cost estimation algorithm. It counts the good and bad order
// A smaller index means the loop should be placed an outer loop, and vice // of induction variables in the instruction and allows reordering if number
// versa. // of bad orders is more than good.
if (CostMap.find(InnerLoop) != CostMap.end() && int Cost = getInstrOrderCost();
CostMap.find(OuterLoop) != CostMap.end()) { LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
unsigned InnerIndex = 0, OuterIndex = 0; if (Cost < -LoopInterchangeCostThreshold)
InnerIndex = CostMap.find(InnerLoop)->second; return true;
OuterIndex = CostMap.find(OuterLoop)->second;
LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex
<< ", OuterIndex = " << OuterIndex << "\n");
if (InnerIndex < OuterIndex)
return true;
} else {
// Legacy cost model: this is rough cost estimation algorithm. It counts the
// good and bad order of induction variables in the instruction and allows
// reordering if number of bad orders is more than good.
int Cost = getInstrOrderCost();
LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
if (Cost < -LoopInterchangeCostThreshold)
return true;
}
// It is not profitable as per current cache profitability model. But check if // It is not profitable as per current cache profitability model. But check if
// we can move this loop outside to improve parallelism. // we can move this loop outside to improve parallelism.
@ -1194,8 +1160,10 @@ bool LoopInterchangeProfitability::isProfitable(
return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable", return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
InnerLoop->getStartLoc(), InnerLoop->getStartLoc(),
InnerLoop->getHeader()) InnerLoop->getHeader())
<< "Interchanging loops is too costly and it does not improve " << "Interchanging loops is too costly (cost="
"parallelism."; << ore::NV("Cost", Cost) << ", threshold="
<< ore::NV("Threshold", LoopInterchangeCostThreshold)
<< ") and it does not improve parallelism.";
}); });
return false; return false;
} }
@ -1741,8 +1709,8 @@ struct LoopInterchangeLegacyPass : public LoopPass {
auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI(); auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
std::unique_ptr<CacheCost> CC = nullptr;
return LoopInterchange(SE, LI, DI, DT, CC, ORE).run(L); return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
} }
}; };
} // namespace } // namespace
@ -1769,10 +1737,8 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
Function &F = *LN.getParent(); Function &F = *LN.getParent();
DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI); DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
std::unique_ptr<CacheCost> CC =
CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
OptimizationRemarkEmitter ORE(&F); OptimizationRemarkEmitter ORE(&F);
if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN)) if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(LN))
return PreservedAnalyses::all(); return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses(); return getLoopPassPreservedAnalyses();
} }

View File

@ -1,13 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -aa-pipeline=basic-aa -passes='loop(loop-interchange)' -S %s | FileCheck %s --check-prefixes INTC ; RUN: opt -aa-pipeline=basic-aa -passes='loop(loop-interchange)' -S %s | FileCheck %s --check-prefixes INTC
; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(lnicm),loop(loop-interchange)' -S %s | FileCheck %s --check-prefixes LNICM ; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(lnicm),loop(loop-interchange)' -S %s | FileCheck %s --check-prefixes LNICM,CHECK
; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(licm),loop(loop-interchange)' -S %s | FileCheck %s --check-prefixes LICM ; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(licm),loop(loop-interchange)' -S %s | FileCheck %s --check-prefixes LICM,CHECK
; This test represents the following function: ; This test represents the following function:
; void test(int n, int m, int x[m][n], int y[n], int *z) { ; void test(int x[10][10], int y[10], int *z) {
; for (int k = 0; k < n; k++) { ; for (int k = 0; k < 10; k++) {
; int tmp = *z; ; int tmp = *z;
; for (int i = 0; i < m; i++) ; for (int i = 0; i < 10; i++)
; x[i][k] += y[k] + tmp; ; x[i][k] += y[k] + tmp;
; } ; }
; } ; }
@ -16,189 +15,83 @@
; to keep perfect loop nest. This enables optimizations that require ; to keep perfect loop nest. This enables optimizations that require
; perfect loop nest (e.g. loop-interchange) to perform. ; perfect loop nest (e.g. loop-interchange) to perform.
target triple = "powerpc64le-unknown-linux-gnu"
define dso_local void @test(i64 %n, i64 %m, ptr noalias %x, ptr noalias readonly %y, ptr readonly %z) { define dso_local void @test([10 x i32]* noalias %x, i32* noalias readonly %y, i32* readonly %z) {
; The loopnest is not interchanged when we only run loop interchange. ; CHECK-LABEL: @test(
; INTC-LABEL: @test( ; CHECK-NEXT: entry:
; INTC-NEXT: gurad: ; CHECK-NEXT: [[Z:%.*]] = load i32, i32* %z, align 4
; INTC-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[M:%.*]], 0 ; CHECK-NEXT: br label [[FOR_BODY3_PREHEADER:%.*]]
; INTC-NEXT: [[CMP32:%.*]] = icmp sgt i64 [[N:%.*]], 0 ; LNICM: for.body.preheader:
; INTC-NEXT: br i1 [[CMP23]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], label [[FOR_END11:%.*]] ; LICM-NOT: for.body.preheader:
; INTC: for.cond1.preheader.lr.ph: ; INTC-NOT: for.body.preheader:
; INTC-NEXT: br i1 [[CMP32]], label [[FOR_I_PREHEADER:%.*]], label [[FOR_END11]] ; LNICM-NEXT: br label [[FOR_BODY:%.*]]
; INTC: for.i.preheader: ; CHECK: for.body:
; INTC-NEXT: br label [[ENTRY:%.*]] ; LNICM-NEXT: [[K:%.*]] = phi i32 [ [[INC10:%.*]], [[FOR_END:%.*]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
; INTC: entry: ; LNICM-NEXT: br label [[FOR_BODY3_SPLIT1:%.*]]
; INTC-NEXT: br label [[FOR_BODY:%.*]] ; LICM: [[TMP:%.*]] = load i32, i32* [[ARRAYIDX:%.*]], align 4
; INTC: for.body: ; LNICM: for.body3.preheader:
; INTC-NEXT: [[K_02:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC10:%.*]], [[FOR_END:%.*]] ] ; LICM-NOT: for.body3.preheader:
; INTC-NEXT: [[TMP0:%.*]] = load i32, ptr [[Z:%.*]], align 4 ; INTC-NOT: for.body3.preheader:
; INTC-NEXT: br label [[FOR_BODY3:%.*]] ; LNICM-NEXT: br label [[FOR_BODY3:%.*]]
; INTC: for.body3: ; CHECK: for.body3:
; INTC-NEXT: [[I_01:%.*]] = phi i32 [ 0, [[FOR_BODY]] ], [ [[INC:%.*]], [[FOR_BODY3]] ] ; LNICM-NEXT: [[I:%.*]] = phi i32 [ [[TMP3:%.*]], [[FOR_BODY3_SPLIT:%.*]] ], [ 0, [[FOR_BODY3_PREHEADER:%.*]] ]
; INTC-NEXT: [[IDXPROM:%.*]] = sext i32 [[K_02]] to i64 ; LNICM-NEXT: br label [[FOR_BODY_PREHEADER:%.*]]
; INTC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[IDXPROM]] ; LNICM: for.body3.split1:
; INTC-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; LNICM-NEXT: [[IDXPROM:%.*]] = sext i32 [[K:%.*]] to i64
; INTC-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] ; LNICM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %y, i64 [[IDXPROM:%.*]]
; INTC-NEXT: [[IDXPROM4:%.*]] = sext i32 [[I_01]] to i64 ; LNICM-NEXT: [[TMP:%.*]] = load i32, i32* [[ARRAYIDX:%.*]], align 4
; INTC-NEXT: [[INDEX0:%.*]] = mul i64 [[IDXPROM4]], [[N]] ; LNICM-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP:%.*]], [[Z:%.*]]
; INTC-NEXT: [[INDEX1:%.*]] = add i64 [[INDEX0]], [[IDXPROM]] ; LNICM-NEXT: [[IDXPROM4:%.*]] = sext i32 [[I:%.*]] to i64
; INTC-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i64 [[INDEX1]] ; LNICM-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %x, i64 [[IDXPROM4:%.*]]
; INTC-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4 ; LNICM-NEXT: [[IDXPROM6:%.*]] = sext i32 [[K:%.*]] to i64
; INTC-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP2]], [[ADD]] ; LNICM-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX5:%.*]], i64 0, i64 [[IDXPROM6:%.*]]
; INTC-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX7]], align 4 ; LNICM-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX7:%.*]], align 4
; INTC-NEXT: [[INC]] = add nsw i32 [[I_01]], 1 ; LNICM-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP2:%.*]], [[ADD:%.*]]
; INTC-NEXT: [[INC_EXT:%.*]] = sext i32 [[INC]] to i64 ; LNICM-NEXT: store i32 [[ADD8:%.*]], i32* [[ARRAYIDX7:%.*]], align 4
; INTC-NEXT: [[CMP2:%.*]] = icmp slt i64 [[INC_EXT]], [[M]] ; LNICM-NEXT: [[INC:%.*]] = add nsw i32 [[I:%.*]], 1
; INTC-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END]], !llvm.loop [[LOOP0:![0-9]+]] ; LNICM-NEXT: [[CMP2:%.*]] = icmp slt i32 [[INC:%.*]], 10
; INTC: for.end: ; LNICM-NEXT: br label [[FOR_END:%.*]]
; INTC-NEXT: [[INC10]] = add nsw i32 [[K_02]], 1 ; LNICM: for.body3.split:
; INTC-NEXT: [[INC10_EXT:%.*]] = sext i32 [[INC10]] to i64 ; LICM-NOT: for.body3.split:
; INTC-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC10_EXT]], [[N]] ; INTC-NOT: for.body3.split:
; INTC-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END11_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]] ; LNICM-NEXT: [[TMP3:%.*]] = add nsw i32 [[I:%.*]], 1
; INTC: for.end11.loopexit: ; LNICM-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3:%.*]], 10
; INTC-NEXT: br label [[FOR_END11]] ; LNICM-NEXT: br i1 [[TMP4:%.*]], label [[FOR_BODY3:%.*]], label [[FOR_END11:%.*]], !llvm.loop !0
; INTC: for.end11: ; LNICM: for.end:
; INTC-NEXT: ret void ; LNICM-NEXT: [[INC10:%.*]] = add nsw i32 [[K:%.*]], 1
; ; LNICM-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC10:%.*]], 10
; The loopnest is interchanged when we run lnicm and loop interchange. ; LNICM-NEXT: br i1 [[CMP:%.*]], label [[FOR_BODY:%.*]], label [[FOR_BODY3_SPLIT:%.*]], !llvm.loop !2
; LNICM-LABEL: @test( ; LNICM: for.end11:
; LNICM-NEXT: gurad: ; LNICM-NEXT: ret void
; LNICM-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[M:%.*]], 0
; LNICM-NEXT: [[CMP32:%.*]] = icmp sgt i64 [[N:%.*]], 0
; LNICM-NEXT: br i1 [[CMP23]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], label [[FOR_END11:%.*]]
; LNICM: for.cond1.preheader.lr.ph:
; LNICM-NEXT: br i1 [[CMP32]], label [[FOR_I_PREHEADER:%.*]], label [[FOR_END11]]
; LNICM: for.i.preheader:
; LNICM-NEXT: br label [[FOR_BODY3_PREHEADER:%.*]]
; LNICM: entry:
; LNICM-NEXT: br label [[FOR_BODY:%.*]]
; LNICM: for.body:
; LNICM-NEXT: [[K_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC10:%.*]], [[FOR_END:%.*]] ]
; LNICM-NEXT: br label [[FOR_BODY3_SPLIT1:%.*]]
; LNICM: for.body3.preheader:
; LNICM-NEXT: [[TMP0:%.*]] = load i32, ptr [[Z:%.*]], align 4
; LNICM-NEXT: br label [[FOR_BODY3:%.*]]
; LNICM: for.body3:
; LNICM-NEXT: [[I_01:%.*]] = phi i32 [ [[TMP3:%.*]], [[FOR_BODY3_SPLIT:%.*]] ], [ 0, [[FOR_BODY3_PREHEADER]] ]
; LNICM-NEXT: br label [[ENTRY]]
; LNICM: for.body3.split1:
; LNICM-NEXT: [[IDXPROM:%.*]] = sext i32 [[K_02]] to i64
; LNICM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[IDXPROM]]
; LNICM-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; LNICM-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
; LNICM-NEXT: [[IDXPROM4:%.*]] = sext i32 [[I_01]] to i64
; LNICM-NEXT: [[INDEX0:%.*]] = mul i64 [[IDXPROM4]], [[N]]
; LNICM-NEXT: [[INDEX1:%.*]] = add i64 [[INDEX0]], [[IDXPROM]]
; LNICM-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i64 [[INDEX1]]
; LNICM-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
; LNICM-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
; LNICM-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX7]], align 4
; LNICM-NEXT: [[INC:%.*]] = add nsw i32 [[I_01]], 1
; LNICM-NEXT: [[INC_EXT:%.*]] = sext i32 [[INC]] to i64
; LNICM-NEXT: [[CMP2:%.*]] = icmp slt i64 [[INC_EXT]], [[M]]
; LNICM-NEXT: br label [[FOR_END]]
; LNICM: for.body3.split:
; LNICM-NEXT: [[TMP3]] = add nsw i32 [[I_01]], 1
; LNICM-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
; LNICM-NEXT: [[TMP5:%.*]] = icmp slt i64 [[TMP4]], [[M]]
; LNICM-NEXT: br i1 [[TMP5]], label [[FOR_BODY3]], label [[FOR_END11_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; LNICM: for.end:
; LNICM-NEXT: [[INC10]] = add nsw i32 [[K_02]], 1
; LNICM-NEXT: [[INC10_EXT:%.*]] = sext i32 [[INC10]] to i64
; LNICM-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC10_EXT]], [[N]]
; LNICM-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_BODY3_SPLIT]], !llvm.loop [[LOOP2:![0-9]+]]
; LNICM: for.end11.loopexit:
; LNICM-NEXT: br label [[FOR_END11]]
; LNICM: for.end11:
; LNICM-NEXT: ret void
;
; The loopnest is not interchanged when we run licm and loop interchange.
; LICM-LABEL: @test(
; LICM-NEXT: gurad:
; LICM-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[M:%.*]], 0
; LICM-NEXT: [[CMP32:%.*]] = icmp sgt i64 [[N:%.*]], 0
; LICM-NEXT: br i1 [[CMP23]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], label [[FOR_END11:%.*]]
; LICM: for.cond1.preheader.lr.ph:
; LICM-NEXT: br i1 [[CMP32]], label [[FOR_I_PREHEADER:%.*]], label [[FOR_END11]]
; LICM: for.i.preheader:
; LICM-NEXT: br label [[ENTRY:%.*]]
; LICM: entry:
; LICM-NEXT: [[TMP0:%.*]] = load i32, ptr [[Z:%.*]], align 4
; LICM-NEXT: br label [[FOR_BODY:%.*]]
; LICM: for.body:
; LICM-NEXT: [[K_02:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC10:%.*]], [[FOR_END:%.*]] ]
; LICM-NEXT: [[IDXPROM:%.*]] = sext i32 [[K_02]] to i64
; LICM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[IDXPROM]]
; LICM-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; LICM-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
; LICM-NEXT: br label [[FOR_BODY3:%.*]]
; LICM: for.body3:
; LICM-NEXT: [[I_01:%.*]] = phi i32 [ 0, [[FOR_BODY]] ], [ [[INC:%.*]], [[FOR_BODY3]] ]
; LICM-NEXT: [[IDXPROM4:%.*]] = sext i32 [[I_01]] to i64
; LICM-NEXT: [[INDEX0:%.*]] = mul i64 [[IDXPROM4]], [[N]]
; LICM-NEXT: [[INDEX1:%.*]] = add i64 [[INDEX0]], [[IDXPROM]]
; LICM-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i64 [[INDEX1]]
; LICM-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
; LICM-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
; LICM-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX7]], align 4
; LICM-NEXT: [[INC]] = add nsw i32 [[I_01]], 1
; LICM-NEXT: [[INC_EXT:%.*]] = sext i32 [[INC]] to i64
; LICM-NEXT: [[CMP2:%.*]] = icmp slt i64 [[INC_EXT]], [[M]]
; LICM-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END]], !llvm.loop [[LOOP0:![0-9]+]]
; LICM: for.end:
; LICM-NEXT: [[INC10]] = add nsw i32 [[K_02]], 1
; LICM-NEXT: [[INC10_EXT:%.*]] = sext i32 [[INC10]] to i64
; LICM-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC10_EXT]], [[N]]
; LICM-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END11_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
; LICM: for.end11.loopexit:
; LICM-NEXT: br label [[FOR_END11]]
; LICM: for.end11:
; LICM-NEXT: ret void
;
gurad: entry:
%cmp23 = icmp sgt i64 %m, 0
%cmp32 = icmp sgt i64 %n, 0
br i1 %cmp23, label %for.cond1.preheader.lr.ph, label %for.end11
for.cond1.preheader.lr.ph: ; preds = %gurad
br i1 %cmp32, label %for.i.preheader, label %for.end11
for.i.preheader: ; preds = %for.cond1.preheader.lr.ph
br label %entry
entry: ; preds = %for.i.preheader
br label %for.body br label %for.body
for.body: for.body:
%k.02 = phi i32 [ 0, %entry ], [ %inc10, %for.end ] %k.02 = phi i32 [ 0, %entry ], [ %inc10, %for.end ]
%0 = load i32, ptr %z, align 4 %0 = load i32, i32* %z, align 4
br label %for.body3 br label %for.body3
for.body3: for.body3:
%i.01 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ] %i.01 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ]
%idxprom = sext i32 %k.02 to i64 %idxprom = sext i32 %k.02 to i64
%arrayidx = getelementptr inbounds i32, ptr %y, i64 %idxprom %arrayidx = getelementptr inbounds i32, i32* %y, i64 %idxprom
%1 = load i32, ptr %arrayidx, align 4 %1 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %1, %0 %add = add nsw i32 %1, %0
%idxprom4 = sext i32 %i.01 to i64 %idxprom4 = sext i32 %i.01 to i64
%index0 = mul i64 %idxprom4, %n %arrayidx5 = getelementptr inbounds [10 x i32], [10 x i32]* %x, i64 %idxprom4
%index1 = add i64 %index0, %idxprom %idxprom6 = sext i32 %k.02 to i64
%arrayidx7 = getelementptr inbounds i32, ptr %x, i64 %index1 %arrayidx7 = getelementptr inbounds [10 x i32], [10 x i32]* %arrayidx5, i64 0, i64 %idxprom6
%2 = load i32, ptr %arrayidx7, align 4 %2 = load i32, i32* %arrayidx7, align 4
%add8 = add nsw i32 %2, %add %add8 = add nsw i32 %2, %add
store i32 %add8, ptr %arrayidx7, align 4 store i32 %add8, i32* %arrayidx7, align 4
%inc = add nsw i32 %i.01, 1 %inc = add nsw i32 %i.01, 1
%inc.ext = sext i32 %inc to i64 %cmp2 = icmp slt i32 %inc, 10
%cmp2 = icmp slt i64 %inc.ext, %m
br i1 %cmp2, label %for.body3, label %for.end, !llvm.loop !0 br i1 %cmp2, label %for.body3, label %for.end, !llvm.loop !0
for.end: for.end:
%inc10 = add nsw i32 %k.02, 1 %inc10 = add nsw i32 %k.02, 1
%inc10.ext = sext i32 %inc10 to i64 %cmp = icmp slt i32 %inc10, 10
%cmp = icmp slt i64 %inc10.ext, %n
br i1 %cmp, label %for.body, label %for.end11, !llvm.loop !2 br i1 %cmp, label %for.body, label %for.end11, !llvm.loop !2
for.end11: for.end11:

View File

@ -4,7 +4,7 @@
; RUN: FileCheck --input-file=%t %s ; RUN: FileCheck --input-file=%t %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "powerpc64le-unknown-linux-gnu" target triple = "x86_64-unknown-linux-gnu"
@A = common global [100 x [100 x i32]] zeroinitializer @A = common global [100 x [100 x i32]] zeroinitializer

View File

@ -8,7 +8,7 @@
; RUN: FileCheck --check-prefix=DELIN --input-file=%t %s ; RUN: FileCheck --check-prefix=DELIN --input-file=%t %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "powerpc64le-unknown-linux-gnu" target triple = "x86_64-unknown-linux-gnu"
@A = common global [100 x [100 x i32]] zeroinitializer @A = common global [100 x [100 x i32]] zeroinitializer
@B = common global [100 x [100 x [100 x i32]]] zeroinitializer @B = common global [100 x [100 x [100 x i32]]] zeroinitializer

View File

@ -4,7 +4,7 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "powerpc64le-unknown-linux-gnu" target triple = "x86_64-unknown-linux-gnu"
@A = common global [100 x [100 x i64]] zeroinitializer @A = common global [100 x [100 x i64]] zeroinitializer

View File

@ -2,7 +2,6 @@
; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \ ; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
; RUN: -S -debug 2>&1 | FileCheck %s ; RUN: -S -debug 2>&1 | FileCheck %s
target triple = "powerpc64le-unknown-linux-gnu"
@A = common global [100 x [100 x i64]] zeroinitializer @A = common global [100 x [100 x i64]] zeroinitializer
@N = dso_local local_unnamed_addr global i64 100, align 8 @N = dso_local local_unnamed_addr global i64 100, align 8

View File

@ -5,7 +5,6 @@
; Inner loop only reductions are not supported currently. See discussion at ; Inner loop only reductions are not supported currently. See discussion at
; D53027 for more information on the required checks. ; D53027 for more information on the required checks.
target triple = "powerpc64le-unknown-linux-gnu"
@A = common global [500 x [500 x i32]] zeroinitializer @A = common global [500 x [500 x i32]] zeroinitializer
@X = common global i32 0 @X = common global i32 0
@B = common global [500 x [500 x i32]] zeroinitializer @B = common global [500 x [500 x i32]] zeroinitializer

View File

@ -2,7 +2,6 @@
; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \ ; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
; RUN: -S -debug 2>&1 | FileCheck %s ; RUN: -S -debug 2>&1 | FileCheck %s
target triple = "powerpc64le-unknown-linux-gnu"
@a = common global i32 0, align 4 @a = common global i32 0, align 4
@d = common dso_local local_unnamed_addr global [1 x [6 x i32]] zeroinitializer, align 4 @d = common dso_local local_unnamed_addr global [1 x [6 x i32]] zeroinitializer, align 4

View File

@ -3,7 +3,7 @@
; RUN: -S -debug 2>&1 | FileCheck %s ; RUN: -S -debug 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "powerpc64le-unknown-linux-gnu" target triple = "x86_64-unknown-linux-gnu"
@A = common global [100 x [100 x i32]] zeroinitializer @A = common global [100 x [100 x i32]] zeroinitializer
@B = common global [100 x i32] zeroinitializer @B = common global [100 x i32] zeroinitializer

View File

@ -1,7 +1,6 @@
; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \ ; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
; RUN: -S -pass-remarks=loop-interchange 2>&1 | FileCheck %s ; RUN: -S -pass-remarks=loop-interchange 2>&1 | FileCheck %s
target triple = "powerpc64le-unknown-linux-gnu"
@A10 = local_unnamed_addr global [3 x [3 x i32]] zeroinitializer, align 16 @A10 = local_unnamed_addr global [3 x [3 x i32]] zeroinitializer, align 16
;; Test to make sure we can handle zext instructions introduced by ;; Test to make sure we can handle zext instructions introduced by

View File

@ -4,7 +4,6 @@
; RUN: | FileCheck -check-prefix=STATS %s ; RUN: | FileCheck -check-prefix=STATS %s
; RUN: FileCheck -input-file %t %s ; RUN: FileCheck -input-file %t %s
target triple = "powerpc64le-unknown-linux-gnu"
; no_deps_interchange just accesses a single nested array and can be interchanged.
; CHECK: Name: Interchanged ; CHECK: Name: Interchanged
@ -35,6 +34,35 @@ exit: ; preds = %for1.inc
} }
; Only the inner loop induction variable is used for memory accesses.
; Interchanging is not beneficial.
; CHECK: Name: InterchangeNotProfitable
; CHECK-NEXT: Function: no_bad_order
define i32 @no_bad_order(i32* %Arr) {
entry:
br label %for1.header
for1.header: ; preds = %entry, %for1.inc
%indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for1.inc ]
br label %for2
for2: ; preds = %for1.header, %for2
%indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next, %for2 ]
%arrayidx6 = getelementptr inbounds i32, i32* %Arr, i64 %indvars.iv
store i32 0, i32* %arrayidx6, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for2, label %for1.inc
for1.inc:
%indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
%exitcond21 = icmp ne i64 %indvars.iv.next20, 1024
br i1 %exitcond21, label %for1.header, label %exit
exit: ; preds = %for1.inc
ret i32 0
}
; No memory access using any induction variables, interchanging not beneficial. ; No memory access using any induction variables, interchanging not beneficial.
; CHECK: Name: InterchangeNotProfitable ; CHECK: Name: InterchangeNotProfitable
; CHECK-NEXT: Function: no_mem_instrs ; CHECK-NEXT: Function: no_mem_instrs

View File

@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s ; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
target triple = "powerpc64le-unknown-linux-gnu"
@b = common dso_local local_unnamed_addr global [200 x [200 x i32]] zeroinitializer, align 4 @b = common dso_local local_unnamed_addr global [200 x [200 x i32]] zeroinitializer, align 4
@a = common dso_local local_unnamed_addr global i32 0, align 4 @a = common dso_local local_unnamed_addr global i32 0, align 4

View File

@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s ; RUN: opt < %s --basic-aa -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
target triple = "powerpc64le-unknown-linux-gnu"
@b = constant [200 x [100 x i32]] zeroinitializer, align 4 @b = constant [200 x [100 x i32]] zeroinitializer, align 4
@a = constant i32 0, align 4 @a = constant i32 0, align 4

View File

@ -3,7 +3,7 @@
; RUN: opt < %s -aa-pipeline=basic-aa -passes=loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s ; RUN: opt < %s -aa-pipeline=basic-aa -passes=loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "powerpc64le-unknown-linux-gnu" target triple = "x86_64-unknown-linux-gnu"
@A = common global [100 x [100 x i64]] zeroinitializer @A = common global [100 x [100 x i64]] zeroinitializer
@B = common global [100 x i64] zeroinitializer @B = common global [100 x i64] zeroinitializer

View File

@ -3,7 +3,7 @@
; RUN: -S -debug 2>&1 | FileCheck %s ; RUN: -S -debug 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "powerpc64le-unknown-linux-gnu" target triple = "x86_64-unknown-linux-gnu"
@D = common global [100 x [100 x [100 x i32]]] zeroinitializer @D = common global [100 x [100 x [100 x i32]]] zeroinitializer
@ -24,31 +24,31 @@ entry:
br label %for.cond1.preheader br label %for.cond1.preheader
for.cond1.preheader: ; preds = %for.inc15, %entry for.cond1.preheader: ; preds = %for.inc15, %entry
%i.028 = phi i64 [ 0, %entry ], [ %inc16, %for.inc15 ] %i.028 = phi i32 [ 0, %entry ], [ %inc16, %for.inc15 ]
br label %for.cond4.preheader br label %for.cond4.preheader
for.cond4.preheader: ; preds = %for.inc12, %for.cond1.preheader for.cond4.preheader: ; preds = %for.inc12, %for.cond1.preheader
%j.027 = phi i64 [ 0, %for.cond1.preheader ], [ %inc13, %for.inc12 ] %j.027 = phi i32 [ 0, %for.cond1.preheader ], [ %inc13, %for.inc12 ]
br label %for.body6 br label %for.body6
for.body6: ; preds = %for.body6, %for.cond4.preheader for.body6: ; preds = %for.body6, %for.cond4.preheader
%k.026 = phi i64 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ] %k.026 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
%arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i64 0, i64 %k.026, i64 %j.027, i64 %i.028 %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i32 0, i32 %k.026, i32 %j.027, i32 %i.028
%0 = load i32, i32* %arrayidx8 %0 = load i32, i32* %arrayidx8
%add = add nsw i32 %0, %t %add = add nsw i32 %0, %t
store i32 %add, i32* %arrayidx8 store i32 %add, i32* %arrayidx8
%inc = add nuw nsw i64 %k.026, 1 %inc = add nuw nsw i32 %k.026, 1
%exitcond = icmp eq i64 %inc, 100 %exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.inc12, label %for.body6 br i1 %exitcond, label %for.inc12, label %for.body6
for.inc12: ; preds = %for.body6 for.inc12: ; preds = %for.body6
%inc13 = add nuw nsw i64 %j.027, 1 %inc13 = add nuw nsw i32 %j.027, 1
%exitcond29 = icmp eq i64 %inc13, 100 %exitcond29 = icmp eq i32 %inc13, 100
br i1 %exitcond29, label %for.inc15, label %for.cond4.preheader br i1 %exitcond29, label %for.inc15, label %for.cond4.preheader
for.inc15: ; preds = %for.inc12 for.inc15: ; preds = %for.inc12
%inc16 = add nuw nsw i64 %i.028, 1 %inc16 = add nuw nsw i32 %i.028, 1
%exitcond30 = icmp eq i64 %inc16, 100 %exitcond30 = icmp eq i32 %inc16, 100
br i1 %exitcond30, label %for.end17, label %for.cond1.preheader br i1 %exitcond30, label %for.end17, label %for.cond1.preheader
for.end17: ; preds = %for.inc15 for.end17: ; preds = %for.inc15

View File

@ -3,7 +3,6 @@
; RUN: opt < %s -basic-aa -loop-interchange -da-disable-delinearization-checks -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -S | FileCheck -check-prefix=CHECK-DELIN %s ; RUN: opt < %s -basic-aa -loop-interchange -da-disable-delinearization-checks -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -S | FileCheck -check-prefix=CHECK-DELIN %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "powerpc64le-unknown-linux-gnu"
; void foo(int n, int m) { ; void foo(int n, int m) {
; int temp[16][16]; ; int temp[16][16];

View File

@ -2,7 +2,7 @@
; RUN: FileCheck --input-file %t --check-prefix REMARK %s ; RUN: FileCheck --input-file %t --check-prefix REMARK %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "powerpc64le-unknown-linux-gnu" target triple = "x86_64-unknown-linux-gnu"
@A = common global [100 x [100 x i32]] zeroinitializer @A = common global [100 x [100 x i32]] zeroinitializer
@C = common global [100 x [100 x i32]] zeroinitializer @C = common global [100 x [100 x i32]] zeroinitializer

View File

@ -10,7 +10,6 @@
; RUN: -pass-remarks='loop-interchange' -S -da-disable-delinearization-checks ; RUN: -pass-remarks='loop-interchange' -S -da-disable-delinearization-checks
; RUN: cat %t | FileCheck --check-prefix=DELIN %s ; RUN: cat %t | FileCheck --check-prefix=DELIN %s
target triple = "powerpc64le-unknown-linux-gnu"
@A = common global [100 x [100 x i32]] zeroinitializer @A = common global [100 x [100 x i32]] zeroinitializer
@B = common global [100 x [100 x i32]] zeroinitializer @B = common global [100 x [100 x i32]] zeroinitializer
@C = common global [100 x i32] zeroinitializer @C = common global [100 x i32] zeroinitializer
@ -72,7 +71,11 @@ for.end19:
; DELIN-NEXT: Name: InterchangeNotProfitable ; DELIN-NEXT: Name: InterchangeNotProfitable
; DELIN-NEXT: Function: test01 ; DELIN-NEXT: Function: test01
; DELIN-NEXT: Args: ; DELIN-NEXT: Args:
; DELIN-NEXT: - String: Interchanging loops is too costly and it does not improve parallelism. ; DELIN-NEXT: - String: 'Interchanging loops is too costly (cost='
; DELIN-NEXT: - Cost: '2'
; DELIN-NEXT: - String: ', threshold='
; DELIN-NEXT: - Threshold: '0'
; DELIN-NEXT: - String: ') and it does not improve parallelism.'
; DELIN-NEXT: ... ; DELIN-NEXT: ...
;;--------------------------------------Test case 02------------------------------------ ;;--------------------------------------Test case 02------------------------------------

View File

@ -3,7 +3,7 @@
; RUN: -S -debug 2>&1 | FileCheck %s ; RUN: -S -debug 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "powerpc64le-unknown-linux-gnu" target triple = "x86_64-unknown-linux-gnu"
@A = common global [100 x [100 x i32]] zeroinitializer @A = common global [100 x [100 x i32]] zeroinitializer
@B = common global [100 x i32] zeroinitializer @B = common global [100 x i32] zeroinitializer

View File

@ -3,7 +3,7 @@
; RUN: -S -debug 2>&1 | FileCheck %s ; RUN: -S -debug 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "powerpc64le-unknown-linux-gnu" target triple = "x86_64-unknown-linux-gnu"
@D = common global [100 x [100 x [100 x i32]]] zeroinitializer @D = common global [100 x [100 x [100 x i32]]] zeroinitializer
@ -24,31 +24,31 @@ entry:
br label %for.cond1.preheader br label %for.cond1.preheader
for.cond1.preheader: ; preds = %for.inc15, %entry for.cond1.preheader: ; preds = %for.inc15, %entry
%i.028 = phi i64 [ 0, %entry ], [ %inc16, %for.inc15 ] %i.028 = phi i32 [ 0, %entry ], [ %inc16, %for.inc15 ]
br label %for.cond4.preheader br label %for.cond4.preheader
for.cond4.preheader: ; preds = %for.inc12, %for.cond1.preheader for.cond4.preheader: ; preds = %for.inc12, %for.cond1.preheader
%j.027 = phi i64 [ 0, %for.cond1.preheader ], [ %inc13, %for.inc12 ] %j.027 = phi i32 [ 0, %for.cond1.preheader ], [ %inc13, %for.inc12 ]
br label %for.body6 br label %for.body6
for.body6: ; preds = %for.body6, %for.cond4.preheader for.body6: ; preds = %for.body6, %for.cond4.preheader
%k.026 = phi i64 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ] %k.026 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
%arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i32 0, i64 %i.028, i64 %k.026, i64 %j.027 %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i32 0, i32 %i.028, i32 %k.026, i32 %j.027
%0 = load i32, i32* %arrayidx8 %0 = load i32, i32* %arrayidx8
%add = add nsw i32 %0, %t %add = add nsw i32 %0, %t
store i32 %add, i32* %arrayidx8 store i32 %add, i32* %arrayidx8
%inc = add nuw nsw i64 %k.026, 1 %inc = add nuw nsw i32 %k.026, 1
%exitcond = icmp eq i64 %inc, 100 %exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.inc12, label %for.body6 br i1 %exitcond, label %for.inc12, label %for.body6
for.inc12: ; preds = %for.body6 for.inc12: ; preds = %for.body6
%inc13 = add nuw nsw i64 %j.027, 1 %inc13 = add nuw nsw i32 %j.027, 1
%exitcond29 = icmp eq i64 %inc13, 100 %exitcond29 = icmp eq i32 %inc13, 100
br i1 %exitcond29, label %for.inc15, label %for.cond4.preheader br i1 %exitcond29, label %for.inc15, label %for.cond4.preheader
for.inc15: ; preds = %for.inc12 for.inc15: ; preds = %for.inc12
%inc16 = add nuw nsw i64 %i.028, 1 %inc16 = add nuw nsw i32 %i.028, 1
%exitcond30 = icmp eq i64 %inc16, 100 %exitcond30 = icmp eq i32 %inc16, 100
br i1 %exitcond30, label %for.end17, label %for.cond1.preheader br i1 %exitcond30, label %for.end17, label %for.cond1.preheader
for.end17: ; preds = %for.inc15 for.end17: ; preds = %for.inc15

View File

@ -3,7 +3,7 @@
; RUN: -S -debug 2>&1 | FileCheck %s ; RUN: -S -debug 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "powerpc64le-unknown-linux-gnu" target triple = "x86_64-unknown-linux-gnu"
@A = common global [100 x [100 x i32]] zeroinitializer @A = common global [100 x [100 x i32]] zeroinitializer
@B = common global [100 x i32] zeroinitializer @B = common global [100 x i32] zeroinitializer
@ -108,13 +108,13 @@ for.end12:
;; The outer loop header does not branch to the inner loop preheader, or the ;; The outer loop header does not branch to the inner loop preheader, or the
;; inner loop header, or the outer loop latch. ;; inner loop header, or the outer loop latch.
; CHECK: Not interchanging loops. Cannot prove legality. ; CHECK: Not interchanging loops. Cannot prove legality.
define void @interchange_07(i32 %k, i32 %N, i64 %ny) { define void @interchange_07(i32 %k, i32 %N, i32 %ny) {
entry: entry:
br label %for1.header br label %for1.header
for1.header: for1.header:
%j23 = phi i64 [ 0, %entry ], [ %j.next24, %for1.inc10 ] %j23 = phi i32 [ 0, %entry ], [ %j.next24, %for1.inc10 ]
%cmp21 = icmp slt i64 0, %ny %cmp21 = icmp slt i32 0, %ny
br label %singleSucc br label %singleSucc
singleSucc: singleSucc:
@ -124,18 +124,18 @@ preheader.j:
br label %for2 br label %for2
for2: for2:
%j = phi i64 [ %j.next, %for2 ], [ 0, %preheader.j ] %j = phi i32 [ %j.next, %for2 ], [ 0, %preheader.j ]
%arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %j, i64 %j23 %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i32 0, i32 %j, i32 %j23
%lv = load i32, i32* %arrayidx5 %lv = load i32, i32* %arrayidx5
%add = add nsw i32 %lv, %k %add = add nsw i32 %lv, %k
store i32 %add, i32* %arrayidx5 store i32 %add, i32* %arrayidx5
%j.next = add nuw nsw i64 %j, 1 %j.next = add nuw nsw i32 %j, 1
%exitcond = icmp eq i64 %j, 99 %exitcond = icmp eq i32 %j, 99
br i1 %exitcond, label %for1.inc10, label %for2 br i1 %exitcond, label %for1.inc10, label %for2
for1.inc10: for1.inc10:
%j.next24 = add nuw nsw i64 %j23, 1 %j.next24 = add nuw nsw i32 %j23, 1
%exitcond26 = icmp eq i64 %j23, 99 %exitcond26 = icmp eq i32 %j23, 99
br i1 %exitcond26, label %for.end12, label %for1.header br i1 %exitcond26, label %for.end12, label %for1.header
for.end12: for.end12:

View File

@ -1,6 +1,5 @@
; RUN: opt -basic-aa -loop-interchange -verify-dom-info -verify-loop-info -verify-loop-lcssa -S %s | FileCheck %s ; RUN: opt -basic-aa -loop-interchange -verify-dom-info -verify-loop-info -verify-loop-lcssa -S %s | FileCheck %s
target triple = "powerpc64le-unknown-linux-gnu"
@b = global [3 x [5 x [8 x i16]]] [[5 x [8 x i16]] zeroinitializer, [5 x [8 x i16]] [[8 x i16] zeroinitializer, [8 x i16] [i16 0, i16 0, i16 0, i16 6, i16 1, i16 6, i16 0, i16 0], [8 x i16] zeroinitializer, [8 x i16] zeroinitializer, [8 x i16] zeroinitializer], [5 x [8 x i16]] zeroinitializer], align 2 @b = global [3 x [5 x [8 x i16]]] [[5 x [8 x i16]] zeroinitializer, [5 x [8 x i16]] [[8 x i16] zeroinitializer, [8 x i16] [i16 0, i16 0, i16 0, i16 6, i16 1, i16 6, i16 0, i16 0], [8 x i16] zeroinitializer, [8 x i16] zeroinitializer, [8 x i16] zeroinitializer], [5 x [8 x i16]] zeroinitializer], align 2
@a = common global i32 0, align 4 @a = common global i32 0, align 4
@d = common dso_local local_unnamed_addr global [1 x [6 x i32]] zeroinitializer, align 4 @d = common dso_local local_unnamed_addr global [1 x [6 x i32]] zeroinitializer, align 4

View File

@ -4,7 +4,6 @@
; Outer loop only reductions are not supported currently. ; Outer loop only reductions are not supported currently.
target triple = "powerpc64le-unknown-linux-gnu"
@A = common global [500 x [500 x i32]] zeroinitializer @A = common global [500 x [500 x i32]] zeroinitializer
;; global X ;; global X

View File

@ -3,7 +3,6 @@
; Test case for PR41725. The induction variables in the latches escape the ; Test case for PR41725. The induction variables in the latches escape the
; loops and we must move some PHIs around. ; loops and we must move some PHIs around.
target triple = "powerpc64le-unknown-linux-gnu"
@a = common dso_local global i64 0, align 4 @a = common dso_local global i64 0, align 4
@b = common dso_local global i64 0, align 4 @b = common dso_local global i64 0, align 4
@c = common dso_local global [10 x [10 x i32 ]] zeroinitializer, align 16 @c = common dso_local global [10 x [10 x i32 ]] zeroinitializer, align 16
@ -158,7 +157,7 @@ exit: ; preds = %outer.latch
; Make sure we do not crash for loops without reachable exits. ; Make sure we do not crash for loops without reachable exits.
define void @no_reachable_exits() { define void @no_reachable_exits() {
; Check we interchanged. ; Check we interchanged.
; CHECK-LABEL: @no_reachable_exits() ; CHECK-LABEL: @no_reachable_exits() {
; CHECK-NEXT: bb: ; CHECK-NEXT: bb:
; CHECK-NEXT: br label %inner.ph ; CHECK-NEXT: br label %inner.ph
; CHECK-LABEL: outer.ph: ; CHECK-LABEL: outer.ph:

View File

@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-interchange -verify-loop-lcssa -verify-dom-info -S %s | FileCheck %s ; RUN: opt -loop-interchange -verify-loop-lcssa -verify-dom-info -S %s | FileCheck %s
target triple = "powerpc64le-unknown-linux-gnu"
@b = external dso_local global [5 x i32], align 16 @b = external dso_local global [5 x i32], align 16
define void @test1() { define void @test1() {

View File

@ -14,8 +14,6 @@
; } ; }
; } ; }
target triple = "powerpc64le-unknown-linux-gnu"
; REMARKS: --- !Passed ; REMARKS: --- !Passed
; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: Interchanged ; REMARKS-NEXT: Name: Interchanged

View File

@ -2,7 +2,6 @@
; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1 ; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1
; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s ; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s
target triple = "powerpc64le-unknown-linux-gnu"
@a = global i32 0 @a = global i32 0
@b = global i8 0 @b = global i8 0
@c = global i32 0 @c = global i32 0

View File

@ -6,8 +6,6 @@
; In the 2 test cases below, we have a LCSSA PHI in the inner loop exit, which ; In the 2 test cases below, we have a LCSSA PHI in the inner loop exit, which
; is used in the outer loop latch. This is not supported. ; is used in the outer loop latch. This is not supported.
target triple = "powerpc64le-unknown-linux-gnu"
define void @test1() { define void @test1() {
; CHECK-LABEL: @test1( ; CHECK-LABEL: @test1(
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:

View File

@ -3,7 +3,6 @@
; Tests for PR43797. ; Tests for PR43797.
target triple = "powerpc64le-unknown-linux-gnu"
@wdtdr = external dso_local global [5 x [5 x double]], align 16 @wdtdr = external dso_local global [5 x [5 x double]], align 16
define void @test1() { define void @test1() {

View File

@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-interchange -S %s | FileCheck %s ; RUN: opt -loop-interchange -S %s | FileCheck %s
target triple = "powerpc64le-unknown-linux-gnu"
@global = external local_unnamed_addr global [400 x [400 x i32]], align 16 @global = external local_unnamed_addr global [400 x [400 x i32]], align 16
; We need to move %tmp4 from the inner loop pre header to the outer loop header ; We need to move %tmp4 from the inner loop pre header to the outer loop header

View File

@ -2,8 +2,6 @@
; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1 ; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1
; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s ; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s
target triple = "powerpc64le-unknown-linux-gnu"
; REMARKS: --- !Passed ; REMARKS: --- !Passed
; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: Interchanged ; REMARKS-NEXT: Name: Interchanged

View File

@ -5,7 +5,7 @@
;; We test profitability model in these test cases. ;; We test profitability model in these test cases.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "powerpc64le-unknown-linux-gnu" target triple = "x86_64-unknown-linux-gnu"
@A = common global [100 x [100 x i32]] zeroinitializer @A = common global [100 x [100 x i32]] zeroinitializer
@B = common global [100 x [100 x i32]] zeroinitializer @B = common global [100 x [100 x i32]] zeroinitializer

View File

@ -4,7 +4,7 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "powerpc64le-unknown-linux-gnu" target triple = "x86_64-unknown-linux-gnu"
; REMARKS: --- !Passed ; REMARKS: --- !Passed
; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Pass: loop-interchange

View File

@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-interchange -S %s | FileCheck %s ; RUN: opt -loop-interchange -S %s | FileCheck %s
target triple = "powerpc64le-unknown-linux-gnu"
@global = external dso_local global [1000 x [1000 x i32]], align 16 @global = external dso_local global [1000 x [1000 x i32]], align 16
; Test that we support updating conditional branches where both targets are the same ; Test that we support updating conditional branches where both targets are the same

View File

@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-interchange -loop-interchange-threshold=-10 -S %s | FileCheck %s ; RUN: opt -loop-interchange -loop-interchange-threshold=-10 -S %s | FileCheck %s
target triple = "powerpc64le-unknown-linux-gnu"
; The test contains a GEP with an operand that is not SCEV-able. Make sure ; The test contains a GEP with an operand that is not SCEV-able. Make sure
; loop-interchange does not crash. ; loop-interchange does not crash.
define void @test([256 x float]* noalias %src, float* %dst) { define void @test([256 x float]* noalias %src, float* %dst) {