[LoopUnrollAndJam] Change LoopUnrollAndJamPass to LoopNest pass

This patch changes LoopUnrollAndJamPass from a function pass to a LoopNest pass.
The next patch will utilize LoopNest to effectively handle loop nests.

Reviewed By: Whitney

Differential Revision: https://reviews.llvm.org/D99149
This commit is contained in:
maekawatoshiki 2021-05-21 23:37:05 +09:00
parent 7521fcd269
commit cea7a3fe3d
5 changed files with 56 additions and 44 deletions

View File

@ -10,6 +10,7 @@
#define LLVM_TRANSFORMS_SCALAR_LOOPUNROLLANDJAMPASS_H #define LLVM_TRANSFORMS_SCALAR_LOOPUNROLLANDJAMPASS_H
#include "llvm/IR/PassManager.h" #include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
namespace llvm { namespace llvm {
class Function; class Function;
@ -20,7 +21,8 @@ class LoopUnrollAndJamPass : public PassInfoMixin<LoopUnrollAndJamPass> {
public: public:
explicit LoopUnrollAndJamPass(int OptLevel = 2) : OptLevel(OptLevel) {} explicit LoopUnrollAndJamPass(int OptLevel = 2) : OptLevel(OptLevel) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
}; };
} // end namespace llvm } // end namespace llvm

View File

@ -1217,7 +1217,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
// across the loop nests. // across the loop nests.
// We do UnrollAndJam in a separate LPM to ensure it happens before unroll // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
if (EnableUnrollAndJam && PTO.LoopUnrolling) if (EnableUnrollAndJam && PTO.LoopUnrolling)
FPM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel())); FPM.addPass(createFunctionToLoopPassAdaptor(
LoopUnrollAndJamPass(Level.getSpeedupLevel())));
FPM.addPass(LoopUnrollPass(LoopUnrollOptions( FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
PTO.ForgetAllSCEVInLoopUnroll))); PTO.ForgetAllSCEVInLoopUnroll)));
@ -1300,7 +1301,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
// across the loop nests. // across the loop nests.
// We do UnrollAndJam in a separate LPM to ensure it happens before unroll // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
if (EnableUnrollAndJam && PTO.LoopUnrolling) { if (EnableUnrollAndJam && PTO.LoopUnrolling) {
FPM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel())); FPM.addPass(createFunctionToLoopPassAdaptor(
LoopUnrollAndJamPass(Level.getSpeedupLevel())));
} }
FPM.addPass(LoopUnrollPass(LoopUnrollOptions( FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,

View File

@ -248,7 +248,6 @@ FUNCTION_PASS("guard-widening", GuardWideningPass())
FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass()) FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass())
FUNCTION_PASS("loop-simplify", LoopSimplifyPass()) FUNCTION_PASS("loop-simplify", LoopSimplifyPass())
FUNCTION_PASS("loop-sink", LoopSinkPass()) FUNCTION_PASS("loop-sink", LoopSinkPass())
FUNCTION_PASS("loop-unroll-and-jam", LoopUnrollAndJamPass())
FUNCTION_PASS("loop-flatten", LoopFlattenPass()) FUNCTION_PASS("loop-flatten", LoopFlattenPass())
FUNCTION_PASS("lowerinvoke", LowerInvokePass()) FUNCTION_PASS("lowerinvoke", LowerInvokePass())
FUNCTION_PASS("lowerswitch", LowerSwitchPass()) FUNCTION_PASS("lowerswitch", LowerSwitchPass())
@ -400,6 +399,7 @@ LOOP_PASS("loop-deletion", LoopDeletionPass())
LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass()) LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass())
LOOP_PASS("loop-reduce", LoopStrengthReducePass()) LOOP_PASS("loop-reduce", LoopStrengthReducePass())
LOOP_PASS("indvars", IndVarSimplifyPass()) LOOP_PASS("indvars", IndVarSimplifyPass())
LOOP_PASS("loop-unroll-and-jam", LoopUnrollAndJamPass())
LOOP_PASS("loop-unroll-full", LoopFullUnrollPass()) LOOP_PASS("loop-unroll-full", LoopFullUnrollPass())
LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs())) LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs()))
LOOP_PASS("print<ddg>", DDGAnalysisPrinterPass(dbgs())) LOOP_PASS("print<ddg>", DDGAnalysisPrinterPass(dbgs()))

View File

@ -22,6 +22,7 @@
#include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TargetTransformInfo.h"
@ -424,35 +425,27 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
return UnrollResult; return UnrollResult;
} }
static bool tryToUnrollAndJamLoop(Function &F, DominatorTree &DT, LoopInfo &LI, static bool tryToUnrollAndJamLoop(LoopNest &LN, DominatorTree &DT, LoopInfo &LI,
ScalarEvolution &SE, ScalarEvolution &SE,
const TargetTransformInfo &TTI, const TargetTransformInfo &TTI,
AssumptionCache &AC, DependenceInfo &DI, AssumptionCache &AC, DependenceInfo &DI,
OptimizationRemarkEmitter &ORE, OptimizationRemarkEmitter &ORE, int OptLevel,
int OptLevel) { bool &CurrentLoopDeleted) {
bool DidSomething = false; bool DidSomething = false;
ArrayRef<Loop *> Loops = LN.getLoops();
// The loop unroll and jam pass requires loops to be in simplified form, and // Add the loop nests in the reverse order of LN. See method
// also needs LCSSA. Since simplification may add new inner loops, it has to
// run before the legality and profitability checks. This means running the
// loop unroll and jam pass will simplify all loops, regardless of whether
// anything end up being unroll and jammed.
for (auto &L : LI) {
DidSomething |=
simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
DidSomething |= formLCSSARecursively(*L, DT, &LI, &SE);
}
// Add the loop nests in the reverse order of LoopInfo. See method
// declaration. // declaration.
SmallPriorityWorklist<Loop *, 4> Worklist; SmallPriorityWorklist<Loop *, 4> Worklist;
appendLoopsToWorklist(LI, Worklist); appendLoopsToWorklist(Loops, Worklist);
while (!Worklist.empty()) { while (!Worklist.empty()) {
Loop *L = Worklist.pop_back_val(); Loop *L = Worklist.pop_back_val();
LoopUnrollResult Result = LoopUnrollResult Result =
tryToUnrollAndJamLoop(L, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel); tryToUnrollAndJamLoop(L, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel);
if (Result != LoopUnrollResult::Unmodified) if (Result != LoopUnrollResult::Unmodified)
DidSomething = true; DidSomething = true;
if (L->isOutermost() && Result == LoopUnrollResult::FullyUnrolled)
CurrentLoopDeleted = true;
} }
return DidSomething; return DidSomething;
@ -460,29 +453,35 @@ static bool tryToUnrollAndJamLoop(Function &F, DominatorTree &DT, LoopInfo &LI,
namespace { namespace {
class LoopUnrollAndJam : public FunctionPass { class LoopUnrollAndJam : public LoopPass {
public: public:
static char ID; // Pass ID, replacement for typeid static char ID; // Pass ID, replacement for typeid
unsigned OptLevel; unsigned OptLevel;
LoopUnrollAndJam(int OptLevel = 2) : FunctionPass(ID), OptLevel(OptLevel) { LoopUnrollAndJam(int OptLevel = 2) : LoopPass(ID), OptLevel(OptLevel) {
initializeLoopUnrollAndJamPass(*PassRegistry::getPassRegistry()); initializeLoopUnrollAndJamPass(*PassRegistry::getPassRegistry());
} }
bool runOnFunction(Function &F) override { bool runOnLoop(Loop *L, LPPassManager &LPM) override {
if (skipFunction(F)) if (skipLoop(L))
return false; return false;
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto *F = L->getHeader()->getParent();
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
const TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI(); auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F);
auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
return tryToUnrollAndJamLoop(F, DT, LI, SE, TTI, AC, DI, ORE, OptLevel); LoopUnrollResult Result =
tryToUnrollAndJamLoop(L, DT, LI, SE, TTI, AC, DI, ORE, OptLevel);
if (Result == LoopUnrollResult::FullyUnrolled)
LPM.markLoopAsDeleted(*L);
return Result != LoopUnrollResult::Unmodified;
} }
/// This transformation requires natural loop information & requires that /// This transformation requires natural loop information & requires that
@ -505,7 +504,10 @@ char LoopUnrollAndJam::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam", INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam",
"Unroll and Jam loops", false, false) "Unroll and Jam loops", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
@ -518,19 +520,25 @@ Pass *llvm::createLoopUnrollAndJamPass(int OptLevel) {
return new LoopUnrollAndJam(OptLevel); return new LoopUnrollAndJam(OptLevel);
} }
PreservedAnalyses LoopUnrollAndJamPass::run(Function &F, PreservedAnalyses LoopUnrollAndJamPass::run(LoopNest &LN,
FunctionAnalysisManager &AM) { LoopAnalysisManager &AM,
ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F); LoopStandardAnalysisResults &AR,
LoopInfo &LI = AM.getResult<LoopAnalysis>(F); LPMUpdater &U) {
TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); Function &F = *LN.getParent();
AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F); std::string LoopName = std::string(LN.getOutermostLoop().getName());
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
DependenceInfo &DI = AM.getResult<DependenceAnalysis>(F);
OptimizationRemarkEmitter &ORE =
AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
if (!tryToUnrollAndJamLoop(F, DT, LI, SE, TTI, AC, DI, ORE, OptLevel)) DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
OptimizationRemarkEmitter ORE(&F);
bool CurrentLoopDeleted = false;
if (!tryToUnrollAndJamLoop(LN, AR.DT, AR.LI, AR.SE, AR.TTI, AR.AC, DI, ORE,
OptLevel, CurrentLoopDeleted))
return PreservedAnalyses::all(); return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses(); if (CurrentLoopDeleted)
U.markLoopAsDeleted(LN.getOutermostLoop(), LoopName);
auto PA = getLoopPassPreservedAnalyses();
PA.preserve<LoopNestAnalysis>();
return PA;
} }

View File

@ -1,5 +1,5 @@
; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -verify-loop-info < %s -S | FileCheck %s ; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -verify-loop-info < %s -S | FileCheck %s
; RUN: opt -passes='loop-unroll-and-jam,verify<loops>' -allow-unroll-and-jam < %s -S | FileCheck %s ; RUN: opt -passes='loop(loop-unroll-and-jam),verify<loops>' -allow-unroll-and-jam < %s -S | FileCheck %s
; Check that the newly created loops do not fail to be added to LI ; Check that the newly created loops do not fail to be added to LI
; This test deliberately disables UnJ on the middle loop, performing it instead on the ; This test deliberately disables UnJ on the middle loop, performing it instead on the