diff --git a/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h b/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h index bd83a6a0cca4..6125fc7636a0 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h @@ -10,6 +10,7 @@ #define LLVM_TRANSFORMS_SCALAR_LOOPUNROLLANDJAMPASS_H #include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { class Function; @@ -20,7 +21,8 @@ class LoopUnrollAndJamPass : public PassInfoMixin<LoopUnrollAndJamPass> { public: explicit LoopUnrollAndJamPass(int OptLevel = 2) : OptLevel(OptLevel) {} - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); }; } // end namespace llvm diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 4f18dac2d1e0..d72896fa0a7f 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1217,7 +1217,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, // across the loop nests. // We do UnrollAndJam in a separate LPM to ensure it happens before unroll if (EnableUnrollAndJam && PTO.LoopUnrolling) - FPM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel())); + FPM.addPass(createFunctionToLoopPassAdaptor( + LoopUnrollAndJamPass(Level.getSpeedupLevel()))); FPM.addPass(LoopUnrollPass(LoopUnrollOptions( Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, PTO.ForgetAllSCEVInLoopUnroll))); @@ -1300,7 +1301,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, // across the loop nests. 
// We do UnrollAndJam in a separate LPM to ensure it happens before unroll if (EnableUnrollAndJam && PTO.LoopUnrolling) { - FPM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel())); + FPM.addPass(createFunctionToLoopPassAdaptor( + LoopUnrollAndJamPass(Level.getSpeedupLevel()))); } FPM.addPass(LoopUnrollPass(LoopUnrollOptions( Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 66bbe6eea4ad..24848c29ab09 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -248,7 +248,6 @@ FUNCTION_PASS("guard-widening", GuardWideningPass()) FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass()) FUNCTION_PASS("loop-simplify", LoopSimplifyPass()) FUNCTION_PASS("loop-sink", LoopSinkPass()) -FUNCTION_PASS("loop-unroll-and-jam", LoopUnrollAndJamPass()) FUNCTION_PASS("loop-flatten", LoopFlattenPass()) FUNCTION_PASS("lowerinvoke", LowerInvokePass()) FUNCTION_PASS("lowerswitch", LowerSwitchPass()) @@ -400,6 +399,7 @@ LOOP_PASS("loop-deletion", LoopDeletionPass()) LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass()) LOOP_PASS("loop-reduce", LoopStrengthReducePass()) LOOP_PASS("indvars", IndVarSimplifyPass()) +LOOP_PASS("loop-unroll-and-jam", LoopUnrollAndJamPass()) LOOP_PASS("loop-unroll-full", LoopFullUnrollPass()) LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs())) LOOP_PASS("print", DDGAnalysisPrinterPass(dbgs())) diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp index 495906e1a763..74b7b820aedf 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include 
"llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -424,35 +425,27 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, return UnrollResult; } -static bool tryToUnrollAndJamLoop(Function &F, DominatorTree &DT, LoopInfo &LI, +static bool tryToUnrollAndJamLoop(LoopNest &LN, DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, DependenceInfo &DI, - OptimizationRemarkEmitter &ORE, - int OptLevel) { + OptimizationRemarkEmitter &ORE, int OptLevel, + bool &CurrentLoopDeleted) { bool DidSomething = false; + ArrayRef Loops = LN.getLoops(); - // The loop unroll and jam pass requires loops to be in simplified form, and - // also needs LCSSA. Since simplification may add new inner loops, it has to - // run before the legality and profitability checks. This means running the - // loop unroll and jam pass will simplify all loops, regardless of whether - // anything end up being unroll and jammed. - for (auto &L : LI) { - DidSomething |= - simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */); - DidSomething |= formLCSSARecursively(*L, DT, &LI, &SE); - } - - // Add the loop nests in the reverse order of LoopInfo. See method + // Add the loop nests in the reverse order of LN. See method // declaration. 
SmallPriorityWorklist<Loop *, 4> Worklist; - appendLoopsToWorklist(LI, Worklist); + appendLoopsToWorklist(Loops, Worklist); while (!Worklist.empty()) { Loop *L = Worklist.pop_back_val(); LoopUnrollResult Result = tryToUnrollAndJamLoop(L, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel); if (Result != LoopUnrollResult::Unmodified) DidSomething = true; + if (L->isOutermost() && Result == LoopUnrollResult::FullyUnrolled) + CurrentLoopDeleted = true; } return DidSomething; @@ -460,29 +453,35 @@ static bool tryToUnrollAndJamLoop(Function &F, DominatorTree &DT, LoopInfo &LI, namespace { -class LoopUnrollAndJam : public FunctionPass { +class LoopUnrollAndJam : public LoopPass { public: static char ID; // Pass ID, replacement for typeid unsigned OptLevel; - LoopUnrollAndJam(int OptLevel = 2) : FunctionPass(ID), OptLevel(OptLevel) { + LoopUnrollAndJam(int OptLevel = 2) : LoopPass(ID), OptLevel(OptLevel) { initializeLoopUnrollAndJamPass(*PassRegistry::getPassRegistry()); } - bool runOnFunction(Function &F) override { - if (skipFunction(F)) + bool runOnLoop(Loop *L, LPPassManager &LPM) override { + if (skipLoop(L)) return false; - auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - const TargetTransformInfo &TTI = - getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + auto *F = L->getHeader()->getParent(); + auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI(); + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F); auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); + auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F); - return tryToUnrollAndJamLoop(F, DT, LI, SE, TTI, AC, DI, ORE, OptLevel); + LoopUnrollResult Result = + tryToUnrollAndJamLoop(L, DT, LI, SE, TTI, AC, DI, ORE, OptLevel); + + if (Result == LoopUnrollResult::FullyUnrolled) + LPM.markLoopAsDeleted(*L); + + return Result != LoopUnrollResult::Unmodified; } /// This 
transformation requires natural loop information & requires that @@ -505,7 +504,10 @@ char LoopUnrollAndJam::ID = 0; INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam", "Unroll and Jam loops", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) @@ -518,19 +520,25 @@ Pass *llvm::createLoopUnrollAndJamPass(int OptLevel) { return new LoopUnrollAndJam(OptLevel); } -PreservedAnalyses LoopUnrollAndJamPass::run(Function &F, - FunctionAnalysisManager &AM) { - ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F); - LoopInfo &LI = AM.getResult<LoopAnalysis>(F); - TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); - AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F); - DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); - DependenceInfo &DI = AM.getResult<DependenceAnalysis>(F); - OptimizationRemarkEmitter &ORE = - AM.getResult<OptimizationRemarkEmitterAnalysis>(F); +PreservedAnalyses LoopUnrollAndJamPass::run(LoopNest &LN, + LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + Function &F = *LN.getParent(); + std::string LoopName = std::string(LN.getOutermostLoop().getName()); - if (!tryToUnrollAndJamLoop(F, DT, LI, SE, TTI, AC, DI, ORE, OptLevel)) + DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI); + OptimizationRemarkEmitter ORE(&F); + bool CurrentLoopDeleted = false; + + if (!tryToUnrollAndJamLoop(LN, AR.DT, AR.LI, AR.SE, AR.TTI, AR.AC, DI, ORE, + OptLevel, CurrentLoopDeleted)) return PreservedAnalyses::all(); - return getLoopPassPreservedAnalyses(); + if (CurrentLoopDeleted) + U.markLoopAsDeleted(LN.getOutermostLoop(), LoopName); + + auto PA = getLoopPassPreservedAnalyses(); + PA.preserve<LoopNestAnalysis>(); + return PA; } diff --git a/llvm/test/Transforms/LoopUnrollAndJam/innerloop.ll 
b/llvm/test/Transforms/LoopUnrollAndJam/innerloop.ll index 79c32c90174e..c3a4ebd6dede 100644 --- a/llvm/test/Transforms/LoopUnrollAndJam/innerloop.ll +++ b/llvm/test/Transforms/LoopUnrollAndJam/innerloop.ll @@ -1,5 +1,5 @@ ; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -verify-loop-info < %s -S | FileCheck %s -; RUN: opt -passes='loop-unroll-and-jam,verify' -allow-unroll-and-jam < %s -S | FileCheck %s +; RUN: opt -passes='loop(loop-unroll-and-jam),verify' -allow-unroll-and-jam < %s -S | FileCheck %s ; Check that the newly created loops to not fail to be added to LI ; This test deliberately disables UnJ on the middle loop, performing it instead on the