[LoopDist] Port to new PM

Summary:
The direct motivation for the port is to ensure that the OptRemarkEmitter
tests work with the new PM.

This remains a function pass because we not only create multiple loops
but could also version the original loop.

In the test I need to invoke opt
with -passes='require<aa>,loop-distribute'.  LoopDistribute does not
directly depend on AA; however, LAA does.  LAA uses getCachedResult, so
I *think* we need to manually pull in 'aa'.

Reviewers: davidxl, silvas

Subscribers: sanjoy, llvm-commits, mzolotukhin

Differential Revision: https://reviews.llvm.org/D22437

llvm-svn: 275811
This commit is contained in:
Adam Nemet 2016-07-18 16:29:27 +00:00
parent 79ac42a5c9
commit b2593f78ca
8 changed files with 119 additions and 35 deletions

View File

@ -177,7 +177,7 @@ void initializeLocalStackSlotPassPass(PassRegistry&);
void initializeLoopAccessLegacyAnalysisPass(PassRegistry&); void initializeLoopAccessLegacyAnalysisPass(PassRegistry&);
void initializeLoopDataPrefetchPass(PassRegistry&); void initializeLoopDataPrefetchPass(PassRegistry&);
void initializeLoopDeletionLegacyPassPass(PassRegistry&); void initializeLoopDeletionLegacyPassPass(PassRegistry&);
void initializeLoopDistributePass(PassRegistry&); void initializeLoopDistributeLegacyPass(PassRegistry&);
void initializeLoopExtractorPass(PassRegistry&); void initializeLoopExtractorPass(PassRegistry&);
void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry&); void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry&);
void initializeLoopInfoWrapperPassPass(PassRegistry&); void initializeLoopInfoWrapperPassPass(PassRegistry&);

View File

@ -0,0 +1,30 @@
//===- LoopDistribute.h - Loop Distribution Pass ----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Loop Distribution Pass. Its main focus is to
// distribute loops that cannot be vectorized due to dependence cycles. It
// tries to isolate the offending dependences into a new loop allowing
// vectorization of the remaining parts.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPDISTRIBUTE_H
#define LLVM_TRANSFORMS_SCALAR_LOOPDISTRIBUTE_H
#include "llvm/IR/PassManager.h"
namespace llvm {
/// New pass manager wrapper for loop distribution.
///
/// This is a function-level pass: run() is handed a Function and a
/// FunctionAnalysisManager rather than an individual loop.
class LoopDistributePass : public PassInfoMixin<LoopDistributePass> {
public:
/// Run loop distribution on \p F, obtaining the required analyses from
/// \p AM.
///
/// \returns the set of analyses that are still valid after the pass ran.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
} // end namespace llvm
#endif // LLVM_TRANSFORMS_SCALAR_LOOPDISTRIBUTE_H

View File

@ -92,6 +92,7 @@
#include "llvm/Transforms/Scalar/JumpThreading.h" #include "llvm/Transforms/Scalar/JumpThreading.h"
#include "llvm/Transforms/Scalar/LICM.h" #include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopDeletion.h" #include "llvm/Transforms/Scalar/LoopDeletion.h"
#include "llvm/Transforms/Scalar/LoopDistribute.h"
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/Transforms/Scalar/LoopInstSimplify.h" #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
#include "llvm/Transforms/Scalar/LoopRotation.h" #include "llvm/Transforms/Scalar/LoopRotation.h"

View File

@ -151,6 +151,7 @@ FUNCTION_PASS("mldst-motion", MergedLoadStoreMotionPass())
FUNCTION_PASS("jump-threading", JumpThreadingPass()) FUNCTION_PASS("jump-threading", JumpThreadingPass())
FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass()) FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass())
FUNCTION_PASS("lcssa", LCSSAPass()) FUNCTION_PASS("lcssa", LCSSAPass())
FUNCTION_PASS("loop-distribute", LoopDistributePass())
FUNCTION_PASS("loop-vectorize", LoopVectorizePass()) FUNCTION_PASS("loop-vectorize", LoopVectorizePass())
FUNCTION_PASS("print", PrintFunctionPass(dbgs())) FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs())) FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))

View File

@ -22,6 +22,7 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopDistribute.h"
#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLExtras.h"
@ -29,6 +30,7 @@
#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h" #include "llvm/IR/Dominators.h"
@ -597,7 +599,7 @@ public:
} }
/// \brief Try to distribute an inner-most loop. /// \brief Try to distribute an inner-most loop.
bool processLoop(LoopAccessLegacyAnalysis *LAA) { bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
assert(L->empty() && "Only process inner loops."); assert(L->empty() && "Only process inner loops.");
DEBUG(dbgs() << "\nLDist: In \"" << L->getHeader()->getParent()->getName() DEBUG(dbgs() << "\nLDist: In \"" << L->getHeader()->getParent()->getName()
@ -610,7 +612,7 @@ public:
return fail("multiple exit blocks"); return fail("multiple exit blocks");
// LAA will check that we only have a single exiting block. // LAA will check that we only have a single exiting block.
LAI = &LAA->getInfo(L); LAI = &GetLAA(*L);
// Currently, we only distribute to isolate the part of the loop with // Currently, we only distribute to isolate the part of the loop with
// dependence cycles to enable partial vectorization. // dependence cycles to enable partial vectorization.
@ -860,19 +862,50 @@ private:
Optional<bool> IsForced; Optional<bool> IsForced;
}; };
/// Shared implementation between new and old PMs.
///
/// Gathers every inner-most loop reachable from \p LI and tries to distribute
/// each one. A loop is processed when its own forced setting says so, or, if
/// it has none, when \p ProcessAllLoops is set. \p GetLAA supplies the
/// LoopAccessInfo for a given loop.
///
/// \returns true if any processLoop call reported a change.
static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
                    ScalarEvolution *SE, OptimizationRemarkEmitter *ORE,
                    std::function<const LoopAccessInfo &(Loop &)> &GetLAA,
                    bool ProcessAllLoops) {
  // Snapshot the inner-most loops up front. Distributing a loop creates new
  // loops and can invalidate iterators across the loops, so we must not
  // transform while still walking the loop tree.
  SmallVector<Loop *, 8> InnerLoops;
  for (Loop *Root : *LI) {
    for (Loop *Candidate : depth_first(Root)) {
      // Only inner-most loops are handled.
      if (!Candidate->empty())
        continue;
      InnerLoops.push_back(Candidate);
    }
  }

  // Visit the collected loops and attempt distribution on each.
  bool MadeChange = false;
  for (Loop *Inner : InnerLoops) {
    LoopDistributeForLoop Distributor(Inner, &F, LI, DT, SE, ORE);

    // A per-loop forced enable/disable takes precedence; otherwise fall back
    // to the global flag.
    const bool ShouldProcess =
        Distributor.isForced().getValueOr(ProcessAllLoops);
    if (!ShouldProcess)
      continue;

    if (Distributor.processLoop(GetLAA))
      MadeChange = true;
  }

  return MadeChange;
}
/// \brief The pass class. /// \brief The pass class.
class LoopDistribute : public FunctionPass { class LoopDistributeLegacy : public FunctionPass {
public: public:
/// \p ProcessAllLoopsByDefault specifies whether loop distribution should be /// \p ProcessAllLoopsByDefault specifies whether loop distribution should be
/// performed by default. Pass -enable-loop-distribute={0,1} overrides this /// performed by default. Pass -enable-loop-distribute={0,1} overrides this
/// default. We use this to keep LoopDistribution off by default when invoked /// default. We use this to keep LoopDistribution off by default when invoked
/// from the optimization pipeline but on when invoked explicitly from opt. /// from the optimization pipeline but on when invoked explicitly from opt.
LoopDistribute(bool ProcessAllLoopsByDefault = true) LoopDistributeLegacy(bool ProcessAllLoopsByDefault = true)
: FunctionPass(ID), ProcessAllLoops(ProcessAllLoopsByDefault) { : FunctionPass(ID), ProcessAllLoops(ProcessAllLoopsByDefault) {
// The default is set by the caller. // The default is set by the caller.
if (EnableLoopDistribute.getNumOccurrences() > 0) if (EnableLoopDistribute.getNumOccurrences() > 0)
ProcessAllLoops = EnableLoopDistribute; ProcessAllLoops = EnableLoopDistribute;
initializeLoopDistributePass(*PassRegistry::getPassRegistry()); initializeLoopDistributeLegacyPass(*PassRegistry::getPassRegistry());
} }
bool runOnFunction(Function &F) override { bool runOnFunction(Function &F) override {
@ -884,31 +917,10 @@ public:
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
// Build up a worklist of inner-loops to vectorize. This is necessary as the return runImpl(F, LI, DT, SE, ORE, GetLAA, ProcessAllLoops);
// act of distributing a loop creates new loops and can invalidate iterators
// across the loops.
SmallVector<Loop *, 8> Worklist;
for (Loop *TopLevelLoop : *LI)
for (Loop *L : depth_first(TopLevelLoop))
// We only handle inner-most loops.
if (L->empty())
Worklist.push_back(L);
// Now walk the identified inner loops.
bool Changed = false;
for (Loop *L : Worklist) {
LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE);
// If distribution was forced for the specific loop to be
// enabled/disabled, follow that. Otherwise use the global flag.
if (LDL.isForced().getValueOr(ProcessAllLoops))
Changed |= LDL.processLoop(LAA);
}
// Process each loop nest in the function.
return Changed;
} }
void getAnalysisUsage(AnalysisUsage &AU) const override { void getAnalysisUsage(AnalysisUsage &AU) const override {
@ -930,19 +942,49 @@ private:
}; };
} // anonymous namespace } // anonymous namespace
char LoopDistribute::ID; PreservedAnalyses LoopDistributePass::run(Function &F,
FunctionAnalysisManager &AM) {
// New pass manager entry point: collect the analyses runImpl needs from AM,
// run the shared implementation, and report which analyses remain valid.
//
// FIXME: This does not currently match the behavior from the old PM.
// ProcessAllLoops with the old PM defaults to true when invoked from opt and
// false when invoked from the optimization pipeline.
bool ProcessAllLoops = false;
// An explicit -enable-loop-distribute on the command line overrides the
// default chosen above.
if (EnableLoopDistribute.getNumOccurrences() > 0)
ProcessAllLoops = EnableLoopDistribute;
auto &LI = AM.getResult<LoopAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
// LoopAccessAnalysis results are requested per loop, so go through the loop
// analysis manager obtained from the function-level proxy.
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
// Wrap the lookup in the callback type runImpl expects; the legacy pass
// passes its own callback of the same type.
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & {
return LAM.getResult<LoopAccessAnalysis>(L);
};
bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA, ProcessAllLoops);
// Nothing was transformed, so every analysis is still valid.
if (!Changed)
return PreservedAnalyses::all();
// After a change, only the loop info and dominator tree are reported as
// preserved.
PreservedAnalyses PA;
PA.preserve<LoopAnalysis>();
PA.preserve<DominatorTreeAnalysis>();
return PA;
}
char LoopDistributeLegacy::ID;
static const char ldist_name[] = "Loop Distribition"; static const char ldist_name[] = "Loop Distribition";
INITIALIZE_PASS_BEGIN(LoopDistribute, LDIST_NAME, ldist_name, false, false) INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis) INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(LoopDistribute, LDIST_NAME, ldist_name, false, false) INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)
namespace llvm { namespace llvm {
FunctionPass *createLoopDistributePass(bool ProcessAllLoopsByDefault) { FunctionPass *createLoopDistributePass(bool ProcessAllLoopsByDefault) {
return new LoopDistribute(ProcessAllLoopsByDefault); return new LoopDistributeLegacy(ProcessAllLoopsByDefault);
} }
} }

View File

@ -86,7 +86,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializePlaceBackedgeSafepointsImplPass(Registry); initializePlaceBackedgeSafepointsImplPass(Registry);
initializePlaceSafepointsPass(Registry); initializePlaceSafepointsPass(Registry);
initializeFloat2IntLegacyPassPass(Registry); initializeFloat2IntLegacyPassPass(Registry);
initializeLoopDistributePass(Registry); initializeLoopDistributeLegacyPass(Registry);
initializeLoopLoadEliminationPass(Registry); initializeLoopLoadEliminationPass(Registry);
initializeLoopSimplifyCFGLegacyPassPass(Registry); initializeLoopSimplifyCFGLegacyPassPass(Registry);
initializeLoopVersioningPassPass(Registry); initializeLoopVersioningPassPass(Registry);

View File

@ -5,6 +5,11 @@
; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \ ; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \
; RUN: -debug-only=block-freq < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS ; RUN: -debug-only=block-freq < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS
; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \
; RUN: -debug-only=block-freq -pass-remarks-with-hotness < %s 2>&1 | FileCheck %s --check-prefix=HOTNESS
; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \
; RUN: -debug-only=block-freq < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS
; REQUIRES: asserts ; REQUIRES: asserts
; HOTNESS: block-frequency: forced ; HOTNESS: block-frequency: forced

View File

@ -3,6 +3,11 @@
; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \ ; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \
; RUN: < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS ; RUN: < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS
; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \
; RUN: -pass-remarks-with-hotness < %s 2>&1 | FileCheck %s --check-prefix=HOTNESS
; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \
; RUN: < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS
; REQUIRES: asserts ; REQUIRES: asserts
; This is the input program: ; This is the input program: