forked from OSchip/llvm-project
[LoopDist] Port to new PM
Summary: The direct motivation for the port is to ensure that the OptRemarkEmitter tests work with the new PM. This remains a function pass because we not only create multiple loops but could also version the original loop. In the test I need to invoke opt with -passes='require<aa>,loop-distribute'. LoopDistribute does not directly depend on AA; however, LAA does. LAA uses getCachedResult so I *think* we need to manually pull in 'aa'. Reviewers: davidxl, silvas Subscribers: sanjoy, llvm-commits, mzolotukhin Differential Revision: https://reviews.llvm.org/D22437 llvm-svn: 275811
This commit is contained in:
parent
79ac42a5c9
commit
b2593f78ca
|
@ -177,7 +177,7 @@ void initializeLocalStackSlotPassPass(PassRegistry&);
|
||||||
void initializeLoopAccessLegacyAnalysisPass(PassRegistry&);
|
void initializeLoopAccessLegacyAnalysisPass(PassRegistry&);
|
||||||
void initializeLoopDataPrefetchPass(PassRegistry&);
|
void initializeLoopDataPrefetchPass(PassRegistry&);
|
||||||
void initializeLoopDeletionLegacyPassPass(PassRegistry&);
|
void initializeLoopDeletionLegacyPassPass(PassRegistry&);
|
||||||
void initializeLoopDistributePass(PassRegistry&);
|
void initializeLoopDistributeLegacyPass(PassRegistry&);
|
||||||
void initializeLoopExtractorPass(PassRegistry&);
|
void initializeLoopExtractorPass(PassRegistry&);
|
||||||
void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry&);
|
void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry&);
|
||||||
void initializeLoopInfoWrapperPassPass(PassRegistry&);
|
void initializeLoopInfoWrapperPassPass(PassRegistry&);
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
//===- LoopDistribute.h - Loop Distribution Pass ----------------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file implements the Loop Distribution Pass. Its main focus is to
|
||||||
|
// distribute loops that cannot be vectorized due to dependence cycles. It
|
||||||
|
// tries to isolate the offending dependences into a new loop allowing
|
||||||
|
// vectorization of the remaining parts.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPDISTRIBUTE_H
|
||||||
|
#define LLVM_TRANSFORMS_SCALAR_LOOPDISTRIBUTE_H
|
||||||
|
|
||||||
|
#include "llvm/IR/PassManager.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
class LoopDistributePass : public PassInfoMixin<LoopDistributePass> {
|
||||||
|
public:
|
||||||
|
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
|
||||||
|
};
|
||||||
|
} // end namespace llvm
|
||||||
|
|
||||||
|
#endif // LLVM_TRANSFORMS_SCALAR_LOOPDISTRIBUTE_H
|
|
@ -92,6 +92,7 @@
|
||||||
#include "llvm/Transforms/Scalar/JumpThreading.h"
|
#include "llvm/Transforms/Scalar/JumpThreading.h"
|
||||||
#include "llvm/Transforms/Scalar/LICM.h"
|
#include "llvm/Transforms/Scalar/LICM.h"
|
||||||
#include "llvm/Transforms/Scalar/LoopDeletion.h"
|
#include "llvm/Transforms/Scalar/LoopDeletion.h"
|
||||||
|
#include "llvm/Transforms/Scalar/LoopDistribute.h"
|
||||||
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
|
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
|
||||||
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
|
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
|
||||||
#include "llvm/Transforms/Scalar/LoopRotation.h"
|
#include "llvm/Transforms/Scalar/LoopRotation.h"
|
||||||
|
|
|
@ -151,6 +151,7 @@ FUNCTION_PASS("mldst-motion", MergedLoadStoreMotionPass())
|
||||||
FUNCTION_PASS("jump-threading", JumpThreadingPass())
|
FUNCTION_PASS("jump-threading", JumpThreadingPass())
|
||||||
FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass())
|
FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass())
|
||||||
FUNCTION_PASS("lcssa", LCSSAPass())
|
FUNCTION_PASS("lcssa", LCSSAPass())
|
||||||
|
FUNCTION_PASS("loop-distribute", LoopDistributePass())
|
||||||
FUNCTION_PASS("loop-vectorize", LoopVectorizePass())
|
FUNCTION_PASS("loop-vectorize", LoopVectorizePass())
|
||||||
FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
|
FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
|
||||||
FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
|
FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "llvm/Transforms/Scalar/LoopDistribute.h"
|
||||||
#include "llvm/ADT/DepthFirstIterator.h"
|
#include "llvm/ADT/DepthFirstIterator.h"
|
||||||
#include "llvm/ADT/EquivalenceClasses.h"
|
#include "llvm/ADT/EquivalenceClasses.h"
|
||||||
#include "llvm/ADT/STLExtras.h"
|
#include "llvm/ADT/STLExtras.h"
|
||||||
|
@ -29,6 +30,7 @@
|
||||||
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||||
#include "llvm/Analysis/LoopAccessAnalysis.h"
|
#include "llvm/Analysis/LoopAccessAnalysis.h"
|
||||||
#include "llvm/Analysis/LoopInfo.h"
|
#include "llvm/Analysis/LoopInfo.h"
|
||||||
|
#include "llvm/Analysis/LoopPassManager.h"
|
||||||
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
|
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
|
||||||
#include "llvm/IR/DiagnosticInfo.h"
|
#include "llvm/IR/DiagnosticInfo.h"
|
||||||
#include "llvm/IR/Dominators.h"
|
#include "llvm/IR/Dominators.h"
|
||||||
|
@ -597,7 +599,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \brief Try to distribute an inner-most loop.
|
/// \brief Try to distribute an inner-most loop.
|
||||||
bool processLoop(LoopAccessLegacyAnalysis *LAA) {
|
bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
|
||||||
assert(L->empty() && "Only process inner loops.");
|
assert(L->empty() && "Only process inner loops.");
|
||||||
|
|
||||||
DEBUG(dbgs() << "\nLDist: In \"" << L->getHeader()->getParent()->getName()
|
DEBUG(dbgs() << "\nLDist: In \"" << L->getHeader()->getParent()->getName()
|
||||||
|
@ -610,7 +612,7 @@ public:
|
||||||
return fail("multiple exit blocks");
|
return fail("multiple exit blocks");
|
||||||
|
|
||||||
// LAA will check that we only have a single exiting block.
|
// LAA will check that we only have a single exiting block.
|
||||||
LAI = &LAA->getInfo(L);
|
LAI = &GetLAA(*L);
|
||||||
|
|
||||||
// Currently, we only distribute to isolate the part of the loop with
|
// Currently, we only distribute to isolate the part of the loop with
|
||||||
// dependence cycles to enable partial vectorization.
|
// dependence cycles to enable partial vectorization.
|
||||||
|
@ -860,19 +862,50 @@ private:
|
||||||
Optional<bool> IsForced;
|
Optional<bool> IsForced;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Shared implementation between new and old PMs.
|
||||||
|
static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
|
||||||
|
ScalarEvolution *SE, OptimizationRemarkEmitter *ORE,
|
||||||
|
std::function<const LoopAccessInfo &(Loop &)> &GetLAA,
|
||||||
|
bool ProcessAllLoops) {
|
||||||
|
// Build up a worklist of inner-loops to distribute. This is necessary as the
|
||||||
|
// act of distributing a loop creates new loops and can invalidate iterators
|
||||||
|
// across the loops.
|
||||||
|
SmallVector<Loop *, 8> Worklist;
|
||||||
|
|
||||||
|
for (Loop *TopLevelLoop : *LI)
|
||||||
|
for (Loop *L : depth_first(TopLevelLoop))
|
||||||
|
// We only handle inner-most loops.
|
||||||
|
if (L->empty())
|
||||||
|
Worklist.push_back(L);
|
||||||
|
|
||||||
|
// Now walk the identified inner loops.
|
||||||
|
bool Changed = false;
|
||||||
|
for (Loop *L : Worklist) {
|
||||||
|
LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE);
|
||||||
|
|
||||||
|
// If distribution was forced for the specific loop to be
|
||||||
|
// enabled/disabled, follow that. Otherwise use the global flag.
|
||||||
|
if (LDL.isForced().getValueOr(ProcessAllLoops))
|
||||||
|
Changed |= LDL.processLoop(GetLAA);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process each loop nest in the function.
|
||||||
|
return Changed;
|
||||||
|
}
|
||||||
|
|
||||||
/// \brief The pass class.
|
/// \brief The pass class.
|
||||||
class LoopDistribute : public FunctionPass {
|
class LoopDistributeLegacy : public FunctionPass {
|
||||||
public:
|
public:
|
||||||
/// \p ProcessAllLoopsByDefault specifies whether loop distribution should be
|
/// \p ProcessAllLoopsByDefault specifies whether loop distribution should be
|
||||||
/// performed by default. Pass -enable-loop-distribute={0,1} overrides this
|
/// performed by default. Pass -enable-loop-distribute={0,1} overrides this
|
||||||
/// default. We use this to keep LoopDistribution off by default when invoked
|
/// default. We use this to keep LoopDistribution off by default when invoked
|
||||||
/// from the optimization pipeline but on when invoked explicitly from opt.
|
/// from the optimization pipeline but on when invoked explicitly from opt.
|
||||||
LoopDistribute(bool ProcessAllLoopsByDefault = true)
|
LoopDistributeLegacy(bool ProcessAllLoopsByDefault = true)
|
||||||
: FunctionPass(ID), ProcessAllLoops(ProcessAllLoopsByDefault) {
|
: FunctionPass(ID), ProcessAllLoops(ProcessAllLoopsByDefault) {
|
||||||
// The default is set by the caller.
|
// The default is set by the caller.
|
||||||
if (EnableLoopDistribute.getNumOccurrences() > 0)
|
if (EnableLoopDistribute.getNumOccurrences() > 0)
|
||||||
ProcessAllLoops = EnableLoopDistribute;
|
ProcessAllLoops = EnableLoopDistribute;
|
||||||
initializeLoopDistributePass(*PassRegistry::getPassRegistry());
|
initializeLoopDistributeLegacyPass(*PassRegistry::getPassRegistry());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool runOnFunction(Function &F) override {
|
bool runOnFunction(Function &F) override {
|
||||||
|
@ -884,31 +917,10 @@ public:
|
||||||
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
||||||
auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
|
auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
|
||||||
auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
|
auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
|
||||||
|
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
|
||||||
|
[&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
|
||||||
|
|
||||||
// Build up a worklist of inner-loops to vectorize. This is necessary as the
|
return runImpl(F, LI, DT, SE, ORE, GetLAA, ProcessAllLoops);
|
||||||
// act of distributing a loop creates new loops and can invalidate iterators
|
|
||||||
// across the loops.
|
|
||||||
SmallVector<Loop *, 8> Worklist;
|
|
||||||
|
|
||||||
for (Loop *TopLevelLoop : *LI)
|
|
||||||
for (Loop *L : depth_first(TopLevelLoop))
|
|
||||||
// We only handle inner-most loops.
|
|
||||||
if (L->empty())
|
|
||||||
Worklist.push_back(L);
|
|
||||||
|
|
||||||
// Now walk the identified inner loops.
|
|
||||||
bool Changed = false;
|
|
||||||
for (Loop *L : Worklist) {
|
|
||||||
LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE);
|
|
||||||
|
|
||||||
// If distribution was forced for the specific loop to be
|
|
||||||
// enabled/disabled, follow that. Otherwise use the global flag.
|
|
||||||
if (LDL.isForced().getValueOr(ProcessAllLoops))
|
|
||||||
Changed |= LDL.processLoop(LAA);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process each loop nest in the function.
|
|
||||||
return Changed;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||||
|
@ -930,19 +942,49 @@ private:
|
||||||
};
|
};
|
||||||
} // anonymous namespace
|
} // anonymous namespace
|
||||||
|
|
||||||
char LoopDistribute::ID;
|
PreservedAnalyses LoopDistributePass::run(Function &F,
|
||||||
|
FunctionAnalysisManager &AM) {
|
||||||
|
// FIXME: This does not currently match the behavior from the old PM.
|
||||||
|
// ProcessAllLoops with the old PM defaults to true when invoked from opt and
|
||||||
|
// false when invoked from the optimization pipeline.
|
||||||
|
bool ProcessAllLoops = false;
|
||||||
|
if (EnableLoopDistribute.getNumOccurrences() > 0)
|
||||||
|
ProcessAllLoops = EnableLoopDistribute;
|
||||||
|
|
||||||
|
auto &LI = AM.getResult<LoopAnalysis>(F);
|
||||||
|
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
|
||||||
|
auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
|
||||||
|
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
|
||||||
|
|
||||||
|
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
|
||||||
|
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
|
||||||
|
[&](Loop &L) -> const LoopAccessInfo & {
|
||||||
|
return LAM.getResult<LoopAccessAnalysis>(L);
|
||||||
|
};
|
||||||
|
|
||||||
|
bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA, ProcessAllLoops);
|
||||||
|
if (!Changed)
|
||||||
|
return PreservedAnalyses::all();
|
||||||
|
PreservedAnalyses PA;
|
||||||
|
PA.preserve<LoopAnalysis>();
|
||||||
|
PA.preserve<DominatorTreeAnalysis>();
|
||||||
|
return PA;
|
||||||
|
}
|
||||||
|
|
||||||
|
char LoopDistributeLegacy::ID;
|
||||||
static const char ldist_name[] = "Loop Distribition";
|
static const char ldist_name[] = "Loop Distribition";
|
||||||
|
|
||||||
INITIALIZE_PASS_BEGIN(LoopDistribute, LDIST_NAME, ldist_name, false, false)
|
INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
|
||||||
|
false)
|
||||||
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
|
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
|
||||||
INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
|
INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
|
||||||
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
||||||
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
|
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
|
||||||
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
|
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
|
||||||
INITIALIZE_PASS_END(LoopDistribute, LDIST_NAME, ldist_name, false, false)
|
INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)
|
||||||
|
|
||||||
namespace llvm {
|
namespace llvm {
|
||||||
FunctionPass *createLoopDistributePass(bool ProcessAllLoopsByDefault) {
|
FunctionPass *createLoopDistributePass(bool ProcessAllLoopsByDefault) {
|
||||||
return new LoopDistribute(ProcessAllLoopsByDefault);
|
return new LoopDistributeLegacy(ProcessAllLoopsByDefault);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -86,7 +86,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
|
||||||
initializePlaceBackedgeSafepointsImplPass(Registry);
|
initializePlaceBackedgeSafepointsImplPass(Registry);
|
||||||
initializePlaceSafepointsPass(Registry);
|
initializePlaceSafepointsPass(Registry);
|
||||||
initializeFloat2IntLegacyPassPass(Registry);
|
initializeFloat2IntLegacyPassPass(Registry);
|
||||||
initializeLoopDistributePass(Registry);
|
initializeLoopDistributeLegacyPass(Registry);
|
||||||
initializeLoopLoadEliminationPass(Registry);
|
initializeLoopLoadEliminationPass(Registry);
|
||||||
initializeLoopSimplifyCFGLegacyPassPass(Registry);
|
initializeLoopSimplifyCFGLegacyPassPass(Registry);
|
||||||
initializeLoopVersioningPassPass(Registry);
|
initializeLoopVersioningPassPass(Registry);
|
||||||
|
|
|
@ -5,6 +5,11 @@
|
||||||
; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \
|
; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \
|
||||||
; RUN: -debug-only=block-freq < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS
|
; RUN: -debug-only=block-freq < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS
|
||||||
|
|
||||||
|
; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \
|
||||||
|
; RUN: -debug-only=block-freq -pass-remarks-with-hotness < %s 2>&1 | FileCheck %s --check-prefix=HOTNESS
|
||||||
|
; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \
|
||||||
|
; RUN: -debug-only=block-freq < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS
|
||||||
|
|
||||||
; REQUIRES: asserts
|
; REQUIRES: asserts
|
||||||
|
|
||||||
; HOTNESS: block-frequency: forced
|
; HOTNESS: block-frequency: forced
|
||||||
|
|
|
@ -3,6 +3,11 @@
|
||||||
; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \
|
; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \
|
||||||
; RUN: < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS
|
; RUN: < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS
|
||||||
|
|
||||||
|
; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \
|
||||||
|
; RUN: -pass-remarks-with-hotness < %s 2>&1 | FileCheck %s --check-prefix=HOTNESS
|
||||||
|
; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \
|
||||||
|
; RUN: < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS
|
||||||
|
|
||||||
; REQUIRES: asserts
|
; REQUIRES: asserts
|
||||||
|
|
||||||
; This is the input program:
|
; This is the input program:
|
||||||
|
|
Loading…
Reference in New Issue