[LoopGuardWidening] Split out a loop pass version of GuardWidening

The idea is to have a pass which performs the same transformation as GuardWidening, but can be run within a loop pass manager without disrupting the pass manager structure.  As demonstrated by the test case, this doesn't quite get there because of issues with post dom, but it gives a good step in the right direction.  the motivation is purely to reduce compile time since we can now preserve locality during the loop walk.

This patch only includes a legacy pass.  A follow up will add a new style pass as well.

llvm-svn: 331060
This commit is contained in:
Philip Reames 2018-04-27 17:29:10 +00:00
parent f6d595bd44
commit 9258e9d190
6 changed files with 128 additions and 9 deletions

View File

@ -162,6 +162,7 @@ void initializeGlobalOptLegacyPassPass(PassRegistry&);
void initializeGlobalSplitPass(PassRegistry&); void initializeGlobalSplitPass(PassRegistry&);
void initializeGlobalsAAWrapperPassPass(PassRegistry&); void initializeGlobalsAAWrapperPassPass(PassRegistry&);
void initializeGuardWideningLegacyPassPass(PassRegistry&); void initializeGuardWideningLegacyPassPass(PassRegistry&);
void initializeLoopGuardWideningLegacyPassPass(PassRegistry&);
void initializeIPCPPass(PassRegistry&); void initializeIPCPPass(PassRegistry&);
void initializeIPSCCPLegacyPassPass(PassRegistry&); void initializeIPSCCPLegacyPassPass(PassRegistry&);
void initializeIRTranslatorPass(PassRegistry&); void initializeIRTranslatorPass(PassRegistry&);

View File

@ -111,6 +111,7 @@ namespace {
(void) llvm::createGlobalOptimizerPass(); (void) llvm::createGlobalOptimizerPass();
(void) llvm::createGlobalsAAWrapperPass(); (void) llvm::createGlobalsAAWrapperPass();
(void) llvm::createGuardWideningPass(); (void) llvm::createGuardWideningPass();
(void) llvm::createLoopGuardWideningPass();
(void) llvm::createIPConstantPropagationPass(); (void) llvm::createIPConstantPropagationPass();
(void) llvm::createIPSCCPPass(); (void) llvm::createIPSCCPPass();
(void) llvm::createInductiveRangeCheckEliminationPass(); (void) llvm::createInductiveRangeCheckEliminationPass();

View File

@ -97,6 +97,16 @@ FunctionPass *createAggressiveDCEPass();
FunctionPass *createGuardWideningPass(); FunctionPass *createGuardWideningPass();
//===----------------------------------------------------------------------===//
//
// LoopGuardWidening - Analogous to the GuardWidening pass, but restricted to a
// single loop at a time for use within a LoopPassManager. Desired effect is
// to widen guards into preheader or a single guard within loop if that's not
// possible.
//
Pass *createLoopGuardWideningPass();
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// //
// BitTrackingDCE - This pass uses a bit-tracking DCE algorithm in order to // BitTrackingDCE - This pass uses a bit-tracking DCE algorithm in order to

View File

@ -40,9 +40,11 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/GuardWidening.h" #include "llvm/Transforms/Scalar/GuardWidening.h"
#include <functional>
#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h" #include "llvm/IR/ConstantRange.h"
@ -53,6 +55,7 @@
#include "llvm/Support/Debug.h" #include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h" #include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm; using namespace llvm;
@ -65,6 +68,11 @@ class GuardWideningImpl {
PostDominatorTree &PDT; PostDominatorTree &PDT;
LoopInfo &LI; LoopInfo &LI;
/// Together, these describe the region of interest. This might be all of
/// the blocks within a function, or only a given loop's blocks and preheader.
DomTreeNode *Root;
std::function<bool(BasicBlock*)> BlockFilter;
/// The set of guards whose conditions have been widened into dominating /// The set of guards whose conditions have been widened into dominating
/// guards. /// guards.
SmallVector<IntrinsicInst *, 16> EliminatedGuards; SmallVector<IntrinsicInst *, 16> EliminatedGuards;
@ -205,9 +213,11 @@ class GuardWideningImpl {
} }
public: public:
explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree &PDT, explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree &PDT,
LoopInfo &LI) LoopInfo &LI, DomTreeNode *Root,
: DT(DT), PDT(PDT), LI(LI) {} std::function<bool(BasicBlock*)> BlockFilter)
: DT(DT), PDT(PDT), LI(LI), Root(Root), BlockFilter(BlockFilter) {}
/// The entry point for this pass. /// The entry point for this pass.
bool run(); bool run();
@ -220,9 +230,12 @@ bool GuardWideningImpl::run() {
DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> GuardsInBlock; DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> GuardsInBlock;
bool Changed = false; bool Changed = false;
for (auto DFI = df_begin(DT.getRootNode()), DFE = df_end(DT.getRootNode()); for (auto DFI = df_begin(Root), DFE = df_end(Root);
DFI != DFE; ++DFI) { DFI != DFE; ++DFI) {
auto *BB = (*DFI)->getBlock(); auto *BB = (*DFI)->getBlock();
if (!BlockFilter(BB))
continue;
auto &CurrentList = GuardsInBlock[BB]; auto &CurrentList = GuardsInBlock[BB];
for (auto &I : *BB) for (auto &I : *BB)
@ -233,6 +246,7 @@ bool GuardWideningImpl::run() {
Changed |= eliminateGuardViaWidening(II, DFI, GuardsInBlock); Changed |= eliminateGuardViaWidening(II, DFI, GuardsInBlock);
} }
assert(EliminatedGuards.empty() || Changed);
for (auto *II : EliminatedGuards) for (auto *II : EliminatedGuards)
if (!WidenedGuards.count(II)) if (!WidenedGuards.count(II))
II->eraseFromParent(); II->eraseFromParent();
@ -252,6 +266,8 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
// for the most profit. // for the most profit.
for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) { for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) {
auto *CurBB = DFSI.getPath(i)->getBlock(); auto *CurBB = DFSI.getPath(i)->getBlock();
if (!BlockFilter(CurBB))
break;
auto *CurLoop = LI.getLoopFor(CurBB); auto *CurLoop = LI.getLoopFor(CurBB);
assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!"); assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!");
const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second; const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second;
@ -647,7 +663,8 @@ PreservedAnalyses GuardWideningPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F); auto &LI = AM.getResult<LoopAnalysis>(F);
auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F); auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
if (!GuardWideningImpl(DT, PDT, LI).run()) if (!GuardWideningImpl(DT, PDT, LI, DT.getRootNode(),
[](BasicBlock*) { return true; } ).run())
return PreservedAnalyses::all(); return PreservedAnalyses::all();
PreservedAnalyses PA; PreservedAnalyses PA;
@ -658,7 +675,6 @@ PreservedAnalyses GuardWideningPass::run(Function &F,
namespace { namespace {
struct GuardWideningLegacyPass : public FunctionPass { struct GuardWideningLegacyPass : public FunctionPass {
static char ID; static char ID;
GuardWideningPass Impl;
GuardWideningLegacyPass() : FunctionPass(ID) { GuardWideningLegacyPass() : FunctionPass(ID) {
initializeGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry()); initializeGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry());
@ -667,10 +683,11 @@ struct GuardWideningLegacyPass : public FunctionPass {
bool runOnFunction(Function &F) override { bool runOnFunction(Function &F) override {
if (skipFunction(F)) if (skipFunction(F))
return false; return false;
return GuardWideningImpl( auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
getAnalysis<DominatorTreeWrapperPass>().getDomTree(), auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(), auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
getAnalysis<LoopInfoWrapperPass>().getLoopInfo()).run(); return GuardWideningImpl(DT, PDT, LI, DT.getRootNode(),
[](BasicBlock*) { return true; } ).run();
} }
void getAnalysisUsage(AnalysisUsage &AU) const override { void getAnalysisUsage(AnalysisUsage &AU) const override {
@ -680,9 +697,43 @@ struct GuardWideningLegacyPass : public FunctionPass {
AU.addRequired<LoopInfoWrapperPass>(); AU.addRequired<LoopInfoWrapperPass>();
} }
}; };
/// Same as above, but restricted to a single loop at a time. Can be
/// scheduled with other loop passes w/o breaking out of LPM
struct LoopGuardWideningLegacyPass : public LoopPass {
static char ID;
LoopGuardWideningLegacyPass() : LoopPass(ID) {
initializeLoopGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override {
if (skipLoop(L))
return false;
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
BasicBlock *RootBB = L->getLoopPredecessor();
if (!RootBB)
RootBB = L->getHeader();
auto BlockFilter = [&](BasicBlock *BB) {
return BB == RootBB || L->contains(BB);
};
return GuardWideningImpl(DT, PDT, LI,
DT.getNode(RootBB), BlockFilter).run();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
getLoopAnalysisUsage(AU);
AU.addRequired<PostDominatorTreeWrapperPass>();
AU.addPreserved<PostDominatorTreeWrapperPass>();
}
};
} }
char GuardWideningLegacyPass::ID = 0; char GuardWideningLegacyPass::ID = 0;
char LoopGuardWideningLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass, "guard-widening", "Widen guards", INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass, "guard-widening", "Widen guards",
false, false) false, false)
@ -692,6 +743,20 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(GuardWideningLegacyPass, "guard-widening", "Widen guards", INITIALIZE_PASS_END(GuardWideningLegacyPass, "guard-widening", "Widen guards",
false, false) false, false)
INITIALIZE_PASS_BEGIN(LoopGuardWideningLegacyPass, "loop-guard-widening",
"Widen guards (within a single loop, as a loop pass)",
false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopGuardWideningLegacyPass, "loop-guard-widening",
"Widen guards (within a single loop, as a loop pass)",
false, false)
FunctionPass *llvm::createGuardWideningPass() { FunctionPass *llvm::createGuardWideningPass() {
return new GuardWideningLegacyPass(); return new GuardWideningLegacyPass();
} }
Pass *llvm::createLoopGuardWideningPass() {
return new LoopGuardWideningLegacyPass();
}

View File

@ -45,6 +45,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeScalarizerPass(Registry); initializeScalarizerPass(Registry);
initializeDSELegacyPassPass(Registry); initializeDSELegacyPassPass(Registry);
initializeGuardWideningLegacyPassPass(Registry); initializeGuardWideningLegacyPassPass(Registry);
initializeLoopGuardWideningLegacyPassPass(Registry);
initializeGVNLegacyPassPass(Registry); initializeGVNLegacyPassPass(Registry);
initializeNewGVNLegacyPassPass(Registry); initializeNewGVNLegacyPassPass(Registry);
initializeEarlyCSELegacyPassPass(Registry); initializeEarlyCSELegacyPassPass(Registry);

View File

@ -0,0 +1,41 @@
; RUN: opt -S -licm -loop-guard-widening -licm -debug-pass=Structure < %s 2>&1 | FileCheck %s
; Main point of this test is to check the scheduling -- there should be
; no analysis passes needed between LICM and LoopGuardWidening
; TODO: Because guard widdening currently requires post-dom, we end up
; breaking the loop pass manager to compute it. Need to either make all
; loop passes preserve postdom (hard) or make it optional in guard widdening
; CHECK: Loop Pass Manager
; CHECK: Loop Invariant Code Motion
; CHECK: Post-Dominator Tree Construction
; CHECK: Loop Pass Manager
; CHECK: Widen guards (within a single loop, as a loop pass)
; CHECK: Loop Invariant Code Motion
declare void @llvm.experimental.guard(i1,...)
define void @iter(i32 %a, i32 %b, i1* %c_p) {
; CHECK-LABEL @iter
; CHECK: %cond_0 = icmp ult i32 %a, 10
; CHECK: %cond_1 = icmp ult i32 %b, 10
; CHECK: %wide.chk = and i1 %cond_0, %cond_1
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
entry:
%cond_0 = icmp ult i32 %a, 10
call void (i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
br label %loop
loop: ; preds = %loop.preheader, %loop
%cond_1 = icmp ult i32 %b, 10
call void (i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
%cnd = load i1, i1* %c_p
br i1 %cnd, label %loop, label %leave.loopexit
leave.loopexit: ; preds = %loop
br label %leave
leave: ; preds = %leave.loopexit, %entry
ret void
}