forked from OSchip/llvm-project
Eliminate the use of dominance frontiers in PromoteMemToReg. In addition to
eliminating a potentially quadratic data structure, this also gives a 17% speedup when running -scalarrepl on test-suite + SPEC2000 + SPEC2006. My initial experiment gave a greater speedup around 25%, but I moved the dominator tree level computation from dominator tree construction to PromoteMemToReg. Since this approach to computing IDFs has a much lower overhead than the old code using precomputed DFs, it is worth looking at using this new code for the second scalarrepl pass as well. llvm-svn: 123609
This commit is contained in:
parent
7d92ee3852
commit
814cd9233e
|
@ -38,8 +38,7 @@ bool isAllocaPromotable(const AllocaInst *AI);
|
|||
/// made to the IR.
|
||||
///
|
||||
void PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
|
||||
DominatorTree &DT, DominanceFrontier &DF,
|
||||
AliasSetTracker *AST = 0);
|
||||
DominatorTree &DT, AliasSetTracker *AST = 0);
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
|
|
|
@ -152,7 +152,6 @@ namespace {
|
|||
// will not alter the CFG, so say so.
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.addRequired<DominatorTree>();
|
||||
AU.addRequired<DominanceFrontier>();
|
||||
AU.setPreservesCFG();
|
||||
}
|
||||
};
|
||||
|
@ -180,7 +179,6 @@ char SROA_SSAUp::ID = 0;
|
|||
INITIALIZE_PASS_BEGIN(SROA_DF, "scalarrepl",
|
||||
"Scalar Replacement of Aggregates (DF)", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
|
||||
INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
|
||||
INITIALIZE_PASS_END(SROA_DF, "scalarrepl",
|
||||
"Scalar Replacement of Aggregates (DF)", false, false)
|
||||
|
||||
|
@ -877,11 +875,8 @@ public:
|
|||
bool SROA::performPromotion(Function &F) {
|
||||
std::vector<AllocaInst*> Allocas;
|
||||
DominatorTree *DT = 0;
|
||||
DominanceFrontier *DF = 0;
|
||||
if (HasDomFrontiers) {
|
||||
if (HasDomFrontiers)
|
||||
DT = &getAnalysis<DominatorTree>();
|
||||
DF = &getAnalysis<DominanceFrontier>();
|
||||
}
|
||||
|
||||
BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
|
||||
|
||||
|
@ -900,7 +895,7 @@ bool SROA::performPromotion(Function &F) {
|
|||
if (Allocas.empty()) break;
|
||||
|
||||
if (HasDomFrontiers)
|
||||
PromoteMemToReg(Allocas, *DT, *DF);
|
||||
PromoteMemToReg(Allocas, *DT);
|
||||
else {
|
||||
SSAUpdater SSA;
|
||||
for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
|
||||
|
|
|
@ -40,7 +40,6 @@ namespace {
|
|||
//
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.addRequired<DominatorTree>();
|
||||
AU.addRequired<DominanceFrontier>();
|
||||
AU.setPreservesCFG();
|
||||
// This is a cluster of orthogonal Transforms
|
||||
AU.addPreserved<UnifyFunctionExitNodes>();
|
||||
|
@ -54,7 +53,6 @@ char PromotePass::ID = 0;
|
|||
INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register",
|
||||
false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
|
||||
INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
|
||||
INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register",
|
||||
false, false)
|
||||
|
||||
|
@ -66,7 +64,6 @@ bool PromotePass::runOnFunction(Function &F) {
|
|||
bool Changed = false;
|
||||
|
||||
DominatorTree &DT = getAnalysis<DominatorTree>();
|
||||
DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
|
||||
|
||||
while (1) {
|
||||
Allocas.clear();
|
||||
|
@ -80,7 +77,7 @@ bool PromotePass::runOnFunction(Function &F) {
|
|||
|
||||
if (Allocas.empty()) break;
|
||||
|
||||
PromoteMemToReg(Allocas, DT, DF);
|
||||
PromoteMemToReg(Allocas, DT);
|
||||
NumPromoted += Allocas.size();
|
||||
Changed = true;
|
||||
}
|
||||
|
|
|
@ -9,10 +9,19 @@
|
|||
//
|
||||
// This file promotes memory references to be register references. It promotes
|
||||
// alloca instructions which only have loads and stores as uses. An alloca is
|
||||
// transformed by using dominator frontiers to place PHI nodes, then traversing
|
||||
// the function in depth-first order to rewrite loads and stores as appropriate.
|
||||
// This is just the standard SSA construction algorithm to construct "pruned"
|
||||
// SSA form.
|
||||
// transformed by using iterated dominator frontiers to place PHI nodes, then
|
||||
// traversing the function in depth-first order to rewrite loads and stores as
|
||||
// appropriate.
|
||||
//
|
||||
// The algorithm used here is based on:
|
||||
//
|
||||
// Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
|
||||
// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
|
||||
// Programming Languages
|
||||
// POPL '95. ACM, New York, NY, 62-73.
|
||||
//
|
||||
// It has been modified to not explicitly use the DJ graph data structure and to
|
||||
// directly compute pruned SSA using per-variable liveness information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
@ -35,6 +44,7 @@
|
|||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/Support/CFG.h"
|
||||
#include <algorithm>
|
||||
#include <queue>
|
||||
using namespace llvm;
|
||||
|
||||
STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
|
||||
|
@ -179,7 +189,6 @@ namespace {
|
|||
///
|
||||
std::vector<AllocaInst*> Allocas;
|
||||
DominatorTree &DT;
|
||||
DominanceFrontier &DF;
|
||||
DIFactory *DIF;
|
||||
|
||||
/// AST - An AliasSetTracker object to update. If null, don't update it.
|
||||
|
@ -217,12 +226,15 @@ namespace {
|
|||
/// non-determinstic behavior.
|
||||
DenseMap<BasicBlock*, unsigned> BBNumbers;
|
||||
|
||||
/// DomLevels - Maps DomTreeNodes to their level in the dominator tree.
|
||||
DenseMap<DomTreeNode*, unsigned> DomLevels;
|
||||
|
||||
/// BBNumPreds - Lazily compute the number of predecessors a block has.
|
||||
DenseMap<const BasicBlock*, unsigned> BBNumPreds;
|
||||
public:
|
||||
PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
|
||||
DominanceFrontier &df, AliasSetTracker *ast)
|
||||
: Allocas(A), DT(dt), DF(df), DIF(0), AST(ast) {}
|
||||
AliasSetTracker *ast)
|
||||
: Allocas(A), DT(dt), DIF(0), AST(ast) {}
|
||||
~PromoteMem2Reg() {
|
||||
delete DIF;
|
||||
}
|
||||
|
@ -329,7 +341,7 @@ namespace {
|
|||
|
||||
|
||||
void PromoteMem2Reg::run() {
|
||||
Function &F = *DF.getRoot()->getParent();
|
||||
Function &F = *DT.getRoot()->getParent();
|
||||
|
||||
if (AST) PointerAllocaValues.resize(Allocas.size());
|
||||
AllocaDbgDeclares.resize(Allocas.size());
|
||||
|
@ -422,7 +434,26 @@ void PromoteMem2Reg::run() {
|
|||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// If we haven't computed dominator tree levels, do so now.
|
||||
if (DomLevels.empty()) {
|
||||
SmallVector<DomTreeNode*, 32> Worklist;
|
||||
|
||||
DomTreeNode *Root = DT.getRootNode();
|
||||
DomLevels[Root] = 0;
|
||||
Worklist.push_back(Root);
|
||||
|
||||
while (!Worklist.empty()) {
|
||||
DomTreeNode *Node = Worklist.pop_back_val();
|
||||
unsigned ChildLevel = DomLevels[Node] + 1;
|
||||
for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
|
||||
CI != CE; ++CI) {
|
||||
DomLevels[*CI] = ChildLevel;
|
||||
Worklist.push_back(*CI);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we haven't computed a numbering for the BB's in the function, do so
|
||||
// now.
|
||||
if (BBNumbers.empty()) {
|
||||
|
@ -657,6 +688,14 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
|
|||
}
|
||||
}
|
||||
|
||||
typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair;
|
||||
|
||||
struct DomTreeNodeCompare {
|
||||
bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
|
||||
return LHS.second <= RHS.second;
|
||||
}
|
||||
};
|
||||
|
||||
/// DetermineInsertionPoint - At this point, we're committed to promoting the
|
||||
/// alloca using IDF's, and the standard SSA construction algorithm. Determine
|
||||
/// which blocks need phi nodes and see if we can optimize out some work by
|
||||
|
@ -673,46 +712,77 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
|
|||
SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
|
||||
ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
|
||||
|
||||
// Compute the locations where PhiNodes need to be inserted. Look at the
|
||||
// dominance frontier of EACH basic-block we have a write in.
|
||||
unsigned CurrentVersion = 0;
|
||||
std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks;
|
||||
while (!Info.DefiningBlocks.empty()) {
|
||||
BasicBlock *BB = Info.DefiningBlocks.back();
|
||||
Info.DefiningBlocks.pop_back();
|
||||
|
||||
// Look up the DF for this write, add it to defining blocks.
|
||||
DominanceFrontier::const_iterator it = DF.find(BB);
|
||||
if (it == DF.end()) continue;
|
||||
|
||||
const DominanceFrontier::DomSetType &S = it->second;
|
||||
|
||||
// In theory we don't need the indirection through the DFBlocks vector.
|
||||
// In practice, the order of calling QueuePhiNode would depend on the
|
||||
// (unspecified) ordering of basic blocks in the dominance frontier,
|
||||
// which would give PHI nodes non-determinstic subscripts. Fix this by
|
||||
// processing blocks in order of the occurance in the function.
|
||||
for (DominanceFrontier::DomSetType::const_iterator P = S.begin(),
|
||||
PE = S.end(); P != PE; ++P) {
|
||||
// If the frontier block is not in the live-in set for the alloca, don't
|
||||
// bother processing it.
|
||||
if (!LiveInBlocks.count(*P))
|
||||
continue;
|
||||
|
||||
DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P));
|
||||
}
|
||||
|
||||
// Sort by which the block ordering in the function.
|
||||
if (DFBlocks.size() > 1)
|
||||
std::sort(DFBlocks.begin(), DFBlocks.end());
|
||||
|
||||
for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) {
|
||||
BasicBlock *BB = DFBlocks[i].second;
|
||||
if (QueuePhiNode(BB, AllocaNum, CurrentVersion))
|
||||
Info.DefiningBlocks.push_back(BB);
|
||||
}
|
||||
DFBlocks.clear();
|
||||
// Use a priority queue keyed on dominator tree level so that inserted nodes
|
||||
// are handled from the bottom of the dominator tree upwards.
|
||||
typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
|
||||
DomTreeNodeCompare> IDFPriorityQueue;
|
||||
IDFPriorityQueue PQ;
|
||||
|
||||
for (unsigned i = 0, e = Info.DefiningBlocks.size(); i != e; ++i) {
|
||||
if (DomTreeNode *Node = DT.getNode(Info.DefiningBlocks[i]))
|
||||
PQ.push(std::make_pair(Node, DomLevels[Node]));
|
||||
}
|
||||
|
||||
std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks;
|
||||
SmallPtrSet<DomTreeNode*, 32> Visited;
|
||||
SmallVector<DomTreeNode*, 32> Worklist;
|
||||
while (!PQ.empty()) {
|
||||
DomTreeNodePair RootPair = PQ.top();
|
||||
PQ.pop();
|
||||
DomTreeNode *Root = RootPair.first;
|
||||
unsigned RootLevel = RootPair.second;
|
||||
|
||||
// Walk all dominator tree children of Root, inspecting their CFG edges with
|
||||
// targets elsewhere on the dominator tree. Only targets whose level is at
|
||||
// most Root's level are added to the iterated dominance frontier of the
|
||||
// definition set.
|
||||
|
||||
Worklist.clear();
|
||||
Worklist.push_back(Root);
|
||||
|
||||
while (!Worklist.empty()) {
|
||||
DomTreeNode *Node = Worklist.pop_back_val();
|
||||
BasicBlock *BB = Node->getBlock();
|
||||
|
||||
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
|
||||
++SI) {
|
||||
DomTreeNode *SuccNode = DT.getNode(*SI);
|
||||
|
||||
// Quickly skip all CFG edges that are also dominator tree edges instead
|
||||
// of catching them below.
|
||||
if (SuccNode->getIDom() == Node)
|
||||
continue;
|
||||
|
||||
unsigned SuccLevel = DomLevels[SuccNode];
|
||||
if (SuccLevel > RootLevel)
|
||||
continue;
|
||||
|
||||
if (!Visited.insert(SuccNode))
|
||||
continue;
|
||||
|
||||
BasicBlock *SuccBB = SuccNode->getBlock();
|
||||
if (!LiveInBlocks.count(SuccBB))
|
||||
continue;
|
||||
|
||||
DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
|
||||
if (!DefBlocks.count(SuccBB))
|
||||
PQ.push(std::make_pair(SuccNode, SuccLevel));
|
||||
}
|
||||
|
||||
for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
|
||||
++CI) {
|
||||
if (!Visited.count(*CI))
|
||||
Worklist.push_back(*CI);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (DFBlocks.size() > 1)
|
||||
std::sort(DFBlocks.begin(), DFBlocks.end());
|
||||
|
||||
unsigned CurrentVersion = 0;
|
||||
for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
|
||||
QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
|
||||
}
|
||||
|
||||
/// RewriteSingleStoreAlloca - If there is only a single store to this value,
|
||||
|
@ -1040,10 +1110,9 @@ NextIteration:
|
|||
/// made to the IR.
|
||||
///
|
||||
void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
|
||||
DominatorTree &DT, DominanceFrontier &DF,
|
||||
AliasSetTracker *AST) {
|
||||
DominatorTree &DT, AliasSetTracker *AST) {
|
||||
// If there is nothing to do, bail out...
|
||||
if (Allocas.empty()) return;
|
||||
|
||||
PromoteMem2Reg(Allocas, DT, DF, AST).run();
|
||||
PromoteMem2Reg(Allocas, DT, AST).run();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue