forked from OSchip/llvm-project
[LV][CFG] Add irreducible CFG detection for outer loops
This patch adds support for detecting outer loops with irreducible control flow in LV. Current detection uses SCCs and only works for innermost loops. This patch adds a utility function that works on any CFG, given its RPO traversal and its LoopInfoBase. This function is a generalization of isIrreducibleCFG from lib/CodeGen/ShrinkWrap.cpp. The code in lib/CodeGen/ShrinkWrap.cpp is also updated to use the new generic utility function. Patch by Diego Caballero <diego.caballero@intel.com> Differential Revision: https://reviews.llvm.org/D40874 llvm-svn: 326568
This commit is contained in:
parent
b46c191c49
commit
515acd64fd
|
@ -89,6 +89,73 @@ bool isPotentiallyReachableFromMany(SmallVectorImpl<BasicBlock *> &Worklist,
|
|||
BasicBlock *StopBB,
|
||||
const DominatorTree *DT = nullptr,
|
||||
const LoopInfo *LI = nullptr);
|
||||
|
||||
/// \brief Return true if the control flow in \p RPOTraversal is irreducible.
|
||||
///
|
||||
/// This is a generic implementation to detect CFG irreducibility based on loop
|
||||
/// info analysis. It can be used for any kind of CFG (Loop, MachineLoop,
|
||||
/// Function, MachineFunction, etc.) by providing an RPO traversal (\p
|
||||
/// RPOTraversal) and the loop info analysis (\p LI) of the CFG. This utility
|
||||
/// function is only recommended when loop info analysis is available. If loop
|
||||
/// info analysis isn't available, please, don't compute it explicitly for this
|
||||
/// purpose. There are more efficient ways to detect CFG irreducibility that
|
||||
/// don't require recomputing loop info analysis (e.g., T1/T2 or Tarjan's
|
||||
/// algorithm).
|
||||
///
|
||||
/// Requirements:
|
||||
/// 1) GraphTraits must be implemented for NodeT type. It is used to access
|
||||
/// NodeT successors.
|
||||
// 2) \p RPOTraversal must be a valid reverse post-order traversal of the
|
||||
/// target CFG with begin()/end() iterator interfaces.
|
||||
/// 3) \p LI must be a valid LoopInfoBase that contains up-to-date loop
|
||||
/// analysis information of the CFG.
|
||||
///
|
||||
/// This algorithm uses the information about reducible loop back-edges already
|
||||
/// computed in \p LI. When a back-edge is found during the RPO traversal, the
|
||||
/// algorithm checks whether the back-edge is one of the reducible back-edges in
|
||||
/// loop info. If it isn't, the CFG is irreducible. For example, for the CFG
|
||||
/// below (canonical irreducible graph) loop info won't contain any loop, so the
|
||||
/// algorithm will return that the CFG is irreducible when checking the B <-
|
||||
/// -> C back-edge.
|
||||
///
|
||||
/// (A->B, A->C, B->C, C->B, C->D)
|
||||
/// A
|
||||
/// / \
|
||||
/// B<- ->C
|
||||
/// |
|
||||
/// D
|
||||
///
|
||||
template <class NodeT, class RPOTraversalT, class LoopInfoT,
|
||||
class GT = GraphTraits<NodeT>>
|
||||
bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI) {
|
||||
/// Check whether the edge (\p Src, \p Dst) is a reducible loop backedge
|
||||
/// according to LI. I.e., check if there exists a loop that contains Src and
|
||||
/// where Dst is the loop header.
|
||||
auto isProperBackedge = [&](NodeT Src, NodeT Dst) {
|
||||
for (const auto *Lp = LI.getLoopFor(Src); Lp; Lp = Lp->getParentLoop()) {
|
||||
if (Lp->getHeader() == Dst)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
SmallPtrSet<NodeT, 32> Visited;
|
||||
for (NodeT Node : RPOTraversal) {
|
||||
Visited.insert(Node);
|
||||
for (NodeT Succ : make_range(GT::child_begin(Node), GT::child_end(Node))) {
|
||||
// Succ hasn't been visited yet
|
||||
if (!Visited.count(Succ))
|
||||
continue;
|
||||
// We already visited Succ, thus Node->Succ must be a backedge. Check that
|
||||
// the head matches what we have in the loop information. Otherwise, we
|
||||
// have an irreducible graph.
|
||||
if (!isProperBackedge(Node, Succ))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
} // End llvm namespace
|
||||
|
||||
#endif
|
||||
|
|
|
@ -168,6 +168,25 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
/// Wrapper class to LoopBlocksDFS that provides a standard begin()/end()
|
||||
/// interface for the DFS reverse post-order traversal of blocks in a loop body.
|
||||
class LoopBlocksRPO {
|
||||
private:
|
||||
LoopBlocksDFS DFS;
|
||||
|
||||
public:
|
||||
LoopBlocksRPO(Loop *Container) : DFS(Container) {}
|
||||
|
||||
/// Traverse the loop blocks and store the DFS result.
|
||||
void perform(LoopInfo *LI) {
|
||||
DFS.perform(LI);
|
||||
}
|
||||
|
||||
/// Reverse iterate over the cached postorder blocks.
|
||||
LoopBlocksDFS::RPOIterator begin() const { return DFS.beginRPO(); }
|
||||
LoopBlocksDFS::RPOIterator end() const { return DFS.endRPO(); }
|
||||
};
|
||||
|
||||
/// Specialize po_iterator_storage to record postorder numbers.
|
||||
template<> class po_iterator_storage<LoopBlocksTraversal, true> {
|
||||
LoopBlocksTraversal &LBT;
|
||||
|
|
|
@ -53,6 +53,7 @@
|
|||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/CFG.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
|
@ -413,41 +414,6 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
|
|||
}
|
||||
}
|
||||
|
||||
/// Check whether the edge (\p SrcBB, \p DestBB) is a backedge according to MLI.
|
||||
/// I.e., check if it exists a loop that contains SrcBB and where DestBB is the
|
||||
/// loop header.
|
||||
static bool isProperBackedge(const MachineLoopInfo &MLI,
|
||||
const MachineBasicBlock *SrcBB,
|
||||
const MachineBasicBlock *DestBB) {
|
||||
for (const MachineLoop *Loop = MLI.getLoopFor(SrcBB); Loop;
|
||||
Loop = Loop->getParentLoop()) {
|
||||
if (Loop->getHeader() == DestBB)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Check if the CFG of \p MF is irreducible.
|
||||
static bool isIrreducibleCFG(const MachineFunction &MF,
|
||||
const MachineLoopInfo &MLI) {
|
||||
const MachineBasicBlock *Entry = &*MF.begin();
|
||||
ReversePostOrderTraversal<const MachineBasicBlock *> RPOT(Entry);
|
||||
BitVector VisitedBB(MF.getNumBlockIDs());
|
||||
for (const MachineBasicBlock *MBB : RPOT) {
|
||||
VisitedBB.set(MBB->getNumber());
|
||||
for (const MachineBasicBlock *SuccBB : MBB->successors()) {
|
||||
if (!VisitedBB.test(SuccBB->getNumber()))
|
||||
continue;
|
||||
// We already visited SuccBB, thus MBB->SuccBB must be a backedge.
|
||||
// Check that the head matches what we have in the loop information.
|
||||
// Otherwise, we have an irreducible graph.
|
||||
if (!isProperBackedge(MLI, MBB, SuccBB))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
|
||||
return false;
|
||||
|
@ -456,7 +422,8 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
|
|||
|
||||
init(MF);
|
||||
|
||||
if (isIrreducibleCFG(MF, *MLI)) {
|
||||
ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
|
||||
if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) {
|
||||
// If MF is irreducible, a block may be in a loop without
|
||||
// MachineLoopInfo reporting it. I.e., we may use the
|
||||
// post-dominance property in loops, which lead to incorrect
|
||||
|
|
|
@ -56,7 +56,6 @@
|
|||
#include "llvm/ADT/MapVector.h"
|
||||
#include "llvm/ADT/None.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/ADT/SCCIterator.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
|
@ -69,6 +68,7 @@
|
|||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/BasicAliasAnalysis.h"
|
||||
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/CFG.h"
|
||||
#include "llvm/Analysis/CodeMetrics.h"
|
||||
#include "llvm/Analysis/DemandedBits.h"
|
||||
#include "llvm/Analysis/GlobalsModRef.h"
|
||||
|
@ -283,24 +283,6 @@ class LoopVectorizationRequirements;
|
|||
|
||||
} // end anonymous namespace
|
||||
|
||||
/// Returns true if the given loop body has a cycle, excluding the loop
|
||||
/// itself.
|
||||
static bool hasCyclesInLoopBody(const Loop &L) {
|
||||
if (!L.empty())
|
||||
return true;
|
||||
|
||||
for (const auto &SCC :
|
||||
make_range(scc_iterator<Loop, LoopBodyTraits>::begin(L),
|
||||
scc_iterator<Loop, LoopBodyTraits>::end(L))) {
|
||||
if (SCC.size() > 1) {
|
||||
DEBUG(dbgs() << "LVL: Detected a cycle in the loop body:\n");
|
||||
DEBUG(L.dump());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// A helper function for converting Scalar types to vector types.
|
||||
/// If the incoming type is void, we return void. If the VF is 1, we return
|
||||
/// the scalar type.
|
||||
|
@ -2302,14 +2284,17 @@ private:
|
|||
|
||||
} // end anonymous namespace
|
||||
|
||||
static void addAcyclicInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) {
|
||||
static void addAcyclicInnerLoop(Loop &L, LoopInfo &LI,
|
||||
SmallVectorImpl<Loop *> &V) {
|
||||
if (L.empty()) {
|
||||
if (!hasCyclesInLoopBody(L))
|
||||
LoopBlocksRPO RPOT(&L);
|
||||
RPOT.perform(&LI);
|
||||
if (!containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
|
||||
V.push_back(&L);
|
||||
return;
|
||||
}
|
||||
for (Loop *InnerL : L)
|
||||
addAcyclicInnerLoop(*InnerL, V);
|
||||
addAcyclicInnerLoop(*InnerL, LI, V);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
@ -8637,7 +8622,7 @@ bool LoopVectorizePass::runImpl(
|
|||
SmallVector<Loop *, 8> Worklist;
|
||||
|
||||
for (Loop *L : *LI)
|
||||
addAcyclicInnerLoop(*L, Worklist);
|
||||
addAcyclicInnerLoop(*L, *LI, Worklist);
|
||||
|
||||
LoopsAnalyzed += Worklist.size();
|
||||
|
||||
|
|
Loading…
Reference in New Issue