[LV][CFG] Add irreducible CFG detection for outer loops

This patch adds support for detecting outer loops with irreducible control
flow in LV. Current detection uses SCCs and only works for innermost loops.
This patch adds a utility function that works on any CFG, given its RPO
traversal and its LoopInfoBase. This function is a generalization
of isIrreducibleCFG from lib/CodeGen/ShrinkWrap.cpp. The code in
lib/CodeGen/ShrinkWrap.cpp is also updated to use the new generic utility
function.

Patch by Diego Caballero <diego.caballero@intel.com>

Differential Revision: https://reviews.llvm.org/D40874

llvm-svn: 326568
This commit is contained in:
Florian Hahn 2018-03-02 12:24:25 +00:00
parent b46c191c49
commit 515acd64fd
4 changed files with 97 additions and 59 deletions

View File

@ -89,6 +89,73 @@ bool isPotentiallyReachableFromMany(SmallVectorImpl<BasicBlock *> &Worklist,
BasicBlock *StopBB,
const DominatorTree *DT = nullptr,
const LoopInfo *LI = nullptr);
/// \brief Return true if the control flow in \p RPOTraversal is irreducible.
///
/// This is a generic implementation to detect CFG irreducibility based on loop
/// info analysis. It can be used for any kind of CFG (Loop, MachineLoop,
/// Function, MachineFunction, etc.) by providing an RPO traversal (\p
/// RPOTraversal) and the loop info analysis (\p LI) of the CFG. This utility
/// function is only recommended when loop info analysis is available. If loop
/// info analysis isn't available, please, don't compute it explicitly for this
/// purpose. There are more efficient ways to detect CFG irreducibility that
/// don't require recomputing loop info analysis (e.g., T1/T2 or Tarjan's
/// algorithm).
///
/// Requirements:
/// 1) GraphTraits must be implemented for NodeT type. It is used to access
/// NodeT successors.
/// 2) \p RPOTraversal must be a valid reverse post-order traversal of the
/// target CFG with begin()/end() iterator interfaces.
/// 3) \p LI must be a valid LoopInfoBase that contains up-to-date loop
/// analysis information of the CFG.
///
/// This algorithm uses the information about reducible loop back-edges already
/// computed in \p LI. When a back-edge is found during the RPO traversal, the
/// algorithm checks whether the back-edge is one of the reducible back-edges in
/// loop info. If it isn't, the CFG is irreducible. For example, for the CFG
/// below (canonical irreducible graph) loop info won't contain any loop, so the
/// algorithm will return that the CFG is irreducible when checking the
/// B <-> C back-edge.
///
/// (A->B, A->C, B->C, C->B, C->D)
///          A
///        /   \
///       B<- ->C
///             |
///             D
///
template <class NodeT, class RPOTraversalT, class LoopInfoT,
class GT = GraphTraits<NodeT>>
bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI) {
/// Check whether the edge (\p Src, \p Dst) is a reducible loop backedge
/// according to LI. I.e., check if there exists a loop that contains Src and
/// where Dst is the loop header.
auto isProperBackedge = [&](NodeT Src, NodeT Dst) {
for (const auto *Lp = LI.getLoopFor(Src); Lp; Lp = Lp->getParentLoop()) {
if (Lp->getHeader() == Dst)
return true;
}
return false;
};
SmallPtrSet<NodeT, 32> Visited;
for (NodeT Node : RPOTraversal) {
Visited.insert(Node);
for (NodeT Succ : make_range(GT::child_begin(Node), GT::child_end(Node))) {
// Succ hasn't been visited yet
if (!Visited.count(Succ))
continue;
// We already visited Succ, thus Node->Succ must be a backedge. Check that
// the head matches what we have in the loop information. Otherwise, we
// have an irreducible graph.
if (!isProperBackedge(Node, Succ))
return true;
}
}
return false;
}
} // End llvm namespace
#endif

View File

@ -168,6 +168,25 @@ public:
}
};
/// Adapter around LoopBlocksDFS that exposes the reverse post-order traversal
/// of a loop body's blocks through a conventional begin()/end() pair.
class LoopBlocksRPO {
  /// The wrapped traversal; all work is delegated to it.
  LoopBlocksDFS DFS;

public:
  LoopBlocksRPO(Loop *Container) : DFS(Container) {}

  /// Run the DFS over the loop blocks and keep its result for iteration.
  void perform(LoopInfo *LI) { DFS.perform(LI); }

  /// Start of the reverse post-order sequence of the stored blocks.
  LoopBlocksDFS::RPOIterator begin() const {
    return DFS.beginRPO();
  }

  /// End of the reverse post-order sequence of the stored blocks.
  LoopBlocksDFS::RPOIterator end() const {
    return DFS.endRPO();
  }
};
/// Specialize po_iterator_storage to record postorder numbers.
template<> class po_iterator_storage<LoopBlocksTraversal, true> {
LoopBlocksTraversal &LBT;

View File

@ -53,6 +53,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
@ -413,41 +414,6 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
}
}
/// Check whether the edge (\p SrcBB, \p DestBB) is a backedge according to MLI.
/// I.e., check if it exists a loop that contains SrcBB and where DestBB is the
/// loop header.
static bool isProperBackedge(const MachineLoopInfo &MLI,
const MachineBasicBlock *SrcBB,
const MachineBasicBlock *DestBB) {
for (const MachineLoop *Loop = MLI.getLoopFor(SrcBB); Loop;
Loop = Loop->getParentLoop()) {
if (Loop->getHeader() == DestBB)
return true;
}
return false;
}
/// Return true if the CFG of \p MF is irreducible, judged against the loops
/// recorded in \p MLI.
static bool isIrreducibleCFG(const MachineFunction &MF,
                             const MachineLoopInfo &MLI) {
  ReversePostOrderTraversal<const MachineBasicBlock *> Order(&*MF.begin());

  // One bit per block number, set once the block has been reached by the
  // reverse post-order walk.
  BitVector Seen(MF.getNumBlockIDs());
  for (const MachineBasicBlock *Block : Order) {
    Seen.set(Block->getNumber());
    for (const MachineBasicBlock *Target : Block->successors()) {
      // A successor that was already visited makes Block->Target a backedge.
      // Its head must match what the loop information records; otherwise the
      // graph is irreducible.
      const bool PointsBackwards = Seen.test(Target->getNumber());
      if (PointsBackwards && !isProperBackedge(MLI, Block, Target))
        return true;
    }
  }
  return false;
}
bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
return false;
@ -456,7 +422,8 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
init(MF);
if (isIrreducibleCFG(MF, *MLI)) {
ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) {
// If MF is irreducible, a block may be in a loop without
// MachineLoopInfo reporting it. I.e., we may use the
// post-dominance property in loops, which lead to incorrect

View File

@ -56,7 +56,6 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@ -69,6 +68,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
@ -283,24 +283,6 @@ class LoopVectorizationRequirements;
} // end anonymous namespace
/// Returns true if the given loop body has a cycle, excluding the loop
/// itself.
static bool hasCyclesInLoopBody(const Loop &L) {
if (!L.empty())
return true;
for (const auto &SCC :
make_range(scc_iterator<Loop, LoopBodyTraits>::begin(L),
scc_iterator<Loop, LoopBodyTraits>::end(L))) {
if (SCC.size() > 1) {
DEBUG(dbgs() << "LVL: Detected a cycle in the loop body:\n");
DEBUG(L.dump());
return true;
}
}
return false;
}
/// A helper function for converting Scalar types to vector types.
/// If the incoming type is void, we return void. If the VF is 1, we return
/// the scalar type.
@ -2302,14 +2284,17 @@ private:
} // end anonymous namespace
static void addAcyclicInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) {
static void addAcyclicInnerLoop(Loop &L, LoopInfo &LI,
SmallVectorImpl<Loop *> &V) {
if (L.empty()) {
if (!hasCyclesInLoopBody(L))
LoopBlocksRPO RPOT(&L);
RPOT.perform(&LI);
if (!containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
V.push_back(&L);
return;
}
for (Loop *InnerL : L)
addAcyclicInnerLoop(*InnerL, V);
addAcyclicInnerLoop(*InnerL, LI, V);
}
namespace {
@ -8637,7 +8622,7 @@ bool LoopVectorizePass::runImpl(
SmallVector<Loop *, 8> Worklist;
for (Loop *L : *LI)
addAcyclicInnerLoop(*L, Worklist);
addAcyclicInnerLoop(*L, *LI, Worklist);
LoopsAnalyzed += Worklist.size();