forked from OSchip/llvm-project
380 lines
13 KiB
C++
380 lines
13 KiB
C++
//===- SyncDependenceAnalysis.cpp - Divergent Branch Dependence Calculation
|
|
//--===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements an algorithm that returns for a divergent branch
|
|
// the set of basic blocks whose phi nodes become divergent due to divergent
|
|
// control. These are the blocks that are reachable by two disjoint paths from
|
|
// the branch or loop exits that have a reaching path that is disjoint from a
|
|
// path to the loop latch.
|
|
//
|
|
// The SyncDependenceAnalysis is used in the DivergenceAnalysis to model
|
|
// control-induced divergence in phi nodes.
|
|
//
|
|
// -- Summary --
|
|
// The SyncDependenceAnalysis lazily computes sync dependences [3].
|
|
// The analysis evaluates the disjoint path criterion [2] by a reduction
|
|
// to SSA construction. The SSA construction algorithm is implemented as
|
|
// a simple data-flow analysis [1].
|
|
//
|
|
// [1] "A Simple, Fast Dominance Algorithm", SPI '01, Cooper, Harvey and Kennedy
|
|
// [2] "Efficiently Computing Static Single Assignment Form
|
|
// and the Control Dependence Graph", TOPLAS '91,
|
|
// Cytron, Ferrante, Rosen, Wegman and Zadeck
|
|
// [3] "Improving Performance of OpenCL on CPUs", CC '12, Karrenberg and Hack
|
|
// [4] "Divergence Analysis", TOPLAS '13, Sampaio, Souza, Collange and Pereira
|
|
//
|
|
// -- Sync dependence --
|
|
// Sync dependence [4] characterizes the control flow aspect of the
|
|
// propagation of branch divergence. For example,
|
|
//
|
|
// %cond = icmp slt i32 %tid, 10
|
|
// br i1 %cond, label %then, label %else
|
|
// then:
|
|
// br label %merge
|
|
// else:
|
|
// br label %merge
|
|
// merge:
|
|
// %a = phi i32 [ 0, %then ], [ 1, %else ]
|
|
//
|
|
// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
|
|
// because %tid is not on its use-def chains, %a is sync dependent on %tid
|
|
// because the branch "br i1 %cond" depends on %tid and affects which value %a
|
|
// is assigned to.
|
|
//
|
|
// -- Reduction to SSA construction --
|
|
// There are two disjoint paths from A to X, if a certain variant of SSA
|
|
// construction places a phi node in X under the following set-up scheme [2].
|
|
//
|
|
// This variant of SSA construction ignores incoming undef values.
|
|
// That is paths from the entry without a definition do not result in
|
|
// phi nodes.
|
|
//
|
|
// entry
|
|
// / \
|
|
// A \
|
|
// / \ Y
|
|
// B C /
|
|
// \ / \ /
|
|
// D E
|
|
// \ /
|
|
// F
|
|
// Assume that A contains a divergent branch. We are interested
|
|
// in the set of all blocks where each block is reachable from A
|
|
// via two disjoint paths. This would be the set {D, F} in this
|
|
// case.
|
|
// To generally reduce this query to SSA construction we introduce
|
|
// a virtual variable x and assign to x different values in each
|
|
// successor block of A.
|
|
// entry
|
|
// / \
|
|
// A \
|
|
// / \ Y
|
|
// x = 0 x = 1 /
|
|
// \ / \ /
|
|
// D E
|
|
// \ /
|
|
// F
|
|
// Our flavor of SSA construction for x will construct the following
|
|
// entry
|
|
// / \
|
|
// A \
|
|
// / \ Y
|
|
// x0 = 0 x1 = 1 /
|
|
// \ / \ /
|
|
// x2=phi E
|
|
// \ /
|
|
// x3=phi
|
|
// The blocks D and F contain phi nodes and are thus each reachable
|
|
// by two disjoins paths from A.
|
|
//
|
|
// -- Remarks --
|
|
// In case of loop exits we need to check the disjoint path criterion for loops
|
|
// [2]. To this end, we check whether the definition of x differs between the
|
|
// loop exit and the loop header (_after_ SSA construction).
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
#include "llvm/ADT/PostOrderIterator.h"
|
|
#include "llvm/ADT/SmallPtrSet.h"
|
|
#include "llvm/Analysis/PostDominators.h"
|
|
#include "llvm/Analysis/SyncDependenceAnalysis.h"
|
|
#include "llvm/IR/BasicBlock.h"
|
|
#include "llvm/IR/CFG.h"
|
|
#include "llvm/IR/Dominators.h"
|
|
#include "llvm/IR/Function.h"
|
|
|
|
#include <stack>
|
|
#include <unordered_set>
|
|
|
|
#define DEBUG_TYPE "sync-dependence"
|
|
|
|
namespace llvm {
|
|
|
|
ConstBlockSet SyncDependenceAnalysis::EmptyBlockSet;
|
|
|
|
SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT,
|
|
const PostDominatorTree &PDT,
|
|
const LoopInfo &LI)
|
|
: FuncRPOT(DT.getRoot()->getParent()), DT(DT), PDT(PDT), LI(LI) {}
|
|
|
|
SyncDependenceAnalysis::~SyncDependenceAnalysis() {}
|
|
|
|
using FunctionRPOT = ReversePostOrderTraversal<const Function *>;
|
|
|
|
// divergence propagator for reducible CFGs
|
|
struct DivergencePropagator {
|
|
const FunctionRPOT &FuncRPOT;
|
|
const DominatorTree &DT;
|
|
const PostDominatorTree &PDT;
|
|
const LoopInfo &LI;
|
|
|
|
// identified join points
|
|
std::unique_ptr<ConstBlockSet> JoinBlocks;
|
|
|
|
// reached loop exits (by a path disjoint to a path to the loop header)
|
|
SmallPtrSet<const BasicBlock *, 4> ReachedLoopExits;
|
|
|
|
// if DefMap[B] == C then C is the dominating definition at block B
|
|
// if DefMap[B] ~ undef then we haven't seen B yet
|
|
// if DefMap[B] == B then B is a join point of disjoint paths from X or B is
|
|
// an immediate successor of X (initial value).
|
|
using DefiningBlockMap = std::map<const BasicBlock *, const BasicBlock *>;
|
|
DefiningBlockMap DefMap;
|
|
|
|
// all blocks with pending visits
|
|
std::unordered_set<const BasicBlock *> PendingUpdates;
|
|
|
|
DivergencePropagator(const FunctionRPOT &FuncRPOT, const DominatorTree &DT,
|
|
const PostDominatorTree &PDT, const LoopInfo &LI)
|
|
: FuncRPOT(FuncRPOT), DT(DT), PDT(PDT), LI(LI),
|
|
JoinBlocks(new ConstBlockSet) {}
|
|
|
|
// set the definition at @block and mark @block as pending for a visit
|
|
void addPending(const BasicBlock &Block, const BasicBlock &DefBlock) {
|
|
bool WasAdded = DefMap.emplace(&Block, &DefBlock).second;
|
|
if (WasAdded)
|
|
PendingUpdates.insert(&Block);
|
|
}
|
|
|
|
void printDefs(raw_ostream &Out) {
|
|
Out << "Propagator::DefMap {\n";
|
|
for (const auto *Block : FuncRPOT) {
|
|
auto It = DefMap.find(Block);
|
|
Out << Block->getName() << " : ";
|
|
if (It == DefMap.end()) {
|
|
Out << "\n";
|
|
} else {
|
|
const auto *DefBlock = It->second;
|
|
Out << (DefBlock ? DefBlock->getName() : "<null>") << "\n";
|
|
}
|
|
}
|
|
Out << "}\n";
|
|
}
|
|
|
|
// process @succBlock with reaching definition @defBlock
|
|
// the original divergent branch was in @parentLoop (if any)
|
|
void visitSuccessor(const BasicBlock &SuccBlock, const Loop *ParentLoop,
|
|
const BasicBlock &DefBlock) {
|
|
|
|
// @succBlock is a loop exit
|
|
if (ParentLoop && !ParentLoop->contains(&SuccBlock)) {
|
|
DefMap.emplace(&SuccBlock, &DefBlock);
|
|
ReachedLoopExits.insert(&SuccBlock);
|
|
return;
|
|
}
|
|
|
|
// first reaching def?
|
|
auto ItLastDef = DefMap.find(&SuccBlock);
|
|
if (ItLastDef == DefMap.end()) {
|
|
addPending(SuccBlock, DefBlock);
|
|
return;
|
|
}
|
|
|
|
// a join of at least two definitions
|
|
if (ItLastDef->second != &DefBlock) {
|
|
// do we know this join already?
|
|
if (!JoinBlocks->insert(&SuccBlock).second)
|
|
return;
|
|
|
|
// update the definition
|
|
addPending(SuccBlock, SuccBlock);
|
|
}
|
|
}
|
|
|
|
// find all blocks reachable by two disjoint paths from @rootTerm.
|
|
// This method works for both divergent terminators and loops with
|
|
// divergent exits.
|
|
// @rootBlock is either the block containing the branch or the header of the
|
|
// divergent loop.
|
|
// @nodeSuccessors is the set of successors of the node (Loop or Terminator)
|
|
// headed by @rootBlock.
|
|
// @parentLoop is the parent loop of the Loop or the loop that contains the
|
|
// Terminator.
|
|
template <typename SuccessorIterable>
|
|
std::unique_ptr<ConstBlockSet>
|
|
computeJoinPoints(const BasicBlock &RootBlock,
|
|
SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
|
|
assert(JoinBlocks);
|
|
|
|
// immediate post dominator (no join block beyond that block)
|
|
const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(&RootBlock));
|
|
const auto *IpdNode = PdNode->getIDom();
|
|
const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
|
|
|
|
// bootstrap with branch targets
|
|
for (const auto *SuccBlock : NodeSuccessors) {
|
|
DefMap.emplace(SuccBlock, SuccBlock);
|
|
|
|
if (ParentLoop && !ParentLoop->contains(SuccBlock)) {
|
|
// immediate loop exit from node.
|
|
ReachedLoopExits.insert(SuccBlock);
|
|
continue;
|
|
} else {
|
|
// regular successor
|
|
PendingUpdates.insert(SuccBlock);
|
|
}
|
|
}
|
|
|
|
auto ItBeginRPO = FuncRPOT.begin();
|
|
|
|
// skip until term (TODO RPOT won't let us start at @term directly)
|
|
for (; *ItBeginRPO != &RootBlock; ++ItBeginRPO) {}
|
|
|
|
auto ItEndRPO = FuncRPOT.end();
|
|
assert(ItBeginRPO != ItEndRPO);
|
|
|
|
// propagate definitions at the immediate successors of the node in RPO
|
|
auto ItBlockRPO = ItBeginRPO;
|
|
while (++ItBlockRPO != ItEndRPO && *ItBlockRPO != PdBoundBlock) {
|
|
const auto *Block = *ItBlockRPO;
|
|
|
|
// skip @block if not pending update
|
|
auto ItPending = PendingUpdates.find(Block);
|
|
if (ItPending == PendingUpdates.end())
|
|
continue;
|
|
PendingUpdates.erase(ItPending);
|
|
|
|
// propagate definition at @block to its successors
|
|
auto ItDef = DefMap.find(Block);
|
|
const auto *DefBlock = ItDef->second;
|
|
assert(DefBlock);
|
|
|
|
auto *BlockLoop = LI.getLoopFor(Block);
|
|
if (ParentLoop &&
|
|
(ParentLoop != BlockLoop && ParentLoop->contains(BlockLoop))) {
|
|
// if the successor is the header of a nested loop pretend its a
|
|
// single node with the loop's exits as successors
|
|
SmallVector<BasicBlock *, 4> BlockLoopExits;
|
|
BlockLoop->getExitBlocks(BlockLoopExits);
|
|
for (const auto *BlockLoopExit : BlockLoopExits) {
|
|
visitSuccessor(*BlockLoopExit, ParentLoop, *DefBlock);
|
|
}
|
|
|
|
} else {
|
|
// the successors are either on the same loop level or loop exits
|
|
for (const auto *SuccBlock : successors(Block)) {
|
|
visitSuccessor(*SuccBlock, ParentLoop, *DefBlock);
|
|
}
|
|
}
|
|
}
|
|
|
|
// We need to know the definition at the parent loop header to decide
|
|
// whether the definition at the header is different from the definition at
|
|
// the loop exits, which would indicate a divergent loop exits.
|
|
//
|
|
// A // loop header
|
|
// |
|
|
// B // nested loop header
|
|
// |
|
|
// C -> X (exit from B loop) -..-> (A latch)
|
|
// |
|
|
// D -> back to B (B latch)
|
|
// |
|
|
// proper exit from both loops
|
|
//
|
|
// D post-dominates B as it is the only proper exit from the "A loop".
|
|
// If C has a divergent branch, propagation will therefore stop at D.
|
|
// That implies that B will never receive a definition.
|
|
// But that definition can only be the same as at D (D itself in thise case)
|
|
// because all paths to anywhere have to pass through D.
|
|
//
|
|
const BasicBlock *ParentLoopHeader =
|
|
ParentLoop ? ParentLoop->getHeader() : nullptr;
|
|
if (ParentLoop && ParentLoop->contains(PdBoundBlock)) {
|
|
DefMap[ParentLoopHeader] = DefMap[PdBoundBlock];
|
|
}
|
|
|
|
// analyze reached loop exits
|
|
if (!ReachedLoopExits.empty()) {
|
|
assert(ParentLoop);
|
|
const auto *HeaderDefBlock = DefMap[ParentLoopHeader];
|
|
LLVM_DEBUG(printDefs(dbgs()));
|
|
assert(HeaderDefBlock && "no definition in header of carrying loop");
|
|
|
|
for (const auto *ExitBlock : ReachedLoopExits) {
|
|
auto ItExitDef = DefMap.find(ExitBlock);
|
|
assert((ItExitDef != DefMap.end()) &&
|
|
"no reaching def at reachable loop exit");
|
|
if (ItExitDef->second != HeaderDefBlock) {
|
|
JoinBlocks->insert(ExitBlock);
|
|
}
|
|
}
|
|
}
|
|
|
|
return std::move(JoinBlocks);
|
|
}
|
|
};
|
|
|
|
const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {
|
|
using LoopExitVec = SmallVector<BasicBlock *, 4>;
|
|
LoopExitVec LoopExits;
|
|
Loop.getExitBlocks(LoopExits);
|
|
if (LoopExits.size() < 1) {
|
|
return EmptyBlockSet;
|
|
}
|
|
|
|
// already available in cache?
|
|
auto ItCached = CachedLoopExitJoins.find(&Loop);
|
|
if (ItCached != CachedLoopExitJoins.end())
|
|
return *ItCached->second;
|
|
|
|
// compute all join points
|
|
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
|
|
auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
|
|
*Loop.getHeader(), LoopExits, Loop.getParentLoop());
|
|
|
|
auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
|
|
assert(ItInserted.second);
|
|
return *ItInserted.first->second;
|
|
}
|
|
|
|
const ConstBlockSet &
|
|
SyncDependenceAnalysis::join_blocks(const Instruction &Term) {
|
|
// trivial case
|
|
if (Term.getNumSuccessors() < 1) {
|
|
return EmptyBlockSet;
|
|
}
|
|
|
|
// already available in cache?
|
|
auto ItCached = CachedBranchJoins.find(&Term);
|
|
if (ItCached != CachedBranchJoins.end())
|
|
return *ItCached->second;
|
|
|
|
// compute all join points
|
|
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
|
|
const auto &TermBlock = *Term.getParent();
|
|
auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
|
|
TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
|
|
|
|
auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
|
|
assert(ItInserted.second);
|
|
return *ItInserted.first->second;
|
|
}
|
|
|
|
} // namespace llvm
|