llvm-project/bolt/lib/Passes/MCF.cpp

488 lines
16 KiB
C++
Raw Normal View History

//===--- Passes/MCF.cpp ---------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#include "bolt/Passes/MCF.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Passes/DataflowInfoManager.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/CommandLine.h"
#include <algorithm>
#include <vector>
#undef DEBUG_TYPE
#define DEBUG_TYPE "mcf"
using namespace llvm;
using namespace bolt;
namespace opts {
extern cl::OptionCategory BoltOptCategory;
extern cl::opt<bool> TimeOpts;
static cl::opt<bool>
IterativeGuess("iterative-guess",
cl::desc("in non-LBR mode, guess edge counts using iterative technique"),
cl::ZeroOrMore,
cl::init(false),
cl::Hidden,
cl::cat(BoltOptCategory));
static cl::opt<bool>
EqualizeBBCounts("equalize-bb-counts",
cl::desc("in non-LBR mode, use same count for BBs "
"that should have equivalent count"),
cl::ZeroOrMore,
cl::init(false),
cl::Hidden,
cl::cat(BoltOptCategory));
static cl::opt<bool>
UseRArcs("mcf-use-rarcs",
cl::desc("in MCF, consider the possibility of cancelling flow to balance "
"edges"),
cl::ZeroOrMore,
cl::init(false),
cl::Hidden,
cl::cat(BoltOptCategory));
} // namespace opts
namespace llvm {
namespace bolt {
namespace {
// Edge Weight Inference Heuristic
//
// We start by maintaining the invariant used in LBR mode where the sum of
// pred edges count is equal to the block execution count. This loop will set
// pred edges count by balancing its own execution count in different pred
// edges. The weight of each edge is guessed by looking at how hot each pred
// block is (in terms of samples).
// There are two caveats in this approach. One is for critical edges and the
// other is for self-referencing blocks (loops of 1 BB). For critical edges,
// we can't infer the hotness of them based solely on pred BBs execution
// count. For each critical edge we look at the pred BB, then look at its
// succs to adjust its weight.
//
// [ 60 ] [ 25 ]
// | \ |
// [ 10 ] [ 75 ]
//
// The illustration above shows a critical edge \. We wish to adjust bb count
// 60 to 50 to properly determine the weight of the critical edge to be
// 50 / 75.
// For self-referencing edges, we attribute its weight by subtracting the
// current BB execution count by the sum of predecessors count if this result
// is non-negative.
using EdgeWeightMap =
DenseMap<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>,
double>;
template <class NodeT>
void updateEdgeWeight(EdgeWeightMap &EdgeWeights, const BinaryBasicBlock *A,
const BinaryBasicBlock *B, double Weight);
template <>
void updateEdgeWeight<BinaryBasicBlock *>(
EdgeWeightMap &EdgeWeights, const BinaryBasicBlock *A,
const BinaryBasicBlock *B, double Weight) {
EdgeWeights[std::make_pair(A, B)] = Weight;
return;
}
template <>
void updateEdgeWeight<Inverse<BinaryBasicBlock *>>(
EdgeWeightMap &EdgeWeights, const BinaryBasicBlock *A,
const BinaryBasicBlock *B, double Weight) {
EdgeWeights[std::make_pair(B, A)] = Weight;
return;
}
template <class NodeT>
void computeEdgeWeights(BinaryBasicBlock *BB, EdgeWeightMap &EdgeWeights) {
typedef GraphTraits<NodeT> GraphT;
typedef GraphTraits<Inverse<NodeT> > InvTraits;
double TotalChildrenCount = 0.0;
SmallVector<double, 4> ChildrenExecCount;
// First pass computes total children execution count that directly
// contribute to this BB.
for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
E = GraphT::child_end(BB); CI != E; ++CI) {
typename GraphT::NodeRef Child = *CI;
double ChildExecCount = Child->getExecutionCount();
// Is self-reference?
if (Child == BB) {
ChildExecCount = 0.0; // will fill this in second pass
} else if (GraphT::child_end(BB) - GraphT::child_begin(BB) > 1 &&
InvTraits::child_end(Child) - InvTraits::child_begin(Child) >
1) {
// Handle critical edges. This will cause a skew towards crit edges, but
// it is a quick solution.
double CritWeight = 0.0;
uint64_t Denominator = 0;
for (typename InvTraits::ChildIteratorType
II = InvTraits::child_begin(Child),
IE = InvTraits::child_end(Child);
II != IE; ++II) {
typename GraphT::NodeRef N = *II;
Denominator += N->getExecutionCount();
if (N != BB) {
continue;
}
CritWeight = N->getExecutionCount();
}
if (Denominator)
CritWeight /= static_cast<double>(Denominator);
ChildExecCount *= CritWeight;
}
ChildrenExecCount.push_back(ChildExecCount);
TotalChildrenCount += ChildExecCount;
}
// Second pass fixes the weight of a possible self-reference edge
uint32_t ChildIndex = 0;
for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
E = GraphT::child_end(BB); CI != E; ++CI) {
typename GraphT::NodeRef Child = *CI;
if (Child != BB) {
++ChildIndex;
continue;
}
if (static_cast<double>(BB->getExecutionCount()) > TotalChildrenCount) {
ChildrenExecCount[ChildIndex] =
BB->getExecutionCount() - TotalChildrenCount;
TotalChildrenCount += ChildrenExecCount[ChildIndex];
}
break;
}
// Third pass finally assigns weights to edges
ChildIndex = 0;
for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
E = GraphT::child_end(BB); CI != E; ++CI) {
typename GraphT::NodeRef Child = *CI;
double Weight = 1 / (GraphT::child_end(BB) - GraphT::child_begin(BB));
if (TotalChildrenCount != 0.0)
Weight = ChildrenExecCount[ChildIndex] / TotalChildrenCount;
updateEdgeWeight<NodeT>(EdgeWeights, BB, Child, Weight);
++ChildIndex;
}
}
template<class NodeT>
void computeEdgeWeights(BinaryFunction &BF, EdgeWeightMap &EdgeWeights) {
for (BinaryBasicBlock &BB : BF) {
computeEdgeWeights<NodeT>(&BB, EdgeWeights);
}
}
/// Make BB count match the sum of all incoming edges. If AllEdges is true,
/// make it match max(SumPredEdges, SumSuccEdges).
void recalculateBBCounts(BinaryFunction &BF, bool AllEdges) {
for (BinaryBasicBlock &BB : BF) {
uint64_t TotalPredsEWeight = 0;
for (BinaryBasicBlock *Pred : BB.predecessors()) {
TotalPredsEWeight += Pred->getBranchInfo(BB).Count;
}
if (TotalPredsEWeight > BB.getExecutionCount()) {
BB.setExecutionCount(TotalPredsEWeight);
}
if (!AllEdges)
continue;
uint64_t TotalSuccsEWeight = 0;
for (BinaryBasicBlock::BinaryBranchInfo &BI : BB.branch_info()) {
TotalSuccsEWeight += BI.Count;
}
if (TotalSuccsEWeight > BB.getExecutionCount()) {
BB.setExecutionCount(TotalSuccsEWeight);
}
}
}
// This is our main edge count guessing heuristic. Look at predecessors and
// assign a proportionally higher count to pred edges coming from blocks with
// a higher execution count in comparison with the other predecessor blocks,
// making SumPredEdges match the current BB count.
// If "UseSucc" is true, apply the same logic to successor edges as well. Since
// some successor edges may already have assigned a count, only update it if the
// new count is higher.
void guessEdgeByRelHotness(BinaryFunction &BF, bool UseSucc,
EdgeWeightMap &PredEdgeWeights,
EdgeWeightMap &SuccEdgeWeights) {
for (BinaryBasicBlock &BB : BF) {
for (BinaryBasicBlock *Pred : BB.predecessors()) {
double RelativeExec = PredEdgeWeights[std::make_pair(Pred, &BB)];
RelativeExec *= BB.getExecutionCount();
BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB);
if (static_cast<uint64_t>(RelativeExec) > BI.Count)
BI.Count = static_cast<uint64_t>(RelativeExec);
}
if (!UseSucc)
continue;
auto BI = BB.branch_info_begin();
for (BinaryBasicBlock *Succ : BB.successors()) {
double RelativeExec = SuccEdgeWeights[std::make_pair(&BB, Succ)];
RelativeExec *= BB.getExecutionCount();
if (static_cast<uint64_t>(RelativeExec) > BI->Count)
BI->Count = static_cast<uint64_t>(RelativeExec);
++BI;
}
}
}
using ArcSet =
DenseSet<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>>;
/// Predecessor edges version of guessEdgeByIterativeApproach. GuessedArcs has
/// all edges we already established their count. Try to guess the count of
/// the remaining edge, if there is only one to guess, and return true if we
/// were able to guess.
bool guessPredEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
if (BB->pred_size() == 0)
return false;
uint64_t TotalPredCount = 0;
unsigned NumGuessedEdges = 0;
for (BinaryBasicBlock *Pred : BB->predecessors()) {
if (GuessedArcs.count(std::make_pair(Pred, BB)))
++NumGuessedEdges;
TotalPredCount += Pred->getBranchInfo(*BB).Count;
}
if (NumGuessedEdges != BB->pred_size() - 1)
return false;
int64_t Guessed =
static_cast<int64_t>(BB->getExecutionCount()) - TotalPredCount;
if (Guessed < 0)
Guessed = 0;
for (BinaryBasicBlock *Pred : BB->predecessors()) {
if (GuessedArcs.count(std::make_pair(Pred, BB)))
continue;
Pred->getBranchInfo(*BB).Count = Guessed;
return true;
}
llvm_unreachable("Expected unguessed arc");
}
/// Successor edges version of guessEdgeByIterativeApproach. GuessedArcs has
/// all edges we already established their count. Try to guess the count of
/// the remaining edge, if there is only one to guess, and return true if we
/// were able to guess.
bool guessSuccEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
if (BB->succ_size() == 0)
return false;
uint64_t TotalSuccCount = 0;
unsigned NumGuessedEdges = 0;
auto BI = BB->branch_info_begin();
for (BinaryBasicBlock *Succ : BB->successors()) {
if (GuessedArcs.count(std::make_pair(BB, Succ)))
++NumGuessedEdges;
TotalSuccCount += BI->Count;
++BI;
}
if (NumGuessedEdges != BB->succ_size() - 1)
return false;
int64_t Guessed =
static_cast<int64_t>(BB->getExecutionCount()) - TotalSuccCount;
if (Guessed < 0)
Guessed = 0;
BI = BB->branch_info_begin();
for (BinaryBasicBlock *Succ : BB->successors()) {
if (GuessedArcs.count(std::make_pair(BB, Succ))) {
++BI;
continue;
}
BI->Count = Guessed;
GuessedArcs.insert(std::make_pair(BB, Succ));
return true;
}
llvm_unreachable("Expected unguessed arc");
}
/// Guess edge count whenever we have only one edge (pred or succ) left
/// to guess. Then make its count equal to BB count minus all other edge
/// counts we already know their count. Repeat this until there is no
/// change.
void guessEdgeByIterativeApproach(BinaryFunction &BF) {
ArcSet KnownArcs;
bool Changed = false;
do {
Changed = false;
for (BinaryBasicBlock &BB : BF) {
if (guessPredEdgeCounts(&BB, KnownArcs)) Changed = true;
if (guessSuccEdgeCounts(&BB, KnownArcs)) Changed = true;
}
} while (Changed);
// Guess count for non-inferred edges
for (BinaryBasicBlock &BB : BF) {
for (BinaryBasicBlock *Pred : BB.predecessors()) {
if (KnownArcs.count(std::make_pair(Pred, &BB)))
continue;
BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB);
BI.Count =
std::min(Pred->getExecutionCount(), BB.getExecutionCount()) / 2;
KnownArcs.insert(std::make_pair(Pred, &BB));
}
auto BI = BB.branch_info_begin();
for (BinaryBasicBlock *Succ : BB.successors()) {
if (KnownArcs.count(std::make_pair(&BB, Succ))) {
++BI;
continue;
}
BI->Count =
std::min(BB.getExecutionCount(), Succ->getExecutionCount()) / 2;
KnownArcs.insert(std::make_pair(&BB, Succ));
break;
}
}
}
/// Associate each basic block with the BinaryLoop object corresponding to the
/// innermost loop containing this block.
DenseMap<const BinaryBasicBlock *, const BinaryLoop*>
createLoopNestLevelMap(BinaryFunction &BF) {
DenseMap<const BinaryBasicBlock *, const BinaryLoop*> LoopNestLevel;
const BinaryLoopInfo &BLI = BF.getLoopInfo();
for (BinaryBasicBlock &BB : BF) {
LoopNestLevel[&BB] = BLI[&BB];
}
return LoopNestLevel;
}
/// Implement the idea in "SamplePGO - The Power of Profile Guided Optimizations
/// without the Usability Burden" by Diego Novillo to make basic block counts
/// equal if we show that A dominates B, B post-dominates A and they are in the
/// same loop and same loop nesting level.
void equalizeBBCounts(BinaryFunction &BF) {
auto Info = DataflowInfoManager(BF, nullptr, nullptr);
DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
DominatorAnalysis<true> &PDA = Info.getPostDominatorAnalysis();
auto &InsnToBB = Info.getInsnToBBMap();
// These analyses work at the instruction granularity, but we really only need
// basic block granularity here. So we'll use a set of visited edges to avoid
// revisiting the same BBs again and again.
DenseMap<const BinaryBasicBlock *, std::set<const BinaryBasicBlock *>>
Visited;
// Equivalence classes mapping. Each equivalence class is defined by the set
// of BBs that obeys the aforementioned properties.
DenseMap<const BinaryBasicBlock *, signed> BBsToEC;
std::vector<std::vector<BinaryBasicBlock *>> Classes;
BF.calculateLoopInfo();
DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel =
createLoopNestLevelMap(BF);
for (BinaryBasicBlock &BB : BF) {
BBsToEC[&BB] = -1;
}
for (BinaryBasicBlock &BB : BF) {
auto I = BB.begin();
if (I == BB.end())
continue;
DA.doForAllDominators(*I, [&](const MCInst &DomInst) {
BinaryBasicBlock *DomBB = InsnToBB[&DomInst];
if (Visited[DomBB].count(&BB))
return;
Visited[DomBB].insert(&BB);
if (!PDA.doesADominateB(*I, DomInst))
return;
if (LoopNestLevel[&BB] != LoopNestLevel[DomBB])
return;
if (BBsToEC[DomBB] == -1 && BBsToEC[&BB] == -1) {
BBsToEC[DomBB] = Classes.size();
BBsToEC[&BB] = Classes.size();
Classes.emplace_back();
Classes.back().push_back(DomBB);
Classes.back().push_back(&BB);
return;
}
if (BBsToEC[DomBB] == -1) {
BBsToEC[DomBB] = BBsToEC[&BB];
Classes[BBsToEC[&BB]].push_back(DomBB);
return;
}
if (BBsToEC[&BB] == -1) {
BBsToEC[&BB] = BBsToEC[DomBB];
Classes[BBsToEC[DomBB]].push_back(&BB);
return;
}
signed BBECNum = BBsToEC[&BB];
std::vector<BinaryBasicBlock *> DomEC = Classes[BBsToEC[DomBB]];
std::vector<BinaryBasicBlock *> BBEC = Classes[BBECNum];
for (BinaryBasicBlock *Block : DomEC) {
BBsToEC[Block] = BBECNum;
BBEC.push_back(Block);
}
DomEC.clear();
});
}
for (std::vector<BinaryBasicBlock *> &Class : Classes) {
uint64_t Max = 0ULL;
for (BinaryBasicBlock *BB : Class) {
Max = std::max(Max, BB->getExecutionCount());
}
for (BinaryBasicBlock *BB : Class) {
BB->setExecutionCount(Max);
}
}
}
} // end anonymous namespace
void estimateEdgeCounts(BinaryFunction &BF) {
EdgeWeightMap PredEdgeWeights;
EdgeWeightMap SuccEdgeWeights;
if (!opts::IterativeGuess) {
computeEdgeWeights<Inverse<BinaryBasicBlock *>>(BF, PredEdgeWeights);
computeEdgeWeights<BinaryBasicBlock *>(BF, SuccEdgeWeights);
}
if (opts::EqualizeBBCounts) {
LLVM_DEBUG(BF.print(dbgs(), "before equalize BB counts", true));
equalizeBBCounts(BF);
LLVM_DEBUG(BF.print(dbgs(), "after equalize BB counts", true));
}
if (opts::IterativeGuess)
guessEdgeByIterativeApproach(BF);
else
guessEdgeByRelHotness(BF, /*UseSuccs=*/false, PredEdgeWeights,
SuccEdgeWeights);
recalculateBBCounts(BF, /*AllEdges=*/false);
}
void solveMCF(BinaryFunction &BF, MCFCostFunction CostFunction) {
llvm_unreachable("not implemented");
}
}
}