forked from OSchip/llvm-project
487 lines
16 KiB
C++
487 lines
16 KiB
C++
//===- bolt/Passes/MCF.cpp ------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements functions for solving minimum-cost flow problem.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "bolt/Passes/MCF.h"
|
|
#include "bolt/Core/BinaryFunction.h"
|
|
#include "bolt/Passes/DataflowInfoManager.h"
|
|
#include "llvm/ADT/DenseMap.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include <algorithm>
|
|
#include <vector>
|
|
|
|
#undef DEBUG_TYPE
|
|
#define DEBUG_TYPE "mcf"
|
|
|
|
using namespace llvm;
|
|
using namespace bolt;
|
|
|
|
namespace opts {
|
|
|
|
extern cl::OptionCategory BoltOptCategory;
|
|
|
|
extern cl::opt<bool> TimeOpts;
|
|
|
|
static cl::opt<bool>
|
|
IterativeGuess("iterative-guess",
|
|
cl::desc("in non-LBR mode, guess edge counts using iterative technique"),
|
|
cl::ZeroOrMore,
|
|
cl::init(false),
|
|
cl::Hidden,
|
|
cl::cat(BoltOptCategory));
|
|
|
|
static cl::opt<bool>
|
|
EqualizeBBCounts("equalize-bb-counts",
|
|
cl::desc("in non-LBR mode, use same count for BBs "
|
|
"that should have equivalent count"),
|
|
cl::ZeroOrMore,
|
|
cl::init(false),
|
|
cl::Hidden,
|
|
cl::cat(BoltOptCategory));
|
|
|
|
static cl::opt<bool>
|
|
UseRArcs("mcf-use-rarcs",
|
|
cl::desc("in MCF, consider the possibility of cancelling flow to balance "
|
|
"edges"),
|
|
cl::ZeroOrMore,
|
|
cl::init(false),
|
|
cl::Hidden,
|
|
cl::cat(BoltOptCategory));
|
|
|
|
} // namespace opts
|
|
|
|
namespace llvm {
|
|
namespace bolt {
|
|
|
|
namespace {
|
|
|
|
// Edge Weight Inference Heuristic
|
|
//
|
|
// We start by maintaining the invariant used in LBR mode where the sum of
|
|
// pred edges count is equal to the block execution count. This loop will set
|
|
// pred edges count by balancing its own execution count in different pred
|
|
// edges. The weight of each edge is guessed by looking at how hot each pred
|
|
// block is (in terms of samples).
|
|
// There are two caveats in this approach. One is for critical edges and the
|
|
// other is for self-referencing blocks (loops of 1 BB). For critical edges,
|
|
// we can't infer the hotness of them based solely on pred BBs execution
|
|
// count. For each critical edge we look at the pred BB, then look at its
|
|
// succs to adjust its weight.
|
|
//
|
|
// [ 60 ] [ 25 ]
|
|
// | \ |
|
|
// [ 10 ] [ 75 ]
|
|
//
|
|
// The illustration above shows a critical edge \. We wish to adjust bb count
|
|
// 60 to 50 to properly determine the weight of the critical edge to be
|
|
// 50 / 75.
|
|
// For self-referencing edges, we attribute its weight by subtracting the
|
|
// current BB execution count by the sum of predecessors count if this result
|
|
// is non-negative.
|
|
using EdgeWeightMap =
|
|
DenseMap<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>,
|
|
double>;
|
|
|
|
template <class NodeT>
|
|
void updateEdgeWeight(EdgeWeightMap &EdgeWeights, const BinaryBasicBlock *A,
|
|
const BinaryBasicBlock *B, double Weight);
|
|
|
|
template <>
|
|
void updateEdgeWeight<BinaryBasicBlock *>(EdgeWeightMap &EdgeWeights,
|
|
const BinaryBasicBlock *A,
|
|
const BinaryBasicBlock *B,
|
|
double Weight) {
|
|
EdgeWeights[std::make_pair(A, B)] = Weight;
|
|
return;
|
|
}
|
|
|
|
template <>
|
|
void updateEdgeWeight<Inverse<BinaryBasicBlock *>>(EdgeWeightMap &EdgeWeights,
|
|
const BinaryBasicBlock *A,
|
|
const BinaryBasicBlock *B,
|
|
double Weight) {
|
|
EdgeWeights[std::make_pair(B, A)] = Weight;
|
|
return;
|
|
}
|
|
|
|
template <class NodeT>
|
|
void computeEdgeWeights(BinaryBasicBlock *BB, EdgeWeightMap &EdgeWeights) {
|
|
typedef GraphTraits<NodeT> GraphT;
|
|
typedef GraphTraits<Inverse<NodeT>> InvTraits;
|
|
|
|
double TotalChildrenCount = 0.0;
|
|
SmallVector<double, 4> ChildrenExecCount;
|
|
// First pass computes total children execution count that directly
|
|
// contribute to this BB.
|
|
for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
|
|
E = GraphT::child_end(BB);
|
|
CI != E; ++CI) {
|
|
typename GraphT::NodeRef Child = *CI;
|
|
double ChildExecCount = Child->getExecutionCount();
|
|
// Is self-reference?
|
|
if (Child == BB) {
|
|
ChildExecCount = 0.0; // will fill this in second pass
|
|
} else if (GraphT::child_end(BB) - GraphT::child_begin(BB) > 1 &&
|
|
InvTraits::child_end(Child) - InvTraits::child_begin(Child) >
|
|
1) {
|
|
// Handle critical edges. This will cause a skew towards crit edges, but
|
|
// it is a quick solution.
|
|
double CritWeight = 0.0;
|
|
uint64_t Denominator = 0;
|
|
for (typename InvTraits::ChildIteratorType
|
|
II = InvTraits::child_begin(Child),
|
|
IE = InvTraits::child_end(Child);
|
|
II != IE; ++II) {
|
|
typename GraphT::NodeRef N = *II;
|
|
Denominator += N->getExecutionCount();
|
|
if (N != BB)
|
|
continue;
|
|
CritWeight = N->getExecutionCount();
|
|
}
|
|
if (Denominator)
|
|
CritWeight /= static_cast<double>(Denominator);
|
|
ChildExecCount *= CritWeight;
|
|
}
|
|
ChildrenExecCount.push_back(ChildExecCount);
|
|
TotalChildrenCount += ChildExecCount;
|
|
}
|
|
// Second pass fixes the weight of a possible self-reference edge
|
|
uint32_t ChildIndex = 0;
|
|
for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
|
|
E = GraphT::child_end(BB);
|
|
CI != E; ++CI) {
|
|
typename GraphT::NodeRef Child = *CI;
|
|
if (Child != BB) {
|
|
++ChildIndex;
|
|
continue;
|
|
}
|
|
if (static_cast<double>(BB->getExecutionCount()) > TotalChildrenCount) {
|
|
ChildrenExecCount[ChildIndex] =
|
|
BB->getExecutionCount() - TotalChildrenCount;
|
|
TotalChildrenCount += ChildrenExecCount[ChildIndex];
|
|
}
|
|
break;
|
|
}
|
|
// Third pass finally assigns weights to edges
|
|
ChildIndex = 0;
|
|
for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
|
|
E = GraphT::child_end(BB);
|
|
CI != E; ++CI) {
|
|
typename GraphT::NodeRef Child = *CI;
|
|
double Weight = 1 / (GraphT::child_end(BB) - GraphT::child_begin(BB));
|
|
if (TotalChildrenCount != 0.0)
|
|
Weight = ChildrenExecCount[ChildIndex] / TotalChildrenCount;
|
|
updateEdgeWeight<NodeT>(EdgeWeights, BB, Child, Weight);
|
|
++ChildIndex;
|
|
}
|
|
}
|
|
|
|
template <class NodeT>
|
|
void computeEdgeWeights(BinaryFunction &BF, EdgeWeightMap &EdgeWeights) {
|
|
for (BinaryBasicBlock &BB : BF)
|
|
computeEdgeWeights<NodeT>(&BB, EdgeWeights);
|
|
}
|
|
|
|
/// Make BB count match the sum of all incoming edges. If AllEdges is true,
|
|
/// make it match max(SumPredEdges, SumSuccEdges).
|
|
void recalculateBBCounts(BinaryFunction &BF, bool AllEdges) {
|
|
for (BinaryBasicBlock &BB : BF) {
|
|
uint64_t TotalPredsEWeight = 0;
|
|
for (BinaryBasicBlock *Pred : BB.predecessors())
|
|
TotalPredsEWeight += Pred->getBranchInfo(BB).Count;
|
|
|
|
if (TotalPredsEWeight > BB.getExecutionCount())
|
|
BB.setExecutionCount(TotalPredsEWeight);
|
|
|
|
if (!AllEdges)
|
|
continue;
|
|
|
|
uint64_t TotalSuccsEWeight = 0;
|
|
for (BinaryBasicBlock::BinaryBranchInfo &BI : BB.branch_info())
|
|
TotalSuccsEWeight += BI.Count;
|
|
|
|
if (TotalSuccsEWeight > BB.getExecutionCount())
|
|
BB.setExecutionCount(TotalSuccsEWeight);
|
|
}
|
|
}
|
|
|
|
// This is our main edge count guessing heuristic. Look at predecessors and
|
|
// assign a proportionally higher count to pred edges coming from blocks with
|
|
// a higher execution count in comparison with the other predecessor blocks,
|
|
// making SumPredEdges match the current BB count.
|
|
// If "UseSucc" is true, apply the same logic to successor edges as well. Since
|
|
// some successor edges may already have assigned a count, only update it if the
|
|
// new count is higher.
|
|
void guessEdgeByRelHotness(BinaryFunction &BF, bool UseSucc,
|
|
EdgeWeightMap &PredEdgeWeights,
|
|
EdgeWeightMap &SuccEdgeWeights) {
|
|
for (BinaryBasicBlock &BB : BF) {
|
|
for (BinaryBasicBlock *Pred : BB.predecessors()) {
|
|
double RelativeExec = PredEdgeWeights[std::make_pair(Pred, &BB)];
|
|
RelativeExec *= BB.getExecutionCount();
|
|
BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB);
|
|
if (static_cast<uint64_t>(RelativeExec) > BI.Count)
|
|
BI.Count = static_cast<uint64_t>(RelativeExec);
|
|
}
|
|
|
|
if (!UseSucc)
|
|
continue;
|
|
|
|
auto BI = BB.branch_info_begin();
|
|
for (BinaryBasicBlock *Succ : BB.successors()) {
|
|
double RelativeExec = SuccEdgeWeights[std::make_pair(&BB, Succ)];
|
|
RelativeExec *= BB.getExecutionCount();
|
|
if (static_cast<uint64_t>(RelativeExec) > BI->Count)
|
|
BI->Count = static_cast<uint64_t>(RelativeExec);
|
|
++BI;
|
|
}
|
|
}
|
|
}
|
|
|
|
using ArcSet =
|
|
DenseSet<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>>;
|
|
|
|
/// Predecessor edges version of guessEdgeByIterativeApproach. GuessedArcs has
|
|
/// all edges we already established their count. Try to guess the count of
|
|
/// the remaining edge, if there is only one to guess, and return true if we
|
|
/// were able to guess.
|
|
bool guessPredEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
|
|
if (BB->pred_size() == 0)
|
|
return false;
|
|
|
|
uint64_t TotalPredCount = 0;
|
|
unsigned NumGuessedEdges = 0;
|
|
for (BinaryBasicBlock *Pred : BB->predecessors()) {
|
|
if (GuessedArcs.count(std::make_pair(Pred, BB)))
|
|
++NumGuessedEdges;
|
|
TotalPredCount += Pred->getBranchInfo(*BB).Count;
|
|
}
|
|
|
|
if (NumGuessedEdges != BB->pred_size() - 1)
|
|
return false;
|
|
|
|
int64_t Guessed =
|
|
static_cast<int64_t>(BB->getExecutionCount()) - TotalPredCount;
|
|
if (Guessed < 0)
|
|
Guessed = 0;
|
|
|
|
for (BinaryBasicBlock *Pred : BB->predecessors()) {
|
|
if (GuessedArcs.count(std::make_pair(Pred, BB)))
|
|
continue;
|
|
|
|
Pred->getBranchInfo(*BB).Count = Guessed;
|
|
return true;
|
|
}
|
|
llvm_unreachable("Expected unguessed arc");
|
|
}
|
|
|
|
/// Successor edges version of guessEdgeByIterativeApproach. GuessedArcs has
|
|
/// all edges we already established their count. Try to guess the count of
|
|
/// the remaining edge, if there is only one to guess, and return true if we
|
|
/// were able to guess.
|
|
bool guessSuccEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
|
|
if (BB->succ_size() == 0)
|
|
return false;
|
|
|
|
uint64_t TotalSuccCount = 0;
|
|
unsigned NumGuessedEdges = 0;
|
|
auto BI = BB->branch_info_begin();
|
|
for (BinaryBasicBlock *Succ : BB->successors()) {
|
|
if (GuessedArcs.count(std::make_pair(BB, Succ)))
|
|
++NumGuessedEdges;
|
|
TotalSuccCount += BI->Count;
|
|
++BI;
|
|
}
|
|
|
|
if (NumGuessedEdges != BB->succ_size() - 1)
|
|
return false;
|
|
|
|
int64_t Guessed =
|
|
static_cast<int64_t>(BB->getExecutionCount()) - TotalSuccCount;
|
|
if (Guessed < 0)
|
|
Guessed = 0;
|
|
|
|
BI = BB->branch_info_begin();
|
|
for (BinaryBasicBlock *Succ : BB->successors()) {
|
|
if (GuessedArcs.count(std::make_pair(BB, Succ))) {
|
|
++BI;
|
|
continue;
|
|
}
|
|
|
|
BI->Count = Guessed;
|
|
GuessedArcs.insert(std::make_pair(BB, Succ));
|
|
return true;
|
|
}
|
|
llvm_unreachable("Expected unguessed arc");
|
|
}
|
|
|
|
/// Guess edge count whenever we have only one edge (pred or succ) left
|
|
/// to guess. Then make its count equal to BB count minus all other edge
|
|
/// counts we already know their count. Repeat this until there is no
|
|
/// change.
|
|
void guessEdgeByIterativeApproach(BinaryFunction &BF) {
|
|
ArcSet KnownArcs;
|
|
bool Changed = false;
|
|
|
|
do {
|
|
Changed = false;
|
|
for (BinaryBasicBlock &BB : BF) {
|
|
if (guessPredEdgeCounts(&BB, KnownArcs))
|
|
Changed = true;
|
|
if (guessSuccEdgeCounts(&BB, KnownArcs))
|
|
Changed = true;
|
|
}
|
|
} while (Changed);
|
|
|
|
// Guess count for non-inferred edges
|
|
for (BinaryBasicBlock &BB : BF) {
|
|
for (BinaryBasicBlock *Pred : BB.predecessors()) {
|
|
if (KnownArcs.count(std::make_pair(Pred, &BB)))
|
|
continue;
|
|
BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB);
|
|
BI.Count =
|
|
std::min(Pred->getExecutionCount(), BB.getExecutionCount()) / 2;
|
|
KnownArcs.insert(std::make_pair(Pred, &BB));
|
|
}
|
|
auto BI = BB.branch_info_begin();
|
|
for (BinaryBasicBlock *Succ : BB.successors()) {
|
|
if (KnownArcs.count(std::make_pair(&BB, Succ))) {
|
|
++BI;
|
|
continue;
|
|
}
|
|
BI->Count =
|
|
std::min(BB.getExecutionCount(), Succ->getExecutionCount()) / 2;
|
|
KnownArcs.insert(std::make_pair(&BB, Succ));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Associate each basic block with the BinaryLoop object corresponding to the
|
|
/// innermost loop containing this block.
|
|
DenseMap<const BinaryBasicBlock *, const BinaryLoop *>
|
|
createLoopNestLevelMap(BinaryFunction &BF) {
|
|
DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel;
|
|
const BinaryLoopInfo &BLI = BF.getLoopInfo();
|
|
|
|
for (BinaryBasicBlock &BB : BF)
|
|
LoopNestLevel[&BB] = BLI[&BB];
|
|
|
|
return LoopNestLevel;
|
|
}
|
|
|
|
/// Implement the idea in "SamplePGO - The Power of Profile Guided Optimizations
|
|
/// without the Usability Burden" by Diego Novillo to make basic block counts
|
|
/// equal if we show that A dominates B, B post-dominates A and they are in the
|
|
/// same loop and same loop nesting level.
|
|
void equalizeBBCounts(BinaryFunction &BF) {
|
|
auto Info = DataflowInfoManager(BF, nullptr, nullptr);
|
|
DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
|
|
DominatorAnalysis<true> &PDA = Info.getPostDominatorAnalysis();
|
|
auto &InsnToBB = Info.getInsnToBBMap();
|
|
// These analyses work at the instruction granularity, but we really only need
|
|
// basic block granularity here. So we'll use a set of visited edges to avoid
|
|
// revisiting the same BBs again and again.
|
|
DenseMap<const BinaryBasicBlock *, std::set<const BinaryBasicBlock *>>
|
|
Visited;
|
|
// Equivalence classes mapping. Each equivalence class is defined by the set
|
|
// of BBs that obeys the aforementioned properties.
|
|
DenseMap<const BinaryBasicBlock *, signed> BBsToEC;
|
|
std::vector<std::vector<BinaryBasicBlock *>> Classes;
|
|
|
|
BF.calculateLoopInfo();
|
|
DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel =
|
|
createLoopNestLevelMap(BF);
|
|
|
|
for (BinaryBasicBlock &BB : BF)
|
|
BBsToEC[&BB] = -1;
|
|
|
|
for (BinaryBasicBlock &BB : BF) {
|
|
auto I = BB.begin();
|
|
if (I == BB.end())
|
|
continue;
|
|
|
|
DA.doForAllDominators(*I, [&](const MCInst &DomInst) {
|
|
BinaryBasicBlock *DomBB = InsnToBB[&DomInst];
|
|
if (Visited[DomBB].count(&BB))
|
|
return;
|
|
Visited[DomBB].insert(&BB);
|
|
if (!PDA.doesADominateB(*I, DomInst))
|
|
return;
|
|
if (LoopNestLevel[&BB] != LoopNestLevel[DomBB])
|
|
return;
|
|
if (BBsToEC[DomBB] == -1 && BBsToEC[&BB] == -1) {
|
|
BBsToEC[DomBB] = Classes.size();
|
|
BBsToEC[&BB] = Classes.size();
|
|
Classes.emplace_back();
|
|
Classes.back().push_back(DomBB);
|
|
Classes.back().push_back(&BB);
|
|
return;
|
|
}
|
|
if (BBsToEC[DomBB] == -1) {
|
|
BBsToEC[DomBB] = BBsToEC[&BB];
|
|
Classes[BBsToEC[&BB]].push_back(DomBB);
|
|
return;
|
|
}
|
|
if (BBsToEC[&BB] == -1) {
|
|
BBsToEC[&BB] = BBsToEC[DomBB];
|
|
Classes[BBsToEC[DomBB]].push_back(&BB);
|
|
return;
|
|
}
|
|
signed BBECNum = BBsToEC[&BB];
|
|
std::vector<BinaryBasicBlock *> DomEC = Classes[BBsToEC[DomBB]];
|
|
std::vector<BinaryBasicBlock *> BBEC = Classes[BBECNum];
|
|
for (BinaryBasicBlock *Block : DomEC) {
|
|
BBsToEC[Block] = BBECNum;
|
|
BBEC.push_back(Block);
|
|
}
|
|
DomEC.clear();
|
|
});
|
|
}
|
|
|
|
for (std::vector<BinaryBasicBlock *> &Class : Classes) {
|
|
uint64_t Max = 0ULL;
|
|
for (BinaryBasicBlock *BB : Class)
|
|
Max = std::max(Max, BB->getExecutionCount());
|
|
for (BinaryBasicBlock *BB : Class)
|
|
BB->setExecutionCount(Max);
|
|
}
|
|
}
|
|
|
|
} // end anonymous namespace
|
|
|
|
void estimateEdgeCounts(BinaryFunction &BF) {
|
|
EdgeWeightMap PredEdgeWeights;
|
|
EdgeWeightMap SuccEdgeWeights;
|
|
if (!opts::IterativeGuess) {
|
|
computeEdgeWeights<Inverse<BinaryBasicBlock *>>(BF, PredEdgeWeights);
|
|
computeEdgeWeights<BinaryBasicBlock *>(BF, SuccEdgeWeights);
|
|
}
|
|
if (opts::EqualizeBBCounts) {
|
|
LLVM_DEBUG(BF.print(dbgs(), "before equalize BB counts", true));
|
|
equalizeBBCounts(BF);
|
|
LLVM_DEBUG(BF.print(dbgs(), "after equalize BB counts", true));
|
|
}
|
|
if (opts::IterativeGuess)
|
|
guessEdgeByIterativeApproach(BF);
|
|
else
|
|
guessEdgeByRelHotness(BF, /*UseSuccs=*/false, PredEdgeWeights,
|
|
SuccEdgeWeights);
|
|
recalculateBBCounts(BF, /*AllEdges=*/false);
|
|
}
|
|
|
|
void solveMCF(BinaryFunction &BF, MCFCostFunction CostFunction) {
|
|
llvm_unreachable("not implemented");
|
|
}
|
|
|
|
} // namespace bolt
|
|
} // namespace llvm
|