forked from OSchip/llvm-project
629 lines
20 KiB
C++
629 lines
20 KiB
C++
//===- bolt/Passes/HFSortPlus.cpp - Order functions by hotness ------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// hfsort+ - layout of hot functions with i-TLB cache optimization.
|
|
//
|
|
// Given an ordering of hot functions (and hence, their assignment to the
|
|
// i-TLB pages), we can divide all functions calls Into two categories:
|
|
// - 'short' ones that have a caller-callee distance less than a page;
|
|
// - 'long' ones where the distance exceeds a page.
|
|
// The short calls are likely to result in a i-TLB cache hit. For the long ones,
|
|
// the hit/miss result depends on the 'hotness' of the page (i.e., how often
|
|
// the page is accessed). Assuming that functions are sent to the i-TLB cache
|
|
// in a random order, the probability that a page is present in the cache is
|
|
// proportional to the number of samples corresponding to the functions on the
|
|
// page. The following algorithm detects short and long calls, and optimizes
|
|
// the expected number of cache misses for the long ones.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "bolt/Passes/HFSort.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include <cmath>
|
|
#include <set>
|
|
#include <vector>
|
|
|
|
#define DEBUG_TYPE "hfsort"
|
|
|
|
using namespace llvm;
|
|
using namespace bolt;
|
|
|
|
namespace opts {
|
|
|
|
extern cl::OptionCategory BoltOptCategory;
|
|
|
|
cl::opt<unsigned>
|
|
ITLBPageSize("itlb-page-size",
|
|
cl::desc("The size of i-tlb cache page"),
|
|
cl::init(4096),
|
|
cl::ReallyHidden,
|
|
cl::ZeroOrMore,
|
|
cl::cat(BoltOptCategory));
|
|
|
|
cl::opt<unsigned>
|
|
ITLBEntries("itlb-entries",
|
|
cl::desc("The number of entries in i-tlb cache"),
|
|
cl::init(16),
|
|
cl::ReallyHidden,
|
|
cl::ZeroOrMore,
|
|
cl::cat(BoltOptCategory));
|
|
|
|
static cl::opt<unsigned>
|
|
ITLBDensity("itlb-density",
|
|
cl::desc("The density of i-tlb cache"),
|
|
cl::init(4096),
|
|
cl::ReallyHidden,
|
|
cl::ZeroOrMore,
|
|
cl::cat(BoltOptCategory));
|
|
|
|
static cl::opt<double>
|
|
MergeProbability("merge-probability",
|
|
cl::desc("The minimum probability of a call for merging two clusters"),
|
|
cl::init(0.9),
|
|
cl::ReallyHidden,
|
|
cl::ZeroOrMore,
|
|
cl::cat(BoltOptCategory));
|
|
|
|
static cl::opt<double>
|
|
ArcThreshold("arc-threshold",
|
|
cl::desc("The threshold for ignoring arcs with a small relative weight"),
|
|
cl::init(0.00000001),
|
|
cl::ReallyHidden,
|
|
cl::ZeroOrMore,
|
|
cl::cat(BoltOptCategory));
|
|
|
|
} // namespace opts
|
|
|
|
namespace llvm {
|
|
namespace bolt {
|
|
|
|
using NodeId = CallGraph::NodeId;
|
|
using Arc = CallGraph::Arc;
|
|
|
|
namespace {
|
|
|
|
class Edge;
|
|
using ArcList = std::vector<const Arc *>;
|
|
|
|
// A chain (ordered sequence) of nodes (functions) in the call graph
|
|
class Chain {
|
|
public:
|
|
Chain(const Chain &) = delete;
|
|
Chain(Chain &&) = default;
|
|
Chain &operator=(const Chain &) = delete;
|
|
Chain &operator=(Chain &&) = default;
|
|
|
|
explicit Chain(size_t Id_, NodeId Node, size_t Samples_, size_t Size_)
|
|
: Id(Id_), Samples(Samples_), Size(Size_), Nodes(1, Node) {}
|
|
|
|
double density() const { return static_cast<double>(Samples) / Size; }
|
|
|
|
Edge *getEdge(Chain *Other) const {
|
|
for (std::pair<Chain *, Edge *> It : Edges)
|
|
if (It.first == Other)
|
|
return It.second;
|
|
return nullptr;
|
|
}
|
|
|
|
void removeEdge(Chain *Other) {
|
|
auto It = Edges.begin();
|
|
while (It != Edges.end()) {
|
|
if (It->first == Other) {
|
|
Edges.erase(It);
|
|
return;
|
|
}
|
|
It++;
|
|
}
|
|
}
|
|
|
|
void addEdge(Chain *Other, Edge *Edge) { Edges.emplace_back(Other, Edge); }
|
|
|
|
void merge(Chain *Other) {
|
|
Nodes.insert(Nodes.end(), Other->Nodes.begin(), Other->Nodes.end());
|
|
Samples += Other->Samples;
|
|
Size += Other->Size;
|
|
}
|
|
|
|
void mergeEdges(Chain *Other);
|
|
|
|
void clear() {
|
|
Nodes.clear();
|
|
Edges.clear();
|
|
}
|
|
|
|
public:
|
|
size_t Id;
|
|
uint64_t Samples;
|
|
uint64_t Size;
|
|
// Cached score for the chain
|
|
double Score{0};
|
|
// Cached short-calls for the chain
|
|
double ShortCalls{0};
|
|
// Nodes in the chain
|
|
std::vector<NodeId> Nodes;
|
|
// Adjacent chains and corresponding edges (lists of arcs)
|
|
std::vector<std::pair<Chain *, Edge *>> Edges;
|
|
};
|
|
|
|
// An edge in the call graph representing Arcs between two Chains.
|
|
// When functions are merged Into chains, the edges are combined too so that
|
|
// there is always at most one edge between a pair of chains
|
|
class Edge {
|
|
public:
|
|
Edge(const Edge &) = delete;
|
|
Edge(Edge &&) = default;
|
|
Edge &operator=(const Edge &) = delete;
|
|
Edge &operator=(Edge &&) = default;
|
|
|
|
explicit Edge(Chain *SrcChain_, Chain *DstChain_, const Arc *A)
|
|
: SrcChain(SrcChain_), DstChain(DstChain_), Arcs(1, A) {}
|
|
|
|
void changeEndpoint(Chain *From, Chain *To) {
|
|
if (From == SrcChain)
|
|
SrcChain = To;
|
|
if (From == DstChain)
|
|
DstChain = To;
|
|
}
|
|
|
|
void moveArcs(Edge *Other) {
|
|
Arcs.insert(Arcs.end(), Other->Arcs.begin(), Other->Arcs.end());
|
|
Other->Arcs.clear();
|
|
}
|
|
|
|
void setMergeGain(Chain *PredChain, double ForwardGain, double BackwardGain) {
|
|
// When forward and backward gains are the same, prioritize merging that
|
|
// preserves the original order of the functions in the binary
|
|
if (std::abs(ForwardGain - BackwardGain) < 1e-8) {
|
|
if (SrcChain->Id < DstChain->Id) {
|
|
IsGainForward = true;
|
|
CachedGain = PredChain == SrcChain ? ForwardGain : BackwardGain;
|
|
} else {
|
|
IsGainForward = false;
|
|
CachedGain = PredChain == SrcChain ? BackwardGain : ForwardGain;
|
|
}
|
|
} else if (ForwardGain > BackwardGain) {
|
|
IsGainForward = PredChain == SrcChain;
|
|
CachedGain = ForwardGain;
|
|
} else {
|
|
IsGainForward = PredChain != SrcChain;
|
|
CachedGain = BackwardGain;
|
|
}
|
|
}
|
|
|
|
double gain() const { return CachedGain; }
|
|
|
|
Chain *predChain() const { return IsGainForward ? SrcChain : DstChain; }
|
|
|
|
Chain *succChain() const { return IsGainForward ? DstChain : SrcChain; }
|
|
|
|
private:
|
|
Chain *SrcChain{nullptr};
|
|
Chain *DstChain{nullptr};
|
|
|
|
public:
|
|
// Original arcs in the binary with corresponding execution counts
|
|
ArcList Arcs;
|
|
// Cached gain of merging the pair of chains
|
|
double CachedGain{-1.0};
|
|
// Since the gain of merging (Src, Dst) and (Dst, Src) might be different,
|
|
// we store a flag indicating which of the options results in a higher gain
|
|
bool IsGainForward;
|
|
};
|
|
|
|
void Chain::mergeEdges(Chain *Other) {
|
|
// Update edges adjacent to chain other
|
|
for (auto EdgeIt : Other->Edges) {
|
|
Chain *const DstChain = EdgeIt.first;
|
|
Edge *const DstEdge = EdgeIt.second;
|
|
Chain *const TargetChain = DstChain == Other ? this : DstChain;
|
|
|
|
// Find the corresponding edge in the current chain
|
|
Edge *CurEdge = getEdge(TargetChain);
|
|
if (CurEdge == nullptr) {
|
|
DstEdge->changeEndpoint(Other, this);
|
|
this->addEdge(TargetChain, DstEdge);
|
|
if (DstChain != this && DstChain != Other)
|
|
DstChain->addEdge(this, DstEdge);
|
|
} else {
|
|
CurEdge->moveArcs(DstEdge);
|
|
}
|
|
// Cleanup leftover edge
|
|
if (DstChain != Other)
|
|
DstChain->removeEdge(Other);
|
|
}
|
|
}
|
|
|
|
class HFSortPlus {
|
|
public:
|
|
explicit HFSortPlus(const CallGraph &Cg) : Cg(Cg) { initialize(); }
|
|
|
|
/// Run the algorithm and return ordered set of function clusters.
|
|
std::vector<Cluster> run() {
|
|
// Pass 1
|
|
runPassOne();
|
|
|
|
// Pass 2
|
|
runPassTwo();
|
|
|
|
outs() << "BOLT-INFO: hfsort+ reduced the number of chains from "
|
|
<< Cg.numNodes() << " to " << HotChains.size() << "\n";
|
|
|
|
// Sorting chains by density in decreasing order
|
|
auto DensityComparator = [](const Chain *L, const Chain *R) {
|
|
if (L->density() != R->density())
|
|
return L->density() > R->density();
|
|
// Making sure the comparison is deterministic
|
|
return L->Id < R->Id;
|
|
};
|
|
std::stable_sort(HotChains.begin(), HotChains.end(), DensityComparator);
|
|
|
|
// Return the set of clusters that are left, which are the ones that
|
|
// didn't get merged (so their first func is its original func)
|
|
std::vector<Cluster> Clusters;
|
|
Clusters.reserve(HotChains.size());
|
|
for (Chain *Chain : HotChains)
|
|
Clusters.emplace_back(Cluster(Chain->Nodes, Cg));
|
|
return Clusters;
|
|
}
|
|
|
|
private:
|
|
/// Initialize the set of active chains, function id to chain mapping,
|
|
/// total number of samples and function addresses.
|
|
void initialize() {
|
|
OutWeight.resize(Cg.numNodes(), 0);
|
|
InWeight.resize(Cg.numNodes(), 0);
|
|
AllChains.reserve(Cg.numNodes());
|
|
HotChains.reserve(Cg.numNodes());
|
|
NodeChain.resize(Cg.numNodes(), nullptr);
|
|
Addr.resize(Cg.numNodes(), 0);
|
|
|
|
// Initialize chains
|
|
for (NodeId F = 0; F < Cg.numNodes(); ++F) {
|
|
AllChains.emplace_back(F, F, Cg.samples(F), Cg.size(F));
|
|
HotChains.push_back(&AllChains.back());
|
|
NodeChain[F] = &AllChains.back();
|
|
TotalSamples += Cg.samples(F);
|
|
for (NodeId Succ : Cg.successors(F)) {
|
|
if (F == Succ)
|
|
continue;
|
|
const Arc &Arc = *Cg.findArc(F, Succ);
|
|
OutWeight[F] += Arc.weight();
|
|
InWeight[Succ] += Arc.weight();
|
|
}
|
|
}
|
|
|
|
AllEdges.reserve(Cg.numArcs());
|
|
for (NodeId F = 0; F < Cg.numNodes(); ++F) {
|
|
for (NodeId Succ : Cg.successors(F)) {
|
|
if (F == Succ)
|
|
continue;
|
|
const Arc &Arc = *Cg.findArc(F, Succ);
|
|
if (Arc.weight() == 0.0 ||
|
|
Arc.weight() / TotalSamples < opts::ArcThreshold) {
|
|
continue;
|
|
}
|
|
|
|
Edge *CurEdge = NodeChain[F]->getEdge(NodeChain[Succ]);
|
|
if (CurEdge != nullptr) {
|
|
// This edge is already present in the graph
|
|
assert(NodeChain[Succ]->getEdge(NodeChain[F]) != nullptr);
|
|
CurEdge->Arcs.push_back(&Arc);
|
|
} else {
|
|
// This is a new edge
|
|
AllEdges.emplace_back(NodeChain[F], NodeChain[Succ], &Arc);
|
|
NodeChain[F]->addEdge(NodeChain[Succ], &AllEdges.back());
|
|
NodeChain[Succ]->addEdge(NodeChain[F], &AllEdges.back());
|
|
}
|
|
}
|
|
}
|
|
|
|
for (Chain *&Chain : HotChains) {
|
|
Chain->ShortCalls = shortCalls(Chain);
|
|
Chain->Score = score(Chain);
|
|
}
|
|
}
|
|
|
|
/// The probability that a page with a given density is not in the cache.
|
|
///
|
|
/// Assume that the hot functions are called in a random order; then the
|
|
/// probability of an i-TLB page being accessed after a function call is
|
|
/// p = pageSamples / TotalSamples. The probability that the page is not
|
|
/// accessed is (1 - p), and the probability that it is not in the cache
|
|
/// (i.e. not accessed during the last kCacheEntries function calls)
|
|
/// is (1 - p)^kCacheEntries
|
|
double missProbability(double ChainDensity) const {
|
|
double PageSamples = ChainDensity * opts::ITLBDensity;
|
|
|
|
if (PageSamples >= TotalSamples)
|
|
return 0;
|
|
|
|
double P = PageSamples / TotalSamples;
|
|
return pow(1.0 - P, double(opts::ITLBEntries));
|
|
}
|
|
|
|
/// The expected number of calls on different i-TLB pages for an arc of the
|
|
/// call graph with a specified weight
|
|
double expectedCalls(uint64_t SrcAddr, uint64_t DstAddr,
|
|
double Weight) const {
|
|
uint64_t Dist = SrcAddr >= DstAddr ? SrcAddr - DstAddr : DstAddr - SrcAddr;
|
|
if (Dist >= opts::ITLBPageSize)
|
|
return 0;
|
|
|
|
double D = double(Dist) / double(opts::ITLBPageSize);
|
|
// Increasing the importance of shorter calls
|
|
return (1.0 - D * D) * Weight;
|
|
}
|
|
|
|
/// The expected number of calls within a given chain with both endpoints on
|
|
/// the same cache page
|
|
double shortCalls(Chain *Chain) const {
|
|
Edge *Edge = Chain->getEdge(Chain);
|
|
if (Edge == nullptr)
|
|
return 0;
|
|
|
|
double Calls = 0;
|
|
for (const Arc *Arc : Edge->Arcs) {
|
|
uint64_t SrcAddr = Addr[Arc->src()] + uint64_t(Arc->avgCallOffset());
|
|
uint64_t DstAddr = Addr[Arc->dst()];
|
|
Calls += expectedCalls(SrcAddr, DstAddr, Arc->weight());
|
|
}
|
|
return Calls;
|
|
}
|
|
|
|
/// The number of calls between the two chains with both endpoints on
|
|
/// the same i-TLB page, assuming that a given pair of chains gets merged
|
|
double shortCalls(Chain *ChainPred, Chain *ChainSucc, Edge *Edge) const {
|
|
double Calls = 0;
|
|
for (const Arc *Arc : Edge->Arcs) {
|
|
Chain *SrcChain = NodeChain[Arc->src()];
|
|
uint64_t SrcAddr;
|
|
uint64_t DstAddr;
|
|
if (SrcChain == ChainPred) {
|
|
SrcAddr = Addr[Arc->src()] + uint64_t(Arc->avgCallOffset());
|
|
DstAddr = Addr[Arc->dst()] + ChainPred->Size;
|
|
} else {
|
|
SrcAddr =
|
|
Addr[Arc->src()] + uint64_t(Arc->avgCallOffset()) + ChainPred->Size;
|
|
DstAddr = Addr[Arc->dst()];
|
|
}
|
|
Calls += expectedCalls(SrcAddr, DstAddr, Arc->weight());
|
|
}
|
|
|
|
Calls += ChainPred->ShortCalls;
|
|
Calls += ChainSucc->ShortCalls;
|
|
|
|
return Calls;
|
|
}
|
|
|
|
double score(Chain *Chain) const {
|
|
double LongCalls = Chain->Samples - Chain->ShortCalls;
|
|
return LongCalls * missProbability(Chain->density());
|
|
}
|
|
|
|
/// The gain of merging two chains.
|
|
///
|
|
/// We assume that the final chains are sorted by their density, and hence
|
|
/// every chain is likely to be adjacent with chains of the same density.
|
|
/// Thus, the 'hotness' of every chain can be estimated by density*pageSize,
|
|
/// which is used to compute the probability of cache misses for long calls
|
|
/// of a given chain.
|
|
/// The result is also scaled by the size of the resulting chain in order to
|
|
/// increase the chance of merging short chains, which is helpful for
|
|
/// the i-cache performance.
|
|
double mergeGain(Chain *ChainPred, Chain *ChainSucc, Edge *Edge) const {
|
|
// Cache misses on the chains before merging
|
|
double CurScore = ChainPred->Score + ChainSucc->Score;
|
|
|
|
// Cache misses on the merged chain
|
|
double LongCalls = ChainPred->Samples + ChainSucc->Samples -
|
|
shortCalls(ChainPred, ChainSucc, Edge);
|
|
const double MergedSamples = ChainPred->Samples + ChainSucc->Samples;
|
|
const double MergedSize = ChainPred->Size + ChainSucc->Size;
|
|
double NewScore = LongCalls * missProbability(MergedSamples / MergedSize);
|
|
|
|
double Gain = CurScore - NewScore;
|
|
// Scale the result to increase the importance of merging short chains
|
|
Gain /= std::min(ChainPred->Size, ChainSucc->Size);
|
|
|
|
return Gain;
|
|
}
|
|
|
|
/// Run the first optimization pass of the algorithm:
|
|
/// Merge chains that call each other with a high probability.
|
|
void runPassOne() {
|
|
// Find candidate pairs of chains for merging
|
|
std::vector<const Arc *> ArcsToMerge;
|
|
for (Chain *ChainPred : HotChains) {
|
|
NodeId F = ChainPred->Nodes.back();
|
|
for (NodeId Succ : Cg.successors(F)) {
|
|
if (F == Succ)
|
|
continue;
|
|
|
|
const Arc &Arc = *Cg.findArc(F, Succ);
|
|
if (Arc.weight() == 0.0 ||
|
|
Arc.weight() / TotalSamples < opts::ArcThreshold)
|
|
continue;
|
|
|
|
const double CallsFromPred = OutWeight[F];
|
|
const double CallsToSucc = InWeight[Succ];
|
|
const double CallsPredSucc = Arc.weight();
|
|
|
|
// Probability that the first chain is calling the second one
|
|
const double ProbOut =
|
|
CallsFromPred > 0 ? CallsPredSucc / CallsFromPred : 0;
|
|
assert(0.0 <= ProbOut && ProbOut <= 1.0 && "incorrect out-probability");
|
|
|
|
// Probability that the second chain is called From the first one
|
|
const double ProbIn = CallsToSucc > 0 ? CallsPredSucc / CallsToSucc : 0;
|
|
assert(0.0 <= ProbIn && ProbIn <= 1.0 && "incorrect in-probability");
|
|
|
|
if (std::min(ProbOut, ProbIn) >= opts::MergeProbability)
|
|
ArcsToMerge.push_back(&Arc);
|
|
}
|
|
}
|
|
|
|
// Sort the pairs by the weight in reverse order
|
|
std::sort(
|
|
ArcsToMerge.begin(), ArcsToMerge.end(),
|
|
[](const Arc *L, const Arc *R) { return L->weight() > R->weight(); });
|
|
|
|
// Merge the pairs of chains
|
|
for (const Arc *Arc : ArcsToMerge) {
|
|
Chain *ChainPred = NodeChain[Arc->src()];
|
|
Chain *ChainSucc = NodeChain[Arc->dst()];
|
|
if (ChainPred == ChainSucc)
|
|
continue;
|
|
if (ChainPred->Nodes.back() == Arc->src() &&
|
|
ChainSucc->Nodes.front() == Arc->dst())
|
|
mergeChains(ChainPred, ChainSucc);
|
|
}
|
|
}
|
|
|
|
/// Run the second optimization pass of the hfsort+ algorithm:
|
|
/// Merge pairs of chains while there is an improvement in the
|
|
/// expected cache miss ratio.
|
|
void runPassTwo() {
|
|
// Creating a priority queue containing all edges ordered by the merge gain
|
|
auto GainComparator = [](Edge *L, Edge *R) {
|
|
if (std::abs(L->gain() - R->gain()) > 1e-8)
|
|
return L->gain() > R->gain();
|
|
|
|
// Making sure the comparison is deterministic
|
|
if (L->predChain()->Id != R->predChain()->Id)
|
|
return L->predChain()->Id < R->predChain()->Id;
|
|
|
|
return L->succChain()->Id < R->succChain()->Id;
|
|
};
|
|
std::set<Edge *, decltype(GainComparator)> Queue(GainComparator);
|
|
|
|
// Inserting the edges Into the queue
|
|
for (Chain *ChainPred : HotChains) {
|
|
for (auto EdgeIt : ChainPred->Edges) {
|
|
Chain *ChainSucc = EdgeIt.first;
|
|
Edge *ChainEdge = EdgeIt.second;
|
|
// Ignore loop edges
|
|
if (ChainPred == ChainSucc)
|
|
continue;
|
|
// Ignore already processed edges
|
|
if (ChainEdge->gain() != -1.0)
|
|
continue;
|
|
|
|
// Compute the gain of merging the two chains
|
|
auto ForwardGain = mergeGain(ChainPred, ChainSucc, ChainEdge);
|
|
auto BackwardGain = mergeGain(ChainSucc, ChainPred, ChainEdge);
|
|
ChainEdge->setMergeGain(ChainPred, ForwardGain, BackwardGain);
|
|
if (ChainEdge->gain() > 0.0)
|
|
Queue.insert(ChainEdge);
|
|
}
|
|
}
|
|
|
|
// Merge the chains while the gain of merging is positive
|
|
while (!Queue.empty()) {
|
|
// Extract the best (top) edge for merging
|
|
Edge *It = *Queue.begin();
|
|
Queue.erase(Queue.begin());
|
|
Edge *BestEdge = It;
|
|
Chain *BestChainPred = BestEdge->predChain();
|
|
Chain *BestChainSucc = BestEdge->succChain();
|
|
if (BestChainPred == BestChainSucc || BestEdge->gain() <= 0.0)
|
|
continue;
|
|
|
|
// Remove outdated edges
|
|
for (std::pair<Chain *, Edge *> EdgeIt : BestChainPred->Edges)
|
|
Queue.erase(EdgeIt.second);
|
|
for (std::pair<Chain *, Edge *> EdgeIt : BestChainSucc->Edges)
|
|
Queue.erase(EdgeIt.second);
|
|
|
|
// Merge the best pair of chains
|
|
mergeChains(BestChainPred, BestChainSucc);
|
|
|
|
// Insert newly created edges Into the queue
|
|
for (auto EdgeIt : BestChainPred->Edges) {
|
|
Chain *ChainSucc = EdgeIt.first;
|
|
Edge *ChainEdge = EdgeIt.second;
|
|
// Ignore loop edges
|
|
if (BestChainPred == ChainSucc)
|
|
continue;
|
|
|
|
// Compute the gain of merging the two chains
|
|
auto ForwardGain = mergeGain(BestChainPred, ChainSucc, ChainEdge);
|
|
auto BackwardGain = mergeGain(ChainSucc, BestChainPred, ChainEdge);
|
|
ChainEdge->setMergeGain(BestChainPred, ForwardGain, BackwardGain);
|
|
if (ChainEdge->gain() > 0.0)
|
|
Queue.insert(ChainEdge);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Merge chain From into chain Into and update the list of active chains.
|
|
void mergeChains(Chain *Into, Chain *From) {
|
|
assert(Into != From && "cannot merge a chain with itself");
|
|
Into->merge(From);
|
|
|
|
// Update the chains and addresses for functions merged from From
|
|
size_t CurAddr = 0;
|
|
for (NodeId F : Into->Nodes) {
|
|
NodeChain[F] = Into;
|
|
Addr[F] = CurAddr;
|
|
CurAddr += Cg.size(F);
|
|
}
|
|
|
|
// Merge edges
|
|
Into->mergeEdges(From);
|
|
From->clear();
|
|
|
|
// Update cached scores for the new chain
|
|
Into->ShortCalls = shortCalls(Into);
|
|
Into->Score = score(Into);
|
|
|
|
// Remove chain From From the list of active chains
|
|
auto it = std::remove(HotChains.begin(), HotChains.end(), From);
|
|
HotChains.erase(it, HotChains.end());
|
|
}
|
|
|
|
private:
|
|
// The call graph
|
|
const CallGraph &Cg;
|
|
|
|
// All chains of functions
|
|
std::vector<Chain> AllChains;
|
|
|
|
// Active chains. The vector gets updated at runtime when chains are merged
|
|
std::vector<Chain *> HotChains;
|
|
|
|
// All edges between chains
|
|
std::vector<Edge> AllEdges;
|
|
|
|
// Node_id => chain
|
|
std::vector<Chain *> NodeChain;
|
|
|
|
// Current address of the function From the beginning of its chain
|
|
std::vector<uint64_t> Addr;
|
|
|
|
// Total weight of outgoing arcs for each function
|
|
std::vector<double> OutWeight;
|
|
|
|
// Total weight of incoming arcs for each function
|
|
std::vector<double> InWeight;
|
|
// The total number of samples in the graph
|
|
double TotalSamples{0};
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
std::vector<Cluster> hfsortPlus(CallGraph &Cg) {
|
|
// It is required that the sum of incoming arc weights is not greater
|
|
// than the number of samples for every function.
|
|
// Ensuring the call graph obeys the property before running the algorithm.
|
|
Cg.adjustArcWeights();
|
|
return HFSortPlus(Cg).run();
|
|
}
|
|
|
|
} // namespace bolt
|
|
} // namespace llvm
|