[BOLT] Fix hfsort+ caching mechanism

Summary:
There's good news and bad news.

The good news is that this fixes the caching mechanism used by hfsort+ so that we always get the correct end results, i.e. the order is the same whether the cache is enabled or not.
The bad news is that it takes about the same amount of time to run as the original (~6 min).
The good news is that I can make some improvements to this implementation, which I'll put up in another diff.

The problem with the old caching mechanism is that it cached values that depended on adjacent sets of clusters. It only invalidated the entries for the clusters being merged, and none of the other clusters that might have been affected. This version computes the adjacency information up front and updates it after every merge, rather than recomputing it on each iteration, and it uses the adjacency data to properly invalidate any affected cached values.
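The idea is roughly the following (a minimal standalone sketch, not the BOLT code itself; GainCache, Adjacency, and onMerge are hypothetical names for illustration, and std::vector<bool> stands in for llvm::BitVector):

#include <algorithm>
#include <cstddef>
#include <vector>

// Cache of mergeGain results keyed by (cluster id, cluster id); a Valid bit guards each slot.
struct GainCache {
  explicit GainCache(size_t N) : N(N), Value(N * N, 0.0), Valid(N * N, false) {}
  bool contains(size_t A, size_t B) const { return Valid[A * N + B]; }
  double get(size_t A, size_t B) const { return Value[A * N + B]; }
  void set(size_t A, size_t B, double G) { Value[A * N + B] = G; Valid[A * N + B] = true; }
  // Drop every cached pair whose first element is C.
  void invalidate(size_t C) {
    std::fill(Valid.begin() + C * N, Valid.begin() + (C + 1) * N, false);
  }
  size_t N;
  std::vector<double> Value;
  std::vector<bool> Valid;
};

// Symmetric adjacency matrix over cluster ids, maintained across merges.
struct Adjacency {
  explicit Adjacency(size_t N) : Bits(N, std::vector<bool>(N, false)) {}
  void set(size_t A, size_t B) { Bits[A][B] = true; Bits[B][A] = true; }
  // B is merged into A: A inherits B's neighbors and B becomes disconnected.
  void merge(size_t A, size_t B) {
    for (size_t I = 0; I < Bits.size(); ++I) {
      if (Bits[B][I] && I != A) { Bits[A][I] = true; Bits[I][A] = true; }
      Bits[B][I] = false; Bits[I][B] = false;
    }
    Bits[A][B] = false; Bits[B][A] = false;
  }
  std::vector<std::vector<bool>> Bits;
};

// After merging Succ into Pred: update adjacency first, then invalidate the
// cached gains for Pred and for every cluster now adjacent to it, since their
// best merge partner may have changed.
void onMerge(size_t Pred, size_t Succ, Adjacency &Adj, GainCache &Cache) {
  Adj.merge(Pred, Succ);
  Cache.invalidate(Pred);
  for (size_t I = 0; I < Adj.Bits.size(); ++I)
    if (Adj.Bits[Pred][I]) Cache.invalidate(I);
}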

(cherry picked from FBD5203023)
Bill Nell 2017-06-06 17:43:45 -07:00 committed by Maksim Panchenko
parent 583790ee22
commit ea53066287
7 changed files with 338 additions and 163 deletions


@ -12,13 +12,14 @@
#include "BinaryFunctionCallGraph.h"
#include "BinaryFunction.h"
#include "BinaryContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Options.h"
#include "llvm/Support/Timer.h"
#define DEBUG_TYPE "callgraph"
namespace opts {
extern llvm::cl::opt<bool> TimeOpts;
extern llvm::cl::opt<unsigned> Verbosity;
}
namespace llvm {
@ -130,8 +131,11 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
const auto DstId = lookupNode(DstFunc);
const auto AvgDelta = !UseEdgeCounts ? Offset - DstFunc->getAddress() : 0;
Cg.incArcWeight(SrcId, DstId, Count, AvgDelta);
DEBUG(dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function
<< " -> " << *DstFunc << " @ " << Offset << "\n");
DEBUG(
if (opts::Verbosity > 1) {
dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function
<< " -> " << *DstFunc << " @ " << Offset << "\n";
});
return true;
}
return false;
@ -194,8 +198,16 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
}
}
outs() << "BOLT-WARNING: buildCallGraph: " << NotProcessed
<< " callsites not processed out of " << TotalCalls << "\n";
#ifndef NDEBUG
bool PrintInfo = DebugFlag && isCurrentDebugType("callgraph");
#else
bool PrintInfo = false;
#endif
if (PrintInfo || opts::Verbosity > 0) {
outs() << format("BOLT-INFO: buildCallGraph: %u nodes, density = %.6lf, "
"%u callsites not processed out of %u.\n",
Cg.numNodes(), Cg.density(), NotProcessed, TotalCalls);
}
return Cg;
}


@ -10,8 +10,6 @@
//===----------------------------------------------------------------------===//
#include "CallGraph.h"
#include "BinaryFunction.h"
#include "BinaryContext.h"
#define DEBUG_TYPE "callgraph"


@ -130,6 +130,10 @@ public:
return Arcs;
}
double density() const {
return double(Arcs.size()) / (Nodes.size()*Nodes.size());
}
void normalizeArcWeights(bool UseEdgeCounts);
template <typename L>


@ -112,17 +112,22 @@ void Cluster::reverseTargets() {
std::reverse(Targets.begin(), Targets.end());
}
void Cluster::merge(Cluster&& Other, const double Aw) {
void Cluster::merge(const Cluster& Other, const double Aw) {
Targets.insert(Targets.end(),
Other.Targets.begin(),
Other.Targets.end());
Size += Other.Size;
Samples += Other.Samples;
Density = (double)Samples / Size;
}
Other.Size = 0;
Other.Samples = 0;
Other.Targets.clear();
void Cluster::clear() {
Id = -1u;
Size = 0;
Samples = 0;
Density = 0.0;
Targets.clear();
Frozen = false;
}
std::vector<Cluster> clusterize(const CallGraph &Cg) {
@ -218,7 +223,8 @@ std::vector<Cluster> clusterize(const CallGraph &Cg) {
FuncCluster[F] = PredCluster;
}
PredCluster->merge(std::move(*Cluster));
PredCluster->merge(*Cluster);
Cluster->clear();
}
// Return the set of Clusters that are left, which are the ones that
@ -281,7 +287,7 @@ std::vector<Cluster> randomClusters(const CallGraph &Cg) {
if (MergeIdx == Clusters.size()) {
++Idx;
} else {
Clusters[Idx].merge(std::move(Clusters[MergeIdx]));
Clusters[Idx].merge(Clusters[MergeIdx]);
Clusters.erase(Clusters.begin() + MergeIdx);
}
}


@ -55,7 +55,8 @@ public:
uint32_t size() const { return Size; }
bool frozen() const { return Frozen; }
void freeze() { Frozen = true; }
void merge(Cluster &&Other, const double Aw = 0);
void merge(const Cluster &Other, const double Aw = 0);
void clear();
size_t numTargets() const {
return Targets.size();
}
@ -66,12 +67,13 @@ public:
return Targets[N];
}
void reverseTargets();
bool hasId() const { return Id != -1u; }
void setId(uint32_t NewId) {
assert(Id == -1u);
assert(!hasId());
Id = NewId;
}
uint32_t id() const {
assert(Id != -1u);
assert(hasId());
return Id;
}
private:


@ -28,18 +28,53 @@
*/
#include "HFSort.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Options.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
#include <unordered_map>
#include <unordered_set>
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#undef DEBUG_TYPE
#define DEBUG_TYPE "hfsort"
namespace opts {
extern llvm::cl::OptionCategory BoltOptCategory;
extern llvm::cl::opt<bool> Verbosity;
static llvm::cl::opt<bool>
UseGainCache("hfsort+-use-cache",
llvm::cl::desc("Use a cache for mergeGain results when computing hfsort+."),
llvm::cl::ZeroOrMore,
llvm::cl::init(true),
llvm::cl::Hidden,
llvm::cl::cat(BoltOptCategory));
static llvm::cl::opt<bool>
UseShortCallCache("hfsort+-use-short-call-cache",
llvm::cl::desc("Use a cache for shortCall results when computing hfsort+."),
llvm::cl::ZeroOrMore,
llvm::cl::init(true),
llvm::cl::Hidden,
llvm::cl::cat(BoltOptCategory));
const char* cacheKindString() {
if (opts::UseGainCache && opts::UseShortCallCache)
return "gain + short call cache";
else if (opts::UseGainCache)
return "gain cache";
else if (opts::UseShortCallCache)
return "short call cache";
else
return "no cache";
}
}
namespace llvm {
namespace bolt {
@ -60,66 +95,136 @@ constexpr uint32_t ITLBEntries = 16;
constexpr size_t InvalidAddr = -1;
template <typename A, typename B>
class HashPair {
// This class maintains adjacency information for all Clusters being
// processed. It is used to invalidate cache entries when merging
// Clusters and for visiting all neighbors of any given Cluster.
class AdjacencyMatrix {
public:
size_t operator()(const std::pair<A, B> &P) const {
size_t Seed(0);
Seed = hashCombine(Seed, (int64_t)P.first);
Seed = hashCombine(Seed, (int64_t)P.second);
return Seed;
AdjacencyMatrix(const CallGraph &Cg,
std::vector<Cluster *> &Clusters,
const std::vector<Cluster *> &FuncCluster)
: Clusters(Clusters),
Bits(Cg.numNodes(), BitVector(Cg.numNodes(), false)) {
initialize(Cg, FuncCluster);
}
template <typename F>
void forallAdjacent(const Cluster *C, F Func) const {
const_cast<AdjacencyMatrix *>(this)->forallAdjacent(C, Func);
}
template <typename F>
void forallAdjacent(const Cluster *C, F Func) {
for (auto I = Bits[C->id()].find_first(); I != -1; I = Bits[C->id()].find_next(I)) {
Func(Clusters[I]);
}
}
void merge(const Cluster *A, const Cluster *B) {
Bits[A->id()] |= Bits[B->id()];
Bits[A->id()][A->id()] = false;
Bits[A->id()][B->id()] = false;
Bits[B->id()][A->id()] = false;
for (auto I = Bits[B->id()].find_first(); I != -1; I = Bits[B->id()].find_next(I)) {
Bits[I][A->id()] = true;
Bits[I][B->id()] = false;
}
}
void dump(const Cluster *A) const {
outs() << "Cluster " << A->id() << ":";
forallAdjacent(A,
[this,A](const Cluster *B) {
outs() << " " << B->id();
});
}
void dump() const {
for (auto *A : Clusters) {
if (!A) continue;
dump(A);
outs() << "\n";
}
}
private:
void set(const Cluster *A, const Cluster *B, bool Value) {
assert(A != B);
Bits[A->id()][B->id()] = Value;
Bits[B->id()][A->id()] = Value;
}
void initialize(const CallGraph &Cg, const std::vector<Cluster *> &FuncCluster) {
for (auto *A : Clusters) {
for (auto TargetId : A->targets()) {
for (auto Succ : Cg.successors(TargetId)) {
auto *B = FuncCluster[Succ];
if (!B || B == A) continue;
set(A, B, true);
}
for (auto Pred : Cg.predecessors(TargetId)) {
auto *B = FuncCluster[Pred];
if (!B || B == A) continue;
set(A, B, true);
}
}
}
}
std::vector<Cluster *> Clusters;
std::vector<BitVector> Bits;
};
// A cache of precomputed results for a pair of clusters
class PrecomputedResults {
public:
PrecomputedResults() {}
bool contains(Cluster *First, Cluster *Second) const {
if (InvalidKeys.count(First) || InvalidKeys.count(Second)) {
return false;
}
const auto Key = std::make_pair(First, Second);
return Cache.find(Key) != Cache.end();
explicit PrecomputedResults(size_t Size)
: Size(Size),
Cache(new double[Size*Size]),
Valid(Size * Size, false) {
memset(Cache, 0, sizeof(double)*Size*Size);
}
~PrecomputedResults() {
delete[] Cache;
}
double get(Cluster *First, Cluster *Second) const {
bool contains(const Cluster *First, const Cluster *Second) const {
return Valid[index(First, Second)];
}
double get(const Cluster *First, const Cluster *Second) const {
assert(contains(First, Second));
const auto Key = std::make_pair(First, Second); // TODO: use min/max?
return Cache.find(Key)->second;
return Cache[index(First, Second)];
}
void set(Cluster *First, Cluster *Second, double Value) {
const auto Key = std::make_pair(First, Second);
Cache[Key] = Value;
validate(First);
validate(Second);
void set(const Cluster *First, const Cluster *Second, double Value) {
const auto Index = index(First, Second);
Cache[Index] = Value;
Valid[Index] = true;
}
void validate(Cluster *C) {
auto Itr = InvalidKeys.find(C);
if (Itr != InvalidKeys.end())
InvalidKeys.erase(Itr);
void invalidate(const AdjacencyMatrix &Adjacent, const Cluster *C) {
invalidate(C);
Adjacent.forallAdjacent(C, [&](const Cluster *A) { invalidate(A); });
}
void validateAll() {
InvalidKeys.clear();
}
void invalidate(Cluster *Cluster) {
InvalidKeys.insert(Cluster);
}
private:
std::unordered_map<std::pair<Cluster *, Cluster *>,
double,
HashPair<Cluster *,Cluster *>> Cache;
std::unordered_set<Cluster *> InvalidKeys;
void invalidate(const Cluster *C) {
Valid.reset(C->id() * Size, (C->id() + 1) * Size);
}
size_t index(const Cluster *First, const Cluster *Second) const {
return (First->id() * Size) + Second->id();
}
size_t Size;
double *Cache;
BitVector Valid;
};
// A wrapper for algorthm-wide variables
// A wrapper for algorithm-wide variables
struct AlgoState {
explicit AlgoState(size_t Size)
: Cache(Size), ShortCallPairCache(Size) { }
// the call graph
const CallGraph *Cg;
// the total number of samples in the graph
@ -130,42 +235,72 @@ struct AlgoState {
std::vector<size_t> Addr;
// maximum cluster id.
size_t MaxClusterId{0};
// A cache that keeps precomputed values of mergeGain for pairs of clusters;
// when a pair of clusters (x,y) gets merged, we need to invalidate the pairs
// containing both x and y (and recompute them on the next iteration)
PrecomputedResults Cache;
// Cache for shortCalls for a single cluster.
std::unordered_map<const Cluster *, double> ShortCallCache;
// Cache for shortCalls for a pair of Clusters
PrecomputedResults ShortCallPairCache;
};
}
/*
* Sorting clusters by their density in decreasing order
*/
void sortByDensity(std::vector<Cluster *> &Clusters) {
std::stable_sort(
Clusters.begin(),
Clusters.end(),
[&] (const Cluster *C1, const Cluster *C2) {
const double D1 = C1->density();
const double D2 = C2->density();
// making sure the sorting is deterministic
if (D1 != D2) return D1 > D2;
if (C1->size() != C2->size()) return C1->size() < C2->size();
if (C1->samples() != C2->samples()) return C1->samples() > C2->samples();
return C1->target(0) < C2->target(0);
}
);
}
/*
* Density of a cluster formed by merging a given pair of clusters
*/
double density(Cluster *ClusterPred, Cluster *ClusterSucc) {
double density(const Cluster *ClusterPred, const Cluster *ClusterSucc) {
const double CombinedSamples = ClusterPred->samples() + ClusterSucc->samples();
const double CombinedSize = ClusterPred->size() + ClusterSucc->size();
return CombinedSamples / CombinedSize;
}
/*
* Deterministically compare clusters by their density in decreasing order.
*/
bool compareClusters(const Cluster *C1, const Cluster *C2) {
const double D1 = C1->density();
const double D2 = C2->density();
// making sure the sorting is deterministic
if (D1 != D2) return D1 > D2;
if (C1->size() != C2->size()) return C1->size() < C2->size();
if (C1->samples() != C2->samples()) return C1->samples() > C2->samples();
return C1->target(0) < C2->target(0);
}
/*
* Deterministically compare pairs of clusters by their density
* in decreasing order.
*/
bool compareClusterPairs(const Cluster *A1, const Cluster *B1,
const Cluster *A2, const Cluster *B2) {
const auto D1 = density(A1, B1);
const auto D2 = density(A2, B2);
if (D1 != D2) return D1 > D2;
const auto Size1 = A1->size() + B1->size();
const auto Size2 = A2->size() + B2->size();
if (Size1 != Size2) return Size1 < Size2;
const auto Samples1 = A1->samples() + B1->samples();
const auto Samples2 = A2->samples() + B2->samples();
if (Samples1 != Samples2) return Samples1 > Samples2;
return A1->target(0) < A2->target(0);
}
/*
* Sorting clusters by their density in decreasing order
*/
template <typename C>
std::vector<Cluster *> sortByDensity(const C &Clusters_) {
std::vector<Cluster *> Clusters(Clusters_.begin(), Clusters_.end());
std::stable_sort(Clusters.begin(), Clusters.end(), compareClusters);
return Clusters;
}
/*
* The probability that a page with a given weight is not present in the cache.
*
* Assume that the hot function are called in a random order; then the
* Assume that the hot functions are called in a random order; then the
* probability of a TLB page being accessed after a function call is
* p=pageSamples/totalSamples. The probability that the page is not accessed
* is (1-p), and the probability that it is not in the cache (i.e. not accessed
@ -194,11 +329,10 @@ double missProbability(const AlgoState &State, double PageSamples) {
* page. The following procedure detects short and long calls, and estimates
* the expected number of cache misses for the long ones.
*/
double expectedCacheHitRatio(const AlgoState &State,
const std::vector<Cluster *> &Clusters_) {
// copy and sort by density
std::vector<Cluster *> Clusters(Clusters_);
sortByDensity(Clusters);
template <typename C>
double expectedCacheHitRatio(const AlgoState &State, const C &Clusters_) {
// sort by density
std::vector<Cluster *> Clusters(sortByDensity(Clusters_));
// generate function addresses with an alignment
std::vector<size_t> Addr(State.Cg->numNodes(), InvalidAddr);
@ -247,35 +381,6 @@ double expectedCacheHitRatio(const AlgoState &State,
return 100.0 * (1.0 - Misses / State.TotalSamples);
}
/*
* Get adjacent clusters (the ones that share an arc) with the given one
*/
std::vector<Cluster *> adjacentClusters(const AlgoState &State, Cluster *C) {
std::vector<Cluster *> Result;
Result.reserve(State.MaxClusterId);
for (auto TargetId : C->targets()) {
for (auto Succ : State.Cg->successors(TargetId)) {
auto SuccCluster = State.FuncCluster[Succ];
if (SuccCluster != nullptr && SuccCluster != C) {
Result.push_back(SuccCluster);
}
}
for (auto Pred : State.Cg->predecessors(TargetId)) {
auto PredCluster = State.FuncCluster[Pred];
if (PredCluster != nullptr && PredCluster != C) {
Result.push_back(PredCluster);
}
}
}
std::sort(Result.begin(), Result.end(),
[](const Cluster *A, const Cluster *B) {
return A->id() < B->id();
});
auto Last = std::unique(Result.begin(), Result.end());
Result.erase(Last, Result.end());
return Result;
}
/*
* The expected number of calls for an edge within the same TLB page
*/
@ -291,7 +396,13 @@ double expectedCalls(int64_t SrcAddr, int64_t DstAddr, double EdgeWeight) {
* The expected number of calls within a given cluster with both endpoints on
* the same TLB cache page
*/
double shortCalls(const AlgoState &State, Cluster *Cluster) {
double shortCalls(AlgoState &State, const Cluster *Cluster) {
if (opts::UseShortCallCache) {
auto Itr = State.ShortCallCache.find(Cluster);
if (Itr != State.ShortCallCache.end())
return Itr->second;
}
double Calls = 0;
for (auto TargetId : Cluster->targets()) {
for (auto Succ : State.Cg->successors(TargetId)) {
@ -306,6 +417,10 @@ double shortCalls(const AlgoState &State, Cluster *Cluster) {
}
}
if (opts::UseShortCallCache) {
State.ShortCallCache[Cluster] = Calls;
}
return Calls;
}
@ -313,9 +428,14 @@ double shortCalls(const AlgoState &State, Cluster *Cluster) {
* The number of calls between the two clusters with both endpoints on
* the same TLB page, assuming that a given pair of clusters gets merged
*/
double shortCalls(const AlgoState &State,
Cluster *ClusterPred,
Cluster *ClusterSucc) {
double shortCalls(AlgoState &State,
const Cluster *ClusterPred,
const Cluster *ClusterSucc) {
if (opts::UseShortCallCache &&
State.ShortCallPairCache.contains(ClusterPred, ClusterSucc)) {
return State.ShortCallPairCache.get(ClusterPred, ClusterSucc);
}
double Calls = 0;
for (auto TargetId : ClusterPred->targets()) {
for (auto Succ : State.Cg->successors(TargetId)) {
@ -344,6 +464,10 @@ double shortCalls(const AlgoState &State,
}
}
if (opts::UseShortCallCache) {
State.ShortCallPairCache.set(ClusterPred, ClusterSucc, Calls);
}
return Calls;
}
@ -359,9 +483,13 @@ double shortCalls(const AlgoState &State,
* increase the chance of merging short clusters, which is helpful for
* the i-cache performance.
*/
double mergeGain(const AlgoState &State,
Cluster *ClusterPred,
Cluster *ClusterSucc) {
double mergeGain(AlgoState &State,
const Cluster *ClusterPred,
const Cluster *ClusterSucc) {
if (opts::UseGainCache && State.Cache.contains(ClusterPred, ClusterSucc)) {
return State.Cache.get(ClusterPred, ClusterSucc);
}
// cache misses on the first cluster
double LongCallsPred = ClusterPred->samples() - shortCalls(State, ClusterPred);
double ProbPred = missProbability(State, ClusterPred->density() * PageSize);
@ -381,7 +509,20 @@ double mergeGain(const AlgoState &State,
double Gain = ExpectedMissesPred + ExpectedMissesSucc - MissesNew;
// scaling the result to increase the importance of merging short clusters
return Gain / (ClusterPred->size() + ClusterSucc->size());
Gain /= (ClusterPred->size() + ClusterSucc->size());
if (opts::UseGainCache) {
State.Cache.set(ClusterPred, ClusterSucc, Gain);
}
return Gain;
}
template <typename C, typename V>
void maybeErase(C &Container, const V& Value) {
auto Itr = Container.find(Value);
if (Itr != Container.end())
Container.erase(Itr);
}
/*
@ -393,37 +534,35 @@ std::vector<Cluster> hfsortPlus(const CallGraph &Cg) {
AllClusters.reserve(Cg.numNodes());
for (NodeId F = 0; F < Cg.numNodes(); F++) {
AllClusters.emplace_back(F, Cg.getNode(F));
AllClusters.back().setId(F);
}
// initialize objects used by the algorithm
std::vector<Cluster *> Clusters;
Clusters.reserve(Cg.numNodes());
AlgoState State;
AlgoState State(AllClusters.size()); // TODO: should use final Clusters.size()
State.Cg = &Cg;
State.TotalSamples = 0;
State.FuncCluster = std::vector<Cluster *>(Cg.numNodes(), nullptr);
State.Addr = std::vector<size_t>(Cg.numNodes(), InvalidAddr);
if (!AllClusters.empty()) {
State.MaxClusterId = AllClusters.back().id();
}
State.Addr = std::vector<size_t>(Cg.numNodes(), InvalidAddr);
uint32_t Id = 0;
for (NodeId F = 0; F < Cg.numNodes(); F++) {
if (Cg.samples(F) == 0) continue;
Clusters.push_back(&AllClusters[F]);
Clusters.back()->setId(Id);
State.FuncCluster[F] = &AllClusters[F];
State.Addr[F] = 0;
State.TotalSamples += Cg.samples(F);
++Id;
}
State.MaxClusterId = Id;
DEBUG(dbgs() << "Starting hfsort+ for " << Clusters.size() << " clusters\n"
AdjacencyMatrix Adjacent(Cg, Clusters, State.FuncCluster);
DEBUG(dbgs() << "Starting hfsort+ w/" << opts::cacheKindString() << " for "
<< Clusters.size() << " clusters\n"
<< format("Initial expected iTLB cache hit ratio: %.4lf\n",
expectedCacheHitRatio(State, Clusters)));
// the cache keeps precomputed values of mergeGain for pairs of clusters;
// when a pair of clusters (x,y) gets merged, we need to invalidate the pairs
// containing both x and y (and recompute them on the next iteration)
PrecomputedResults Cache;
int Steps = 0;
// merge pairs of clusters while there is an improvement
while (Clusters.size() > 1) {
@ -435,44 +574,46 @@ std::vector<Cluster> hfsortPlus(const CallGraph &Cg) {
expectedCacheHitRatio(State, Clusters));
}
);
Steps++;
++Steps;
Cluster *BestClusterPred = nullptr;
Cluster *BestClusterSucc = nullptr;
double BestGain = -1;
for (auto ClusterPred : Clusters) {
// get candidates for merging with the current cluster
auto CandidateClusters = adjacentClusters(State, ClusterPred);
Adjacent.forallAdjacent(
ClusterPred,
// find the best candidate
[&](Cluster *ClusterSucc) {
assert(ClusterPred != ClusterSucc);
// get a cost of merging two clusters
const double Gain = mergeGain(State, ClusterPred, ClusterSucc);
// find the best candidate
for (auto ClusterSucc : CandidateClusters) {
// get a cost of merging two clusters
if (!Cache.contains(ClusterPred, ClusterSucc)) {
double Value = mergeGain(State, ClusterPred, ClusterSucc);
Cache.set(ClusterPred, ClusterSucc, Value);
assert(Cache.contains(ClusterPred, ClusterSucc));
// breaking ties by density to make the hottest clusters be merged first
if (Gain > BestGain || (std::abs(Gain - BestGain) < 1e-8 &&
compareClusterPairs(ClusterPred,
ClusterSucc,
BestClusterPred,
BestClusterSucc))) {
BestGain = Gain;
BestClusterPred = ClusterPred;
BestClusterSucc = ClusterSucc;
}
}
double Gain = Cache.get(ClusterPred, ClusterSucc);
// breaking ties by density to make the hottest clusters be merged first
if (Gain > BestGain || (std::abs(Gain - BestGain) < 1e-8 &&
density(ClusterPred, ClusterSucc) >
density(BestClusterPred, BestClusterSucc))) {
BestGain = Gain;
BestClusterPred = ClusterPred;
BestClusterSucc = ClusterSucc;
}
}
);
}
Cache.validateAll();
if (BestGain <= 0.0) break;
Cache.invalidate(BestClusterPred);
Cache.invalidate(BestClusterSucc);
// merge the best pair of clusters
BestClusterPred->merge(std::move(*BestClusterSucc));
DEBUG(
if (opts::Verbosity > 0) {
dbgs() << "Merging cluster " << BestClusterSucc->id()
<< " into cluster " << BestClusterPred->id() << "\n";
});
Adjacent.merge(BestClusterPred, BestClusterSucc);
BestClusterPred->merge(*BestClusterSucc);
size_t CurAddr = 0;
for (auto TargetId : BestClusterPred->targets()) {
@ -481,6 +622,18 @@ std::vector<Cluster> hfsortPlus(const CallGraph &Cg) {
CurAddr += State.Cg->size(TargetId);
}
if (opts::UseShortCallCache) {
maybeErase(State.ShortCallCache, BestClusterPred);
Adjacent.forallAdjacent(BestClusterPred,
[&State](const Cluster *C) {
maybeErase(State.ShortCallCache, C);
});
State.ShortCallPairCache.invalidate(Adjacent, BestClusterPred);
}
if (opts::UseGainCache) {
State.Cache.invalidate(Adjacent, BestClusterPred);
}
// remove BestClusterSucc from the list of active clusters
auto Iter = std::remove(Clusters.begin(), Clusters.end(), BestClusterSucc);
Clusters.erase(Iter, Clusters.end());
@ -492,9 +645,8 @@ std::vector<Cluster> hfsortPlus(const CallGraph &Cg) {
// Return the set of clusters that are left, which are the ones that
// didn't get merged (so their first func is its original func).
sortByDensity(Clusters);
std::vector<Cluster> Result;
for (auto Cluster : Clusters) {
for (auto Cluster : sortByDensity(Clusters)) {
Result.emplace_back(std::move(*Cluster));
}


@ -179,7 +179,8 @@ std::vector<Cluster> pettisAndHansen(const CallGraph &Cg) {
for (auto F : C2->targets()) {
FuncCluster[F] = C1;
}
C1->merge(std::move(*C2), Max.Weight);
C1->merge(*C2, Max.Weight);
C2->clear();
}
// Return the set of Clusters that are left, which are the ones that