forked from OSchip/llvm-project
[BOLT] More CG refactoring
Summary: Do some additional refactoring of the CallGraph class. Add a BinaryFunctionCallGraph class that holds the BOLT-specific bits. This is in preparation for moving the generic CallGraph class into a library that both BOLT and HHVM can use. Make the data members of CallGraph private and add the appropriate accessor methods. (cherry picked from FBD5143468)
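To illustrate the accessor change, here is a minimal before/after sketch mirroring the FrameAnalysis hunk later in this diff (the Queue and Func variables come from that pass):

    // Before: passes reach into CallGraph internals directly.
    for (auto Caller : Cg.Nodes[Cg.FuncToNodeId.at(Func)].Preds)
      Queue.push(Cg.Funcs[Caller]);

    // After: the same walk goes through the new accessor methods.
    for (auto Caller : Cg.predecessors(Cg.getNodeId(Func)))
      Queue.push(Cg.nodeIdToFunc(Caller));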
parent 95ab659fe4
commit 5feee9f1d8
@@ -0,0 +1,195 @@
//===--- Passes/BinaryFunctionCallGraph.cpp -------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#include "BinaryFunctionCallGraph.h"
#include "BinaryFunction.h"
#include "BinaryContext.h"

#define DEBUG_TYPE "callgraph"

namespace llvm {
namespace bolt {

CallGraph::NodeId BinaryFunctionCallGraph::addNode(BinaryFunction *BF,
                                                   uint32_t Size,
                                                   uint64_t Samples) {
  auto Id = CallGraph::addNode(Size, Samples);
  assert(size_t(Id) == Funcs.size());
  Funcs.push_back(BF);
  FuncToNodeId[BF] = Id;
  assert(Funcs[Id] == BF);
  return Id;
}

std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() {
  std::deque<BinaryFunction *> TopologicalOrder;
  enum NodeStatus { NEW, VISITING, VISITED };
  std::vector<NodeStatus> NodeStatus(Funcs.size());
  std::stack<NodeId> Worklist;

  for (auto *Func : Funcs) {
    const auto Id = FuncToNodeId.at(Func);
    Worklist.push(Id);
    NodeStatus[Id] = NEW;
  }

  while (!Worklist.empty()) {
    const auto FuncId = Worklist.top();
    Worklist.pop();

    if (NodeStatus[FuncId] == VISITED)
      continue;

    if (NodeStatus[FuncId] == VISITING) {
      TopologicalOrder.push_back(Funcs[FuncId]);
      NodeStatus[FuncId] = VISITED;
      continue;
    }

    assert(NodeStatus[FuncId] == NEW);
    NodeStatus[FuncId] = VISITING;
    Worklist.push(FuncId);
    for (const auto Callee : successors(FuncId)) {
      if (NodeStatus[Callee] == VISITING || NodeStatus[Callee] == VISITED)
        continue;
      Worklist.push(Callee);
    }
  }

  return TopologicalOrder;
}

BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
                                       std::map<uint64_t, BinaryFunction> &BFs,
                                       CgFilterFunction Filter,
                                       bool IncludeColdCalls,
                                       bool UseFunctionHotSize,
                                       bool UseEdgeCounts) {
  BinaryFunctionCallGraph Cg;

  // Add call graph nodes.
  auto lookupNode = [&](BinaryFunction *Function) {
    const auto Id = Cg.maybeGetNodeId(Function);
    if (Id == CallGraph::InvalidId) {
      // It's ok to use the hot size here when the function is split. This is
      // because emitFunctions will emit the hot part first in the order that is
      // computed by ReorderFunctions. The cold part will be emitted with the
      // rest of the cold functions and code.
      const auto Size = UseFunctionHotSize && Function->isSplit()
        ? Function->estimateHotSize()
        : Function->estimateSize();
      // NOTE: for functions without a profile, we set the number of samples
      // to zero. This will keep these functions from appearing in the hot
      // section. This is a little weird because we wouldn't be trying to
      // create a node for a function unless it was the target of a call from
      // a hot block. The alternative would be to set the count to one or
      // accumulate the number of calls from the callsite into the function
      // samples. Results from performance testing seem to favor the zero
      // count though, so I'm leaving it this way for now.
      const auto Samples =
        Function->hasProfile() ? Function->getExecutionCount() : 0;
      return Cg.addNode(Function, Size, Samples);
    } else {
      return Id;
    }
  };

  // Add call graph edges.
  uint64_t NotProcessed = 0;
  uint64_t TotalCalls = 0;
  for (auto &It : BFs) {
    auto *Function = &It.second;

    if(Filter(*Function)) {
      continue;
    }

    auto BranchDataOrErr = BC.DR.getFuncBranchData(Function->getNames());
    const auto SrcId = lookupNode(Function);
    uint64_t Offset = Function->getAddress();

    auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
      if (auto *DstFunc = BC.getFunctionForSymbol(DestSymbol)) {
        const auto DstId = lookupNode(DstFunc);
        const auto AvgDelta = !UseEdgeCounts ? Offset - DstFunc->getAddress() : 0;
        Cg.incArcWeight(SrcId, DstId, Count, AvgDelta);
        DEBUG(dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function
                     << " -> " << *DstFunc << " @ " << Offset << "\n");
        return true;
      }
      return false;
    };

    for (auto *BB : Function->layout()) {
      // Don't count calls from cold blocks
      if (BB->isCold() && !IncludeColdCalls)
        continue;

      for (auto &Inst : *BB) {
        // Find call instructions and extract target symbols from each one.
        if (!BC.MIA->isCall(Inst))
          continue;

        ++TotalCalls;
        if (const auto *DstSym = BC.MIA->getTargetSymbol(Inst)) {
          // For direct calls, just use the BB execution count.
          const auto Count = UseEdgeCounts && BB->hasProfile()
            ? BB->getExecutionCount() : 1;
          if (!recordCall(DstSym, Count))
            ++NotProcessed;
        } else if (BC.MIA->hasAnnotation(Inst, "EdgeCountData")) {
          // For indirect calls and jump tables, use branch data.
          if (!BranchDataOrErr) {
            ++NotProcessed;
            continue;
          }
          const FuncBranchData &BranchData = BranchDataOrErr.get();
          const auto DataOffset =
            BC.MIA->getAnnotationAs<uint64_t>(Inst, "EdgeCountData");

          for (const auto &BI : BranchData.getBranchRange(DataOffset)) {
            // Count each target as a separate call.
            ++TotalCalls;

            if (!BI.To.IsSymbol) {
              ++NotProcessed;
              continue;
            }

            auto Itr = BC.GlobalSymbols.find(BI.To.Name);
            if (Itr == BC.GlobalSymbols.end()) {
              ++NotProcessed;
              continue;
            }

            const auto *DstSym =
              BC.getOrCreateGlobalSymbol(Itr->second, "FUNCat");

            if (!recordCall(DstSym, UseEdgeCounts ? BI.Branches : 1))
              ++NotProcessed;
          }
        }

        if (!UseEdgeCounts) {
          Offset += BC.computeCodeSize(&Inst, &Inst + 1);
        }
      }
    }
  }

  outs() << "BOLT-WARNING: buildCallGraph: " << NotProcessed
         << " callsites not processed out of " << TotalCalls << "\n";

  return Cg;
}

}
}
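For reference, buildTraversalOrder returns functions callee-first: in an acyclic graph where main calls foo and foo calls bar, the deque comes back as [bar, foo, main], which is what lets FrameAnalysis (further down in this diff) walk the call graph bottom-up. A minimal sketch of the intended use, with visitBottomUp as a hypothetical callback:

    // Hypothetical consumer: process each function after its callees.
    for (BinaryFunction *BF : Cg.buildTraversalOrder())
      visitBottomUp(*BF);  // visitBottomUp is illustrative, not part of this patch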
@@ -0,0 +1,80 @@
//===--- Passes/BinaryFunctionCallGraph.h ---------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_BINARY_FUNCTION_CALLGRAPH_H
#define LLVM_TOOLS_LLVM_BOLT_PASSES_BINARY_FUNCTION_CALLGRAPH_H

#include "CallGraph.h"

#include <unordered_map>
#include <functional>
#include <deque>
#include <map>

namespace llvm {
namespace bolt {

class BinaryFunction;
class BinaryContext;

class BinaryFunctionCallGraph : public CallGraph {
public:
  NodeId maybeGetNodeId(const BinaryFunction *BF) const {
    auto Itr = FuncToNodeId.find(BF);
    return Itr != FuncToNodeId.end() ? Itr->second : InvalidId;
  }
  NodeId getNodeId(const BinaryFunction *BF) const {
    auto Itr = FuncToNodeId.find(BF);
    assert(Itr != FuncToNodeId.end());
    return Itr->second;
  }
  BinaryFunction *nodeIdToFunc(NodeId Id) {
    assert(Id < Funcs.size());
    return Funcs[Id];
  }
  const BinaryFunction *nodeIdToFunc(NodeId Id) const {
    assert(Id < Funcs.size());
    return Funcs[Id];
  }
  NodeId addNode(BinaryFunction *BF, uint32_t Size, uint64_t Samples = 0);

  /// Compute a DFS traversal of the call graph.
  std::deque<BinaryFunction *> buildTraversalOrder();

private:
  std::unordered_map<const BinaryFunction *, NodeId> FuncToNodeId;
  std::vector<BinaryFunction *> Funcs;
};

using CgFilterFunction = std::function<bool (const BinaryFunction &BF)>;
inline bool NoFilter(const BinaryFunction &) { return false; }

/// Builds a call graph from the map of BinaryFunctions provided in BFs.
/// The arguments control how the graph is constructed.
/// Filter is called on each function, any function that it returns true for
/// is omitted from the graph.
/// If IncludeColdCalls is true, then calls from cold BBs are considered for the
/// graph, otherwise they are ignored.
/// UseFunctionHotSize controls whether the hot size of a function is used when
/// filling in the Size attribute of new Nodes.
/// UseEdgeCounts is used to control if the AvgCallOffset attribute on Arcs is
/// computed using the offsets of call instructions.
BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
                                       std::map<uint64_t, BinaryFunction> &BFs,
                                       CgFilterFunction Filter = NoFilter,
                                       bool IncludeColdCalls = true,
                                       bool UseFunctionHotSize = false,
                                       bool UseEdgeCounts = false);

}
}

#endif
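A hedged usage sketch of the new interface; the flag values are illustrative, and the real call site in ReorderFunctions further down in this diff passes its own options:

    // Build the BOLT call graph, ignoring calls from cold blocks.
    auto Cg = buildCallGraph(BC, BFs, NoFilter,
                             /*IncludeColdCalls=*/false,
                             /*UseFunctionHotSize=*/true,
                             /*UseEdgeCounts=*/false);
    Cg.normalizeArcWeights(/*UseEdgeCounts=*/false);

    // Read-only access now goes through accessors instead of public members.
    for (const auto &Arc : Cg.getArcs())
      outs() << Arc.src() << " -> " << Arc.dst()
             << " weight=" << Arc.weight() << "\n";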
@@ -1,5 +1,6 @@
add_llvm_library(LLVMBOLTPasses
  BinaryPasses.cpp
  BinaryFunctionCallGraph.cpp
  CallGraph.cpp
  DataflowAnalysis.cpp
  DataflowInfoManager.cpp
@ -67,195 +67,52 @@ namespace bolt {
|
|||
int64_t CallGraph::Arc::Hash::operator()(const Arc &Arc) const {
|
||||
#ifdef USE_STD_HASH
|
||||
std::hash<int64_t> Hasher;
|
||||
return hashCombine(Hasher(Arc.Src), Arc.Dst);
|
||||
return hashCombine(Hasher(Arc.src()), Arc.dst());
|
||||
#else
|
||||
return hash_int64_pair(int64_t(Arc.Src), int64_t(Arc.Dst));
|
||||
return hash_int64_pair(int64_t(Arc.src()), int64_t(Arc.dst()));
|
||||
#endif
|
||||
}
|
||||
|
||||
CallGraph buildCallGraph(BinaryContext &BC,
|
||||
std::map<uint64_t, BinaryFunction> &BFs,
|
||||
std::function<bool (const BinaryFunction &BF)> Filter,
|
||||
bool IncludeColdCalls,
|
||||
bool UseFunctionHotSize,
|
||||
bool UseEdgeCounts) {
|
||||
CallGraph Cg;
|
||||
|
||||
// Add call graph nodes.
|
||||
auto lookupNode = [&](BinaryFunction *Function) {
|
||||
auto It = Cg.FuncToNodeId.find(Function);
|
||||
if (It == Cg.FuncToNodeId.end()) {
|
||||
// It's ok to use the hot size here when the function is split. This is
|
||||
// because emitFunctions will emit the hot part first in the order that is
|
||||
// computed by ReorderFunctions. The cold part will be emitted with the
|
||||
// rest of the cold functions and code.
|
||||
const auto Size = UseFunctionHotSize && Function->isSplit()
|
||||
? Function->estimateHotSize()
|
||||
: Function->estimateSize();
|
||||
const auto Id = Cg.addNode(Size);
|
||||
assert(size_t(Id) == Cg.Funcs.size());
|
||||
Cg.Funcs.push_back(Function);
|
||||
Cg.FuncToNodeId[Function] = Id;
|
||||
// NOTE: for functions without a profile, we set the number of samples
|
||||
// to zero. This will keep these functions from appearing in the hot
|
||||
// section. This is a little weird because we wouldn't be trying to
|
||||
// create a node for a function unless it was the target of a call from
|
||||
// a hot block. The alternative would be to set the count to one or
|
||||
// accumulate the number of calls from the callsite into the function
|
||||
// samples. Results from perfomance testing seem to favor the zero
|
||||
// count though, so I'm leaving it this way for now.
|
||||
Cg.Nodes[Id].Samples = Function->hasProfile() ? Function->getExecutionCount() : 0;
|
||||
assert(Cg.Funcs[Id] == Function);
|
||||
return Id;
|
||||
} else {
|
||||
return It->second;
|
||||
}
|
||||
};
|
||||
|
||||
// Add call graph edges.
|
||||
uint64_t NotProcessed = 0;
|
||||
uint64_t TotalCalls = 0;
|
||||
for (auto &It : BFs) {
|
||||
auto *Function = &It.second;
|
||||
|
||||
if(Filter(*Function)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto BranchDataOrErr = BC.DR.getFuncBranchData(Function->getNames());
|
||||
const auto SrcId = lookupNode(Function);
|
||||
uint64_t Offset = Function->getAddress();
|
||||
|
||||
auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
|
||||
if (auto *DstFunc = BC.getFunctionForSymbol(DestSymbol)) {
|
||||
const auto DstId = lookupNode(DstFunc);
|
||||
auto &A = Cg.incArcWeight(SrcId, DstId, Count);
|
||||
if (!UseEdgeCounts) {
|
||||
A.AvgCallOffset += (Offset - DstFunc->getAddress());
|
||||
}
|
||||
DEBUG(dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function
|
||||
<< " -> " << *DstFunc << " @ " << Offset << "\n");
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
for (auto *BB : Function->layout()) {
|
||||
// Don't count calls from cold blocks
|
||||
if (BB->isCold() && !IncludeColdCalls)
|
||||
continue;
|
||||
|
||||
for (auto &Inst : *BB) {
|
||||
// Find call instructions and extract target symbols from each one.
|
||||
if (!BC.MIA->isCall(Inst))
|
||||
continue;
|
||||
|
||||
++TotalCalls;
|
||||
if (const auto *DstSym = BC.MIA->getTargetSymbol(Inst)) {
|
||||
// For direct calls, just use the BB execution count.
|
||||
const auto Count = UseEdgeCounts && BB->hasProfile()
|
||||
? BB->getExecutionCount() : 1;
|
||||
if (!recordCall(DstSym, Count))
|
||||
++NotProcessed;
|
||||
} else if (BC.MIA->hasAnnotation(Inst, "EdgeCountData")) {
|
||||
// For indirect calls and jump tables, use branch data.
|
||||
if(!BranchDataOrErr) {
|
||||
++NotProcessed;
|
||||
continue;
|
||||
}
|
||||
const FuncBranchData &BranchData = BranchDataOrErr.get();
|
||||
const auto DataOffset =
|
||||
BC.MIA->getAnnotationAs<uint64_t>(Inst, "EdgeCountData");
|
||||
|
||||
for (const auto &BI : BranchData.getBranchRange(DataOffset)) {
|
||||
// Count each target as a separate call.
|
||||
++TotalCalls;
|
||||
|
||||
if (!BI.To.IsSymbol) {
|
||||
++NotProcessed;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto Itr = BC.GlobalSymbols.find(BI.To.Name);
|
||||
if (Itr == BC.GlobalSymbols.end()) {
|
||||
++NotProcessed;
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto *DstSym =
|
||||
BC.getOrCreateGlobalSymbol(Itr->second, "FUNCat");
|
||||
|
||||
if (!recordCall(DstSym, UseEdgeCounts ? BI.Branches : 1))
|
||||
++NotProcessed;
|
||||
}
|
||||
}
|
||||
|
||||
if (!UseEdgeCounts) {
|
||||
Offset += BC.computeCodeSize(&Inst, &Inst + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
outs() << "BOLT-WARNING: buildCallGraph: " << NotProcessed
|
||||
<< " callsites not processed out of " << TotalCalls << "\n";
|
||||
|
||||
return Cg;
|
||||
}
|
||||
|
||||
CallGraph::NodeId CallGraph::addNode(uint32_t Size, uint32_t Samples) {
|
||||
CallGraph::NodeId CallGraph::addNode(uint32_t Size, uint64_t Samples) {
|
||||
auto Id = Nodes.size();
|
||||
Nodes.emplace_back(Size, Samples);
|
||||
return Id;
|
||||
}
|
||||
|
||||
const CallGraph::Arc &CallGraph::incArcWeight(NodeId Src, NodeId Dst, double W) {
|
||||
const CallGraph::Arc &CallGraph::incArcWeight(NodeId Src, NodeId Dst, double W,
|
||||
double Offset) {
|
||||
auto Res = Arcs.emplace(Src, Dst, W);
|
||||
if (!Res.second) {
|
||||
Res.first->Weight += W;
|
||||
return *Res.first;
|
||||
}
|
||||
Res.first->AvgCallOffset += Offset;
|
||||
Nodes[Src].Succs.push_back(Dst);
|
||||
Nodes[Dst].Preds.push_back(Src);
|
||||
return *Res.first;
|
||||
}
|
||||
|
||||
std::deque<BinaryFunction *> CallGraph::buildTraversalOrder() {
|
||||
std::deque<BinaryFunction *> TopologicalOrder;
|
||||
enum NodeStatus { NEW, VISITING, VISITED };
|
||||
std::vector<NodeStatus> NodeStatus(Funcs.size());
|
||||
std::stack<NodeId> Worklist;
|
||||
|
||||
for (auto *Func : Funcs) {
|
||||
const auto Id = FuncToNodeId.at(Func);
|
||||
Worklist.push(Id);
|
||||
NodeStatus[Id] = NEW;
|
||||
}
|
||||
|
||||
while (!Worklist.empty()) {
|
||||
const auto FuncId = Worklist.top();
|
||||
Worklist.pop();
|
||||
|
||||
if (NodeStatus[FuncId] == VISITED)
|
||||
continue;
|
||||
|
||||
if (NodeStatus[FuncId] == VISITING) {
|
||||
TopologicalOrder.push_back(Funcs[FuncId]);
|
||||
NodeStatus[FuncId] = VISITED;
|
||||
continue;
|
||||
void CallGraph::normalizeArcWeights(bool UseEdgeCounts) {
|
||||
// Normalize arc weights.
|
||||
if (!UseEdgeCounts) {
|
||||
for (NodeId FuncId = 0; FuncId < numNodes(); ++FuncId) {
|
||||
auto& Func = getNode(FuncId);
|
||||
for (auto Caller : Func.predecessors()) {
|
||||
auto Arc = findArc(Caller, FuncId);
|
||||
Arc->NormalizedWeight = Arc->weight() / Func.samples();
|
||||
Arc->AvgCallOffset /= Arc->weight();
|
||||
assert(Arc->AvgCallOffset < size(Caller));
|
||||
}
|
||||
}
|
||||
|
||||
assert(NodeStatus[FuncId] == NEW);
|
||||
NodeStatus[FuncId] = VISITING;
|
||||
Worklist.push(FuncId);
|
||||
for (const auto Callee : Nodes[FuncId].Succs) {
|
||||
if (NodeStatus[Callee] == VISITING || NodeStatus[Callee] == VISITED)
|
||||
continue;
|
||||
Worklist.push(Callee);
|
||||
} else {
|
||||
for (NodeId FuncId = 0; FuncId < numNodes(); ++FuncId) {
|
||||
auto &Func = getNode(FuncId);
|
||||
for (auto Caller : Func.predecessors()) {
|
||||
auto Arc = findArc(Caller, FuncId);
|
||||
Arc->NormalizedWeight = Arc->weight() / Func.samples();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return TopologicalOrder;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -12,20 +12,14 @@
|
|||
#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_CALLGRAPH_H
|
||||
#define LLVM_TOOLS_LLVM_BOLT_PASSES_CALLGRAPH_H
|
||||
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <deque>
|
||||
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
class BinaryFunction;
|
||||
class BinaryContext;
|
||||
|
||||
// TODO: find better place for this
|
||||
inline int64_t hashCombine(const int64_t Seed, const int64_t Val) {
|
||||
std::hash<int64_t> Hasher;
|
||||
|
@ -55,6 +49,14 @@ public:
|
|||
return Lhs.Src == Rhs.Src && Lhs.Dst == Rhs.Dst;
|
||||
}
|
||||
|
||||
NodeId src() const { return Src; }
|
||||
NodeId dst() const { return Dst; }
|
||||
double weight() const { return Weight; }
|
||||
double avgCallOffset() const { return AvgCallOffset; }
|
||||
double normalizedWeight() const { return NormalizedWeight; }
|
||||
|
||||
private:
|
||||
friend class CallGraph;
|
||||
const NodeId Src;
|
||||
const NodeId Dst;
|
||||
mutable double Weight;
|
||||
|
@ -62,50 +64,115 @@ public:
|
|||
mutable double AvgCallOffset{0};
|
||||
};
|
||||
|
||||
using ArcsType = std::unordered_set<Arc, Arc::Hash>;
|
||||
using ArcIterator = ArcsType::iterator;
|
||||
using ArcConstIterator = ArcsType::const_iterator;
|
||||
|
||||
class Node {
|
||||
public:
|
||||
explicit Node(uint32_t Size, uint32_t Samples = 0)
|
||||
explicit Node(uint32_t Size, uint64_t Samples = 0)
|
||||
: Size(Size), Samples(Samples)
|
||||
{}
|
||||
|
||||
uint32_t size() const { return Size; }
|
||||
uint64_t samples() const { return Samples; }
|
||||
|
||||
const std::vector<NodeId> &successors() const {
|
||||
return Succs;
|
||||
}
|
||||
const std::vector<NodeId> &predecessors() const {
|
||||
return Preds;
|
||||
}
|
||||
|
||||
private:
|
||||
friend class CallGraph;
|
||||
uint32_t Size;
|
||||
uint32_t Samples;
|
||||
uint64_t Samples;
|
||||
|
||||
// preds and succs contain no duplicate elements and self arcs are not allowed
|
||||
std::vector<NodeId> Preds;
|
||||
std::vector<NodeId> Succs;
|
||||
};
|
||||
|
||||
NodeId addNode(uint32_t Size, uint32_t Samples = 0);
|
||||
const Arc &incArcWeight(NodeId Src, NodeId Dst, double W = 1.0);
|
||||
size_t numNodes() const {
|
||||
return Nodes.size();
|
||||
}
|
||||
const Node &getNode(const NodeId Id) const {
|
||||
assert(Id < Nodes.size());
|
||||
return Nodes[Id];
|
||||
}
|
||||
uint32_t size(const NodeId Id) const {
|
||||
assert(Id < Nodes.size());
|
||||
return Nodes[Id].Size;
|
||||
}
|
||||
uint64_t samples(const NodeId Id) const {
|
||||
assert(Id < Nodes.size());
|
||||
return Nodes[Id].Samples;
|
||||
}
|
||||
const std::vector<NodeId> &successors(const NodeId Id) const {
|
||||
assert(Id < Nodes.size());
|
||||
return Nodes[Id].Succs;
|
||||
}
|
||||
const std::vector<NodeId> &predecessors(const NodeId Id) const {
|
||||
assert(Id < Nodes.size());
|
||||
return Nodes[Id].Preds;
|
||||
}
|
||||
NodeId addNode(uint32_t Size, uint64_t Samples = 0);
|
||||
const Arc &incArcWeight(NodeId Src, NodeId Dst, double W = 1.0,
|
||||
double Offset = 0.0);
|
||||
ArcIterator findArc(NodeId Src, NodeId Dst) {
|
||||
return Arcs.find(Arc(Src, Dst));
|
||||
}
|
||||
ArcConstIterator findArc(NodeId Src, NodeId Dst) const {
|
||||
return Arcs.find(Arc(Src, Dst));
|
||||
}
|
||||
const ArcsType &getArcs() const {
|
||||
return Arcs;
|
||||
}
|
||||
|
||||
/// Compute a DFS traversal of the call graph.
|
||||
std::deque<BinaryFunction *> buildTraversalOrder();
|
||||
void normalizeArcWeights(bool UseEdgeCounts);
|
||||
|
||||
template <typename L>
|
||||
void printDot(char* fileName, L getLabel) const;
|
||||
private:
|
||||
std::vector<Node> Nodes;
|
||||
std::unordered_set<Arc, Arc::Hash> Arcs;
|
||||
std::vector<BinaryFunction *> Funcs;
|
||||
std::unordered_map<const BinaryFunction *, NodeId> FuncToNodeId;
|
||||
ArcsType Arcs;
|
||||
};
|
||||
|
||||
inline bool NoFilter(const BinaryFunction &) { return false; }
|
||||
template<class L>
|
||||
void CallGraph::printDot(char* FileName, L GetLabel) const {
|
||||
FILE* File = fopen(FileName, "wt");
|
||||
if (!File) return;
|
||||
|
||||
/// Builds a call graph from the map of BinaryFunctions provided in BFs.
|
||||
/// The arguments control how the graph is constructed.
|
||||
/// Filter is called on each function, any function that it returns true for
|
||||
/// is omitted from the graph.
|
||||
/// If IncludeColdCalls is true, then calls from cold BBs are considered for the
|
||||
/// graph, otherwise they are ignored.
|
||||
/// UseFunctionHotSize controls whether the hot size of a function is used when
|
||||
/// filling in the Size attribute of new Nodes.
|
||||
/// UseEdgeCounts is used to control if the AvgCallOffset attribute on Arcs is
|
||||
/// computed using the offsets of call instructions.
|
||||
CallGraph buildCallGraph(BinaryContext &BC,
|
||||
std::map<uint64_t, BinaryFunction> &BFs,
|
||||
std::function<bool (const BinaryFunction &BF)> Filter = NoFilter,
|
||||
bool IncludeColdCalls = true,
|
||||
bool UseFunctionHotSize = false,
|
||||
bool UseEdgeCounts = false);
|
||||
fprintf(File, "digraph g {\n");
|
||||
for (NodeId F = 0; F < Nodes.size(); F++) {
|
||||
if (Nodes[F].samples() == 0) continue;
|
||||
fprintf(
|
||||
File,
|
||||
"f%lu [label=\"%s\\nsamples=%u\\nsize=%u\"];\n",
|
||||
F,
|
||||
GetLabel(F),
|
||||
Nodes[F].samples(),
|
||||
Nodes[F].size());
|
||||
}
|
||||
for (NodeId F = 0; F < Nodes.size(); F++) {
|
||||
if (Nodes[F].samples() == 0) continue;
|
||||
for (auto Dst : Nodes[F].successors()) {
|
||||
auto Arc = findArc(F, Dst);
|
||||
fprintf(
|
||||
File,
|
||||
"f%lu -> f%u [label=\"normWgt=%.3lf,weight=%.0lf,callOffset=%.1lf\"];"
|
||||
"\n",
|
||||
F,
|
||||
Dst,
|
||||
Arc->normalizedWeight(),
|
||||
Arc->weight(),
|
||||
Arc->avgCallOffset());
|
||||
}
|
||||
}
|
||||
fprintf(File, "}\n");
|
||||
fclose(File);
|
||||
}
|
||||
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
|
|
|
@ -347,8 +347,8 @@ void FrameAnalysis::buildClobberMap(const BinaryContext &BC) {
|
|||
}
|
||||
|
||||
if (RegsKilledMap[Func] != RegsKilled || Updated) {
|
||||
for (auto Caller : Cg.Nodes[Cg.FuncToNodeId.at(Func)].Preds) {
|
||||
Queue.push(Cg.Funcs[Caller]);
|
||||
for (auto Caller : Cg.predecessors(Cg.getNodeId(Func))) {
|
||||
Queue.push(Cg.nodeIdToFunc(Caller));
|
||||
}
|
||||
}
|
||||
RegsKilledMap[Func] = std::move(RegsKilled);
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
#define LLVM_TOOLS_LLVM_BOLT_PASSES_FRAMEANALYSIS_H
|
||||
|
||||
#include "BinaryPasses.h"
|
||||
#include "CallGraph.h"
|
||||
#include "BinaryFunctionCallGraph.h"
|
||||
#include "StackPointerTracking.h"
|
||||
|
||||
namespace llvm {
|
||||
|
@ -113,7 +113,7 @@ raw_ostream &operator<<(raw_ostream &OS,
|
|||
///
|
||||
class FrameAnalysis : public BinaryFunctionPass {
|
||||
/// Call graph info
|
||||
CallGraph Cg;
|
||||
BinaryFunctionCallGraph Cg;
|
||||
|
||||
/// DFS or reverse post-ordering of the call graph nodes to allow us to
|
||||
/// traverse the call graph bottom-up
|
||||
|
|
|
@ -96,8 +96,8 @@ void FrameOptimizerPass::buildClobberMap(const BinaryContext &BC) {
|
|||
}
|
||||
|
||||
if (RegsKilledMap[Func] != RegsKilled) {
|
||||
for (auto Caller : Cg.Nodes[Cg.FuncToNodeId.at(Func)].Preds) {
|
||||
Queue.push(Cg.Funcs[Caller]);
|
||||
for (auto Caller : Cg.predecessors(Cg.getNodeId(Func))) {
|
||||
Queue.push(Cg.nodeIdToFunc(Caller));
|
||||
}
|
||||
}
|
||||
RegsKilledMap[Func] = std::move(RegsKilled);
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
#define LLVM_TOOLS_LLVM_BOLT_PASSES_FRAMEOPTIMIZER_H
|
||||
|
||||
#include "BinaryPasses.h"
|
||||
#include "CallGraph.h"
|
||||
#include "BinaryFunctionCallGraph.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
@ -76,7 +76,7 @@ class FrameOptimizerPass : public BinaryFunctionPass {
|
|||
uint64_t CountFunctionsAllClobber{0};
|
||||
|
||||
/// Call graph info
|
||||
CallGraph Cg;
|
||||
BinaryFunctionCallGraph Cg;
|
||||
|
||||
/// DFS or reverse post-ordering of the call graph nodes to allow us to
|
||||
/// traverse the call graph bottom-up
|
||||
|
|
|
@ -30,13 +30,17 @@
|
|||
#include "HFSort.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/Format.h"
|
||||
#include "llvm/Support/Options.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
||||
#undef DEBUG_TYPE
|
||||
#define DEBUG_TYPE "hfsort"
|
||||
|
||||
namespace opts {
|
||||
extern llvm::cl::opt<unsigned> Verbosity;
|
||||
}
|
||||
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
|
@ -65,10 +69,10 @@ constexpr int CallerDegradeFactor = 8;
|
|||
|
||||
Cluster::Cluster(NodeId Id, const Node &Func) {
|
||||
Targets.push_back(Id);
|
||||
Size = Func.Size;
|
||||
Samples = Func.Samples;
|
||||
Size = Func.size();
|
||||
Samples = Func.samples();
|
||||
Density = (double)Samples / Size;
|
||||
Frozen = false;
|
||||
DEBUG(dbgs() << "new Cluster: " << toString() << "\n");
|
||||
}
|
||||
|
||||
std::string Cluster::toString() const {
|
||||
|
@ -91,25 +95,31 @@ void freezeClusters(const CallGraph &Cg, std::vector<Cluster> &Clusters) {
|
|||
uint32_t TotalSize = 0;
|
||||
std::sort(Clusters.begin(), Clusters.end(), compareClustersDensity);
|
||||
for (auto &C : Clusters) {
|
||||
uint32_t NewSize = TotalSize + C.Size;
|
||||
uint32_t NewSize = TotalSize + C.size();
|
||||
if (NewSize > FrozenPages * HugePageSize) break;
|
||||
C.Frozen = true;
|
||||
C.freeze();
|
||||
TotalSize = NewSize;
|
||||
auto Fid = C.Targets[0];
|
||||
DEBUG(dbgs() <<
|
||||
format("freezing cluster for func %d, size = %u, samples = %u)\n",
|
||||
Fid, Cg.Nodes[Fid].Size, Cg.Nodes[Fid].Samples););
|
||||
DEBUG(
|
||||
auto Fid = C.target(0);
|
||||
dbgs() <<
|
||||
format("freezing cluster for func %d, size = %u, samples = %lu)\n",
|
||||
Fid, Cg.size(Fid), Cg.samples(Fid)););
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void Cluster::reverseTargets() {
|
||||
std::reverse(Targets.begin(), Targets.end());
|
||||
}
|
||||
|
||||
void Cluster::merge(Cluster&& Other, const double Aw) {
|
||||
Targets.insert(Targets.end(),
|
||||
Other.Targets.begin(),
|
||||
Other.Targets.end());
|
||||
Size += Other.Size;
|
||||
Samples += Other.Samples;
|
||||
Density = (double)Samples / Size;
|
||||
|
||||
Other.Size = 0;
|
||||
Other.Samples = 0;
|
||||
|
@ -120,13 +130,13 @@ std::vector<Cluster> clusterize(const CallGraph &Cg) {
|
|||
std::vector<NodeId> SortedFuncs;
|
||||
|
||||
// indexed by NodeId, keeps its current cluster
|
||||
std::vector<Cluster*> FuncCluster(Cg.Nodes.size(), nullptr);
|
||||
std::vector<Cluster*> FuncCluster(Cg.numNodes(), nullptr);
|
||||
std::vector<Cluster> Clusters;
|
||||
Clusters.reserve(Cg.Nodes.size());
|
||||
Clusters.reserve(Cg.numNodes());
|
||||
|
||||
for (NodeId F = 0; F < Cg.Nodes.size(); F++) {
|
||||
if (Cg.Nodes[F].Samples == 0) continue;
|
||||
Clusters.emplace_back(F, Cg.Nodes[F]);
|
||||
for (NodeId F = 0; F < Cg.numNodes(); F++) {
|
||||
if (Cg.samples(F) == 0) continue;
|
||||
Clusters.emplace_back(F, Cg.getNode(F));
|
||||
SortedFuncs.push_back(F);
|
||||
}
|
||||
|
||||
|
@ -135,18 +145,18 @@ std::vector<Cluster> clusterize(const CallGraph &Cg) {
|
|||
// The size and order of Clusters is fixed until we reshuffle it immediately
|
||||
// before returning.
|
||||
for (auto &Cluster : Clusters) {
|
||||
FuncCluster[Cluster.Targets.front()] = &Cluster;
|
||||
FuncCluster[Cluster.targets().front()] = &Cluster;
|
||||
}
|
||||
|
||||
std::sort(
|
||||
SortedFuncs.begin(),
|
||||
SortedFuncs.end(),
|
||||
[&] (const NodeId F1, const NodeId F2) {
|
||||
const auto &Func1 = Cg.Nodes[F1];
|
||||
const auto &Func2 = Cg.Nodes[F2];
|
||||
const auto &Func1 = Cg.getNode(F1);
|
||||
const auto &Func2 = Cg.getNode(F2);
|
||||
return
|
||||
(uint64_t)Func1.Samples * Func2.Size > // TODO: is this correct?
|
||||
(uint64_t)Func2.Samples * Func1.Size;
|
||||
Func1.samples() * Func2.size() > // TODO: is this correct?
|
||||
Func2.samples() * Func1.size();
|
||||
}
|
||||
);
|
||||
|
||||
|
@ -154,17 +164,17 @@ std::vector<Cluster> clusterize(const CallGraph &Cg) {
|
|||
// one containing its most likely predecessor.
|
||||
for (const auto Fid : SortedFuncs) {
|
||||
auto Cluster = FuncCluster[Fid];
|
||||
if (Cluster->Frozen) continue;
|
||||
if (Cluster->frozen()) continue;
|
||||
|
||||
// Find best predecessor.
|
||||
NodeId BestPred = CallGraph::InvalidId;
|
||||
double BestProb = 0;
|
||||
|
||||
for (const auto Src : Cg.Nodes[Fid].Preds) {
|
||||
auto &A = *Cg.Arcs.find(Arc(Src, Fid));
|
||||
if (BestPred == CallGraph::InvalidId || A.NormalizedWeight > BestProb) {
|
||||
BestPred = A.Src;
|
||||
BestProb = A.NormalizedWeight;
|
||||
for (const auto Src : Cg.predecessors(Fid)) {
|
||||
const auto &Arc = *Cg.findArc(Src, Fid);
|
||||
if (BestPred == CallGraph::InvalidId || Arc.normalizedWeight() > BestProb) {
|
||||
BestPred = Arc.src();
|
||||
BestProb = Arc.normalizedWeight();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -180,29 +190,32 @@ std::vector<Cluster> clusterize(const CallGraph &Cg) {
|
|||
// Skip if no predCluster (predecessor w/ no samples), or if same
|
||||
// as cluster, or if it's frozen.
|
||||
if (PredCluster == nullptr || PredCluster == Cluster ||
|
||||
PredCluster->Frozen) {
|
||||
PredCluster->frozen()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip if merged cluster would be bigger than the threshold.
|
||||
if (Cluster->Size + PredCluster->Size > MaxClusterSize) continue;
|
||||
if (Cluster->size() + PredCluster->size() > MaxClusterSize) continue;
|
||||
|
||||
// Check if the merge is good for the caller.
|
||||
// Don't merge if the caller's density is significantly better
|
||||
// than the density resulting from the merge.
|
||||
const double NewDensity =
|
||||
((double)PredCluster->Samples + Cluster->Samples) /
|
||||
(PredCluster->Size + Cluster->Size);
|
||||
((double)PredCluster->samples() + Cluster->samples()) /
|
||||
(PredCluster->size() + Cluster->size());
|
||||
if (PredCluster->density() > NewDensity * CallerDegradeFactor) {
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << format("merging %s -> %s: %u\n",
|
||||
PredCluster->toString().c_str(),
|
||||
Cluster->toString().c_str(),
|
||||
Cg.Nodes[Fid].Samples););
|
||||
DEBUG(
|
||||
if (opts::Verbosity > 1) {
|
||||
dbgs() << format("merging %s -> %s: %u\n",
|
||||
PredCluster->toString().c_str(),
|
||||
Cluster->toString().c_str(),
|
||||
Cg.samples(Fid));
|
||||
});
|
||||
|
||||
for (auto F : Cluster->Targets) {
|
||||
for (auto F : Cluster->targets()) {
|
||||
FuncCluster[F] = PredCluster;
|
||||
}
|
||||
|
||||
|
@ -212,12 +225,16 @@ std::vector<Cluster> clusterize(const CallGraph &Cg) {
|
|||
// Return the set of Clusters that are left, which are the ones that
|
||||
// didn't get merged (so their first func is its original func).
|
||||
std::vector<Cluster> SortedClusters;
|
||||
std::unordered_set<Cluster *> Visited;
|
||||
for (const auto Func : SortedFuncs) {
|
||||
auto Cluster = FuncCluster[Func];
|
||||
if (!Cluster || Cluster->Targets.empty()) continue;
|
||||
if (Cluster->Targets[0] != Func) continue;
|
||||
if (!Cluster ||
|
||||
Visited.count(Cluster) == 1 ||
|
||||
Cluster->target(0) != Func) {
|
||||
continue;
|
||||
}
|
||||
SortedClusters.emplace_back(std::move(*Cluster));
|
||||
Cluster->Targets.clear();
|
||||
Visited.insert(Cluster);
|
||||
}
|
||||
|
||||
std::sort(SortedClusters.begin(),
|
||||
|
@ -228,32 +245,32 @@ std::vector<Cluster> clusterize(const CallGraph &Cg) {
|
|||
}
|
||||
|
||||
std::vector<Cluster> randomClusters(const CallGraph &Cg) {
|
||||
std::vector<NodeId> FuncIds(Cg.Nodes.size(), 0);
|
||||
std::vector<NodeId> FuncIds(Cg.numNodes(), 0);
|
||||
std::vector<Cluster> Clusters;
|
||||
Clusters.reserve(Cg.Nodes.size());
|
||||
Clusters.reserve(Cg.numNodes());
|
||||
|
||||
for (NodeId F = 0; F < Cg.Nodes.size(); F++) {
|
||||
if (Cg.Nodes[F].Samples == 0) continue;
|
||||
Clusters.emplace_back(F, Cg.Nodes[F]);
|
||||
for (NodeId F = 0; F < Cg.numNodes(); F++) {
|
||||
if (Cg.samples(F) == 0) continue;
|
||||
Clusters.emplace_back(F, Cg.getNode(F));
|
||||
}
|
||||
|
||||
std::sort(Clusters.begin(),
|
||||
Clusters.end(),
|
||||
[](const Cluster &A, const Cluster &B) {
|
||||
return A.Size < B.Size;
|
||||
return A.size() < B.size();
|
||||
});
|
||||
|
||||
auto pickMergeCluster = [&Clusters](const size_t Idx) {
|
||||
size_t MaxIdx = Idx + 1;
|
||||
|
||||
while (MaxIdx < Clusters.size() &&
|
||||
Clusters[Idx].Size + Clusters[MaxIdx].Size <= MaxClusterSize) {
|
||||
Clusters[Idx].size() + Clusters[MaxIdx].size() <= MaxClusterSize) {
|
||||
++MaxIdx;
|
||||
}
|
||||
|
||||
if (MaxIdx - Idx > 1) {
|
||||
size_t MergeIdx = (std::rand() % (MaxIdx - Idx - 1)) + Idx + 1;
|
||||
assert(Clusters[MergeIdx].Size + Clusters[Idx].Size <= MaxClusterSize);
|
||||
assert(Clusters[MergeIdx].size() + Clusters[Idx].size() <= MaxClusterSize);
|
||||
return MergeIdx;
|
||||
}
|
||||
return Clusters.size();
|
||||
|
|
|
@ -50,15 +50,27 @@ public:
|
|||
Cluster(CallGraph::NodeId Id, const CallGraph::Node &F);
|
||||
|
||||
std::string toString() const;
|
||||
double density() const {
|
||||
return (double)Samples / Size;
|
||||
}
|
||||
|
||||
double density() const { return Density; }
|
||||
uint64_t samples() const { return Samples; }
|
||||
uint32_t size() const { return Size; }
|
||||
bool frozen() const { return Frozen; }
|
||||
void freeze() { Frozen = true; }
|
||||
void merge(Cluster &&Other, const double Aw = 0);
|
||||
|
||||
size_t numTargets() const {
|
||||
return Targets.size();
|
||||
}
|
||||
const std::vector<CallGraph::NodeId> &targets() const {
|
||||
return Targets;
|
||||
}
|
||||
CallGraph::NodeId target(size_t N) const {
|
||||
return Targets[N];
|
||||
}
|
||||
void reverseTargets();
|
||||
private:
|
||||
std::vector<CallGraph::NodeId> Targets;
|
||||
uint32_t Samples;
|
||||
uint64_t Samples;
|
||||
uint32_t Size;
|
||||
double Density;
|
||||
bool Frozen; // not a candidate for merging
|
||||
};
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
//===--- HFSort.cpp - Cluster functions by hotness ------------------------===//
|
||||
//===--- HFSortPlus.cpp - Cluster functions by hotness --------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
|
@ -144,9 +144,9 @@ void sortByDensity(std::vector<Cluster *> &Clusters) {
|
|||
const double D2 = C2->density();
|
||||
// making sure the sorting is deterministic
|
||||
if (D1 != D2) return D1 > D2;
|
||||
if (C1->Size != C2->Size) return C1->Size < C2->Size;
|
||||
if (C1->Samples != C2->Samples) return C1->Samples > C2->Samples;
|
||||
return C1->Targets[0] < C2->Targets[0];
|
||||
if (C1->size() != C2->size()) return C1->size() < C2->size();
|
||||
if (C1->samples() != C2->samples()) return C1->samples() > C2->samples();
|
||||
return C1->target(0) < C2->target(0);
|
||||
}
|
||||
);
|
||||
}
|
||||
|
@ -155,8 +155,8 @@ void sortByDensity(std::vector<Cluster *> &Clusters) {
|
|||
* Density of a cluster formed by merging a given pair of clusters
|
||||
*/
|
||||
double density(Cluster *ClusterPred, Cluster *ClusterSucc) {
|
||||
const double CombinedSamples = ClusterPred->Samples + ClusterSucc->Samples;
|
||||
const double CombinedSize = ClusterPred->Size + ClusterSucc->Size;
|
||||
const double CombinedSamples = ClusterPred->samples() + ClusterSucc->samples();
|
||||
const double CombinedSize = ClusterPred->size() + ClusterSucc->size();
|
||||
return CombinedSamples / CombinedSize;
|
||||
}
|
||||
|
||||
|
@ -199,42 +199,42 @@ double expectedCacheHitRatio(const AlgoState &State,
|
|||
sortByDensity(Clusters);
|
||||
|
||||
// generate function addresses with an alignment
|
||||
std::vector<size_t> Addr(State.Cg->Nodes.size(), InvalidAddr);
|
||||
std::vector<size_t> Addr(State.Cg->numNodes(), InvalidAddr);
|
||||
size_t CurAddr = 0;
|
||||
// 'hotness' of the pages
|
||||
std::vector<double> PageSamples;
|
||||
for (auto Cluster : Clusters) {
|
||||
for (auto TargetId : Cluster->Targets) {
|
||||
for (auto TargetId : Cluster->targets()) {
|
||||
if (CurAddr & 0xf) CurAddr = (CurAddr & ~0xf) + 16;
|
||||
Addr[TargetId] = CurAddr;
|
||||
CurAddr += State.Cg->Nodes[TargetId].Size;
|
||||
CurAddr += State.Cg->size(TargetId);
|
||||
// update page weight
|
||||
size_t Page = Addr[TargetId] / PageSize;
|
||||
while (PageSamples.size() <= Page) PageSamples.push_back(0.0);
|
||||
PageSamples[Page] += State.Cg->Nodes[TargetId].Samples;
|
||||
PageSamples[Page] += State.Cg->samples(TargetId);
|
||||
}
|
||||
}
|
||||
|
||||
// computing expected number of misses for every function
|
||||
double Misses = 0;
|
||||
for (auto Cluster : Clusters) {
|
||||
for (auto TargetId : Cluster->Targets) {
|
||||
for (auto TargetId : Cluster->targets()) {
|
||||
size_t Page = Addr[TargetId] / PageSize;
|
||||
double Samples = State.Cg->Nodes[TargetId].Samples;
|
||||
double Samples = State.Cg->samples(TargetId);
|
||||
// probability that the page is not present in the cache
|
||||
double MissProb = missProbability(State, PageSamples[Page]);
|
||||
|
||||
for (auto Pred : State.Cg->Nodes[TargetId].Preds) {
|
||||
if (State.Cg->Nodes[Pred].Samples == 0) continue;
|
||||
auto A = State.Cg->Arcs.find(Arc(Pred, TargetId));
|
||||
for (auto Pred : State.Cg->predecessors(TargetId)) {
|
||||
if (State.Cg->samples(Pred) == 0) continue;
|
||||
const auto &Arc = *State.Cg->findArc(Pred, TargetId);
|
||||
|
||||
// the source page
|
||||
size_t SrcPage = (Addr[Pred] + (size_t)A->AvgCallOffset) / PageSize;
|
||||
size_t SrcPage = (Addr[Pred] + (size_t)Arc.avgCallOffset()) / PageSize;
|
||||
if (Page != SrcPage) {
|
||||
// this is a miss
|
||||
Misses += A->Weight * MissProb;
|
||||
Misses += Arc.weight() * MissProb;
|
||||
}
|
||||
Samples -= A->Weight;
|
||||
Samples -= Arc.weight();
|
||||
}
|
||||
|
||||
// the remaining samples come from the jitted code
|
||||
|
@ -251,14 +251,14 @@ double expectedCacheHitRatio(const AlgoState &State,
|
|||
std::unordered_set<Cluster *> adjacentClusters(const AlgoState &State,
|
||||
Cluster *C) {
|
||||
std::unordered_set<Cluster *> Result;
|
||||
for (auto TargetId : C->Targets) {
|
||||
for (auto Succ : State.Cg->Nodes[TargetId].Succs) {
|
||||
for (auto TargetId : C->targets()) {
|
||||
for (auto Succ : State.Cg->successors(TargetId)) {
|
||||
auto SuccCluster = State.FuncCluster[Succ];
|
||||
if (SuccCluster != nullptr && SuccCluster != C) {
|
||||
Result.insert(SuccCluster);
|
||||
}
|
||||
}
|
||||
for (auto Pred : State.Cg->Nodes[TargetId].Preds) {
|
||||
for (auto Pred : State.Cg->predecessors(TargetId)) {
|
||||
auto PredCluster = State.FuncCluster[Pred];
|
||||
if (PredCluster != nullptr && PredCluster != C) {
|
||||
Result.insert(PredCluster);
|
||||
|
@ -285,15 +285,15 @@ double expectedCalls(int64_t SrcAddr, int64_t DstAddr, double EdgeWeight) {
|
|||
*/
|
||||
double shortCalls(const AlgoState &State, Cluster *Cluster) {
|
||||
double Calls = 0;
|
||||
for (auto TargetId : Cluster->Targets) {
|
||||
for (auto Succ : State.Cg->Nodes[TargetId].Succs) {
|
||||
for (auto TargetId : Cluster->targets()) {
|
||||
for (auto Succ : State.Cg->successors(TargetId)) {
|
||||
if (State.FuncCluster[Succ] == Cluster) {
|
||||
auto A = State.Cg->Arcs.find(Arc(TargetId, Succ));
|
||||
const auto &Arc = *State.Cg->findArc(TargetId, Succ);
|
||||
|
||||
auto SrcAddr = State.Addr[TargetId] + A->AvgCallOffset;
|
||||
auto SrcAddr = State.Addr[TargetId] + Arc.avgCallOffset();
|
||||
auto DstAddr = State.Addr[Succ];
|
||||
|
||||
Calls += expectedCalls(SrcAddr, DstAddr, A->Weight);
|
||||
Calls += expectedCalls(SrcAddr, DstAddr, Arc.weight());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -309,29 +309,29 @@ double shortCalls(const AlgoState &State,
|
|||
Cluster *ClusterPred,
|
||||
Cluster *ClusterSucc) {
|
||||
double Calls = 0;
|
||||
for (auto TargetId : ClusterPred->Targets) {
|
||||
for (auto Succ : State.Cg->Nodes[TargetId].Succs) {
|
||||
for (auto TargetId : ClusterPred->targets()) {
|
||||
for (auto Succ : State.Cg->successors(TargetId)) {
|
||||
if (State.FuncCluster[Succ] == ClusterSucc) {
|
||||
auto A = State.Cg->Arcs.find(Arc(TargetId, Succ));
|
||||
const auto &Arc = *State.Cg->findArc(TargetId, Succ);
|
||||
|
||||
auto SrcAddr = State.Addr[TargetId] + A->AvgCallOffset;
|
||||
auto DstAddr = State.Addr[Succ] + ClusterPred->Size;
|
||||
auto SrcAddr = State.Addr[TargetId] + Arc.avgCallOffset();
|
||||
auto DstAddr = State.Addr[Succ] + ClusterPred->size();
|
||||
|
||||
Calls += expectedCalls(SrcAddr, DstAddr, A->Weight);
|
||||
Calls += expectedCalls(SrcAddr, DstAddr, Arc.weight());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto TargetId : ClusterPred->Targets) {
|
||||
for (auto Pred : State.Cg->Nodes[TargetId].Preds) {
|
||||
for (auto TargetId : ClusterPred->targets()) {
|
||||
for (auto Pred : State.Cg->predecessors(TargetId)) {
|
||||
if (State.FuncCluster[Pred] == ClusterSucc) {
|
||||
auto A = State.Cg->Arcs.find(Arc(Pred, TargetId));
|
||||
const auto &Arc = *State.Cg->findArc(Pred, TargetId);
|
||||
|
||||
auto SrcAddr = State.Addr[Pred] + A->AvgCallOffset +
|
||||
ClusterPred->Size;
|
||||
auto SrcAddr = State.Addr[Pred] + Arc.avgCallOffset() +
|
||||
ClusterPred->size();
|
||||
auto DstAddr = State.Addr[TargetId];
|
||||
|
||||
Calls += expectedCalls(SrcAddr, DstAddr, A->Weight);
|
||||
Calls += expectedCalls(SrcAddr, DstAddr, Arc.weight());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -355,12 +355,12 @@ double mergeGain(const AlgoState &State,
|
|||
Cluster *ClusterPred,
|
||||
Cluster *ClusterSucc) {
|
||||
// cache misses on the first cluster
|
||||
double LongCallsPred = ClusterPred->Samples - shortCalls(State, ClusterPred);
|
||||
double LongCallsPred = ClusterPred->samples() - shortCalls(State, ClusterPred);
|
||||
double ProbPred = missProbability(State, ClusterPred->density() * PageSize);
|
||||
double ExpectedMissesPred = LongCallsPred * ProbPred;
|
||||
|
||||
// cache misses on the second cluster
|
||||
double LongCallsSucc = ClusterSucc->Samples - shortCalls(State, ClusterSucc);
|
||||
double LongCallsSucc = ClusterSucc->samples() - shortCalls(State, ClusterSucc);
|
||||
double ProbSucc = missProbability(State, ClusterSucc->density() * PageSize);
|
||||
double ExpectedMissesSucc = LongCallsSucc * ProbSucc;
|
||||
|
||||
|
@ -373,28 +373,7 @@ double mergeGain(const AlgoState &State,
|
|||
|
||||
double Gain = ExpectedMissesPred + ExpectedMissesSucc - MissesNew;
|
||||
// scaling the result to increase the importance of merging short clusters
|
||||
return Gain / (ClusterPred->Size + ClusterSucc->Size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Merge two clusters
|
||||
*/
|
||||
void mergeInto(AlgoState &State, Cluster *Into, Cluster *Other) {
|
||||
auto &Targets = Other->Targets;
|
||||
Into->Targets.insert(Into->Targets.end(), Targets.begin(), Targets.end());
|
||||
Into->Size += Other->Size;
|
||||
Into->Samples += Other->Samples;
|
||||
|
||||
size_t CurAddr = 0;
|
||||
for (auto TargetId : Into->Targets) {
|
||||
State.FuncCluster[TargetId] = Into;
|
||||
State.Addr[TargetId] = CurAddr;
|
||||
CurAddr += State.Cg->Nodes[TargetId].Size;
|
||||
}
|
||||
|
||||
Other->Size = 0;
|
||||
Other->Samples = 0;
|
||||
Other->Targets.clear();
|
||||
return Gain / (ClusterPred->size() + ClusterSucc->size());
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -403,26 +382,26 @@ void mergeInto(AlgoState &State, Cluster *Into, Cluster *Other) {
|
|||
std::vector<Cluster> hfsortPlus(const CallGraph &Cg) {
|
||||
// create a cluster for every function
|
||||
std::vector<Cluster> AllClusters;
|
||||
AllClusters.reserve(Cg.Nodes.size());
|
||||
for (NodeId F = 0; F < Cg.Nodes.size(); F++) {
|
||||
AllClusters.emplace_back(F, Cg.Nodes[F]);
|
||||
AllClusters.reserve(Cg.numNodes());
|
||||
for (NodeId F = 0; F < Cg.numNodes(); F++) {
|
||||
AllClusters.emplace_back(F, Cg.getNode(F));
|
||||
}
|
||||
|
||||
// initialize objects used by the algorithm
|
||||
std::vector<Cluster *> Clusters;
|
||||
Clusters.reserve(Cg.Nodes.size());
|
||||
Clusters.reserve(Cg.numNodes());
|
||||
AlgoState State;
|
||||
State.Cg = &Cg;
|
||||
State.TotalSamples = 0;
|
||||
State.FuncCluster = std::vector<Cluster *>(Cg.Nodes.size(), nullptr);
|
||||
State.Addr = std::vector<size_t>(Cg.Nodes.size(), InvalidAddr);
|
||||
for (NodeId F = 0; F < Cg.Nodes.size(); F++) {
|
||||
if (Cg.Nodes[F].Samples == 0) continue;
|
||||
State.FuncCluster = std::vector<Cluster *>(Cg.numNodes(), nullptr);
|
||||
State.Addr = std::vector<size_t>(Cg.numNodes(), InvalidAddr);
|
||||
for (NodeId F = 0; F < Cg.numNodes(); F++) {
|
||||
if (Cg.samples(F) == 0) continue;
|
||||
|
||||
Clusters.push_back(&AllClusters[F]);
|
||||
State.FuncCluster[F] = &AllClusters[F];
|
||||
State.Addr[F] = 0;
|
||||
State.TotalSamples += Cg.Nodes[F].Samples;
|
||||
State.TotalSamples += Cg.samples(F);
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << "Starting hfsort+ for " << Clusters.size() << " clusters\n"
|
||||
|
@ -482,7 +461,15 @@ std::vector<Cluster> hfsortPlus(const CallGraph &Cg) {
|
|||
Cache.invalidate(BestClusterSucc);
|
||||
|
||||
// merge the best pair of clusters
|
||||
mergeInto(State, BestClusterPred, BestClusterSucc);
|
||||
BestClusterPred->merge(std::move(*BestClusterSucc));
|
||||
|
||||
size_t CurAddr = 0;
|
||||
for (auto TargetId : BestClusterPred->targets()) {
|
||||
State.FuncCluster[TargetId] = BestClusterPred;
|
||||
State.Addr[TargetId] = CurAddr;
|
||||
CurAddr += State.Cg->size(TargetId);
|
||||
}
|
||||
|
||||
// remove BestClusterSucc from the list of active clusters
|
||||
auto Iter = std::remove(Clusters.begin(), Clusters.end(), BestClusterSucc);
|
||||
Clusters.erase(Iter, Clusters.end());
|
||||
|
|
|
@ -44,29 +44,29 @@ public:
|
|||
using ClusterArcSet = std::unordered_set<ClusterArc, ClusterArcHash>;
|
||||
|
||||
void orderFuncs(const CallGraph &Cg, Cluster *C1, Cluster *C2) {
|
||||
auto C1head = C1->Targets.front();
|
||||
auto C1tail = C1->Targets.back();
|
||||
auto C2head = C2->Targets.front();
|
||||
auto C2tail = C2->Targets.back();
|
||||
auto C1head = C1->targets().front();
|
||||
auto C1tail = C1->targets().back();
|
||||
auto C2head = C2->targets().front();
|
||||
auto C2tail = C2->targets().back();
|
||||
|
||||
double C1headC2head = 0;
|
||||
double C1headC2tail = 0;
|
||||
double C1tailC2head = 0;
|
||||
double C1tailC2tail = 0;
|
||||
|
||||
for (const auto &Arc : Cg.Arcs) {
|
||||
if ((Arc.Src == C1head && Arc.Dst == C2head) ||
|
||||
(Arc.Dst == C1head && Arc.Src == C2head)) {
|
||||
C1headC2head += Arc.Weight;
|
||||
} else if ((Arc.Src == C1head && Arc.Dst == C2tail) ||
|
||||
(Arc.Dst == C1head && Arc.Src == C2tail)) {
|
||||
C1headC2tail += Arc.Weight;
|
||||
} else if ((Arc.Src == C1tail && Arc.Dst == C2head) ||
|
||||
(Arc.Dst == C1tail && Arc.Src == C2head)) {
|
||||
C1tailC2head += Arc.Weight;
|
||||
} else if ((Arc.Src == C1tail && Arc.Dst == C2tail) ||
|
||||
(Arc.Dst == C1tail && Arc.Src == C2tail)) {
|
||||
C1tailC2tail += Arc.Weight;
|
||||
for (const auto &Arc : Cg.getArcs()) {
|
||||
if ((Arc.src() == C1head && Arc.dst() == C2head) ||
|
||||
(Arc.dst() == C1head && Arc.src() == C2head)) {
|
||||
C1headC2head += Arc.weight();
|
||||
} else if ((Arc.src() == C1head && Arc.dst() == C2tail) ||
|
||||
(Arc.dst() == C1head && Arc.src() == C2tail)) {
|
||||
C1headC2tail += Arc.weight();
|
||||
} else if ((Arc.src() == C1tail && Arc.dst() == C2head) ||
|
||||
(Arc.dst() == C1tail && Arc.src() == C2head)) {
|
||||
C1tailC2head += Arc.weight();
|
||||
} else if ((Arc.src() == C1tail && Arc.dst() == C2tail) ||
|
||||
(Arc.dst() == C1tail && Arc.src() == C2tail)) {
|
||||
C1tailC2tail += Arc.weight();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -75,29 +75,29 @@ void orderFuncs(const CallGraph &Cg, Cluster *C1, Cluster *C2) {
|
|||
|
||||
if (C1headC2head == Max) {
|
||||
// flip C1
|
||||
std::reverse(C1->Targets.begin(), C1->Targets.end());
|
||||
C1->reverseTargets();
|
||||
} else if (C1headC2tail == Max) {
|
||||
// flip C1 C2
|
||||
std::reverse(C1->Targets.begin(), C1->Targets.end());
|
||||
std::reverse(C2->Targets.begin(), C2->Targets.end());
|
||||
C1->reverseTargets();
|
||||
C2->reverseTargets();
|
||||
} else if (C1tailC2tail == Max) {
|
||||
// flip C2
|
||||
std::reverse(C2->Targets.begin(), C2->Targets.end());
|
||||
C2->reverseTargets();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<Cluster> pettisAndHansen(const CallGraph &Cg) {
|
||||
// indexed by NodeId, keeps its current cluster
|
||||
std::vector<Cluster*> FuncCluster(Cg.Nodes.size(), nullptr);
|
||||
std::vector<Cluster*> FuncCluster(Cg.numNodes(), nullptr);
|
||||
std::vector<Cluster> Clusters;
|
||||
std::vector<NodeId> Funcs;
|
||||
|
||||
Clusters.reserve(Cg.Nodes.size());
|
||||
Clusters.reserve(Cg.numNodes());
|
||||
|
||||
for (NodeId F = 0; F < Cg.Nodes.size(); F++) {
|
||||
if (Cg.Nodes[F].Samples == 0) continue;
|
||||
Clusters.emplace_back(F, Cg.Nodes[F]);
|
||||
for (NodeId F = 0; F < Cg.numNodes(); F++) {
|
||||
if (Cg.samples(F) == 0) continue;
|
||||
Clusters.emplace_back(F, Cg.getNode(F));
|
||||
FuncCluster[F] = &Clusters.back();
|
||||
Funcs.push_back(F);
|
||||
}
|
||||
|
@ -113,11 +113,11 @@ std::vector<Cluster> pettisAndHansen(const CallGraph &Cg) {
|
|||
|
||||
// Create a std::vector of cluster arcs
|
||||
|
||||
for (auto &Arc : Cg.Arcs) {
|
||||
if (Arc.Weight == 0) continue;
|
||||
for (auto &Arc : Cg.getArcs()) {
|
||||
if (Arc.weight() == 0) continue;
|
||||
|
||||
auto const S = FuncCluster[Arc.Src];
|
||||
auto const D = FuncCluster[Arc.Dst];
|
||||
auto const S = FuncCluster[Arc.src()];
|
||||
auto const D = FuncCluster[Arc.dst()];
|
||||
|
||||
// ignore if s or d is nullptr
|
||||
|
||||
|
@ -127,7 +127,7 @@ std::vector<Cluster> pettisAndHansen(const CallGraph &Cg) {
|
|||
|
||||
if (S == D) continue;
|
||||
|
||||
insertOrInc(S, D, Arc.Weight);
|
||||
insertOrInc(S, D, Arc.weight());
|
||||
}
|
||||
|
||||
// Find an arc with max weight and merge its nodes
|
||||
|
@ -147,9 +147,9 @@ std::vector<Cluster> pettisAndHansen(const CallGraph &Cg) {
|
|||
auto const C1 = Max.C1;
|
||||
auto const C2 = Max.C2;
|
||||
|
||||
if (C1->Size + C2->Size > MaxClusterSize) continue;
|
||||
if (C1->size() + C2->size() > MaxClusterSize) continue;
|
||||
|
||||
if (C1->Frozen || C2->Frozen) continue;
|
||||
if (C1->frozen() || C2->frozen()) continue;
|
||||
|
||||
// order functions and merge cluster
|
||||
|
||||
|
@ -176,7 +176,7 @@ std::vector<Cluster> pettisAndHansen(const CallGraph &Cg) {
|
|||
|
||||
// update FuncCluster
|
||||
|
||||
for (auto F : C2->Targets) {
|
||||
for (auto F : C2->targets()) {
|
||||
FuncCluster[F] = C1;
|
||||
}
|
||||
C1->merge(std::move(*C2), Max.Weight);
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ReorderFunctions.h"
|
||||
#include "HFSort.h"
|
||||
#include "llvm/Support/Options.h"
|
||||
#include <fstream>
|
||||
|
||||
|
@ -90,42 +91,19 @@ using NodeId = CallGraph::NodeId;
|
|||
using Arc = CallGraph::Arc;
|
||||
using Node = CallGraph::Node;
|
||||
|
||||
void ReorderFunctions::normalizeArcWeights() {
|
||||
// Normalize arc weights.
|
||||
if (!opts::UseEdgeCounts) {
|
||||
for (NodeId FuncId = 0; FuncId < Cg.Nodes.size(); ++FuncId) {
|
||||
auto& Func = Cg.Nodes[FuncId];
|
||||
for (auto Caller : Func.Preds) {
|
||||
auto& A = *Cg.Arcs.find(Arc(Caller, FuncId));
|
||||
A.NormalizedWeight = A.Weight / Func.Samples;
|
||||
A.AvgCallOffset /= A.Weight;
|
||||
assert(A.AvgCallOffset < Cg.Nodes[Caller].Size);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (NodeId FuncId = 0; FuncId < Cg.Nodes.size(); ++FuncId) {
|
||||
auto &Func = Cg.Nodes[FuncId];
|
||||
for (auto Caller : Func.Preds) {
|
||||
auto& A = *Cg.Arcs.find(Arc(Caller, FuncId));
|
||||
A.NormalizedWeight = A.Weight / Func.Samples;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters,
|
||||
std::map<uint64_t, BinaryFunction> &BFs) {
|
||||
std::vector<uint64_t> FuncAddr(Cg.Nodes.size()); // Just for computing stats
|
||||
std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats
|
||||
uint64_t TotalSize = 0;
|
||||
uint32_t Index = 0;
|
||||
|
||||
// Set order of hot functions based on clusters.
|
||||
for (const auto& Cluster : Clusters) {
|
||||
for (const auto FuncId : Cluster.Targets) {
|
||||
assert(Cg.Nodes[FuncId].Samples > 0);
|
||||
Cg.Funcs[FuncId]->setIndex(Index++);
|
||||
for (const auto FuncId : Cluster.targets()) {
|
||||
assert(Cg.samples(FuncId) > 0);
|
||||
Cg.nodeIdToFunc(FuncId)->setIndex(Index++);
|
||||
FuncAddr[FuncId] = TotalSize;
|
||||
TotalSize += Cg.Nodes[FuncId].Size;
|
||||
TotalSize += Cg.size(FuncId);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -141,6 +119,11 @@ void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters,
|
|||
#endif
|
||||
}
|
||||
|
||||
bool PrintDetailed = opts::Verbosity > 1;
|
||||
#ifndef NDEBUG
|
||||
PrintDetailed |=
|
||||
(DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0);
|
||||
#endif
|
||||
TotalSize = 0;
|
||||
uint64_t CurPage = 0;
|
||||
uint64_t Hotfuncs = 0;
|
||||
|
@ -149,65 +132,84 @@ void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters,
|
|||
double TotalCalls64B = 0;
|
||||
double TotalCalls4KB = 0;
|
||||
double TotalCalls2MB = 0;
|
||||
dbgs() << "============== page 0 ==============\n";
|
||||
if (PrintDetailed) {
|
||||
outs() << "BOLT-INFO: Function reordering page layout\n"
|
||||
<< "BOLT-INFO: ============== page 0 ==============\n";
|
||||
}
|
||||
for (auto& Cluster : Clusters) {
|
||||
dbgs() <<
|
||||
format("-------- density = %.3lf (%u / %u) --------\n",
|
||||
(double) Cluster.Samples / Cluster.Size,
|
||||
Cluster.Samples, Cluster.Size);
|
||||
if (PrintDetailed) {
|
||||
outs() <<
|
||||
format("BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n",
|
||||
Cluster.density(), Cluster.samples(), Cluster.size());
|
||||
}
|
||||
|
||||
for (auto FuncId : Cluster.Targets) {
|
||||
if (Cg.Nodes[FuncId].Samples > 0) {
|
||||
for (auto FuncId : Cluster.targets()) {
|
||||
if (Cg.samples(FuncId) > 0) {
|
||||
Hotfuncs++;
|
||||
|
||||
dbgs() << "BOLT-INFO: hot func " << *Cg.Funcs[FuncId]
|
||||
<< " (" << Cg.Nodes[FuncId].Size << ")\n";
|
||||
if (PrintDetailed) {
|
||||
outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId)
|
||||
<< " (" << Cg.size(FuncId) << ")\n";
|
||||
}
|
||||
|
||||
uint64_t Dist = 0;
|
||||
uint64_t Calls = 0;
|
||||
for (auto Dst : Cg.Nodes[FuncId].Succs) {
|
||||
auto& A = *Cg.Arcs.find(Arc(FuncId, Dst));
|
||||
auto D =
|
||||
std::abs(FuncAddr[A.Dst] - (FuncAddr[FuncId] + A.AvgCallOffset));
|
||||
auto W = A.Weight;
|
||||
for (auto Dst : Cg.successors(FuncId)) {
|
||||
const auto& Arc = *Cg.findArc(FuncId, Dst);
|
||||
const auto D = std::abs(FuncAddr[Arc.dst()] -
|
||||
(FuncAddr[FuncId] + Arc.avgCallOffset()));
|
||||
const auto W = Arc.weight();
|
||||
Calls += W;
|
||||
if (D < 64) TotalCalls64B += W;
|
||||
if (D < 4096) TotalCalls4KB += W;
|
||||
if (D < (2 << 20)) TotalCalls2MB += W;
|
||||
Dist += A.Weight * D;
|
||||
dbgs() << format("arc: %u [@%lu+%.1lf] -> %u [@%lu]: "
|
||||
"weight = %.0lf, callDist = %f\n",
|
||||
A.Src, FuncAddr[A.Src], A.AvgCallOffset,
|
||||
A.Dst, FuncAddr[A.Dst], A.Weight, D);
|
||||
Dist += Arc.weight() * D;
|
||||
if (PrintDetailed) {
|
||||
outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: "
|
||||
"weight = %.0lf, callDist = %f\n",
|
||||
Arc.src(),
|
||||
FuncAddr[Arc.src()],
|
||||
Arc.avgCallOffset(),
|
||||
Arc.dst(),
|
||||
FuncAddr[Arc.dst()],
|
||||
Arc.weight(), D);
|
||||
}
|
||||
}
|
||||
TotalCalls += Calls;
|
||||
TotalDistance += Dist;
|
||||
dbgs() << format("start = %6u : avgCallDist = %lu : %s\n",
|
||||
TotalSize,
|
||||
Calls ? Dist / Calls : 0,
|
||||
Cg.Funcs[FuncId]->getPrintName().c_str());
|
||||
TotalSize += Cg.Nodes[FuncId].Size;
|
||||
auto NewPage = TotalSize / HugePageSize;
|
||||
if (NewPage != CurPage) {
|
||||
CurPage = NewPage;
|
||||
dbgs() << format("============== page %u ==============\n", CurPage);
|
||||
TotalSize += Cg.size(FuncId);
|
||||
|
||||
if (PrintDetailed) {
|
||||
outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : %s\n",
|
||||
TotalSize,
|
||||
Calls ? Dist / Calls : 0,
|
||||
Cg.nodeIdToFunc(FuncId)->getPrintName().c_str());
|
||||
const auto NewPage = TotalSize / HugePageSize;
|
||||
if (NewPage != CurPage) {
|
||||
CurPage = NewPage;
|
||||
outs() <<
|
||||
format("BOLT-INFO: ============== page %u ==============\n",
|
||||
CurPage);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
dbgs() << format(" Number of hot functions: %u\n"
|
||||
" Number of clusters: %lu\n",
|
||||
outs() << "BOLT-INFO: Function reordering stats\n"
|
||||
<< format("BOLT-INFO: Number of hot functions: %u\n"
|
||||
"BOLT-INFO: Number of clusters: %lu\n",
|
||||
Hotfuncs, Clusters.size())
|
||||
<< format(" Final average call distance = %.1lf (%.0lf / %.0lf)\n",
|
||||
<< format("BOLT-INFO: Final average call distance = %.1lf "
|
||||
"(%.0lf / %.0lf)\n",
|
||||
TotalCalls ? TotalDistance / TotalCalls : 0,
|
||||
TotalDistance, TotalCalls)
|
||||
<< format(" Total Calls = %.0lf\n", TotalCalls);
|
||||
<< format("BOLT-INFO: Total Calls = %.0lf\n", TotalCalls);
|
||||
if (TotalCalls) {
|
||||
dbgs() << format(" Total Calls within 64B = %.0lf (%.2lf%%)\n",
|
||||
outs() << format("BOLT-INFO: Total Calls within 64B = %.0lf (%.2lf%%)\n",
|
||||
TotalCalls64B, 100 * TotalCalls64B / TotalCalls)
|
||||
<< format(" Total Calls within 4KB = %.0lf (%.2lf%%)\n",
|
||||
<< format("BOLT-INFO: Total Calls within 4KB = %.0lf (%.2lf%%)\n",
|
||||
TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls)
|
||||
<< format(" Total Calls within 2MB = %.0lf (%.2lf%%)\n",
|
||||
<< format("BOLT-INFO: Total Calls within 2MB = %.0lf (%.2lf%%)\n",
|
||||
TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls);
|
||||
}
|
||||
}
|
||||
|
@ -251,7 +253,7 @@ void ReorderFunctions::runOnFunctions(BinaryContext &BC,
|
|||
false, // IncludeColdCalls
|
||||
opts::ReorderFunctionsUseHotSize,
|
||||
opts::UseEdgeCounts);
|
||||
normalizeArcWeights();
|
||||
Cg.normalizeArcWeights(opts::UseEdgeCounts);
|
||||
}
|
||||
|
||||
std::vector<Cluster> Clusters;
|
||||
|
|
|
@ -13,16 +13,15 @@
|
|||
#define LLVM_TOOLS_LLVM_BOLT_PASSES_REORDER_FNCTIONS_H
|
||||
|
||||
#include "BinaryPasses.h"
|
||||
#include "HFSort.h"
|
||||
#include "BinaryFunctionCallGraph.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
/// Modify function order for streaming based on hotness.
|
||||
class ReorderFunctions : public BinaryFunctionPass {
|
||||
CallGraph Cg;
|
||||
BinaryFunctionCallGraph Cg;
|
||||
|
||||
void normalizeArcWeights();
|
||||
void reorder(std::vector<Cluster> &&Clusters,
|
||||
std::map<uint64_t, BinaryFunction> &BFs);
|
||||
public:
|
||||
|
|
|
@ -1534,14 +1534,14 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
|
|||
Rel.getType() != ELF::R_X86_64_GOTTPOFF &&
|
||||
Rel.getType() != ELF::R_X86_64_GOTPCREL) {
|
||||
if (!IsPCRelative) {
|
||||
if (opts::Verbosity > 1 &&
|
||||
if (opts::Verbosity > 2 &&
|
||||
ExtractedValue != Address) {
|
||||
errs() << "BOLT-WARNING: mismatch ExtractedValue = 0x"
|
||||
<< Twine::utohexstr(ExtractedValue) << '\n';
|
||||
}
|
||||
Address = ExtractedValue;
|
||||
} else {
|
||||
if (opts::Verbosity > 1 &&
|
||||
if (opts::Verbosity > 2 &&
|
||||
ExtractedValue != Address - Rel.getOffset() + Addend) {
|
||||
errs() << "BOLT-WARNING: PC-relative mismatch ExtractedValue = 0x"
|
||||
<< Twine::utohexstr(ExtractedValue) << '\n';
|
||||
|
|