ICF improvements.

Summary:
Re-worked the way ICF operates. In addition to call instructions, the pass
now checks all references, including function pointers. Jump tables are
handled too.

(cherry picked from FBD4372491)
This commit is contained in:
Maksim Panchenko 2016-12-21 17:13:56 -08:00
parent 55fc5417f8
commit bc8a456309
12 changed files with 617 additions and 695 deletions

View File

@ -121,7 +121,7 @@ void BinaryBasicBlock::removePredecessor(BinaryBasicBlock *Pred) {
}
void BinaryBasicBlock::addLandingPad(BinaryBasicBlock *LPBlock) {
LandingPads.insert(LPBlock);
LandingPads.push_back(LPBlock);
LPBlock->Throwers.insert(this);
}
@ -190,7 +190,7 @@ BinaryBasicBlock::getBranchStats(const BinaryBasicBlock *Succ) const {
uint64_t TotalCount = 0;
uint64_t TotalMispreds = 0;
for (const auto &BI : BranchInfo) {
if (BI.Count != COUNT_FALLTHROUGH_EDGE) {
if (BI.Count != COUNT_NO_PROFILE) {
TotalCount += BI.Count;
TotalMispreds += BI.MispredictedCount;
}
@ -200,7 +200,7 @@ BinaryBasicBlock::getBranchStats(const BinaryBasicBlock *Succ) const {
auto Itr = std::find(Successors.begin(), Successors.end(), Succ);
assert(Itr != Successors.end());
const auto &BI = BranchInfo[Itr - Successors.begin()];
if (BI.Count && BI.Count != COUNT_FALLTHROUGH_EDGE) {
if (BI.Count && BI.Count != COUNT_NO_PROFILE) {
if (TotalMispreds == 0) TotalMispreds = 1;
return std::make_pair(double(BI.Count) / TotalCount,
double(BI.MispredictedCount) / TotalMispreds);

View File

@ -34,6 +34,17 @@ class BinaryFunction;
/// we might switch to it at some point.
class BinaryBasicBlock {
public:
/// Profile execution information for a given edge in CFG.
///
/// If MispredictedCount equals COUNT_INFERRED, then we have profile
/// data for a fall-through edge with a Count representing an inferred
/// execution count, i.e. the count we calculated internally, not the one
/// coming from profile data.
///
/// For all other values of MispredictedCount, Count represents the number of
/// branch executions from a profile, and MispredictedCount is the number
/// of times the branch was mispredicted according to this profile.
struct BinaryBranchInfo {
uint64_t Count;
uint64_t MispredictedCount; /// number of branches mispredicted
@ -47,7 +58,7 @@ private:
std::vector<BinaryBasicBlock *> Predecessors;
std::vector<BinaryBasicBlock *> Successors;
std::set<BinaryBasicBlock *> Throwers;
std::set<BinaryBasicBlock *> LandingPads;
std::vector<BinaryBasicBlock *> LandingPads;
/// Each successor has a corresponding BranchInfo entry in the list.
std::vector<BinaryBranchInfo> BranchInfo;
@ -121,7 +132,7 @@ private:
}
public:
static constexpr uint64_t COUNT_FALLTHROUGH_EDGE =
static constexpr uint64_t COUNT_INFERRED =
std::numeric_limits<uint64_t>::max();
static constexpr uint64_t COUNT_NO_PROFILE =
std::numeric_limits<uint64_t>::max();
@ -478,6 +489,12 @@ public:
return ExecutionCount;
}
/// Return the execution count of this block, mapping the "no profile"
/// sentinel (COUNT_NO_PROFILE) to 0 so the result can be used in sums.
uint64_t getKnownExecutionCount() const {
  if (ExecutionCount == COUNT_NO_PROFILE)
    return 0;
  return ExecutionCount;
}
/// Set the execution count for this block.
void setExecutionCount(uint64_t Count) {
ExecutionCount = Count;

View File

@ -17,11 +17,14 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
namespace llvm {
namespace bolt {
using namespace llvm;
using namespace bolt;
namespace opts {
extern cl::opt<bool> Relocs;
static cl::opt<bool>
PrintDebugInfo("print-debug-info",
cl::desc("print debug info when printing functions"),
@ -68,6 +71,42 @@ MCSymbol *BinaryContext::getGlobalSymbolAtAddress(uint64_t Address) const {
return Symbol;
}
void BinaryContext::foldFunction(BinaryFunction &ChildBF,
                                 BinaryFunction &ParentBF,
                                 std::map<uint64_t, BinaryFunction> &BFs) {
  // The parent inherits every name the child was known by.
  ParentBF.addNewNames(ChildBF.getNames());

  // Calls to functions are resolved through symbols, so every symbol of the
  // child has to be redirected to the parent in the lookup table.
  // NB: GlobalAddresses and GlobalSymbols do not need to be updated.
  for (auto &ChildName : ChildBF.getNames()) {
    auto *ChildSymbol = Ctx->lookupSymbol(ChildName);
    assert(ChildSymbol && "symbol cannot be NULL at this point");
    SymbolToFunctionMap[ChildSymbol] = &ParentBF;
  }

  // Fold the execution counts of the child into those of the parent.
  ChildBF.mergeProfileDataInto(ParentBF);

  if (!opts::Relocs) {
    // Non-relocation mode: the function is kept, but under a new name.
    std::string NewName = "__ICF_" + ChildBF.Names.back();
    ChildBF.Names.clear();
    ChildBF.Names.push_back(NewName);
    ChildBF.OutputSymbol = Ctx->getOrCreateSymbol(NewName);
    return;
  }

  // Relocation mode: drop the child from the global set of functions.
  auto ChildIt = BFs.find(ChildBF.getAddress());
  assert(ChildIt != BFs.end() && "function not found");
  assert(&ChildBF == &ChildIt->second && "function mismatch");
  BFs.erase(ChildIt);
}
void BinaryContext::printGlobalSymbols(raw_ostream& OS) const {
for (auto &entry : GlobalSymbols) {
OS << "(" << entry.first << " -> " << entry.second << ")\n";
@ -378,6 +417,3 @@ void BinaryContext::addSectionRelocation(SectionRef Section, uint64_t Address,
}
RI->second.emplace_back(Relocation{Address, Symbol, Type, Addend});
}
} // namespace bolt
} // namespace llvm

View File

@ -88,9 +88,13 @@ public:
SymbolMapType GlobalSymbols;
/// [address] -> [name1], [name2], ...
/// Global addresses never change.
std::multimap<uint64_t, std::string> GlobalAddresses;
/// [MCSymbol] -> [BinaryFunction]
///
/// As we fold identical functions, multiple symbols can point
/// to the same BinaryFunction.
std::unordered_map<const MCSymbol *,
const BinaryFunction *> SymbolToFunctionMap;
@ -220,6 +224,13 @@ public:
return Ctx->getOrCreateSymbol(Name);
}
/// Replaces all references to \p ChildBF with \p ParentBF and merges the
/// profile data of \p ChildBF into that of \p ParentBF. In relocation mode
/// \p ChildBF is then removed from the list of functions \p BFs; otherwise
/// it is kept and renamed with an "__ICF_" prefix.
void foldFunction(BinaryFunction &ChildBF,
BinaryFunction &ParentBF,
std::map<uint64_t, BinaryFunction> &BFs);
/// Add section relocation.
void addSectionRelocation(SectionRef Section, uint64_t Address,
MCSymbol *Symbol, uint64_t Type,

View File

@ -198,6 +198,9 @@ BinaryFunction::getBasicBlockContainingOffset(uint64_t Offset) {
size_t
BinaryFunction::getBasicBlockOriginalSize(const BinaryBasicBlock *BB) const {
if (CurrentState != State::CFG)
return 0;
auto Index = getIndex(BB);
if (Index + 1 == BasicBlocks.size()) {
return Size - BB->getOffset();
@ -322,6 +325,10 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
<< "\n IsSimple : " << IsSimple
<< "\n IsSplit : " << IsSplit
<< "\n BB Count : " << BasicBlocksLayout.size();
if (CurrentState == State::CFG) {
OS << "\n Hash : " << Twine::utohexstr(hash());
}
if (FrameInstructions.size()) {
OS << "\n CFI Instrs : " << FrameInstructions.size();
}
@ -339,18 +346,6 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
OS << "\n Exec Count : " << ExecutionCount;
OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
}
if (getIdenticalFunction()) {
OS << "\n Copy Of : " << *getIdenticalFunction();
}
if (!Twins.empty()) {
OS << "\n Twins : ";
auto Sep = "";
for (auto *TwinFunction : Twins) {
OS << Sep << *TwinFunction;
Sep = ", ";
}
}
if (opts::PrintDynoStats && !BasicBlocksLayout.empty()) {
OS << '\n';
@ -387,8 +382,8 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
OS << "------- HOT-COLD SPLIT POINT -------\n\n";
OS << BB->getName() << " ("
<< BB->size() << " instructions, align : "
<< BB->getAlignment() << ")\n";
<< BB->size() << " instructions, align : " << BB->getAlignment()
<< ")\n";
if (BB->isEntryPoint())
OS << " Entry Point\n";
@ -397,7 +392,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
OS << " Landing Pad\n";
uint64_t BBExecCount = BB->getExecutionCount();
if (BBExecCount != BinaryBasicBlock::COUNT_NO_PROFILE) {
if (hasValidProfile()) {
OS << " Exec Count : " << BBExecCount << "\n";
}
if (!BBCFIState.empty()) {
@ -435,11 +430,11 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
assert(BI != BB->branch_info_end() && "missing BranchInfo entry");
OS << Sep << Succ->getName();
if (ExecutionCount != COUNT_NO_PROFILE &&
BI->MispredictedCount != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
BI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
OS << " (mispreds: " << BI->MispredictedCount
<< ", count: " << BI->Count << ")";
} else if (ExecutionCount != COUNT_NO_PROFILE &&
BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
OS << " (inferred count: " << BI->Count << ")";
}
Sep = ", ";
@ -1221,12 +1216,8 @@ void BinaryFunction::postProcessJumpTables() {
if (TargetOffset < getSize())
TakenBranches.emplace_back(JTSiteOffset, TargetOffset);
// The relocations for PIC-style jump table have to be ignored.
//
// We can ignore the rest too if we output jump table to a different
// section.
if (JT->Type == JumpTable::JTT_PIC)
BC.IgnoredRelocations.emplace(JT->Address + EntryOffset);
// Ignore relocations for jump tables.
BC.IgnoredRelocations.emplace(JT->Address + EntryOffset);
EntryOffset += JT->EntrySize;
@ -1651,8 +1642,8 @@ bool BinaryFunction::buildCFG() {
bool IsPrevFT = false; // Is previous block a fall-through.
for (auto BB : BasicBlocks) {
if (IsPrevFT) {
PrevBB->addSuccessor(BB, BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE,
BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE);
PrevBB->addSuccessor(BB, BinaryBasicBlock::COUNT_NO_PROFILE,
BinaryBasicBlock::COUNT_INFERRED);
}
if (BB->empty()) {
IsPrevFT = true;
@ -1708,6 +1699,8 @@ bool BinaryFunction::buildCFG() {
// Infer frequency for non-taken branches
if (hasValidProfile())
inferFallThroughCounts();
else
clearProfile();
// Update CFI information for each BB
BBCFIState = annotateCFIState();
@ -1914,6 +1907,19 @@ void BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) {
}
}
void BinaryFunction::clearProfile() {
// Keep function execution profile the same. Only clear basic block and edge
// counts.
for (auto *BB : BasicBlocks) {
BB->ExecutionCount = 0;
for (auto &BI : BB->branch_info()) {
BI.Count = 0;
BI.MispredictedCount = 0;
}
}
}
void BinaryFunction::inferFallThroughCounts() {
assert(!BasicBlocks.empty() && "basic block list should not be empty");
@ -1921,23 +1927,20 @@ void BinaryFunction::inferFallThroughCounts() {
// Compute preliminary execution count for each basic block
for (auto CurBB : BasicBlocks) {
if (CurBB == *BasicBlocks.begin()) {
CurBB->setExecutionCount(ExecutionCount);
continue;
}
CurBB->ExecutionCount = 0;
}
BasicBlocks.front()->setExecutionCount(ExecutionCount);
for (auto CurBB : BasicBlocks) {
auto SuccCount = CurBB->branch_info_begin();
for (auto Succ : CurBB->successors()) {
// Do not update execution count of the entry block (when we have tail
// calls). We already accounted for those when computing the func count.
if (Succ == *BasicBlocks.begin()) {
if (Succ == BasicBlocks.front()) {
++SuccCount;
continue;
}
if (SuccCount->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE)
if (SuccCount->Count != BinaryBasicBlock::COUNT_NO_PROFILE)
Succ->setExecutionCount(Succ->getExecutionCount() + SuccCount->Count);
++SuccCount;
}
@ -1954,8 +1957,9 @@ void BinaryFunction::inferFallThroughCounts() {
}
}
// Work on a basic block at a time, propagating frequency information forwards
// It is important to walk in the layout order
// Work on a basic block at a time, propagating frequency information
// forwards.
// It is important to walk in the layout order.
for (auto CurBB : BasicBlocks) {
uint64_t BBExecCount = CurBB->getExecutionCount();
@ -1965,15 +1969,15 @@ void BinaryFunction::inferFallThroughCounts() {
continue;
// Calculate frequency of outgoing branches from this node according to
// LBR data
// LBR data.
uint64_t ReportedBranches = 0;
for (const auto &SuccCount : CurBB->branch_info()) {
if (SuccCount.Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE)
if (SuccCount.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
ReportedBranches += SuccCount.Count;
}
// Calculate frequency of outgoing tail calls from this node according to
// LBR data
// LBR data.
uint64_t ReportedTailCalls = 0;
auto TCI = TailCallTerminatedBlocks.find(CurBB);
if (TCI != TailCallTerminatedBlocks.end()) {
@ -1993,7 +1997,7 @@ void BinaryFunction::inferFallThroughCounts() {
ReportedBranches + ReportedTailCalls + ReportedThrows;
// Infer the frequency of the fall-through edge, representing not taking the
// branch
// branch.
uint64_t Inferred = 0;
if (BBExecCount > TotalReportedJumps)
Inferred = BBExecCount - TotalReportedJumps;
@ -2012,7 +2016,7 @@ void BinaryFunction::inferFallThroughCounts() {
// If there is an FT it will be the last successor.
auto &SuccCount = *CurBB->branch_info_rbegin();
auto &Succ = *CurBB->succ_rbegin();
if (SuccCount.Count == BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
if (SuccCount.Count == BinaryBasicBlock::COUNT_NO_PROFILE) {
SuccCount.Count = Inferred;
Succ->ExecutionCount += Inferred;
}
@ -2662,10 +2666,10 @@ void BinaryFunction::dumpGraph(raw_ostream& OS) const {
Branch.c_str());
if (BB->getExecutionCount() != COUNT_NO_PROFILE &&
BI->MispredictedCount != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
BI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
OS << "\\n(M:" << BI->MispredictedCount << ",C:" << BI->Count << ")";
} else if (ExecutionCount != COUNT_NO_PROFILE &&
BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
OS << "\\n(IC:" << BI->Count << ")";
}
OS << "\"]\n";
@ -2881,61 +2885,54 @@ void BinaryFunction::propagateGnuArgsSizeInfo() {
}
void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
if (!hasValidProfile() || !BF.hasValidProfile())
// No reason to merge invalid or empty profiles into BF.
if (!hasValidProfile())
return;
// Update BF's execution count.
uint64_t MyExecutionCount = getExecutionCount();
if (MyExecutionCount != BinaryFunction::COUNT_NO_PROFILE) {
uint64_t OldExecCount = BF.getExecutionCount();
uint64_t NewExecCount =
OldExecCount == BinaryFunction::COUNT_NO_PROFILE ?
MyExecutionCount :
MyExecutionCount + OldExecCount;
BF.setExecutionCount(NewExecCount);
// Update function execution count.
if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE) {
BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount());
}
// Update BF's basic block and edge counts.
// Since we are merging a valid profile, the new profile should be valid too.
// It has either already been valid, or it has been cleaned up.
BF.ProfileMatchRatio = 1.0f;
// Update basic block and edge counts.
auto BBMergeI = BF.begin();
for (BinaryBasicBlock *BB : BasicBlocks) {
BinaryBasicBlock *BBMerge = &*BBMergeI;
assert(getIndex(BB) == BF.getIndex(BBMerge));
// Update BF's basic block count.
uint64_t MyBBExecutionCount = BB->getExecutionCount();
if (MyBBExecutionCount != BinaryBasicBlock::COUNT_NO_PROFILE) {
uint64_t OldExecCount = BBMerge->getExecutionCount();
uint64_t NewExecCount =
OldExecCount == BinaryBasicBlock::COUNT_NO_PROFILE ?
MyBBExecutionCount :
MyBBExecutionCount + OldExecCount;
BBMerge->setExecutionCount(NewExecCount);
// Update basic block count.
if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) {
BBMerge->setExecutionCount(
BBMerge->getKnownExecutionCount() + BB->getExecutionCount());
}
// Update BF's edge count for successors of this basic block.
// Update edge count for successors of this basic block.
auto BBMergeSI = BBMerge->succ_begin();
auto BII = BB->branch_info_begin();
auto BIMergeI = BBMerge->branch_info_begin();
for (BinaryBasicBlock *BBSucc : BB->successors()) {
BinaryBasicBlock *BBMergeSucc = *BBMergeSI;
auto BII = BB->branch_info_begin();
for (const auto *BBSucc : BB->successors()) {
auto *BBMergeSucc = *BBMergeSI;
assert(getIndex(BBSucc) == BF.getIndex(BBMergeSucc));
if (BII->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
uint64_t OldBranchCount = BIMergeI->Count;
uint64_t NewBranchCount =
OldBranchCount == BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE ?
BII->Count :
BII->Count + OldBranchCount;
BIMergeI->Count = NewBranchCount;
}
// At this point no branch count should be set to COUNT_NO_PROFILE.
assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
"unexpected unknown branch profile");
assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
"unexpected unknown branch profile");
if (BII->MispredictedCount != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
uint64_t OldMispredictedCount = BIMergeI->MispredictedCount;
uint64_t NewMispredictedCount =
OldMispredictedCount == BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE ?
BII->MispredictedCount :
BII->MispredictedCount + OldMispredictedCount;
BIMergeI->MispredictedCount = NewMispredictedCount;
BIMergeI->Count += BII->Count;
// When we merge inferred and real fall-through branch data, the merged
// data is considered inferred.
if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED &&
BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
BIMergeI->MispredictedCount += BII->MispredictedCount;
} else {
BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
}
++BBMergeSI;
@ -2949,171 +2946,46 @@ void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
assert(BBMergeI == BF.end());
}
std::pair<bool, unsigned> BinaryFunction::isCalleeEquivalentWith(
const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther,
const BinaryBasicBlock &BBOther, const BinaryFunction &BF) const {
// The callee operand in a direct call is the first operand. This
// operand should be a symbol corresponding to the callee function.
constexpr unsigned CalleeOpIndex = 0;
__attribute__((noinline)) BinaryFunction::BasicBlockOrderType BinaryFunction::dfs() const {
BasicBlockOrderType DFS;
unsigned Index = 0;
std::stack<BinaryBasicBlock *> Stack;
// Helper function.
auto getGlobalAddress = [this] (const MCSymbol &Symbol) -> uint64_t {
auto AI = BC.GlobalSymbols.find(Symbol.getName());
assert(AI != BC.GlobalSymbols.end());
return AI->second;
};
const MCOperand &CalleeOp = Inst.getOperand(CalleeOpIndex);
const MCOperand &CalleeOpOther = InstOther.getOperand(CalleeOpIndex);
if (!CalleeOp.isExpr() || !CalleeOpOther.isExpr()) {
// At least one of these is actually an indirect call.
return std::make_pair(false, 0);
}
const MCSymbol &CalleeSymbol = CalleeOp.getExpr()->getSymbol();
uint64_t CalleeAddress = getGlobalAddress(CalleeSymbol);
const MCSymbol &CalleeSymbolOther = CalleeOpOther.getExpr()->getSymbol();
uint64_t CalleeAddressOther = getGlobalAddress(CalleeSymbolOther);
bool BothRecursiveCalls =
CalleeAddress == getAddress() &&
CalleeAddressOther == BF.getAddress();
bool SameCallee = CalleeAddress == CalleeAddressOther;
return std::make_pair(BothRecursiveCalls || SameCallee, CalleeOpIndex);
}
std::pair<bool, unsigned> BinaryFunction::isTargetEquivalentWith(
const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther,
const BinaryBasicBlock &BBOther, const BinaryFunction &BF,
bool AreInvokes) const {
// The target operand in a (non-indirect) jump instruction is the
// first operand.
unsigned TargetOpIndex = 0;
if (AreInvokes) {
// The landing pad operand in an invoke is either the second or the
// sixth operand, depending on the number of operands of the invoke.
TargetOpIndex = 1;
if (Inst.getNumOperands() == 7 || Inst.getNumOperands() == 8)
TargetOpIndex = 5;
}
const MCOperand &TargetOp = Inst.getOperand(TargetOpIndex);
const MCOperand &TargetOpOther = InstOther.getOperand(TargetOpIndex);
if (!TargetOp.isExpr() || !TargetOpOther.isExpr()) {
assert(AreInvokes);
// An invoke without a landing pad operand has no catch handler. As long
// as both invokes have no catch target, we can consider they have the
// same catch target.
return std::make_pair(!TargetOp.isExpr() && !TargetOpOther.isExpr(),
TargetOpIndex);
}
const MCSymbol &TargetSymbol = TargetOp.getExpr()->getSymbol();
BinaryBasicBlock *TargetBB =
AreInvokes ?
BB.getLandingPad(&TargetSymbol) :
BB.getSuccessor(&TargetSymbol);
const MCSymbol &TargetSymbolOther = TargetOpOther.getExpr()->getSymbol();
BinaryBasicBlock *TargetBBOther =
AreInvokes ?
BBOther.getLandingPad(&TargetSymbolOther) :
BBOther.getSuccessor(&TargetSymbolOther);
if (TargetBB == nullptr || TargetBBOther == nullptr) {
assert(!AreInvokes);
// This is a tail call implemented with a jump that was not
// converted to a call (e.g. conditional jump). Since the
// instructions were not identical, the functions cannot be
// proven identical either.
return std::make_pair(false, 0);
}
return std::make_pair(getIndex(TargetBB) == BF.getIndex(TargetBBOther),
TargetOpIndex);
}
bool BinaryFunction::isInstrEquivalentWith(
const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther,
const BinaryBasicBlock &BBOther, const BinaryFunction &BF) const {
// First check their opcodes.
if (Inst.getOpcode() != InstOther.getOpcode()) {
return false;
}
// Then check if they have the same number of operands.
unsigned NumOperands = Inst.getNumOperands();
unsigned NumOperandsOther = InstOther.getNumOperands();
if (NumOperands != NumOperandsOther) {
return false;
}
// We are interested in 3 special cases:
// Push entry points to the stack in reverse order.
//
// a) both instructions are recursive calls.
// b) both instructions are local jumps to basic blocks with same indices.
// c) both instructions are invokes with landing pad blocks with same indices.
//
// In any of these cases the instructions will differ in some operands, but
// given identical CFG of the functions, they can still be considered
// equivalent.
bool BothCalls =
BC.MIA->isCall(Inst) &&
BC.MIA->isCall(InstOther);
bool BothInvokes =
BC.MIA->isInvoke(Inst) &&
BC.MIA->isInvoke(InstOther);
bool BothBranches =
BC.MIA->isBranch(Inst) &&
!BC.MIA->isIndirectBranch(Inst) &&
BC.MIA->isBranch(InstOther) &&
!BC.MIA->isIndirectBranch(InstOther);
if (!BothCalls && !BothInvokes && !BothBranches) {
return Inst.equals(InstOther);
// NB: we rely on the original order of entries to match.
for (auto BBI = layout_rbegin(); BBI != layout_rend(); ++BBI) {
auto *BB = *BBI;
if (BB->isEntryPoint())
Stack.push(BB);
BB->setLayoutIndex(BinaryBasicBlock::InvalidIndex);
}
// We figure out if both instructions are recursive calls (case a) or else
// if they are calls to the same function.
bool EquivCallees = false;
unsigned CalleeOpIndex = 0;
if (BothCalls) {
std::tie(EquivCallees, CalleeOpIndex) =
isCalleeEquivalentWith(Inst, BB, InstOther, BBOther, BF);
}
while (!Stack.empty()) {
auto *BB = Stack.top();
Stack.pop();
// We figure out if both instructions are jumps (case b) or invokes (case c)
// with equivalent jump targets or landing pads respectively.
assert(!(BothInvokes && BothBranches));
bool SameTarget = false;
unsigned TargetOpIndex = 0;
if (BothInvokes || BothBranches) {
std::tie(SameTarget, TargetOpIndex) =
isTargetEquivalentWith(Inst, BB, InstOther, BBOther, BF, BothInvokes);
}
// Compare all operands.
for (unsigned i = 0; i < NumOperands; ++i) {
if (i == CalleeOpIndex && BothCalls && EquivCallees)
if (BB->getLayoutIndex() != BinaryBasicBlock::InvalidIndex)
continue;
if (i == TargetOpIndex && (BothInvokes || BothBranches) && SameTarget)
continue;
BB->setLayoutIndex(Index++);
DFS.push_back(BB);
if (!Inst.getOperand(i).equals(InstOther.getOperand(i)))
return false;
for (auto *SuccBB : BB->landing_pads()) {
Stack.push(SuccBB);
}
for (auto *SuccBB : BB->successors()) {
Stack.push(SuccBB);
}
}
// The instructions are equal although (some of) their operands
// may differ.
return true;
return DFS;
}
bool BinaryFunction::isIdenticalWith(const BinaryFunction &OtherBF) const {
bool BinaryFunction::isIdenticalWith(const BinaryFunction &OtherBF,
bool IgnoreSymbols,
bool UseDFS) const {
assert(CurrentState == State::CFG && OtherBF.CurrentState == State::CFG);
// Compare the two functions, one basic block at a time.
@ -3121,26 +2993,29 @@ bool BinaryFunction::isIdenticalWith(const BinaryFunction &OtherBF) const {
// instruction sequences and the same index in their corresponding
// functions. The latter is important for CFG equality.
// We do not consider functions with just different pseudo instruction
// sequences non-identical by default. However we print a warning
// in case two instructions that are identical have different pseudo
// instruction sequences.
bool PseudosDiffer = false;
if (size() != OtherBF.size())
if (layout_size() != OtherBF.layout_size())
return false;
// Make sure indices are up to date for both functions.
updateLayoutIndices();
OtherBF.updateLayoutIndices();
// Comparing multi-entry functions could be non-trivial.
if (isMultiEntry() || OtherBF.isMultiEntry())
return false;
auto BBI = OtherBF.layout_begin();
for (const auto *BB : layout()) {
// Process both functions in either DFS or existing order.
const auto &Order = UseDFS ? dfs() : BasicBlocksLayout;
const auto &OtherOrder = UseDFS ? OtherBF.dfs() : OtherBF.BasicBlocksLayout;
auto BBI = OtherOrder.begin();
for (const auto *BB : Order) {
const auto *OtherBB = *BBI;
if (BB->getLayoutIndex() != OtherBB->getLayoutIndex())
return false;
// Compare successor basic blocks.
// NOTE: the comparison for jump tables is only partially verified here.
if (BB->succ_size() != OtherBB->succ_size())
return false;
auto SuccBBI = OtherBB->succ_begin();
for (const auto *SuccBB : BB->successors()) {
const auto *SuccOtherBB = *SuccBBI;
@ -3149,94 +3024,155 @@ bool BinaryFunction::isIdenticalWith(const BinaryFunction &OtherBF) const {
++SuccBBI;
}
// Compare landing pads.
if (BB->lp_size() != OtherBB->lp_size())
return false;
auto LPI = OtherBB->lp_begin();
for (const auto *LP : BB->landing_pads()) {
const auto *LPOther = *LPI;
if (LP->getLayoutIndex() != LPOther->getLayoutIndex())
return false;
++LPI;
}
// Compare instructions.
// Compare all instructions including pseudos.
auto I = BB->begin(), E = BB->end();
auto OtherI = OtherBB->begin(), OtherE = OtherBB->end();
while (I != E && OtherI != OtherE) {
const MCInst &Inst = *I;
const MCInst &InstOther = *OtherI;
bool IsInstPseudo = BC.MII->get(Inst.getOpcode()).isPseudo();
bool IsInstOtherPseudo = BC.MII->get(InstOther.getOpcode()).isPseudo();
bool Identical;
if (IgnoreSymbols) {
Identical =
isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB, OtherBF,
[](const MCSymbol *A, const MCSymbol *B) {
return true;
});
} else {
// Compare symbols.
auto AreSymbolsIdentical = [&] (const MCSymbol *A, const MCSymbol *B) {
if (A == B)
return true;
if (IsInstPseudo == IsInstOtherPseudo) {
// Either both are pseudos or none is.
bool areEqual =
isInstrEquivalentWith(Inst, *BB, InstOther, *OtherBB, OtherBF);
// All local symbols are considered identical since they affect
// control flow, and we check the control flow separately.
// If a local symbol is escaped, then the function (potentially) has
// multiple entry points and we exclude such functions from
// comparison.
if (A->isTemporary() && B->isTemporary())
return true;
if (!areEqual && IsInstPseudo) {
// Different pseudo instructions.
PseudosDiffer = true;
}
else if (!areEqual) {
// Different non-pseudo instructions.
return false;
}
// Compare symbols as functions.
const auto *FunctionA = BC.getFunctionForSymbol(A);
const auto *FunctionB = BC.getFunctionForSymbol(B);
if (FunctionA && FunctionB) {
// Self-referencing functions and recursive calls.
if (FunctionA == this && FunctionB == &OtherBF)
return true;
return FunctionA == FunctionB;
}
++I; ++OtherI;
}
else {
// One instruction is a pseudo while the other is not.
PseudosDiffer = true;
IsInstPseudo ? ++I : ++OtherI;
// Check if symbols are jump tables.
auto SIA = BC.GlobalSymbols.find(A->getName());
if (SIA == BC.GlobalSymbols.end())
return false;
auto SIB = BC.GlobalSymbols.find(B->getName());
if (SIB == BC.GlobalSymbols.end())
return false;
assert((SIA->second != SIB->second) &&
"different symbols should not have the same value");
const auto *JumpTableA = getJumpTableContainingAddress(SIA->second);
if (!JumpTableA)
return false;
const auto *JumpTableB =
OtherBF.getJumpTableContainingAddress(SIB->second);
if (!JumpTableB)
return false;
if ((SIA->second - JumpTableA->Address) !=
(SIB->second - JumpTableB->Address))
return false;
return equalJumpTables(JumpTableA, JumpTableB, OtherBF);
};
Identical =
isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB, OtherBF,
AreSymbolsIdentical);
}
if (!Identical)
return false;
++I; ++OtherI;
}
// Check for trailing instructions or pseudos in one of the basic blocks.
auto TrailI = I == E ? OtherI : I;
auto TrailE = I == E ? OtherE : E;
while (TrailI != TrailE) {
const MCInst &InstTrail = *TrailI;
if (!BC.MII->get(InstTrail.getOpcode()).isPseudo()) {
// One of the functions has more instructions in this basic block
// than the other, hence not identical.
return false;
}
// There are trailing pseudos only in one of the basic blocks.
PseudosDiffer = true;
++TrailI;
// One of the identical blocks may have a trailing unconditional jump that
// is ignored for CFG purposes.
auto *TrailingInstr = (I != E ? &(*I)
: (OtherI != OtherE ? &(*OtherI) : 0));
if (TrailingInstr && !BC.MIA->isUnconditionalBranch(*TrailingInstr)) {
return false;
}
++BBI;
}
if (opts::Verbosity >= 1 && PseudosDiffer) {
errs() << "BOLT-WARNING: functions " << *this << " and "
<< OtherBF << " are identical, but have different"
<< " pseudo instruction sequences.\n";
return true;
}
/// Compare two jump tables entry by entry: \p JumpTableA belongs to this
/// function and \p JumpTableB to \p BFB. Entries match when their target
/// basic blocks have the same layout index in their respective functions,
/// or when neither entry resolves to a basic block.
bool BinaryFunction::equalJumpTables(const JumpTable *JumpTableA,
                                     const JumpTable *JumpTableB,
                                     const BinaryFunction &BFB) const {
  // Tables that differ in entry size, type or total size can never match.
  if (JumpTableA->EntrySize != JumpTableB->EntrySize ||
      JumpTableA->Type != JumpTableB->Type ||
      JumpTableA->getSize() != JumpTableB->getSize())
    return false;

  const uint64_t NumEntries = JumpTableA->Entries.size();
  for (uint64_t I = 0; I < NumEntries; ++I) {
    const auto *LabelA = JumpTableA->Entries[I];
    const auto *LabelB = JumpTableB->Entries[I];

    const auto *TargetA = getBasicBlockForLabel(LabelA);
    const auto *TargetB = BFB.getBasicBlockForLabel(LabelB);

    // Common case: both entries resolve to a block; compare their positions.
    if (TargetA && TargetB) {
      if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex())
        return false;
      continue;
    }

    // At least one entry has no target block. The only label allowed to be
    // without a block is the function end label.
    assert((TargetA || LabelA == getFunctionEndLabel()) &&
           "no target basic block found");
    assert((TargetB || LabelB == BFB.getFunctionEndLabel()) &&
           "no target basic block found");

    // Equal here means both are null; anything else is a mismatch.
    if (TargetA != TargetB)
      return false;
  }

  return true;
}
std::size_t BinaryFunction::hash() const {
std::size_t BinaryFunction::hash(bool Recompute, bool UseDFS) const {
assert(CurrentState == State::CFG);
if (!Recompute)
return Hash;
const auto &Order = UseDFS ? dfs() : BasicBlocksLayout;
// The hash is computed by creating a string of all the opcodes
// in the function and hashing that string with std::hash.
std::string Opcodes;
for (const auto *BB : layout()) {
for (const auto *BB : Order) {
for (const auto &Inst : *BB) {
unsigned Opcode = Inst.getOpcode();
if (BC.MII->get(Opcode).isPseudo())
continue;
// Ignore conditional jumps because the conditional code is not
// always up to date.
if (BC.MIA->isConditionalBranch(Inst))
// Ignore unconditional jumps since we check CFG consistency by processing
// basic blocks in order and do not rely on branches to be in-sync with
// CFG. Note that we still use condition code of conditional jumps.
if (BC.MIA->isUnconditionalBranch(Inst))
continue;
if (Opcode == 0) {
@ -3252,7 +3188,7 @@ std::size_t BinaryFunction::hash() const {
}
}
return std::hash<std::string>{}(Opcodes);
return Hash = std::hash<std::string>{}(Opcodes);
}
void BinaryFunction::insertBasicBlocks(
@ -3508,7 +3444,7 @@ void BinaryFunction::calculateLoopInfo() {
auto BI = Latch->branch_info_begin();
for (BinaryBasicBlock *Succ : Latch->successors()) {
if (Succ == L->getHeader()) {
assert(BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE &&
assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
"profile data not found");
L->TotalBackEdgeCount += BI->Count;
}
@ -3528,7 +3464,7 @@ void BinaryFunction::calculateLoopInfo() {
auto BI = Exiting->branch_info_begin();
for (BinaryBasicBlock *Succ : Exiting->successors()) {
if (Succ == ExitTarget) {
assert(BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE &&
assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
"profile data not found");
L->ExitCount += BI->Count;
}
@ -3605,9 +3541,11 @@ DynoStats BinaryFunction::getDynoStats() const {
// basic block especially since the block may contain a function that
// does not return or a function that throws an exception.
uint64_t BBExecutionCount = 0;
for (const auto &BI : BB->branch_info())
if (BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
BBExecutionCount += BI.Count;
for (const auto &BI : BB->branch_info()) {
assert(BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
"unexpected empty profile");
BBExecutionCount += BI.Count;
}
// Ignore empty blocks and blocks that were not executed.
if (BB->getNumNonPseudos() == 0 || BBExecutionCount == 0)

View File

@ -194,7 +194,7 @@ public:
};
static constexpr uint64_t COUNT_NO_PROFILE =
std::numeric_limits<uint64_t>::max();
BinaryBasicBlock::COUNT_NO_PROFILE;
// Function size, in number of BBs, above which we fallback to a heuristic
// solution to the layout problem instead of seeking the optimal one.
static constexpr uint64_t FUNC_SIZE_THRESHOLD = 10;
@ -216,20 +216,6 @@ private:
/// base address for position independent binaries.
uint64_t Address;
/// List of functions that are identical to this one. We only maintain
/// the list for the function that should be emitted, for the rest we
/// set IdenticalFunction. When we emit this function we have
/// to emit symbols for all its twins.
std::set<BinaryFunction *> Twins;
/// Address of an identical function that can replace this one.
///
/// In case multiple functions are identical to each other, one of the
/// functions (the representative) will have it set to nullptr, while the
/// rest of the functions will point to the representative through one or
/// more steps.
BinaryFunction *IdenticalFunction{nullptr};
/// Original size of the function.
uint64_t Size;
@ -264,6 +250,9 @@ private:
/// True if the function uses DW_CFA_GNU_args_size CFIs.
bool UsesGnuArgsSize{false};
/// True if the function has more than one entry point.
bool IsMultiEntry{false};
/// The address for the code for this function in codegen memory.
uint64_t ImageAddress{0};
@ -298,12 +287,20 @@ private:
/// the output binary.
uint32_t AddressRangesOffset{-1U};
/// Last computed hash value.
mutable uint64_t Hash{0};
/// Return the index of \p BB, which the caller guarantees belongs to this
/// function. In assert-enabled builds the index is checked against the
/// number of entries in BasicBlocks.
unsigned getIndex(const BinaryBasicBlock *BB) const {
  const auto Index = BB->getIndex();
  assert(Index < BasicBlocks.size());
  return Index;
}
/// Look up the basic block registered for \p Label in LabelToBB.
/// Returns nullptr when no block is mapped to the label.
BinaryBasicBlock *getBasicBlockForLabel(const MCSymbol *Label) const {
  const auto BBI = LabelToBB.find(Label);
  if (BBI == LabelToBB.end())
    return nullptr;
  return BBI->second;
}
/// Return basic block that originally contained offset \p Offset
/// from the function start.
BinaryBasicBlock *getBasicBlockContainingOffset(uint64_t Offset);
@ -332,28 +329,46 @@ private:
/// Helper function that compares an instruction of this function to the
/// given instruction of the given function. The functions should have
/// identical CFG.
template <class Compare>
bool isInstrEquivalentWith(
const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther,
const BinaryBasicBlock &BBOther, const BinaryFunction &BF) const;
const MCInst &InstA, const BinaryBasicBlock &BBA,
const MCInst &InstB, const BinaryBasicBlock &BBB,
const BinaryFunction &BFB, Compare Comp) const {
if (InstA.getOpcode() != InstB.getOpcode()) {
return false;
}
/// Helper function that compares the callees of two call instructions.
/// Callees are considered equivalent if both refer to the same function
/// or if both calls are recursive. Instructions should have same opcodes
/// and same number of operands. Returns true and the callee operand index
/// when callees are quivalent, and false, 0 otherwise.
std::pair<bool, unsigned> isCalleeEquivalentWith(
const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther,
const BinaryBasicBlock &BBOther, const BinaryFunction &BF) const;
// In this function we check for special conditions:
//
// * instructions with landing pads
//
// Most of the common cases should be handled by MCInst::equals()
// that compares regular instruction operands.
//
// NB: there's no need to compare jump table indirect jump instructions
// separately as jump tables are handled by comparing corresponding
// symbols.
const auto EHInfoA = BC.MIA->getEHInfo(InstA);
const auto EHInfoB = BC.MIA->getEHInfo(InstB);
/// Helper function that compares the targets two jump or invoke instructions.
/// A target of an invoke we consider its landing pad basic block. The
/// corresponding functions should have identical CFG. Instructions should
/// have same opcodes and same number of operands. Returns true and the target
/// operand index when targets are equivalent, and false, 0 otherwise.
std::pair<bool, unsigned> isTargetEquivalentWith(
const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther,
const BinaryBasicBlock &BBOther, const BinaryFunction &BF,
bool AreInvokes) const;
// Action indices should match.
if (EHInfoA.second != EHInfoB.second)
return false;
if (!EHInfoA.first != !EHInfoB.first)
return false;
if (EHInfoA.first && EHInfoB.first) {
const auto *LPA = BBA.getLandingPad(EHInfoA.first);
const auto *LPB = BBB.getLandingPad(EHInfoB.first);
assert(LPA && LPB && "cannot locate landing pad(s)");
if (LPA->getLayoutIndex() != LPB->getLayoutIndex())
return false;
}
return InstA.equals(InstB, Comp);
}
/// Clear the landing pads for all blocks contained in the range of
/// [StartIndex, StartIndex + NumBlocks). This also has the effect of
@ -372,6 +387,9 @@ private:
/// Temporary holder of offsets that are potentially entry points.
std::unordered_set<uint64_t> EntryOffsets;
/// Map labels to corresponding basic blocks.
std::unordered_map<const MCSymbol *, BinaryBasicBlock *> LabelToBB;
using BranchListType = std::vector<std::pair<uint32_t, uint32_t>>;
BranchListType TakenBranches; /// All local taken branches.
BranchListType FTBranches; /// All fall-through branches.
@ -407,7 +425,7 @@ private:
uint32_t Index; // index of the tail call in the basic block
uint64_t TargetAddress; // address of the callee
uint64_t Count{0}; // taken count from profile data
uint64_t Mispreds{0}; // mispredicted count from progile data
uint64_t Mispreds{0}; // mispredicted count from profile data
uint32_t CFIStateBefore{0}; // CFI state before the tail call instruction
TailCallInfo(uint32_t Offset, uint32_t Index, uint64_t TargetAddress) :
@ -532,6 +550,23 @@ private:
return nullptr;
}
/// Return the jump table whose range [start, start + size) contains
/// \p Address, or nullptr if no such table exists.
const JumpTable *getJumpTableContainingAddress(uint64_t Address) const {
  // upper_bound() yields the first table keyed strictly above Address, so the
  // only possible match is the entry immediately preceding it.
  auto JTI = JumpTables.upper_bound(Address);
  if (JTI == JumpTables.begin())
    return nullptr;
  --JTI;
  const bool ContainsAddress = JTI->first + JTI->second.getSize() > Address;
  return ContainsAddress ? &JTI->second : nullptr;
}
/// Compare two jump tables in 2 functions. The function relies on consistent
/// ordering of basic blocks in both binary functions (e.g. DFS).
bool equalJumpTables(const JumpTable *JumpTableA,
const JumpTable *JumpTableB,
const BinaryFunction &BFB) const;
/// All jump table sites in the function.
std::vector<std::pair<uint64_t, uint64_t>> JTSites;
@ -572,15 +607,18 @@ private:
CFIStateVector BBCFIState;
/// Symbol in the output.
///
/// NB: function can have multiple symbols associated with it. We will emit
/// all symbols for the function
MCSymbol *OutputSymbol;
MCSymbol *ColdSymbol{nullptr};
/// Symbol at the end of the function.
MCSymbol *FunctionEndLabel{nullptr};
mutable MCSymbol *FunctionEndLabel{nullptr};
/// Symbol at the end of the cold part of split function.
MCSymbol *FunctionColdEndLabel{nullptr};
mutable MCSymbol *FunctionColdEndLabel{nullptr};
/// Unique number associated with the function.
uint64_t FunctionNumber;
@ -603,6 +641,11 @@ private:
Itr itr;
};
/// Register alternative function name. \p NewName is appended to the end of
/// the Names list.
// NOTE(review): NewName is taken by value and then copied again into Names;
// moving it into the container would avoid the second copy.
void addAlternativeName(std::string NewName) {
Names.emplace_back(NewName);
}
/// Return label at a given \p Address in the function. If the label does
/// not exist - create it. Assert if the \p Address does not belong to
/// the function. If \p CreatePastEnd is true, then return the function
@ -613,6 +656,7 @@ private:
/// Register an entry point at a given \p Offset into the function and return
/// the local label for the corresponding address. Any entry point at a
/// non-zero offset marks the function as multi-entry.
MCSymbol *addEntryPointAtOffset(uint64_t Offset) {
  EntryOffsets.emplace(Offset);
  // An entry at offset 0 leaves the multi-entry flag unchanged.
  if (Offset != 0)
    IsMultiEntry = true;
  return getOrCreateLocalLabel(getAddress() + Offset);
}
@ -645,6 +689,7 @@ private:
BinaryFunction(const BinaryFunction &) = delete;
friend class RewriteInstance;
friend class BinaryContext;
/// Creation should be handled by RewriteInstance::createBinaryFunction().
BinaryFunction(const std::string &Name, SectionRef Section, uint64_t Address,
@ -737,6 +782,10 @@ public:
return iterator_range<const_cfi_iterator>(cie_begin(), cie_end());
}
/// Return a list of basic blocks sorted using DFS and update layout indices
/// using the same order. Does not modify the current layout.
BasicBlockOrderType dfs() const;
/// Modify code layout making necessary adjustments to instructions at the
/// end of basic blocks.
void modifyLayout(LayoutType Type, bool MinBranchClusters, bool Split);
@ -825,7 +874,7 @@ public:
return Names;
}
State getCurrentState() const {
State getState() const {
return CurrentState;
}
@ -882,7 +931,7 @@ public:
}
/// Return MC symbol associated with the end of the function.
MCSymbol *getFunctionEndLabel() {
MCSymbol *getFunctionEndLabel() const {
assert(BC.Ctx && "cannot be called with empty context");
if (!FunctionEndLabel) {
FunctionEndLabel = BC.Ctx->createTempSymbol("func_end", true);
@ -891,7 +940,7 @@ public:
}
/// Return MC symbol associated with the end of the cold part of the function.
MCSymbol *getFunctionColdEndLabel() {
MCSymbol *getFunctionColdEndLabel() const {
if (!FunctionColdEndLabel) {
FunctionColdEndLabel = BC.Ctx->createTempSymbol("func_cold_end", true);
}
@ -965,6 +1014,16 @@ public:
return UsesGnuArgsSize;
}
/// Return true if the function has more than one entry point.
bool isMultiEntry() const {
return IsMultiEntry;
}
/// Return true if the function uses jump tables, i.e. at least one jump
/// table is registered in JumpTables.
bool hasJumpTables() const {
  // Ask the container directly instead of narrowing size() to bool.
  return !JumpTables.empty();
}
const MCSymbol *getPersonalityFunction() const {
return PersonalityFunction;
}
@ -988,9 +1047,9 @@ public:
return Address <= PC && PC < Address + Size;
}
/// Register alternative function name.
void addAlternativeName(std::string NewName) {
Names.emplace_back(NewName);
/// Add new names this function is known under.
void addNewNames(const std::vector<std::string> &NewNames) {
Names.insert(Names.begin(), NewNames.begin(), NewNames.end());
}
/// Create a basic block at a given \p Offset in the
@ -999,7 +1058,6 @@ public:
/// on the alignment of the existing offset.
/// The new block is not inserted into the CFG. The client must
/// use insertBasicBlocks to add any new blocks to the CFG.
///
std::unique_ptr<BinaryBasicBlock>
createBasicBlock(uint64_t Offset,
MCSymbol *Label = nullptr,
@ -1016,6 +1074,8 @@ public:
BB->setAlignment(std::min(DerivedAlignment, uint64_t(32)));
}
LabelToBB.emplace(Label, BB.get());
return BB;
}
@ -1142,6 +1202,12 @@ public:
return (II == Instructions.end()) ? nullptr : &II->second;
}
/// Return true if function has a profile, even if the profile does not
/// match CFG 100%.
bool hasProfile() const {
return ExecutionCount != COUNT_NO_PROFILE;
}
/// Return true if function profile is present and accurate.
bool hasValidProfile() const {
return ExecutionCount != COUNT_NO_PROFILE &&
@ -1298,45 +1364,17 @@ public:
return ExecutionCount;
}
/// Return the execution count for functions with known profile.
/// Functions whose count is COUNT_NO_PROFILE report 0 instead.
uint64_t getKnownExecutionCount() const {
  if (ExecutionCount == COUNT_NO_PROFILE)
    return 0;
  return ExecutionCount;
}
/// Return original LSDA address for the function or NULL.
uint64_t getLSDAAddress() const {
return LSDAAddress;
}
/// Return the address of an identical function. If none is found this will
/// return NULL.
BinaryFunction *getIdenticalFunction() const {
return IdenticalFunction;
}
/// Set the address of an identical function.
void setIdenticalFunction(BinaryFunction *BF) {
IdenticalFunction = BF;
// Copy over the list of twins.
if (!Twins.empty()) {
BF->getTwins().insert(Twins.begin(), Twins.end());
Twins.clear();
}
}
/// Return functions that are duplicates of this one.
std::set<BinaryFunction *> &getTwins() {
return Twins;
}
/// Register function that is identical to this one.
void addIdenticalFunction(BinaryFunction *BF) {
Twins.emplace(BF);
}
/// Return true if this function is a duplicate of another function.
bool isDuplicate() const {
bool IsDuplicate = getIdenticalFunction();
assert((Twins.empty() || !IsDuplicate) &&
"function with twins cannot be a duplicate of another function");
return IsDuplicate;
}
/// Return symbol pointing to function's LSDA.
MCSymbol *getLSDASymbol() {
if (LSDASymbol)
@ -1425,6 +1463,9 @@ public:
/// has been filled with LBR data.
void inferFallThroughCounts();
/// Clear execution profile of the function.
void clearProfile();
/// Converts conditional tail calls to unconditional tail calls. We do this to
/// handle conditional tail calls correctly and to give a chance to the
/// simplify conditional tail call pass to decide whether to re-optimize them
@ -1497,12 +1538,26 @@ public:
/// isIdenticalWith.
void mergeProfileDataInto(BinaryFunction &BF) const;
/// Returns true if this function has identical code and
/// CFG with the given function.
bool isIdenticalWith(const BinaryFunction &BF) const;
/// Returns true if this function has identical code and CFG with
/// the given function \p BF.
///
/// If \p IgnoreSymbols is set to true, then symbolic operands are ignored
/// during comparison.
///
/// If \p UseDFS is set to true, then compute DFS of each function and use
/// it for CFG equivalency. Potentially it will help to catch more cases,
/// but is slower.
bool isIdenticalWith(const BinaryFunction &BF,
bool IgnoreSymbols = false,
bool UseDFS = false) const;
/// Returns a hash value for the function. To be used for ICF.
std::size_t hash() const;
/// Returns a hash value for the function. To be used for ICF. Two congruent
/// functions (functions with different symbolic references but identical
/// otherwise) are required to have identical hashes.
///
/// If \p UseDFS is set, then process blocks in DFS order that we recompute.
/// Otherwise use the existing layout order.
std::size_t hash(bool Recompute = true, bool UseDFS = false) const;
/// Sets the associated .debug_info entry.
void addSubprogramDIE(DWARFCompileUnit *Unit,

View File

@ -65,12 +65,6 @@ SimplifyRODataLoads("simplify-rodata-loads",
static cl::opt<bool> OptimizeFrameAccesses(
"frame-opt", cl::desc("optimize stack frame accesses"), cl::ZeroOrMore);
static cl::opt<bool>
IdenticalCodeFolding(
"icf",
cl::desc("fold functions with identical code"),
cl::ZeroOrMore);
static cl::opt<bool>
PrintReordered("print-reordered",
cl::desc("print functions after layout optimization"),
@ -209,8 +203,7 @@ void BinaryFunctionPassManager::runAllPasses(
// Run this pass first to use stats for the original functions.
Manager.registerPass(llvm::make_unique<PrintSortedBy>(NeverPrint));
Manager.registerPass(llvm::make_unique<IdenticalCodeFolding>(PrintICF),
opts::IdenticalCodeFolding);
Manager.registerPass(llvm::make_unique<IdenticalCodeFolding>(PrintICF));
Manager.registerPass(llvm::make_unique<InlineSmallFunctions>(PrintInline),
opts::InlineSmallFunctions);
@ -223,6 +216,8 @@ void BinaryFunctionPassManager::runAllPasses(
llvm::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads),
opts::SimplifyRODataLoads);
Manager.registerPass(llvm::make_unique<IdenticalCodeFolding>(PrintICF));
Manager.registerPass(llvm::make_unique<ReorderBasicBlocks>(PrintReordered));
Manager.registerPass(llvm::make_unique<Peepholes>(PrintPeepholes),

View File

@ -144,13 +144,27 @@ SctcMode(
cl::values(clEnumValN(SctcAlways, "always", "always perform sctc"),
clEnumValN(SctcPreserveDirection,
"preserve",
"only perform sctc when branch direction is preserved"),
"only perform sctc when branch direction is "
"preserved"),
clEnumValN(SctcHeuristic,
"heuristic",
"use branch prediction data to control sctc"),
clEnumValEnd),
cl::ZeroOrMore);
static cl::opt<bool>
IdenticalCodeFolding(
"icf",
cl::desc("fold functions with identical code"),
cl::ZeroOrMore);
static cl::opt<bool>
UseDFSForICF(
"icf-dfs",
cl::desc("use DFS ordering when using -icf option"),
cl::ReallyHidden,
cl::ZeroOrMore);
} // namespace opts
namespace llvm {
@ -158,6 +172,7 @@ namespace bolt {
/// Decide whether \p BF is eligible for this pass: it must be simple, be in
/// the CFG state, be selected by opts::shouldProcess(), and have non-zero
/// size. The checks run in that order and stop at the first failure.
bool BinaryFunctionPass::shouldOptimize(const BinaryFunction &BF) const {
  if (!BF.isSimple())
    return false;
  if (BF.getState() != BinaryFunction::State::CFG)
    return false;
  if (!opts::shouldProcess(BF))
    return false;
  return BF.getSize() > 0;
}
@ -813,7 +828,8 @@ void InlineSmallFunctions::runOnFunctions(
DEBUG(dbgs() << "BOLT-INFO: Inlined " << InlinedDynamicCalls << " of "
<< TotalDynamicCalls << " function calls in the profile.\n"
<< "BOLT-INFO: Inlined calls represent "
<< format("%.1f", 100.0 * InlinedDynamicCalls / TotalInlineableCalls)
<< format("%.1f",
100.0 * InlinedDynamicCalls / TotalInlineableCalls)
<< "% of all inlineable calls in the profile.\n");
}
@ -1304,220 +1320,143 @@ void SimplifyRODataLoads::runOnFunctions(
<< "BOLT-INFO: dynamic loads found: " << NumDynamicLoadsFound << "\n";
}
void IdenticalCodeFolding::discoverCallers(
BinaryContext &BC, std::map<uint64_t, BinaryFunction> &BFs) {
for (auto &I : BFs) {
BinaryFunction &Caller = I.second;
if (!shouldOptimize(Caller))
continue;
for (BinaryBasicBlock &BB : Caller) {
unsigned InstrIndex = 0;
for (MCInst &Inst : BB) {
if (!BC.MIA->isCall(Inst)) {
++InstrIndex;
continue;
}
const auto *TargetSymbol = BC.MIA->getTargetSymbol(Inst);
if (!TargetSymbol) {
// This is an indirect call, we cannot record a target.
++InstrIndex;
continue;
}
const auto *Function = BC.getFunctionForSymbol(TargetSymbol);
if (!Function) {
// Call to a function without a BinaryFunction object.
++InstrIndex;
continue;
}
// Insert a tuple in the Callers map.
Callers[Function].emplace_back(CallSite(&Caller, &BB, InstrIndex));
++InstrIndex;
}
}
}
}
void IdenticalCodeFolding::foldFunction(
BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
BinaryFunction *BFToFold,
BinaryFunction *BFToReplaceWith,
std::set<BinaryFunction *> &Modified) {
// Mark BFToFold as identical with BFTOreplaceWith.
BFToFold->setIdenticalFunction(BFToReplaceWith);
// Add the size of BFToFold to the total size savings estimate.
BytesSavedEstimate += BFToFold->getSize();
// Get callers of BFToFold.
auto CI = Callers.find(BFToFold);
if (CI == Callers.end())
void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &) {
if (!opts::IdenticalCodeFolding)
return;
std::vector<CallSite> &BFToFoldCallers = CI->second;
// Get callers of BFToReplaceWith.
std::vector<CallSite> &BFToReplaceWithCallers = Callers[BFToReplaceWith];
// Get MCSymbol for BFToReplaceWith.
MCSymbol *SymbolToReplaceWith =
BC.getOrCreateGlobalSymbol(BFToReplaceWith->getAddress(), "");
// Traverse callers of BFToFold and replace the calls with calls
// to BFToReplaceWith.
for (const CallSite &CS : BFToFoldCallers) {
// Get call instruction.
BinaryFunction *Caller = CS.Caller;
BinaryBasicBlock *CallBB = CS.Block;
MCInst &CallInst = CallBB->getInstructionAtIndex(CS.InstrIndex);
// Replace call target with BFToReplaceWith.
auto Success = BC.MIA->replaceCallTargetOperand(CallInst,
SymbolToReplaceWith,
BC.Ctx.get());
assert(Success && "unexpected call target prevented the replacement");
// Add this call site to the callers of BFToReplaceWith.
BFToReplaceWithCallers.emplace_back(CS);
// Add caller to the set of modified functions.
Modified.insert(Caller);
// Update dynamic calls folded stat.
if (Caller->hasValidProfile() &&
CallBB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
NumDynamicCallsFolded += CallBB->getExecutionCount();
}
// Remove all callers of BFToFold.
BFToFoldCallers.clear();
++NumFunctionsFolded;
// Merge execution counts of BFToFold into those of BFToReplaceWith.
BFToFold->mergeProfileDataInto(*BFToReplaceWith);
}
void IdenticalCodeFolding::runOnFunctions(
BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &
) {
discoverCallers(BC, BFs);
const auto OriginalFunctionCount = BFs.size();
uint64_t NumFunctionsFolded = 0;
uint64_t NumJTFunctionsFolded = 0;
uint64_t BytesSavedEstimate = 0;
static bool UseDFS = opts::UseDFSForICF;
// This hash table is used to identify identical functions. It maps
// a function to a bucket of functions identical to it.
struct KeyHash {
std::size_t operator()(const BinaryFunction *F) const { return F->hash(); }
std::size_t operator()(const BinaryFunction *F) const {
return F->hash(/*Recompute=*/false);
}
};
struct KeyCongruent {
bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
return A->isIdenticalWith(*B, /*IgnoreSymbols=*/true, /*UseDFS=*/UseDFS);
}
};
struct KeyEqual {
bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
return A->isIdenticalWith(*B);
return A->isIdenticalWith(*B, /*IgnoreSymbols=*/false, /*UseDFS=*/UseDFS);
}
};
std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
KeyHash, KeyEqual> Buckets;
// Set that holds the functions that were modified by the last pass.
std::set<BinaryFunction *> Mod;
// Create buckets with congruent functions - functions that potentially could
// be folded.
std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>,
KeyHash, KeyCongruent> CongruentBuckets;
for (auto &BFI : BFs) {
auto &BF = BFI.second;
if (!shouldOptimize(BF))
continue;
// Vector of all the candidate functions to be tested for being identical
// to each other. Initialized with all simple functions.
std::vector<BinaryFunction *> Cands;
for (auto &I : BFs) {
auto &BF = I.second;
if (shouldOptimize(BF))
Cands.emplace_back(&BF);
// Make sure indices are in-order.
BF.updateLayoutIndices();
// Pre-compute hash before pushing into hashtable.
BF.hash(/*Recompute=*/true, /*UseDFS*/UseDFS);
CongruentBuckets[&BF].emplace(&BF);
}
// We repeat the icf pass until no new modifications happen.
unsigned Iter = 1;
// We repeat the pass until no new modifications happen.
unsigned Iteration = 1;
uint64_t NumFoldedLastIteration;
do {
Buckets.clear();
Mod.clear();
NumFoldedLastIteration = 0;
if (opts::Verbosity >= 1) {
outs() << "BOLT-INFO: icf pass " << Iter << "...\n";
}
DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n");
uint64_t NumIdenticalFunctions = 0;
// Compare candidate functions using the Buckets hash table. Identical
// functions are efficiently discovered and added to the same bucket.
for (BinaryFunction *BF : Cands) {
Buckets[BF].emplace_back(BF);
}
Cands.clear();
// Go through the functions of each bucket and fold any references to them
// with the references to the hottest function among them.
for (auto &I : Buckets) {
std::vector<BinaryFunction *> &IFs = I.second;
std::sort(IFs.begin(), IFs.end(),
[](const BinaryFunction *A, const BinaryFunction *B) {
if (!A->hasValidProfile() && !B->hasValidProfile())
return false;
if (!A->hasValidProfile())
return false;
if (!B->hasValidProfile())
return true;
return B->getExecutionCount() < A->getExecutionCount();
}
);
BinaryFunction *Hottest = IFs[0];
// For the next pass, we consider only one function from each set of
// identical functions.
Cands.emplace_back(Hottest);
if (IFs.size() <= 1)
for (auto &CBI : CongruentBuckets) {
auto &Candidates = CBI.second;
if (Candidates.size() < 2)
continue;
NumIdenticalFunctions += IFs.size() - 1;
for (unsigned i = 1; i < IFs.size(); ++i) {
BinaryFunction *BF = IFs[i];
Hottest->addIdenticalFunction(BF);
foldFunction(BC, BFs, BF, Hottest, Mod);
if (!MaxTwinFunction ||
MaxTwinFunction->getTwins().size() < Hottest->getTwins().size()) {
MaxTwinFunction = Hottest;
// Identical functions go into the same bucket.
std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
KeyHash, KeyEqual> IdenticalBuckets;
for (auto *BF : Candidates) {
IdenticalBuckets[BF].emplace_back(BF);
}
for (auto &IBI : IdenticalBuckets) {
// Functions identified as identical.
auto &Twins = IBI.second;
if (Twins.size() < 2)
continue;
// Fold functions. Keep the order consistent across invocations with
// different options.
std::stable_sort(Twins.begin(), Twins.end(),
[](const BinaryFunction *A, const BinaryFunction *B) {
return A->getFunctionNumber() < B->getFunctionNumber();
});
BinaryFunction *ParentBF = Twins[0];
for (unsigned i = 1; i < Twins.size(); ++i) {
auto *ChildBF = Twins[i];
DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into "
<< *ParentBF << '\n');
// Remove child function from the list of candidates.
auto FI = Candidates.find(ChildBF);
assert(FI != Candidates.end() &&
"function expected to be in the set");
Candidates.erase(FI);
// Fold the function and remove from the list of processed functions.
BC.foldFunction(*ChildBF, *ParentBF, BFs);
BytesSavedEstimate += ChildBF->getSize();
++NumFoldedLastIteration;
if (ParentBF->hasJumpTables())
++NumJTFunctionsFolded;
}
}
}
NumFunctionsFolded += NumFoldedLastIteration;
++Iteration;
if (opts::Verbosity >= 1) {
outs() << "BOLT-INFO: found " << NumIdenticalFunctions
<< " identical functions.\n"
<< "BOLT-INFO: modified " << Mod.size() << " functions.\n";
} while (NumFoldedLastIteration > 0);
DEBUG(
// Print functions that are congruent but not identical.
for (auto &CBI : CongruentBuckets) {
auto &Candidates = CBI.second;
if (Candidates.size() < 2)
continue;
dbgs() << "BOLT-DEBUG: the following " << Candidates.size()
<< " functions (each of size " << (*Candidates.begin())->getSize()
<< " bytes) are congruent but not identical:\n";
for (auto *BF : Candidates) {
dbgs() << " " << *BF;
if (BF->getKnownExecutionCount()) {
dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)";
}
dbgs() << '\n';
}
}
);
NumIdenticalFunctionsFound += NumIdenticalFunctions;
++Iter;
} while (!Mod.empty());
outs() << "BOLT-INFO: ICF pass found " << NumIdenticalFunctionsFound
<< " functions identical to some other function.\n"
<< "BOLT-INFO: ICF pass folded references to " << NumFunctionsFolded
<< " functions.\n"
<< "BOLT-INFO: ICF pass folded " << NumDynamicCallsFolded << " dynamic"
<< " function calls.\n"
<< "BOLT-INFO: Removing all identical functions could save "
<< format("%.2lf", (double) BytesSavedEstimate / 1024)
<< " KB of code space.\n";
if (MaxTwinFunction) {
outs() << "BOLT-INFO: Function with maximum number of twins ("
<< MaxTwinFunction->getTwins().size() << ") is " << *MaxTwinFunction
<< '\n';
if (NumFunctionsFolded) {
  // Iteration starts at 1 and is bumped at the end of every pass, so once
  // the loop exits it is one past the number of passes actually executed.
  outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded
         << " out of " << OriginalFunctionCount << " functions in "
         << (Iteration - 1) << " passes. "
         << NumJTFunctionsFolded << " functions had jump tables.\n"
         << "BOLT-INFO: Removing all identical functions will save "
         << format("%.2lf", (double) BytesSavedEstimate / 1024)
         << " KB of code space.\n";
}
}

View File

@ -363,41 +363,7 @@ public:
/// references to a single one of them.
///
class IdenticalCodeFolding : public BinaryFunctionPass {
uint64_t NumIdenticalFunctionsFound{0};
uint64_t NumFunctionsFolded{0};
uint64_t NumDynamicCallsFolded{0};
uint64_t BytesSavedEstimate{0};
BinaryFunction *MaxTwinFunction{nullptr};
/// Map from a binary function to its callers.
struct CallSite {
BinaryFunction *Caller;
BinaryBasicBlock *Block;
unsigned InstrIndex;
CallSite(BinaryFunction *Caller,
BinaryBasicBlock *Block,
unsigned InstrIndex) :
Caller(Caller), Block(Block), InstrIndex(InstrIndex) { }
};
using CallerMap = std::map<const BinaryFunction *, std::vector<CallSite>>;
CallerMap Callers;
/// Replaces all calls to BFTOFold with calls to BFToReplaceWith and merges
/// the profile data of BFToFold with those of BFToReplaceWith. All modified
/// functions are added to the Modified set.
void foldFunction(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
BinaryFunction *BFToFold,
BinaryFunction *BFToReplaceWith,
std::set<BinaryFunction *> &Modified);
/// Finds callers for each binary function and populates the Callers
/// map.
void discoverCallers(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs);
public:
public:
explicit IdenticalCodeFolding(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) { }

View File

@ -131,7 +131,7 @@ void GreedyClusterAlgorithm::clusterBasicBlocks(const BinaryFunction &BF,
// Populate priority queue with edges.
auto BI = BB->branch_info_begin();
for (auto &I : BB->successors()) {
assert(BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE &&
assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
"attempted reordering blocks of function with no profile data");
Queue.emplace_back(EdgeTy(BB, I, BI->Count));
++BI;
@ -252,7 +252,7 @@ int64_t MinBranchGreedyClusterAlgorithm::calculateWeight(
// Adjust the weight by taking into account other edges with the same source.
auto BI = SrcBB->branch_info_begin();
for (const BinaryBasicBlock *SuccBB : SrcBB->successors()) {
assert(BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE &&
assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
"attempted reordering blocks of function with no profile data");
assert(BI->Count <= std::numeric_limits<int64_t>::max() &&
"overflow detected");
@ -277,7 +277,7 @@ int64_t MinBranchGreedyClusterAlgorithm::calculateWeight(
++BI;
}
assert(BI != PredBB->branch_info_end() && "invalid control flow graph");
assert(BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE &&
assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
"attempted reordering blocks of function with no profile data");
assert(BI->Count <= std::numeric_limits<int64_t>::max() &&
"overflow detected");
@ -399,7 +399,7 @@ void OptimalReorderAlgorithm::reorderBasicBlocks(
auto BI = BB->branch_info_begin();
Weight[BBToIndex[BB]].resize(N);
for (auto I : BB->successors()) {
if (BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE)
if (BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE)
Weight[BBToIndex[BB]][BBToIndex[I]] = BI->Count;
++BI;
}

View File

@ -1706,12 +1706,8 @@ void emitFunction(MCStreamer &Streamer, BinaryFunction &Function,
if (Function.getSize() == 0)
return;
if (Function.isDuplicate()) {
DEBUG(dbgs() << "BOLT-DEBUG: skipping code generation for function "
<< Function << " because it is a duplicate of function "
<< *Function.getIdenticalFunction() << '\n');
if (Function.getState() == BinaryFunction::State::Empty)
return;
}
MCSection *Section;
if (opts::Relocs) {
@ -1741,57 +1737,17 @@ void emitFunction(MCStreamer &Streamer, BinaryFunction &Function,
Streamer.EmitCodeAlignment(Function.getAlignment());
}
// Emit all names the function and its twins (for ICF) are known under.
auto EmitFunctionSymbols = [&](BinaryFunction &BF) {
for (const auto &Name : BF.getNames()) {
Twine EmitName = EmitColdPart ? Twine(Name).concat(".cold") : Name;
auto *EmitSymbol = BC.Ctx->getOrCreateSymbol(EmitName);
Streamer.EmitSymbolAttribute(EmitSymbol, MCSA_ELF_TypeFunction);
DEBUG(dbgs() << "emitting symbol " << EmitSymbol->getName()
<< " for function " << BF
<< " from function " << Function << '\n');
Streamer.EmitLabel(EmitSymbol);
}
};
MCContext &Context = Streamer.getContext();
const MCAsmInfo *MAI = Context.getAsmInfo();
// Emit all names the function is known under.
EmitFunctionSymbols(Function);
for (auto *TwinFunction : Function.getTwins()) {
assert(TwinFunction != &Function && "function cannot be its own twin");
EmitFunctionSymbols(*TwinFunction);
}
if (opts::Verbosity >= 2 && !EmitColdPart && !Function.getTwins().empty()) {
std::vector<std::string> AllNames;
AllNames.insert(AllNames.end(),
Function.getNames().begin(),
Function.getNames().end());
for (auto *TwinFunction : Function.getTwins()) {
AllNames.insert(AllNames.end(),
TwinFunction->getNames().begin(),
TwinFunction->getNames().end());
}
outs() << "BOLT-INFO: all duplicate names (" << AllNames.size()
<< ") for function " << Function << ": \n";
for (const auto &Name : AllNames) {
outs() << " " << Name << '\n';
}
AllNames.clear();
AllNames.emplace_back(Function.getPrintName());
for (auto *TwinFunction : Function.getTwins()) {
AllNames.emplace_back(TwinFunction->getPrintName());
}
std::sort(AllNames.begin(), AllNames.end());
outs() << "BOLT-INFO: all ICF names (" << AllNames.size()
<< ") for function " << Function << ": \n";
for (const auto &Name : AllNames) {
outs() << " " << Name << '\n';
}
for (const auto &Name : Function.getNames()) {
Twine EmitName = EmitColdPart ? Twine(Name).concat(".cold") : Name;
auto *EmitSymbol = BC.Ctx->getOrCreateSymbol(EmitName);
Streamer.EmitSymbolAttribute(EmitSymbol, MCSA_ELF_TypeFunction);
DEBUG(dbgs() << "emitting symbol " << EmitSymbol->getName()
<< " for function " << Function << '\n');
Streamer.EmitLabel(EmitSymbol);
}
// Emit CFI start
@ -2141,9 +2097,6 @@ void RewriteInstance::mapFileSections(
if (!Function.isSimple() || !opts::shouldProcess(Function))
continue;
if (Function.isDuplicate())
continue;
auto TooLarge = false;
auto SMII = SectionMM->SectionMapInfo.find(Function.getCodeSectionName());
assert(SMII != SectionMM->SectionMapInfo.end() &&
@ -2936,7 +2889,7 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
}
uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) {
const auto *Function = getBinaryFunctionContainingAddress(OldAddress);
const auto *Function = getBinaryFunctionAtAddress(OldAddress);
if (!Function)
return 0;
auto JITS = OLT.findSymbol(Function->getSymbol()->getName(), false);
@ -3213,3 +3166,12 @@ RewriteInstance::getBinaryFunctionContainingAddress(uint64_t Address,
return nullptr;
return &FI->second;
}
/// Look up the function that corresponds to the global symbol found at
/// \p Address.
///
/// Returns nullptr when the binary context has no global symbol at that
/// address; otherwise returns whatever the binary context maps that symbol to.
const BinaryFunction *
RewriteInstance::getBinaryFunctionAtAddress(uint64_t Address) const {
  if (const auto *Symbol = BC->getGlobalSymbolAtAddress(Address))
    return BC->getFunctionForSymbol(Symbol);
  return nullptr;
}

View File

@ -192,7 +192,8 @@ public:
/// disassembleFunctions(), also preserve the original version.
void rewriteFile();
/// Return address of the function in the new binary.
/// Return the address of the function in the new binary that corresponds to
/// \p OldAddress in the original binary.
uint64_t getNewFunctionAddress(uint64_t OldAddress);
/// Return value for the symbol \p Name in the output.
@ -211,6 +212,8 @@ public:
BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address,
bool CheckPastEnd = false);
/// Return the function associated with the global symbol located at
/// \p Address, or nullptr if no global symbol is found at that address.
const BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address) const;
private:
/// Detect addresses and offsets available in the binary for allocating
@ -382,7 +385,7 @@ private:
/// Store all non-zero symbols in this map for a quick address lookup.
std::map<uint64_t, llvm::object::SymbolRef> FileSymRefs;
/// Store all functions seen in the binary, sorted by address.
/// Store all functions in the binary, sorted by original address.
std::map<uint64_t, BinaryFunction> BinaryFunctions;
/// Stores and serializes information that will be put into the .debug_ranges