ICF improvements.

Summary:
Re-worked the way ICF operates. The pass now checks not only call
instructions but all references, including function pointers. Jump
tables are handled too.

(cherry picked from FBD4372491)
This commit is contained in:
Maksim Panchenko 2016-12-21 17:13:56 -08:00
parent 55fc5417f8
commit bc8a456309
12 changed files with 617 additions and 695 deletions

View File

@ -121,7 +121,7 @@ void BinaryBasicBlock::removePredecessor(BinaryBasicBlock *Pred) {
} }
void BinaryBasicBlock::addLandingPad(BinaryBasicBlock *LPBlock) { void BinaryBasicBlock::addLandingPad(BinaryBasicBlock *LPBlock) {
LandingPads.insert(LPBlock); LandingPads.push_back(LPBlock);
LPBlock->Throwers.insert(this); LPBlock->Throwers.insert(this);
} }
@ -190,7 +190,7 @@ BinaryBasicBlock::getBranchStats(const BinaryBasicBlock *Succ) const {
uint64_t TotalCount = 0; uint64_t TotalCount = 0;
uint64_t TotalMispreds = 0; uint64_t TotalMispreds = 0;
for (const auto &BI : BranchInfo) { for (const auto &BI : BranchInfo) {
if (BI.Count != COUNT_FALLTHROUGH_EDGE) { if (BI.Count != COUNT_NO_PROFILE) {
TotalCount += BI.Count; TotalCount += BI.Count;
TotalMispreds += BI.MispredictedCount; TotalMispreds += BI.MispredictedCount;
} }
@ -200,7 +200,7 @@ BinaryBasicBlock::getBranchStats(const BinaryBasicBlock *Succ) const {
auto Itr = std::find(Successors.begin(), Successors.end(), Succ); auto Itr = std::find(Successors.begin(), Successors.end(), Succ);
assert(Itr != Successors.end()); assert(Itr != Successors.end());
const auto &BI = BranchInfo[Itr - Successors.begin()]; const auto &BI = BranchInfo[Itr - Successors.begin()];
if (BI.Count && BI.Count != COUNT_FALLTHROUGH_EDGE) { if (BI.Count && BI.Count != COUNT_NO_PROFILE) {
if (TotalMispreds == 0) TotalMispreds = 1; if (TotalMispreds == 0) TotalMispreds = 1;
return std::make_pair(double(BI.Count) / TotalCount, return std::make_pair(double(BI.Count) / TotalCount,
double(BI.MispredictedCount) / TotalMispreds); double(BI.MispredictedCount) / TotalMispreds);

View File

@ -34,6 +34,17 @@ class BinaryFunction;
/// we might switch to it at some point. /// we might switch to it at some point.
class BinaryBasicBlock { class BinaryBasicBlock {
public: public:
/// Profile execution information for a given edge in CFG.
///
/// If MispredictedCount equals COUNT_INFERRED, then we have profile
/// data for a fall-through edge with a Count representing an inferred
/// execution count, i.e. the count we calculated internally, not the one
/// coming from profile data.
///
/// For all other values of MispredictedCount, Count represents the number of
/// branch executions from a profile, and MispredictedCount is the number
/// of times the branch was mispredicted according to this profile.
struct BinaryBranchInfo { struct BinaryBranchInfo {
uint64_t Count; uint64_t Count;
uint64_t MispredictedCount; /// number of branches mispredicted uint64_t MispredictedCount; /// number of branches mispredicted
@ -47,7 +58,7 @@ private:
std::vector<BinaryBasicBlock *> Predecessors; std::vector<BinaryBasicBlock *> Predecessors;
std::vector<BinaryBasicBlock *> Successors; std::vector<BinaryBasicBlock *> Successors;
std::set<BinaryBasicBlock *> Throwers; std::set<BinaryBasicBlock *> Throwers;
std::set<BinaryBasicBlock *> LandingPads; std::vector<BinaryBasicBlock *> LandingPads;
/// Each successor has a corresponding BranchInfo entry in the list. /// Each successor has a corresponding BranchInfo entry in the list.
std::vector<BinaryBranchInfo> BranchInfo; std::vector<BinaryBranchInfo> BranchInfo;
@ -121,7 +132,7 @@ private:
} }
public: public:
static constexpr uint64_t COUNT_FALLTHROUGH_EDGE = static constexpr uint64_t COUNT_INFERRED =
std::numeric_limits<uint64_t>::max(); std::numeric_limits<uint64_t>::max();
static constexpr uint64_t COUNT_NO_PROFILE = static constexpr uint64_t COUNT_NO_PROFILE =
std::numeric_limits<uint64_t>::max(); std::numeric_limits<uint64_t>::max();
@ -478,6 +489,12 @@ public:
return ExecutionCount; return ExecutionCount;
} }
/// Execution count of this block when a profile count is recorded for it.
/// Yields 0 when the stored count is COUNT_NO_PROFILE.
uint64_t getKnownExecutionCount() const {
  if (ExecutionCount == COUNT_NO_PROFILE)
    return 0;
  return ExecutionCount;
}
/// Set the execution count for this block. /// Set the execution count for this block.
void setExecutionCount(uint64_t Count) { void setExecutionCount(uint64_t Count) {
ExecutionCount = Count; ExecutionCount = Count;

View File

@ -17,11 +17,14 @@
#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h" #include "llvm/Support/CommandLine.h"
namespace llvm {
namespace bolt { using namespace llvm;
using namespace bolt;
namespace opts { namespace opts {
extern cl::opt<bool> Relocs;
static cl::opt<bool> static cl::opt<bool>
PrintDebugInfo("print-debug-info", PrintDebugInfo("print-debug-info",
cl::desc("print debug info when printing functions"), cl::desc("print debug info when printing functions"),
@ -68,6 +71,42 @@ MCSymbol *BinaryContext::getGlobalSymbolAtAddress(uint64_t Address) const {
return Symbol; return Symbol;
} }
void BinaryContext::foldFunction(BinaryFunction &ChildBF,
                                 BinaryFunction &ParentBF,
                                 std::map<uint64_t, BinaryFunction> &BFs) {
  // The parent becomes known under every name the child had.
  ParentBF.addNewNames(ChildBF.getNames());

  // Functions are referenced through symbols, so re-point every symbol of the
  // child at the parent in the symbol -> function lookup table.
  // GlobalAddresses and GlobalSymbols do not need to be touched.
  for (auto &ChildName : ChildBF.getNames()) {
    auto *ChildSymbol = Ctx->lookupSymbol(ChildName);
    assert(ChildSymbol && "symbol cannot be NULL at this point");
    SymbolToFunctionMap[ChildSymbol] = &ParentBF;
  }

  // Fold the child's execution counts into the parent's.
  ChildBF.mergeProfileDataInto(ParentBF);

  if (!opts::Relocs) {
    // Without relocations the child function is kept, but under a new name
    // that replaces all of its previous names.
    std::string FoldedName = "__ICF_" + ChildBF.Names.back();
    ChildBF.Names.clear();
    ChildBF.Names.push_back(FoldedName);
    ChildBF.OutputSymbol = Ctx->getOrCreateSymbol(FoldedName);
    return;
  }

  // In relocation mode the child is dropped from the global set of functions.
  // ChildBF must not be accessed past this point.
  auto ChildIt = BFs.find(ChildBF.getAddress());
  assert(ChildIt != BFs.end() && "function not found");
  assert(&ChildBF == &ChildIt->second && "function mismatch");
  BFs.erase(ChildIt);
}
void BinaryContext::printGlobalSymbols(raw_ostream& OS) const { void BinaryContext::printGlobalSymbols(raw_ostream& OS) const {
for (auto &entry : GlobalSymbols) { for (auto &entry : GlobalSymbols) {
OS << "(" << entry.first << " -> " << entry.second << ")\n"; OS << "(" << entry.first << " -> " << entry.second << ")\n";
@ -378,6 +417,3 @@ void BinaryContext::addSectionRelocation(SectionRef Section, uint64_t Address,
} }
RI->second.emplace_back(Relocation{Address, Symbol, Type, Addend}); RI->second.emplace_back(Relocation{Address, Symbol, Type, Addend});
} }
} // namespace bolt
} // namespace llvm

View File

@ -88,9 +88,13 @@ public:
SymbolMapType GlobalSymbols; SymbolMapType GlobalSymbols;
/// [address] -> [name1], [name2], ... /// [address] -> [name1], [name2], ...
/// Global addresses never change.
std::multimap<uint64_t, std::string> GlobalAddresses; std::multimap<uint64_t, std::string> GlobalAddresses;
/// [MCSymbol] -> [BinaryFunction] /// [MCSymbol] -> [BinaryFunction]
///
/// As we fold identical functions, multiple symbols can point
/// to the same BinaryFunction.
std::unordered_map<const MCSymbol *, std::unordered_map<const MCSymbol *,
const BinaryFunction *> SymbolToFunctionMap; const BinaryFunction *> SymbolToFunctionMap;
@ -220,6 +224,13 @@ public:
return Ctx->getOrCreateSymbol(Name); return Ctx->getOrCreateSymbol(Name);
} }
/// Fold \p ChildBF into \p ParentBF: the names of \p ChildBF are added to
/// \p ParentBF, the symbols for those names are re-pointed at \p ParentBF in
/// SymbolToFunctionMap, and the profile data of \p ChildBF is merged into
/// that of \p ParentBF.
///
/// In relocation mode \p ChildBF is then erased from the list of functions
/// \p BFs, so the \p ChildBF reference must not be used after the call.
/// Otherwise \p ChildBF stays in \p BFs and is renamed: its name list is
/// replaced by a single name formed by prefixing its last name with "__ICF_".
void foldFunction(BinaryFunction &ChildBF,
BinaryFunction &ParentBF,
std::map<uint64_t, BinaryFunction> &BFs);
/// Add section relocation. /// Add section relocation.
void addSectionRelocation(SectionRef Section, uint64_t Address, void addSectionRelocation(SectionRef Section, uint64_t Address,
MCSymbol *Symbol, uint64_t Type, MCSymbol *Symbol, uint64_t Type,

View File

@ -198,6 +198,9 @@ BinaryFunction::getBasicBlockContainingOffset(uint64_t Offset) {
size_t size_t
BinaryFunction::getBasicBlockOriginalSize(const BinaryBasicBlock *BB) const { BinaryFunction::getBasicBlockOriginalSize(const BinaryBasicBlock *BB) const {
if (CurrentState != State::CFG)
return 0;
auto Index = getIndex(BB); auto Index = getIndex(BB);
if (Index + 1 == BasicBlocks.size()) { if (Index + 1 == BasicBlocks.size()) {
return Size - BB->getOffset(); return Size - BB->getOffset();
@ -322,6 +325,10 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
<< "\n IsSimple : " << IsSimple << "\n IsSimple : " << IsSimple
<< "\n IsSplit : " << IsSplit << "\n IsSplit : " << IsSplit
<< "\n BB Count : " << BasicBlocksLayout.size(); << "\n BB Count : " << BasicBlocksLayout.size();
if (CurrentState == State::CFG) {
OS << "\n Hash : " << Twine::utohexstr(hash());
}
if (FrameInstructions.size()) { if (FrameInstructions.size()) {
OS << "\n CFI Instrs : " << FrameInstructions.size(); OS << "\n CFI Instrs : " << FrameInstructions.size();
} }
@ -339,18 +346,6 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
OS << "\n Exec Count : " << ExecutionCount; OS << "\n Exec Count : " << ExecutionCount;
OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f); OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
} }
if (getIdenticalFunction()) {
OS << "\n Copy Of : " << *getIdenticalFunction();
}
if (!Twins.empty()) {
OS << "\n Twins : ";
auto Sep = "";
for (auto *TwinFunction : Twins) {
OS << Sep << *TwinFunction;
Sep = ", ";
}
}
if (opts::PrintDynoStats && !BasicBlocksLayout.empty()) { if (opts::PrintDynoStats && !BasicBlocksLayout.empty()) {
OS << '\n'; OS << '\n';
@ -387,8 +382,8 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
OS << "------- HOT-COLD SPLIT POINT -------\n\n"; OS << "------- HOT-COLD SPLIT POINT -------\n\n";
OS << BB->getName() << " (" OS << BB->getName() << " ("
<< BB->size() << " instructions, align : " << BB->size() << " instructions, align : " << BB->getAlignment()
<< BB->getAlignment() << ")\n"; << ")\n";
if (BB->isEntryPoint()) if (BB->isEntryPoint())
OS << " Entry Point\n"; OS << " Entry Point\n";
@ -397,7 +392,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
OS << " Landing Pad\n"; OS << " Landing Pad\n";
uint64_t BBExecCount = BB->getExecutionCount(); uint64_t BBExecCount = BB->getExecutionCount();
if (BBExecCount != BinaryBasicBlock::COUNT_NO_PROFILE) { if (hasValidProfile()) {
OS << " Exec Count : " << BBExecCount << "\n"; OS << " Exec Count : " << BBExecCount << "\n";
} }
if (!BBCFIState.empty()) { if (!BBCFIState.empty()) {
@ -435,11 +430,11 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
assert(BI != BB->branch_info_end() && "missing BranchInfo entry"); assert(BI != BB->branch_info_end() && "missing BranchInfo entry");
OS << Sep << Succ->getName(); OS << Sep << Succ->getName();
if (ExecutionCount != COUNT_NO_PROFILE && if (ExecutionCount != COUNT_NO_PROFILE &&
BI->MispredictedCount != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) { BI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
OS << " (mispreds: " << BI->MispredictedCount OS << " (mispreds: " << BI->MispredictedCount
<< ", count: " << BI->Count << ")"; << ", count: " << BI->Count << ")";
} else if (ExecutionCount != COUNT_NO_PROFILE && } else if (ExecutionCount != COUNT_NO_PROFILE &&
BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) { BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
OS << " (inferred count: " << BI->Count << ")"; OS << " (inferred count: " << BI->Count << ")";
} }
Sep = ", "; Sep = ", ";
@ -1221,12 +1216,8 @@ void BinaryFunction::postProcessJumpTables() {
if (TargetOffset < getSize()) if (TargetOffset < getSize())
TakenBranches.emplace_back(JTSiteOffset, TargetOffset); TakenBranches.emplace_back(JTSiteOffset, TargetOffset);
// The relocations for PIC-style jump table have to be ignored. // Ignore relocations for jump tables.
// BC.IgnoredRelocations.emplace(JT->Address + EntryOffset);
// We can ignore the rest too if we output jump table to a different
// section.
if (JT->Type == JumpTable::JTT_PIC)
BC.IgnoredRelocations.emplace(JT->Address + EntryOffset);
EntryOffset += JT->EntrySize; EntryOffset += JT->EntrySize;
@ -1651,8 +1642,8 @@ bool BinaryFunction::buildCFG() {
bool IsPrevFT = false; // Is previous block a fall-through. bool IsPrevFT = false; // Is previous block a fall-through.
for (auto BB : BasicBlocks) { for (auto BB : BasicBlocks) {
if (IsPrevFT) { if (IsPrevFT) {
PrevBB->addSuccessor(BB, BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE, PrevBB->addSuccessor(BB, BinaryBasicBlock::COUNT_NO_PROFILE,
BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE); BinaryBasicBlock::COUNT_INFERRED);
} }
if (BB->empty()) { if (BB->empty()) {
IsPrevFT = true; IsPrevFT = true;
@ -1708,6 +1699,8 @@ bool BinaryFunction::buildCFG() {
// Infer frequency for non-taken branches // Infer frequency for non-taken branches
if (hasValidProfile()) if (hasValidProfile())
inferFallThroughCounts(); inferFallThroughCounts();
else
clearProfile();
// Update CFI information for each BB // Update CFI information for each BB
BBCFIState = annotateCFIState(); BBCFIState = annotateCFIState();
@ -1914,6 +1907,19 @@ void BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) {
} }
} }
void BinaryFunction::clearProfile() {
  // The function-level execution count is left as it is; only the counts
  // stored on basic blocks and on their outgoing edges are reset to zero.
  for (auto *Block : BasicBlocks) {
    Block->ExecutionCount = 0;
    for (auto &Edge : Block->branch_info()) {
      Edge.Count = 0;
      Edge.MispredictedCount = 0;
    }
  }
}
void BinaryFunction::inferFallThroughCounts() { void BinaryFunction::inferFallThroughCounts() {
assert(!BasicBlocks.empty() && "basic block list should not be empty"); assert(!BasicBlocks.empty() && "basic block list should not be empty");
@ -1921,23 +1927,20 @@ void BinaryFunction::inferFallThroughCounts() {
// Compute preliminary execution time for each basic block // Compute preliminary execution time for each basic block
for (auto CurBB : BasicBlocks) { for (auto CurBB : BasicBlocks) {
if (CurBB == *BasicBlocks.begin()) {
CurBB->setExecutionCount(ExecutionCount);
continue;
}
CurBB->ExecutionCount = 0; CurBB->ExecutionCount = 0;
} }
BasicBlocks.front()->setExecutionCount(ExecutionCount);
for (auto CurBB : BasicBlocks) { for (auto CurBB : BasicBlocks) {
auto SuccCount = CurBB->branch_info_begin(); auto SuccCount = CurBB->branch_info_begin();
for (auto Succ : CurBB->successors()) { for (auto Succ : CurBB->successors()) {
// Do not update execution count of the entry block (when we have tail // Do not update execution count of the entry block (when we have tail
// calls). We already accounted for those when computing the func count. // calls). We already accounted for those when computing the func count.
if (Succ == *BasicBlocks.begin()) { if (Succ == BasicBlocks.front()) {
++SuccCount; ++SuccCount;
continue; continue;
} }
if (SuccCount->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) if (SuccCount->Count != BinaryBasicBlock::COUNT_NO_PROFILE)
Succ->setExecutionCount(Succ->getExecutionCount() + SuccCount->Count); Succ->setExecutionCount(Succ->getExecutionCount() + SuccCount->Count);
++SuccCount; ++SuccCount;
} }
@ -1954,8 +1957,9 @@ void BinaryFunction::inferFallThroughCounts() {
} }
} }
// Work on a basic block at a time, propagating frequency information forwards // Work on a basic block at a time, propagating frequency information
// It is important to walk in the layout order // forwards.
// It is important to walk in the layout order.
for (auto CurBB : BasicBlocks) { for (auto CurBB : BasicBlocks) {
uint64_t BBExecCount = CurBB->getExecutionCount(); uint64_t BBExecCount = CurBB->getExecutionCount();
@ -1965,15 +1969,15 @@ void BinaryFunction::inferFallThroughCounts() {
continue; continue;
// Calculate frequency of outgoing branches from this node according to // Calculate frequency of outgoing branches from this node according to
// LBR data // LBR data.
uint64_t ReportedBranches = 0; uint64_t ReportedBranches = 0;
for (const auto &SuccCount : CurBB->branch_info()) { for (const auto &SuccCount : CurBB->branch_info()) {
if (SuccCount.Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) if (SuccCount.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
ReportedBranches += SuccCount.Count; ReportedBranches += SuccCount.Count;
} }
// Calculate frequency of outgoing tail calls from this node according to // Calculate frequency of outgoing tail calls from this node according to
// LBR data // LBR data.
uint64_t ReportedTailCalls = 0; uint64_t ReportedTailCalls = 0;
auto TCI = TailCallTerminatedBlocks.find(CurBB); auto TCI = TailCallTerminatedBlocks.find(CurBB);
if (TCI != TailCallTerminatedBlocks.end()) { if (TCI != TailCallTerminatedBlocks.end()) {
@ -1993,7 +1997,7 @@ void BinaryFunction::inferFallThroughCounts() {
ReportedBranches + ReportedTailCalls + ReportedThrows; ReportedBranches + ReportedTailCalls + ReportedThrows;
// Infer the frequency of the fall-through edge, representing not taking the // Infer the frequency of the fall-through edge, representing not taking the
// branch // branch.
uint64_t Inferred = 0; uint64_t Inferred = 0;
if (BBExecCount > TotalReportedJumps) if (BBExecCount > TotalReportedJumps)
Inferred = BBExecCount - TotalReportedJumps; Inferred = BBExecCount - TotalReportedJumps;
@ -2012,7 +2016,7 @@ void BinaryFunction::inferFallThroughCounts() {
// If there is an FT it will be the last successor. // If there is an FT it will be the last successor.
auto &SuccCount = *CurBB->branch_info_rbegin(); auto &SuccCount = *CurBB->branch_info_rbegin();
auto &Succ = *CurBB->succ_rbegin(); auto &Succ = *CurBB->succ_rbegin();
if (SuccCount.Count == BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) { if (SuccCount.Count == BinaryBasicBlock::COUNT_NO_PROFILE) {
SuccCount.Count = Inferred; SuccCount.Count = Inferred;
Succ->ExecutionCount += Inferred; Succ->ExecutionCount += Inferred;
} }
@ -2662,10 +2666,10 @@ void BinaryFunction::dumpGraph(raw_ostream& OS) const {
Branch.c_str()); Branch.c_str());
if (BB->getExecutionCount() != COUNT_NO_PROFILE && if (BB->getExecutionCount() != COUNT_NO_PROFILE &&
BI->MispredictedCount != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) { BI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
OS << "\\n(M:" << BI->MispredictedCount << ",C:" << BI->Count << ")"; OS << "\\n(M:" << BI->MispredictedCount << ",C:" << BI->Count << ")";
} else if (ExecutionCount != COUNT_NO_PROFILE && } else if (ExecutionCount != COUNT_NO_PROFILE &&
BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) { BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
OS << "\\n(IC:" << BI->Count << ")"; OS << "\\n(IC:" << BI->Count << ")";
} }
OS << "\"]\n"; OS << "\"]\n";
@ -2881,61 +2885,54 @@ void BinaryFunction::propagateGnuArgsSizeInfo() {
} }
void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const { void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
if (!hasValidProfile() || !BF.hasValidProfile()) // No reason to merge invalid or empty profiles into BF.
if (!hasValidProfile())
return; return;
// Update BF's execution count. // Update function execution count.
uint64_t MyExecutionCount = getExecutionCount(); if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE) {
if (MyExecutionCount != BinaryFunction::COUNT_NO_PROFILE) { BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount());
uint64_t OldExecCount = BF.getExecutionCount();
uint64_t NewExecCount =
OldExecCount == BinaryFunction::COUNT_NO_PROFILE ?
MyExecutionCount :
MyExecutionCount + OldExecCount;
BF.setExecutionCount(NewExecCount);
} }
// Update BF's basic block and edge counts. // Since we are merging a valid profile, the new profile should be valid too.
// It has either already been valid, or it has been cleaned up.
BF.ProfileMatchRatio = 1.0f;
// Update basic block and edge counts.
auto BBMergeI = BF.begin(); auto BBMergeI = BF.begin();
for (BinaryBasicBlock *BB : BasicBlocks) { for (BinaryBasicBlock *BB : BasicBlocks) {
BinaryBasicBlock *BBMerge = &*BBMergeI; BinaryBasicBlock *BBMerge = &*BBMergeI;
assert(getIndex(BB) == BF.getIndex(BBMerge)); assert(getIndex(BB) == BF.getIndex(BBMerge));
// Update BF's basic block count. // Update basic block count.
uint64_t MyBBExecutionCount = BB->getExecutionCount(); if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) {
if (MyBBExecutionCount != BinaryBasicBlock::COUNT_NO_PROFILE) { BBMerge->setExecutionCount(
uint64_t OldExecCount = BBMerge->getExecutionCount(); BBMerge->getKnownExecutionCount() + BB->getExecutionCount());
uint64_t NewExecCount =
OldExecCount == BinaryBasicBlock::COUNT_NO_PROFILE ?
MyBBExecutionCount :
MyBBExecutionCount + OldExecCount;
BBMerge->setExecutionCount(NewExecCount);
} }
// Update BF's edge count for successors of this basic block. // Update edge count for successors of this basic block.
auto BBMergeSI = BBMerge->succ_begin(); auto BBMergeSI = BBMerge->succ_begin();
auto BII = BB->branch_info_begin();
auto BIMergeI = BBMerge->branch_info_begin(); auto BIMergeI = BBMerge->branch_info_begin();
for (BinaryBasicBlock *BBSucc : BB->successors()) { auto BII = BB->branch_info_begin();
BinaryBasicBlock *BBMergeSucc = *BBMergeSI; for (const auto *BBSucc : BB->successors()) {
auto *BBMergeSucc = *BBMergeSI;
assert(getIndex(BBSucc) == BF.getIndex(BBMergeSucc)); assert(getIndex(BBSucc) == BF.getIndex(BBMergeSucc));
if (BII->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) { // At this point no branch count should be set to COUNT_NO_PROFILE.
uint64_t OldBranchCount = BIMergeI->Count; assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
uint64_t NewBranchCount = "unexpected unknown branch profile");
OldBranchCount == BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE ? assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
BII->Count : "unexpected unknown branch profile");
BII->Count + OldBranchCount;
BIMergeI->Count = NewBranchCount;
}
if (BII->MispredictedCount != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) { BIMergeI->Count += BII->Count;
uint64_t OldMispredictedCount = BIMergeI->MispredictedCount;
uint64_t NewMispredictedCount = // When we merge inferred and real fall-through branch data, the merged
OldMispredictedCount == BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE ? // data is considered inferred.
BII->MispredictedCount : if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED &&
BII->MispredictedCount + OldMispredictedCount; BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
BIMergeI->MispredictedCount = NewMispredictedCount; BIMergeI->MispredictedCount += BII->MispredictedCount;
} else {
BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
} }
++BBMergeSI; ++BBMergeSI;
@ -2949,171 +2946,46 @@ void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
assert(BBMergeI == BF.end()); assert(BBMergeI == BF.end());
} }
std::pair<bool, unsigned> BinaryFunction::isCalleeEquivalentWith( __attribute__((noinline)) BinaryFunction::BasicBlockOrderType BinaryFunction::dfs() const {
const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther, BasicBlockOrderType DFS;
const BinaryBasicBlock &BBOther, const BinaryFunction &BF) const { unsigned Index = 0;
// The callee operand in a direct call is the first operand. This std::stack<BinaryBasicBlock *> Stack;
// operand should be a symbol corresponding to the callee function.
constexpr unsigned CalleeOpIndex = 0;
// Helper function. // Push entry points to the stack in reverse order.
auto getGlobalAddress = [this] (const MCSymbol &Symbol) -> uint64_t {
auto AI = BC.GlobalSymbols.find(Symbol.getName());
assert(AI != BC.GlobalSymbols.end());
return AI->second;
};
const MCOperand &CalleeOp = Inst.getOperand(CalleeOpIndex);
const MCOperand &CalleeOpOther = InstOther.getOperand(CalleeOpIndex);
if (!CalleeOp.isExpr() || !CalleeOpOther.isExpr()) {
// At least one of these is actually an indirect call.
return std::make_pair(false, 0);
}
const MCSymbol &CalleeSymbol = CalleeOp.getExpr()->getSymbol();
uint64_t CalleeAddress = getGlobalAddress(CalleeSymbol);
const MCSymbol &CalleeSymbolOther = CalleeOpOther.getExpr()->getSymbol();
uint64_t CalleeAddressOther = getGlobalAddress(CalleeSymbolOther);
bool BothRecursiveCalls =
CalleeAddress == getAddress() &&
CalleeAddressOther == BF.getAddress();
bool SameCallee = CalleeAddress == CalleeAddressOther;
return std::make_pair(BothRecursiveCalls || SameCallee, CalleeOpIndex);
}
std::pair<bool, unsigned> BinaryFunction::isTargetEquivalentWith(
const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther,
const BinaryBasicBlock &BBOther, const BinaryFunction &BF,
bool AreInvokes) const {
// The target operand in a (non-indirect) jump instruction is the
// first operand.
unsigned TargetOpIndex = 0;
if (AreInvokes) {
// The landing pad operand in an invoke is either the second or the
// sixth operand, depending on the number of operands of the invoke.
TargetOpIndex = 1;
if (Inst.getNumOperands() == 7 || Inst.getNumOperands() == 8)
TargetOpIndex = 5;
}
const MCOperand &TargetOp = Inst.getOperand(TargetOpIndex);
const MCOperand &TargetOpOther = InstOther.getOperand(TargetOpIndex);
if (!TargetOp.isExpr() || !TargetOpOther.isExpr()) {
assert(AreInvokes);
// An invoke without a landing pad operand has no catch handler. As long
// as both invokes have no catch target, we can consider they have the
// same catch target.
return std::make_pair(!TargetOp.isExpr() && !TargetOpOther.isExpr(),
TargetOpIndex);
}
const MCSymbol &TargetSymbol = TargetOp.getExpr()->getSymbol();
BinaryBasicBlock *TargetBB =
AreInvokes ?
BB.getLandingPad(&TargetSymbol) :
BB.getSuccessor(&TargetSymbol);
const MCSymbol &TargetSymbolOther = TargetOpOther.getExpr()->getSymbol();
BinaryBasicBlock *TargetBBOther =
AreInvokes ?
BBOther.getLandingPad(&TargetSymbolOther) :
BBOther.getSuccessor(&TargetSymbolOther);
if (TargetBB == nullptr || TargetBBOther == nullptr) {
assert(!AreInvokes);
// This is a tail call implemented with a jump that was not
// converted to a call (e.g. conditional jump). Since the
// instructions were not identical, the functions canot be
// proven identical either.
return std::make_pair(false, 0);
}
return std::make_pair(getIndex(TargetBB) == BF.getIndex(TargetBBOther),
TargetOpIndex);
}
bool BinaryFunction::isInstrEquivalentWith(
const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther,
const BinaryBasicBlock &BBOther, const BinaryFunction &BF) const {
// First check their opcodes.
if (Inst.getOpcode() != InstOther.getOpcode()) {
return false;
}
// Then check if they have the same number of operands.
unsigned NumOperands = Inst.getNumOperands();
unsigned NumOperandsOther = InstOther.getNumOperands();
if (NumOperands != NumOperandsOther) {
return false;
}
// We are interested in 3 special cases:
// //
// a) both instructions are recursive calls. // NB: we rely on the original order of entries to match.
// b) both instructions are local jumps to basic blocks with same indices. for (auto BBI = layout_rbegin(); BBI != layout_rend(); ++BBI) {
// c) both instructions are invokes with landing pad blocks with same indices. auto *BB = *BBI;
// if (BB->isEntryPoint())
// In any of these cases the instructions will differ in some operands, but Stack.push(BB);
// given identical CFG of the functions, they can still be considered BB->setLayoutIndex(BinaryBasicBlock::InvalidIndex);
// equivalent.
bool BothCalls =
BC.MIA->isCall(Inst) &&
BC.MIA->isCall(InstOther);
bool BothInvokes =
BC.MIA->isInvoke(Inst) &&
BC.MIA->isInvoke(InstOther);
bool BothBranches =
BC.MIA->isBranch(Inst) &&
!BC.MIA->isIndirectBranch(Inst) &&
BC.MIA->isBranch(InstOther) &&
!BC.MIA->isIndirectBranch(InstOther);
if (!BothCalls && !BothInvokes && !BothBranches) {
return Inst.equals(InstOther);
} }
// We figure out if both instructions are recursive calls (case a) or else while (!Stack.empty()) {
// if they are calls to the same function. auto *BB = Stack.top();
bool EquivCallees = false; Stack.pop();
unsigned CalleeOpIndex = 0;
if (BothCalls) {
std::tie(EquivCallees, CalleeOpIndex) =
isCalleeEquivalentWith(Inst, BB, InstOther, BBOther, BF);
}
// We figure out if both instructions are jumps (case b) or invokes (case c) if (BB->getLayoutIndex() != BinaryBasicBlock::InvalidIndex)
// with equivalent jump targets or landing pads respectively.
assert(!(BothInvokes && BothBranches));
bool SameTarget = false;
unsigned TargetOpIndex = 0;
if (BothInvokes || BothBranches) {
std::tie(SameTarget, TargetOpIndex) =
isTargetEquivalentWith(Inst, BB, InstOther, BBOther, BF, BothInvokes);
}
// Compare all operands.
for (unsigned i = 0; i < NumOperands; ++i) {
if (i == CalleeOpIndex && BothCalls && EquivCallees)
continue; continue;
if (i == TargetOpIndex && (BothInvokes || BothBranches) && SameTarget) BB->setLayoutIndex(Index++);
continue; DFS.push_back(BB);
if (!Inst.getOperand(i).equals(InstOther.getOperand(i))) for (auto *SuccBB : BB->landing_pads()) {
return false; Stack.push(SuccBB);
}
for (auto *SuccBB : BB->successors()) {
Stack.push(SuccBB);
}
} }
// The instructions are equal although (some of) their operands return DFS;
// may differ.
return true;
} }
bool BinaryFunction::isIdenticalWith(const BinaryFunction &OtherBF) const { bool BinaryFunction::isIdenticalWith(const BinaryFunction &OtherBF,
bool IgnoreSymbols,
bool UseDFS) const {
assert(CurrentState == State::CFG && OtherBF.CurrentState == State::CFG); assert(CurrentState == State::CFG && OtherBF.CurrentState == State::CFG);
// Compare the two functions, one basic block at a time. // Compare the two functions, one basic block at a time.
@ -3121,26 +2993,29 @@ bool BinaryFunction::isIdenticalWith(const BinaryFunction &OtherBF) const {
// instruction sequences and the same index in their corresponding // instruction sequences and the same index in their corresponding
// functions. The latter is important for CFG equality. // functions. The latter is important for CFG equality.
// We do not consider functions with just different pseudo instruction if (layout_size() != OtherBF.layout_size())
// sequences non-identical by default. However we print a warning
// in case two instructions that are identical have different pseudo
// instruction sequences.
bool PseudosDiffer = false;
if (size() != OtherBF.size())
return false; return false;
// Make sure indices are up to date for both functions. // Comparing multi-entry functions could be non-trivial.
updateLayoutIndices(); if (isMultiEntry() || OtherBF.isMultiEntry())
OtherBF.updateLayoutIndices(); return false;
auto BBI = OtherBF.layout_begin(); // Process both functions in either DFS or existing order.
for (const auto *BB : layout()) { const auto &Order = UseDFS ? dfs() : BasicBlocksLayout;
const auto &OtherOrder = UseDFS ? OtherBF.dfs() : OtherBF.BasicBlocksLayout;
auto BBI = OtherOrder.begin();
for (const auto *BB : Order) {
const auto *OtherBB = *BBI; const auto *OtherBB = *BBI;
if (BB->getLayoutIndex() != OtherBB->getLayoutIndex())
return false;
// Compare successor basic blocks. // Compare successor basic blocks.
// NOTE: the comparison for jump tables is only partially verified here.
if (BB->succ_size() != OtherBB->succ_size()) if (BB->succ_size() != OtherBB->succ_size())
return false; return false;
auto SuccBBI = OtherBB->succ_begin(); auto SuccBBI = OtherBB->succ_begin();
for (const auto *SuccBB : BB->successors()) { for (const auto *SuccBB : BB->successors()) {
const auto *SuccOtherBB = *SuccBBI; const auto *SuccOtherBB = *SuccBBI;
@ -3149,94 +3024,155 @@ bool BinaryFunction::isIdenticalWith(const BinaryFunction &OtherBF) const {
++SuccBBI; ++SuccBBI;
} }
// Compare landing pads. // Compare all instructions including pseudos.
if (BB->lp_size() != OtherBB->lp_size())
return false;
auto LPI = OtherBB->lp_begin();
for (const auto *LP : BB->landing_pads()) {
const auto *LPOther = *LPI;
if (LP->getLayoutIndex() != LPOther->getLayoutIndex())
return false;
++LPI;
}
// Compare instructions.
auto I = BB->begin(), E = BB->end(); auto I = BB->begin(), E = BB->end();
auto OtherI = OtherBB->begin(), OtherE = OtherBB->end(); auto OtherI = OtherBB->begin(), OtherE = OtherBB->end();
while (I != E && OtherI != OtherE) { while (I != E && OtherI != OtherE) {
const MCInst &Inst = *I;
const MCInst &InstOther = *OtherI;
bool IsInstPseudo = BC.MII->get(Inst.getOpcode()).isPseudo(); bool Identical;
bool IsInstOtherPseudo = BC.MII->get(InstOther.getOpcode()).isPseudo(); if (IgnoreSymbols) {
Identical =
isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB, OtherBF,
[](const MCSymbol *A, const MCSymbol *B) {
return true;
});
} else {
// Compare symbols.
auto AreSymbolsIdentical = [&] (const MCSymbol *A, const MCSymbol *B) {
if (A == B)
return true;
if (IsInstPseudo == IsInstOtherPseudo) { // All local symbols are considered identical since they affect a
// Either both are pseudos or none is. // control flow and we check the control flow separately.
bool areEqual = // If a local symbol is escaped, then the function (potentially) has
isInstrEquivalentWith(Inst, *BB, InstOther, *OtherBB, OtherBF); // multiple entry points and we exclude such functions from
// comparison.
if (A->isTemporary() && B->isTemporary())
return true;
if (!areEqual && IsInstPseudo) { // Compare symbols as functions.
// Different pseudo instructions. const auto *FunctionA = BC.getFunctionForSymbol(A);
PseudosDiffer = true; const auto *FunctionB = BC.getFunctionForSymbol(B);
} if (FunctionA && FunctionB) {
else if (!areEqual) { // Self-referencing functions and recursive calls.
// Different non-pseudo instructions. if (FunctionA == this && FunctionB == &OtherBF)
return false; return true;
} return FunctionA == FunctionB;
}
++I; ++OtherI; // Check if symbols are jump tables.
} auto SIA = BC.GlobalSymbols.find(A->getName());
else { if (SIA == BC.GlobalSymbols.end())
// One instruction is a pseudo while the other is not. return false;
PseudosDiffer = true; auto SIB = BC.GlobalSymbols.find(B->getName());
IsInstPseudo ? ++I : ++OtherI; if (SIB == BC.GlobalSymbols.end())
return false;
assert((SIA->second != SIB->second) &&
"different symbols should not have the same value");
const auto *JumpTableA = getJumpTableContainingAddress(SIA->second);
if (!JumpTableA)
return false;
const auto *JumpTableB =
OtherBF.getJumpTableContainingAddress(SIB->second);
if (!JumpTableB)
return false;
if ((SIA->second - JumpTableA->Address) !=
(SIB->second - JumpTableB->Address))
return false;
return equalJumpTables(JumpTableA, JumpTableB, OtherBF);
};
Identical =
isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB, OtherBF,
AreSymbolsIdentical);
} }
if (!Identical)
return false;
++I; ++OtherI;
} }
// Check for trailing instructions or pseudos in one of the basic blocks. // One of the identical blocks may have a trailing unconditional jump that
auto TrailI = I == E ? OtherI : I; // is ignored for CFG purposes.
auto TrailE = I == E ? OtherE : E; auto *TrailingInstr = (I != E ? &(*I)
while (TrailI != TrailE) { : (OtherI != OtherE ? &(*OtherI) : 0));
const MCInst &InstTrail = *TrailI; if (TrailingInstr && !BC.MIA->isUnconditionalBranch(*TrailingInstr)) {
if (!BC.MII->get(InstTrail.getOpcode()).isPseudo()) { return false;
// One of the functions has more instructions in this basic block
// than the other, hence not identical.
return false;
}
// There are trailing pseudos only in one of the basic blocks.
PseudosDiffer = true;
++TrailI;
} }
++BBI; ++BBI;
} }
if (opts::Verbosity >= 1 && PseudosDiffer) { return true;
errs() << "BOLT-WARNING: functions " << *this << " and " }
<< OtherBF << " are identical, but have different"
<< " pseudo instruction sequences.\n"; bool BinaryFunction::equalJumpTables(const JumpTable *JumpTableA,
const JumpTable *JumpTableB,
const BinaryFunction &BFB) const {
if (JumpTableA->EntrySize != JumpTableB->EntrySize)
return false;
if (JumpTableA->Type != JumpTableB->Type)
return false;
if (JumpTableA->getSize() != JumpTableB->getSize())
return false;
for (uint64_t Index = 0; Index < JumpTableA->Entries.size(); ++Index) {
const auto *LabelA = JumpTableA->Entries[Index];
const auto *LabelB = JumpTableB->Entries[Index];
const auto *TargetA = getBasicBlockForLabel(LabelA);
const auto *TargetB = BFB.getBasicBlockForLabel(LabelB);
if (!TargetA || !TargetB) {
assert((TargetA || LabelA == getFunctionEndLabel()) &&
"no target basic block found");
assert((TargetB || LabelB == BFB.getFunctionEndLabel()) &&
"no target basic block found");
if (TargetA != TargetB)
return false;
continue;
}
assert(TargetA && TargetB && "cannot locate target block(s)");
if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex())
return false;
} }
return true; return true;
} }
std::size_t BinaryFunction::hash() const { std::size_t BinaryFunction::hash(bool Recompute, bool UseDFS) const {
assert(CurrentState == State::CFG); assert(CurrentState == State::CFG);
if (!Recompute)
return Hash;
const auto &Order = UseDFS ? dfs() : BasicBlocksLayout;
// The hash is computed by creating a string of all the opcodes // The hash is computed by creating a string of all the opcodes
// in the function and hashing that string with std::hash. // in the function and hashing that string with std::hash.
std::string Opcodes; std::string Opcodes;
for (const auto *BB : layout()) { for (const auto *BB : Order) {
for (const auto &Inst : *BB) { for (const auto &Inst : *BB) {
unsigned Opcode = Inst.getOpcode(); unsigned Opcode = Inst.getOpcode();
if (BC.MII->get(Opcode).isPseudo()) if (BC.MII->get(Opcode).isPseudo())
continue; continue;
// Ignore conditional jumps because the conditional code is not // Ignore unconditional jumps since we check CFG consistency by processing
// always up to date. // basic blocks in order and do not rely on branches to be in-sync with
if (BC.MIA->isConditionalBranch(Inst)) // CFG. Note that we still use condition code of conditional jumps.
if (BC.MIA->isUnconditionalBranch(Inst))
continue; continue;
if (Opcode == 0) { if (Opcode == 0) {
@ -3252,7 +3188,7 @@ std::size_t BinaryFunction::hash() const {
} }
} }
return std::hash<std::string>{}(Opcodes); return Hash = std::hash<std::string>{}(Opcodes);
} }
void BinaryFunction::insertBasicBlocks( void BinaryFunction::insertBasicBlocks(
@ -3508,7 +3444,7 @@ void BinaryFunction::calculateLoopInfo() {
auto BI = Latch->branch_info_begin(); auto BI = Latch->branch_info_begin();
for (BinaryBasicBlock *Succ : Latch->successors()) { for (BinaryBasicBlock *Succ : Latch->successors()) {
if (Succ == L->getHeader()) { if (Succ == L->getHeader()) {
assert(BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE && assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
"profile data not found"); "profile data not found");
L->TotalBackEdgeCount += BI->Count; L->TotalBackEdgeCount += BI->Count;
} }
@ -3528,7 +3464,7 @@ void BinaryFunction::calculateLoopInfo() {
auto BI = Exiting->branch_info_begin(); auto BI = Exiting->branch_info_begin();
for (BinaryBasicBlock *Succ : Exiting->successors()) { for (BinaryBasicBlock *Succ : Exiting->successors()) {
if (Succ == ExitTarget) { if (Succ == ExitTarget) {
assert(BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE && assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
"profile data not found"); "profile data not found");
L->ExitCount += BI->Count; L->ExitCount += BI->Count;
} }
@ -3605,9 +3541,11 @@ DynoStats BinaryFunction::getDynoStats() const {
// basic block especially since the block may contain a function that // basic block especially since the block may contain a function that
// does not return or a function that throws an exception. // does not return or a function that throws an exception.
uint64_t BBExecutionCount = 0; uint64_t BBExecutionCount = 0;
for (const auto &BI : BB->branch_info()) for (const auto &BI : BB->branch_info()) {
if (BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) assert(BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
BBExecutionCount += BI.Count; "unexpected empty profile");
BBExecutionCount += BI.Count;
}
// Ignore empty blocks and blocks that were not executed. // Ignore empty blocks and blocks that were not executed.
if (BB->getNumNonPseudos() == 0 || BBExecutionCount == 0) if (BB->getNumNonPseudos() == 0 || BBExecutionCount == 0)

View File

@ -194,7 +194,7 @@ public:
}; };
static constexpr uint64_t COUNT_NO_PROFILE = static constexpr uint64_t COUNT_NO_PROFILE =
std::numeric_limits<uint64_t>::max(); BinaryBasicBlock::COUNT_NO_PROFILE;
// Function size, in number of BBs, above which we fallback to a heuristic // Function size, in number of BBs, above which we fallback to a heuristic
// solution to the layout problem instead of seeking the optimal one. // solution to the layout problem instead of seeking the optimal one.
static constexpr uint64_t FUNC_SIZE_THRESHOLD = 10; static constexpr uint64_t FUNC_SIZE_THRESHOLD = 10;
@ -216,20 +216,6 @@ private:
/// base address for position independent binaries. /// base address for position independent binaries.
uint64_t Address; uint64_t Address;
/// List of functions that are identical to this one. We only maintain
/// the list for the function that should be emitted, for the rest we
/// set IdenticalFunction. When we emit this function we have
/// to emit symbols for all its twins.
std::set<BinaryFunction *> Twins;
/// Address of an identical function that can replace this one.
///
/// In case multiple functions are identical to each other, one of the
/// functions (the representative) will have it set to nullptr, while the
/// rest of the functions will point to the representative through one or
/// more steps.
BinaryFunction *IdenticalFunction{nullptr};
/// Original size of the function. /// Original size of the function.
uint64_t Size; uint64_t Size;
@ -264,6 +250,9 @@ private:
/// True if the function uses DW_CFA_GNU_args_size CFIs. /// True if the function uses DW_CFA_GNU_args_size CFIs.
bool UsesGnuArgsSize{false}; bool UsesGnuArgsSize{false};
/// True if the function has more than one entry point.
bool IsMultiEntry{false};
/// The address for the code for this function in codegen memory. /// The address for the code for this function in codegen memory.
uint64_t ImageAddress{0}; uint64_t ImageAddress{0};
@ -298,12 +287,20 @@ private:
/// the output binary. /// the output binary.
uint32_t AddressRangesOffset{-1U}; uint32_t AddressRangesOffset{-1U};
/// Last computed hash value.
mutable uint64_t Hash{0};
/// Get basic block index assuming it belongs to this function. /// Get basic block index assuming it belongs to this function.
unsigned getIndex(const BinaryBasicBlock *BB) const { unsigned getIndex(const BinaryBasicBlock *BB) const {
assert(BB->getIndex() < BasicBlocks.size()); assert(BB->getIndex() < BasicBlocks.size());
return BB->getIndex(); return BB->getIndex();
} }
/// Return the basic block registered in LabelToBB for \p Label, or nullptr
/// if the label has no entry in the map.
BinaryBasicBlock *getBasicBlockForLabel(const MCSymbol *Label) const {
  const auto Found = LabelToBB.find(Label);
  if (Found == LabelToBB.end())
    return nullptr;
  return Found->second;
}
/// Return basic block that originally contained offset \p Offset /// Return basic block that originally contained offset \p Offset
/// from the function start. /// from the function start.
BinaryBasicBlock *getBasicBlockContainingOffset(uint64_t Offset); BinaryBasicBlock *getBasicBlockContainingOffset(uint64_t Offset);
@ -332,28 +329,46 @@ private:
/// Helper function that compares an instruction of this function to the /// Helper function that compares an instruction of this function to the
/// given instruction of the given function. The functions should have /// given instruction of the given function. The functions should have
/// identical CFG. /// identical CFG.
template <class Compare>
bool isInstrEquivalentWith( bool isInstrEquivalentWith(
const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther, const MCInst &InstA, const BinaryBasicBlock &BBA,
const BinaryBasicBlock &BBOther, const BinaryFunction &BF) const; const MCInst &InstB, const BinaryBasicBlock &BBB,
const BinaryFunction &BFB, Compare Comp) const {
if (InstA.getOpcode() != InstB.getOpcode()) {
return false;
}
/// Helper function that compares the callees of two call instructions. // In this function we check for special conditions:
/// Callees are considered equivalent if both refer to the same function //
/// or if both calls are recursive. Instructions should have same opcodes // * instructions with landing pads
/// and same number of operands. Returns true and the callee operand index //
/// when callees are quivalent, and false, 0 otherwise. // Most of the common cases should be handled by MCInst::equals()
std::pair<bool, unsigned> isCalleeEquivalentWith( // that compares regular instruction operands.
const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther, //
const BinaryBasicBlock &BBOther, const BinaryFunction &BF) const; // NB: there's no need to compare jump table indirect jump instructions
// separately as jump tables are handled by comparing corresponding
// symbols.
const auto EHInfoA = BC.MIA->getEHInfo(InstA);
const auto EHInfoB = BC.MIA->getEHInfo(InstB);
/// Helper function that compares the targets two jump or invoke instructions. // Action indices should match.
/// A target of an invoke we consider its landing pad basic block. The if (EHInfoA.second != EHInfoB.second)
/// corresponding functions should have identical CFG. Instructions should return false;
/// have same opcodes and same number of operands. Returns true and the target
/// operand index when targets are equivalent, and false, 0 otherwise. if (!EHInfoA.first != !EHInfoB.first)
std::pair<bool, unsigned> isTargetEquivalentWith( return false;
const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther,
const BinaryBasicBlock &BBOther, const BinaryFunction &BF, if (EHInfoA.first && EHInfoB.first) {
bool AreInvokes) const; const auto *LPA = BBA.getLandingPad(EHInfoA.first);
const auto *LPB = BBB.getLandingPad(EHInfoB.first);
assert(LPA && LPB && "cannot locate landing pad(s)");
if (LPA->getLayoutIndex() != LPB->getLayoutIndex())
return false;
}
return InstA.equals(InstB, Comp);
}
/// Clear the landing pads for all blocks contained in the range of /// Clear the landing pads for all blocks contained in the range of
/// [StartIndex, StartIndex + NumBlocks). This also has the effect of /// [StartIndex, StartIndex + NumBlocks). This also has the effect of
@ -372,6 +387,9 @@ private:
/// Temporary holder of offsets that are potentially entry points. /// Temporary holder of offsets that are potentially entry points.
std::unordered_set<uint64_t> EntryOffsets; std::unordered_set<uint64_t> EntryOffsets;
/// Map labels to corresponding basic blocks.
std::unordered_map<const MCSymbol *, BinaryBasicBlock *> LabelToBB;
using BranchListType = std::vector<std::pair<uint32_t, uint32_t>>; using BranchListType = std::vector<std::pair<uint32_t, uint32_t>>;
BranchListType TakenBranches; /// All local taken branches. BranchListType TakenBranches; /// All local taken branches.
BranchListType FTBranches; /// All fall-through branches. BranchListType FTBranches; /// All fall-through branches.
@ -407,7 +425,7 @@ private:
uint32_t Index; // index of the tail call in the basic block uint32_t Index; // index of the tail call in the basic block
uint64_t TargetAddress; // address of the callee uint64_t TargetAddress; // address of the callee
uint64_t Count{0}; // taken count from profile data uint64_t Count{0}; // taken count from profile data
uint64_t Mispreds{0}; // mispredicted count from progile data uint64_t Mispreds{0}; // mispredicted count from profile data
uint32_t CFIStateBefore{0}; // CFI state before the tail call instruction uint32_t CFIStateBefore{0}; // CFI state before the tail call instruction
TailCallInfo(uint32_t Offset, uint32_t Index, uint64_t TargetAddress) : TailCallInfo(uint32_t Offset, uint32_t Index, uint64_t TargetAddress) :
@ -532,6 +550,23 @@ private:
return nullptr; return nullptr;
} }
/// Return the jump table whose range [start, start + getSize()) covers
/// \p Address, or nullptr if no jump table of this function covers it.
const JumpTable *getJumpTableContainingAddress(uint64_t Address) const {
  // upper_bound() yields the first table that starts strictly after Address,
  // so the only possible match is the table immediately preceding it.
  auto Candidate = JumpTables.upper_bound(Address);
  if (Candidate == JumpTables.begin())
    return nullptr;
  --Candidate;

  const bool CoversAddress =
      Candidate->first + Candidate->second.getSize() > Address;
  if (!CoversAddress)
    return nullptr;
  return &Candidate->second;
}
/// Compare two jump tables in 2 functions. The function relies on consistent
/// ordering of basic blocks in both binary functions (e.g. DFS).
bool equalJumpTables(const JumpTable *JumpTableA,
const JumpTable *JumpTableB,
const BinaryFunction &BFB) const;
/// All jump table sites in the function. /// All jump table sites in the function.
std::vector<std::pair<uint64_t, uint64_t>> JTSites; std::vector<std::pair<uint64_t, uint64_t>> JTSites;
@ -572,15 +607,18 @@ private:
CFIStateVector BBCFIState; CFIStateVector BBCFIState;
/// Symbol in the output. /// Symbol in the output.
///
/// NB: function can have multiple symbols associated with it. We will emit
/// all symbols for the function.
MCSymbol *OutputSymbol; MCSymbol *OutputSymbol;
MCSymbol *ColdSymbol{nullptr}; MCSymbol *ColdSymbol{nullptr};
/// Symbol at the end of the function. /// Symbol at the end of the function.
MCSymbol *FunctionEndLabel{nullptr}; mutable MCSymbol *FunctionEndLabel{nullptr};
/// Symbol at the end of the cold part of split function. /// Symbol at the end of the cold part of split function.
MCSymbol *FunctionColdEndLabel{nullptr}; mutable MCSymbol *FunctionColdEndLabel{nullptr};
/// Unique number associated with the function. /// Unique number associated with the function.
uint64_t FunctionNumber; uint64_t FunctionNumber;
@ -603,6 +641,11 @@ private:
Itr itr; Itr itr;
}; };
/// Record \p NewName as one more name for this function by appending it to
/// the end of Names.
void addAlternativeName(std::string NewName) {
  Names.push_back(NewName);
}
/// Return label at a given \p Address in the function. If the label does /// Return label at a given \p Address in the function. If the label does
/// not exist - create it. Assert if the \p Address does not belong to /// not exist - create it. Assert if the \p Address does not belong to
/// the function. If \p CreatePastEnd is true, then return the function /// the function. If \p CreatePastEnd is true, then return the function
@ -613,6 +656,7 @@ private:
/// Register an entry point at a given \p Offset into the function. /// Register an entry point at a given \p Offset into the function.
MCSymbol *addEntryPointAtOffset(uint64_t Offset) { MCSymbol *addEntryPointAtOffset(uint64_t Offset) {
EntryOffsets.emplace(Offset); EntryOffsets.emplace(Offset);
IsMultiEntry = (Offset == 0 ? IsMultiEntry : true);
return getOrCreateLocalLabel(getAddress() + Offset); return getOrCreateLocalLabel(getAddress() + Offset);
} }
@ -645,6 +689,7 @@ private:
BinaryFunction(const BinaryFunction &) = delete; BinaryFunction(const BinaryFunction &) = delete;
friend class RewriteInstance; friend class RewriteInstance;
friend class BinaryContext;
/// Creation should be handled by RewriteInstance::createBinaryFunction(). /// Creation should be handled by RewriteInstance::createBinaryFunction().
BinaryFunction(const std::string &Name, SectionRef Section, uint64_t Address, BinaryFunction(const std::string &Name, SectionRef Section, uint64_t Address,
@ -737,6 +782,10 @@ public:
return iterator_range<const_cfi_iterator>(cie_begin(), cie_end()); return iterator_range<const_cfi_iterator>(cie_begin(), cie_end());
} }
/// Return a list of basic blocks sorted using DFS and update layout indices
/// using the same order. Does not modify the current layout.
BasicBlockOrderType dfs() const;
/// Modify code layout making necessary adjustments to instructions at the /// Modify code layout making necessary adjustments to instructions at the
/// end of basic blocks. /// end of basic blocks.
void modifyLayout(LayoutType Type, bool MinBranchClusters, bool Split); void modifyLayout(LayoutType Type, bool MinBranchClusters, bool Split);
@ -825,7 +874,7 @@ public:
return Names; return Names;
} }
State getCurrentState() const { State getState() const {
return CurrentState; return CurrentState;
} }
@ -882,7 +931,7 @@ public:
} }
/// Return MC symbol associated with the end of the function. /// Return MC symbol associated with the end of the function.
MCSymbol *getFunctionEndLabel() { MCSymbol *getFunctionEndLabel() const {
assert(BC.Ctx && "cannot be called with empty context"); assert(BC.Ctx && "cannot be called with empty context");
if (!FunctionEndLabel) { if (!FunctionEndLabel) {
FunctionEndLabel = BC.Ctx->createTempSymbol("func_end", true); FunctionEndLabel = BC.Ctx->createTempSymbol("func_end", true);
@ -891,7 +940,7 @@ public:
} }
/// Return MC symbol associated with the end of the cold part of the function. /// Return MC symbol associated with the end of the cold part of the function.
MCSymbol *getFunctionColdEndLabel() { MCSymbol *getFunctionColdEndLabel() const {
if (!FunctionColdEndLabel) { if (!FunctionColdEndLabel) {
FunctionColdEndLabel = BC.Ctx->createTempSymbol("func_cold_end", true); FunctionColdEndLabel = BC.Ctx->createTempSymbol("func_cold_end", true);
} }
@ -965,6 +1014,16 @@ public:
return UsesGnuArgsSize; return UsesGnuArgsSize;
} }
/// Return true if the function has more than one entry point, i.e. the
/// IsMultiEntry flag is set. The flag defaults to false and is raised by
/// addEntryPointAtOffset() when an entry is registered at a non-zero offset.
bool isMultiEntry() const {
return IsMultiEntry;
}
/// Return true if at least one jump table is recorded for this function.
bool hasJumpTables() const {
  return !JumpTables.empty();
}
const MCSymbol *getPersonalityFunction() const { const MCSymbol *getPersonalityFunction() const {
return PersonalityFunction; return PersonalityFunction;
} }
@ -988,9 +1047,9 @@ public:
return Address <= PC && PC < Address + Size; return Address <= PC && PC < Address + Size;
} }
/// Register alternative function name. /// Add new names this function is known under.
void addAlternativeName(std::string NewName) { void addNewNames(const std::vector<std::string> &NewNames) {
Names.emplace_back(NewName); Names.insert(Names.begin(), NewNames.begin(), NewNames.end());
} }
/// Create a basic block at a given \p Offset in the /// Create a basic block at a given \p Offset in the
@ -999,7 +1058,6 @@ public:
/// on the alignment of the existing offset. /// on the alignment of the existing offset.
/// The new block is not inserted into the CFG. The client must /// The new block is not inserted into the CFG. The client must
/// use insertBasicBlocks to add any new blocks to the CFG. /// use insertBasicBlocks to add any new blocks to the CFG.
///
std::unique_ptr<BinaryBasicBlock> std::unique_ptr<BinaryBasicBlock>
createBasicBlock(uint64_t Offset, createBasicBlock(uint64_t Offset,
MCSymbol *Label = nullptr, MCSymbol *Label = nullptr,
@ -1016,6 +1074,8 @@ public:
BB->setAlignment(std::min(DerivedAlignment, uint64_t(32))); BB->setAlignment(std::min(DerivedAlignment, uint64_t(32)));
} }
LabelToBB.emplace(Label, BB.get());
return BB; return BB;
} }
@ -1142,6 +1202,12 @@ public:
return (II == Instructions.end()) ? nullptr : &II->second; return (II == Instructions.end()) ? nullptr : &II->second;
} }
/// Return true if function has a profile, even if the profile does not
/// match CFG 100%.
///
/// Only checks that ExecutionCount differs from the COUNT_NO_PROFILE
/// sentinel; hasValidProfile() starts from the same check and adds a further
/// condition.
bool hasProfile() const {
return ExecutionCount != COUNT_NO_PROFILE;
}
/// Return true if function profile is present and accurate. /// Return true if function profile is present and accurate.
bool hasValidProfile() const { bool hasValidProfile() const {
return ExecutionCount != COUNT_NO_PROFILE && return ExecutionCount != COUNT_NO_PROFILE &&
@ -1298,45 +1364,17 @@ public:
return ExecutionCount; return ExecutionCount;
} }
/// Return the recorded execution count, mapping the COUNT_NO_PROFILE
/// sentinel (function without a profile) to 0.
uint64_t getKnownExecutionCount() const {
  if (ExecutionCount == COUNT_NO_PROFILE)
    return 0;
  return ExecutionCount;
}
/// Return original LSDA address for the function or NULL. /// Return original LSDA address for the function or NULL.
uint64_t getLSDAAddress() const { uint64_t getLSDAAddress() const {
return LSDAAddress; return LSDAAddress;
} }
/// Return the address of an identical function. If none is found this will
/// return NULL.
BinaryFunction *getIdenticalFunction() const {
return IdenticalFunction;
}
/// Set the address of an identical function.
void setIdenticalFunction(BinaryFunction *BF) {
IdenticalFunction = BF;
// Copy over the list of twins.
if (!Twins.empty()) {
BF->getTwins().insert(Twins.begin(), Twins.end());
Twins.clear();
}
}
/// Return functions that are duplicates of this one.
std::set<BinaryFunction *> &getTwins() {
return Twins;
}
/// Register function that is identical to this one.
void addIdenticalFunction(BinaryFunction *BF) {
Twins.emplace(BF);
}
/// Return true if this function is a duplicate of another function.
bool isDuplicate() const {
bool IsDuplicate = getIdenticalFunction();
assert((Twins.empty() || !IsDuplicate) &&
"function with twins cannot be a duplicate of another function");
return IsDuplicate;
}
/// Return symbol pointing to function's LSDA. /// Return symbol pointing to function's LSDA.
MCSymbol *getLSDASymbol() { MCSymbol *getLSDASymbol() {
if (LSDASymbol) if (LSDASymbol)
@ -1425,6 +1463,9 @@ public:
/// has been filled with LBR data. /// has been filled with LBR data.
void inferFallThroughCounts(); void inferFallThroughCounts();
/// Clear execution profile of the function.
void clearProfile();
/// Converts conditional tail calls to unconditional tail calls. We do this to /// Converts conditional tail calls to unconditional tail calls. We do this to
/// handle conditional tail calls correctly and to give a chance to the /// handle conditional tail calls correctly and to give a chance to the
/// simplify conditional tail call pass to decide whether to re-optimize them /// simplify conditional tail call pass to decide whether to re-optimize them
@ -1497,12 +1538,26 @@ public:
/// isIdenticalWith. /// isIdenticalWith.
void mergeProfileDataInto(BinaryFunction &BF) const; void mergeProfileDataInto(BinaryFunction &BF) const;
/// Returns true if this function has identical code and /// Returns true if this function has identical code and CFG with
/// CFG with the given function. /// the given function \p BF.
bool isIdenticalWith(const BinaryFunction &BF) const; ///
/// If \p IgnoreSymbols is set to true, then symbolic operands are ignored
/// during comparison.
///
/// If \p UseDFS is set to true, then compute DFS of each function and use
/// it for CFG equivalency. Potentially it will help to catch more cases,
/// but is slower.
bool isIdenticalWith(const BinaryFunction &BF,
bool IgnoreSymbols = false,
bool UseDFS = false) const;
/// Returns a hash value for the function. To be used for ICF. /// Returns a hash value for the function. To be used for ICF. Two congruent
std::size_t hash() const; /// functions (functions with different symbolic references but identical
/// otherwise) are required to have identical hashes.
///
/// If \p UseDFS is set, then process blocks in DFS order that we recompute.
/// Otherwise use the existing layout order.
std::size_t hash(bool Recompute = true, bool UseDFS = false) const;
/// Sets the associated .debug_info entry. /// Sets the associated .debug_info entry.
void addSubprogramDIE(DWARFCompileUnit *Unit, void addSubprogramDIE(DWARFCompileUnit *Unit,

View File

@ -65,12 +65,6 @@ SimplifyRODataLoads("simplify-rodata-loads",
static cl::opt<bool> OptimizeFrameAccesses( static cl::opt<bool> OptimizeFrameAccesses(
"frame-opt", cl::desc("optimize stack frame accesses"), cl::ZeroOrMore); "frame-opt", cl::desc("optimize stack frame accesses"), cl::ZeroOrMore);
static cl::opt<bool>
IdenticalCodeFolding(
"icf",
cl::desc("fold functions with identical code"),
cl::ZeroOrMore);
static cl::opt<bool> static cl::opt<bool>
PrintReordered("print-reordered", PrintReordered("print-reordered",
cl::desc("print functions after layout optimization"), cl::desc("print functions after layout optimization"),
@ -209,8 +203,7 @@ void BinaryFunctionPassManager::runAllPasses(
// Run this pass first to use stats for the original functions. // Run this pass first to use stats for the original functions.
Manager.registerPass(llvm::make_unique<PrintSortedBy>(NeverPrint)); Manager.registerPass(llvm::make_unique<PrintSortedBy>(NeverPrint));
Manager.registerPass(llvm::make_unique<IdenticalCodeFolding>(PrintICF), Manager.registerPass(llvm::make_unique<IdenticalCodeFolding>(PrintICF));
opts::IdenticalCodeFolding);
Manager.registerPass(llvm::make_unique<InlineSmallFunctions>(PrintInline), Manager.registerPass(llvm::make_unique<InlineSmallFunctions>(PrintInline),
opts::InlineSmallFunctions); opts::InlineSmallFunctions);
@ -223,6 +216,8 @@ void BinaryFunctionPassManager::runAllPasses(
llvm::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads), llvm::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads),
opts::SimplifyRODataLoads); opts::SimplifyRODataLoads);
Manager.registerPass(llvm::make_unique<IdenticalCodeFolding>(PrintICF));
Manager.registerPass(llvm::make_unique<ReorderBasicBlocks>(PrintReordered)); Manager.registerPass(llvm::make_unique<ReorderBasicBlocks>(PrintReordered));
Manager.registerPass(llvm::make_unique<Peepholes>(PrintPeepholes), Manager.registerPass(llvm::make_unique<Peepholes>(PrintPeepholes),

View File

@ -144,13 +144,27 @@ SctcMode(
cl::values(clEnumValN(SctcAlways, "always", "always perform sctc"), cl::values(clEnumValN(SctcAlways, "always", "always perform sctc"),
clEnumValN(SctcPreserveDirection, clEnumValN(SctcPreserveDirection,
"preserve", "preserve",
"only perform sctc when branch direction is preserved"), "only perform sctc when branch direction is "
"preserved"),
clEnumValN(SctcHeuristic, clEnumValN(SctcHeuristic,
"heuristic", "heuristic",
"use branch prediction data to control sctc"), "use branch prediction data to control sctc"),
clEnumValEnd), clEnumValEnd),
cl::ZeroOrMore); cl::ZeroOrMore);
static cl::opt<bool>
IdenticalCodeFolding(
"icf",
cl::desc("fold functions with identical code"),
cl::ZeroOrMore);
static cl::opt<bool>
UseDFSForICF(
"icf-dfs",
cl::desc("use DFS ordering when using -icf option"),
cl::ReallyHidden,
cl::ZeroOrMore);
} // namespace opts } // namespace opts
namespace llvm { namespace llvm {
@ -158,6 +172,7 @@ namespace bolt {
bool BinaryFunctionPass::shouldOptimize(const BinaryFunction &BF) const { bool BinaryFunctionPass::shouldOptimize(const BinaryFunction &BF) const {
return BF.isSimple() && return BF.isSimple() &&
BF.getState() == BinaryFunction::State::CFG &&
opts::shouldProcess(BF) && opts::shouldProcess(BF) &&
(BF.getSize() > 0); (BF.getSize() > 0);
} }
@ -813,7 +828,8 @@ void InlineSmallFunctions::runOnFunctions(
DEBUG(dbgs() << "BOLT-INFO: Inlined " << InlinedDynamicCalls << " of " DEBUG(dbgs() << "BOLT-INFO: Inlined " << InlinedDynamicCalls << " of "
<< TotalDynamicCalls << " function calls in the profile.\n" << TotalDynamicCalls << " function calls in the profile.\n"
<< "BOLT-INFO: Inlined calls represent " << "BOLT-INFO: Inlined calls represent "
<< format("%.1f", 100.0 * InlinedDynamicCalls / TotalInlineableCalls) << format("%.1f",
100.0 * InlinedDynamicCalls / TotalInlineableCalls)
<< "% of all inlineable calls in the profile.\n"); << "% of all inlineable calls in the profile.\n");
} }
@ -1304,220 +1320,143 @@ void SimplifyRODataLoads::runOnFunctions(
<< "BOLT-INFO: dynamic loads found: " << NumDynamicLoadsFound << "\n"; << "BOLT-INFO: dynamic loads found: " << NumDynamicLoadsFound << "\n";
} }
void IdenticalCodeFolding::discoverCallers( void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC,
BinaryContext &BC, std::map<uint64_t, BinaryFunction> &BFs) { std::map<uint64_t, BinaryFunction> &BFs,
for (auto &I : BFs) { std::set<uint64_t> &) {
BinaryFunction &Caller = I.second; if (!opts::IdenticalCodeFolding)
if (!shouldOptimize(Caller))
continue;
for (BinaryBasicBlock &BB : Caller) {
unsigned InstrIndex = 0;
for (MCInst &Inst : BB) {
if (!BC.MIA->isCall(Inst)) {
++InstrIndex;
continue;
}
const auto *TargetSymbol = BC.MIA->getTargetSymbol(Inst);
if (!TargetSymbol) {
// This is an indirect call, we cannot record a target.
++InstrIndex;
continue;
}
const auto *Function = BC.getFunctionForSymbol(TargetSymbol);
if (!Function) {
// Call to a function without a BinaryFunction object.
++InstrIndex;
continue;
}
// Insert a tuple in the Callers map.
Callers[Function].emplace_back(CallSite(&Caller, &BB, InstrIndex));
++InstrIndex;
}
}
}
}
void IdenticalCodeFolding::foldFunction(
BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
BinaryFunction *BFToFold,
BinaryFunction *BFToReplaceWith,
std::set<BinaryFunction *> &Modified) {
// Mark BFToFold as identical with BFToReplaceWith.
BFToFold->setIdenticalFunction(BFToReplaceWith);
// Add the size of BFToFold to the total size savings estimate.
BytesSavedEstimate += BFToFold->getSize();
// Get callers of BFToFold.
auto CI = Callers.find(BFToFold);
if (CI == Callers.end())
return; return;
std::vector<CallSite> &BFToFoldCallers = CI->second;
// Get callers of BFToReplaceWith. const auto OriginalFunctionCount = BFs.size();
std::vector<CallSite> &BFToReplaceWithCallers = Callers[BFToReplaceWith]; uint64_t NumFunctionsFolded = 0;
uint64_t NumJTFunctionsFolded = 0;
// Get MCSymbol for BFToReplaceWith. uint64_t BytesSavedEstimate = 0;
MCSymbol *SymbolToReplaceWith = static bool UseDFS = opts::UseDFSForICF;
BC.getOrCreateGlobalSymbol(BFToReplaceWith->getAddress(), "");
// Traverse callers of BFToFold and replace the calls with calls
// to BFToReplaceWith.
for (const CallSite &CS : BFToFoldCallers) {
// Get call instruction.
BinaryFunction *Caller = CS.Caller;
BinaryBasicBlock *CallBB = CS.Block;
MCInst &CallInst = CallBB->getInstructionAtIndex(CS.InstrIndex);
// Replace call target with BFToReplaceWith.
auto Success = BC.MIA->replaceCallTargetOperand(CallInst,
SymbolToReplaceWith,
BC.Ctx.get());
assert(Success && "unexpected call target prevented the replacement");
// Add this call site to the callers of BFToReplaceWith.
BFToReplaceWithCallers.emplace_back(CS);
// Add caller to the set of modified functions.
Modified.insert(Caller);
// Update dynamic calls folded stat.
if (Caller->hasValidProfile() &&
CallBB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
NumDynamicCallsFolded += CallBB->getExecutionCount();
}
// Remove all callers of BFToFold.
BFToFoldCallers.clear();
++NumFunctionsFolded;
// Merge execution counts of BFToFold into those of BFToReplaceWith.
BFToFold->mergeProfileDataInto(*BFToReplaceWith);
}
void IdenticalCodeFolding::runOnFunctions(
BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &
) {
discoverCallers(BC, BFs);
// This hash table is used to identify identical functions. It maps // This hash table is used to identify identical functions. It maps
// a function to a bucket of functions identical to it. // a function to a bucket of functions identical to it.
struct KeyHash { struct KeyHash {
std::size_t operator()(const BinaryFunction *F) const { return F->hash(); } std::size_t operator()(const BinaryFunction *F) const {
return F->hash(/*Recompute=*/false);
}
};
struct KeyCongruent {
bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
return A->isIdenticalWith(*B, /*IgnoreSymbols=*/true, /*UseDFS=*/UseDFS);
}
}; };
struct KeyEqual { struct KeyEqual {
bool operator()(const BinaryFunction *A, const BinaryFunction *B) const { bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
return A->isIdenticalWith(*B); return A->isIdenticalWith(*B, /*IgnoreSymbols=*/false, /*UseDFS=*/UseDFS);
} }
}; };
std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
KeyHash, KeyEqual> Buckets;
// Set that holds the functions that were modified by the last pass. // Create buckets with congruent functions - functions that potentially could
std::set<BinaryFunction *> Mod; // be folded.
std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>,
KeyHash, KeyCongruent> CongruentBuckets;
for (auto &BFI : BFs) {
auto &BF = BFI.second;
if (!shouldOptimize(BF))
continue;
// Vector of all the candidate functions to be tested for being identical // Make sure indices are in-order.
// to each other. Initialized with all simple functions. BF.updateLayoutIndices();
std::vector<BinaryFunction *> Cands;
for (auto &I : BFs) { // Pre-compute hash before pushing into hashtable.
auto &BF = I.second; BF.hash(/*Recompute=*/true, /*UseDFS*/UseDFS);
if (shouldOptimize(BF))
Cands.emplace_back(&BF); CongruentBuckets[&BF].emplace(&BF);
} }
// We repeat the icf pass until no new modifications happen. // We repeat the pass until no new modifications happen.
unsigned Iter = 1; unsigned Iteration = 1;
uint64_t NumFoldedLastIteration;
do { do {
Buckets.clear(); NumFoldedLastIteration = 0;
Mod.clear();
if (opts::Verbosity >= 1) { DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n");
outs() << "BOLT-INFO: icf pass " << Iter << "...\n";
}
uint64_t NumIdenticalFunctions = 0; for (auto &CBI : CongruentBuckets) {
auto &Candidates = CBI.second;
// Compare candidate functions using the Buckets hash table. Identical if (Candidates.size() < 2)
// functions are efficiently discovered and added to the same bucket.
for (BinaryFunction *BF : Cands) {
Buckets[BF].emplace_back(BF);
}
Cands.clear();
// Go through the functions of each bucket and fold any references to them
// with the references to the hottest function among them.
for (auto &I : Buckets) {
std::vector<BinaryFunction *> &IFs = I.second;
std::sort(IFs.begin(), IFs.end(),
[](const BinaryFunction *A, const BinaryFunction *B) {
if (!A->hasValidProfile() && !B->hasValidProfile())
return false;
if (!A->hasValidProfile())
return false;
if (!B->hasValidProfile())
return true;
return B->getExecutionCount() < A->getExecutionCount();
}
);
BinaryFunction *Hottest = IFs[0];
// For the next pass, we consider only one function from each set of
// identical functions.
Cands.emplace_back(Hottest);
if (IFs.size() <= 1)
continue; continue;
NumIdenticalFunctions += IFs.size() - 1; // Identical functions go into the same bucket.
for (unsigned i = 1; i < IFs.size(); ++i) { std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
BinaryFunction *BF = IFs[i]; KeyHash, KeyEqual> IdenticalBuckets;
Hottest->addIdenticalFunction(BF); for (auto *BF : Candidates) {
foldFunction(BC, BFs, BF, Hottest, Mod); IdenticalBuckets[BF].emplace_back(BF);
if (!MaxTwinFunction || }
MaxTwinFunction->getTwins().size() < Hottest->getTwins().size()) {
MaxTwinFunction = Hottest; for (auto &IBI : IdenticalBuckets) {
// Functions identified as identical.
auto &Twins = IBI.second;
if (Twins.size() < 2)
continue;
// Fold functions. Keep the order consistent across invocations with
// different options.
std::stable_sort(Twins.begin(), Twins.end(),
[](const BinaryFunction *A, const BinaryFunction *B) {
return A->getFunctionNumber() < B->getFunctionNumber();
});
BinaryFunction *ParentBF = Twins[0];
for (unsigned i = 1; i < Twins.size(); ++i) {
auto *ChildBF = Twins[i];
DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into "
<< *ParentBF << '\n');
// Remove child function from the list of candidates.
auto FI = Candidates.find(ChildBF);
assert(FI != Candidates.end() &&
"function expected to be in the set");
Candidates.erase(FI);
// Fold the function and remove from the list of processed functions.
BC.foldFunction(*ChildBF, *ParentBF, BFs);
BytesSavedEstimate += ChildBF->getSize();
++NumFoldedLastIteration;
if (ParentBF->hasJumpTables())
++NumJTFunctionsFolded;
} }
} }
} }
NumFunctionsFolded += NumFoldedLastIteration;
++Iteration;
if (opts::Verbosity >= 1) { } while (NumFoldedLastIteration > 0);
outs() << "BOLT-INFO: found " << NumIdenticalFunctions
<< " identical functions.\n" DEBUG(
<< "BOLT-INFO: modified " << Mod.size() << " functions.\n"; // Print functions that are congruent but not identical.
for (auto &CBI : CongruentBuckets) {
auto &Candidates = CBI.second;
if (Candidates.size() < 2)
continue;
dbgs() << "BOLT-DEBUG: the following " << Candidates.size()
<< " functions (each of size " << (*Candidates.begin())->getSize()
<< " bytes) are congruent but not identical:\n";
for (auto *BF : Candidates) {
dbgs() << " " << *BF;
if (BF->getKnownExecutionCount()) {
dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)";
}
dbgs() << '\n';
}
} }
);
NumIdenticalFunctionsFound += NumIdenticalFunctions; if (NumFunctionsFolded) {
outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded
++Iter; << " out of " << OriginalFunctionCount << " functions in "
} while (!Mod.empty()); << Iteration << " passes. "
<< NumJTFunctionsFolded << " functions had jump tables.\n"
outs() << "BOLT-INFO: ICF pass found " << NumIdenticalFunctionsFound << "BOLT-INFO: Removing all identical functions will save "
<< " functions identical to some other function.\n" << format("%.2lf", (double) BytesSavedEstimate / 1024)
<< "BOLT-INFO: ICF pass folded references to " << NumFunctionsFolded << " KB of code space.\n";
<< " functions.\n"
<< "BOLT-INFO: ICF pass folded " << NumDynamicCallsFolded << " dynamic"
<< " function calls.\n"
<< "BOLT-INFO: Removing all identical functions could save "
<< format("%.2lf", (double) BytesSavedEstimate / 1024)
<< " KB of code space.\n";
if (MaxTwinFunction) {
outs() << "BOLT-INFO: Function with maximum number of twins ("
<< MaxTwinFunction->getTwins().size() << ") is " << *MaxTwinFunction
<< '\n';
} }
} }

View File

@ -363,41 +363,7 @@ public:
/// references to a single one of them. /// references to a single one of them.
/// ///
class IdenticalCodeFolding : public BinaryFunctionPass { class IdenticalCodeFolding : public BinaryFunctionPass {
uint64_t NumIdenticalFunctionsFound{0}; public:
uint64_t NumFunctionsFolded{0};
uint64_t NumDynamicCallsFolded{0};
uint64_t BytesSavedEstimate{0};
BinaryFunction *MaxTwinFunction{nullptr};
/// Map from a binary function to its callers.
/// A single direct call instruction, identified by where it lives.
struct CallSite {
  /// Function that contains the call instruction.
  BinaryFunction *Caller;
  /// Basic block of \p Caller that contains the call instruction.
  BinaryBasicBlock *Block;
  /// Position of the call instruction inside \p Block, counted from 0.
  unsigned InstrIndex;

  CallSite(BinaryFunction *CallerFunction,
           BinaryBasicBlock *CallBlock,
           unsigned CallIndex)
      : Caller(CallerFunction), Block(CallBlock), InstrIndex(CallIndex) {}
};
using CallerMap = std::map<const BinaryFunction *, std::vector<CallSite>>;
CallerMap Callers;
/// Replaces all calls to BFToFold with calls to BFToReplaceWith and merges
/// the profile data of BFToFold with those of BFToReplaceWith. All modified
/// functions are added to the Modified set.
void foldFunction(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
BinaryFunction *BFToFold,
BinaryFunction *BFToReplaceWith,
std::set<BinaryFunction *> &Modified);
/// Finds callers for each binary function and populates the Callers
/// map.
void discoverCallers(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs);
public:
explicit IdenticalCodeFolding(const cl::opt<bool> &PrintPass) explicit IdenticalCodeFolding(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) { } : BinaryFunctionPass(PrintPass) { }

View File

@ -131,7 +131,7 @@ void GreedyClusterAlgorithm::clusterBasicBlocks(const BinaryFunction &BF,
// Populate priority queue with edges. // Populate priority queue with edges.
auto BI = BB->branch_info_begin(); auto BI = BB->branch_info_begin();
for (auto &I : BB->successors()) { for (auto &I : BB->successors()) {
assert(BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE && assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
"attempted reordering blocks of function with no profile data"); "attempted reordering blocks of function with no profile data");
Queue.emplace_back(EdgeTy(BB, I, BI->Count)); Queue.emplace_back(EdgeTy(BB, I, BI->Count));
++BI; ++BI;
@ -252,7 +252,7 @@ int64_t MinBranchGreedyClusterAlgorithm::calculateWeight(
// Adjust the weight by taking into account other edges with the same source. // Adjust the weight by taking into account other edges with the same source.
auto BI = SrcBB->branch_info_begin(); auto BI = SrcBB->branch_info_begin();
for (const BinaryBasicBlock *SuccBB : SrcBB->successors()) { for (const BinaryBasicBlock *SuccBB : SrcBB->successors()) {
assert(BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE && assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
"attempted reordering blocks of function with no profile data"); "attempted reordering blocks of function with no profile data");
assert(BI->Count <= std::numeric_limits<int64_t>::max() && assert(BI->Count <= std::numeric_limits<int64_t>::max() &&
"overflow detected"); "overflow detected");
@ -277,7 +277,7 @@ int64_t MinBranchGreedyClusterAlgorithm::calculateWeight(
++BI; ++BI;
} }
assert(BI != PredBB->branch_info_end() && "invalid control flow graph"); assert(BI != PredBB->branch_info_end() && "invalid control flow graph");
assert(BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE && assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
"attempted reordering blocks of function with no profile data"); "attempted reordering blocks of function with no profile data");
assert(BI->Count <= std::numeric_limits<int64_t>::max() && assert(BI->Count <= std::numeric_limits<int64_t>::max() &&
"overflow detected"); "overflow detected");
@ -399,7 +399,7 @@ void OptimalReorderAlgorithm::reorderBasicBlocks(
auto BI = BB->branch_info_begin(); auto BI = BB->branch_info_begin();
Weight[BBToIndex[BB]].resize(N); Weight[BBToIndex[BB]].resize(N);
for (auto I : BB->successors()) { for (auto I : BB->successors()) {
if (BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) if (BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE)
Weight[BBToIndex[BB]][BBToIndex[I]] = BI->Count; Weight[BBToIndex[BB]][BBToIndex[I]] = BI->Count;
++BI; ++BI;
} }

View File

@ -1706,12 +1706,8 @@ void emitFunction(MCStreamer &Streamer, BinaryFunction &Function,
if (Function.getSize() == 0) if (Function.getSize() == 0)
return; return;
if (Function.isDuplicate()) { if (Function.getState() == BinaryFunction::State::Empty)
DEBUG(dbgs() << "BOLT-DEBUG: skipping code generation for function "
<< Function << " because it is a duplicate of function "
<< *Function.getIdenticalFunction() << '\n');
return; return;
}
MCSection *Section; MCSection *Section;
if (opts::Relocs) { if (opts::Relocs) {
@ -1741,57 +1737,17 @@ void emitFunction(MCStreamer &Streamer, BinaryFunction &Function,
Streamer.EmitCodeAlignment(Function.getAlignment()); Streamer.EmitCodeAlignment(Function.getAlignment());
} }
// Emit all names the function and its twins (for ICF) are known under.
auto EmitFunctionSymbols = [&](BinaryFunction &BF) {
for (const auto &Name : BF.getNames()) {
Twine EmitName = EmitColdPart ? Twine(Name).concat(".cold") : Name;
auto *EmitSymbol = BC.Ctx->getOrCreateSymbol(EmitName);
Streamer.EmitSymbolAttribute(EmitSymbol, MCSA_ELF_TypeFunction);
DEBUG(dbgs() << "emitting symbol " << EmitSymbol->getName()
<< " for function " << BF
<< " from function " << Function << '\n');
Streamer.EmitLabel(EmitSymbol);
}
};
MCContext &Context = Streamer.getContext(); MCContext &Context = Streamer.getContext();
const MCAsmInfo *MAI = Context.getAsmInfo(); const MCAsmInfo *MAI = Context.getAsmInfo();
// Emit all names the function is known under. // Emit all names the function is known under.
EmitFunctionSymbols(Function); for (const auto &Name : Function.getNames()) {
for (auto *TwinFunction : Function.getTwins()) { Twine EmitName = EmitColdPart ? Twine(Name).concat(".cold") : Name;
assert(TwinFunction != &Function && "function cannot be its own twin"); auto *EmitSymbol = BC.Ctx->getOrCreateSymbol(EmitName);
EmitFunctionSymbols(*TwinFunction); Streamer.EmitSymbolAttribute(EmitSymbol, MCSA_ELF_TypeFunction);
} DEBUG(dbgs() << "emitting symbol " << EmitSymbol->getName()
<< " for function " << Function << '\n');
if (opts::Verbosity >= 2 && !EmitColdPart && !Function.getTwins().empty()) { Streamer.EmitLabel(EmitSymbol);
std::vector<std::string> AllNames;
AllNames.insert(AllNames.end(),
Function.getNames().begin(),
Function.getNames().end());
for (auto *TwinFunction : Function.getTwins()) {
AllNames.insert(AllNames.end(),
TwinFunction->getNames().begin(),
TwinFunction->getNames().end());
}
outs() << "BOLT-INFO: all duplicate names (" << AllNames.size()
<< ") for function " << Function << ": \n";
for (const auto &Name : AllNames) {
outs() << " " << Name << '\n';
}
AllNames.clear();
AllNames.emplace_back(Function.getPrintName());
for (auto *TwinFunction : Function.getTwins()) {
AllNames.emplace_back(TwinFunction->getPrintName());
}
std::sort(AllNames.begin(), AllNames.end());
outs() << "BOLT-INFO: all ICF names (" << AllNames.size()
<< ") for function " << Function << ": \n";
for (const auto &Name : AllNames) {
outs() << " " << Name << '\n';
}
} }
// Emit CFI start // Emit CFI start
@ -2141,9 +2097,6 @@ void RewriteInstance::mapFileSections(
if (!Function.isSimple() || !opts::shouldProcess(Function)) if (!Function.isSimple() || !opts::shouldProcess(Function))
continue; continue;
if (Function.isDuplicate())
continue;
auto TooLarge = false; auto TooLarge = false;
auto SMII = SectionMM->SectionMapInfo.find(Function.getCodeSectionName()); auto SMII = SectionMM->SectionMapInfo.find(Function.getCodeSectionName());
assert(SMII != SectionMM->SectionMapInfo.end() && assert(SMII != SectionMM->SectionMapInfo.end() &&
@ -2936,7 +2889,7 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
} }
uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) { uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) {
const auto *Function = getBinaryFunctionContainingAddress(OldAddress); const auto *Function = getBinaryFunctionAtAddress(OldAddress);
if (!Function) if (!Function)
return 0; return 0;
auto JITS = OLT.findSymbol(Function->getSymbol()->getName(), false); auto JITS = OLT.findSymbol(Function->getSymbol()->getName(), false);
@ -3213,3 +3166,12 @@ RewriteInstance::getBinaryFunctionContainingAddress(uint64_t Address,
return nullptr; return nullptr;
return &FI->second; return &FI->second;
} }
/// Find the function associated with the global symbol at \p Address.
///
/// Queries BC for the global symbol located at \p Address. When such a symbol
/// exists, the result of BC->getFunctionForSymbol() for it is returned
/// unchanged; when there is no global symbol at \p Address, nullptr is
/// returned.
const BinaryFunction *
RewriteInstance::getBinaryFunctionAtAddress(uint64_t Address) const {
  if (const auto *SymbolAtAddress = BC->getGlobalSymbolAtAddress(Address))
    return BC->getFunctionForSymbol(SymbolAtAddress);
  return nullptr;
}

View File

@ -192,7 +192,8 @@ public:
/// disassembleFunctions(), also preserve the original version. /// disassembleFunctions(), also preserve the original version.
void rewriteFile(); void rewriteFile();
/// Return address of the function in the new binary. /// Return address of a function in the new binary corresponding to
/// \p OldAddress address in the original binary.
uint64_t getNewFunctionAddress(uint64_t OldAddress); uint64_t getNewFunctionAddress(uint64_t OldAddress);
/// Return value for the symbol \p Name in the output. /// Return value for the symbol \p Name in the output.
@ -211,6 +212,8 @@ public:
BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address, BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address,
bool CheckPastEnd = false); bool CheckPastEnd = false);
const BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address) const;
private: private:
/// Detect addresses and offsets available in the binary for allocating /// Detect addresses and offsets available in the binary for allocating
@ -382,7 +385,7 @@ private:
/// Store all non-zero symbols in this map for a quick address lookup. /// Store all non-zero symbols in this map for a quick address lookup.
std::map<uint64_t, llvm::object::SymbolRef> FileSymRefs; std::map<uint64_t, llvm::object::SymbolRef> FileSymRefs;
/// Store all functions seen in the binary, sorted by address. /// Store all functions in the binary, sorted by original address.
std::map<uint64_t, BinaryFunction> BinaryFunctions; std::map<uint64_t, BinaryFunction> BinaryFunctions;
/// Stores and serializes information that will be put into the .debug_ranges /// Stores and serializes information that will be put into the .debug_ranges