[BOLT] Eliminate "shallow" function lookup

Summary:
Whenever we search for a function based on its address in the input
binary, we now always return a corresponding fragment for split
functions. If the user needs an access to the main fragment, they can
call getTopmostFragment().

(cherry picked from FBD23670311)
This commit is contained in:
Maksim Panchenko 2020-09-14 15:48:32 -07:00
parent 62469b5036
commit a82cff0f52
9 changed files with 111 additions and 103 deletions

View File

@ -1018,33 +1018,40 @@ void BinaryContext::generateSymbolHashes() {
void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) { void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) {
for (auto Address : Function.InterproceduralReferences) { for (auto Address : Function.InterproceduralReferences) {
auto *ContainingFunction = getBinaryFunctionContainingAddress(Address); if (!Address)
if (&Function == ContainingFunction)
continue; continue;
if (ContainingFunction) { auto *TargetFunction = getBinaryFunctionContainingAddress(Address);
if (&Function == TargetFunction)
continue;
if (TargetFunction) {
if (TargetFunction->IsFragment) {
// Only a parent function (or a sibling) can reach its fragment. // Only a parent function (or a sibling) can reach its fragment.
if (ContainingFunction->IsFragment) {
assert(!Function.IsFragment && assert(!Function.IsFragment &&
"only one cold fragment is supported at this time"); "only one cold fragment is supported at this time");
ContainingFunction->setParentFunction(&Function); if (auto *TargetParent = TargetFunction->getParentFragment()) {
Function.addFragment(ContainingFunction); assert(TargetParent == &Function && "mismatching parent function");
continue;
}
TargetFunction->setParentFragment(Function);
Function.addFragment(*TargetFunction);
if (!HasRelocations) { if (!HasRelocations) {
ContainingFunction->setSimple(false); TargetFunction->setSimple(false);
Function.setSimple(false); Function.setSimple(false);
} }
if (opts::Verbosity >= 1) { if (opts::Verbosity >= 1) {
outs() << "BOLT-INFO: marking " << *ContainingFunction outs() << "BOLT-INFO: marking " << *TargetFunction
<< " as a fragment of " << Function << '\n'; << " as a fragment of " << Function << '\n';
} }
} else if (TargetFunction->getAddress() != Address) {
TargetFunction->
addEntryPointAtOffset(Address - TargetFunction->getAddress());
}
continue; continue;
} }
if (ContainingFunction->getAddress() != Address) {
ContainingFunction->
addEntryPointAtOffset(Address - ContainingFunction->getAddress());
}
} else if (Address) {
// Check if address falls in function padding space - this could be // Check if address falls in function padding space - this could be
// unmarked data in code. In this case adjust the padding space size. // unmarked data in code. In this case adjust the padding space size.
auto Section = getSectionForAddress(Address); auto Section = getSectionForAddress(Address);
@ -1058,26 +1065,25 @@ void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) {
if (SectionName == ".plt" || SectionName == ".plt.got") if (SectionName == ".plt" || SectionName == ".plt.got")
continue; continue;
if (opts::UseOldText) { if (opts::processAllFunctions()) {
errs() << "BOLT-ERROR: cannot process binaries with unmarked " errs() << "BOLT-ERROR: cannot process binaries with unmarked "
<< "object in code at address 0x" << "object in code at address 0x"
<< Twine::utohexstr(Address) << " belonging to section " << Twine::utohexstr(Address) << " belonging to section "
<< SectionName << " in relocation mode.\n"; << SectionName << " in current mode\n";
exit(1); exit(1);
} }
ContainingFunction = TargetFunction =
getBinaryFunctionContainingAddress(Address, getBinaryFunctionContainingAddress(Address,
/*CheckPastEnd=*/false, /*CheckPastEnd=*/false,
/*UseMaxSize=*/true); /*UseMaxSize=*/true);
// We are not going to overwrite non-simple functions, but for simple // We are not going to overwrite non-simple functions, but for simple
// ones - adjust the padding size. // ones - adjust the padding size.
if (ContainingFunction && ContainingFunction->isSimple()) { if (TargetFunction && TargetFunction->isSimple()) {
errs() << "BOLT-WARNING: function " << *ContainingFunction errs() << "BOLT-WARNING: function " << *TargetFunction
<< " has an object detected in a padding region at address 0x" << " has an object detected in a padding region at address 0x"
<< Twine::utohexstr(Address) << '\n'; << Twine::utohexstr(Address) << '\n';
ContainingFunction->setMaxSize(ContainingFunction->getSize()); TargetFunction->setMaxSize(TargetFunction->getSize());
}
} }
} }
@ -1982,8 +1988,7 @@ uint64_t BinaryContext::getHotThreshold() const {
BinaryFunction * BinaryFunction *
BinaryContext::getBinaryFunctionContainingAddress(uint64_t Address, BinaryContext::getBinaryFunctionContainingAddress(uint64_t Address,
bool CheckPastEnd, bool CheckPastEnd,
bool UseMaxSize, bool UseMaxSize) {
bool Shallow) {
auto FI = BinaryFunctions.upper_bound(Address); auto FI = BinaryFunctions.upper_bound(Address);
if (FI == BinaryFunctions.begin()) if (FI == BinaryFunctions.begin())
return nullptr; return nullptr;
@ -1995,28 +2000,17 @@ BinaryContext::getBinaryFunctionContainingAddress(uint64_t Address,
if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
return nullptr; return nullptr;
auto *BF = &FI->second; return &FI->second;
if (Shallow)
return BF;
while (BF->getParentFunction())
BF = BF->getParentFunction();
return BF;
} }
BinaryFunction * BinaryFunction *
BinaryContext::getBinaryFunctionAtAddress(uint64_t Address, bool Shallow) { BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
// First, try to find a function starting at the given address. If the // First, try to find a function starting at the given address. If the
// function was folded, this will get us the original folded function if it // function was folded, this will get us the original folded function if it
// wasn't removed from the list, e.g. in non-relocation mode. // wasn't removed from the list, e.g. in non-relocation mode.
auto BFI = BinaryFunctions.find(Address); auto BFI = BinaryFunctions.find(Address);
if (BFI != BinaryFunctions.end()) { if (BFI != BinaryFunctions.end()) {
auto *BF = &BFI->second; return &BFI->second;
while (!Shallow && BF->getParentFunction() && !Shallow) {
BF = BF->getParentFunction();
}
return BF;
} }
// We might have folded the function matching the object at the given // We might have folded the function matching the object at the given
@ -2026,13 +2020,9 @@ BinaryContext::getBinaryFunctionAtAddress(uint64_t Address, bool Shallow) {
if (const auto *BD = getBinaryDataAtAddress(Address)) { if (const auto *BD = getBinaryDataAtAddress(Address)) {
uint64_t EntryID{0}; uint64_t EntryID{0};
auto *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); auto *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
if (BF && EntryID == 0) { if (BF && EntryID == 0)
while (BF->getParentFunction() && !Shallow) {
BF = BF->getParentFunction();
}
return BF; return BF;
} }
}
return nullptr; return nullptr;
} }

View File

@ -276,7 +276,7 @@ public:
} }
/// Return BinaryFunction containing a given \p Address or nullptr if /// Return BinaryFunction containing a given \p Address or nullptr if
/// no registered function has it. /// no registered function contains the \p Address.
/// ///
/// In a binary a function has somewhat vague boundaries. E.g. a function can /// In a binary a function has somewhat vague boundaries. E.g. a function can
/// refer to the first byte past the end of the function, and it will still be /// refer to the first byte past the end of the function, and it will still be
@ -294,19 +294,14 @@ public:
/// body and the next object in address ranges that we check. /// body and the next object in address ranges that we check.
BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address, BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address,
bool CheckPastEnd = false, bool CheckPastEnd = false,
bool UseMaxSize = false, bool UseMaxSize = false);
bool Shallow = false);
/// Return BinaryFunction which has a fragment that starts at a given /// Return a BinaryFunction that starts at a given \p Address.
/// \p Address. If the BinaryFunction is a child fragment, then return its BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address);
/// parent unless \p Shallow parameter is set to true.
BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address,
bool Shallow = false);
const BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address, const BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address) const {
bool Shallow = false) const {
return const_cast<BinaryContext *>(this)-> return const_cast<BinaryContext *>(this)->
getBinaryFunctionAtAddress(Address, Shallow); getBinaryFunctionAtAddress(Address);
} }
/// Return size of an entry for the given jump table \p Type. /// Return size of an entry for the given jump table \p Type.

View File

@ -436,8 +436,8 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
if (isFolded()) { if (isFolded()) {
OS << "\n FoldedInto : " << *getFoldedIntoFunction(); OS << "\n FoldedInto : " << *getFoldedIntoFunction();
} }
if (ParentFunction) { if (ParentFragment) {
OS << "\n Parent : " << *ParentFunction; OS << "\n Parent : " << *ParentFragment;
} }
if (!Fragments.empty()) { if (!Fragments.empty()) {
OS << "\n Fragments : "; OS << "\n Fragments : ";

View File

@ -112,7 +112,10 @@ inline raw_ostream &operator<<(raw_ostream &OS,
/// BinaryFunction is a representation of machine-level function. /// BinaryFunction is a representation of machine-level function.
/// ///
/// We use the term "Binary" as "Machine" was already taken. /// In the input binary, an instance of BinaryFunction can represent a fragment
/// of a function if the higher-level function was split, e.g. into hot and cold
/// parts. The fragment containing the main entry point is called a parent
/// or the main fragment.
class BinaryFunction { class BinaryFunction {
public: public:
enum class State : char { enum class State : char {
@ -324,8 +327,8 @@ private:
/// Name for the corresponding cold code section. /// Name for the corresponding cold code section.
std::string ColdCodeSectionName; std::string ColdCodeSectionName;
/// Parent function for split function fragments. /// Parent function fragment for split function fragments.
BinaryFunction *ParentFunction{nullptr}; BinaryFunction *ParentFragment{nullptr};
/// Indicate if the function body was folded into another function. /// Indicate if the function body was folded into another function.
/// Used by ICF optimization. /// Used by ICF optimization.
@ -614,14 +617,18 @@ private:
/// instead of calling this function directly. /// instead of calling this function directly.
uint64_t getEntryIDForSymbol(const MCSymbol *EntrySymbol) const; uint64_t getEntryIDForSymbol(const MCSymbol *EntrySymbol) const;
void setParentFunction(BinaryFunction *BF) { /// If the function represents a secondary split function fragment, set its
assert((!ParentFunction || ParentFunction == BF) && /// parent fragment to \p BF.
void setParentFragment(BinaryFunction &BF) {
assert(IsFragment && "function must be a fragment to have a parent");
assert((!ParentFragment || ParentFragment == &BF) &&
"cannot have more than one parent function"); "cannot have more than one parent function");
ParentFunction = BF; ParentFragment = &BF;
} }
void addFragment(BinaryFunction *BF) { /// Register a child fragment for the main fragment of a split function.
Fragments.insert(BF); void addFragment(BinaryFunction &BF) {
Fragments.insert(&BF);
} }
void addInstruction(uint64_t Offset, MCInst &&Instruction) { void addInstruction(uint64_t Offset, MCInst &&Instruction) {
@ -1922,8 +1929,25 @@ public:
return ImageSize; return ImageSize;
} }
BinaryFunction *getParentFunction() const { /// Return true if the function is a secondary fragment of another function.
return ParentFunction; bool isFragment() const {
return IsFragment;
}
/// Return parent function fragment if this function is a secondary (child)
/// fragment of another function.
BinaryFunction *getParentFragment() const {
return ParentFragment;
}
/// If the function is a nested child fragment of another function, return its
/// topmost parent fragment.
const BinaryFunction *getTopmostFragment() const {
const BinaryFunction *BF = this;
while (BF->getParentFragment())
BF = BF->getParentFragment();
return BF;
} }
/// Set the profile data for the number of times the function was called. /// Set the profile data for the number of times the function was called.

View File

@ -157,6 +157,9 @@ void DWARFRewriter::updateUnitDebugInfo(
IsFunctionDef = true; IsFunctionDef = true;
const auto *Function = BC.getBinaryFunctionAtAddress(Address); const auto *Function = BC.getBinaryFunctionAtAddress(Address);
if (Function && Function->isFragment())
Function = Function->getTopmostFragment();
if (Function && Function->isFolded()) if (Function && Function->isFolded())
Function = nullptr; Function = nullptr;
FunctionStack.push_back(Function); FunctionStack.push_back(Function);

View File

@ -704,14 +704,8 @@ DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
if (!BC->containsAddress(Address)) if (!BC->containsAddress(Address))
return nullptr; return nullptr;
// Use shallow search to avoid fetching the parent function, in case
// BinaryContext linked two functions. When aggregating data and writing the
// profile, we want to write offsets relative to the closest symbol in the
// symbol table, not relative to the parent function, to avoid creating
// profile that is too fragile and depends on the layout of other functions.
return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false, return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
/*UseMaxSize=*/true, /*UseMaxSize=*/true);
/*Shallow=*/true);
} }
StringRef DataAggregator::getLocationName(BinaryFunction &Func, StringRef DataAggregator::getLocationName(BinaryFunction &Func,

View File

@ -3972,8 +3972,7 @@ void RewriteInstance::updateELFSymbolTable(
if (!PatchExisting && shouldStrip(Symbol)) if (!PatchExisting && shouldStrip(Symbol))
continue; continue;
const auto *Function = BC->getBinaryFunctionAtAddress(Symbol.st_value, const auto *Function = BC->getBinaryFunctionAtAddress(Symbol.st_value);
/*Shallow=*/true);
// Ignore false function references, e.g. when the section address matches // Ignore false function references, e.g. when the section address matches
// the address of the function. // the address of the function.
if (Function && Symbol.getType() == ELF::STT_SECTION) if (Function && Symbol.getType() == ELF::STT_SECTION)
@ -4012,8 +4011,7 @@ void RewriteInstance::updateELFSymbolTable(
Function = (Symbol.getType() == ELF::STT_FUNC) Function = (Symbol.getType() == ELF::STT_FUNC)
? BC->getBinaryFunctionContainingAddress(Symbol.st_value, ? BC->getBinaryFunctionContainingAddress(Symbol.st_value,
/*CheckPastEnd=*/false, /*CheckPastEnd=*/false,
/*UseMaxSize=*/true, /*UseMaxSize=*/true)
/*Shallow=*/true)
: nullptr; : nullptr;
if (Function && Function->isEmitted()) { if (Function && Function->isEmitted()) {
@ -4061,8 +4059,7 @@ void RewriteInstance::updateELFSymbolTable(
Symbol.st_size == 0) { Symbol.st_size == 0) {
if (BC->getBinaryFunctionContainingAddress(Symbol.st_value, if (BC->getBinaryFunctionContainingAddress(Symbol.st_value,
/*CheckPastEnd=*/false, /*CheckPastEnd=*/false,
/*UseMaxSize=*/true, /*UseMaxSize=*/true)) {
/*Shallow=*/true)) {
// Can only delete the symbol if not patching. Such symbols should // Can only delete the symbol if not patching. Such symbols should
// not exist in the dynamic symbol table. // not exist in the dynamic symbol table.
assert(!PatchExisting && "cannot delete symbol"); assert(!PatchExisting && "cannot delete symbol");
@ -4469,10 +4466,12 @@ void RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) { uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) {
const auto *Function = BC->getBinaryFunctionAtAddress(OldAddress, const auto *Function = BC->getBinaryFunctionAtAddress(OldAddress);
/*Shallow=*/true);
if (!Function) if (!Function)
return 0; return 0;
assert(!Function->isFragment() && "cannot get new address for a fragment");
return Function->getOutputAddress(); return Function->getOutputAddress();
} }

View File

@ -57,6 +57,7 @@ void HugifyRuntimeLibrary::adjustCommandLineOptions(
void HugifyRuntimeLibrary::emitBinary(BinaryContext &BC, MCStreamer &Streamer) { void HugifyRuntimeLibrary::emitBinary(BinaryContext &BC, MCStreamer &Streamer) {
const auto *StartFunction = const auto *StartFunction =
BC.getBinaryFunctionAtAddress(*(BC.StartFunctionAddress)); BC.getBinaryFunctionAtAddress(*(BC.StartFunctionAddress));
assert(!StartFunction->isFragment() && "expected main function fragment");
if (!StartFunction) { if (!StartFunction) {
errs() << "BOLT-ERROR: failed to locate function at binary start address\n"; errs() << "BOLT-ERROR: failed to locate function at binary start address\n";
exit(1); exit(1);

View File

@ -61,12 +61,14 @@ void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
MCStreamer &Streamer) { MCStreamer &Streamer) {
const auto *StartFunction = const auto *StartFunction =
BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress); BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress);
assert(!StartFunction->isFragment() && "expected main function fragment");
if (!StartFunction) { if (!StartFunction) {
errs() << "BOLT-ERROR: failed to locate function at binary start address\n"; errs() << "BOLT-ERROR: failed to locate function at binary start address\n";
exit(1); exit(1);
} }
const auto *FiniFunction = const auto *FiniFunction =
BC.getBinaryFunctionAtAddress(*BC.FiniFunctionAddress); BC.getBinaryFunctionAtAddress(*BC.FiniFunctionAddress);
assert(!FiniFunction->isFragment() && "expected main function fragment");
if (!FiniFunction) { if (!FiniFunction) {
errs() << "BOLT-ERROR: failed to locate function at binary fini address\n"; errs() << "BOLT-ERROR: failed to locate function at binary fini address\n";
exit(1); exit(1);