[BOLT] Eliminate "shallow" function lookup

Summary:
Whenever we search for a function based on its address in the input
binary, we now always return a corresponding fragment for split
functions. If the user needs access to the main fragment, they can
call getTopmostFragment().

(cherry picked from FBD23670311)
This commit is contained in:
Maksim Panchenko 2020-09-14 15:48:32 -07:00
parent 62469b5036
commit a82cff0f52
9 changed files with 111 additions and 103 deletions

View File

@ -1018,66 +1018,72 @@ void BinaryContext::generateSymbolHashes() {
void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) {
for (auto Address : Function.InterproceduralReferences) {
auto *ContainingFunction = getBinaryFunctionContainingAddress(Address);
if (&Function == ContainingFunction)
if (!Address)
continue;
if (ContainingFunction) {
// Only a parent function (or a sibling) can reach its fragment.
if (ContainingFunction->IsFragment) {
auto *TargetFunction = getBinaryFunctionContainingAddress(Address);
if (&Function == TargetFunction)
continue;
if (TargetFunction) {
if (TargetFunction->IsFragment) {
// Only a parent function (or a sibling) can reach its fragment.
assert(!Function.IsFragment &&
"only one cold fragment is supported at this time");
ContainingFunction->setParentFunction(&Function);
Function.addFragment(ContainingFunction);
if (auto *TargetParent = TargetFunction->getParentFragment()) {
assert(TargetParent == &Function && "mismatching parent function");
continue;
}
TargetFunction->setParentFragment(Function);
Function.addFragment(*TargetFunction);
if (!HasRelocations) {
ContainingFunction->setSimple(false);
TargetFunction->setSimple(false);
Function.setSimple(false);
}
if (opts::Verbosity >= 1) {
outs() << "BOLT-INFO: marking " << *ContainingFunction
outs() << "BOLT-INFO: marking " << *TargetFunction
<< " as a fragment of " << Function << '\n';
}
continue;
} else if (TargetFunction->getAddress() != Address) {
TargetFunction->
addEntryPointAtOffset(Address - TargetFunction->getAddress());
}
if (ContainingFunction->getAddress() != Address) {
ContainingFunction->
addEntryPointAtOffset(Address - ContainingFunction->getAddress());
}
} else if (Address) {
// Check if address falls in function padding space - this could be
// unmarked data in code. In this case adjust the padding space size.
auto Section = getSectionForAddress(Address);
assert(Section && "cannot get section for referenced address");
continue;
}
if (!Section->isText())
continue;
// Check if address falls in function padding space - this could be
// unmarked data in code. In this case adjust the padding space size.
auto Section = getSectionForAddress(Address);
assert(Section && "cannot get section for referenced address");
// PLT requires special handling and could be ignored in this context.
StringRef SectionName = Section->getName();
if (SectionName == ".plt" || SectionName == ".plt.got")
continue;
if (!Section->isText())
continue;
if (opts::UseOldText) {
errs() << "BOLT-ERROR: cannot process binaries with unmarked "
<< "object in code at address 0x"
<< Twine::utohexstr(Address) << " belonging to section "
<< SectionName << " in relocation mode.\n";
exit(1);
}
// PLT requires special handling and could be ignored in this context.
StringRef SectionName = Section->getName();
if (SectionName == ".plt" || SectionName == ".plt.got")
continue;
ContainingFunction =
getBinaryFunctionContainingAddress(Address,
/*CheckPastEnd=*/false,
/*UseMaxSize=*/true);
// We are not going to overwrite non-simple functions, but for simple
// ones - adjust the padding size.
if (ContainingFunction && ContainingFunction->isSimple()) {
errs() << "BOLT-WARNING: function " << *ContainingFunction
<< " has an object detected in a padding region at address 0x"
<< Twine::utohexstr(Address) << '\n';
ContainingFunction->setMaxSize(ContainingFunction->getSize());
}
if (opts::processAllFunctions()) {
errs() << "BOLT-ERROR: cannot process binaries with unmarked "
<< "object in code at address 0x"
<< Twine::utohexstr(Address) << " belonging to section "
<< SectionName << " in current mode\n";
exit(1);
}
TargetFunction =
getBinaryFunctionContainingAddress(Address,
/*CheckPastEnd=*/false,
/*UseMaxSize=*/true);
// We are not going to overwrite non-simple functions, but for simple
// ones - adjust the padding size.
if (TargetFunction && TargetFunction->isSimple()) {
errs() << "BOLT-WARNING: function " << *TargetFunction
<< " has an object detected in a padding region at address 0x"
<< Twine::utohexstr(Address) << '\n';
TargetFunction->setMaxSize(TargetFunction->getSize());
}
}
@ -1982,8 +1988,7 @@ uint64_t BinaryContext::getHotThreshold() const {
BinaryFunction *
BinaryContext::getBinaryFunctionContainingAddress(uint64_t Address,
bool CheckPastEnd,
bool UseMaxSize,
bool Shallow) {
bool UseMaxSize) {
auto FI = BinaryFunctions.upper_bound(Address);
if (FI == BinaryFunctions.begin())
return nullptr;
@ -1995,28 +2000,17 @@ BinaryContext::getBinaryFunctionContainingAddress(uint64_t Address,
if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
return nullptr;
auto *BF = &FI->second;
if (Shallow)
return BF;
while (BF->getParentFunction())
BF = BF->getParentFunction();
return BF;
return &FI->second;
}
BinaryFunction *
BinaryContext::getBinaryFunctionAtAddress(uint64_t Address, bool Shallow) {
BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
// First, try to find a function starting at the given address. If the
// function was folded, this will get us the original folded function if it
// wasn't removed from the list, e.g. in non-relocation mode.
auto BFI = BinaryFunctions.find(Address);
if (BFI != BinaryFunctions.end()) {
auto *BF = &BFI->second;
while (!Shallow && BF->getParentFunction() && !Shallow) {
BF = BF->getParentFunction();
}
return BF;
return &BFI->second;
}
// We might have folded the function matching the object at the given
@ -2026,12 +2020,8 @@ BinaryContext::getBinaryFunctionAtAddress(uint64_t Address, bool Shallow) {
if (const auto *BD = getBinaryDataAtAddress(Address)) {
uint64_t EntryID{0};
auto *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
if (BF && EntryID == 0) {
while (BF->getParentFunction() && !Shallow) {
BF = BF->getParentFunction();
}
if (BF && EntryID == 0)
return BF;
}
}
return nullptr;
}

View File

@ -276,7 +276,7 @@ public:
}
/// Return BinaryFunction containing a given \p Address or nullptr if
/// no registered function has it.
/// no registered function contains the \p Address.
///
/// In a binary a function has somewhat vague boundaries. E.g. a function can
/// refer to the first byte past the end of the function, and it will still be
@ -294,19 +294,14 @@ public:
/// body and the next object in address ranges that we check.
BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address,
bool CheckPastEnd = false,
bool UseMaxSize = false,
bool Shallow = false);
bool UseMaxSize = false);
/// Return BinaryFunction which has a fragment that starts at a given
/// \p Address. If the BinaryFunction is a child fragment, then return its
/// parent unless \p Shallow parameter is set to true.
BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address,
bool Shallow = false);
/// Return a BinaryFunction that starts at a given \p Address.
BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address);
const BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address,
bool Shallow = false) const {
const BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address) const {
return const_cast<BinaryContext *>(this)->
getBinaryFunctionAtAddress(Address, Shallow);
getBinaryFunctionAtAddress(Address);
}
/// Return size of an entry for the given jump table \p Type.

View File

@ -436,8 +436,8 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
if (isFolded()) {
OS << "\n FoldedInto : " << *getFoldedIntoFunction();
}
if (ParentFunction) {
OS << "\n Parent : " << *ParentFunction;
if (ParentFragment) {
OS << "\n Parent : " << *ParentFragment;
}
if (!Fragments.empty()) {
OS << "\n Fragments : ";

View File

@ -112,7 +112,10 @@ inline raw_ostream &operator<<(raw_ostream &OS,
/// BinaryFunction is a representation of machine-level function.
///
/// We use the term "Binary" as "Machine" was already taken.
/// In the input binary, an instance of BinaryFunction can represent a fragment
/// of a function if the higher-level function was split, e.g. into hot and cold
/// parts. The fragment containing the main entry point is called a parent
/// or the main fragment.
class BinaryFunction {
public:
enum class State : char {
@ -324,8 +327,8 @@ private:
/// Name for the corresponding cold code section.
std::string ColdCodeSectionName;
/// Parent function for split function fragments.
BinaryFunction *ParentFunction{nullptr};
/// Parent function fragment for split function fragments.
BinaryFunction *ParentFragment{nullptr};
/// Indicate if the function body was folded into another function.
/// Used by ICF optimization.
@ -614,14 +617,18 @@ private:
/// instead of calling this function directly.
uint64_t getEntryIDForSymbol(const MCSymbol *EntrySymbol) const;
void setParentFunction(BinaryFunction *BF) {
assert((!ParentFunction || ParentFunction == BF) &&
/// If the function represents a secondary split function fragment, set its
/// parent fragment to \p BF.
void setParentFragment(BinaryFunction &BF) {
assert(IsFragment && "function must be a fragment to have a parent");
assert((!ParentFragment || ParentFragment == &BF) &&
"cannot have more than one parent function");
ParentFunction = BF;
ParentFragment = &BF;
}
void addFragment(BinaryFunction *BF) {
Fragments.insert(BF);
/// Register a child fragment for the main fragment of a split function.
void addFragment(BinaryFunction &BF) {
Fragments.insert(&BF);
}
void addInstruction(uint64_t Offset, MCInst &&Instruction) {
@ -1922,8 +1929,25 @@ public:
return ImageSize;
}
BinaryFunction *getParentFunction() const {
return ParentFunction;
/// Return true if the function is a secondary (child) fragment of another
/// function, i.e. its IsFragment flag is set. Only such functions may be
/// given a parent via setParentFragment().
bool isFragment() const {
return IsFragment;
}
/// Return the parent fragment if this function is a secondary (child)
/// fragment of another function, or nullptr when no parent fragment has been
/// recorded. This is the immediate parent only; use getTopmostFragment() to
/// follow the chain of parents to its top.
BinaryFunction *getParentFragment() const {
return ParentFragment;
}
/// Follow the chain of parent fragments starting at this function and return
/// the fragment at the top of that chain, i.e. the one that has no parent.
/// A function without a parent fragment is returned unchanged.
const BinaryFunction *getTopmostFragment() const {
  const BinaryFunction *Topmost = this;
  for (const BinaryFunction *Parent = getParentFragment(); Parent;
       Parent = Parent->getParentFragment())
    Topmost = Parent;
  return Topmost;
}
/// Set the profile data for the number of times the function was called.

View File

@ -157,6 +157,9 @@ void DWARFRewriter::updateUnitDebugInfo(
IsFunctionDef = true;
const auto *Function = BC.getBinaryFunctionAtAddress(Address);
if (Function && Function->isFragment())
Function = Function->getTopmostFragment();
if (Function && Function->isFolded())
Function = nullptr;
FunctionStack.push_back(Function);

View File

@ -704,14 +704,8 @@ DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
if (!BC->containsAddress(Address))
return nullptr;
// Use shallow search to avoid fetching the parent function, in case
// BinaryContext linked two functions. When aggregating data and writing the
// profile, we want to write offsets relative to the closest symbol in the
// symbol table, not relative to the parent function, to avoid creating
// profile that is too fragile and depends on the layout of other functions.
return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
/*UseMaxSize=*/true,
/*Shallow=*/true);
/*UseMaxSize=*/true);
}
StringRef DataAggregator::getLocationName(BinaryFunction &Func,

View File

@ -3972,8 +3972,7 @@ void RewriteInstance::updateELFSymbolTable(
if (!PatchExisting && shouldStrip(Symbol))
continue;
const auto *Function = BC->getBinaryFunctionAtAddress(Symbol.st_value,
/*Shallow=*/true);
const auto *Function = BC->getBinaryFunctionAtAddress(Symbol.st_value);
// Ignore false function references, e.g. when the section address matches
// the address of the function.
if (Function && Symbol.getType() == ELF::STT_SECTION)
@ -4012,8 +4011,7 @@ void RewriteInstance::updateELFSymbolTable(
Function = (Symbol.getType() == ELF::STT_FUNC)
? BC->getBinaryFunctionContainingAddress(Symbol.st_value,
/*CheckPastEnd=*/false,
/*UseMaxSize=*/true,
/*Shallow=*/true)
/*UseMaxSize=*/true)
: nullptr;
if (Function && Function->isEmitted()) {
@ -4061,8 +4059,7 @@ void RewriteInstance::updateELFSymbolTable(
Symbol.st_size == 0) {
if (BC->getBinaryFunctionContainingAddress(Symbol.st_value,
/*CheckPastEnd=*/false,
/*UseMaxSize=*/true,
/*Shallow=*/true)) {
/*UseMaxSize=*/true)) {
// Can only delete the symbol if not patching. Such symbols should
// not exist in the dynamic symbol table.
assert(!PatchExisting && "cannot delete symbol");
@ -4469,10 +4466,12 @@ void RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) {
const auto *Function = BC->getBinaryFunctionAtAddress(OldAddress,
/*Shallow=*/true);
const auto *Function = BC->getBinaryFunctionAtAddress(OldAddress);
if (!Function)
return 0;
assert(!Function->isFragment() && "cannot get new address for a fragment");
return Function->getOutputAddress();
}

View File

@ -57,6 +57,7 @@ void HugifyRuntimeLibrary::adjustCommandLineOptions(
void HugifyRuntimeLibrary::emitBinary(BinaryContext &BC, MCStreamer &Streamer) {
const auto *StartFunction =
    BC.getBinaryFunctionAtAddress(*(BC.StartFunctionAddress));
// The lookup returns nullptr when no function starts at the address. That case
// is reported by the check right below, so only inspect the function here
// when it was actually found; dereferencing it unconditionally would crash
// assert-enabled builds before the error message is printed.
assert((!StartFunction || !StartFunction->isFragment()) &&
       "expected main function fragment");
if (!StartFunction) {
errs() << "BOLT-ERROR: failed to locate function at binary start address\n";
exit(1);

View File

@ -61,12 +61,14 @@ void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
MCStreamer &Streamer) {
const auto *StartFunction =
    BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress);
// The lookup returns nullptr when no function starts at the address. That case
// is reported by the check right below, so only inspect the function here
// when it was actually found; dereferencing it unconditionally would crash
// assert-enabled builds before the error message is printed.
assert((!StartFunction || !StartFunction->isFragment()) &&
       "expected main function fragment");
if (!StartFunction) {
errs() << "BOLT-ERROR: failed to locate function at binary start address\n";
exit(1);
}
const auto *FiniFunction =
    BC.getBinaryFunctionAtAddress(*BC.FiniFunctionAddress);
// The lookup returns nullptr when no function starts at the address. That case
// is reported by the check right below, so only inspect the function here
// when it was actually found; dereferencing it unconditionally would crash
// assert-enabled builds before the error message is printed.
assert((!FiniFunction || !FiniFunction->isFragment()) &&
       "expected main function fragment");
if (!FiniFunction) {
errs() << "BOLT-ERROR: failed to locate function at binary fini address\n";
exit(1);