forked from OSchip/llvm-project
[BOLT] Eliminate "shallow" function lookup
Summary: Whenever we search for a function based on its address in the input binary, we now always return a corresponding fragment for split functions. If the user needs an access to the main fragment, they can call getTopmostFragment(). (cherry picked from FBD23670311)
This commit is contained in:
parent
62469b5036
commit
a82cff0f52
|
@ -1018,66 +1018,72 @@ void BinaryContext::generateSymbolHashes() {
|
|||
|
||||
void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) {
|
||||
for (auto Address : Function.InterproceduralReferences) {
|
||||
auto *ContainingFunction = getBinaryFunctionContainingAddress(Address);
|
||||
if (&Function == ContainingFunction)
|
||||
if (!Address)
|
||||
continue;
|
||||
|
||||
if (ContainingFunction) {
|
||||
// Only a parent function (or a sibling) can reach its fragment.
|
||||
if (ContainingFunction->IsFragment) {
|
||||
auto *TargetFunction = getBinaryFunctionContainingAddress(Address);
|
||||
if (&Function == TargetFunction)
|
||||
continue;
|
||||
|
||||
if (TargetFunction) {
|
||||
if (TargetFunction->IsFragment) {
|
||||
// Only a parent function (or a sibling) can reach its fragment.
|
||||
assert(!Function.IsFragment &&
|
||||
"only one cold fragment is supported at this time");
|
||||
ContainingFunction->setParentFunction(&Function);
|
||||
Function.addFragment(ContainingFunction);
|
||||
if (auto *TargetParent = TargetFunction->getParentFragment()) {
|
||||
assert(TargetParent == &Function && "mismatching parent function");
|
||||
continue;
|
||||
}
|
||||
TargetFunction->setParentFragment(Function);
|
||||
Function.addFragment(*TargetFunction);
|
||||
if (!HasRelocations) {
|
||||
ContainingFunction->setSimple(false);
|
||||
TargetFunction->setSimple(false);
|
||||
Function.setSimple(false);
|
||||
}
|
||||
if (opts::Verbosity >= 1) {
|
||||
outs() << "BOLT-INFO: marking " << *ContainingFunction
|
||||
outs() << "BOLT-INFO: marking " << *TargetFunction
|
||||
<< " as a fragment of " << Function << '\n';
|
||||
}
|
||||
continue;
|
||||
} else if (TargetFunction->getAddress() != Address) {
|
||||
TargetFunction->
|
||||
addEntryPointAtOffset(Address - TargetFunction->getAddress());
|
||||
}
|
||||
|
||||
if (ContainingFunction->getAddress() != Address) {
|
||||
ContainingFunction->
|
||||
addEntryPointAtOffset(Address - ContainingFunction->getAddress());
|
||||
}
|
||||
} else if (Address) {
|
||||
// Check if address falls in function padding space - this could be
|
||||
// unmarked data in code. In this case adjust the padding space size.
|
||||
auto Section = getSectionForAddress(Address);
|
||||
assert(Section && "cannot get section for referenced address");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!Section->isText())
|
||||
continue;
|
||||
// Check if address falls in function padding space - this could be
|
||||
// unmarked data in code. In this case adjust the padding space size.
|
||||
auto Section = getSectionForAddress(Address);
|
||||
assert(Section && "cannot get section for referenced address");
|
||||
|
||||
// PLT requires special handling and could be ignored in this context.
|
||||
StringRef SectionName = Section->getName();
|
||||
if (SectionName == ".plt" || SectionName == ".plt.got")
|
||||
continue;
|
||||
if (!Section->isText())
|
||||
continue;
|
||||
|
||||
if (opts::UseOldText) {
|
||||
errs() << "BOLT-ERROR: cannot process binaries with unmarked "
|
||||
<< "object in code at address 0x"
|
||||
<< Twine::utohexstr(Address) << " belonging to section "
|
||||
<< SectionName << " in relocation mode.\n";
|
||||
exit(1);
|
||||
}
|
||||
// PLT requires special handling and could be ignored in this context.
|
||||
StringRef SectionName = Section->getName();
|
||||
if (SectionName == ".plt" || SectionName == ".plt.got")
|
||||
continue;
|
||||
|
||||
ContainingFunction =
|
||||
getBinaryFunctionContainingAddress(Address,
|
||||
/*CheckPastEnd=*/false,
|
||||
/*UseMaxSize=*/true);
|
||||
// We are not going to overwrite non-simple functions, but for simple
|
||||
// ones - adjust the padding size.
|
||||
if (ContainingFunction && ContainingFunction->isSimple()) {
|
||||
errs() << "BOLT-WARNING: function " << *ContainingFunction
|
||||
<< " has an object detected in a padding region at address 0x"
|
||||
<< Twine::utohexstr(Address) << '\n';
|
||||
ContainingFunction->setMaxSize(ContainingFunction->getSize());
|
||||
}
|
||||
if (opts::processAllFunctions()) {
|
||||
errs() << "BOLT-ERROR: cannot process binaries with unmarked "
|
||||
<< "object in code at address 0x"
|
||||
<< Twine::utohexstr(Address) << " belonging to section "
|
||||
<< SectionName << " in current mode\n";
|
||||
exit(1);
|
||||
}
|
||||
|
||||
TargetFunction =
|
||||
getBinaryFunctionContainingAddress(Address,
|
||||
/*CheckPastEnd=*/false,
|
||||
/*UseMaxSize=*/true);
|
||||
// We are not going to overwrite non-simple functions, but for simple
|
||||
// ones - adjust the padding size.
|
||||
if (TargetFunction && TargetFunction->isSimple()) {
|
||||
errs() << "BOLT-WARNING: function " << *TargetFunction
|
||||
<< " has an object detected in a padding region at address 0x"
|
||||
<< Twine::utohexstr(Address) << '\n';
|
||||
TargetFunction->setMaxSize(TargetFunction->getSize());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1982,8 +1988,7 @@ uint64_t BinaryContext::getHotThreshold() const {
|
|||
BinaryFunction *
|
||||
BinaryContext::getBinaryFunctionContainingAddress(uint64_t Address,
|
||||
bool CheckPastEnd,
|
||||
bool UseMaxSize,
|
||||
bool Shallow) {
|
||||
bool UseMaxSize) {
|
||||
auto FI = BinaryFunctions.upper_bound(Address);
|
||||
if (FI == BinaryFunctions.begin())
|
||||
return nullptr;
|
||||
|
@ -1995,28 +2000,17 @@ BinaryContext::getBinaryFunctionContainingAddress(uint64_t Address,
|
|||
if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
|
||||
return nullptr;
|
||||
|
||||
auto *BF = &FI->second;
|
||||
if (Shallow)
|
||||
return BF;
|
||||
|
||||
while (BF->getParentFunction())
|
||||
BF = BF->getParentFunction();
|
||||
|
||||
return BF;
|
||||
return &FI->second;
|
||||
}
|
||||
|
||||
BinaryFunction *
|
||||
BinaryContext::getBinaryFunctionAtAddress(uint64_t Address, bool Shallow) {
|
||||
BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
|
||||
// First, try to find a function starting at the given address. If the
|
||||
// function was folded, this will get us the original folded function if it
|
||||
// wasn't removed from the list, e.g. in non-relocation mode.
|
||||
auto BFI = BinaryFunctions.find(Address);
|
||||
if (BFI != BinaryFunctions.end()) {
|
||||
auto *BF = &BFI->second;
|
||||
while (!Shallow && BF->getParentFunction() && !Shallow) {
|
||||
BF = BF->getParentFunction();
|
||||
}
|
||||
return BF;
|
||||
return &BFI->second;
|
||||
}
|
||||
|
||||
// We might have folded the function matching the object at the given
|
||||
|
@ -2026,12 +2020,8 @@ BinaryContext::getBinaryFunctionAtAddress(uint64_t Address, bool Shallow) {
|
|||
if (const auto *BD = getBinaryDataAtAddress(Address)) {
|
||||
uint64_t EntryID{0};
|
||||
auto *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
|
||||
if (BF && EntryID == 0) {
|
||||
while (BF->getParentFunction() && !Shallow) {
|
||||
BF = BF->getParentFunction();
|
||||
}
|
||||
if (BF && EntryID == 0)
|
||||
return BF;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
|
|
@ -276,7 +276,7 @@ public:
|
|||
}
|
||||
|
||||
/// Return BinaryFunction containing a given \p Address or nullptr if
|
||||
/// no registered function has it.
|
||||
/// no registered function contains the \p Address.
|
||||
///
|
||||
/// In a binary a function has somewhat vague boundaries. E.g. a function can
|
||||
/// refer to the first byte past the end of the function, and it will still be
|
||||
|
@ -294,19 +294,14 @@ public:
|
|||
/// body and the next object in address ranges that we check.
|
||||
BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address,
|
||||
bool CheckPastEnd = false,
|
||||
bool UseMaxSize = false,
|
||||
bool Shallow = false);
|
||||
bool UseMaxSize = false);
|
||||
|
||||
/// Return BinaryFunction which has a fragment that starts at a given
|
||||
/// \p Address. If the BinaryFunction is a child fragment, then return its
|
||||
/// parent unless \p Shallow parameter is set to true.
|
||||
BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address,
|
||||
bool Shallow = false);
|
||||
/// Return a BinaryFunction that starts at a given \p Address.
|
||||
BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address);
|
||||
|
||||
const BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address,
|
||||
bool Shallow = false) const {
|
||||
const BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address) const {
|
||||
return const_cast<BinaryContext *>(this)->
|
||||
getBinaryFunctionAtAddress(Address, Shallow);
|
||||
getBinaryFunctionAtAddress(Address);
|
||||
}
|
||||
|
||||
/// Return size of an entry for the given jump table \p Type.
|
||||
|
|
|
@ -436,8 +436,8 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
|
|||
if (isFolded()) {
|
||||
OS << "\n FoldedInto : " << *getFoldedIntoFunction();
|
||||
}
|
||||
if (ParentFunction) {
|
||||
OS << "\n Parent : " << *ParentFunction;
|
||||
if (ParentFragment) {
|
||||
OS << "\n Parent : " << *ParentFragment;
|
||||
}
|
||||
if (!Fragments.empty()) {
|
||||
OS << "\n Fragments : ";
|
||||
|
|
|
@ -112,7 +112,10 @@ inline raw_ostream &operator<<(raw_ostream &OS,
|
|||
|
||||
/// BinaryFunction is a representation of machine-level function.
|
||||
///
|
||||
/// We use the term "Binary" as "Machine" was already taken.
|
||||
/// In the input binary, an instance of BinaryFunction can represent a fragment
|
||||
/// of a function if the higher-level function was split, e.g. into hot and cold
|
||||
/// parts. The fragment containing the main entry point is called a parent
|
||||
/// or the main fragment.
|
||||
class BinaryFunction {
|
||||
public:
|
||||
enum class State : char {
|
||||
|
@ -324,8 +327,8 @@ private:
|
|||
/// Name for the corresponding cold code section.
|
||||
std::string ColdCodeSectionName;
|
||||
|
||||
/// Parent function for split function fragments.
|
||||
BinaryFunction *ParentFunction{nullptr};
|
||||
/// Parent function fragment for split function fragments.
|
||||
BinaryFunction *ParentFragment{nullptr};
|
||||
|
||||
/// Indicate if the function body was folded into another function.
|
||||
/// Used by ICF optimization.
|
||||
|
@ -614,14 +617,18 @@ private:
|
|||
/// instead of calling this function directly.
|
||||
uint64_t getEntryIDForSymbol(const MCSymbol *EntrySymbol) const;
|
||||
|
||||
void setParentFunction(BinaryFunction *BF) {
|
||||
assert((!ParentFunction || ParentFunction == BF) &&
|
||||
/// If the function represents a secondary split function fragment, set its
|
||||
/// parent fragment to \p BF.
|
||||
void setParentFragment(BinaryFunction &BF) {
|
||||
assert(IsFragment && "function must be a fragment to have a parent");
|
||||
assert((!ParentFragment || ParentFragment == &BF) &&
|
||||
"cannot have more than one parent function");
|
||||
ParentFunction = BF;
|
||||
ParentFragment = &BF;
|
||||
}
|
||||
|
||||
void addFragment(BinaryFunction *BF) {
|
||||
Fragments.insert(BF);
|
||||
/// Register a child fragment for the main fragment of a split function.
|
||||
void addFragment(BinaryFunction &BF) {
|
||||
Fragments.insert(&BF);
|
||||
}
|
||||
|
||||
void addInstruction(uint64_t Offset, MCInst &&Instruction) {
|
||||
|
@ -1922,8 +1929,25 @@ public:
|
|||
return ImageSize;
|
||||
}
|
||||
|
||||
BinaryFunction *getParentFunction() const {
|
||||
return ParentFunction;
|
||||
/// Return true if the function is a secondary fragment of another function.
|
||||
bool isFragment() const {
|
||||
return IsFragment;
|
||||
}
|
||||
|
||||
/// Return parent function fragment if this function is a secondary (child)
|
||||
/// fragment of another function.
|
||||
BinaryFunction *getParentFragment() const {
|
||||
return ParentFragment;
|
||||
}
|
||||
|
||||
/// If the function is a nested child fragment of another function, return its
|
||||
/// topmost parent fragment.
|
||||
const BinaryFunction *getTopmostFragment() const {
|
||||
const BinaryFunction *BF = this;
|
||||
while (BF->getParentFragment())
|
||||
BF = BF->getParentFragment();
|
||||
|
||||
return BF;
|
||||
}
|
||||
|
||||
/// Set the profile data for the number of times the function was called.
|
||||
|
|
|
@ -157,6 +157,9 @@ void DWARFRewriter::updateUnitDebugInfo(
|
|||
|
||||
IsFunctionDef = true;
|
||||
const auto *Function = BC.getBinaryFunctionAtAddress(Address);
|
||||
if (Function && Function->isFragment())
|
||||
Function = Function->getTopmostFragment();
|
||||
|
||||
if (Function && Function->isFolded())
|
||||
Function = nullptr;
|
||||
FunctionStack.push_back(Function);
|
||||
|
|
|
@ -704,14 +704,8 @@ DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
|
|||
if (!BC->containsAddress(Address))
|
||||
return nullptr;
|
||||
|
||||
// Use shallow search to avoid fetching the parent function, in case
|
||||
// BinaryContext linked two functions. When aggregating data and writing the
|
||||
// profile, we want to write offsets relative to the closest symbol in the
|
||||
// symbol table, not relative to the parent function, to avoid creating
|
||||
// profile that is too fragile and depends on the layout of other functions.
|
||||
return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
|
||||
/*UseMaxSize=*/true,
|
||||
/*Shallow=*/true);
|
||||
/*UseMaxSize=*/true);
|
||||
}
|
||||
|
||||
StringRef DataAggregator::getLocationName(BinaryFunction &Func,
|
||||
|
|
|
@ -3972,8 +3972,7 @@ void RewriteInstance::updateELFSymbolTable(
|
|||
if (!PatchExisting && shouldStrip(Symbol))
|
||||
continue;
|
||||
|
||||
const auto *Function = BC->getBinaryFunctionAtAddress(Symbol.st_value,
|
||||
/*Shallow=*/true);
|
||||
const auto *Function = BC->getBinaryFunctionAtAddress(Symbol.st_value);
|
||||
// Ignore false function references, e.g. when the section address matches
|
||||
// the address of the function.
|
||||
if (Function && Symbol.getType() == ELF::STT_SECTION)
|
||||
|
@ -4012,8 +4011,7 @@ void RewriteInstance::updateELFSymbolTable(
|
|||
Function = (Symbol.getType() == ELF::STT_FUNC)
|
||||
? BC->getBinaryFunctionContainingAddress(Symbol.st_value,
|
||||
/*CheckPastEnd=*/false,
|
||||
/*UseMaxSize=*/true,
|
||||
/*Shallow=*/true)
|
||||
/*UseMaxSize=*/true)
|
||||
: nullptr;
|
||||
|
||||
if (Function && Function->isEmitted()) {
|
||||
|
@ -4061,8 +4059,7 @@ void RewriteInstance::updateELFSymbolTable(
|
|||
Symbol.st_size == 0) {
|
||||
if (BC->getBinaryFunctionContainingAddress(Symbol.st_value,
|
||||
/*CheckPastEnd=*/false,
|
||||
/*UseMaxSize=*/true,
|
||||
/*Shallow=*/true)) {
|
||||
/*UseMaxSize=*/true)) {
|
||||
// Can only delete the symbol if not patching. Such symbols should
|
||||
// not exist in the dynamic symbol table.
|
||||
assert(!PatchExisting && "cannot delete symbol");
|
||||
|
@ -4469,10 +4466,12 @@ void RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
|
|||
|
||||
|
||||
uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) {
|
||||
const auto *Function = BC->getBinaryFunctionAtAddress(OldAddress,
|
||||
/*Shallow=*/true);
|
||||
const auto *Function = BC->getBinaryFunctionAtAddress(OldAddress);
|
||||
if (!Function)
|
||||
return 0;
|
||||
|
||||
assert(!Function->isFragment() && "cannot get new address for a fragment");
|
||||
|
||||
return Function->getOutputAddress();
|
||||
}
|
||||
|
||||
|
|
|
@ -57,6 +57,7 @@ void HugifyRuntimeLibrary::adjustCommandLineOptions(
|
|||
void HugifyRuntimeLibrary::emitBinary(BinaryContext &BC, MCStreamer &Streamer) {
|
||||
const auto *StartFunction =
|
||||
BC.getBinaryFunctionAtAddress(*(BC.StartFunctionAddress));
|
||||
assert(!StartFunction->isFragment() && "expected main function fragment");
|
||||
if (!StartFunction) {
|
||||
errs() << "BOLT-ERROR: failed to locate function at binary start address\n";
|
||||
exit(1);
|
||||
|
|
|
@ -61,12 +61,14 @@ void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
|
|||
MCStreamer &Streamer) {
|
||||
const auto *StartFunction =
|
||||
BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress);
|
||||
assert(!StartFunction->isFragment() && "expected main function fragment");
|
||||
if (!StartFunction) {
|
||||
errs() << "BOLT-ERROR: failed to locate function at binary start address\n";
|
||||
exit(1);
|
||||
}
|
||||
const auto *FiniFunction =
|
||||
BC.getBinaryFunctionAtAddress(*BC.FiniFunctionAddress);
|
||||
assert(!FiniFunction->isFragment() && "expected main function fragment");
|
||||
if (!FiniFunction) {
|
||||
errs() << "BOLT-ERROR: failed to locate function at binary fini address\n";
|
||||
exit(1);
|
||||
|
|
Loading…
Reference in New Issue