[BOLT] Improve object discovery runtime

Summary:

(cherry picked from FBD17872824)
This commit is contained in:
Maksim Panchenko 2019-10-08 11:03:33 -07:00
parent 13948f376d
commit 8c6ea8540a
3 changed files with 73 additions and 68 deletions

View File

@ -21,6 +21,7 @@
#include "MCPlusBuilder.h" #include "MCPlusBuilder.h"
#include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator.h"
#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Triple.h" #include "llvm/ADT/Triple.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h"
@ -172,11 +173,25 @@ class BinaryContext {
/// with size won't overflow /// with size won't overflow
uint32_t DuplicatedJumpTables{0x10000000}; uint32_t DuplicatedJumpTables{0x10000000};
/// Map used for disambiguation of local symbols.
StringMap<uint64_t> LocalSymbols;
public: public:
/// [name] -> [BinaryData*] map used for global symbol resolution. /// [name] -> [BinaryData*] map used for global symbol resolution.
using SymbolMapType = std::map<std::string, BinaryData *>; using SymbolMapType = StringMap<BinaryData *>;
SymbolMapType GlobalSymbols; SymbolMapType GlobalSymbols;
/// Produce a disambiguated spelling of \p Name in the form "<name>/<number>".
///
/// A per-name counter stored in LocalSymbols is bumped on every call and the
/// new counter value becomes the numeric suffix, so repeated requests for the
/// same \p Name yield distinct results without probing any other table.
std::string uniquifySymbolName(const std::string &Name) {
  // operator[] creates the counter entry on first use of this name.
  uint64_t &Counter = LocalSymbols[Name];
  ++Counter;

  std::string Unique = Name;
  Unique += '/';
  Unique += std::to_string(Counter);
  return Unique;
}
/// Release memory used for disambiguation of local symbols.
///
/// Hands the LocalSymbols counter map to clearList(). NOTE(review): clearList
/// is defined outside this view; presumably it empties the map and frees its
/// storage -- confirm. If so, a later uniquifySymbolName() call for a name
/// seen before would restart its numbering, so this should only run once
/// symbol uniquification is finished. In this change it is invoked at the end
/// of RewriteInstance::discoverFileObjects(), after relocations are read.
void freeLocalSymbols() {
clearList(LocalSymbols);
}
/// [address] -> [BinaryData], ... /// [address] -> [BinaryData], ...
/// Addresses never change. /// Addresses never change.
/// Note: it is important that clients do not hold on to instances of /// Note: it is important that clients do not hold on to instances of

View File

@ -48,7 +48,8 @@ protected:
std::vector<MCSymbol *> Symbols; std::vector<MCSymbol *> Symbols;
/// Section this data belongs to. /// Section this data belongs to.
BinarySection *Section; BinarySection *Section{nullptr};
/// Start address of this symbol. /// Start address of this symbol.
uint64_t Address{0}; uint64_t Address{0};
/// Size of this data (can be 0). /// Size of this data (can be 0).

View File

@ -590,13 +590,6 @@ void check_error(Error E, Twine Message) {
namespace { namespace {
/// Return the first name of the form "<NamePrefix><N>", trying N = 1, 2, ...
/// in order, for which BC.getBinaryDataByName() reports no existing entry.
///
/// Each call restarts the search at 1, so the cost grows with the number of
/// names already registered under the same prefix.
std::string uniquifyName(BinaryContext &BC, std::string NamePrefix) {
  for (unsigned LocalID = 1;; ++LocalID) {
    std::string Candidate = NamePrefix + std::to_string(LocalID);
    if (!BC.getBinaryDataByName(Candidate))
      return Candidate;
  }
}
bool refersToReorderedSection(ErrorOr<BinarySection &> Section) { bool refersToReorderedSection(ErrorOr<BinarySection &> Section) {
auto Itr = std::find_if(opts::ReorderData.begin(), auto Itr = std::find_if(opts::ReorderData.begin(),
opts::ReorderData.end(), opts::ReorderData.end(),
@ -1188,63 +1181,66 @@ void RewriteInstance::discoverFileObjects() {
// For aarch64, the ABI defines mapping symbols so we identify data in the // For aarch64, the ABI defines mapping symbols so we identify data in the
// code section (see IHI0056B). $d identifies data contents. // code section (see IHI0056B). $d identifies data contents.
auto MarkersBegin = SortedFileSymbols.end(); auto LastSymbol = SortedFileSymbols.end() - 1;
if (BC->isAArch64()) { if (BC->isAArch64()) {
MarkersBegin = std::stable_partition( LastSymbol = std::stable_partition(
SortedFileSymbols.begin(), SortedFileSymbols.end(), SortedFileSymbols.begin(), SortedFileSymbols.end(),
[](const SymbolRef &Symbol) { [](const SymbolRef &Symbol) {
StringRef Name = cantFail(Symbol.getName()); StringRef Name = cantFail(Symbol.getName());
return !(cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && return !(cantFail(Symbol.getType()) == SymbolRef::ST_Unknown &&
(Name == "$d" || Name == "$x")); (Name == "$d" || Name == "$x"));
}); });
--LastSymbol;
} }
auto getNextAddress = [&](std::vector<SymbolRef>::const_iterator Itr) { auto getNextAddress = [&](std::vector<SymbolRef>::const_iterator Itr) {
auto Section = cantFail(Itr->getSection()); const auto SymbolSection = cantFail(Itr->getSection());
const auto SymbolEndAddress = const auto SymbolAddress = cantFail(Itr->getAddress());
(cantFail(Itr->getAddress()) + ELFSymbolRef(*Itr).getSize()); const auto SymbolEndAddress = SymbolAddress + ELFSymbolRef(*Itr).getSize();
// absolute sym // absolute sym
if (Section == InputFile->section_end()) if (SymbolSection == InputFile->section_end())
return SymbolEndAddress; return SymbolEndAddress;
while (Itr != MarkersBegin - 1 && while (Itr != LastSymbol &&
cantFail(std::next(Itr)->getSection()) == Section && cantFail(std::next(Itr)->getSection()) == SymbolSection &&
cantFail(std::next(Itr)->getAddress()) == cantFail(std::next(Itr)->getAddress()) == SymbolAddress) {
cantFail(Itr->getAddress())) {
++Itr; ++Itr;
} }
if (Itr != MarkersBegin - 1 && if (Itr != LastSymbol &&
cantFail(std::next(Itr)->getSection()) == Section) cantFail(std::next(Itr)->getSection()) == SymbolSection)
return cantFail(std::next(Itr)->getAddress()); return cantFail(std::next(Itr)->getAddress());
const auto SectionEndAddress = Section->getAddress() + Section->getSize(); const auto SymbolSectionEndAddress =
if ((ELFSectionRef(*Section).getFlags() & ELF::SHF_TLS) || SymbolSection->getAddress() + SymbolSection->getSize();
SymbolEndAddress > SectionEndAddress) if ((ELFSectionRef(*SymbolSection).getFlags() & ELF::SHF_TLS) ||
SymbolEndAddress > SymbolSectionEndAddress)
return SymbolEndAddress; return SymbolEndAddress;
return SectionEndAddress; return SymbolSectionEndAddress;
}; };
BinaryFunction *PreviousFunction = nullptr; BinaryFunction *PreviousFunction = nullptr;
unsigned AnonymousId = 0; unsigned AnonymousId = 0;
const auto MarkersBegin = std::next(LastSymbol);
for (auto ISym = SortedFileSymbols.begin(); ISym != MarkersBegin; ++ISym) { for (auto ISym = SortedFileSymbols.begin(); ISym != MarkersBegin; ++ISym) {
const auto &Symbol = *ISym; const auto &Symbol = *ISym;
// Keep undefined symbols for pretty printing? // Keep undefined symbols for pretty printing?
if (Symbol.getFlags() & SymbolRef::SF_Undefined) if (Symbol.getFlags() & SymbolRef::SF_Undefined)
continue; continue;
if (cantFail(Symbol.getType()) == SymbolRef::ST_File) const auto SymbolType = cantFail(Symbol.getType());
if (SymbolType == SymbolRef::ST_File)
continue; continue;
StringRef SymName = cantFail(Symbol.getName(), "cannot get symbol name"); StringRef SymName = cantFail(Symbol.getName(), "cannot get symbol name");
uint64_t Address = uint64_t Address =
cantFail(Symbol.getAddress(), "cannot get symbol address"); cantFail(Symbol.getAddress(), "cannot get symbol address");
if (Address == 0) { if (Address == 0) {
if (opts::Verbosity >= 1 && if (opts::Verbosity >= 1 && SymbolType == SymbolRef::ST_Function)
cantFail(Symbol.getType()) == SymbolRef::ST_Function)
errs() << "BOLT-WARNING: function with 0 address seen\n"; errs() << "BOLT-WARNING: function with 0 address seen\n";
continue; continue;
} }
@ -1288,21 +1284,21 @@ void RewriteInstance::discoverFileObjects() {
// The <id> field is used for disambiguation of local symbols since there // The <id> field is used for disambiguation of local symbols since there
// could be identical function names coming from identical file names // could be identical function names coming from identical file names
// (e.g. from different directories). // (e.g. from different directories).
std::string Prefix = Name + "/";
std::string AltPrefix; std::string AltPrefix;
auto SFI = SymbolToFileName.find(Symbol); auto SFI = SymbolToFileName.find(Symbol);
if (SFI != SymbolToFileName.end()) { if (SymbolType == SymbolRef::ST_Function &&
AltPrefix = Prefix + std::string(SFI->second) + "/"; SFI != SymbolToFileName.end()) {
AltPrefix = Name + "/" + std::string(SFI->second);
} }
UniqueName = uniquifyName(*BC, Prefix); UniqueName = BC->uniquifySymbolName(Name);
if (!AltPrefix.empty()) if (!AltPrefix.empty())
AlternativeName = uniquifyName(*BC, AltPrefix); AlternativeName = BC->uniquifySymbolName(AltPrefix);
} }
uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
uint64_t NextAddress = getNextAddress(ISym); uint64_t TentativeSize = SymbolSize ? SymbolSize
uint64_t TentativeSize = !SymbolSize ? NextAddress - Address : SymbolSize; : getNextAddress(ISym) - Address;
uint64_t SymbolAlignment = Symbol.getAlignment(); uint64_t SymbolAlignment = Symbol.getAlignment();
unsigned SymbolFlags = Symbol.getFlags(); unsigned SymbolFlags = Symbol.getFlags();
@ -1330,7 +1326,7 @@ void RewriteInstance::discoverFileObjects() {
<< " for function\n"); << " for function\n");
if (!Section->isText()) { if (!Section->isText()) {
assert(cantFail(Symbol.getType()) != SymbolRef::ST_Function && assert(SymbolType != SymbolRef::ST_Function &&
"unexpected function inside non-code section"); "unexpected function inside non-code section");
DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code\n"); DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code\n");
registerName(TentativeSize); registerName(TentativeSize);
@ -1344,34 +1340,24 @@ void RewriteInstance::discoverFileObjects() {
// Sometimes assembly functions are not marked as functions and neither are // Sometimes assembly functions are not marked as functions and neither are
// their local labels. The only way to tell them apart is to look at // their local labels. The only way to tell them apart is to look at
// symbol scope - global vs local. // symbol scope - global vs local.
if (cantFail(Symbol.getType()) != SymbolRef::ST_Function) { if (PreviousFunction && SymbolType != SymbolRef::ST_Function) {
if (PreviousFunction) { if (PreviousFunction->containsAddress(Address)) {
if (PreviousFunction->getSize() == 0) {
if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n"); DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n");
registerName(SymbolSize); } else if (Address == PreviousFunction->getAddress() && !SymbolSize) {
continue;
}
} else if (PreviousFunction->containsAddress(Address)) {
if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n");
registerName(SymbolSize);
continue;
} else {
if (Address == PreviousFunction->getAddress() && SymbolSize == 0) {
DEBUG(dbgs() << "BOLT-DEBUG: ignoring symbol as a marker\n"); DEBUG(dbgs() << "BOLT-DEBUG: ignoring symbol as a marker\n");
registerName(SymbolSize); } else if (opts::Verbosity > 1) {
continue;
}
if (opts::Verbosity > 1) {
errs() << "BOLT-WARNING: symbol " << UniqueName errs() << "BOLT-WARNING: symbol " << UniqueName
<< " seen in the middle of function " << " seen in the middle of function "
<< *PreviousFunction << ". Could be a new entry.\n"; << *PreviousFunction << ". Could be a new entry.\n";
} }
registerName(SymbolSize); registerName(SymbolSize);
continue; continue;
} } else if (PreviousFunction->getSize() == 0 &&
} PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n");
registerName(SymbolSize);
continue;
} }
} }
@ -1441,7 +1427,6 @@ void RewriteInstance::discoverFileObjects() {
<< Twine::utohexstr(Address) << "\n"); << Twine::utohexstr(Address) << "\n");
} }
} }
TentativeSize = SymbolSize;
} }
BinaryFunction *BF{nullptr}; BinaryFunction *BF{nullptr};
@ -1574,6 +1559,8 @@ void RewriteInstance::discoverFileObjects() {
for (const auto &Section : RelocationSections) for (const auto &Section : RelocationSections)
readRelocations(Section); readRelocations(Section);
BC->freeLocalSymbols();
} }
void RewriteInstance::disassemblePLT() { void RewriteInstance::disassemblePLT() {
@ -2527,10 +2514,12 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
if (SymbolFlags & SymbolRef::SF_Global) { if (SymbolFlags & SymbolRef::SF_Global) {
Name = SymbolName; Name = SymbolName;
} else { } else {
Name = uniquifyName(*BC, StringRef(SymbolName).startswith( if (StringRef(SymbolName).startswith(
BC->AsmInfo->getPrivateGlobalPrefix()) BC->AsmInfo->getPrivateGlobalPrefix())) {
? "PG" + SymbolName + "/" Name = BC->uniquifySymbolName("PG" + SymbolName);
: SymbolName + "/"); } else {
Name = BC->uniquifySymbolName(SymbolName);
}
} }
ReferencedSymbol = BC->registerNameAtAddress(Name, ReferencedSymbol = BC->registerNameAtAddress(Name,
SymbolAddress, SymbolAddress,