diff --git a/bolt/src/BinaryContext.h b/bolt/src/BinaryContext.h index e384273aed3c..98718f575d88 100644 --- a/bolt/src/BinaryContext.h +++ b/bolt/src/BinaryContext.h @@ -21,6 +21,7 @@ #include "MCPlusBuilder.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/Triple.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" @@ -172,11 +173,25 @@ class BinaryContext { /// with size won't overflow uint32_t DuplicatedJumpTables{0x10000000}; + /// Map used for disambiguation of local symbols. + StringMap LocalSymbols; + public: /// [name] -> [BinaryData*] map used for global symbol resolution. - using SymbolMapType = std::map; + using SymbolMapType = StringMap; SymbolMapType GlobalSymbols; + /// Return unique version of a symbol name in the form "/". + std::string uniquifySymbolName(const std::string &Name) { + const auto ID = ++LocalSymbols[Name]; + return Name + '/' + std::to_string(ID); + } + + /// Release memory used for disambiguation of local symbols. + void freeLocalSymbols() { + clearList(LocalSymbols); + } + /// [address] -> [BinaryData], ... /// Addresses never change. /// Note: it is important that clients do not hold on to instances of diff --git a/bolt/src/BinaryData.h b/bolt/src/BinaryData.h index bf8c0c3741cf..d2720941cb30 100644 --- a/bolt/src/BinaryData.h +++ b/bolt/src/BinaryData.h @@ -48,7 +48,8 @@ protected: std::vector Symbols; /// Section this data belongs to. - BinarySection *Section; + BinarySection *Section{nullptr}; + /// Start address of this symbol. uint64_t Address{0}; /// Size of this data (can be 0). diff --git a/bolt/src/RewriteInstance.cpp b/bolt/src/RewriteInstance.cpp index fcc8da5eda01..bf1d8d819d79 100644 --- a/bolt/src/RewriteInstance.cpp +++ b/bolt/src/RewriteInstance.cpp @@ -590,13 +590,6 @@ void check_error(Error E, Twine Message) { namespace { -std::string uniquifyName(BinaryContext &BC, std::string NamePrefix) { - unsigned LocalID = 1; - while (BC.getBinaryDataByName(NamePrefix + std::to_string(LocalID))) - ++LocalID; - return NamePrefix + std::to_string(LocalID); -} - bool refersToReorderedSection(ErrorOr Section) { auto Itr = std::find_if(opts::ReorderData.begin(), opts::ReorderData.end(), @@ -1188,63 +1181,66 @@ void RewriteInstance::discoverFileObjects() { // For aarch64, the ABI defines mapping symbols so we identify data in the // code section (see IHI0056B). $d identifies data contents. - auto MarkersBegin = SortedFileSymbols.end(); + auto LastSymbol = SortedFileSymbols.end() - 1; if (BC->isAArch64()) { - MarkersBegin = std::stable_partition( + LastSymbol = std::stable_partition( SortedFileSymbols.begin(), SortedFileSymbols.end(), [](const SymbolRef &Symbol) { StringRef Name = cantFail(Symbol.getName()); return !(cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && (Name == "$d" || Name == "$x")); }); + --LastSymbol; } auto getNextAddress = [&](std::vector::const_iterator Itr) { - auto Section = cantFail(Itr->getSection()); - const auto SymbolEndAddress = - (cantFail(Itr->getAddress()) + ELFSymbolRef(*Itr).getSize()); + const auto SymbolSection = cantFail(Itr->getSection()); + const auto SymbolAddress = cantFail(Itr->getAddress()); + const auto SymbolEndAddress = SymbolAddress + ELFSymbolRef(*Itr).getSize(); // absolute sym - if (Section == InputFile->section_end()) + if (SymbolSection == InputFile->section_end()) return SymbolEndAddress; - while (Itr != MarkersBegin - 1 && - cantFail(std::next(Itr)->getSection()) == Section && - cantFail(std::next(Itr)->getAddress()) == - cantFail(Itr->getAddress())) { + while (Itr != LastSymbol && + cantFail(std::next(Itr)->getSection()) == SymbolSection && + cantFail(std::next(Itr)->getAddress()) == SymbolAddress) { ++Itr; } - if (Itr != MarkersBegin - 1 && - cantFail(std::next(Itr)->getSection()) == Section) + if (Itr != LastSymbol && + cantFail(std::next(Itr)->getSection()) == SymbolSection) return cantFail(std::next(Itr)->getAddress()); - const auto SectionEndAddress = Section->getAddress() + Section->getSize(); - if ((ELFSectionRef(*Section).getFlags() & ELF::SHF_TLS) || - SymbolEndAddress > SectionEndAddress) + const auto SymbolSectionEndAddress = + SymbolSection->getAddress() + SymbolSection->getSize(); + if ((ELFSectionRef(*SymbolSection).getFlags() & ELF::SHF_TLS) || + SymbolEndAddress > SymbolSectionEndAddress) return SymbolEndAddress; - return SectionEndAddress; + return SymbolSectionEndAddress; }; BinaryFunction *PreviousFunction = nullptr; unsigned AnonymousId = 0; + const auto MarkersBegin = std::next(LastSymbol); for (auto ISym = SortedFileSymbols.begin(); ISym != MarkersBegin; ++ISym) { const auto &Symbol = *ISym; // Keep undefined symbols for pretty printing? if (Symbol.getFlags() & SymbolRef::SF_Undefined) continue; - if (cantFail(Symbol.getType()) == SymbolRef::ST_File) + const auto SymbolType = cantFail(Symbol.getType()); + + if (SymbolType == SymbolRef::ST_File) continue; StringRef SymName = cantFail(Symbol.getName(), "cannot get symbol name"); uint64_t Address = cantFail(Symbol.getAddress(), "cannot get symbol address"); if (Address == 0) { - if (opts::Verbosity >= 1 && - cantFail(Symbol.getType()) == SymbolRef::ST_Function) + if (opts::Verbosity >= 1 && SymbolType == SymbolRef::ST_Function) errs() << "BOLT-WARNING: function with 0 address seen\n"; continue; } @@ -1288,21 +1284,21 @@ void RewriteInstance::discoverFileObjects() { // The field is used for disambiguation of local symbols since there // could be identical function names coming from identical file names // (e.g. from different directories). - std::string Prefix = Name + "/"; std::string AltPrefix; auto SFI = SymbolToFileName.find(Symbol); - if (SFI != SymbolToFileName.end()) { - AltPrefix = Prefix + std::string(SFI->second) + "/"; + if (SymbolType == SymbolRef::ST_Function && + SFI != SymbolToFileName.end()) { + AltPrefix = Name + "/" + std::string(SFI->second); } - UniqueName = uniquifyName(*BC, Prefix); + UniqueName = BC->uniquifySymbolName(Name); if (!AltPrefix.empty()) - AlternativeName = uniquifyName(*BC, AltPrefix); + AlternativeName = BC->uniquifySymbolName(AltPrefix); } uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); - uint64_t NextAddress = getNextAddress(ISym); - uint64_t TentativeSize = !SymbolSize ? NextAddress - Address : SymbolSize; + uint64_t TentativeSize = SymbolSize ? SymbolSize + : getNextAddress(ISym) - Address; uint64_t SymbolAlignment = Symbol.getAlignment(); unsigned SymbolFlags = Symbol.getFlags(); @@ -1330,7 +1326,7 @@ void RewriteInstance::discoverFileObjects() { << " for function\n"); if (!Section->isText()) { - assert(cantFail(Symbol.getType()) != SymbolRef::ST_Function && + assert(SymbolType != SymbolRef::ST_Function && "unexpected function inside non-code section"); DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code\n"); registerName(TentativeSize); @@ -1344,34 +1340,24 @@ void RewriteInstance::discoverFileObjects() { // Sometimes assembly functions are not marked as functions and neither are // their local labels. The only way to tell them apart is to look at // symbol scope - global vs local. - if (cantFail(Symbol.getType()) != SymbolRef::ST_Function) { - if (PreviousFunction) { - if (PreviousFunction->getSize() == 0) { - if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { - DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n"); - registerName(SymbolSize); - continue; - } - } else if (PreviousFunction->containsAddress(Address)) { - if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { - DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n"); - registerName(SymbolSize); - continue; - } else { - if (Address == PreviousFunction->getAddress() && SymbolSize == 0) { - DEBUG(dbgs() << "BOLT-DEBUG: ignoring symbol as a marker\n"); - registerName(SymbolSize); - continue; - } - if (opts::Verbosity > 1) { - errs() << "BOLT-WARNING: symbol " << UniqueName - << " seen in the middle of function " - << *PreviousFunction << ". Could be a new entry.\n"; - } - registerName(SymbolSize); - continue; - } + if (PreviousFunction && SymbolType != SymbolRef::ST_Function) { + if (PreviousFunction->containsAddress(Address)) { + if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { + DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n"); + } else if (Address == PreviousFunction->getAddress() && !SymbolSize) { + DEBUG(dbgs() << "BOLT-DEBUG: ignoring symbol as a marker\n"); + } else if (opts::Verbosity > 1) { + errs() << "BOLT-WARNING: symbol " << UniqueName + << " seen in the middle of function " + << *PreviousFunction << ". Could be a new entry.\n"; } + registerName(SymbolSize); + continue; + } else if (PreviousFunction->getSize() == 0 && + PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { + DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n"); + registerName(SymbolSize); + continue; } } @@ -1441,7 +1427,6 @@ void RewriteInstance::discoverFileObjects() { << Twine::utohexstr(Address) << "\n"); } } - TentativeSize = SymbolSize; } BinaryFunction *BF{nullptr}; @@ -1574,6 +1559,8 @@ void RewriteInstance::discoverFileObjects() { for (const auto &Section : RelocationSections) readRelocations(Section); + + BC->freeLocalSymbols(); } void RewriteInstance::disassemblePLT() { @@ -2527,10 +2514,12 @@ void RewriteInstance::readRelocations(const SectionRef &Section) { if (SymbolFlags & SymbolRef::SF_Global) { Name = SymbolName; } else { - Name = uniquifyName(*BC, StringRef(SymbolName).startswith( - BC->AsmInfo->getPrivateGlobalPrefix()) - ? "PG" + SymbolName + "/" - : SymbolName + "/"); + if (StringRef(SymbolName).startswith( + BC->AsmInfo->getPrivateGlobalPrefix())) { + Name = BC->uniquifySymbolName("PG" + SymbolName); + } else { + Name = BC->uniquifySymbolName(SymbolName); + } } ReferencedSymbol = BC->registerNameAtAddress(Name, SymbolAddress,