[BOLT] Support relocations without symbols

Summary:
lld may generate relocations without associated symbols. Instead of
rejecting binaries with such relocations, we can re-create the symbol
that the relocation refers to, based on the value extracted from the
relocated location.

(cherry picked from FBD10054576)
This commit is contained in:
Maksim Panchenko 2018-09-21 12:00:20 -07:00
parent bd0b99c45d
commit ce508b58c6
7 changed files with 78 additions and 76 deletions

View File

@ -228,9 +228,9 @@ BinaryContext::getSubBinaryData(BinaryData *BD) {
}
MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address,
Twine Prefix,
uint64_t Size,
uint16_t Alignment,
Twine Prefix,
unsigned Flags) {
auto Itr = BinaryDataMap.find(Address);
if (Itr != BinaryDataMap.end()) {
@ -536,7 +536,7 @@ void BinaryContext::fixBinaryDataHoles() {
if (BD->getSection() == Section)
setBinaryDataSize(Hole.first, Hole.second);
} else {
getOrCreateGlobalSymbol(Hole.first, Hole.second, 1, "HOLEat");
getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
}
}
}

View File

@ -386,9 +386,9 @@ public:
/// If there are multiple symbols registered at the \p Address, then
/// return the first one.
MCSymbol *getOrCreateGlobalSymbol(uint64_t Address,
uint64_t Size,
uint16_t Alignment,
Twine Prefix,
uint64_t Size = 0,
uint16_t Alignment = 0,
unsigned Flags = 0);
/// Register a symbol with \p Name at a given \p Address and \p Size.

View File

@ -1046,7 +1046,7 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
}
// TODO: use DWARF info to get size/alignment here?
auto *TargetSymbol =
BC.getOrCreateGlobalSymbol(TargetAddress, 0, 0, "DATAat");
BC.getOrCreateGlobalSymbol(TargetAddress, "DATAat");
DEBUG(if (opts::Verbosity >= 2) {
auto SectionName = BD ? BD->getSectionName() : "<unknown>";
dbgs() << "Created DATAat sym: " << TargetSymbol->getName()
@ -1301,7 +1301,7 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
}
TargetSymbol =
BC.getOrCreateGlobalSymbol(TargetAddress, 0, 0, "FUNCat");
BC.getOrCreateGlobalSymbol(TargetAddress, "FUNCat");
if (TargetAddress == 0) {
// We actually see calls to address 0 in presence of weak
// symbols originating from libraries. This code is never meant

View File

@ -1608,7 +1608,7 @@ public:
BinaryFunction &setPersonalityFunction(uint64_t Addr) {
assert(!PersonalityFunction && "can't set personality function twice");
PersonalityFunction = BC.getOrCreateGlobalSymbol(Addr, 0, 0, "FUNCat");
PersonalityFunction = BC.getOrCreateGlobalSymbol(Addr, "FUNCat");
return *this;
}
@ -1762,7 +1762,7 @@ public:
return nullptr;
// Register our island at global namespace
Symbol = BC.getOrCreateGlobalSymbol(Address, 0, 0, "ISLANDat");
Symbol = BC.getOrCreateGlobalSymbol(Address, "ISLANDat");
// Internal bookkeeping
const auto Offset = Address - getAddress();
assert((!IslandOffsets.count(Offset) || IslandOffsets[Offset] == Symbol) &&

View File

@ -637,9 +637,9 @@ void BinaryFunction::emitLSDA(MCStreamer *Streamer, bool EmitColdPart) {
if (TypeAddress) {
const auto *TypeSymbol =
BC.getOrCreateGlobalSymbol(TypeAddress,
"TI",
TTypeEncodingSize,
TTypeAlignment,
"TI");
TTypeAlignment);
auto *DotSymbol = BC.Ctx->createTempSymbol();
Streamer->EmitLabel(DotSymbol);
const auto *SubDotExpr = MCBinaryExpr::createSub(

View File

@ -1695,7 +1695,7 @@ void RewriteInstance::relocateEHFrameSection() {
if (!Symbol) {
DEBUG(dbgs() << "BOLT-DEBUG: creating symbol for DWARF reference at 0x"
<< Twine::utohexstr(Value) << '\n');
Symbol = BC->getOrCreateGlobalSymbol(Value, 0, 0, "FUNCat");
Symbol = BC->getOrCreateGlobalSymbol(Value, "FUNCat");
}
DEBUG(dbgs() << "BOLT-DEBUG: adding DWARF reference against symbol "
@ -1806,8 +1806,7 @@ void RewriteInstance::readSpecialSections() {
}
void RewriteInstance::adjustCommandLineOptions() {
if (BC->isAArch64() && opts::RelocationMode != cl::BOU_TRUE &&
!opts::AggregateOnly) {
if (BC->isAArch64() && !BC->HasRelocations) {
errs() << "BOLT-WARNING: non-relocation mode for AArch64 is not fully "
"supported\n";
}
@ -1874,6 +1873,7 @@ int64_t getRelocationAddend(const ELFObjectFileBase *Obj,
bool RewriteInstance::analyzeRelocation(const RelocationRef &Rel,
SectionRef RelocatedSection,
std::string &SymbolName,
bool &IsSectionRelocation,
uint64_t &SymbolAddress,
int64_t &Addend,
uint64_t &ExtractedValue) const {
@ -1882,60 +1882,43 @@ bool RewriteInstance::analyzeRelocation(const RelocationRef &Rel,
const bool IsAArch64 = BC->isAArch64();
// For value extraction.
// Extract the value.
StringRef RelocatedSectionContents;
RelocatedSection.getContents(RelocatedSectionContents);
DataExtractor DE(RelocatedSectionContents,
BC->AsmInfo->isLittleEndian(),
BC->AsmInfo->getCodePointerSize());
const bool IsPCRelative = Relocation::isPCRelative(Rel.getType());
auto SymbolIter = Rel.getSymbol();
assert(SymbolIter != InputFile->symbol_end() &&
"relocation symbol must exist");
const auto &Symbol = *SymbolIter;
SymbolName = cantFail(Symbol.getName());
SymbolAddress = cantFail(Symbol.getAddress());
Addend = getRelocationAddend(InputFile, Rel);
uint32_t RelocationOffset =
Rel.getOffset() - RelocatedSection.getAddress();
uint32_t RelocationOffset = Rel.getOffset() - RelocatedSection.getAddress();
const auto RelSize = Relocation::getSizeForType(Rel.getType());
ExtractedValue =
static_cast<uint64_t>(DE.getSigned(&RelocationOffset, RelSize));
ExtractedValue = static_cast<uint64_t>(DE.getSigned(&RelocationOffset,
RelSize));
if (IsAArch64) {
ExtractedValue = Relocation::extractValue(Rel.getType(),
ExtractedValue,
Rel.getOffset());
}
// Section symbols are marked as ST_Debug.
const bool SymbolIsSection =
(cantFail(Symbol.getType()) == SymbolRef::ST_Debug);
const auto PCRelOffset = IsPCRelative && !IsAArch64 ? Rel.getOffset() : 0;
Addend = getRelocationAddend(InputFile, Rel);
// If no symbol has been found or if it is a relocation requiring the
// creation of a GOT entry, do not link against the symbol but against
// whatever address was extracted from the instruction itself. We are
// not creating a GOT entry as this was already processed by the linker.
if (!SymbolAddress || Relocation::isGOT(Rel.getType())) {
assert(!SymbolIsSection);
if (ExtractedValue) {
SymbolAddress = ExtractedValue - Addend + PCRelOffset;
} else {
// This is a weird case. The extracted value is zero but the addend is
// non-zero and the relocation is not pc-rel. Using the previous logic,
// the SymbolAddress would end up as a huge number. Seen in
// exceptions_pic.test.
DEBUG(dbgs() << "BOLT-DEBUG: relocation @ 0x"
<< Twine::utohexstr(Rel.getOffset())
<< " value does not match addend for "
<< "relocation to undefined symbol.\n");
const bool IsPCRelative = Relocation::isPCRelative(Rel.getType());
const auto PCRelOffset = IsPCRelative && !IsAArch64 ? Rel.getOffset() : 0;
bool SkipVerification = false;
auto SymbolIter = Rel.getSymbol();
if (SymbolIter == InputFile->symbol_end()) {
SymbolAddress = ExtractedValue - Addend;
if (IsPCRelative)
SymbolAddress += PCRelOffset;
return true;
}
} else if (SymbolIsSection) {
auto *RelSymbol = BC->getOrCreateGlobalSymbol(SymbolAddress, "RELSYMat");
SymbolName = RelSymbol->getName();
IsSectionRelocation = false;
} else {
const auto &Symbol = *SymbolIter;
SymbolName = cantFail(Symbol.getName());
SymbolAddress = cantFail(Symbol.getAddress());
SkipVerification = (cantFail(Symbol.getType()) == SymbolRef::ST_Other);
// Section symbols are marked as ST_Debug.
IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug);
if (IsSectionRelocation) {
auto Section = Symbol.getSection();
if (Section && *Section != InputFile->section_end()) {
SymbolName = "section " + std::string(getSectionName(**Section));
@ -1953,8 +1936,34 @@ bool RewriteInstance::analyzeRelocation(const RelocationRef &Rel,
}
}
}
}
// If no symbol has been found or if it is a relocation requiring the
// creation of a GOT entry, do not link against the symbol but against
// whatever address was extracted from the instruction itself. We are
// not creating a GOT entry as this was already processed by the linker.
if (!SymbolAddress || Relocation::isGOT(Rel.getType())) {
assert(!IsSectionRelocation);
if (ExtractedValue) {
SymbolAddress = ExtractedValue - Addend + PCRelOffset;
} else {
// This is a weird case. The extracted value is zero but the addend is
// non-zero and the relocation is not pc-rel. Using the previous logic,
// the SymbolAddress would end up as a huge number. Seen in
// exceptions_pic.test.
DEBUG(dbgs() << "BOLT-DEBUG: relocation @ 0x"
<< Twine::utohexstr(Rel.getOffset())
<< " value does not match addend for "
<< "relocation to undefined symbol.\n");
SymbolAddress += PCRelOffset;
return true;
}
}
auto verifyExtractedValue = [&]() {
if (SkipVerification)
return true;
if (IsAArch64)
return true;
@ -1964,9 +1973,6 @@ bool RewriteInstance::analyzeRelocation(const RelocationRef &Rel,
if (Relocation::isTLS(Rel.getType()))
return true;
if (cantFail(Symbol.getType()) == SymbolRef::ST_Other)
return true;
return truncateToSize(ExtractedValue, RelSize) ==
truncateToSize(SymbolAddress + Addend - PCRelOffset, RelSize);
};
@ -1981,7 +1987,7 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
Section.getName(SectionName);
DEBUG(dbgs() << "BOLT-DEBUG: relocations for section "
<< SectionName << ":\n");
if (ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC) {
if (BinarySection(*BC, Section).isAllocatable()) {
DEBUG(dbgs() << "BOLT-DEBUG: ignoring runtime relocations\n");
return;
}
@ -1994,7 +2000,7 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
DEBUG(dbgs() << "BOLT-DEBUG: relocated section is "
<< RelocatedSectionName << '\n');
if (!(ELFSectionRef(RelocatedSection).getFlags() & ELF::SHF_ALLOC)) {
if (!BinarySection(*BC, RelocatedSection).isAllocatable()) {
DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocations against "
<< "non-allocatable section\n");
return;
@ -2046,10 +2052,11 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
uint64_t SymbolAddress;
int64_t Addend;
uint64_t ExtractedValue;
bool IsSectionRelocation;
if (!analyzeRelocation(Rel,
RelocatedSection,
SymbolName,
IsSectionRelocation,
SymbolAddress,
Addend,
ExtractedValue)) {
@ -2117,7 +2124,6 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
if (BC->isAArch64() && Rel.getType() == ELF::R_AARCH64_ADR_GOT_PAGE)
ForceRelocation = true;
// TODO: RefSection should be the same as **Rel.getSymbol().getSection()
auto RefSection = BC->getSectionForAddress(SymbolAddress);
if (!RefSection && !ForceRelocation) {
DEBUG(dbgs() << "BOLT-DEBUG: cannot determine referenced section.\n");
@ -2125,8 +2131,6 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
}
const bool IsToCode = RefSection && RefSection->isText();
const bool IsSectionRelocation =
(cantFail(Rel.getSymbol()->getType()) == SymbolRef::ST_Debug);
// Occasionally we may see a reference past the last byte of the function
// typically as a result of __builtin_unreachable(). Check it here.
@ -2298,7 +2302,6 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
SymbolAddress = BD->getAddress();
assert(Address == SymbolAddress + Addend);
} else {
auto Symbol = *Rel.getSymbol();
// These are mostly local data symbols but undefined symbols
// in relocation sections can get through here too, from .plt.
assert((IsAArch64 ||
@ -2306,14 +2309,18 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
BC->getSectionNameForAddress(SymbolAddress)->startswith(".plt"))
&& "known symbols should not resolve to anonymous locals");
if (IsSectionRelocation) {
ReferencedSymbol = BC->getOrCreateGlobalSymbol(SymbolAddress,
"SYMBOLat");
} else {
auto Symbol = *Rel.getSymbol();
const uint64_t SymbolSize =
IsAArch64 ? 0 : ELFSymbolRef(Symbol).getSize();
const uint64_t SymbolAlignment = IsAArch64 ? 1 : Symbol.getAlignment();
const unsigned SymbolFlags = Symbol.getFlags();
if (!IsSectionRelocation) {
const uint64_t SymbolAlignment =
IsAArch64 ? 1 : Symbol.getAlignment();
const auto SymbolFlags = Symbol.getFlags();
std::string Name;
if (Symbol.getFlags() & SymbolRef::SF_Global) {
if (SymbolFlags & SymbolRef::SF_Global) {
Name = SymbolName;
} else {
Name = uniquifyName(*BC, StringRef(SymbolName).startswith(
@ -2326,12 +2333,6 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
SymbolSize,
SymbolAlignment,
SymbolFlags);
} else {
ReferencedSymbol = BC->getOrCreateGlobalSymbol(SymbolAddress,
SymbolSize,
SymbolAlignment,
"SYMBOLat",
SymbolFlags);
}
if (!opts::AllowSectionRelocations && IsSectionRelocation) {

View File

@ -264,6 +264,7 @@ private:
bool analyzeRelocation(const RelocationRef &Rel,
SectionRef RelocatedSection,
std::string &SymbolName,
bool &IsSectionRelocation,
uint64_t &SymbolAddress,
int64_t &Addend,
uint64_t &ExtractedValue) const;