forked from OSchip/llvm-project
[BOLT] [AArch64] Handle constant islands spanning multiple functions
Fix BOLT's constant island mapping when a constant island marked by $d spans multiple functions. Currently, because BOLT only marks the constant island in the first function where $d is located, if the next function contains data at its start, BOLT will miss the data and try to disassemble it. This patch adds code to explicitly go through all symbols between $d and $x markers and mark their respective offsets as data, which stops BOLT from trying to disassemble data. It also adds MarkerType enum and refactors related functions. Reviewed By: yota9, rafauler Differential Revision: https://reviews.llvm.org/D126177
This commit is contained in:
parent
218393f44e
commit
8579db96e8
|
@ -82,6 +82,13 @@ inline raw_ostream &operator<<(raw_ostream &OS, const SegmentInfo &SegInfo) {
|
||||||
return OS;
|
return OS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AArch64-specific symbol markers used to delimit code/data in .text.
|
||||||
|
enum class MarkerSymType : char {
|
||||||
|
NONE = 0,
|
||||||
|
CODE,
|
||||||
|
DATA,
|
||||||
|
};
|
||||||
|
|
||||||
enum class MemoryContentsType : char {
|
enum class MemoryContentsType : char {
|
||||||
UNKNOWN = 0, /// Unknown contents.
|
UNKNOWN = 0, /// Unknown contents.
|
||||||
POSSIBLE_JUMP_TABLE, /// Possibly a non-PIC jump table.
|
POSSIBLE_JUMP_TABLE, /// Possibly a non-PIC jump table.
|
||||||
|
@ -662,6 +669,11 @@ public:
|
||||||
TheTriple->getArch() == llvm::Triple::x86_64;
|
TheTriple->getArch() == llvm::Triple::x86_64;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AArch64-specific functions to check if symbol is used to delimit
|
||||||
|
// code/data in .text. Code is marked by $x, data by $d.
|
||||||
|
MarkerSymType getMarkerType(const SymbolRef &Symbol) const;
|
||||||
|
bool isMarker(const SymbolRef &Symbol) const;
|
||||||
|
|
||||||
/// Iterate over all BinaryData.
|
/// Iterate over all BinaryData.
|
||||||
iterator_range<binary_data_const_iterator> getBinaryData() const {
|
iterator_range<binary_data_const_iterator> getBinaryData() const {
|
||||||
return make_range(BinaryDataMap.begin(), BinaryDataMap.end());
|
return make_range(BinaryDataMap.begin(), BinaryDataMap.end());
|
||||||
|
|
|
@ -1948,11 +1948,6 @@ public:
|
||||||
return ColdLSDASymbol;
|
return ColdLSDASymbol;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// True if the symbol is a mapping symbol used in AArch64 to delimit
|
|
||||||
/// data inside code section.
|
|
||||||
bool isDataMarker(const SymbolRef &Symbol, uint64_t SymbolSize) const;
|
|
||||||
bool isCodeMarker(const SymbolRef &Symbol, uint64_t SymbolSize) const;
|
|
||||||
|
|
||||||
void setOutputDataAddress(uint64_t Address) { OutputDataOffset = Address; }
|
void setOutputDataAddress(uint64_t Address) { OutputDataOffset = Address; }
|
||||||
|
|
||||||
uint64_t getOutputDataAddress() const { return OutputDataOffset; }
|
uint64_t getOutputDataAddress() const { return OutputDataOffset; }
|
||||||
|
|
|
@ -1639,6 +1639,35 @@ void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
|
||||||
|
// For aarch64, the ABI defines mapping symbols so we identify data in the
|
||||||
|
// code section (see IHI0056B). $x identifies a symbol starting code or the
|
||||||
|
// end of a data chunk inside code, $d indentifies start of data.
|
||||||
|
if (!isAArch64() || ELFSymbolRef(Symbol).getSize())
|
||||||
|
return MarkerSymType::NONE;
|
||||||
|
|
||||||
|
Expected<StringRef> NameOrError = Symbol.getName();
|
||||||
|
Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
|
||||||
|
|
||||||
|
if (!TypeOrError || !NameOrError)
|
||||||
|
return MarkerSymType::NONE;
|
||||||
|
|
||||||
|
if (*TypeOrError != SymbolRef::ST_Unknown)
|
||||||
|
return MarkerSymType::NONE;
|
||||||
|
|
||||||
|
if (*NameOrError == "$x" || NameOrError->startswith("$x."))
|
||||||
|
return MarkerSymType::CODE;
|
||||||
|
|
||||||
|
if (*NameOrError == "$d" || NameOrError->startswith("$d."))
|
||||||
|
return MarkerSymType::DATA;
|
||||||
|
|
||||||
|
return MarkerSymType::NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
|
||||||
|
return getMarkerType(Symbol) != MarkerSymType::NONE;
|
||||||
|
}
|
||||||
|
|
||||||
void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
|
void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
|
||||||
uint64_t Offset,
|
uint64_t Offset,
|
||||||
const BinaryFunction *Function,
|
const BinaryFunction *Function,
|
||||||
|
|
|
@ -3926,33 +3926,6 @@ void BinaryFunction::deleteConservativeEdges() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BinaryFunction::isDataMarker(const SymbolRef &Symbol,
|
|
||||||
uint64_t SymbolSize) const {
|
|
||||||
// For aarch64, the ABI defines mapping symbols so we identify data in the
|
|
||||||
// code section (see IHI0056B). $d identifies a symbol starting data contents.
|
|
||||||
if (BC.isAArch64() && Symbol.getType() &&
|
|
||||||
cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && SymbolSize == 0 &&
|
|
||||||
Symbol.getName() &&
|
|
||||||
(cantFail(Symbol.getName()) == "$d" ||
|
|
||||||
cantFail(Symbol.getName()).startswith("$d.")))
|
|
||||||
return true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool BinaryFunction::isCodeMarker(const SymbolRef &Symbol,
|
|
||||||
uint64_t SymbolSize) const {
|
|
||||||
// For aarch64, the ABI defines mapping symbols so we identify data in the
|
|
||||||
// code section (see IHI0056B). $x identifies a symbol starting code or the
|
|
||||||
// end of a data chunk inside code.
|
|
||||||
if (BC.isAArch64() && Symbol.getType() &&
|
|
||||||
cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && SymbolSize == 0 &&
|
|
||||||
Symbol.getName() &&
|
|
||||||
(cantFail(Symbol.getName()) == "$x" ||
|
|
||||||
cantFail(Symbol.getName()).startswith("$x.")))
|
|
||||||
return true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
|
bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
|
||||||
uint64_t SymbolSize) const {
|
uint64_t SymbolSize) const {
|
||||||
// If this symbol is in a different section from the one where the
|
// If this symbol is in a different section from the one where the
|
||||||
|
@ -3963,7 +3936,7 @@ bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
|
||||||
|
|
||||||
// Some symbols are tolerated inside function bodies, others are not.
|
// Some symbols are tolerated inside function bodies, others are not.
|
||||||
// The real function boundaries may not be known at this point.
|
// The real function boundaries may not be known at this point.
|
||||||
if (isDataMarker(Symbol, SymbolSize) || isCodeMarker(Symbol, SymbolSize))
|
if (BC.isMarker(Symbol))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
// It's okay to have a zero-sized symbol in the middle of non-zero-sized
|
// It's okay to have a zero-sized symbol in the middle of non-zero-sized
|
||||||
|
|
|
@ -880,47 +880,88 @@ void RewriteInstance::discoverFileObjects() {
|
||||||
std::vector<SymbolRef> SortedFileSymbols;
|
std::vector<SymbolRef> SortedFileSymbols;
|
||||||
std::copy_if(InputFile->symbol_begin(), InputFile->symbol_end(),
|
std::copy_if(InputFile->symbol_begin(), InputFile->symbol_end(),
|
||||||
std::back_inserter(SortedFileSymbols), isSymbolInMemory);
|
std::back_inserter(SortedFileSymbols), isSymbolInMemory);
|
||||||
|
auto CompareSymbols = [this](const SymbolRef &A, const SymbolRef &B) {
|
||||||
|
// Marker symbols have the highest precedence, while
|
||||||
|
// SECTIONs have the lowest.
|
||||||
|
auto AddressA = cantFail(A.getAddress());
|
||||||
|
auto AddressB = cantFail(B.getAddress());
|
||||||
|
if (AddressA != AddressB)
|
||||||
|
return AddressA < AddressB;
|
||||||
|
|
||||||
std::stable_sort(
|
bool AMarker = BC->isMarker(A);
|
||||||
SortedFileSymbols.begin(), SortedFileSymbols.end(),
|
bool BMarker = BC->isMarker(B);
|
||||||
[](const SymbolRef &A, const SymbolRef &B) {
|
if (AMarker || BMarker) {
|
||||||
// FUNC symbols have the highest precedence, while SECTIONs
|
return AMarker && !BMarker;
|
||||||
// have the lowest.
|
}
|
||||||
uint64_t AddressA = cantFail(A.getAddress());
|
|
||||||
uint64_t AddressB = cantFail(B.getAddress());
|
|
||||||
if (AddressA != AddressB)
|
|
||||||
return AddressA < AddressB;
|
|
||||||
|
|
||||||
SymbolRef::Type AType = cantFail(A.getType());
|
auto AType = cantFail(A.getType());
|
||||||
SymbolRef::Type BType = cantFail(B.getType());
|
auto BType = cantFail(B.getType());
|
||||||
if (AType == SymbolRef::ST_Function && BType != SymbolRef::ST_Function)
|
if (AType == SymbolRef::ST_Function && BType != SymbolRef::ST_Function)
|
||||||
return true;
|
return true;
|
||||||
if (BType == SymbolRef::ST_Debug && AType != SymbolRef::ST_Debug)
|
if (BType == SymbolRef::ST_Debug && AType != SymbolRef::ST_Debug)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
});
|
};
|
||||||
|
|
||||||
|
std::stable_sort(SortedFileSymbols.begin(), SortedFileSymbols.end(),
|
||||||
|
CompareSymbols);
|
||||||
|
|
||||||
|
auto LastSymbol = SortedFileSymbols.end() - 1;
|
||||||
|
|
||||||
// For aarch64, the ABI defines mapping symbols so we identify data in the
|
// For aarch64, the ABI defines mapping symbols so we identify data in the
|
||||||
// code section (see IHI0056B). $d identifies data contents.
|
// code section (see IHI0056B). $d identifies data contents.
|
||||||
auto LastSymbol = SortedFileSymbols.end() - 1;
|
// Compilers usually merge multiple data objects in a single $d-$x interval,
|
||||||
|
// but we need every data object to be marked with $d. Because of that we
|
||||||
|
// create a vector of MarkerSyms with all locations of data objects.
|
||||||
|
|
||||||
|
struct MarkerSym {
|
||||||
|
uint64_t Address;
|
||||||
|
MarkerSymType Type;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<MarkerSym> SortedMarkerSymbols;
|
||||||
|
auto addExtraDataMarkerPerSymbol =
|
||||||
|
[this](const std::vector<SymbolRef> &SortedFileSymbols,
|
||||||
|
std::vector<MarkerSym> &SortedMarkerSymbols) {
|
||||||
|
bool IsData = false;
|
||||||
|
uint64_t LastAddr = 0;
|
||||||
|
for (auto Sym = SortedFileSymbols.begin();
|
||||||
|
Sym < SortedFileSymbols.end(); ++Sym) {
|
||||||
|
uint64_t Address = cantFail(Sym->getAddress());
|
||||||
|
if (LastAddr == Address) // don't repeat markers
|
||||||
|
continue;
|
||||||
|
|
||||||
|
MarkerSymType MarkerType = BC->getMarkerType(*Sym);
|
||||||
|
if (MarkerType != MarkerSymType::NONE) {
|
||||||
|
SortedMarkerSymbols.push_back(MarkerSym{Address, MarkerType});
|
||||||
|
LastAddr = Address;
|
||||||
|
IsData = MarkerType == MarkerSymType::DATA;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (IsData) {
|
||||||
|
SortedMarkerSymbols.push_back(
|
||||||
|
MarkerSym{cantFail(Sym->getAddress()), MarkerSymType::DATA});
|
||||||
|
LastAddr = Address;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
if (BC->isAArch64()) {
|
if (BC->isAArch64()) {
|
||||||
|
addExtraDataMarkerPerSymbol(SortedFileSymbols, SortedMarkerSymbols);
|
||||||
LastSymbol = std::stable_partition(
|
LastSymbol = std::stable_partition(
|
||||||
SortedFileSymbols.begin(), SortedFileSymbols.end(),
|
SortedFileSymbols.begin(), SortedFileSymbols.end(),
|
||||||
[](const SymbolRef &Symbol) {
|
[this](const SymbolRef &Symbol) { return !BC->isMarker(Symbol); });
|
||||||
StringRef Name = cantFail(Symbol.getName());
|
|
||||||
return !(cantFail(Symbol.getType()) == SymbolRef::ST_Unknown &&
|
|
||||||
(Name == "$d" || Name.startswith("$d.") || Name == "$x" ||
|
|
||||||
Name.startswith("$x.")));
|
|
||||||
});
|
|
||||||
--LastSymbol;
|
--LastSymbol;
|
||||||
}
|
}
|
||||||
|
|
||||||
BinaryFunction *PreviousFunction = nullptr;
|
BinaryFunction *PreviousFunction = nullptr;
|
||||||
unsigned AnonymousId = 0;
|
unsigned AnonymousId = 0;
|
||||||
|
|
||||||
const auto MarkersBegin = std::next(LastSymbol);
|
const auto SortedSymbolsEnd = std::next(LastSymbol);
|
||||||
for (auto ISym = SortedFileSymbols.begin(); ISym != MarkersBegin; ++ISym) {
|
for (auto ISym = SortedFileSymbols.begin(); ISym != SortedSymbolsEnd;
|
||||||
|
++ISym) {
|
||||||
const SymbolRef &Symbol = *ISym;
|
const SymbolRef &Symbol = *ISym;
|
||||||
// Keep undefined symbols for pretty printing?
|
// Keep undefined symbols for pretty printing?
|
||||||
if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined)
|
if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined)
|
||||||
|
@ -1213,25 +1254,24 @@ void RewriteInstance::discoverFileObjects() {
|
||||||
adjustFunctionBoundaries();
|
adjustFunctionBoundaries();
|
||||||
|
|
||||||
// Annotate functions with code/data markers in AArch64
|
// Annotate functions with code/data markers in AArch64
|
||||||
for (auto ISym = MarkersBegin; ISym != SortedFileSymbols.end(); ++ISym) {
|
for (auto ISym = SortedMarkerSymbols.begin();
|
||||||
const SymbolRef &Symbol = *ISym;
|
ISym != SortedMarkerSymbols.end(); ++ISym) {
|
||||||
uint64_t Address =
|
|
||||||
cantFail(Symbol.getAddress(), "cannot get symbol address");
|
auto *BF =
|
||||||
uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
|
BC->getBinaryFunctionContainingAddress(ISym->Address, true, true);
|
||||||
BinaryFunction *BF =
|
|
||||||
BC->getBinaryFunctionContainingAddress(Address, true, true);
|
|
||||||
if (!BF) {
|
if (!BF) {
|
||||||
// Stray marker
|
// Stray marker
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const uint64_t EntryOffset = Address - BF->getAddress();
|
const auto EntryOffset = ISym->Address - BF->getAddress();
|
||||||
if (BF->isCodeMarker(Symbol, SymbolSize)) {
|
if (ISym->Type == MarkerSymType::CODE) {
|
||||||
BF->markCodeAtOffset(EntryOffset);
|
BF->markCodeAtOffset(EntryOffset);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (BF->isDataMarker(Symbol, SymbolSize)) {
|
if (ISym->Type == MarkerSymType::DATA) {
|
||||||
BF->markDataAtOffset(EntryOffset);
|
BF->markDataAtOffset(EntryOffset);
|
||||||
BC->AddressToConstantIslandMap[Address] = BF;
|
BC->AddressToConstantIslandMap[ISym->Address] = BF;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
llvm_unreachable("Unknown marker");
|
llvm_unreachable("Unknown marker");
|
||||||
|
|
|
@ -0,0 +1,90 @@
|
||||||
|
--- !ELF
|
||||||
|
FileHeader:
|
||||||
|
Class: ELFCLASS64
|
||||||
|
Data: ELFDATA2LSB
|
||||||
|
Type: ET_EXEC
|
||||||
|
Machine: EM_AARCH64
|
||||||
|
Entry: 0x210134
|
||||||
|
ProgramHeaders:
|
||||||
|
- Type: PT_PHDR
|
||||||
|
Flags: [ PF_R ]
|
||||||
|
VAddr: 0x200040
|
||||||
|
Align: 0x8
|
||||||
|
FileSize: 0x0000e0
|
||||||
|
MemSize: 0x0000e0
|
||||||
|
Offset: 0x000040
|
||||||
|
- Type: PT_LOAD
|
||||||
|
Flags: [ PF_R ]
|
||||||
|
VAddr: 0x200000
|
||||||
|
Align: 0x10000
|
||||||
|
FileSize: 0x000120
|
||||||
|
MemSize: 0x000120
|
||||||
|
Offset: 0x000000
|
||||||
|
- Type: PT_LOAD
|
||||||
|
Flags: [ PF_X, PF_R ]
|
||||||
|
FirstSec: .text
|
||||||
|
LastSec: .text
|
||||||
|
VAddr: 0x210120
|
||||||
|
Align: 0x10000
|
||||||
|
- Type: PT_GNU_STACK
|
||||||
|
Flags: [ PF_W, PF_R ]
|
||||||
|
Align: 0x0
|
||||||
|
Sections:
|
||||||
|
- Name: .text
|
||||||
|
Type: SHT_PROGBITS
|
||||||
|
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
|
||||||
|
Address: 0x210120
|
||||||
|
AddressAlign: 0x4
|
||||||
|
Content: 030F0B0700000000030F0B0700000000C0035FD6FFFFFF97000080D2A80B8052010000D4
|
||||||
|
- Name: .rela.text
|
||||||
|
Type: SHT_RELA
|
||||||
|
Flags: [ SHF_INFO_LINK ]
|
||||||
|
Link: .symtab
|
||||||
|
AddressAlign: 0x8
|
||||||
|
Info: .text
|
||||||
|
Relocations:
|
||||||
|
- Offset: 0x210134
|
||||||
|
Symbol: dummy
|
||||||
|
Type: R_AARCH64_CALL26
|
||||||
|
- Name: .comment
|
||||||
|
Type: SHT_PROGBITS
|
||||||
|
Flags: [ SHF_MERGE, SHF_STRINGS ]
|
||||||
|
AddressAlign: 0x1
|
||||||
|
EntSize: 0x1
|
||||||
|
Content: 4C696E6B65723A204C4C442031352E302E3000
|
||||||
|
Symbols:
|
||||||
|
- Name: val
|
||||||
|
Index: SHN_ABS
|
||||||
|
Value: 0x70B0F03
|
||||||
|
- Name: first
|
||||||
|
Section: .text
|
||||||
|
Value: 0x210120
|
||||||
|
Size: 0x8
|
||||||
|
- Name: '$d.0'
|
||||||
|
Section: .text
|
||||||
|
Value: 0x210120
|
||||||
|
- Name: second
|
||||||
|
Section: .text
|
||||||
|
Value: 0x210128
|
||||||
|
Size: 0x8
|
||||||
|
- Name: '$x.1'
|
||||||
|
Section: .text
|
||||||
|
Value: 0x210130
|
||||||
|
- Name: .text
|
||||||
|
Type: STT_SECTION
|
||||||
|
Section: .text
|
||||||
|
Value: 0x210120
|
||||||
|
- Name: .comment
|
||||||
|
Type: STT_SECTION
|
||||||
|
Section: .comment
|
||||||
|
- Name: dummy
|
||||||
|
Type: STT_FUNC
|
||||||
|
Section: .text
|
||||||
|
Binding: STB_GLOBAL
|
||||||
|
Value: 0x210130
|
||||||
|
- Name: _start
|
||||||
|
Type: STT_FUNC
|
||||||
|
Section: .text
|
||||||
|
Binding: STB_GLOBAL
|
||||||
|
Value: 0x210134
|
||||||
|
...
|
|
@ -0,0 +1,34 @@
|
||||||
|
// This test checks that multiple data objects in text of which only first is marked get disassembled properly
|
||||||
|
|
||||||
|
// RUN: yaml2obj %S/Inputs/unmarked-data.yaml -o %t.exe
|
||||||
|
// RUN: llvm-bolt %t.exe -o %t.bolt -lite=0 -use-old-text=0 2>&1 | FileCheck %s
|
||||||
|
// CHECK-NOT: BOLT-WARNING
|
||||||
|
// RUN: llvm-objdump -j .text -d --disassemble-symbols=first,second %t.bolt | FileCheck %s -check-prefix=CHECK-SYMBOL
|
||||||
|
// CHECK-SYMBOL: <first>:
|
||||||
|
// CHECK-SYMBOL: <second>:
|
||||||
|
|
||||||
|
// YAML is based in the following assembly:
|
||||||
|
|
||||||
|
.equ val, 0x070b0f03 // we use constant that is not a valid instruction so that it can't be silently dissassembled
|
||||||
|
.text
|
||||||
|
|
||||||
|
first:
|
||||||
|
.xword val
|
||||||
|
.size first, .-first
|
||||||
|
|
||||||
|
second:
|
||||||
|
.xword val
|
||||||
|
.size second, .-second
|
||||||
|
|
||||||
|
.globl dummy
|
||||||
|
.type dummy, %function
|
||||||
|
dummy: // dummy function to force relocations
|
||||||
|
ret
|
||||||
|
|
||||||
|
.globl _start
|
||||||
|
.type _start, %function
|
||||||
|
_start:
|
||||||
|
bl dummy
|
||||||
|
mov x0, #0
|
||||||
|
mov w8, #93
|
||||||
|
svc #0
|
Loading…
Reference in New Issue