forked from OSchip/llvm-project
[BOLT] [AArch64] Handle constant islands spanning multiple functions
Fix BOLT's constant island mapping when a constant island marked by $d spans multiple functions. Currently, because BOLT only marks the constant island in the first function where $d is located, if the next function contains data at its start, BOLT will miss the data and try to disassemble it. This patch adds code to explicitly go through all symbols between $d and $x markers and mark their respective offsets as data, which stops BOLT from trying to disassemble data. It also adds MarkerType enum and refactors related functions. Reviewed By: yota9, rafauler Differential Revision: https://reviews.llvm.org/D126177
This commit is contained in:
parent
218393f44e
commit
8579db96e8
|
@ -82,6 +82,13 @@ inline raw_ostream &operator<<(raw_ostream &OS, const SegmentInfo &SegInfo) {
|
|||
return OS;
|
||||
}
|
||||
|
||||
// AArch64-specific symbol markers used to delimit code/data in .text.
|
||||
enum class MarkerSymType : char {
|
||||
NONE = 0,
|
||||
CODE,
|
||||
DATA,
|
||||
};
|
||||
|
||||
enum class MemoryContentsType : char {
|
||||
UNKNOWN = 0, /// Unknown contents.
|
||||
POSSIBLE_JUMP_TABLE, /// Possibly a non-PIC jump table.
|
||||
|
@ -662,6 +669,11 @@ public:
|
|||
TheTriple->getArch() == llvm::Triple::x86_64;
|
||||
}
|
||||
|
||||
// AArch64-specific functions to check if symbol is used to delimit
|
||||
// code/data in .text. Code is marked by $x, data by $d.
|
||||
MarkerSymType getMarkerType(const SymbolRef &Symbol) const;
|
||||
bool isMarker(const SymbolRef &Symbol) const;
|
||||
|
||||
/// Iterate over all BinaryData.
|
||||
iterator_range<binary_data_const_iterator> getBinaryData() const {
|
||||
return make_range(BinaryDataMap.begin(), BinaryDataMap.end());
|
||||
|
|
|
@ -1948,11 +1948,6 @@ public:
|
|||
return ColdLSDASymbol;
|
||||
}
|
||||
|
||||
/// True if the symbol is a mapping symbol used in AArch64 to delimit
|
||||
/// data inside code section.
|
||||
bool isDataMarker(const SymbolRef &Symbol, uint64_t SymbolSize) const;
|
||||
bool isCodeMarker(const SymbolRef &Symbol, uint64_t SymbolSize) const;
|
||||
|
||||
void setOutputDataAddress(uint64_t Address) { OutputDataOffset = Address; }
|
||||
|
||||
uint64_t getOutputDataAddress() const { return OutputDataOffset; }
|
||||
|
|
|
@ -1639,6 +1639,35 @@ void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
|
|||
}
|
||||
}
|
||||
|
||||
MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
|
||||
// For aarch64, the ABI defines mapping symbols so we identify data in the
|
||||
// code section (see IHI0056B). $x identifies a symbol starting code or the
|
||||
// end of a data chunk inside code, $d indentifies start of data.
|
||||
if (!isAArch64() || ELFSymbolRef(Symbol).getSize())
|
||||
return MarkerSymType::NONE;
|
||||
|
||||
Expected<StringRef> NameOrError = Symbol.getName();
|
||||
Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
|
||||
|
||||
if (!TypeOrError || !NameOrError)
|
||||
return MarkerSymType::NONE;
|
||||
|
||||
if (*TypeOrError != SymbolRef::ST_Unknown)
|
||||
return MarkerSymType::NONE;
|
||||
|
||||
if (*NameOrError == "$x" || NameOrError->startswith("$x."))
|
||||
return MarkerSymType::CODE;
|
||||
|
||||
if (*NameOrError == "$d" || NameOrError->startswith("$d."))
|
||||
return MarkerSymType::DATA;
|
||||
|
||||
return MarkerSymType::NONE;
|
||||
}
|
||||
|
||||
bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
|
||||
return getMarkerType(Symbol) != MarkerSymType::NONE;
|
||||
}
|
||||
|
||||
void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
|
||||
uint64_t Offset,
|
||||
const BinaryFunction *Function,
|
||||
|
|
|
@ -3926,33 +3926,6 @@ void BinaryFunction::deleteConservativeEdges() {
|
|||
}
|
||||
}
|
||||
|
||||
bool BinaryFunction::isDataMarker(const SymbolRef &Symbol,
|
||||
uint64_t SymbolSize) const {
|
||||
// For aarch64, the ABI defines mapping symbols so we identify data in the
|
||||
// code section (see IHI0056B). $d identifies a symbol starting data contents.
|
||||
if (BC.isAArch64() && Symbol.getType() &&
|
||||
cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && SymbolSize == 0 &&
|
||||
Symbol.getName() &&
|
||||
(cantFail(Symbol.getName()) == "$d" ||
|
||||
cantFail(Symbol.getName()).startswith("$d.")))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool BinaryFunction::isCodeMarker(const SymbolRef &Symbol,
|
||||
uint64_t SymbolSize) const {
|
||||
// For aarch64, the ABI defines mapping symbols so we identify data in the
|
||||
// code section (see IHI0056B). $x identifies a symbol starting code or the
|
||||
// end of a data chunk inside code.
|
||||
if (BC.isAArch64() && Symbol.getType() &&
|
||||
cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && SymbolSize == 0 &&
|
||||
Symbol.getName() &&
|
||||
(cantFail(Symbol.getName()) == "$x" ||
|
||||
cantFail(Symbol.getName()).startswith("$x.")))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
|
||||
uint64_t SymbolSize) const {
|
||||
// If this symbol is in a different section from the one where the
|
||||
|
@ -3963,7 +3936,7 @@ bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
|
|||
|
||||
// Some symbols are tolerated inside function bodies, others are not.
|
||||
// The real function boundaries may not be known at this point.
|
||||
if (isDataMarker(Symbol, SymbolSize) || isCodeMarker(Symbol, SymbolSize))
|
||||
if (BC.isMarker(Symbol))
|
||||
return true;
|
||||
|
||||
// It's okay to have a zero-sized symbol in the middle of non-zero-sized
|
||||
|
|
|
@ -880,47 +880,88 @@ void RewriteInstance::discoverFileObjects() {
|
|||
std::vector<SymbolRef> SortedFileSymbols;
|
||||
std::copy_if(InputFile->symbol_begin(), InputFile->symbol_end(),
|
||||
std::back_inserter(SortedFileSymbols), isSymbolInMemory);
|
||||
auto CompareSymbols = [this](const SymbolRef &A, const SymbolRef &B) {
|
||||
// Marker symbols have the highest precedence, while
|
||||
// SECTIONs have the lowest.
|
||||
auto AddressA = cantFail(A.getAddress());
|
||||
auto AddressB = cantFail(B.getAddress());
|
||||
if (AddressA != AddressB)
|
||||
return AddressA < AddressB;
|
||||
|
||||
std::stable_sort(
|
||||
SortedFileSymbols.begin(), SortedFileSymbols.end(),
|
||||
[](const SymbolRef &A, const SymbolRef &B) {
|
||||
// FUNC symbols have the highest precedence, while SECTIONs
|
||||
// have the lowest.
|
||||
uint64_t AddressA = cantFail(A.getAddress());
|
||||
uint64_t AddressB = cantFail(B.getAddress());
|
||||
if (AddressA != AddressB)
|
||||
return AddressA < AddressB;
|
||||
bool AMarker = BC->isMarker(A);
|
||||
bool BMarker = BC->isMarker(B);
|
||||
if (AMarker || BMarker) {
|
||||
return AMarker && !BMarker;
|
||||
}
|
||||
|
||||
SymbolRef::Type AType = cantFail(A.getType());
|
||||
SymbolRef::Type BType = cantFail(B.getType());
|
||||
if (AType == SymbolRef::ST_Function && BType != SymbolRef::ST_Function)
|
||||
return true;
|
||||
if (BType == SymbolRef::ST_Debug && AType != SymbolRef::ST_Debug)
|
||||
return true;
|
||||
auto AType = cantFail(A.getType());
|
||||
auto BType = cantFail(B.getType());
|
||||
if (AType == SymbolRef::ST_Function && BType != SymbolRef::ST_Function)
|
||||
return true;
|
||||
if (BType == SymbolRef::ST_Debug && AType != SymbolRef::ST_Debug)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
});
|
||||
return false;
|
||||
};
|
||||
|
||||
std::stable_sort(SortedFileSymbols.begin(), SortedFileSymbols.end(),
|
||||
CompareSymbols);
|
||||
|
||||
auto LastSymbol = SortedFileSymbols.end() - 1;
|
||||
|
||||
// For aarch64, the ABI defines mapping symbols so we identify data in the
|
||||
// code section (see IHI0056B). $d identifies data contents.
|
||||
auto LastSymbol = SortedFileSymbols.end() - 1;
|
||||
// Compilers usually merge multiple data objects in a single $d-$x interval,
|
||||
// but we need every data object to be marked with $d. Because of that we
|
||||
// create a vector of MarkerSyms with all locations of data objects.
|
||||
|
||||
struct MarkerSym {
|
||||
uint64_t Address;
|
||||
MarkerSymType Type;
|
||||
};
|
||||
|
||||
std::vector<MarkerSym> SortedMarkerSymbols;
|
||||
auto addExtraDataMarkerPerSymbol =
|
||||
[this](const std::vector<SymbolRef> &SortedFileSymbols,
|
||||
std::vector<MarkerSym> &SortedMarkerSymbols) {
|
||||
bool IsData = false;
|
||||
uint64_t LastAddr = 0;
|
||||
for (auto Sym = SortedFileSymbols.begin();
|
||||
Sym < SortedFileSymbols.end(); ++Sym) {
|
||||
uint64_t Address = cantFail(Sym->getAddress());
|
||||
if (LastAddr == Address) // don't repeat markers
|
||||
continue;
|
||||
|
||||
MarkerSymType MarkerType = BC->getMarkerType(*Sym);
|
||||
if (MarkerType != MarkerSymType::NONE) {
|
||||
SortedMarkerSymbols.push_back(MarkerSym{Address, MarkerType});
|
||||
LastAddr = Address;
|
||||
IsData = MarkerType == MarkerSymType::DATA;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (IsData) {
|
||||
SortedMarkerSymbols.push_back(
|
||||
MarkerSym{cantFail(Sym->getAddress()), MarkerSymType::DATA});
|
||||
LastAddr = Address;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if (BC->isAArch64()) {
|
||||
addExtraDataMarkerPerSymbol(SortedFileSymbols, SortedMarkerSymbols);
|
||||
LastSymbol = std::stable_partition(
|
||||
SortedFileSymbols.begin(), SortedFileSymbols.end(),
|
||||
[](const SymbolRef &Symbol) {
|
||||
StringRef Name = cantFail(Symbol.getName());
|
||||
return !(cantFail(Symbol.getType()) == SymbolRef::ST_Unknown &&
|
||||
(Name == "$d" || Name.startswith("$d.") || Name == "$x" ||
|
||||
Name.startswith("$x.")));
|
||||
});
|
||||
[this](const SymbolRef &Symbol) { return !BC->isMarker(Symbol); });
|
||||
--LastSymbol;
|
||||
}
|
||||
|
||||
BinaryFunction *PreviousFunction = nullptr;
|
||||
unsigned AnonymousId = 0;
|
||||
|
||||
const auto MarkersBegin = std::next(LastSymbol);
|
||||
for (auto ISym = SortedFileSymbols.begin(); ISym != MarkersBegin; ++ISym) {
|
||||
const auto SortedSymbolsEnd = std::next(LastSymbol);
|
||||
for (auto ISym = SortedFileSymbols.begin(); ISym != SortedSymbolsEnd;
|
||||
++ISym) {
|
||||
const SymbolRef &Symbol = *ISym;
|
||||
// Keep undefined symbols for pretty printing?
|
||||
if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined)
|
||||
|
@ -1213,25 +1254,24 @@ void RewriteInstance::discoverFileObjects() {
|
|||
adjustFunctionBoundaries();
|
||||
|
||||
// Annotate functions with code/data markers in AArch64
|
||||
for (auto ISym = MarkersBegin; ISym != SortedFileSymbols.end(); ++ISym) {
|
||||
const SymbolRef &Symbol = *ISym;
|
||||
uint64_t Address =
|
||||
cantFail(Symbol.getAddress(), "cannot get symbol address");
|
||||
uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
|
||||
BinaryFunction *BF =
|
||||
BC->getBinaryFunctionContainingAddress(Address, true, true);
|
||||
for (auto ISym = SortedMarkerSymbols.begin();
|
||||
ISym != SortedMarkerSymbols.end(); ++ISym) {
|
||||
|
||||
auto *BF =
|
||||
BC->getBinaryFunctionContainingAddress(ISym->Address, true, true);
|
||||
|
||||
if (!BF) {
|
||||
// Stray marker
|
||||
continue;
|
||||
}
|
||||
const uint64_t EntryOffset = Address - BF->getAddress();
|
||||
if (BF->isCodeMarker(Symbol, SymbolSize)) {
|
||||
const auto EntryOffset = ISym->Address - BF->getAddress();
|
||||
if (ISym->Type == MarkerSymType::CODE) {
|
||||
BF->markCodeAtOffset(EntryOffset);
|
||||
continue;
|
||||
}
|
||||
if (BF->isDataMarker(Symbol, SymbolSize)) {
|
||||
if (ISym->Type == MarkerSymType::DATA) {
|
||||
BF->markDataAtOffset(EntryOffset);
|
||||
BC->AddressToConstantIslandMap[Address] = BF;
|
||||
BC->AddressToConstantIslandMap[ISym->Address] = BF;
|
||||
continue;
|
||||
}
|
||||
llvm_unreachable("Unknown marker");
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
--- !ELF
|
||||
FileHeader:
|
||||
Class: ELFCLASS64
|
||||
Data: ELFDATA2LSB
|
||||
Type: ET_EXEC
|
||||
Machine: EM_AARCH64
|
||||
Entry: 0x210134
|
||||
ProgramHeaders:
|
||||
- Type: PT_PHDR
|
||||
Flags: [ PF_R ]
|
||||
VAddr: 0x200040
|
||||
Align: 0x8
|
||||
FileSize: 0x0000e0
|
||||
MemSize: 0x0000e0
|
||||
Offset: 0x000040
|
||||
- Type: PT_LOAD
|
||||
Flags: [ PF_R ]
|
||||
VAddr: 0x200000
|
||||
Align: 0x10000
|
||||
FileSize: 0x000120
|
||||
MemSize: 0x000120
|
||||
Offset: 0x000000
|
||||
- Type: PT_LOAD
|
||||
Flags: [ PF_X, PF_R ]
|
||||
FirstSec: .text
|
||||
LastSec: .text
|
||||
VAddr: 0x210120
|
||||
Align: 0x10000
|
||||
- Type: PT_GNU_STACK
|
||||
Flags: [ PF_W, PF_R ]
|
||||
Align: 0x0
|
||||
Sections:
|
||||
- Name: .text
|
||||
Type: SHT_PROGBITS
|
||||
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
|
||||
Address: 0x210120
|
||||
AddressAlign: 0x4
|
||||
Content: 030F0B0700000000030F0B0700000000C0035FD6FFFFFF97000080D2A80B8052010000D4
|
||||
- Name: .rela.text
|
||||
Type: SHT_RELA
|
||||
Flags: [ SHF_INFO_LINK ]
|
||||
Link: .symtab
|
||||
AddressAlign: 0x8
|
||||
Info: .text
|
||||
Relocations:
|
||||
- Offset: 0x210134
|
||||
Symbol: dummy
|
||||
Type: R_AARCH64_CALL26
|
||||
- Name: .comment
|
||||
Type: SHT_PROGBITS
|
||||
Flags: [ SHF_MERGE, SHF_STRINGS ]
|
||||
AddressAlign: 0x1
|
||||
EntSize: 0x1
|
||||
Content: 4C696E6B65723A204C4C442031352E302E3000
|
||||
Symbols:
|
||||
- Name: val
|
||||
Index: SHN_ABS
|
||||
Value: 0x70B0F03
|
||||
- Name: first
|
||||
Section: .text
|
||||
Value: 0x210120
|
||||
Size: 0x8
|
||||
- Name: '$d.0'
|
||||
Section: .text
|
||||
Value: 0x210120
|
||||
- Name: second
|
||||
Section: .text
|
||||
Value: 0x210128
|
||||
Size: 0x8
|
||||
- Name: '$x.1'
|
||||
Section: .text
|
||||
Value: 0x210130
|
||||
- Name: .text
|
||||
Type: STT_SECTION
|
||||
Section: .text
|
||||
Value: 0x210120
|
||||
- Name: .comment
|
||||
Type: STT_SECTION
|
||||
Section: .comment
|
||||
- Name: dummy
|
||||
Type: STT_FUNC
|
||||
Section: .text
|
||||
Binding: STB_GLOBAL
|
||||
Value: 0x210130
|
||||
- Name: _start
|
||||
Type: STT_FUNC
|
||||
Section: .text
|
||||
Binding: STB_GLOBAL
|
||||
Value: 0x210134
|
||||
...
|
|
@ -0,0 +1,34 @@
|
|||
// This test checks that multiple data objects in text of which only first is marked get disassembled properly
|
||||
|
||||
// RUN: yaml2obj %S/Inputs/unmarked-data.yaml -o %t.exe
|
||||
// RUN: llvm-bolt %t.exe -o %t.bolt -lite=0 -use-old-text=0 2>&1 | FileCheck %s
|
||||
// CHECK-NOT: BOLT-WARNING
|
||||
// RUN: llvm-objdump -j .text -d --disassemble-symbols=first,second %t.bolt | FileCheck %s -check-prefix=CHECK-SYMBOL
|
||||
// CHECK-SYMBOL: <first>:
|
||||
// CHECK-SYMBOL: <second>:
|
||||
|
||||
// YAML is based in the following assembly:
|
||||
|
||||
.equ val, 0x070b0f03 // we use constant that is not a valid instruction so that it can't be silently dissassembled
|
||||
.text
|
||||
|
||||
first:
|
||||
.xword val
|
||||
.size first, .-first
|
||||
|
||||
second:
|
||||
.xword val
|
||||
.size second, .-second
|
||||
|
||||
.globl dummy
|
||||
.type dummy, %function
|
||||
dummy: // dummy function to force relocations
|
||||
ret
|
||||
|
||||
.globl _start
|
||||
.type _start, %function
|
||||
_start:
|
||||
bl dummy
|
||||
mov x0, #0
|
||||
mov w8, #93
|
||||
svc #0
|
Loading…
Reference in New Issue