[BOLT-AArch64] Fix AArch64 port - make it work with hhvm

Summary:
This diff has 3 fixes. First fixes the way relocations are read
and interpreted for AArch64, so the references are preserved correctly.
Second, it fixes constant islands to be able to live in the very first
address of a function (which means there is no code, but this function
contains just a constant island).
Third, it fixes function splitting to do not outline entry points for
AArch64. This was done because some functions may load pointers to its
internal basic blocks, issueing a short-range ADR instruction to do so
without its pair ADRP (since the size of the function is supposed to
be small). But when we move this block to a cold region, that is not
the case anymore. Since blocks with a reference are marked as entry
points, we conservatively disable outlining for them in AArch64.

(cherry picked from FBD7505067)
This commit is contained in:
Rafael Auler 2018-03-20 14:34:58 -07:00 committed by Maksim Panchenko
parent 489e514530
commit 7df6a6d5c6
10 changed files with 110 additions and 80 deletions

View File

@ -300,6 +300,10 @@ public:
std::unique_ptr<MCObjectWriter> createObjectWriter(raw_pwrite_stream &OS);
bool isAArch64() const {
return TheTriple->getArch() == llvm::Triple::aarch64;
}
/// Iterate over all BinaryData.
iterator_range<binary_data_const_iterator> getBinaryData() const {
return make_range(BinaryDataMap.begin(), BinaryDataMap.end());

View File

@ -657,7 +657,7 @@ IndirectBranchType BinaryFunction::processIndirectBranch(MCInst &Instruction,
auto Begin = Instructions.begin();
auto End = Instructions.end();
if (BC.TheTriple->getArch() == llvm::Triple::aarch64) {
if (BC.isAArch64()) {
PreserveNops = BC.HasRelocations;
// Start at the last label as an approximation of the current basic block.
// This is a heuristic, since the full set of labels have yet to be
@ -688,7 +688,7 @@ IndirectBranchType BinaryFunction::processIndirectBranch(MCInst &Instruction,
if (MemLocInstr != &Instruction)
IndexRegNum = 0;
if (BC.TheTriple->getArch() == llvm::Triple::aarch64) {
if (BC.isAArch64()) {
const auto *Sym = BC.MIB->getTargetSymbol(*PCRelBaseInstr, 1);
assert (Sym && "Symbol extraction failed");
if (auto *BD = BC.getBinaryDataByName(Sym->getName())) {
@ -730,7 +730,7 @@ IndirectBranchType BinaryFunction::processIndirectBranch(MCInst &Instruction,
assert(BD && "global symbol needs a value");
ArrayStart = BD->getAddress() + TargetOffset;
BaseRegNum = 0;
if (BC.TheTriple->getArch() == llvm::Triple::aarch64) {
if (BC.isAArch64()) {
ArrayStart &= ~0xFFFULL;
ArrayStart += DispValue & 0xFFFULL;
}
@ -817,7 +817,7 @@ IndirectBranchType BinaryFunction::processIndirectBranch(MCInst &Instruction,
<< " is referencing address 0x"
<< Twine::utohexstr(Section->getAddress() + ValueOffset));
// Extract the value and increment the offset.
if (BC.TheTriple->getArch() == llvm::Triple::aarch64) {
if (BC.isAArch64()) {
Value = PCRelAddr + DE.getSigned(&ValueOffset, EntrySize);
} else if (Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
Value = ArrayStart + DE.getSigned(&ValueOffset, 4);
@ -955,7 +955,7 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
}
}
if (BC.TheTriple->getArch() == llvm::Triple::aarch64) {
if (BC.isAArch64()) {
// Check if this is an access to a constant island and create bookkeeping
// to keep track of it and emit it later as part of this function
if (MCSymbol *IslandSym = getOrCreateIslandAccess(TargetAddress).first) {
@ -988,10 +988,9 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
// after reading relocations. ADRP contents now are not really meaningful
// without its supporting relocation.
if (!TargetSymbol && Section && Section->isText() &&
(BC.TheTriple->getArch() != llvm::Triple::aarch64 ||
!BC.MIB->isADRP(Instruction))) {
(!BC.isAArch64() || !BC.MIB->isADRP(Instruction))) {
if (containsAddress(TargetAddress, /*UseMaxSize=*/
BC.TheTriple->getArch() == llvm::Triple::aarch64)) {
BC.isAArch64())) {
if (TargetAddress != getAddress()) {
// The address could potentially escape. Mark it as another entry
// point into the function.
@ -1128,24 +1127,23 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
// For aarch, if we replaced an immediate with a symbol from a
// relocation, we mark it so we do not try to further process a
// pc-relative operand. All we need is the symbol.
if (BC.TheTriple->getArch() == llvm::Triple::aarch64)
if (BC.isAArch64())
UsedReloc = true;
// Make sure we replaced the correct immediate (instruction
// can have multiple immediate operands).
assert((BC.TheTriple->getArch() == llvm::Triple::aarch64 ||
assert((BC.isAArch64() ||
static_cast<uint64_t>(Value) == Relocation.Value) &&
"immediate value mismatch in function");
}
// Convert instruction to a shorter version that could be relaxed if needed.
// Convert instruction to a shorter version that could be relaxed if
// needed.
MIB->shortenInstruction(Instruction);
if (MIB->isBranch(Instruction) || MIB->isCall(Instruction)) {
uint64_t TargetAddress = 0;
if (MIB->evaluateBranch(Instruction,
AbsoluteInstrAddr,
Size,
if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
TargetAddress)) {
// Check if the target is within the same function. Otherwise it's
// a call, possibly a tail call.
@ -1163,8 +1161,8 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
} else {
// Possibly an old-style PIC code
errs() << "BOLT-WARNING: internal call detected at 0x"
<< Twine::utohexstr(AbsoluteInstrAddr)
<< " in function " << *this << ". Skipping.\n";
<< Twine::utohexstr(AbsoluteInstrAddr) << " in function "
<< *this << ". Skipping.\n";
IsSimple = false;
}
}
@ -1192,9 +1190,8 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
if (opts::Verbosity >= 2 && !IsCall && Size == 2 &&
!BC.HasRelocations) {
errs() << "BOLT-WARNING: relaxed tail call detected at 0x"
<< Twine::utohexstr(AbsoluteInstrAddr)
<< " in function " << *this
<< ". Code size will be increased.\n";
<< Twine::utohexstr(AbsoluteInstrAddr) << " in function "
<< *this << ". Code size will be increased.\n";
}
assert(!MIB->isTailCall(Instruction) &&
@ -1215,14 +1212,12 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
IsCall = true;
}
TargetSymbol = BC.getOrCreateGlobalSymbol(TargetAddress,
0,
0,
"FUNCat");
TargetSymbol =
BC.getOrCreateGlobalSymbol(TargetAddress, 0, 0, "FUNCat");
if (TargetAddress == 0) {
// We actually see calls to address 0 in presence of weak symbols
// originating from libraries. This code is never meant to be
// executed.
// We actually see calls to address 0 in presence of weak
// symbols originating from libraries. This code is never meant
// to be executed.
if (opts::Verbosity >= 2) {
outs() << "BOLT-INFO: Function " << *this
<< " has a call to address zero.\n";
@ -1234,22 +1229,21 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
// code without re-assembly.
size_t RelSize = (Size < 5) ? 1 : 4;
auto RelOffset = Offset + Size - RelSize;
if (BC.TheTriple->getArch() == llvm::Triple::aarch64) {
if (BC.isAArch64()) {
RelSize = 0;
RelOffset = Offset;
}
auto RI = MoveRelocations.find(RelOffset);
if (RI == MoveRelocations.end()) {
uint64_t RelType = (RelSize == 1) ? ELF::R_X86_64_PC8
: ELF::R_X86_64_PC32;
if (BC.TheTriple->getArch() == llvm::Triple::aarch64)
uint64_t RelType =
(RelSize == 1) ? ELF::R_X86_64_PC8 : ELF::R_X86_64_PC32;
if (BC.isAArch64())
RelType = ELF::R_AARCH64_CALL26;
DEBUG(dbgs() << "BOLT-DEBUG: creating relocation for static"
<< " function call to " << TargetSymbol->getName()
<< " at offset 0x"
<< Twine::utohexstr(RelOffset)
<< " with size " << RelSize
<< " for function " << *this << '\n');
<< " at offset 0x" << Twine::utohexstr(RelOffset)
<< " with size " << RelSize << " for function "
<< *this << '\n');
addRelocation(getAddress() + RelOffset, TargetSymbol, RelType,
-RelSize, 0);
}
@ -1279,14 +1273,11 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
switch (Result) {
default:
llvm_unreachable("unexpected result");
case IndirectBranchType::POSSIBLE_TAIL_CALL:
{
auto Result =
MIB->convertJmpToTailCall(Instruction, BC.Ctx.get());
case IndirectBranchType::POSSIBLE_TAIL_CALL: {
auto Result = MIB->convertJmpToTailCall(Instruction, BC.Ctx.get());
(void)Result;
assert(Result);
}
break;
} break;
case IndirectBranchType::POSSIBLE_JUMP_TABLE:
case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE:
if (opts::JumpTables == JTS_NONE)
@ -2346,10 +2337,14 @@ void BinaryFunction::emitConstantIslands(
DEBUG(dbgs() << "BOLT-DEBUG: emitted label "
<< IS->second->getName() << " at offset 0x"
<< Twine::utohexstr(IS->first) << '\n');
if (IS->second->isUndefined())
Streamer.EmitLabel(IS->second);
else
assert(hasName(IS->second->getName()));
} else {
DEBUG(dbgs() << "BOLT-DEBUG: emitted label "
<< ColdIslandSymbols[IS->second]->getName() << '\n');
if (ColdIslandSymbols[IS->second]->isUndefined())
Streamer.EmitLabel(ColdIslandSymbols[IS->second]);
}
} else {
@ -3270,7 +3265,7 @@ bool BinaryFunction::isDataMarker(const SymbolRef &Symbol,
uint64_t SymbolSize) const {
// For aarch64, the ABI defines mapping symbols so we identify data in the
// code section (see IHI0056B). $d identifies a symbol starting data contents.
if (BC.TheTriple->getArch() == llvm::Triple::aarch64 && Symbol.getType() &&
if (BC.isAArch64() && Symbol.getType() &&
cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && SymbolSize == 0 &&
Symbol.getName() && cantFail(Symbol.getName()) == "$d")
return true;
@ -3282,7 +3277,7 @@ bool BinaryFunction::isCodeMarker(const SymbolRef &Symbol,
// For aarch64, the ABI defines mapping symbols so we identify data in the
// code section (see IHI0056B). $x identifies a symbol starting code or the
// end of a data chunk inside code.
if (BC.TheTriple->getArch() == llvm::Triple::aarch64 && Symbol.getType() &&
if (BC.isAArch64() && Symbol.getType() &&
cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && SymbolSize == 0 &&
Symbol.getName() && cantFail(Symbol.getName()) == "$x")
return true;

View File

@ -1800,7 +1800,7 @@ public:
/// Detects whether \p Address is inside a data region in this function
/// (constant islands).
bool isInConstantIsland(uint64_t Address) const {
if (Address <= getAddress())
if (Address < getAddress())
return false;
auto Offset = Address - getAddress();

View File

@ -441,7 +441,7 @@ void BinaryFunctionPassManager::runAllPasses(
// Thighten branches according to offset differences between branch and
// targets. No extra instructions after this pass, otherwise we may have
// relocations out of range and crash during linking.
if (BC.TheTriple->getArch() == llvm::Triple::aarch64)
if (BC.isAArch64())
Manager.registerPass(llvm::make_unique<LongJmpPass>(PrintLongJmp));
// This pass turns tail calls into jumps which makes them invisible to

View File

@ -404,6 +404,11 @@ public:
return false;
}
virtual bool isADR(const MCInst &Inst) const {
llvm_unreachable("not implemented");
return false;
}
virtual bool isMoveMem2Reg(const MCInst &Inst) const {
llvm_unreachable("not implemented");
return false;

View File

@ -377,6 +377,8 @@ void ReorderBasicBlocks::runOnFunctions(
if (opts::ReorderBlocks == ReorderBasicBlocks::LT_NONE)
return;
IsAArch64 = BC.isAArch64();
uint64_t ModifiedFuncCount = 0;
for (auto &It : BFs) {
auto &Function = It.second;
@ -518,6 +520,13 @@ void ReorderBasicBlocks::splitFunction(BinaryFunction &BF) const {
BB->setCanOutline(false);
continue;
}
// Do not split extra entry points in aarch64. They can be referred by
// using ADRs and when this happens, these blocks cannot be placed far
// away due to the limited range in ADR instruction.
if (IsAArch64 && BB->isEntryPoint()) {
BB->setCanOutline(false);
continue;
}
if (BF.hasEHRanges() && !opts::SplitEH) {
// We cannot move landing pads (or rather entry points for landing
// pads).
@ -577,8 +586,7 @@ void FixupBranches::runOnFunctions(
for (auto &It : BFs) {
auto &Function = It.second;
if (BC.HasRelocations || shouldOptimize(Function)) {
if (BC.TheTriple->getArch() == llvm::Triple::aarch64 &&
!Function.isSimple())
if (BC.isAArch64() && !Function.isSimple())
continue;
Function.fixBranches();
}

View File

@ -185,6 +185,8 @@ private:
/// executed BBs. The cold part is moved to a new BinaryFunction.
void splitFunction(BinaryFunction &Function) const;
bool IsAArch64{false};
public:
explicit ReorderBasicBlocks(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) { }

View File

@ -368,8 +368,7 @@ bool LongJmpPass::removeOrShrinkStubs(const BinaryContext &BC,
BinaryFunction &Func) {
bool Modified{false};
assert(BC.TheTriple->getArch() == llvm::Triple::aarch64 &&
"Unsupported arch");
assert(BC.isAArch64() && "Unsupported arch");
constexpr auto InsnSize = 4; // AArch64
// Remove unnecessary stubs for branch targets we know we can fit in the
// instruction

View File

@ -962,7 +962,7 @@ void RewriteInstance::run() {
}
// Flip unsupported flags in AArch64 mode
if (BC->TheTriple->getArch() == llvm::Triple::aarch64) {
if (BC->isAArch64()) {
if (opts::BoostMacroops) {
opts::BoostMacroops = false;
outs() << "BOLT-INFO: disabling -boost-macroops for AArch64\n";
@ -1112,7 +1112,7 @@ void RewriteInstance::discoverFileObjects() {
// For aarch64, the ABI defines mapping symbols so we identify data in the
// code section (see IHI0056B). $d identifies data contents.
auto MarkersBegin = SortedFileSymbols.end();
if (BC->TheTriple->getArch() == llvm::Triple::aarch64) {
if (BC->isAArch64()) {
MarkersBegin = std::stable_partition(
SortedFileSymbols.begin(), SortedFileSymbols.end(),
[](const SymbolRef &Symbol) {
@ -1813,7 +1813,7 @@ bool RewriteInstance::analyzeRelocation(const RelocationRef &Rel,
if (!Relocation::isSupported(Rel.getType()))
return false;
const bool IsAArch64 = BC->TheTriple->getArch() == llvm::Triple::aarch64;
const bool IsAArch64 = BC->isAArch64();
const bool IsFromCode = RelocatedSection.isText();
// For value extraction.
@ -1920,7 +1920,7 @@ bool RewriteInstance::analyzeRelocation(const RelocationRef &Rel,
SymbolName != "__hot_end" &&
truncateToSize(ExtractedValue, RelSize) !=
truncateToSize(SymbolAddress + Addend - PCRelOffset, RelSize)) {
auto Section = Symbol.getSection();
auto Section = cantFail(Symbol.getSection());
SmallString<16> TypeName;
Rel.getTypeName(TypeName);
dbgs() << "BOLT-DEBUG: Mismatch between extracted value and relocation "
@ -1935,7 +1935,7 @@ bool RewriteInstance::analyzeRelocation(const RelocationRef &Rel,
<< "; symbol address = 0x" << Twine::utohexstr(SymbolAddress)
<< "; orig symbol address = 0x"
<< Twine::utohexstr(cantFail(Symbol.getAddress()))
<< "; symbol section = " << getSectionName(**Section)
<< "; symbol section = " << getSectionName(*Section)
<< "; addend = 0x" << Twine::utohexstr(Addend)
<< "; original addend = 0x"
<< Twine::utohexstr(getRelocationAddend(InputFile, Rel))
@ -1984,7 +1984,7 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
return;
}
const bool IsAArch64 = BC->TheTriple->getArch() == llvm::Triple::aarch64;
const bool IsAArch64 = BC->isAArch64();
const bool IsFromCode = RelocatedSection.isText();
auto printRelocationInfo = [&](const RelocationRef &Rel,
@ -2114,11 +2114,11 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
if (RefFunctionOffset) {
ReferencedSymbol =
ReferencedBF->getOrCreateLocalLabel(Address, /*CreatePastEnd*/ true);
SymbolAddress = Address;
Addend = 0;
} else {
ReferencedSymbol = ReferencedBF->getSymbol();
}
SymbolAddress = Address;
Addend = 0;
} else {
if (RefSection && RefSection->isText() && SymbolAddress) {
// This can happen e.g. with PIC-style jump tables.
@ -2126,11 +2126,28 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
"relocation against code\n");
}
// In AArch64 there are zero reasons to keep a reference to the
// "original" symbol plus addend. The original symbol is probably just a
// section symbol. If we are here, this means we are probably accessing
// data, so it is imperative to keep the original address.
if (IsAArch64) {
SymbolName = ("SYMBOLat0x" + Twine::utohexstr(Address)).str();
SymbolAddress = Address;
Addend = 0;
}
if (auto *BD = BC->getBinaryDataContainingAddress(SymbolAddress)) {
assert(cantFail(Rel.getSymbol()->getType()) == SymbolRef::ST_Debug ||
// Note: this assertion is trying to check sanity of BinaryData objects
// but AArch64 has inferred and incomplete object locations coming from
// GOT/TLS or any other non-trivial relocation (that requires creation
// of sections and whose symbol address is not really what should be
// encoded in the instruction). So we essentially disabled this check
// for AArch64 and live with bogus names for objects.
assert(IsAArch64 ||
cantFail(Rel.getSymbol()->getType()) == SymbolRef::ST_Debug ||
BD->nameStartsWith(SymbolName) ||
BD->nameStartsWith("PG" + SymbolName) ||
(BD->nameStartsWith("ANONYMOUS") &&
((BD->nameStartsWith("ANONYMOUS")) &&
(BD->getSectionName().startswith(".plt") ||
BD->getSectionName().endswith(".plt"))));
ReferencedSymbol = BD->getSymbol();
@ -2141,19 +2158,25 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
auto Symbol = *Rel.getSymbol();
// These are mostly local data symbols but undefined symbols
// in relocation sections can get through here too, from .plt.
assert(cantFail(Symbol.getType()) == SymbolRef::ST_Debug ||
assert(IsAArch64 ||
cantFail(Symbol.getType()) == SymbolRef::ST_Debug ||
BC->getSectionForAddress(SymbolAddress)
->getName()
.startswith(".plt"));
const uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
const uint64_t SymbolAlignment = Symbol.getAlignment();
const uint64_t SymbolSize =
IsAArch64 ? 0 : ELFSymbolRef(Symbol).getSize();
const uint64_t SymbolAlignment = IsAArch64 ? 1 : Symbol.getAlignment();
if (cantFail(Symbol.getType()) != SymbolRef::ST_Debug) {
std::string Name;
if (Symbol.getFlags() & SymbolRef::SF_Global)
if (Symbol.getFlags() & SymbolRef::SF_Global) {
Name = SymbolName;
else // TODO: add PG prefix?
Name = uniquifyName(*BC, SymbolName + "/");
} else {
Name = uniquifyName(*BC, StringRef(SymbolName).startswith(
BC->AsmInfo->getPrivateGlobalPrefix())
? "PG" + SymbolName + "/"
: SymbolName + "/");
}
ReferencedSymbol = BC->registerNameAtAddress(Name,
SymbolAddress,
SymbolSize,

View File

@ -58,9 +58,8 @@ public:
return Inst.getOpcode() == AArch64::ADRP;
}
bool isADR(const MCInst &Inst) const {
return (Inst.getOpcode() == AArch64::ADR ||
Inst.getOpcode() == AArch64::ADRP);
bool isADR(const MCInst &Inst) const override {
return Inst.getOpcode() == AArch64::ADR;
}
bool isTB(const MCInst &Inst) const {
@ -198,7 +197,7 @@ public:
}
bool hasPCRelOperand(const MCInst &Inst) const override {
if (isADR(Inst))
if (isADR(Inst) || isADRP(Inst))
return true;
// Look for literal addressing mode (see C1-143 ARM DDI 0487B.a)
@ -212,7 +211,7 @@ public:
bool evaluateADR(const MCInst &Inst, int64_t &Imm,
const MCExpr **DispExpr) const {
assert(isADR(Inst) && "Not an ADR instruction");
assert((isADR(Inst) || isADRP(Inst)) && "Not an ADR instruction");
auto &Label = Inst.getOperand(1);
if (!Label.isImm()) {
@ -234,7 +233,7 @@ public:
int64_t &DispImm,
const MCExpr **DispExpr = nullptr)
const {
if (isADR(Inst))
if (isADR(Inst) || isADRP(Inst))
return evaluateADR(Inst, DispImm, DispExpr);
// Literal addressing mode
@ -278,7 +277,7 @@ public:
bool replaceMemOperandDisp(MCInst &Inst, MCOperand Operand) const override {
MCInst::iterator OI = Inst.begin();
if (isADR(Inst)) {
if (isADR(Inst) || isADRP(Inst)) {
assert(MCPlus::getNumPrimeOperands(Inst) >= 2 &&
"Unexpected number of operands");
++OI;
@ -745,10 +744,6 @@ public:
return false;
}
bool isInvoke(const MCInst &Inst) const override {
return false;
}
bool analyzeBranch(InstructionIterator Begin,
InstructionIterator End,
const MCSymbol *&TBB,
@ -971,4 +966,3 @@ MCPlusBuilder *createAArch64MCPlusBuilder(const MCInstrAnalysis *Analysis,
const MCRegisterInfo *RegInfo) {
return new AArch64MCPlusBuilder(Analysis, Info, RegInfo);
}