[BOLT] Better handling of address references

Summary:
We used to handle PC-relative address references differently from direct
address references. As a result, some cases, such as escaped function
label address, were not handled when dealing with absolute (non-PIC)
code. This diff moves processing of an address reference into
BinaryContext::handleAddressRef() which is called for both PIC and
non-PIC code.

(cherry picked from FBD15643535)
This commit is contained in:
Maksim Panchenko 2019-06-04 15:30:22 -07:00
parent d3c1821f5f
commit fac6a89c23
10 changed files with 142 additions and 122 deletions

View File

@ -224,6 +224,68 @@ BinaryContext::getSubBinaryData(BinaryData *BD) {
return make_range(Start, End);
}
std::pair<MCSymbol *, uint64_t>
BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF) {
uint64_t Addend{0};
if (isAArch64()) {
// Check if this is an access to a constant island and create bookkeeping
// to keep track of it and emit it later as part of this function.
if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) {
return std::make_pair(IslandSym, Addend);
} else {
// Detect custom code written in assembly that refers to arbitrary
// constant islands from other functions. Write this reference so we
// can pull this constant island and emit it as part of this function
// too.
auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
if (IslandIter != AddressToConstantIslandMap.end()) {
if (auto *IslandSym =
IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) {
/// Make this function depend on IslandIter->second because we have
/// a reference to its constant island. When emitting this function,
/// we will also emit IslandIter->second's constants. This only
/// happens in custom AArch64 assembly code.
BF.IslandDependency.insert(IslandIter->second);
BF.ProxyIslandSymbols[IslandSym] = IslandIter->second;
return std::make_pair(IslandSym, Addend);
}
}
}
}
// Note that the address does not necessarily have to reside inside
// a section, it could be an absolute address too.
auto Section = getSectionForAddress(Address);
if (Section && Section->isText()) {
if (BF.containsAddress(Address, /*UseMaxSize=*/ isAArch64())) {
if (Address != BF.getAddress()) {
// The address could potentially escape. Mark it as another entry
// point into the function.
if (opts::Verbosity >= 1) {
outs() << "BOLT-INFO: potentially escaped address 0x"
<< Twine::utohexstr(Address) << " in function "
<< BF << '\n';
}
return std::make_pair(
BF.addEntryPointAtOffset(Address - BF.getAddress()),
Addend);
}
} else {
InterproceduralReferences.insert(std::make_pair(&BF, Address));
}
}
if (auto *BD = getBinaryDataContainingAddress(Address)) {
return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
}
// TODO: use DWARF info to get size/alignment here?
auto *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName());
return std::make_pair(TargetSymbol, Addend);
}
MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address,
Twine Prefix,
uint64_t Size,

View File

@ -481,6 +481,19 @@ public:
BinaryDataMap.clear();
}
/// Process \p Address reference from code in function \BF.
/// Return <Symbol, Addend> pair corresponding to the \p Address.
std::pair<MCSymbol *, uint64_t> handleAddressRef(uint64_t Address,
BinaryFunction &BF);
/// Return a value of the global \p Symbol or an error if the value
/// was not set.
ErrorOr<uint64_t> getSymbolValue(const MCSymbol &Symbol) const {
const auto *BD = getBinaryDataByName(Symbol.getName());
if (!BD)
return std::make_error_code(std::errc::bad_address);
return BD->getAddress();
}
/// Return a global symbol registered at a given \p Address and \p Size.
/// If no symbol exists, create one with unique name using \p Prefix.

View File

@ -371,9 +371,9 @@ bool BinaryFunction::isForwardCall(const MCSymbol *CalleeSymbol) const {
}
} else {
// Absolute symbol.
auto *CalleeSI = BC.getBinaryDataByName(CalleeSymbol->getName());
assert(CalleeSI && "unregistered symbol found");
return CalleeSI->getAddress() > getAddress();
auto CalleeAddressOrError = BC.getSymbolValue(*CalleeSymbol);
assert(CalleeAddressOrError && "unregistered symbol found");
return *CalleeAddressOrError > getAddress();
}
}
@ -711,8 +711,9 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction,
if (BC.isAArch64()) {
const auto *Sym = BC.MIB->getTargetSymbol(*PCRelBaseInstr, 1);
assert (Sym && "Symbol extraction failed");
if (auto *BD = BC.getBinaryDataByName(Sym->getName())) {
PCRelAddr = BD->getAddress();
auto SymValueOrError = BC.getSymbolValue(*Sym);
if (SymValueOrError) {
PCRelAddr = *SymValueOrError;
} else {
for (auto &Elmt : Labels) {
if (Elmt.second == Sym) {
@ -746,9 +747,9 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction,
const MCSymbol *TargetSym;
uint64_t TargetOffset;
std::tie(TargetSym, TargetOffset) = BC.MIB->getTargetSymbolInfo(DispExpr);
auto *BD = BC.getBinaryDataByName(TargetSym->getName());
assert(BD && "global symbol needs a value");
ArrayStart = BD->getAddress() + TargetOffset;
auto SymValueOrError = BC.getSymbolValue(*TargetSym);
assert(SymValueOrError && "global symbol needs a value");
ArrayStart = *SymValueOrError + TargetOffset;
BaseRegNum = 0;
if (BC.isAArch64()) {
ArrayStart &= ~0xFFFULL;
@ -960,78 +961,9 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
Labels[0] = Ctx->createTempSymbol("BB0", false);
addEntryPointAtOffset(0);
auto getOrCreateSymbolForAddress = [&](const MCInst &Instruction,
uint64_t TargetAddress,
uint64_t &SymbolAddend) {
if (BC.isAArch64()) {
// Check if this is an access to a constant island and create bookkeeping
// to keep track of it and emit it later as part of this function
if (MCSymbol *IslandSym = getOrCreateIslandAccess(TargetAddress)) {
return IslandSym;
} else {
// Detect custom code written in assembly that refers to arbitrary
// constant islands from other functions. Write this reference so we
// can pull this constant island and emit it as part of this function
// too.
auto IslandIter =
BC.AddressToConstantIslandMap.lower_bound(TargetAddress);
if (IslandIter != BC.AddressToConstantIslandMap.end()) {
if (MCSymbol *IslandSym =
IslandIter->second->getOrCreateProxyIslandAccess(
TargetAddress, this)) {
/// Make this function depend on IslandIter->second because we have
/// a reference to its constant island. When emitting this function,
/// we will also emit IslandIter->second's constants. This only
/// happens in custom AArch64 assembly code.
IslandDependency.insert(IslandIter->second);
ProxyIslandSymbols[IslandSym] = IslandIter->second;
return IslandSym;
}
}
}
}
// Note that the address does not necessarily have to reside inside
// a section, it could be an absolute address too.
auto Section = BC.getSectionForAddress(TargetAddress);
if (Section && Section->isText()) {
if (containsAddress(TargetAddress, /*UseMaxSize=*/ BC.isAArch64())) {
if (TargetAddress != getAddress()) {
// The address could potentially escape. Mark it as another entry
// point into the function.
DEBUG(dbgs() << "BOLT-DEBUG: potentially escaped address 0x"
<< Twine::utohexstr(TargetAddress) << " in function "
<< *this << '\n');
return addEntryPointAtOffset(TargetAddress - getAddress());
}
} else {
BC.InterproceduralReferences.insert(
std::make_pair(this, TargetAddress));
}
}
auto *BD = BC.getBinaryDataContainingAddress(TargetAddress);
if (BD) {
auto *TargetSymbol = BD->getSymbol();
SymbolAddend = TargetAddress - BD->getAddress();
return TargetSymbol;
}
// TODO: use DWARF info to get size/alignment here?
auto *TargetSymbol =
BC.getOrCreateGlobalSymbol(TargetAddress, "DATAat");
DEBUG(if (opts::Verbosity >= 2) {
auto SectionName = BD ? BD->getSectionName() : "<unknown>";
dbgs() << "Created DATAat sym: " << TargetSymbol->getName()
<< " in section " << SectionName << "\n";
});
return TargetSymbol;
};
auto handlePCRelOperand =
[&](MCInst &Instruction, uint64_t Address, uint64_t Size) {
uint64_t TargetAddress{0};
uint64_t TargetOffset{0};
MCSymbol *TargetSymbol{nullptr};
if (!MIB->evaluateMemOperandTarget(Instruction, TargetAddress, Address,
Size)) {
errs() << "BOLT-ERROR: PC-relative operand can't be evaluated:\n";
@ -1048,9 +980,10 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
}
}
TargetSymbol =
getOrCreateSymbolForAddress(Instruction, TargetAddress, TargetOffset);
MCSymbol *TargetSymbol;
uint64_t TargetOffset;
std::tie(TargetSymbol, TargetOffset) = BC.handleAddressRef(TargetAddress,
*this);
const MCExpr *Expr = MCSymbolRefExpr::create(TargetSymbol,
MCSymbolRefExpr::VK_None,
*BC.Ctx);
@ -1070,14 +1003,15 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
// info
auto fixStubTarget = [&](MCInst &LoadLowBits, MCInst &LoadHiBits,
uint64_t Target) {
uint64_t Addend{0};
int64_t Val;
MCSymbol *TargetSymbol;
TargetSymbol = getOrCreateSymbolForAddress(LoadLowBits, Target, Addend);
MIB->replaceImmWithSymbol(LoadHiBits, TargetSymbol, Addend, Ctx.get(),
Val, ELF::R_AARCH64_ADR_PREL_PG_HI21);
MIB->replaceImmWithSymbol(LoadLowBits, TargetSymbol, Addend, Ctx.get(), Val,
ELF::R_AARCH64_ADD_ABS_LO12_NC);
uint64_t Addend{0};
std::tie(TargetSymbol, Addend) = BC.handleAddressRef(Target, *this);
int64_t Val;
MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(),
Val, ELF::R_AARCH64_ADR_PREL_PG_HI21);
MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
};
uint64_t Size = 0; // instruction size
@ -1167,14 +1101,20 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
<< " for instruction at offset 0x"
<< Twine::utohexstr(Offset) << '\n');
int64_t Value = Relocation.Value;
const auto Result = BC.MIB->replaceImmWithSymbol(Instruction,
Relocation.Symbol,
Relocation.Addend,
Ctx.get(),
Value,
Relocation.Type);
// Process reference to the primary symbol.
if (!Relocation.isPCRelative())
BC.handleAddressRef(Relocation.Value - Relocation.Addend, *this);
const auto Result = BC.MIB->replaceImmWithSymbolRef(Instruction,
Relocation.Symbol,
Relocation.Addend,
Ctx.get(),
Value,
Relocation.Type);
(void)Result;
assert(Result && "cannot replace immediate with relocation");
// For aarch, if we replaced an immediate with a symbol from a
// relocation, we mark it so we do not try to further process a
// pc-relative operand. All we need is the symbol.
@ -1188,7 +1128,7 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
Relocation::getSizeForType(Relocation.Type)) ==
truncateToSize(Relocation.Value,
Relocation::getSizeForType(Relocation.Type)) &&
"immediate value mismatch in function");
"immediate value mismatch in function");
}
}

View File

@ -1800,20 +1800,20 @@ public:
/// separate symbols when emitting our constant island on behalf of this other
/// function.
MCSymbol *
getOrCreateProxyIslandAccess(uint64_t Address, BinaryFunction *Referrer) {
getOrCreateProxyIslandAccess(uint64_t Address, BinaryFunction &Referrer) {
auto Symbol = getOrCreateIslandAccess(Address);
if (!Symbol)
return nullptr;
MCSymbol *Proxy;
if (!IslandProxies[Referrer].count(Symbol)) {
if (!IslandProxies[&Referrer].count(Symbol)) {
Proxy =
BC.Ctx->getOrCreateSymbol(Symbol->getName() +
".proxy.for." + Referrer->getPrintName());
IslandProxies[Referrer][Symbol] = Proxy;
IslandProxies[Referrer][Proxy] = Symbol;
".proxy.for." + Referrer.getPrintName());
IslandProxies[&Referrer][Symbol] = Proxy;
IslandProxies[&Referrer][Proxy] = Symbol;
}
Proxy = IslandProxies[Referrer][Symbol];
Proxy = IslandProxies[&Referrer][Symbol];
return Proxy;
}

View File

@ -320,16 +320,15 @@ void BinaryFunction::postProcessProfile() {
return;
}
// Check if MCF post-processing was requested.
if (opts::DoMCF != MCF_DISABLE) {
removeTagsFromProfile();
solveMCF(*this, opts::DoMCF);
if (!(getProfileFlags() & PF_LBR)) {
// Check if MCF post-processing was requested.
if (opts::DoMCF != MCF_DISABLE) {
removeTagsFromProfile();
solveMCF(*this, opts::DoMCF);
}
return;
}
if (!(getProfileFlags() & PF_LBR))
return;
// Pre-sort branch data.
if (BranchData)
std::stable_sort(BranchData->Data.begin(), BranchData->Data.end());
@ -389,6 +388,12 @@ void BinaryFunction::postProcessProfile() {
if (opts::InferFallThroughs)
inferFallThroughCounts();
// Check if MCF post-processing was requested.
if (opts::DoMCF != MCF_DISABLE) {
removeTagsFromProfile();
solveMCF(*this, opts::DoMCF);
}
// Update profile information for jump tables based on CFG branch data.
for (auto *BB : BasicBlocks) {
const auto *LastInstr = BB->getLastNonPseudoInstr();

View File

@ -896,9 +896,9 @@ public:
/// of the passed \p Symbol plus \p Addend. If the instruction does not have
/// an immediate operand or has more than one - then return false. Otherwise
/// return true.
virtual bool replaceImmWithSymbol(MCInst &Inst, MCSymbol *Symbol,
int64_t Addend, MCContext *Ctx,
int64_t &Value, uint64_t RelType) const {
virtual bool replaceImmWithSymbolRef(MCInst &Inst, MCSymbol *Symbol,
int64_t Addend, MCContext *Ctx,
int64_t &Value, uint64_t RelType) const {
llvm_unreachable("not implemented");
return false;
}

View File

@ -449,9 +449,9 @@ IndirectCallPromotion::maybeGetHotJumpTableTargets(
uint64_t ArrayStart;
if (DispExpr) {
auto *BD = BC.getBinaryDataByName(DispExpr->getSymbol().getName());
assert(BD && "global symbol needs a value");
ArrayStart = BD->getAddress();
auto DispValueOrError = BC.getSymbolValue(DispExpr->getSymbol());
assert(DispValueOrError && "global symbol needs a value");
ArrayStart = *DispValueOrError;
} else {
ArrayStart = static_cast<uint64_t>(DispValue);
}

View File

@ -427,9 +427,9 @@ uint64_t LongJmpPass::getSymbolAddress(const BinaryContext &BC,
if (Iter == HotAddresses.end()) {
// Look at BinaryContext's resolution for this symbol - this is a symbol not
// mapped to a BinaryFunction
auto *BD = BC.getBinaryDataByName(Target->getName());
assert(BD && "Unrecognized symbol");
return BD ? BD->getAddress() : 0;
auto ValueOrError = BC.getSymbolValue(*Target);
assert(ValueOrError && "Unrecognized symbol");
return *ValueOrError;
}
return Iter->second;
}

View File

@ -968,9 +968,9 @@ public:
return true;
}
bool replaceImmWithSymbol(MCInst &Inst, MCSymbol *Symbol, int64_t Addend,
MCContext *Ctx, int64_t &Value,
uint64_t RelType) const override {
bool replaceImmWithSymbolRef(MCInst &Inst, MCSymbol *Symbol, int64_t Addend,
MCContext *Ctx, int64_t &Value,
uint64_t RelType) const override {
unsigned ImmOpNo = -1U;
for (unsigned Index = 0; Index < MCPlus::getNumPrimeOperands(Inst);
++Index) {

View File

@ -2794,9 +2794,9 @@ public:
return Code;
}
bool replaceImmWithSymbol(MCInst &Inst, MCSymbol *Symbol, int64_t Addend,
MCContext *Ctx, int64_t &Value,
uint64_t RelType) const override {
bool replaceImmWithSymbolRef(MCInst &Inst, MCSymbol *Symbol, int64_t Addend,
MCContext *Ctx, int64_t &Value,
uint64_t RelType) const override {
unsigned ImmOpNo = -1U;
for (unsigned Index = 0;