forked from OSchip/llvm-project
[BOLT] Better handling of address references
Summary: We used to handle PC-relative address references differently from direct address references. As a result, some cases, such as escaped function label address, were not handled when dealing with absolute (non-PIC) code. This diff moves processing of an address reference into BinaryContext::handleAddressRef() which is called for both PIC and non-PIC code. (cherry picked from FBD15643535)
This commit is contained in:
parent
d3c1821f5f
commit
fac6a89c23
|
@ -224,6 +224,68 @@ BinaryContext::getSubBinaryData(BinaryData *BD) {
|
|||
return make_range(Start, End);
|
||||
}
|
||||
|
||||
std::pair<MCSymbol *, uint64_t>
|
||||
BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF) {
|
||||
uint64_t Addend{0};
|
||||
|
||||
if (isAArch64()) {
|
||||
// Check if this is an access to a constant island and create bookkeeping
|
||||
// to keep track of it and emit it later as part of this function.
|
||||
if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) {
|
||||
return std::make_pair(IslandSym, Addend);
|
||||
} else {
|
||||
// Detect custom code written in assembly that refers to arbitrary
|
||||
// constant islands from other functions. Write this reference so we
|
||||
// can pull this constant island and emit it as part of this function
|
||||
// too.
|
||||
auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
|
||||
if (IslandIter != AddressToConstantIslandMap.end()) {
|
||||
if (auto *IslandSym =
|
||||
IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) {
|
||||
/// Make this function depend on IslandIter->second because we have
|
||||
/// a reference to its constant island. When emitting this function,
|
||||
/// we will also emit IslandIter->second's constants. This only
|
||||
/// happens in custom AArch64 assembly code.
|
||||
BF.IslandDependency.insert(IslandIter->second);
|
||||
BF.ProxyIslandSymbols[IslandSym] = IslandIter->second;
|
||||
return std::make_pair(IslandSym, Addend);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Note that the address does not necessarily have to reside inside
|
||||
// a section, it could be an absolute address too.
|
||||
auto Section = getSectionForAddress(Address);
|
||||
if (Section && Section->isText()) {
|
||||
if (BF.containsAddress(Address, /*UseMaxSize=*/ isAArch64())) {
|
||||
if (Address != BF.getAddress()) {
|
||||
// The address could potentially escape. Mark it as another entry
|
||||
// point into the function.
|
||||
if (opts::Verbosity >= 1) {
|
||||
outs() << "BOLT-INFO: potentially escaped address 0x"
|
||||
<< Twine::utohexstr(Address) << " in function "
|
||||
<< BF << '\n';
|
||||
}
|
||||
return std::make_pair(
|
||||
BF.addEntryPointAtOffset(Address - BF.getAddress()),
|
||||
Addend);
|
||||
}
|
||||
} else {
|
||||
InterproceduralReferences.insert(std::make_pair(&BF, Address));
|
||||
}
|
||||
}
|
||||
|
||||
if (auto *BD = getBinaryDataContainingAddress(Address)) {
|
||||
return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
|
||||
}
|
||||
|
||||
// TODO: use DWARF info to get size/alignment here?
|
||||
auto *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
|
||||
DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName());
|
||||
return std::make_pair(TargetSymbol, Addend);
|
||||
}
|
||||
|
||||
MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address,
|
||||
Twine Prefix,
|
||||
uint64_t Size,
|
||||
|
|
|
@ -481,6 +481,19 @@ public:
|
|||
BinaryDataMap.clear();
|
||||
}
|
||||
|
||||
/// Process \p Address reference from code in function \BF.
|
||||
/// Return <Symbol, Addend> pair corresponding to the \p Address.
|
||||
std::pair<MCSymbol *, uint64_t> handleAddressRef(uint64_t Address,
|
||||
BinaryFunction &BF);
|
||||
|
||||
/// Return a value of the global \p Symbol or an error if the value
|
||||
/// was not set.
|
||||
ErrorOr<uint64_t> getSymbolValue(const MCSymbol &Symbol) const {
|
||||
const auto *BD = getBinaryDataByName(Symbol.getName());
|
||||
if (!BD)
|
||||
return std::make_error_code(std::errc::bad_address);
|
||||
return BD->getAddress();
|
||||
}
|
||||
|
||||
/// Return a global symbol registered at a given \p Address and \p Size.
|
||||
/// If no symbol exists, create one with unique name using \p Prefix.
|
||||
|
|
|
@ -371,9 +371,9 @@ bool BinaryFunction::isForwardCall(const MCSymbol *CalleeSymbol) const {
|
|||
}
|
||||
} else {
|
||||
// Absolute symbol.
|
||||
auto *CalleeSI = BC.getBinaryDataByName(CalleeSymbol->getName());
|
||||
assert(CalleeSI && "unregistered symbol found");
|
||||
return CalleeSI->getAddress() > getAddress();
|
||||
auto CalleeAddressOrError = BC.getSymbolValue(*CalleeSymbol);
|
||||
assert(CalleeAddressOrError && "unregistered symbol found");
|
||||
return *CalleeAddressOrError > getAddress();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -711,8 +711,9 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction,
|
|||
if (BC.isAArch64()) {
|
||||
const auto *Sym = BC.MIB->getTargetSymbol(*PCRelBaseInstr, 1);
|
||||
assert (Sym && "Symbol extraction failed");
|
||||
if (auto *BD = BC.getBinaryDataByName(Sym->getName())) {
|
||||
PCRelAddr = BD->getAddress();
|
||||
auto SymValueOrError = BC.getSymbolValue(*Sym);
|
||||
if (SymValueOrError) {
|
||||
PCRelAddr = *SymValueOrError;
|
||||
} else {
|
||||
for (auto &Elmt : Labels) {
|
||||
if (Elmt.second == Sym) {
|
||||
|
@ -746,9 +747,9 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction,
|
|||
const MCSymbol *TargetSym;
|
||||
uint64_t TargetOffset;
|
||||
std::tie(TargetSym, TargetOffset) = BC.MIB->getTargetSymbolInfo(DispExpr);
|
||||
auto *BD = BC.getBinaryDataByName(TargetSym->getName());
|
||||
assert(BD && "global symbol needs a value");
|
||||
ArrayStart = BD->getAddress() + TargetOffset;
|
||||
auto SymValueOrError = BC.getSymbolValue(*TargetSym);
|
||||
assert(SymValueOrError && "global symbol needs a value");
|
||||
ArrayStart = *SymValueOrError + TargetOffset;
|
||||
BaseRegNum = 0;
|
||||
if (BC.isAArch64()) {
|
||||
ArrayStart &= ~0xFFFULL;
|
||||
|
@ -960,78 +961,9 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
|
|||
Labels[0] = Ctx->createTempSymbol("BB0", false);
|
||||
addEntryPointAtOffset(0);
|
||||
|
||||
auto getOrCreateSymbolForAddress = [&](const MCInst &Instruction,
|
||||
uint64_t TargetAddress,
|
||||
uint64_t &SymbolAddend) {
|
||||
if (BC.isAArch64()) {
|
||||
// Check if this is an access to a constant island and create bookkeeping
|
||||
// to keep track of it and emit it later as part of this function
|
||||
if (MCSymbol *IslandSym = getOrCreateIslandAccess(TargetAddress)) {
|
||||
return IslandSym;
|
||||
} else {
|
||||
// Detect custom code written in assembly that refers to arbitrary
|
||||
// constant islands from other functions. Write this reference so we
|
||||
// can pull this constant island and emit it as part of this function
|
||||
// too.
|
||||
auto IslandIter =
|
||||
BC.AddressToConstantIslandMap.lower_bound(TargetAddress);
|
||||
if (IslandIter != BC.AddressToConstantIslandMap.end()) {
|
||||
if (MCSymbol *IslandSym =
|
||||
IslandIter->second->getOrCreateProxyIslandAccess(
|
||||
TargetAddress, this)) {
|
||||
/// Make this function depend on IslandIter->second because we have
|
||||
/// a reference to its constant island. When emitting this function,
|
||||
/// we will also emit IslandIter->second's constants. This only
|
||||
/// happens in custom AArch64 assembly code.
|
||||
IslandDependency.insert(IslandIter->second);
|
||||
ProxyIslandSymbols[IslandSym] = IslandIter->second;
|
||||
return IslandSym;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Note that the address does not necessarily have to reside inside
|
||||
// a section, it could be an absolute address too.
|
||||
auto Section = BC.getSectionForAddress(TargetAddress);
|
||||
if (Section && Section->isText()) {
|
||||
if (containsAddress(TargetAddress, /*UseMaxSize=*/ BC.isAArch64())) {
|
||||
if (TargetAddress != getAddress()) {
|
||||
// The address could potentially escape. Mark it as another entry
|
||||
// point into the function.
|
||||
DEBUG(dbgs() << "BOLT-DEBUG: potentially escaped address 0x"
|
||||
<< Twine::utohexstr(TargetAddress) << " in function "
|
||||
<< *this << '\n');
|
||||
return addEntryPointAtOffset(TargetAddress - getAddress());
|
||||
}
|
||||
} else {
|
||||
BC.InterproceduralReferences.insert(
|
||||
std::make_pair(this, TargetAddress));
|
||||
}
|
||||
}
|
||||
|
||||
auto *BD = BC.getBinaryDataContainingAddress(TargetAddress);
|
||||
if (BD) {
|
||||
auto *TargetSymbol = BD->getSymbol();
|
||||
SymbolAddend = TargetAddress - BD->getAddress();
|
||||
return TargetSymbol;
|
||||
}
|
||||
// TODO: use DWARF info to get size/alignment here?
|
||||
auto *TargetSymbol =
|
||||
BC.getOrCreateGlobalSymbol(TargetAddress, "DATAat");
|
||||
DEBUG(if (opts::Verbosity >= 2) {
|
||||
auto SectionName = BD ? BD->getSectionName() : "<unknown>";
|
||||
dbgs() << "Created DATAat sym: " << TargetSymbol->getName()
|
||||
<< " in section " << SectionName << "\n";
|
||||
});
|
||||
return TargetSymbol;
|
||||
};
|
||||
|
||||
auto handlePCRelOperand =
|
||||
[&](MCInst &Instruction, uint64_t Address, uint64_t Size) {
|
||||
uint64_t TargetAddress{0};
|
||||
uint64_t TargetOffset{0};
|
||||
MCSymbol *TargetSymbol{nullptr};
|
||||
if (!MIB->evaluateMemOperandTarget(Instruction, TargetAddress, Address,
|
||||
Size)) {
|
||||
errs() << "BOLT-ERROR: PC-relative operand can't be evaluated:\n";
|
||||
|
@ -1048,9 +980,10 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
|
|||
}
|
||||
}
|
||||
|
||||
TargetSymbol =
|
||||
getOrCreateSymbolForAddress(Instruction, TargetAddress, TargetOffset);
|
||||
|
||||
MCSymbol *TargetSymbol;
|
||||
uint64_t TargetOffset;
|
||||
std::tie(TargetSymbol, TargetOffset) = BC.handleAddressRef(TargetAddress,
|
||||
*this);
|
||||
const MCExpr *Expr = MCSymbolRefExpr::create(TargetSymbol,
|
||||
MCSymbolRefExpr::VK_None,
|
||||
*BC.Ctx);
|
||||
|
@ -1070,14 +1003,15 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
|
|||
// info
|
||||
auto fixStubTarget = [&](MCInst &LoadLowBits, MCInst &LoadHiBits,
|
||||
uint64_t Target) {
|
||||
uint64_t Addend{0};
|
||||
int64_t Val;
|
||||
MCSymbol *TargetSymbol;
|
||||
TargetSymbol = getOrCreateSymbolForAddress(LoadLowBits, Target, Addend);
|
||||
MIB->replaceImmWithSymbol(LoadHiBits, TargetSymbol, Addend, Ctx.get(),
|
||||
Val, ELF::R_AARCH64_ADR_PREL_PG_HI21);
|
||||
MIB->replaceImmWithSymbol(LoadLowBits, TargetSymbol, Addend, Ctx.get(), Val,
|
||||
ELF::R_AARCH64_ADD_ABS_LO12_NC);
|
||||
uint64_t Addend{0};
|
||||
std::tie(TargetSymbol, Addend) = BC.handleAddressRef(Target, *this);
|
||||
|
||||
int64_t Val;
|
||||
MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(),
|
||||
Val, ELF::R_AARCH64_ADR_PREL_PG_HI21);
|
||||
MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
|
||||
Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
|
||||
};
|
||||
|
||||
uint64_t Size = 0; // instruction size
|
||||
|
@ -1167,14 +1101,20 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
|
|||
<< " for instruction at offset 0x"
|
||||
<< Twine::utohexstr(Offset) << '\n');
|
||||
int64_t Value = Relocation.Value;
|
||||
const auto Result = BC.MIB->replaceImmWithSymbol(Instruction,
|
||||
Relocation.Symbol,
|
||||
Relocation.Addend,
|
||||
Ctx.get(),
|
||||
Value,
|
||||
Relocation.Type);
|
||||
|
||||
// Process reference to the primary symbol.
|
||||
if (!Relocation.isPCRelative())
|
||||
BC.handleAddressRef(Relocation.Value - Relocation.Addend, *this);
|
||||
|
||||
const auto Result = BC.MIB->replaceImmWithSymbolRef(Instruction,
|
||||
Relocation.Symbol,
|
||||
Relocation.Addend,
|
||||
Ctx.get(),
|
||||
Value,
|
||||
Relocation.Type);
|
||||
(void)Result;
|
||||
assert(Result && "cannot replace immediate with relocation");
|
||||
|
||||
// For aarch, if we replaced an immediate with a symbol from a
|
||||
// relocation, we mark it so we do not try to further process a
|
||||
// pc-relative operand. All we need is the symbol.
|
||||
|
@ -1188,7 +1128,7 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
|
|||
Relocation::getSizeForType(Relocation.Type)) ==
|
||||
truncateToSize(Relocation.Value,
|
||||
Relocation::getSizeForType(Relocation.Type)) &&
|
||||
"immediate value mismatch in function");
|
||||
"immediate value mismatch in function");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1800,20 +1800,20 @@ public:
|
|||
/// separate symbols when emitting our constant island on behalf of this other
|
||||
/// function.
|
||||
MCSymbol *
|
||||
getOrCreateProxyIslandAccess(uint64_t Address, BinaryFunction *Referrer) {
|
||||
getOrCreateProxyIslandAccess(uint64_t Address, BinaryFunction &Referrer) {
|
||||
auto Symbol = getOrCreateIslandAccess(Address);
|
||||
if (!Symbol)
|
||||
return nullptr;
|
||||
|
||||
MCSymbol *Proxy;
|
||||
if (!IslandProxies[Referrer].count(Symbol)) {
|
||||
if (!IslandProxies[&Referrer].count(Symbol)) {
|
||||
Proxy =
|
||||
BC.Ctx->getOrCreateSymbol(Symbol->getName() +
|
||||
".proxy.for." + Referrer->getPrintName());
|
||||
IslandProxies[Referrer][Symbol] = Proxy;
|
||||
IslandProxies[Referrer][Proxy] = Symbol;
|
||||
".proxy.for." + Referrer.getPrintName());
|
||||
IslandProxies[&Referrer][Symbol] = Proxy;
|
||||
IslandProxies[&Referrer][Proxy] = Symbol;
|
||||
}
|
||||
Proxy = IslandProxies[Referrer][Symbol];
|
||||
Proxy = IslandProxies[&Referrer][Symbol];
|
||||
return Proxy;
|
||||
}
|
||||
|
||||
|
|
|
@ -320,16 +320,15 @@ void BinaryFunction::postProcessProfile() {
|
|||
return;
|
||||
}
|
||||
|
||||
// Check if MCF post-processing was requested.
|
||||
if (opts::DoMCF != MCF_DISABLE) {
|
||||
removeTagsFromProfile();
|
||||
solveMCF(*this, opts::DoMCF);
|
||||
if (!(getProfileFlags() & PF_LBR)) {
|
||||
// Check if MCF post-processing was requested.
|
||||
if (opts::DoMCF != MCF_DISABLE) {
|
||||
removeTagsFromProfile();
|
||||
solveMCF(*this, opts::DoMCF);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (!(getProfileFlags() & PF_LBR))
|
||||
return;
|
||||
|
||||
// Pre-sort branch data.
|
||||
if (BranchData)
|
||||
std::stable_sort(BranchData->Data.begin(), BranchData->Data.end());
|
||||
|
@ -389,6 +388,12 @@ void BinaryFunction::postProcessProfile() {
|
|||
if (opts::InferFallThroughs)
|
||||
inferFallThroughCounts();
|
||||
|
||||
// Check if MCF post-processing was requested.
|
||||
if (opts::DoMCF != MCF_DISABLE) {
|
||||
removeTagsFromProfile();
|
||||
solveMCF(*this, opts::DoMCF);
|
||||
}
|
||||
|
||||
// Update profile information for jump tables based on CFG branch data.
|
||||
for (auto *BB : BasicBlocks) {
|
||||
const auto *LastInstr = BB->getLastNonPseudoInstr();
|
||||
|
|
|
@ -896,9 +896,9 @@ public:
|
|||
/// of the passed \p Symbol plus \p Addend. If the instruction does not have
|
||||
/// an immediate operand or has more than one - then return false. Otherwise
|
||||
/// return true.
|
||||
virtual bool replaceImmWithSymbol(MCInst &Inst, MCSymbol *Symbol,
|
||||
int64_t Addend, MCContext *Ctx,
|
||||
int64_t &Value, uint64_t RelType) const {
|
||||
virtual bool replaceImmWithSymbolRef(MCInst &Inst, MCSymbol *Symbol,
|
||||
int64_t Addend, MCContext *Ctx,
|
||||
int64_t &Value, uint64_t RelType) const {
|
||||
llvm_unreachable("not implemented");
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -449,9 +449,9 @@ IndirectCallPromotion::maybeGetHotJumpTableTargets(
|
|||
|
||||
uint64_t ArrayStart;
|
||||
if (DispExpr) {
|
||||
auto *BD = BC.getBinaryDataByName(DispExpr->getSymbol().getName());
|
||||
assert(BD && "global symbol needs a value");
|
||||
ArrayStart = BD->getAddress();
|
||||
auto DispValueOrError = BC.getSymbolValue(DispExpr->getSymbol());
|
||||
assert(DispValueOrError && "global symbol needs a value");
|
||||
ArrayStart = *DispValueOrError;
|
||||
} else {
|
||||
ArrayStart = static_cast<uint64_t>(DispValue);
|
||||
}
|
||||
|
|
|
@ -427,9 +427,9 @@ uint64_t LongJmpPass::getSymbolAddress(const BinaryContext &BC,
|
|||
if (Iter == HotAddresses.end()) {
|
||||
// Look at BinaryContext's resolution for this symbol - this is a symbol not
|
||||
// mapped to a BinaryFunction
|
||||
auto *BD = BC.getBinaryDataByName(Target->getName());
|
||||
assert(BD && "Unrecognized symbol");
|
||||
return BD ? BD->getAddress() : 0;
|
||||
auto ValueOrError = BC.getSymbolValue(*Target);
|
||||
assert(ValueOrError && "Unrecognized symbol");
|
||||
return *ValueOrError;
|
||||
}
|
||||
return Iter->second;
|
||||
}
|
||||
|
|
|
@ -968,9 +968,9 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
bool replaceImmWithSymbol(MCInst &Inst, MCSymbol *Symbol, int64_t Addend,
|
||||
MCContext *Ctx, int64_t &Value,
|
||||
uint64_t RelType) const override {
|
||||
bool replaceImmWithSymbolRef(MCInst &Inst, MCSymbol *Symbol, int64_t Addend,
|
||||
MCContext *Ctx, int64_t &Value,
|
||||
uint64_t RelType) const override {
|
||||
unsigned ImmOpNo = -1U;
|
||||
for (unsigned Index = 0; Index < MCPlus::getNumPrimeOperands(Inst);
|
||||
++Index) {
|
||||
|
|
|
@ -2794,9 +2794,9 @@ public:
|
|||
return Code;
|
||||
}
|
||||
|
||||
bool replaceImmWithSymbol(MCInst &Inst, MCSymbol *Symbol, int64_t Addend,
|
||||
MCContext *Ctx, int64_t &Value,
|
||||
uint64_t RelType) const override {
|
||||
bool replaceImmWithSymbolRef(MCInst &Inst, MCSymbol *Symbol, int64_t Addend,
|
||||
MCContext *Ctx, int64_t &Value,
|
||||
uint64_t RelType) const override {
|
||||
unsigned ImmOpNo = -1U;
|
||||
|
||||
for (unsigned Index = 0;
|
||||
|
|
Loading…
Reference in New Issue