[BOLT] Support PC-relative relocations with addends

PC-relative memory operand could reference a different object from
the one located at the target address, e.g. when a negative offset
is used. Check relocations for the real referenced object.

Reviewed By: rafauler

Differential Revision: https://reviews.llvm.org/D120379
This commit is contained in:
Maksim Panchenko 2022-02-23 22:54:42 -08:00
parent 8b62f19f27
commit 4101aa130a
6 changed files with 132 additions and 60 deletions

View File

@ -1299,10 +1299,11 @@ public:
case ELF::R_X86_64_32:
case ELF::R_X86_64_32S:
case ELF::R_X86_64_64:
case ELF::R_X86_64_PC8:
case ELF::R_X86_64_PC32:
case ELF::R_X86_64_PC64:
Relocations[Offset] = Relocation{Offset, Symbol, RelType, Addend, Value};
return;
case ELF::R_X86_64_PC32:
case ELF::R_X86_64_PC8:
case ELF::R_X86_64_PLT32:
case ELF::R_X86_64_GOTPCRELX:
case ELF::R_X86_64_REX_GOTPCRELX:

View File

@ -49,7 +49,7 @@ struct Relocation {
/// Used to validate relocation correctness.
uint64_t Value;
/// Return size of the given relocation \p Type.
/// Return size in bytes of the given relocation \p Type.
static size_t getSizeForType(uint64_t Type);
/// Return size of this relocation.

View File

@ -1265,51 +1265,6 @@ bool BinaryFunction::disassemble() {
}
}
// Check if there's a relocation associated with this instruction.
bool UsedReloc = false;
for (auto Itr = Relocations.lower_bound(Offset),
ItrE = Relocations.lower_bound(Offset + Size);
Itr != ItrE; ++Itr) {
const Relocation &Relocation = Itr->second;
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: replacing immediate 0x"
<< Twine::utohexstr(Relocation.Value)
<< " with relocation"
" against "
<< Relocation.Symbol << "+" << Relocation.Addend
<< " in function " << *this
<< " for instruction at offset 0x"
<< Twine::utohexstr(Offset) << '\n');
// Process reference to the primary symbol.
if (!Relocation.isPCRelative())
BC.handleAddressRef(Relocation.Value - Relocation.Addend, *this,
/*IsPCRel*/ false);
int64_t Value = Relocation.Value;
const bool Result = BC.MIB->replaceImmWithSymbolRef(
Instruction, Relocation.Symbol, Relocation.Addend, Ctx.get(), Value,
Relocation.Type);
(void)Result;
assert(Result && "cannot replace immediate with relocation");
// For aarch, if we replaced an immediate with a symbol from a
// relocation, we mark it so we do not try to further process a
// pc-relative operand. All we need is the symbol.
if (BC.isAArch64())
UsedReloc = true;
// Make sure we replaced the correct immediate (instruction
// can have multiple immediate operands).
if (BC.isX86()) {
assert(truncateToSize(static_cast<uint64_t>(Value),
Relocation::getSizeForType(Relocation.Type)) ==
truncateToSize(Relocation.Value, Relocation::getSizeForType(
Relocation.Type)) &&
"immediate value mismatch in function");
}
}
if (MIB->isBranch(Instruction) || MIB->isCall(Instruction)) {
uint64_t TargetAddress = 0;
if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
@ -1394,7 +1349,74 @@ bool BinaryFunction::disassemble() {
if (BC.isAArch64())
handleAArch64IndirectCall(Instruction, Offset);
}
} else if (MIB->hasPCRelOperand(Instruction) && !UsedReloc) {
} else {
// Check if there's a relocation associated with this instruction.
bool UsedReloc = false;
for (auto Itr = Relocations.lower_bound(Offset),
ItrE = Relocations.lower_bound(Offset + Size);
Itr != ItrE; ++Itr) {
const Relocation &Relocation = Itr->second;
uint64_t SymbolValue = Relocation.Value - Relocation.Addend;
if (Relocation.isPCRelative())
SymbolValue += getAddress() + Relocation.Offset;
// Process reference to the symbol.
if (BC.isX86())
BC.handleAddressRef(SymbolValue, *this, Relocation.isPCRelative());
if (BC.isAArch64() || !Relocation.isPCRelative()) {
int64_t Value = Relocation.Value;
const bool Result = BC.MIB->replaceImmWithSymbolRef(
Instruction, Relocation.Symbol, Relocation.Addend, Ctx.get(),
Value, Relocation.Type);
(void)Result;
assert(Result && "cannot replace immediate with relocation");
if (BC.isX86()) {
// Make sure we replaced the correct immediate (instruction
// can have multiple immediate operands).
assert(
truncateToSize(static_cast<uint64_t>(Value),
Relocation::getSizeForType(Relocation.Type)) ==
truncateToSize(Relocation.Value, Relocation::getSizeForType(
Relocation.Type)) &&
"immediate value mismatch in function");
} else if (BC.isAArch64()) {
// For aarch, if we replaced an immediate with a symbol from a
// relocation, we mark it so we do not try to further process a
// pc-relative operand. All we need is the symbol.
UsedReloc = true;
}
} else {
// Check if the relocation matches memop's Disp.
uint64_t TargetAddress;
if (!BC.MIB->evaluateMemOperandTarget(Instruction, TargetAddress,
AbsoluteInstrAddr, Size)) {
errs() << "BOLT-ERROR: PC-relative operand can't be evaluated\n";
exit(1);
}
assert(TargetAddress == Relocation.Value + AbsoluteInstrAddr + Size &&
"Immediate value mismatch detected.");
const MCExpr *Expr = MCSymbolRefExpr::create(
Relocation.Symbol, MCSymbolRefExpr::VK_None, *BC.Ctx);
// Real addend for pc-relative targets is adjusted with a delta
// from relocation placement to the next instruction.
const uint64_t TargetAddend =
Relocation.Addend + Offset + Size - Relocation.Offset;
if (TargetAddend) {
const MCConstantExpr *Offset =
MCConstantExpr::create(TargetAddend, *BC.Ctx);
Expr = MCBinaryExpr::createAdd(Expr, Offset, *BC.Ctx);
}
BC.MIB->replaceMemOperandDisp(
Instruction, MCOperand::createExpr(BC.MIB->getTargetExprFor(
Instruction, Expr, *BC.Ctx, 0)));
UsedReloc = true;
}
}
if (MIB->hasPCRelOperand(Instruction) && !UsedReloc)
handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size);
}
@ -1565,6 +1587,8 @@ bool BinaryFunction::scanExternalRefs() {
ItrE = Relocations.lower_bound(Offset + Size);
Itr != ItrE; ++Itr) {
Relocation &Relocation = Itr->second;
if (Relocation.isPCRelative() && BC.isX86())
continue;
if (ignoreReference(Relocation.Symbol))
continue;

View File

@ -254,7 +254,9 @@ uint64_t adjustValueAArch64(uint64_t Type, uint64_t Value, uint64_t PC) {
uint64_t extractValueX86(uint64_t Type, uint64_t Contents, uint64_t PC) {
if (Type == ELF::R_X86_64_32S)
return SignExtend64<32>(Contents & 0xffffffff);
return SignExtend64<32>(Contents);
if (Relocation::isPCRelative(Type))
return SignExtend64(Contents, 8 * Relocation::getSizeForType(Type));
return Contents;
}
@ -442,6 +444,8 @@ bool isPCRelativeX86(uint64_t Type) {
case ELF::R_X86_64_PC64:
case ELF::R_X86_64_GOTPCREL:
case ELF::R_X86_64_PLT32:
case ELF::R_X86_64_GOTOFF64:
case ELF::R_X86_64_GOTPC32:
case ELF::R_X86_64_GOTTPOFF:
case ELF::R_X86_64_GOTPCRELX:
case ELF::R_X86_64_REX_GOTPCRELX:

View File

@ -2302,6 +2302,12 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
}
}
MCSymbol *ReferencedSymbol = nullptr;
if (!IsSectionRelocation) {
if (BinaryData *BD = BC->getBinaryDataByName(SymbolName))
ReferencedSymbol = BD->getSymbol();
}
// PC-relative relocations from data to code are tricky since the original
// information is typically lost after linking even with '--emit-relocs'.
// They are normally used by PIC-style jump tables and reference both
@ -2310,12 +2316,15 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
// that it references an arbitrary location in the code, possibly even
// in a different function from that containing the jump table.
if (!IsAArch64 && Relocation::isPCRelative(RType)) {
// Just register the fact that we have PC-relative relocation at a given
// address. The actual referenced label/address cannot be determined
// from linker data alone.
// For relocations against non-code sections, just register the fact that
// we have a PC-relative relocation at a given address. The actual
// referenced label/address cannot be determined from linker data alone.
if (!IsFromCode)
BC->addPCRelativeDataRelocation(Rel.getOffset());
else if (!IsSectionRelocation && ReferencedSymbol)
ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType,
Addend, ExtractedValue);
else
LLVM_DEBUG(
dbgs() << "BOLT-DEBUG: not creating PC-relative relocation at 0x"
<< Twine::utohexstr(Rel.getOffset()) << " for " << SymbolName
@ -2399,7 +2408,6 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
}
}
MCSymbol *ReferencedSymbol = nullptr;
if (ForceRelocation) {
std::string Name = Relocation::isGOT(RType) ? "Zero" : SymbolName;
ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0);

View File

@ -0,0 +1,35 @@
## Check that BOLT correctly recognizes pc-relative function pointer
## reference with an addend.
# REQUIRES: system-linux
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o
# RUN: llvm-strip --strip-unneeded %t.o
# RUN: ld.lld %t.o -o %t.exe -q
# RUN: llvm-bolt %t.exe -relocs -o /dev/null -print-only=foo -print-disasm \
# RUN: | FileCheck %s
.text
.globl _start
.type _start,@function
_start:
.cfi_startproc
call foo
retq
.size main, .-main
.cfi_endproc
.globl foo
.type foo,@function
foo:
.cfi_startproc
leaq foo-1(%rip), %rax
## Check that the instruction references foo with a negative addend,
## not the previous function with a positive addend (_start+X).
#
# CHECK: leaq foo-1(%rip), %rax
retq
.size foo, .-foo
.cfi_endproc