[ORC] Add a utility for adding missing "self" relocations to a Symbol

If a tool wants to introduce new indirections via stubs at link-time in
ORC, it can cause fidelity issues around the address of the function if
some references to the function do not have relocations. This is known
to happen inside the body of the function itself on x86_64 for example,
where a PC-relative address is formed, but without a relocation.

```
_foo:
  leaq -7(%rip), %rax ## form pointer to '_foo' without relocation

_bar:
  leaq (%rip), %rax ##  uses X86_64_RELOC_SIGNED to '_foo'
```

The consequence of introducing a stub for such a function at link time
is that if it forms a pointer to itself without relocation, it will not
have the same value as a pointer from outside the function. If the
function pointer is used as a key, this can cause problems.

This utility provides best-effort support for adding such missing
relocations using MCDisassembler and MCInstrAnalysis to identify the
problematic instructions. Currently it is only implemented for x86_64.

Note: the related issue with call/jump instructions is not handled
here, only forming function pointers.

rdar://83514317

Differential revision: https://reviews.llvm.org/D113038
This commit is contained in:
Ben Langmuir 2021-11-02 10:49:08 -07:00
parent 41481b7db5
commit a2639dcbe6
10 changed files with 297 additions and 11 deletions

View File

@ -45,6 +45,13 @@ class PointerType;
class Triple;
class Twine;
class Value;
class MCDisassembler;
class MCInstrAnalysis;
namespace jitlink {
class LinkGraph;
class Symbol;
} // namespace jitlink
namespace orc {
@ -557,6 +564,33 @@ GlobalAlias *cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA,
void cloneModuleFlagsMetadata(Module &Dst, const Module &Src,
ValueToValueMapTy &VMap);
/// Introduce relocations to \p Sym in its own definition if there are any
/// pointers formed via PC-relative address that do not already have a
/// relocation.
///
/// This is useful when introducing indirection via a stub function at link time
/// without compiler support. If a function pointer is formed without a
/// relocation, e.g. in the definition of \c foo
///
/// \code
/// _foo:
/// leaq -7(%rip), rax # form pointer to _foo without relocation
/// _bar:
/// leaq (%rip), %rax # uses X86_64_RELOC_SIGNED to '_foo'
/// \endcode
///
/// the pointer to \c _foo computed by \c _foo and \c _bar may differ if we
/// introduce a stub for _foo. If the pointer is used as a key, this may be
/// observable to the program. This pass will attempt to introduce the missing
/// "self-relocation" on the leaq instruction.
///
/// This is based on disassembly and should be considered "best effort". It may
/// silently fail to add relocations.
Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym,
jitlink::LinkGraph &G,
MCDisassembler &Disassembler,
MCInstrAnalysis &MIA);
} // end namespace orc
} // end namespace llvm

View File

@ -158,6 +158,11 @@ public:
evaluateMemoryOperandAddress(const MCInst &Inst, const MCSubtargetInfo *STI,
uint64_t Addr, uint64_t Size) const;
/// Given an instruction with a memory operand that could require relocation,
/// returns the offset within the instruction of that relocation.
virtual Optional<uint64_t>
getMemoryOperandRelocationOffset(const MCInst &Inst, uint64_t Size) const;
/// Returns (PLT virtual address, GOT virtual address) pairs for PLT entries.
virtual std::vector<std::pair<uint64_t, uint64_t>>
findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,

View File

@ -51,6 +51,7 @@ add_llvm_component_library(LLVMOrcJIT
OrcShared
OrcTargetProcess
MC
MCDisassembler
Passes
RuntimeDyld
Support

View File

@ -9,12 +9,17 @@
#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/JITLink/x86_64.h"
#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/Support/Format.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <sstream>
#define DEBUG_TYPE "orc"
using namespace llvm;
using namespace llvm::orc;
@ -372,5 +377,77 @@ void cloneModuleFlagsMetadata(Module &Dst, const Module &Src,
Dst.addModuleFlag(MapMetadata(MF, VMap));
}
Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym,
jitlink::LinkGraph &G,
MCDisassembler &Disassembler,
MCInstrAnalysis &MIA) {
// AArch64 appears to already come with the necessary relocations. Among other
// architectures, only x86_64 is currently implemented here.
if (G.getTargetTriple().getArch() != Triple::x86_64)
return Error::success();
raw_null_ostream CommentStream;
auto &STI = Disassembler.getSubtargetInfo();
// Determine the function bounds
auto &B = Sym.getBlock();
assert(!B.isZeroFill() && "expected content block");
auto SymAddress = Sym.getAddress();
auto SymStartInBlock =
(const uint8_t *)B.getContent().data() + Sym.getOffset();
auto SymSize = Sym.getSize() ? Sym.getSize() : B.getSize() - Sym.getOffset();
auto Content = makeArrayRef(SymStartInBlock, SymSize);
LLVM_DEBUG(dbgs() << "Adding self-relocations to " << Sym.getName() << "\n");
SmallDenseSet<uintptr_t, 8> ExistingRelocations;
for (auto &E : B.edges()) {
if (E.isRelocation())
ExistingRelocations.insert(E.getOffset());
}
size_t I = 0;
while (I < Content.size()) {
MCInst Instr;
uint64_t InstrSize = 0;
uint64_t InstrStart = SymAddress + I;
auto DecodeStatus = Disassembler.getInstruction(
Instr, InstrSize, Content.drop_front(I), InstrStart, CommentStream);
if (DecodeStatus != MCDisassembler::Success) {
LLVM_DEBUG(dbgs() << "Aborting due to disassembly failure at address "
<< InstrStart);
return make_error<StringError>(
formatv("failed to disassemble at address {0:x16}", InstrStart),
inconvertibleErrorCode());
}
// Advance to the next instruction.
I += InstrSize;
// Check for a PC-relative address equal to the symbol itself.
auto PCRelAddr =
MIA.evaluateMemoryOperandAddress(Instr, &STI, InstrStart, InstrSize);
if (!PCRelAddr.hasValue() || PCRelAddr.getValue() != SymAddress)
continue;
auto RelocOffInInstr =
MIA.getMemoryOperandRelocationOffset(Instr, InstrSize);
if (!RelocOffInInstr.hasValue() ||
InstrSize - RelocOffInInstr.getValue() != 4) {
LLVM_DEBUG(dbgs() << "Skipping unknown self-relocation at "
<< InstrStart);
continue;
}
auto RelocOffInBlock =
InstrStart + *RelocOffInInstr - SymAddress + Sym.getOffset();
if (ExistingRelocations.contains(RelocOffInBlock))
continue;
LLVM_DEBUG(dbgs() << "Adding delta32 self-relocation at " << InstrStart);
B.addEdge(jitlink::x86_64::Delta32, RelocOffInBlock, Sym, /*Addend=*/-4);
}
return Error::success();
}
} // End namespace orc.
} // End namespace llvm.

View File

@ -34,3 +34,9 @@ Optional<uint64_t> MCInstrAnalysis::evaluateMemoryOperandAddress(
uint64_t Size) const {
return None;
}
Optional<uint64_t>
MCInstrAnalysis::getMemoryOperandRelocationOffset(const MCInst &Inst,
uint64_t Size) const {
return None;
}

View File

@ -408,6 +408,9 @@ public:
const MCSubtargetInfo *STI,
uint64_t Addr,
uint64_t Size) const override;
Optional<uint64_t>
getMemoryOperandRelocationOffset(const MCInst &Inst,
uint64_t Size) const override;
};
#define GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS
@ -557,6 +560,30 @@ Optional<uint64_t> X86MCInstrAnalysis::evaluateMemoryOperandAddress(
return None;
}
Optional<uint64_t>
X86MCInstrAnalysis::getMemoryOperandRelocationOffset(const MCInst &Inst,
uint64_t Size) const {
if (Inst.getOpcode() != X86::LEA64r)
return None;
const MCInstrDesc &MCID = Info->get(Inst.getOpcode());
int MemOpStart = X86II::getMemoryOperandNo(MCID.TSFlags);
if (MemOpStart == -1)
return None;
MemOpStart += X86II::getOperandBias(MCID);
const MCOperand &SegReg = Inst.getOperand(MemOpStart + X86::AddrSegmentReg);
const MCOperand &BaseReg = Inst.getOperand(MemOpStart + X86::AddrBaseReg);
const MCOperand &IndexReg = Inst.getOperand(MemOpStart + X86::AddrIndexReg);
const MCOperand &ScaleAmt = Inst.getOperand(MemOpStart + X86::AddrScaleAmt);
const MCOperand &Disp = Inst.getOperand(MemOpStart + X86::AddrDisp);
// Must be a simple rip-relative address.
if (BaseReg.getReg() != X86::RIP || SegReg.getReg() != 0 ||
IndexReg.getReg() != 0 || ScaleAmt.getImm() != 1 || !Disp.isImm())
return None;
// rip-relative ModR/M immediate is 32 bits.
assert(Size > 4 && "invalid instruction size for rip-relative lea");
return Size - 4;
}
} // end of namespace X86_MC
} // end of namespace llvm

View File

@ -0,0 +1,35 @@
.section __TEXT,__text,regular,pure_instructions
.globl _form_func_ptr
_form_func_ptr:
leaq _form_func_ptr(%rip), %rax
leaq _other(%rip), %rax
leaq _form_func_ptr(%rip), %rax
nop
leaq _form_func_ptr(%rip), %rax
retq
.globl _other
_other:
leaq _form_func_ptr(%rip), %rax
retq
# Return 0 if the pointers formed inside and outside the function are the same.
.globl _main
_main:
pushq %rbp
movq %rsp, %rbp
subq $32, %rsp
movl $0, -4(%rbp)
callq _form_func_ptr
movq %rax, -16(%rbp)
callq _other
movq %rax, -24(%rbp)
movq -16(%rbp), %rax
cmpq -24(%rbp), %rax
setne %al
andb $1, %al
movzbl %al, %eax
addq $32, %rsp
popq %rbp
retq

View File

@ -0,0 +1,34 @@
# RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -filetype=obj -o %t %S/Inputs/x86-64_self_relocation.s
# RUN: llvm-jitlink -noexec -show-graph %t 2>&1 | \
# RUN: FileCheck -check-prefix=WITHOUT %s
# RUN: llvm-jitlink -noexec -show-graph -add-self-relocations %t 2>&1 | \
# RUN: FileCheck -check-prefix=CHECK %s
# WITHOUT: block
# WITHOUT-NEXT: symbols
# WITHOUT-NEXT: _form_func_ptr
# WITHOUT-NEXT: edges
# WITHOUT-NEXT: (block + 0x0000000a), addend = -0x00000004, kind = Delta32, target = _other
# WITHOUT-NOT: kind =
# CHECK: block
# CHECK-NEXT: symbols
# CHECK-NEXT: _form_func_ptr
# CHECK-NEXT: edges
# CHECK-NEXT: (block + 0x00000003), addend = -0x00000004, kind = Delta32, target = _form_func_ptr
# CHECK-NEXT: (block + 0x0000000a), addend = -0x00000004, kind = Delta32, target = _other
# CHECK-NEXT: (block + 0x00000011), addend = -0x00000004, kind = Delta32, target = _form_func_ptr
# CHECK-NEXT: (block + 0x00000019), addend = -0x00000004, kind = Delta32, target = _form_func_ptr
# CHECK-NOT: kind =
# WITHOUT: block
# WITHOUT-NEXT: symbols
# WITHOUT-NEXT: _other
# WITHOUT-NEXT: edges
# WITHOUT-NEXT: kind = Delta32, target = _form_func_ptr
# CHECK: block
# CHECK-NEXT: symbols
# CHECK-NEXT: _other
# CHECK-NEXT: edges
# CHECK-NEXT: kind = Delta32, target = _form_func_ptr

View File

@ -0,0 +1,8 @@
# RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -filetype=obj -o %t %S/Inputs/x86-64_self_relocation.s
# RUN: llvm-jitlink -show-graph %t
# RUN: llvm-jitlink -show-graph -add-self-relocations %t
# Ensure that the added relocation does not create an incorrect pointer.
# Execution test
# REQUIRES: system-darwin && native && target-x86_64

View File

@ -21,6 +21,7 @@
#include "llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h"
#include "llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
#include "llvm/ExecutionEngine/Orc/MachOPlatform.h"
#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h"
#include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h"
@ -28,6 +29,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
@ -178,6 +180,11 @@ static cl::opt<std::string>
OrcRuntime("orc-runtime", cl::desc("Use ORC runtime from given path"),
cl::init(""), cl::cat(JITLinkCategory));
static cl::opt<bool> AddSelfRelocations(
"add-self-relocations",
cl::desc("Add relocations to function pointers to the current function"),
cl::init(false), cl::cat(JITLinkCategory));
ExitOnError ExitOnErr;
LLVM_ATTRIBUTE_USED void linkComponents() {
@ -194,6 +201,8 @@ extern "C" void llvm_jitlink_setTestResultOverride(int64_t Value) {
UseTestResultOverride = true;
}
static Error addSelfRelocations(LinkGraph &G);
namespace llvm {
static raw_ostream &
@ -1086,6 +1095,9 @@ void Session::modifyPassConfig(const Triple &TT,
dumpSectionContents(outs(), G);
return Error::success();
});
if (AddSelfRelocations)
PassConfig.PostPrunePasses.push_back(addSelfRelocations);
}
Expected<Session::FileInfo &> Session::findFileInfo(StringRef FileName) {
@ -1374,14 +1386,21 @@ static Error loadObjects(Session &S) {
return Error::success();
}
static Error runChecks(Session &S) {
const auto &TT = S.ES.getExecutorProcessControl().getTargetTriple();
if (CheckFiles.empty())
return Error::success();
LLVM_DEBUG(dbgs() << "Running checks...\n");
namespace {
struct TargetInfo {
const Target *TheTarget;
std::unique_ptr<MCSubtargetInfo> STI;
std::unique_ptr<MCRegisterInfo> MRI;
std::unique_ptr<MCAsmInfo> MAI;
std::unique_ptr<MCContext> Ctx;
std::unique_ptr<MCDisassembler> Disassembler;
std::unique_ptr<MCInstrInfo> MII;
std::unique_ptr<MCInstrAnalysis> MIA;
std::unique_ptr<MCInstPrinter> InstPrinter;
};
} // anonymous namespace
static TargetInfo getTargetInfo(const Triple &TT) {
auto TripleName = TT.str();
std::string ErrorStr;
const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, ErrorStr);
@ -1412,19 +1431,49 @@ static Error runChecks(Session &S) {
TripleName,
inconvertibleErrorCode()));
MCContext Ctx(Triple(TripleName), MAI.get(), MRI.get(), STI.get());
auto Ctx = std::make_unique<MCContext>(Triple(TripleName), MAI.get(),
MRI.get(), STI.get());
std::unique_ptr<MCDisassembler> Disassembler(
TheTarget->createMCDisassembler(*STI, Ctx));
TheTarget->createMCDisassembler(*STI, *Ctx));
if (!Disassembler)
ExitOnErr(make_error<StringError>("Unable to create disassembler for " +
TripleName,
inconvertibleErrorCode()));
std::unique_ptr<MCInstrInfo> MII(TheTarget->createMCInstrInfo());
if (!MII)
ExitOnErr(make_error<StringError>("Unable to create instruction info for" +
TripleName,
inconvertibleErrorCode()));
std::unique_ptr<MCInstrAnalysis> MIA(
TheTarget->createMCInstrAnalysis(MII.get()));
if (!MIA)
ExitOnErr(make_error<StringError>(
"Unable to create instruction analysis for" + TripleName,
inconvertibleErrorCode()));
std::unique_ptr<MCInstPrinter> InstPrinter(
TheTarget->createMCInstPrinter(Triple(TripleName), 0, *MAI, *MII, *MRI));
if (!InstPrinter)
ExitOnErr(make_error<StringError>(
"Unable to create instruction printer for" + TripleName,
inconvertibleErrorCode()));
return {TheTarget, std::move(STI), std::move(MRI),
std::move(MAI), std::move(Ctx), std::move(Disassembler),
std::move(MII), std::move(MIA), std::move(InstPrinter)};
}
static Error runChecks(Session &S) {
const auto &TT = S.ES.getExecutorProcessControl().getTargetTriple();
if (CheckFiles.empty())
return Error::success();
LLVM_DEBUG(dbgs() << "Running checks...\n");
auto TI = getTargetInfo(TT);
auto IsSymbolValid = [&S](StringRef Symbol) {
return S.isSymbolRegistered(Symbol);
@ -1448,8 +1497,8 @@ static Error runChecks(Session &S) {
RuntimeDyldChecker Checker(
IsSymbolValid, GetSymbolInfo, GetSectionInfo, GetStubInfo, GetGOTInfo,
TT.isLittleEndian() ? support::little : support::big, Disassembler.get(),
InstPrinter.get(), dbgs());
TT.isLittleEndian() ? support::little : support::big,
TI.Disassembler.get(), TI.InstPrinter.get(), dbgs());
std::string CheckLineStart = "# " + CheckName + ":";
for (auto &CheckFile : CheckFiles) {
@ -1463,6 +1512,16 @@ static Error runChecks(Session &S) {
return Error::success();
}
static Error addSelfRelocations(LinkGraph &G) {
auto TI = getTargetInfo(G.getTargetTriple());
for (auto *Sym : G.defined_symbols())
if (Sym->isCallable())
if (auto Err = addFunctionPointerRelocationsToCurrentSymbol(
*Sym, G, *TI.Disassembler, *TI.MIA))
return Err;
return Error::success();
}
static void dumpSessionStats(Session &S) {
if (!ShowSizes)
return;