[BOLT] Fix instrumentation bug in duplicated JTs

Summary:
Fix a bug with instrumentation when trying to instrument
functions that share a jump table with multiple indirect
jumps. Usually, each indirect jump that uses a JT will have its own
copy of it. When this does not happen, we need to duplicate the jump
table safely, so we can split the edges correctly (each copy of the
jump table may have different split edges). For this to happen, we
need to correctly match the sequence of instructions that perform the
indirect jump to identify the base address of the jump table and patch
it to point to the new cloned JT. It was reported to us a case in
which the compiler generated suboptimal code to do an indirect jump
which our matcher failed to identify.

Fixes facebookincubator/BOLT#126

(cherry picked from FBD27065579)
This commit is contained in:
Rafael Auler 2021-03-15 16:34:25 -07:00 committed by Maksim Panchenko
parent b11c826889
commit b3c34d568a
7 changed files with 236 additions and 48 deletions

View File

@ -3864,7 +3864,7 @@ bool BinaryFunction::checkForAmbiguousJumpTables() {
void BinaryFunction::disambiguateJumpTables(
MCPlusBuilder::AllocatorIdTy AllocId) {
assert((opts::JumpTables != JTS_BASIC && isSimple()) || BC.HasRelocations);
assert((opts::JumpTables != JTS_BASIC && isSimple()) || !BC.HasRelocations);
SmallPtrSet<JumpTable *, 4> JumpTables;
for (auto &BB : BasicBlocks) {
for (auto &Inst : *BB) {
@ -3881,51 +3881,70 @@ void BinaryFunction::disambiguateJumpTables(
// This instruction is an indirect jump using a jump table, but it is
// using the same jump table of another jump. Try all our tricks to
// extract the jump table symbol and make it point to a new, duplicated JT
MCPhysReg BaseReg1;
uint64_t Scale;
const MCSymbol *Target;
// In case we match if our first matcher, first instruction is the one to
// patch
MCInst *JTLoadInst = &Inst;
// Try a standard indirect jump matcher, scale 8
auto IndJmpMatcher = BC.MIB->matchIndJmp(
BC.MIB->matchReg(), BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
/*Offset=*/BC.MIB->matchSymbol(Target));
if (!BC.MIB->hasPCRelOperand(Inst) ||
!IndJmpMatcher->match(
auto IndJmpMatcher =
BC.MIB->matchIndJmp(BC.MIB->matchReg(BaseReg1),
BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
/*Offset=*/BC.MIB->matchSymbol(Target));
if (!IndJmpMatcher->match(
*BC.MRI, *BC.MIB,
MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
BaseReg1 != BC.MIB->getNoRegister() ||
Scale != 8) {
// Standard JT matching failed. Trying now:
// PIC-style matcher, scale 4
// addq %rdx, %rsi
// addq %rdx, %rdi
// leaq DATAat0x402450(%rip), %r11
// movslq (%r11,%rdx,4), %rcx
// addq %r11, %rcx
// jmpq *%rcx # JUMPTABLE @0x402450
MCPhysReg BaseReg1;
MCPhysReg BaseReg2;
uint64_t Offset;
auto PICIndJmpMatcher = BC.MIB->matchIndJmp(BC.MIB->matchAdd(
BC.MIB->matchReg(BaseReg1),
BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg2),
BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
BC.MIB->matchImm(Offset))));
auto LEAMatcherOwner =
BC.MIB->matchLoadAddr(BC.MIB->matchSymbol(Target));
auto LEAMatcher = LEAMatcherOwner.get();
auto PICBaseAddrMatcher = BC.MIB->matchIndJmp(BC.MIB->matchAdd(
std::move(LEAMatcherOwner), BC.MIB->matchAnyOperand()));
if (!PICIndJmpMatcher->match(
// Standard JT matching failed. Trying now:
// movq "jt.2397/1"(,%rax,8), %rax
// jmpq *%rax
auto LoadMatcherOwner = BC.MIB->matchLoad(
BC.MIB->matchReg(BaseReg1), BC.MIB->matchImm(Scale),
BC.MIB->matchReg(), /*Offset=*/BC.MIB->matchSymbol(Target));
auto LoadMatcher = LoadMatcherOwner.get();
auto IndJmpMatcher2 = BC.MIB->matchIndJmp(std::move(LoadMatcherOwner));
if (!IndJmpMatcher2->match(
*BC.MRI, *BC.MIB,
MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
Scale != 4 || BaseReg1 != BaseReg2 || Offset != 0 ||
!PICBaseAddrMatcher->match(
*BC.MRI, *BC.MIB,
MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1)) {
llvm_unreachable("Failed to extract jump table base");
continue;
BaseReg1 != BC.MIB->getNoRegister() || Scale != 8) {
// JT matching failed. Trying now:
// PIC-style matcher, scale 4
// addq %rdx, %rsi
// addq %rdx, %rdi
// leaq DATAat0x402450(%rip), %r11
// movslq (%r11,%rdx,4), %rcx
// addq %r11, %rcx
// jmpq *%rcx # JUMPTABLE @0x402450
auto PICIndJmpMatcher = BC.MIB->matchIndJmp(BC.MIB->matchAdd(
BC.MIB->matchReg(BaseReg1),
BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg2),
BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
BC.MIB->matchImm(Offset))));
auto LEAMatcherOwner =
BC.MIB->matchLoadAddr(BC.MIB->matchSymbol(Target));
auto LEAMatcher = LEAMatcherOwner.get();
auto PICBaseAddrMatcher = BC.MIB->matchIndJmp(BC.MIB->matchAdd(
std::move(LEAMatcherOwner), BC.MIB->matchAnyOperand()));
if (!PICIndJmpMatcher->match(
*BC.MRI, *BC.MIB,
MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
Scale != 4 || BaseReg1 != BaseReg2 || Offset != 0 ||
!PICBaseAddrMatcher->match(
*BC.MRI, *BC.MIB,
MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1)) {
llvm_unreachable("Failed to extract jump table base");
continue;
}
// Matched PIC, identify the instruction with the reference to the JT
JTLoadInst = LEAMatcher->CurInst;
} else {
// Matched non-PIC
JTLoadInst = LoadMatcher->CurInst;
}
// Matched PIC
JTLoadInst = &*LEAMatcher->CurInst;
}
uint64_t NewJumpTableID{0};

View File

@ -61,9 +61,11 @@ namespace bolt {
MachORewriteInstance::MachORewriteInstance(object::MachOObjectFile *InputFile,
StringRef ToolPath)
: InputFile(InputFile), ToolPath(ToolPath),
BC(BinaryContext::createBinaryContext(
InputFile, /* IsPIC */ true,
DWARFContext::create(*InputFile))) {}
BC(BinaryContext::createBinaryContext(InputFile, /* IsPIC */ true,
DWARFContext::create(*InputFile))) {
if (opts::Instrument)
BC->setRuntimeLibrary(std::make_unique<InstrumentationRuntimeLibrary>());
}
Error MachORewriteInstance::setProfile(StringRef Filename) {
if (!sys::fs::exists(Filename))

View File

@ -170,16 +170,18 @@ Instrumentation::createInstrumentationSnippet(BinaryContext &BC, bool IsLeaf) {
MCSymbol *Label;
Label = BC.Ctx->createNamedTempSymbol("InstrEntry");
Summary->Counters.emplace_back(Label);
std::vector<MCInst> CounterInstrs(5);
std::vector<MCInst> CounterInstrs;
CounterInstrs.resize(IsLeaf? 5 : 3);
uint32_t I = 0;
// Don't clobber application red zone (ABI dependent)
if (IsLeaf)
BC.MIB->createStackPointerIncrement(CounterInstrs[0], 128,
BC.MIB->createStackPointerIncrement(CounterInstrs[I++], 128,
/*NoFlagsClobber=*/true);
BC.MIB->createPushFlags(CounterInstrs[1], 2);
BC.MIB->createIncMemory(CounterInstrs[2], Label, &*BC.Ctx);
BC.MIB->createPopFlags(CounterInstrs[3], 2);
BC.MIB->createPushFlags(CounterInstrs[I++], 2);
BC.MIB->createIncMemory(CounterInstrs[I++], Label, &*BC.Ctx);
BC.MIB->createPopFlags(CounterInstrs[I++], 2);
if (IsLeaf)
BC.MIB->createStackPointerDecrement(CounterInstrs[4], 128,
BC.MIB->createStackPointerDecrement(CounterInstrs[I++], 128,
/*NoFlagsClobber=*/true);
return CounterInstrs;
}
@ -660,8 +662,10 @@ void Instrumentation::setupRuntimeLibrary(BinaryContext &BC) {
outs() << "BOLT-INSTRUMENTER: Profile will be saved to file "
<< opts::InstrumentationFilename << "\n";
BC.setRuntimeLibrary(
std::make_unique<InstrumentationRuntimeLibrary>(std::move(Summary)));
auto *RtLibrary =
static_cast<InstrumentationRuntimeLibrary *>(BC.getRuntimeLibrary());
assert(RtLibrary && "instrumentation runtime library object must be set");
RtLibrary->setSummary(std::move(Summary));
}
}
}

View File

@ -494,8 +494,11 @@ RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const int Argc,
if (opts::UpdateDebugSections)
DebugInfoRewriter = std::make_unique<DWARFRewriter>(*BC, SectionPatchers);
if (opts::Hugify)
if (opts::Instrument) {
BC->setRuntimeLibrary(std::make_unique<InstrumentationRuntimeLibrary>());
} else if (opts::Hugify) {
BC->setRuntimeLibrary(std::make_unique<HugifyRuntimeLibrary>());
}
}
RewriteInstance::~RewriteInstance() {}

View File

@ -10,6 +10,7 @@
#include "InstrumentationRuntimeLibrary.h"
#include "BinaryFunction.h"
#include "JumpTable.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/CommandLine.h"
@ -26,6 +27,7 @@ extern cl::opt<std::string> InstrumentationFilename;
extern cl::opt<uint32_t> InstrumentationSleepTime;
extern cl::opt<bool> InstrumentationNoCountersClear;
extern cl::opt<bool> InstrumentationWaitForks;
extern cl::opt<JumpTableSupportLevel> JumpTables;
cl::opt<bool>
Instrument("instrument",
@ -46,6 +48,10 @@ void InstrumentationRuntimeLibrary::adjustCommandLineOptions(
"relocations\n";
exit(1);
}
if (opts::JumpTables != JTS_MOVE) {
opts::JumpTables = JTS_MOVE;
outs() << "BOLT-INFO: forcing -jump-tables=move for instrumentation\n";
}
if (!BC.StartFunctionAddress) {
errs() << "BOLT-ERROR: instrumentation runtime libraries require a known "
"entry point of "

View File

@ -17,8 +17,9 @@ namespace bolt {
class InstrumentationRuntimeLibrary : public RuntimeLibrary {
public:
InstrumentationRuntimeLibrary(std::unique_ptr<InstrumentationSummary> Summary)
: Summary(std::move(Summary)) {}
void setSummary(std::unique_ptr<InstrumentationSummary> &&S) {
Summary.swap(S);
}
void addRuntimeLibSections(std::vector<std::string> &SecNames) const final {
SecNames.push_back(".bolt.instr.counters");

View File

@ -0,0 +1,153 @@
# This reproduces a bug with instrumentation when trying to instrument
# functions that share a jump table with multiple indirect jumps. Usually,
# each indirect jump that uses a JT will have its own copy of it. When
# this does not happen, we need to duplicate the jump table safely, so
# we can split the edges correctly (each copy of the jump table may have
# different split edges). For this to happen, we need to correctly match
# the sequence of instructions that perform the indirect jump to identify
# the base address of the jump table and patch it to point to the new
# cloned JT.
#
# Here we test this variant:
# movq jt.2397(,%rax,8), %rax
# jmp *%rax
#
# Which is suboptimal since the compiler could've avoided using an intermediary
# register, but GCC does generate this code and it triggered a bug in our
# matcher. Usual jumps in non-PIC code have this format:
#
# jmp *jt.2397(,%rax,8)
#
# This is the C code fed to GCC:
# #include <stdio.h>
#int interp(char* code) {
# static void* jt[] = { &&op_end, &&op_inc, &&do_dec };
# int pc = 0;
# int res = 0;
# goto *jt[code[pc++] - '0'];
#
#op_inc:
# res += 1;
# printf("%d\n", res);
# goto *jt[code[pc++] - '0'];
#do_dec:
# res -= 1;
# printf("%d\n", res);
# goto *jt[code[pc++] - '0'];
#op_end:
# return res;
#}
#int main(int argc, char** argv) {
# return interp(argv[1]);
#}
# REQUIRES: system-linux
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
# RUN: %s -o %t.o
# RUN: %host_cc %t.o -o %t.exe -Wl,-q
# RUN: llvm-bolt %t.exe -instrument -instrumentation-file=%t.fdata \
# RUN: -o %t.instrumented
# Instrumented program needs to finish returning zero
# RUN: %t.instrumented 120
# Test that the instrumented data makes sense
# RUN: llvm-bolt %t.exe -o %t.bolted -data %t.fdata \
# RUN: -reorder-blocks=cache+ -reorder-functions=hfsort+ \
# RUN: -print-only=interp -print-finalized | FileCheck %s
# RUN: %t.bolted 120
# Check that our two indirect jumps are recorded in the fdata file and that
# each has its own independent profile
# CHECK: Successors: .Ltmp1 (mispreds: 0, count: 1), .Ltmp0 (mispreds: 0, count: 0), .Ltmp2 (mispreds: 0, count: 0)
# CHECK: Successors: .Ltmp0 (mispreds: 0, count: 1), .Ltmp2 (mispreds: 0, count: 1), .Ltmp1 (mispreds: 0, count: 0)
.file "test.c"
.text
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
.string "%d\n"
.text
.p2align 4,,15
.globl interp
.type interp, @function
interp:
.LFB11:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
xorl %ebp, %ebp
pushq %rbx
.cfi_def_cfa_offset 24
.cfi_offset 3, -24
leaq 1(%rdi), %rbx
subq $8, %rsp
.cfi_def_cfa_offset 32
movsbl (%rdi), %eax
subl $48, %eax
cltq
movq jt.2397(,%rax,8), %rax
jmp *%rax
.p2align 4,,10
.p2align 3
.L3:
addl $1, %ebp
.L8:
movl %ebp, %esi
movl $.LC0, %edi
xorl %eax, %eax
addq $1, %rbx
call printf
movsbl -1(%rbx), %eax
subl $48, %eax
cltq
movq jt.2397(,%rax,8), %rax
jmp *%rax
.p2align 4,,10
.p2align 3
.L6:
addq $8, %rsp
.cfi_remember_state
.cfi_def_cfa_offset 24
movl %ebp, %eax
popq %rbx
.cfi_def_cfa_offset 16
popq %rbp
.cfi_def_cfa_offset 8
ret
.p2align 4,,10
.p2align 3
.L4:
.cfi_restore_state
subl $1, %ebp
jmp .L8
.cfi_endproc
.LFE11:
.size interp, .-interp
.section .text.startup,"ax",@progbits
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB12:
.cfi_startproc
movq 8(%rsi), %rdi
jmp interp
.cfi_endproc
.LFE12:
.size main, .-main
.section .rodata
.align 16
.type jt.2397, @object
.size jt.2397, 24
jt.2397:
.quad .L6
.quad .L3
.quad .L4
.ident "GCC: (GNU) 8"
.section .note.GNU-stack,"",@progbits