forked from OSchip/llvm-project
[BOLT] Fix instrumentation bug in duplicated JTs
Summary: Fix a bug with instrumentation when trying to instrument functions that share a jump table with multiple indirect jumps. Usually, each indirect jump that uses a JT will have its own copy of it. When this does not happen, we need to duplicate the jump table safely, so we can split the edges correctly (each copy of the jump table may have different split edges). For this to happen, we need to correctly match the sequence of instructions that perform the indirect jump to identify the base address of the jump table and patch it to point to the new cloned JT. It was reported to us a case in which the compiler generated suboptimal code to do an indirect jump which our matcher failed to identify. Fixes facebookincubator/BOLT#126 (cherry picked from FBD27065579)
This commit is contained in:
parent
b11c826889
commit
b3c34d568a
|
@ -3864,7 +3864,7 @@ bool BinaryFunction::checkForAmbiguousJumpTables() {
|
|||
|
||||
void BinaryFunction::disambiguateJumpTables(
|
||||
MCPlusBuilder::AllocatorIdTy AllocId) {
|
||||
assert((opts::JumpTables != JTS_BASIC && isSimple()) || BC.HasRelocations);
|
||||
assert((opts::JumpTables != JTS_BASIC && isSimple()) || !BC.HasRelocations);
|
||||
SmallPtrSet<JumpTable *, 4> JumpTables;
|
||||
for (auto &BB : BasicBlocks) {
|
||||
for (auto &Inst : *BB) {
|
||||
|
@ -3881,51 +3881,70 @@ void BinaryFunction::disambiguateJumpTables(
|
|||
// This instruction is an indirect jump using a jump table, but it is
|
||||
// using the same jump table of another jump. Try all our tricks to
|
||||
// extract the jump table symbol and make it point to a new, duplicated JT
|
||||
MCPhysReg BaseReg1;
|
||||
uint64_t Scale;
|
||||
const MCSymbol *Target;
|
||||
// In case we match if our first matcher, first instruction is the one to
|
||||
// patch
|
||||
MCInst *JTLoadInst = &Inst;
|
||||
// Try a standard indirect jump matcher, scale 8
|
||||
auto IndJmpMatcher = BC.MIB->matchIndJmp(
|
||||
BC.MIB->matchReg(), BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
|
||||
/*Offset=*/BC.MIB->matchSymbol(Target));
|
||||
if (!BC.MIB->hasPCRelOperand(Inst) ||
|
||||
!IndJmpMatcher->match(
|
||||
auto IndJmpMatcher =
|
||||
BC.MIB->matchIndJmp(BC.MIB->matchReg(BaseReg1),
|
||||
BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
|
||||
/*Offset=*/BC.MIB->matchSymbol(Target));
|
||||
if (!IndJmpMatcher->match(
|
||||
*BC.MRI, *BC.MIB,
|
||||
MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
|
||||
BaseReg1 != BC.MIB->getNoRegister() ||
|
||||
Scale != 8) {
|
||||
// Standard JT matching failed. Trying now:
|
||||
// PIC-style matcher, scale 4
|
||||
// addq %rdx, %rsi
|
||||
// addq %rdx, %rdi
|
||||
// leaq DATAat0x402450(%rip), %r11
|
||||
// movslq (%r11,%rdx,4), %rcx
|
||||
// addq %r11, %rcx
|
||||
// jmpq *%rcx # JUMPTABLE @0x402450
|
||||
MCPhysReg BaseReg1;
|
||||
MCPhysReg BaseReg2;
|
||||
uint64_t Offset;
|
||||
auto PICIndJmpMatcher = BC.MIB->matchIndJmp(BC.MIB->matchAdd(
|
||||
BC.MIB->matchReg(BaseReg1),
|
||||
BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg2),
|
||||
BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
|
||||
BC.MIB->matchImm(Offset))));
|
||||
auto LEAMatcherOwner =
|
||||
BC.MIB->matchLoadAddr(BC.MIB->matchSymbol(Target));
|
||||
auto LEAMatcher = LEAMatcherOwner.get();
|
||||
auto PICBaseAddrMatcher = BC.MIB->matchIndJmp(BC.MIB->matchAdd(
|
||||
std::move(LEAMatcherOwner), BC.MIB->matchAnyOperand()));
|
||||
if (!PICIndJmpMatcher->match(
|
||||
// Standard JT matching failed. Trying now:
|
||||
// movq "jt.2397/1"(,%rax,8), %rax
|
||||
// jmpq *%rax
|
||||
auto LoadMatcherOwner = BC.MIB->matchLoad(
|
||||
BC.MIB->matchReg(BaseReg1), BC.MIB->matchImm(Scale),
|
||||
BC.MIB->matchReg(), /*Offset=*/BC.MIB->matchSymbol(Target));
|
||||
auto LoadMatcher = LoadMatcherOwner.get();
|
||||
auto IndJmpMatcher2 = BC.MIB->matchIndJmp(std::move(LoadMatcherOwner));
|
||||
if (!IndJmpMatcher2->match(
|
||||
*BC.MRI, *BC.MIB,
|
||||
MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
|
||||
Scale != 4 || BaseReg1 != BaseReg2 || Offset != 0 ||
|
||||
!PICBaseAddrMatcher->match(
|
||||
*BC.MRI, *BC.MIB,
|
||||
MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1)) {
|
||||
llvm_unreachable("Failed to extract jump table base");
|
||||
continue;
|
||||
BaseReg1 != BC.MIB->getNoRegister() || Scale != 8) {
|
||||
// JT matching failed. Trying now:
|
||||
// PIC-style matcher, scale 4
|
||||
// addq %rdx, %rsi
|
||||
// addq %rdx, %rdi
|
||||
// leaq DATAat0x402450(%rip), %r11
|
||||
// movslq (%r11,%rdx,4), %rcx
|
||||
// addq %r11, %rcx
|
||||
// jmpq *%rcx # JUMPTABLE @0x402450
|
||||
auto PICIndJmpMatcher = BC.MIB->matchIndJmp(BC.MIB->matchAdd(
|
||||
BC.MIB->matchReg(BaseReg1),
|
||||
BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg2),
|
||||
BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
|
||||
BC.MIB->matchImm(Offset))));
|
||||
auto LEAMatcherOwner =
|
||||
BC.MIB->matchLoadAddr(BC.MIB->matchSymbol(Target));
|
||||
auto LEAMatcher = LEAMatcherOwner.get();
|
||||
auto PICBaseAddrMatcher = BC.MIB->matchIndJmp(BC.MIB->matchAdd(
|
||||
std::move(LEAMatcherOwner), BC.MIB->matchAnyOperand()));
|
||||
if (!PICIndJmpMatcher->match(
|
||||
*BC.MRI, *BC.MIB,
|
||||
MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
|
||||
Scale != 4 || BaseReg1 != BaseReg2 || Offset != 0 ||
|
||||
!PICBaseAddrMatcher->match(
|
||||
*BC.MRI, *BC.MIB,
|
||||
MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1)) {
|
||||
llvm_unreachable("Failed to extract jump table base");
|
||||
continue;
|
||||
}
|
||||
// Matched PIC, identify the instruction with the reference to the JT
|
||||
JTLoadInst = LEAMatcher->CurInst;
|
||||
} else {
|
||||
// Matched non-PIC
|
||||
JTLoadInst = LoadMatcher->CurInst;
|
||||
}
|
||||
// Matched PIC
|
||||
JTLoadInst = &*LEAMatcher->CurInst;
|
||||
}
|
||||
|
||||
uint64_t NewJumpTableID{0};
|
||||
|
|
|
@ -61,9 +61,11 @@ namespace bolt {
|
|||
MachORewriteInstance::MachORewriteInstance(object::MachOObjectFile *InputFile,
|
||||
StringRef ToolPath)
|
||||
: InputFile(InputFile), ToolPath(ToolPath),
|
||||
BC(BinaryContext::createBinaryContext(
|
||||
InputFile, /* IsPIC */ true,
|
||||
DWARFContext::create(*InputFile))) {}
|
||||
BC(BinaryContext::createBinaryContext(InputFile, /* IsPIC */ true,
|
||||
DWARFContext::create(*InputFile))) {
|
||||
if (opts::Instrument)
|
||||
BC->setRuntimeLibrary(std::make_unique<InstrumentationRuntimeLibrary>());
|
||||
}
|
||||
|
||||
Error MachORewriteInstance::setProfile(StringRef Filename) {
|
||||
if (!sys::fs::exists(Filename))
|
||||
|
|
|
@ -170,16 +170,18 @@ Instrumentation::createInstrumentationSnippet(BinaryContext &BC, bool IsLeaf) {
|
|||
MCSymbol *Label;
|
||||
Label = BC.Ctx->createNamedTempSymbol("InstrEntry");
|
||||
Summary->Counters.emplace_back(Label);
|
||||
std::vector<MCInst> CounterInstrs(5);
|
||||
std::vector<MCInst> CounterInstrs;
|
||||
CounterInstrs.resize(IsLeaf? 5 : 3);
|
||||
uint32_t I = 0;
|
||||
// Don't clobber application red zone (ABI dependent)
|
||||
if (IsLeaf)
|
||||
BC.MIB->createStackPointerIncrement(CounterInstrs[0], 128,
|
||||
BC.MIB->createStackPointerIncrement(CounterInstrs[I++], 128,
|
||||
/*NoFlagsClobber=*/true);
|
||||
BC.MIB->createPushFlags(CounterInstrs[1], 2);
|
||||
BC.MIB->createIncMemory(CounterInstrs[2], Label, &*BC.Ctx);
|
||||
BC.MIB->createPopFlags(CounterInstrs[3], 2);
|
||||
BC.MIB->createPushFlags(CounterInstrs[I++], 2);
|
||||
BC.MIB->createIncMemory(CounterInstrs[I++], Label, &*BC.Ctx);
|
||||
BC.MIB->createPopFlags(CounterInstrs[I++], 2);
|
||||
if (IsLeaf)
|
||||
BC.MIB->createStackPointerDecrement(CounterInstrs[4], 128,
|
||||
BC.MIB->createStackPointerDecrement(CounterInstrs[I++], 128,
|
||||
/*NoFlagsClobber=*/true);
|
||||
return CounterInstrs;
|
||||
}
|
||||
|
@ -660,8 +662,10 @@ void Instrumentation::setupRuntimeLibrary(BinaryContext &BC) {
|
|||
outs() << "BOLT-INSTRUMENTER: Profile will be saved to file "
|
||||
<< opts::InstrumentationFilename << "\n";
|
||||
|
||||
BC.setRuntimeLibrary(
|
||||
std::make_unique<InstrumentationRuntimeLibrary>(std::move(Summary)));
|
||||
auto *RtLibrary =
|
||||
static_cast<InstrumentationRuntimeLibrary *>(BC.getRuntimeLibrary());
|
||||
assert(RtLibrary && "instrumentation runtime library object must be set");
|
||||
RtLibrary->setSummary(std::move(Summary));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -494,8 +494,11 @@ RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const int Argc,
|
|||
if (opts::UpdateDebugSections)
|
||||
DebugInfoRewriter = std::make_unique<DWARFRewriter>(*BC, SectionPatchers);
|
||||
|
||||
if (opts::Hugify)
|
||||
if (opts::Instrument) {
|
||||
BC->setRuntimeLibrary(std::make_unique<InstrumentationRuntimeLibrary>());
|
||||
} else if (opts::Hugify) {
|
||||
BC->setRuntimeLibrary(std::make_unique<HugifyRuntimeLibrary>());
|
||||
}
|
||||
}
|
||||
|
||||
RewriteInstance::~RewriteInstance() {}
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
|
||||
#include "InstrumentationRuntimeLibrary.h"
|
||||
#include "BinaryFunction.h"
|
||||
#include "JumpTable.h"
|
||||
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
|
||||
#include "llvm/Support/Alignment.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
|
@ -26,6 +27,7 @@ extern cl::opt<std::string> InstrumentationFilename;
|
|||
extern cl::opt<uint32_t> InstrumentationSleepTime;
|
||||
extern cl::opt<bool> InstrumentationNoCountersClear;
|
||||
extern cl::opt<bool> InstrumentationWaitForks;
|
||||
extern cl::opt<JumpTableSupportLevel> JumpTables;
|
||||
|
||||
cl::opt<bool>
|
||||
Instrument("instrument",
|
||||
|
@ -46,6 +48,10 @@ void InstrumentationRuntimeLibrary::adjustCommandLineOptions(
|
|||
"relocations\n";
|
||||
exit(1);
|
||||
}
|
||||
if (opts::JumpTables != JTS_MOVE) {
|
||||
opts::JumpTables = JTS_MOVE;
|
||||
outs() << "BOLT-INFO: forcing -jump-tables=move for instrumentation\n";
|
||||
}
|
||||
if (!BC.StartFunctionAddress) {
|
||||
errs() << "BOLT-ERROR: instrumentation runtime libraries require a known "
|
||||
"entry point of "
|
||||
|
|
|
@ -17,8 +17,9 @@ namespace bolt {
|
|||
|
||||
class InstrumentationRuntimeLibrary : public RuntimeLibrary {
|
||||
public:
|
||||
InstrumentationRuntimeLibrary(std::unique_ptr<InstrumentationSummary> Summary)
|
||||
: Summary(std::move(Summary)) {}
|
||||
void setSummary(std::unique_ptr<InstrumentationSummary> &&S) {
|
||||
Summary.swap(S);
|
||||
}
|
||||
|
||||
void addRuntimeLibSections(std::vector<std::string> &SecNames) const final {
|
||||
SecNames.push_back(".bolt.instr.counters");
|
||||
|
|
|
@ -0,0 +1,153 @@
|
|||
# This reproduces a bug with instrumentation when trying to instrument
|
||||
# functions that share a jump table with multiple indirect jumps. Usually,
|
||||
# each indirect jump that uses a JT will have its own copy of it. When
|
||||
# this does not happen, we need to duplicate the jump table safely, so
|
||||
# we can split the edges correctly (each copy of the jump table may have
|
||||
# different split edges). For this to happen, we need to correctly match
|
||||
# the sequence of instructions that perform the indirect jump to identify
|
||||
# the base address of the jump table and patch it to point to the new
|
||||
# cloned JT.
|
||||
#
|
||||
# Here we test this variant:
|
||||
# movq jt.2397(,%rax,8), %rax
|
||||
# jmp *%rax
|
||||
#
|
||||
# Which is suboptimal since the compiler could've avoided using an intermediary
|
||||
# register, but GCC does generate this code and it triggered a bug in our
|
||||
# matcher. Usual jumps in non-PIC code have this format:
|
||||
#
|
||||
# jmp *jt.2397(,%rax,8)
|
||||
#
|
||||
# This is the C code fed to GCC:
|
||||
# #include <stdio.h>
|
||||
#int interp(char* code) {
|
||||
# static void* jt[] = { &&op_end, &&op_inc, &&do_dec };
|
||||
# int pc = 0;
|
||||
# int res = 0;
|
||||
# goto *jt[code[pc++] - '0'];
|
||||
#
|
||||
#op_inc:
|
||||
# res += 1;
|
||||
# printf("%d\n", res);
|
||||
# goto *jt[code[pc++] - '0'];
|
||||
#do_dec:
|
||||
# res -= 1;
|
||||
# printf("%d\n", res);
|
||||
# goto *jt[code[pc++] - '0'];
|
||||
#op_end:
|
||||
# return res;
|
||||
#}
|
||||
#int main(int argc, char** argv) {
|
||||
# return interp(argv[1]);
|
||||
#}
|
||||
|
||||
|
||||
# REQUIRES: system-linux
|
||||
|
||||
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
|
||||
# RUN: %s -o %t.o
|
||||
# RUN: %host_cc %t.o -o %t.exe -Wl,-q
|
||||
|
||||
# RUN: llvm-bolt %t.exe -instrument -instrumentation-file=%t.fdata \
|
||||
# RUN: -o %t.instrumented
|
||||
|
||||
# Instrumented program needs to finish returning zero
|
||||
# RUN: %t.instrumented 120
|
||||
|
||||
# Test that the instrumented data makes sense
|
||||
# RUN: llvm-bolt %t.exe -o %t.bolted -data %t.fdata \
|
||||
# RUN: -reorder-blocks=cache+ -reorder-functions=hfsort+ \
|
||||
# RUN: -print-only=interp -print-finalized | FileCheck %s
|
||||
|
||||
# RUN: %t.bolted 120
|
||||
|
||||
# Check that our two indirect jumps are recorded in the fdata file and that
|
||||
# each has its own independent profile
|
||||
# CHECK: Successors: .Ltmp1 (mispreds: 0, count: 1), .Ltmp0 (mispreds: 0, count: 0), .Ltmp2 (mispreds: 0, count: 0)
|
||||
# CHECK: Successors: .Ltmp0 (mispreds: 0, count: 1), .Ltmp2 (mispreds: 0, count: 1), .Ltmp1 (mispreds: 0, count: 0)
|
||||
|
||||
.file "test.c"
|
||||
.text
|
||||
.section .rodata.str1.1,"aMS",@progbits,1
|
||||
.LC0:
|
||||
.string "%d\n"
|
||||
.text
|
||||
.p2align 4,,15
|
||||
.globl interp
|
||||
.type interp, @function
|
||||
interp:
|
||||
.LFB11:
|
||||
.cfi_startproc
|
||||
pushq %rbp
|
||||
.cfi_def_cfa_offset 16
|
||||
.cfi_offset 6, -16
|
||||
xorl %ebp, %ebp
|
||||
pushq %rbx
|
||||
.cfi_def_cfa_offset 24
|
||||
.cfi_offset 3, -24
|
||||
leaq 1(%rdi), %rbx
|
||||
subq $8, %rsp
|
||||
.cfi_def_cfa_offset 32
|
||||
movsbl (%rdi), %eax
|
||||
subl $48, %eax
|
||||
cltq
|
||||
movq jt.2397(,%rax,8), %rax
|
||||
jmp *%rax
|
||||
.p2align 4,,10
|
||||
.p2align 3
|
||||
.L3:
|
||||
addl $1, %ebp
|
||||
.L8:
|
||||
movl %ebp, %esi
|
||||
movl $.LC0, %edi
|
||||
xorl %eax, %eax
|
||||
addq $1, %rbx
|
||||
call printf
|
||||
movsbl -1(%rbx), %eax
|
||||
subl $48, %eax
|
||||
cltq
|
||||
movq jt.2397(,%rax,8), %rax
|
||||
jmp *%rax
|
||||
.p2align 4,,10
|
||||
.p2align 3
|
||||
.L6:
|
||||
addq $8, %rsp
|
||||
.cfi_remember_state
|
||||
.cfi_def_cfa_offset 24
|
||||
movl %ebp, %eax
|
||||
popq %rbx
|
||||
.cfi_def_cfa_offset 16
|
||||
popq %rbp
|
||||
.cfi_def_cfa_offset 8
|
||||
ret
|
||||
.p2align 4,,10
|
||||
.p2align 3
|
||||
.L4:
|
||||
.cfi_restore_state
|
||||
subl $1, %ebp
|
||||
jmp .L8
|
||||
.cfi_endproc
|
||||
.LFE11:
|
||||
.size interp, .-interp
|
||||
.section .text.startup,"ax",@progbits
|
||||
.p2align 4,,15
|
||||
.globl main
|
||||
.type main, @function
|
||||
main:
|
||||
.LFB12:
|
||||
.cfi_startproc
|
||||
movq 8(%rsi), %rdi
|
||||
jmp interp
|
||||
.cfi_endproc
|
||||
.LFE12:
|
||||
.size main, .-main
|
||||
.section .rodata
|
||||
.align 16
|
||||
.type jt.2397, @object
|
||||
.size jt.2397, 24
|
||||
jt.2397:
|
||||
.quad .L6
|
||||
.quad .L3
|
||||
.quad .L4
|
||||
.ident "GCC: (GNU) 8"
|
||||
.section .note.GNU-stack,"",@progbits
|
Loading…
Reference in New Issue