[PR] Instrumentation: Fix runtime handlers for PIE files

Summary:
This commit fixes runtime instrumentation handlers for PIE
binaries case.

Vladislav Khmelevsky,
Advanced Software Technology Lab, Huawei

(cherry picked from FBD30092522)
This commit is contained in:
Vladislav Khmelevsky 2021-06-23 18:24:09 +00:00 committed by Maksim Panchenko
parent 9b39a823ea
commit 361f3b5576
8 changed files with 128 additions and 84 deletions

View File

@ -100,8 +100,8 @@ extern bool __bolt_instr_use_pid;
// only support resolving dependencies from this file to the output of BOLT,
// *not* the other way around.
// TODO: We need better linking support to make that happen.
extern void (*__bolt_trampoline_ind_call)();
extern void (*__bolt_trampoline_ind_tailcall)();
extern void (*__bolt_ind_call_counter_func_pointer)();
extern void (*__bolt_ind_tailcall_counter_func_pointer)();
// Function pointers to init/fini trampoline routines in the binary, so we can
// resume regular execution of these functions that we hooked
extern void (*__bolt_start_trampoline)();
@ -1537,8 +1537,8 @@ extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() {
0x3 /*PROT_READ|PROT_WRITE*/,
0x31 /*MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED*/, -1, 0);
__bolt_trampoline_ind_call = __bolt_instr_indirect_call;
__bolt_trampoline_ind_tailcall = __bolt_instr_indirect_tailcall;
__bolt_ind_call_counter_func_pointer = __bolt_instr_indirect_call;
__bolt_ind_tailcall_counter_func_pointer = __bolt_instr_indirect_tailcall;
// Conservatively reserve 100MiB shared pages
GlobalAlloc.setMaxSize(0x6400000);
GlobalAlloc.setShared(true);
@ -1558,7 +1558,8 @@ extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() {
}
}
extern "C" void instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {
extern "C" __attribute((force_align_arg_pointer)) void
instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {
GlobalIndCallCounters[IndCallID].incrementVal(Target, GlobalAlloc);
}
@ -1567,27 +1568,22 @@ extern "C" void instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {
extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
{
__asm__ __volatile__(SAVE_ALL
"mov 0x90(%%rsp), %%rdi\n"
"mov 0x88(%%rsp), %%rsi\n"
"mov 0xa0(%%rsp), %%rdi\n"
"mov 0x98(%%rsp), %%rsi\n"
"call instrumentIndirectCall\n"
RESTORE_ALL
"pop %%rdi\n"
"add $16, %%rsp\n"
"xchg (%%rsp), %%rdi\n"
"jmp *-8(%%rsp)\n"
"ret\n"
:::);
}
extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall()
{
__asm__ __volatile__(SAVE_ALL
"mov 0x88(%%rsp), %%rdi\n"
"mov 0x80(%%rsp), %%rsi\n"
"mov 0x98(%%rsp), %%rdi\n"
"mov 0x90(%%rsp), %%rsi\n"
"call instrumentIndirectCall\n"
RESTORE_ALL
"add $16, %%rsp\n"
"pop %%rdi\n"
"jmp *-16(%%rsp)\n"
"ret\n"
:::);
}

View File

@ -413,7 +413,7 @@ public:
}
virtual bool createDirectCall(MCInst &Inst, const MCSymbol *Target,
MCContext *Ctx) {
MCContext *Ctx, bool IsTailCall) {
llvm_unreachable("not implemented");
return false;
}
@ -1735,12 +1735,21 @@ public:
return std::vector<MCInst>();
}
virtual std::vector<MCInst> createInstrumentedNoopIndCallHandler() const {
virtual std::vector<MCInst> createInstrumentedIndCallHandlerExitBB() const {
llvm_unreachable("not implemented");
return std::vector<MCInst>();
}
virtual std::vector<MCInst> createInstrumentedNoopIndTailCallHandler() const {
virtual std::vector<MCInst>
createInstrumentedIndTailCallHandlerExitBB() const {
llvm_unreachable("not implemented");
return std::vector<MCInst>();
}
virtual std::vector<MCInst>
createInstrumentedIndCallHandlerEntryBB(const MCSymbol *InstrTrampoline,
const MCSymbol *IndCallHandler,
MCContext *Ctx) {
llvm_unreachable("not implemented");
return std::vector<MCInst>();
}

View File

@ -225,8 +225,8 @@ void Instrumentation::instrumentIndirectTarget(BinaryBasicBlock &BB,
bool IsTailCall = BC.MIB->isTailCall(*Iter);
std::vector<MCInst> CounterInstrs = BC.MIB->createInstrumentedIndirectCall(
*Iter, IsTailCall,
IsTailCall ? Summary->IndTailCallHandlerFunc
: Summary->IndCallHandlerFunc,
IsTailCall ? IndTailCallHandlerExitBBFunction->getSymbol()
: IndCallHandlerExitBBFunction->getSymbol(),
IndCallSiteID, &*BC.Ctx);
Iter = BB.eraseInstruction(Iter);
@ -531,10 +531,12 @@ void Instrumentation::runOnFunctions(BinaryContext &BC) {
/*Alignment=*/1,
/*IsReadOnly=*/true, ELF::SHT_NOTE);
Summary->IndCallHandlerFunc =
BC.Ctx->getOrCreateSymbol("__bolt_trampoline_ind_call");
Summary->IndTailCallHandlerFunc =
BC.Ctx->getOrCreateSymbol("__bolt_trampoline_ind_tailcall");
Summary->IndCallCounterFuncPtr =
BC.Ctx->getOrCreateSymbol("__bolt_ind_call_counter_func_pointer");
Summary->IndTailCallCounterFuncPtr =
BC.Ctx->getOrCreateSymbol("__bolt_ind_tailcall_counter_func_pointer");
createAuxiliaryFunctions(BC);
ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) {
return (!BF.isSimple() || BF.isIgnored() ||
@ -550,8 +552,6 @@ void Instrumentation::runOnFunctions(BinaryContext &BC) {
BC, ParallelUtilities::SchedulingPolicy::SP_INST_QUADRATIC, WorkFun,
SkipPredicate, "instrumentation", /* ForceSequential=*/true);
createAuxiliaryFunctions(BC);
if (BC.isMachO()) {
if (BC.StartFunctionAddress) {
BinaryFunction *Main =
@ -616,13 +616,32 @@ void Instrumentation::createAuxiliaryFunctions(BinaryContext &BC) {
return Func;
};
Summary->InitialIndCallHandlerFunction =
createSimpleFunction("__bolt_instr_default_ind_call_handler",
BC.MIB->createInstrumentedNoopIndCallHandler());
// Here we are creating a set of functions to handle BB entry/exit.
// IndCallHandlerExitBB contains instructions to finish handling traffic to an
// indirect call. We pass it to createInstrumentedIndCallHandlerEntryBB(),
// which will check if a pointer to runtime library traffic accounting
// function was initialized (it is done during initialization of runtime
// library). If it is so - calls it. Then this routine returns to normal
// execution by jumping to exit BB.
BinaryFunction *IndCallHandlerExitBB =
createSimpleFunction("__bolt_instr_ind_call_handler",
BC.MIB->createInstrumentedIndCallHandlerExitBB());
Summary->InitialIndTailCallHandlerFunction =
createSimpleFunction("__bolt_instr_default_ind_tailcall_handler",
BC.MIB->createInstrumentedNoopIndTailCallHandler());
IndCallHandlerExitBBFunction =
createSimpleFunction("__bolt_instr_ind_call_handler_func",
BC.MIB->createInstrumentedIndCallHandlerEntryBB(
Summary->IndCallCounterFuncPtr,
IndCallHandlerExitBB->getSymbol(), &*BC.Ctx));
BinaryFunction *IndTailCallHandlerExitBB = createSimpleFunction(
"__bolt_instr_ind_tail_call_handler",
BC.MIB->createInstrumentedIndTailCallHandlerExitBB());
IndTailCallHandlerExitBBFunction = createSimpleFunction(
"__bolt_instr_ind_tailcall_handler_func",
BC.MIB->createInstrumentedIndCallHandlerEntryBB(
Summary->IndTailCallCounterFuncPtr,
IndTailCallHandlerExitBB->getSymbol(), &*BC.Ctx));
createSimpleFunction("__bolt_num_counters_getter",
BC.MIB->createNumCountersGetter(BC.Ctx.get()));

View File

@ -119,6 +119,10 @@ private:
uint32_t DirectCallCounters{0};
uint32_t BranchCounters{0};
uint32_t LeafNodeCounters{0};
/// Indirect call instrumentation functions
BinaryFunction *IndCallHandlerExitBBFunction;
BinaryFunction *IndTailCallHandlerExitBBFunction;
};
}
}

View File

@ -104,9 +104,9 @@ struct InstrumentationSummary {
/// Stores function names, to be emitted to the runtime
std::string StringTable;
/// Our runtime indirect call instrumenter function
MCSymbol *IndCallHandlerFunc;
MCSymbol *IndTailCallHandlerFunc;
/// Pointer to runtime instrumentation handlers
MCSymbol *IndCallCounterFuncPtr;
MCSymbol *IndTailCallCounterFuncPtr;
/// Intra-function control flow and direct calls
std::vector<FunctionDescription> FunctionDescriptions;
@ -115,15 +115,6 @@ struct InstrumentationSummary {
std::vector<IndCallDescription> IndCallDescriptions;
std::vector<IndCallTargetDescription> IndCallTargetDescriptions;
/// Our generated initial indirect call handler function that does nothing
/// except calling the indirect call target. The target program starts
/// using this no-op instrumentation function until our runtime library
/// setup runs and installs the correct handler. We need something before
/// our setup runs in case dyld starts running init code for other libs when
/// we did not have time to set up our indirect call counters yet.
BinaryFunction *InitialIndCallHandlerFunction;
BinaryFunction *InitialIndTailCallHandlerFunction;
static constexpr uint64_t NUM_SERIALIZED_CONTAINERS = 4;
static constexpr uint64_t SERIALIZED_CONTAINER_SIZE =
sizeof(uint32_t) * NUM_SERIALIZED_CONTAINERS;

View File

@ -106,7 +106,7 @@ BinaryFunction *createNewRetpoline(BinaryContext &BC,
// Build BB0
MCInst DirectCall;
MIB.createDirectCall(DirectCall, BB2.getLabel(), &Ctx);
MIB.createDirectCall(DirectCall, BB2.getLabel(), &Ctx, /*IsTailCall*/ false);
BB0.addInstruction(DirectCall);
// Build BB1
@ -244,10 +244,8 @@ void createBranchReplacement(BinaryContext &BC,
// Call the retpoline
MCInst RetpolineCall;
if (BrInfo.isJump() || BrInfo.isTailCall())
MIB.createTailCall(RetpolineCall, RetpolineSymbol, BC.Ctx.get());
else
MIB.createDirectCall(RetpolineCall, RetpolineSymbol, BC.Ctx.get());
MIB.createDirectCall(RetpolineCall, RetpolineSymbol, BC.Ctx.get(),
BrInfo.isJump() || BrInfo.isTailCall());
Replacement.push_back(RetpolineCall);
}

View File

@ -140,7 +140,10 @@ void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
const unsigned Psize = BC.AsmInfo->getCodePointerSize();
emitDataPadding(Psize);
emitLabel(Symbol);
Streamer.emitValue(Value, Psize);
if (Value)
Streamer.emitValue(Value, Psize);
else
Streamer.emitFill(Psize, 0);
};
auto emitIntValue = [&Streamer, emitDataPadding, emitLabelByName](
@ -172,13 +175,8 @@ void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
!!opts::InstrumentationNoCountersClear, 1);
emitIntValue("__bolt_instr_wait_forks", !!opts::InstrumentationWaitForks, 1);
emitIntValue("__bolt_num_counters", Summary->Counters.size());
emitValue(Summary->IndCallHandlerFunc,
MCSymbolRefExpr::create(
Summary->InitialIndCallHandlerFunction->getSymbol(), *BC.Ctx));
emitValue(
Summary->IndTailCallHandlerFunc,
MCSymbolRefExpr::create(
Summary->InitialIndTailCallHandlerFunction->getSymbol(), *BC.Ctx));
emitValue(Summary->IndCallCounterFuncPtr, nullptr);
emitValue(Summary->IndTailCallCounterFuncPtr, nullptr);
emitIntValue("__bolt_instr_num_ind_calls",
Summary->IndCallDescriptions.size());
emitIntValue("__bolt_instr_num_ind_targets",

View File

@ -2971,11 +2971,7 @@ public:
bool createTailCall(MCInst &Inst, const MCSymbol *Target,
MCContext *Ctx) override {
Inst.setOpcode(X86::JMP_4);
Inst.addOperand(MCOperand::createExpr(
MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
setTailCall(Inst);
return true;
return createDirectCall(Inst, Target, Ctx, /*IsTailCall*/ true);
}
bool createTrap(MCInst &Inst) const override {
@ -3064,12 +3060,14 @@ public:
Inst.setOpcode(X86::LFENCE);
}
bool createDirectCall(MCInst &Inst, const MCSymbol *Target,
MCContext *Ctx) override {
bool createDirectCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx,
bool IsTailCall) override {
Inst.clear();
Inst.setOpcode(X86::CALL64pcrel32);
Inst.setOpcode(IsTailCall ? X86::JMP_4 : X86::CALL64pcrel32);
Inst.addOperand(MCOperand::createExpr(
MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
if (IsTailCall)
setTailCall(Inst);
return true;
}
@ -3211,7 +3209,7 @@ public:
// push %rdi
// movq $CallSiteID, %rdi
// push %rdi
// callq/jmp *HandlerFuncAddr
// callq/jmp HandlerFuncAddr
Insts.emplace_back();
createPushRegister(Insts.back(), TempReg, 8);
if (UsesSP) { // Only adjust SP if we really need to
@ -3234,8 +3232,8 @@ public:
Insts.emplace_back();
createPushRegister(Insts.back(), TempReg, 8);
Insts.emplace_back();
createIndirectCall(Insts.back(), HandlerFuncAddr, Ctx,
/*TailCall=*/TailCall);
createDirectCall(Insts.back(), HandlerFuncAddr, Ctx,
/*TailCall=*/TailCall);
// Carry over metadata
for (int I = MCPlus::getNumPrimeOperands(CallInst),
E = CallInst.getNumOperands();
@ -3245,35 +3243,65 @@ public:
return Insts;
}
std::vector<MCInst>
createInstrumentedNoopIndCallHandler() const override {
std::vector<MCInst> createInstrumentedIndCallHandlerExitBB() const override {
const MCPhysReg TempReg = getIntArgRegister(0);
// For the default indirect call handler that is supposed to be a no-op,
// we just need to undo the sequence created for every ind call in
// We just need to undo the sequence created for every ind call in
// instrumentIndirectTarget(), which can be accomplished minimally with:
// popfq
// pop %rdi
// add $16, %rsp
// xchg (%rsp), %rdi
// jmp *-8(%rsp)
std::vector<MCInst> Insts(4);
createPopRegister(Insts[0], TempReg, 8);
createStackPointerDecrement(Insts[1], 16, /*NoFlagsClobber=*/false);
createSwap(Insts[2], TempReg, X86::RSP, 0);
createIndirectBranch(Insts[3], X86::RSP, -8);
std::vector<MCInst> Insts(5);
createPopFlags(Insts[0], 8);
createPopRegister(Insts[1], TempReg, 8);
createStackPointerDecrement(Insts[2], 16, /*NoFlagsClobber=*/false);
createSwap(Insts[3], TempReg, X86::RSP, 0);
createIndirectBranch(Insts[4], X86::RSP, -8);
return Insts;
}
std::vector<MCInst>
createInstrumentedNoopIndTailCallHandler() const override {
createInstrumentedIndTailCallHandlerExitBB() const override {
const MCPhysReg TempReg = getIntArgRegister(0);
// Same thing as above, but for tail calls
// popfq
// add $16, %rsp
// pop %rdi
// jmp *-16(%rsp)
std::vector<MCInst> Insts(3);
createStackPointerDecrement(Insts[0], 16, /*NoFlagsClobber=*/false);
createPopRegister(Insts[1], TempReg, 8);
createIndirectBranch(Insts[2], X86::RSP, -16);
std::vector<MCInst> Insts(4);
createPopFlags(Insts[0], 8);
createStackPointerDecrement(Insts[1], 16, /*NoFlagsClobber=*/false);
createPopRegister(Insts[2], TempReg, 8);
createIndirectBranch(Insts[3], X86::RSP, -16);
return Insts;
}
std::vector<MCInst>
createInstrumentedIndCallHandlerEntryBB(const MCSymbol *InstrTrampoline,
const MCSymbol *IndCallHandler,
MCContext *Ctx) override {
const MCPhysReg TempReg = getIntArgRegister(0);
// Code sequence used to check whether InstrTampoline was initialized
// and call it if so, returns via IndCallHandler.
// pushfq
// mov InstrTrampoline,%rdi
// cmp $0x0,%rdi
// je IndCallHandler
// callq *%rdi
// jmpq IndCallHandler
std::vector<MCInst> Insts;
Insts.emplace_back();
createPushFlags(Insts.back(), 8);
Insts.emplace_back();
createMove(Insts.back(), InstrTrampoline, TempReg, Ctx);
std::vector<MCInst> cmpJmp = createCmpJE(TempReg, 0, IndCallHandler, Ctx);
Insts.insert(Insts.end(), cmpJmp.begin(), cmpJmp.end());
Insts.emplace_back();
Insts.back().setOpcode(X86::CALL64r);
Insts.back().addOperand(MCOperand::createReg(TempReg));
Insts.emplace_back();
createDirectCall(Insts.back(), IndCallHandler, Ctx, /*IsTailCall*/ true);
return Insts;
}
@ -3285,7 +3313,8 @@ public:
return Insts;
}
std::vector<MCInst> createInstrLocationsGetter(MCContext *Ctx) const override {
std::vector<MCInst>
createInstrLocationsGetter(MCContext *Ctx) const override {
std::vector<MCInst> Insts(2);
MCSymbol *Locs = Ctx->getOrCreateSymbol("__bolt_instr_locations");
createLea(Insts[0], Locs, X86::EAX, Ctx);