From 361f3b5576c4e90e1f1cb7572d2d3923a1670b4b Mon Sep 17 00:00:00 2001 From: Vladislav Khmelevsky Date: Wed, 23 Jun 2021 18:24:09 +0000 Subject: [PATCH] [PR] Instrumentation: Fix runtime handlers for PIE files Summary: This commit fixes runtime instrumentation handlers for PIE binaries case. Vladislav Khmelevsky, Advanced Software Technology Lab, Huawei (cherry picked from FBD30092522) --- bolt/runtime/instr.cpp | 28 +++---- bolt/src/MCPlusBuilder.h | 15 +++- bolt/src/Passes/Instrumentation.cpp | 47 +++++++---- bolt/src/Passes/Instrumentation.h | 4 + bolt/src/Passes/InstrumentationSummary.h | 15 +--- bolt/src/Passes/RetpolineInsertion.cpp | 8 +- .../InstrumentationRuntimeLibrary.cpp | 14 ++-- bolt/src/Target/X86/X86MCPlusBuilder.cpp | 81 +++++++++++++------ 8 files changed, 128 insertions(+), 84 deletions(-) diff --git a/bolt/runtime/instr.cpp b/bolt/runtime/instr.cpp index 474158662805..f4ac4a656cf6 100644 --- a/bolt/runtime/instr.cpp +++ b/bolt/runtime/instr.cpp @@ -100,8 +100,8 @@ extern bool __bolt_instr_use_pid; // only support resolving dependencies from this file to the output of BOLT, // *not* the other way around. // TODO: We need better linking support to make that happen. -extern void (*__bolt_trampoline_ind_call)(); -extern void (*__bolt_trampoline_ind_tailcall)(); +extern void (*__bolt_ind_call_counter_func_pointer)(); +extern void (*__bolt_ind_tailcall_counter_func_pointer)(); // Function pointers to init/fini trampoline routines in the binary, so we can // resume regular execution of these functions that we hooked extern void (*__bolt_start_trampoline)(); @@ -1537,8 +1537,8 @@ extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() { 0x3 /*PROT_READ|PROT_WRITE*/, 0x31 /*MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED*/, -1, 0); - __bolt_trampoline_ind_call = __bolt_instr_indirect_call; - __bolt_trampoline_ind_tailcall = __bolt_instr_indirect_tailcall; + __bolt_ind_call_counter_func_pointer = __bolt_instr_indirect_call; + __bolt_ind_tailcall_counter_func_pointer = __bolt_instr_indirect_tailcall; // Conservatively reserve 100MiB shared pages GlobalAlloc.setMaxSize(0x6400000); GlobalAlloc.setShared(true); @@ -1558,7 +1558,8 @@ extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() { } } -extern "C" void instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) { +extern "C" __attribute((force_align_arg_pointer)) void +instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) { GlobalIndCallCounters[IndCallID].incrementVal(Target, GlobalAlloc); } @@ -1567,27 +1568,22 @@ extern "C" void instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) { extern "C" __attribute((naked)) void __bolt_instr_indirect_call() { __asm__ __volatile__(SAVE_ALL - "mov 0x90(%%rsp), %%rdi\n" - "mov 0x88(%%rsp), %%rsi\n" + "mov 0xa0(%%rsp), %%rdi\n" + "mov 0x98(%%rsp), %%rsi\n" "call instrumentIndirectCall\n" RESTORE_ALL - "pop %%rdi\n" - "add $16, %%rsp\n" - "xchg (%%rsp), %%rdi\n" - "jmp *-8(%%rsp)\n" + "ret\n" :::); } extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall() { __asm__ __volatile__(SAVE_ALL - "mov 0x88(%%rsp), %%rdi\n" - "mov 0x80(%%rsp), %%rsi\n" + "mov 0x98(%%rsp), %%rdi\n" + "mov 0x90(%%rsp), %%rsi\n" "call instrumentIndirectCall\n" RESTORE_ALL - "add $16, %%rsp\n" - "pop %%rdi\n" - "jmp *-16(%%rsp)\n" + "ret\n" :::); } diff --git a/bolt/src/MCPlusBuilder.h b/bolt/src/MCPlusBuilder.h index 89bdc1c0e7c8..99ccecf33b2a 100644 --- a/bolt/src/MCPlusBuilder.h +++ b/bolt/src/MCPlusBuilder.h @@ -413,7 +413,7 @@ public: } virtual bool createDirectCall(MCInst &Inst, const MCSymbol *Target, - MCContext *Ctx) { + MCContext *Ctx, bool IsTailCall) { llvm_unreachable("not implemented"); return false; } @@ -1735,12 +1735,21 @@ public: return std::vector(); } - virtual std::vector createInstrumentedNoopIndCallHandler() const { + virtual std::vector createInstrumentedIndCallHandlerExitBB() const { llvm_unreachable("not implemented"); return std::vector(); } - virtual std::vector createInstrumentedNoopIndTailCallHandler() const { + virtual std::vector + createInstrumentedIndTailCallHandlerExitBB() const { + llvm_unreachable("not implemented"); + return std::vector(); + } + + virtual std::vector + createInstrumentedIndCallHandlerEntryBB(const MCSymbol *InstrTrampoline, + const MCSymbol *IndCallHandler, + MCContext *Ctx) { llvm_unreachable("not implemented"); return std::vector(); } diff --git a/bolt/src/Passes/Instrumentation.cpp b/bolt/src/Passes/Instrumentation.cpp index a69b9e8ad9dd..d03c4434e9d0 100644 --- a/bolt/src/Passes/Instrumentation.cpp +++ b/bolt/src/Passes/Instrumentation.cpp @@ -225,8 +225,8 @@ void Instrumentation::instrumentIndirectTarget(BinaryBasicBlock &BB, bool IsTailCall = BC.MIB->isTailCall(*Iter); std::vector CounterInstrs = BC.MIB->createInstrumentedIndirectCall( *Iter, IsTailCall, - IsTailCall ? Summary->IndTailCallHandlerFunc - : Summary->IndCallHandlerFunc, + IsTailCall ? IndTailCallHandlerExitBBFunction->getSymbol() + : IndCallHandlerExitBBFunction->getSymbol(), IndCallSiteID, &*BC.Ctx); Iter = BB.eraseInstruction(Iter); @@ -531,10 +531,12 @@ void Instrumentation::runOnFunctions(BinaryContext &BC) { /*Alignment=*/1, /*IsReadOnly=*/true, ELF::SHT_NOTE); - Summary->IndCallHandlerFunc = - BC.Ctx->getOrCreateSymbol("__bolt_trampoline_ind_call"); - Summary->IndTailCallHandlerFunc = - BC.Ctx->getOrCreateSymbol("__bolt_trampoline_ind_tailcall"); + Summary->IndCallCounterFuncPtr = + BC.Ctx->getOrCreateSymbol("__bolt_ind_call_counter_func_pointer"); + Summary->IndTailCallCounterFuncPtr = + BC.Ctx->getOrCreateSymbol("__bolt_ind_tailcall_counter_func_pointer"); + + createAuxiliaryFunctions(BC); ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) { return (!BF.isSimple() || BF.isIgnored() || @@ -550,8 +552,6 @@ void Instrumentation::runOnFunctions(BinaryContext &BC) { BC, ParallelUtilities::SchedulingPolicy::SP_INST_QUADRATIC, WorkFun, SkipPredicate, "instrumentation", /* ForceSequential=*/true); - createAuxiliaryFunctions(BC); - if (BC.isMachO()) { if (BC.StartFunctionAddress) { BinaryFunction *Main = @@ -616,13 +616,32 @@ void Instrumentation::createAuxiliaryFunctions(BinaryContext &BC) { return Func; }; - Summary->InitialIndCallHandlerFunction = - createSimpleFunction("__bolt_instr_default_ind_call_handler", - BC.MIB->createInstrumentedNoopIndCallHandler()); + // Here we are creating a set of functions to handle BB entry/exit. + // IndCallHandlerExitBB contains instructions to finish handling traffic to an + // indirect call. We pass it to createInstrumentedIndCallHandlerEntryBB(), + // which will check if a pointer to runtime library traffic accounting + // function was initialized (it is done during initialization of runtime + // library). If it is so - calls it. Then this routine returns to normal + // execution by jumping to exit BB. + BinaryFunction *IndCallHandlerExitBB = + createSimpleFunction("__bolt_instr_ind_call_handler", + BC.MIB->createInstrumentedIndCallHandlerExitBB()); - Summary->InitialIndTailCallHandlerFunction = - createSimpleFunction("__bolt_instr_default_ind_tailcall_handler", - BC.MIB->createInstrumentedNoopIndTailCallHandler()); + IndCallHandlerExitBBFunction = + createSimpleFunction("__bolt_instr_ind_call_handler_func", + BC.MIB->createInstrumentedIndCallHandlerEntryBB( + Summary->IndCallCounterFuncPtr, + IndCallHandlerExitBB->getSymbol(), &*BC.Ctx)); + + BinaryFunction *IndTailCallHandlerExitBB = createSimpleFunction( + "__bolt_instr_ind_tail_call_handler", + BC.MIB->createInstrumentedIndTailCallHandlerExitBB()); + + IndTailCallHandlerExitBBFunction = createSimpleFunction( + "__bolt_instr_ind_tailcall_handler_func", + BC.MIB->createInstrumentedIndCallHandlerEntryBB( + Summary->IndTailCallCounterFuncPtr, + IndTailCallHandlerExitBB->getSymbol(), &*BC.Ctx)); createSimpleFunction("__bolt_num_counters_getter", BC.MIB->createNumCountersGetter(BC.Ctx.get())); diff --git a/bolt/src/Passes/Instrumentation.h b/bolt/src/Passes/Instrumentation.h index 79eb92421bdb..eedc5999cc4a 100644 --- a/bolt/src/Passes/Instrumentation.h +++ b/bolt/src/Passes/Instrumentation.h @@ -119,6 +119,10 @@ private: uint32_t DirectCallCounters{0}; uint32_t BranchCounters{0}; uint32_t LeafNodeCounters{0}; + + /// Indirect call instrumentation functions + BinaryFunction *IndCallHandlerExitBBFunction; + BinaryFunction *IndTailCallHandlerExitBBFunction; }; } } diff --git a/bolt/src/Passes/InstrumentationSummary.h b/bolt/src/Passes/InstrumentationSummary.h index 81c7da352f1c..ac878872a95b 100644 --- a/bolt/src/Passes/InstrumentationSummary.h +++ b/bolt/src/Passes/InstrumentationSummary.h @@ -104,9 +104,9 @@ struct InstrumentationSummary { /// Stores function names, to be emitted to the runtime std::string StringTable; - /// Our runtime indirect call instrumenter function - MCSymbol *IndCallHandlerFunc; - MCSymbol *IndTailCallHandlerFunc; + /// Pointer to runtime instrumentation handlers + MCSymbol *IndCallCounterFuncPtr; + MCSymbol *IndTailCallCounterFuncPtr; /// Intra-function control flow and direct calls std::vector FunctionDescriptions; @@ -115,15 +115,6 @@ struct InstrumentationSummary { std::vector IndCallDescriptions; std::vector IndCallTargetDescriptions; - /// Our generated initial indirect call handler function that does nothing - /// except calling the indirect call target. The target program starts - /// using this no-op instrumentation function until our runtime library - /// setup runs and installs the correct handler. We need something before - /// our setup runs in case dyld starts running init code for other libs when - /// we did not have time to set up our indirect call counters yet. - BinaryFunction *InitialIndCallHandlerFunction; - BinaryFunction *InitialIndTailCallHandlerFunction; - static constexpr uint64_t NUM_SERIALIZED_CONTAINERS = 4; static constexpr uint64_t SERIALIZED_CONTAINER_SIZE = sizeof(uint32_t) * NUM_SERIALIZED_CONTAINERS; diff --git a/bolt/src/Passes/RetpolineInsertion.cpp b/bolt/src/Passes/RetpolineInsertion.cpp index 2b9748c4fc7a..4cdc457b8ebb 100644 --- a/bolt/src/Passes/RetpolineInsertion.cpp +++ b/bolt/src/Passes/RetpolineInsertion.cpp @@ -106,7 +106,7 @@ BinaryFunction *createNewRetpoline(BinaryContext &BC, // Build BB0 MCInst DirectCall; - MIB.createDirectCall(DirectCall, BB2.getLabel(), &Ctx); + MIB.createDirectCall(DirectCall, BB2.getLabel(), &Ctx, /*IsTailCall*/ false); BB0.addInstruction(DirectCall); // Build BB1 @@ -244,10 +244,8 @@ void createBranchReplacement(BinaryContext &BC, // Call the retpoline MCInst RetpolineCall; - if (BrInfo.isJump() || BrInfo.isTailCall()) - MIB.createTailCall(RetpolineCall, RetpolineSymbol, BC.Ctx.get()); - else - MIB.createDirectCall(RetpolineCall, RetpolineSymbol, BC.Ctx.get()); + MIB.createDirectCall(RetpolineCall, RetpolineSymbol, BC.Ctx.get(), + BrInfo.isJump() || BrInfo.isTailCall()); Replacement.push_back(RetpolineCall); } diff --git a/bolt/src/RuntimeLibs/InstrumentationRuntimeLibrary.cpp b/bolt/src/RuntimeLibs/InstrumentationRuntimeLibrary.cpp index 4bde7c07ea36..5973cf11d8ab 100644 --- a/bolt/src/RuntimeLibs/InstrumentationRuntimeLibrary.cpp +++ b/bolt/src/RuntimeLibs/InstrumentationRuntimeLibrary.cpp @@ -140,7 +140,10 @@ void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC, const unsigned Psize = BC.AsmInfo->getCodePointerSize(); emitDataPadding(Psize); emitLabel(Symbol); - Streamer.emitValue(Value, Psize); + if (Value) + Streamer.emitValue(Value, Psize); + else + Streamer.emitFill(Psize, 0); }; auto emitIntValue = [&Streamer, emitDataPadding, emitLabelByName]( @@ -172,13 +175,8 @@ void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC, !!opts::InstrumentationNoCountersClear, 1); emitIntValue("__bolt_instr_wait_forks", !!opts::InstrumentationWaitForks, 1); emitIntValue("__bolt_num_counters", Summary->Counters.size()); - emitValue(Summary->IndCallHandlerFunc, - MCSymbolRefExpr::create( - Summary->InitialIndCallHandlerFunction->getSymbol(), *BC.Ctx)); - emitValue( - Summary->IndTailCallHandlerFunc, - MCSymbolRefExpr::create( - Summary->InitialIndTailCallHandlerFunction->getSymbol(), *BC.Ctx)); + emitValue(Summary->IndCallCounterFuncPtr, nullptr); + emitValue(Summary->IndTailCallCounterFuncPtr, nullptr); emitIntValue("__bolt_instr_num_ind_calls", Summary->IndCallDescriptions.size()); emitIntValue("__bolt_instr_num_ind_targets", diff --git a/bolt/src/Target/X86/X86MCPlusBuilder.cpp b/bolt/src/Target/X86/X86MCPlusBuilder.cpp index a7dd3570074f..5dc6ef6c51cb 100644 --- a/bolt/src/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/src/Target/X86/X86MCPlusBuilder.cpp @@ -2971,11 +2971,7 @@ public: bool createTailCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx) override { - Inst.setOpcode(X86::JMP_4); - Inst.addOperand(MCOperand::createExpr( - MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx))); - setTailCall(Inst); - return true; + return createDirectCall(Inst, Target, Ctx, /*IsTailCall*/ true); } bool createTrap(MCInst &Inst) const override { @@ -3064,12 +3060,14 @@ public: Inst.setOpcode(X86::LFENCE); } - bool createDirectCall(MCInst &Inst, const MCSymbol *Target, - MCContext *Ctx) override { + bool createDirectCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx, + bool IsTailCall) override { Inst.clear(); - Inst.setOpcode(X86::CALL64pcrel32); + Inst.setOpcode(IsTailCall ? X86::JMP_4 : X86::CALL64pcrel32); Inst.addOperand(MCOperand::createExpr( MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx))); + if (IsTailCall) + setTailCall(Inst); return true; } @@ -3211,7 +3209,7 @@ public: // push %rdi // movq $CallSiteID, %rdi // push %rdi - // callq/jmp *HandlerFuncAddr + // callq/jmp HandlerFuncAddr Insts.emplace_back(); createPushRegister(Insts.back(), TempReg, 8); if (UsesSP) { // Only adjust SP if we really need to @@ -3234,8 +3232,8 @@ public: Insts.emplace_back(); createPushRegister(Insts.back(), TempReg, 8); Insts.emplace_back(); - createIndirectCall(Insts.back(), HandlerFuncAddr, Ctx, - /*TailCall=*/TailCall); + createDirectCall(Insts.back(), HandlerFuncAddr, Ctx, + /*TailCall=*/TailCall); // Carry over metadata for (int I = MCPlus::getNumPrimeOperands(CallInst), E = CallInst.getNumOperands(); @@ -3245,35 +3243,65 @@ public: return Insts; } - std::vector - createInstrumentedNoopIndCallHandler() const override { + std::vector createInstrumentedIndCallHandlerExitBB() const override { const MCPhysReg TempReg = getIntArgRegister(0); - // For the default indirect call handler that is supposed to be a no-op, - // we just need to undo the sequence created for every ind call in + // We just need to undo the sequence created for every ind call in // instrumentIndirectTarget(), which can be accomplished minimally with: + // popfq // pop %rdi // add $16, %rsp // xchg (%rsp), %rdi // jmp *-8(%rsp) - std::vector Insts(4); - createPopRegister(Insts[0], TempReg, 8); - createStackPointerDecrement(Insts[1], 16, /*NoFlagsClobber=*/false); - createSwap(Insts[2], TempReg, X86::RSP, 0); - createIndirectBranch(Insts[3], X86::RSP, -8); + std::vector Insts(5); + createPopFlags(Insts[0], 8); + createPopRegister(Insts[1], TempReg, 8); + createStackPointerDecrement(Insts[2], 16, /*NoFlagsClobber=*/false); + createSwap(Insts[3], TempReg, X86::RSP, 0); + createIndirectBranch(Insts[4], X86::RSP, -8); return Insts; } std::vector - createInstrumentedNoopIndTailCallHandler() const override { + createInstrumentedIndTailCallHandlerExitBB() const override { const MCPhysReg TempReg = getIntArgRegister(0); // Same thing as above, but for tail calls + // popfq // add $16, %rsp // pop %rdi // jmp *-16(%rsp) - std::vector Insts(3); - createStackPointerDecrement(Insts[0], 16, /*NoFlagsClobber=*/false); - createPopRegister(Insts[1], TempReg, 8); - createIndirectBranch(Insts[2], X86::RSP, -16); + std::vector Insts(4); + createPopFlags(Insts[0], 8); + createStackPointerDecrement(Insts[1], 16, /*NoFlagsClobber=*/false); + createPopRegister(Insts[2], TempReg, 8); + createIndirectBranch(Insts[3], X86::RSP, -16); + return Insts; + } + + std::vector + createInstrumentedIndCallHandlerEntryBB(const MCSymbol *InstrTrampoline, + const MCSymbol *IndCallHandler, + MCContext *Ctx) override { + const MCPhysReg TempReg = getIntArgRegister(0); + // Code sequence used to check whether InstrTampoline was initialized + // and call it if so, returns via IndCallHandler. + // pushfq + // mov InstrTrampoline,%rdi + // cmp $0x0,%rdi + // je IndCallHandler + // callq *%rdi + // jmpq IndCallHandler + std::vector Insts; + Insts.emplace_back(); + createPushFlags(Insts.back(), 8); + Insts.emplace_back(); + createMove(Insts.back(), InstrTrampoline, TempReg, Ctx); + std::vector cmpJmp = createCmpJE(TempReg, 0, IndCallHandler, Ctx); + Insts.insert(Insts.end(), cmpJmp.begin(), cmpJmp.end()); + Insts.emplace_back(); + Insts.back().setOpcode(X86::CALL64r); + Insts.back().addOperand(MCOperand::createReg(TempReg)); + Insts.emplace_back(); + createDirectCall(Insts.back(), IndCallHandler, Ctx, /*IsTailCall*/ true); return Insts; } @@ -3285,7 +3313,8 @@ public: return Insts; } - std::vector createInstrLocationsGetter(MCContext *Ctx) const override { + std::vector + createInstrLocationsGetter(MCContext *Ctx) const override { std::vector Insts(2); MCSymbol *Locs = Ctx->getOrCreateSymbol("__bolt_instr_locations"); createLea(Insts[0], Locs, X86::EAX, Ctx);