forked from OSchip/llvm-project
[PR] Instrumentation: Fix runtime handlers for PIE files
Summary: This commit fixes runtime instrumentation handlers for PIE binaries case. Vladislav Khmelevsky, Advanced Software Technology Lab, Huawei (cherry picked from FBD30092522)
This commit is contained in:
parent
9b39a823ea
commit
361f3b5576
|
@ -100,8 +100,8 @@ extern bool __bolt_instr_use_pid;
|
|||
// only support resolving dependencies from this file to the output of BOLT,
|
||||
// *not* the other way around.
|
||||
// TODO: We need better linking support to make that happen.
|
||||
extern void (*__bolt_trampoline_ind_call)();
|
||||
extern void (*__bolt_trampoline_ind_tailcall)();
|
||||
extern void (*__bolt_ind_call_counter_func_pointer)();
|
||||
extern void (*__bolt_ind_tailcall_counter_func_pointer)();
|
||||
// Function pointers to init/fini trampoline routines in the binary, so we can
|
||||
// resume regular execution of these functions that we hooked
|
||||
extern void (*__bolt_start_trampoline)();
|
||||
|
@ -1537,8 +1537,8 @@ extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() {
|
|||
0x3 /*PROT_READ|PROT_WRITE*/,
|
||||
0x31 /*MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED*/, -1, 0);
|
||||
|
||||
__bolt_trampoline_ind_call = __bolt_instr_indirect_call;
|
||||
__bolt_trampoline_ind_tailcall = __bolt_instr_indirect_tailcall;
|
||||
__bolt_ind_call_counter_func_pointer = __bolt_instr_indirect_call;
|
||||
__bolt_ind_tailcall_counter_func_pointer = __bolt_instr_indirect_tailcall;
|
||||
// Conservatively reserve 100MiB shared pages
|
||||
GlobalAlloc.setMaxSize(0x6400000);
|
||||
GlobalAlloc.setShared(true);
|
||||
|
@ -1558,7 +1558,8 @@ extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() {
|
|||
}
|
||||
}
|
||||
|
||||
extern "C" void instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {
|
||||
extern "C" __attribute((force_align_arg_pointer)) void
|
||||
instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {
|
||||
GlobalIndCallCounters[IndCallID].incrementVal(Target, GlobalAlloc);
|
||||
}
|
||||
|
||||
|
@ -1567,27 +1568,22 @@ extern "C" void instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {
|
|||
extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
|
||||
{
|
||||
__asm__ __volatile__(SAVE_ALL
|
||||
"mov 0x90(%%rsp), %%rdi\n"
|
||||
"mov 0x88(%%rsp), %%rsi\n"
|
||||
"mov 0xa0(%%rsp), %%rdi\n"
|
||||
"mov 0x98(%%rsp), %%rsi\n"
|
||||
"call instrumentIndirectCall\n"
|
||||
RESTORE_ALL
|
||||
"pop %%rdi\n"
|
||||
"add $16, %%rsp\n"
|
||||
"xchg (%%rsp), %%rdi\n"
|
||||
"jmp *-8(%%rsp)\n"
|
||||
"ret\n"
|
||||
:::);
|
||||
}
|
||||
|
||||
extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall()
|
||||
{
|
||||
__asm__ __volatile__(SAVE_ALL
|
||||
"mov 0x88(%%rsp), %%rdi\n"
|
||||
"mov 0x80(%%rsp), %%rsi\n"
|
||||
"mov 0x98(%%rsp), %%rdi\n"
|
||||
"mov 0x90(%%rsp), %%rsi\n"
|
||||
"call instrumentIndirectCall\n"
|
||||
RESTORE_ALL
|
||||
"add $16, %%rsp\n"
|
||||
"pop %%rdi\n"
|
||||
"jmp *-16(%%rsp)\n"
|
||||
"ret\n"
|
||||
:::);
|
||||
}
|
||||
|
||||
|
|
|
@ -413,7 +413,7 @@ public:
|
|||
}
|
||||
|
||||
virtual bool createDirectCall(MCInst &Inst, const MCSymbol *Target,
|
||||
MCContext *Ctx) {
|
||||
MCContext *Ctx, bool IsTailCall) {
|
||||
llvm_unreachable("not implemented");
|
||||
return false;
|
||||
}
|
||||
|
@ -1735,12 +1735,21 @@ public:
|
|||
return std::vector<MCInst>();
|
||||
}
|
||||
|
||||
virtual std::vector<MCInst> createInstrumentedNoopIndCallHandler() const {
|
||||
virtual std::vector<MCInst> createInstrumentedIndCallHandlerExitBB() const {
|
||||
llvm_unreachable("not implemented");
|
||||
return std::vector<MCInst>();
|
||||
}
|
||||
|
||||
virtual std::vector<MCInst> createInstrumentedNoopIndTailCallHandler() const {
|
||||
virtual std::vector<MCInst>
|
||||
createInstrumentedIndTailCallHandlerExitBB() const {
|
||||
llvm_unreachable("not implemented");
|
||||
return std::vector<MCInst>();
|
||||
}
|
||||
|
||||
virtual std::vector<MCInst>
|
||||
createInstrumentedIndCallHandlerEntryBB(const MCSymbol *InstrTrampoline,
|
||||
const MCSymbol *IndCallHandler,
|
||||
MCContext *Ctx) {
|
||||
llvm_unreachable("not implemented");
|
||||
return std::vector<MCInst>();
|
||||
}
|
||||
|
|
|
@ -225,8 +225,8 @@ void Instrumentation::instrumentIndirectTarget(BinaryBasicBlock &BB,
|
|||
bool IsTailCall = BC.MIB->isTailCall(*Iter);
|
||||
std::vector<MCInst> CounterInstrs = BC.MIB->createInstrumentedIndirectCall(
|
||||
*Iter, IsTailCall,
|
||||
IsTailCall ? Summary->IndTailCallHandlerFunc
|
||||
: Summary->IndCallHandlerFunc,
|
||||
IsTailCall ? IndTailCallHandlerExitBBFunction->getSymbol()
|
||||
: IndCallHandlerExitBBFunction->getSymbol(),
|
||||
IndCallSiteID, &*BC.Ctx);
|
||||
|
||||
Iter = BB.eraseInstruction(Iter);
|
||||
|
@ -531,10 +531,12 @@ void Instrumentation::runOnFunctions(BinaryContext &BC) {
|
|||
/*Alignment=*/1,
|
||||
/*IsReadOnly=*/true, ELF::SHT_NOTE);
|
||||
|
||||
Summary->IndCallHandlerFunc =
|
||||
BC.Ctx->getOrCreateSymbol("__bolt_trampoline_ind_call");
|
||||
Summary->IndTailCallHandlerFunc =
|
||||
BC.Ctx->getOrCreateSymbol("__bolt_trampoline_ind_tailcall");
|
||||
Summary->IndCallCounterFuncPtr =
|
||||
BC.Ctx->getOrCreateSymbol("__bolt_ind_call_counter_func_pointer");
|
||||
Summary->IndTailCallCounterFuncPtr =
|
||||
BC.Ctx->getOrCreateSymbol("__bolt_ind_tailcall_counter_func_pointer");
|
||||
|
||||
createAuxiliaryFunctions(BC);
|
||||
|
||||
ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) {
|
||||
return (!BF.isSimple() || BF.isIgnored() ||
|
||||
|
@ -550,8 +552,6 @@ void Instrumentation::runOnFunctions(BinaryContext &BC) {
|
|||
BC, ParallelUtilities::SchedulingPolicy::SP_INST_QUADRATIC, WorkFun,
|
||||
SkipPredicate, "instrumentation", /* ForceSequential=*/true);
|
||||
|
||||
createAuxiliaryFunctions(BC);
|
||||
|
||||
if (BC.isMachO()) {
|
||||
if (BC.StartFunctionAddress) {
|
||||
BinaryFunction *Main =
|
||||
|
@ -616,13 +616,32 @@ void Instrumentation::createAuxiliaryFunctions(BinaryContext &BC) {
|
|||
return Func;
|
||||
};
|
||||
|
||||
Summary->InitialIndCallHandlerFunction =
|
||||
createSimpleFunction("__bolt_instr_default_ind_call_handler",
|
||||
BC.MIB->createInstrumentedNoopIndCallHandler());
|
||||
// Here we are creating a set of functions to handle BB entry/exit.
|
||||
// IndCallHandlerExitBB contains instructions to finish handling traffic to an
|
||||
// indirect call. We pass it to createInstrumentedIndCallHandlerEntryBB(),
|
||||
// which will check if a pointer to runtime library traffic accounting
|
||||
// function was initialized (it is done during initialization of runtime
|
||||
// library). If it is so - calls it. Then this routine returns to normal
|
||||
// execution by jumping to exit BB.
|
||||
BinaryFunction *IndCallHandlerExitBB =
|
||||
createSimpleFunction("__bolt_instr_ind_call_handler",
|
||||
BC.MIB->createInstrumentedIndCallHandlerExitBB());
|
||||
|
||||
Summary->InitialIndTailCallHandlerFunction =
|
||||
createSimpleFunction("__bolt_instr_default_ind_tailcall_handler",
|
||||
BC.MIB->createInstrumentedNoopIndTailCallHandler());
|
||||
IndCallHandlerExitBBFunction =
|
||||
createSimpleFunction("__bolt_instr_ind_call_handler_func",
|
||||
BC.MIB->createInstrumentedIndCallHandlerEntryBB(
|
||||
Summary->IndCallCounterFuncPtr,
|
||||
IndCallHandlerExitBB->getSymbol(), &*BC.Ctx));
|
||||
|
||||
BinaryFunction *IndTailCallHandlerExitBB = createSimpleFunction(
|
||||
"__bolt_instr_ind_tail_call_handler",
|
||||
BC.MIB->createInstrumentedIndTailCallHandlerExitBB());
|
||||
|
||||
IndTailCallHandlerExitBBFunction = createSimpleFunction(
|
||||
"__bolt_instr_ind_tailcall_handler_func",
|
||||
BC.MIB->createInstrumentedIndCallHandlerEntryBB(
|
||||
Summary->IndTailCallCounterFuncPtr,
|
||||
IndTailCallHandlerExitBB->getSymbol(), &*BC.Ctx));
|
||||
|
||||
createSimpleFunction("__bolt_num_counters_getter",
|
||||
BC.MIB->createNumCountersGetter(BC.Ctx.get()));
|
||||
|
|
|
@ -119,6 +119,10 @@ private:
|
|||
uint32_t DirectCallCounters{0};
|
||||
uint32_t BranchCounters{0};
|
||||
uint32_t LeafNodeCounters{0};
|
||||
|
||||
/// Indirect call instrumentation functions
|
||||
BinaryFunction *IndCallHandlerExitBBFunction;
|
||||
BinaryFunction *IndTailCallHandlerExitBBFunction;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -104,9 +104,9 @@ struct InstrumentationSummary {
|
|||
/// Stores function names, to be emitted to the runtime
|
||||
std::string StringTable;
|
||||
|
||||
/// Our runtime indirect call instrumenter function
|
||||
MCSymbol *IndCallHandlerFunc;
|
||||
MCSymbol *IndTailCallHandlerFunc;
|
||||
/// Pointer to runtime instrumentation handlers
|
||||
MCSymbol *IndCallCounterFuncPtr;
|
||||
MCSymbol *IndTailCallCounterFuncPtr;
|
||||
|
||||
/// Intra-function control flow and direct calls
|
||||
std::vector<FunctionDescription> FunctionDescriptions;
|
||||
|
@ -115,15 +115,6 @@ struct InstrumentationSummary {
|
|||
std::vector<IndCallDescription> IndCallDescriptions;
|
||||
std::vector<IndCallTargetDescription> IndCallTargetDescriptions;
|
||||
|
||||
/// Our generated initial indirect call handler function that does nothing
|
||||
/// except calling the indirect call target. The target program starts
|
||||
/// using this no-op instrumentation function until our runtime library
|
||||
/// setup runs and installs the correct handler. We need something before
|
||||
/// our setup runs in case dyld starts running init code for other libs when
|
||||
/// we did not have time to set up our indirect call counters yet.
|
||||
BinaryFunction *InitialIndCallHandlerFunction;
|
||||
BinaryFunction *InitialIndTailCallHandlerFunction;
|
||||
|
||||
static constexpr uint64_t NUM_SERIALIZED_CONTAINERS = 4;
|
||||
static constexpr uint64_t SERIALIZED_CONTAINER_SIZE =
|
||||
sizeof(uint32_t) * NUM_SERIALIZED_CONTAINERS;
|
||||
|
|
|
@ -106,7 +106,7 @@ BinaryFunction *createNewRetpoline(BinaryContext &BC,
|
|||
|
||||
// Build BB0
|
||||
MCInst DirectCall;
|
||||
MIB.createDirectCall(DirectCall, BB2.getLabel(), &Ctx);
|
||||
MIB.createDirectCall(DirectCall, BB2.getLabel(), &Ctx, /*IsTailCall*/ false);
|
||||
BB0.addInstruction(DirectCall);
|
||||
|
||||
// Build BB1
|
||||
|
@ -244,10 +244,8 @@ void createBranchReplacement(BinaryContext &BC,
|
|||
|
||||
// Call the retpoline
|
||||
MCInst RetpolineCall;
|
||||
if (BrInfo.isJump() || BrInfo.isTailCall())
|
||||
MIB.createTailCall(RetpolineCall, RetpolineSymbol, BC.Ctx.get());
|
||||
else
|
||||
MIB.createDirectCall(RetpolineCall, RetpolineSymbol, BC.Ctx.get());
|
||||
MIB.createDirectCall(RetpolineCall, RetpolineSymbol, BC.Ctx.get(),
|
||||
BrInfo.isJump() || BrInfo.isTailCall());
|
||||
|
||||
Replacement.push_back(RetpolineCall);
|
||||
}
|
||||
|
|
|
@ -140,7 +140,10 @@ void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
|
|||
const unsigned Psize = BC.AsmInfo->getCodePointerSize();
|
||||
emitDataPadding(Psize);
|
||||
emitLabel(Symbol);
|
||||
Streamer.emitValue(Value, Psize);
|
||||
if (Value)
|
||||
Streamer.emitValue(Value, Psize);
|
||||
else
|
||||
Streamer.emitFill(Psize, 0);
|
||||
};
|
||||
|
||||
auto emitIntValue = [&Streamer, emitDataPadding, emitLabelByName](
|
||||
|
@ -172,13 +175,8 @@ void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
|
|||
!!opts::InstrumentationNoCountersClear, 1);
|
||||
emitIntValue("__bolt_instr_wait_forks", !!opts::InstrumentationWaitForks, 1);
|
||||
emitIntValue("__bolt_num_counters", Summary->Counters.size());
|
||||
emitValue(Summary->IndCallHandlerFunc,
|
||||
MCSymbolRefExpr::create(
|
||||
Summary->InitialIndCallHandlerFunction->getSymbol(), *BC.Ctx));
|
||||
emitValue(
|
||||
Summary->IndTailCallHandlerFunc,
|
||||
MCSymbolRefExpr::create(
|
||||
Summary->InitialIndTailCallHandlerFunction->getSymbol(), *BC.Ctx));
|
||||
emitValue(Summary->IndCallCounterFuncPtr, nullptr);
|
||||
emitValue(Summary->IndTailCallCounterFuncPtr, nullptr);
|
||||
emitIntValue("__bolt_instr_num_ind_calls",
|
||||
Summary->IndCallDescriptions.size());
|
||||
emitIntValue("__bolt_instr_num_ind_targets",
|
||||
|
|
|
@ -2971,11 +2971,7 @@ public:
|
|||
|
||||
bool createTailCall(MCInst &Inst, const MCSymbol *Target,
|
||||
MCContext *Ctx) override {
|
||||
Inst.setOpcode(X86::JMP_4);
|
||||
Inst.addOperand(MCOperand::createExpr(
|
||||
MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
|
||||
setTailCall(Inst);
|
||||
return true;
|
||||
return createDirectCall(Inst, Target, Ctx, /*IsTailCall*/ true);
|
||||
}
|
||||
|
||||
bool createTrap(MCInst &Inst) const override {
|
||||
|
@ -3064,12 +3060,14 @@ public:
|
|||
Inst.setOpcode(X86::LFENCE);
|
||||
}
|
||||
|
||||
bool createDirectCall(MCInst &Inst, const MCSymbol *Target,
|
||||
MCContext *Ctx) override {
|
||||
bool createDirectCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx,
|
||||
bool IsTailCall) override {
|
||||
Inst.clear();
|
||||
Inst.setOpcode(X86::CALL64pcrel32);
|
||||
Inst.setOpcode(IsTailCall ? X86::JMP_4 : X86::CALL64pcrel32);
|
||||
Inst.addOperand(MCOperand::createExpr(
|
||||
MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
|
||||
if (IsTailCall)
|
||||
setTailCall(Inst);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -3211,7 +3209,7 @@ public:
|
|||
// push %rdi
|
||||
// movq $CallSiteID, %rdi
|
||||
// push %rdi
|
||||
// callq/jmp *HandlerFuncAddr
|
||||
// callq/jmp HandlerFuncAddr
|
||||
Insts.emplace_back();
|
||||
createPushRegister(Insts.back(), TempReg, 8);
|
||||
if (UsesSP) { // Only adjust SP if we really need to
|
||||
|
@ -3234,8 +3232,8 @@ public:
|
|||
Insts.emplace_back();
|
||||
createPushRegister(Insts.back(), TempReg, 8);
|
||||
Insts.emplace_back();
|
||||
createIndirectCall(Insts.back(), HandlerFuncAddr, Ctx,
|
||||
/*TailCall=*/TailCall);
|
||||
createDirectCall(Insts.back(), HandlerFuncAddr, Ctx,
|
||||
/*TailCall=*/TailCall);
|
||||
// Carry over metadata
|
||||
for (int I = MCPlus::getNumPrimeOperands(CallInst),
|
||||
E = CallInst.getNumOperands();
|
||||
|
@ -3245,35 +3243,65 @@ public:
|
|||
return Insts;
|
||||
}
|
||||
|
||||
std::vector<MCInst>
|
||||
createInstrumentedNoopIndCallHandler() const override {
|
||||
std::vector<MCInst> createInstrumentedIndCallHandlerExitBB() const override {
|
||||
const MCPhysReg TempReg = getIntArgRegister(0);
|
||||
// For the default indirect call handler that is supposed to be a no-op,
|
||||
// we just need to undo the sequence created for every ind call in
|
||||
// We just need to undo the sequence created for every ind call in
|
||||
// instrumentIndirectTarget(), which can be accomplished minimally with:
|
||||
// popfq
|
||||
// pop %rdi
|
||||
// add $16, %rsp
|
||||
// xchg (%rsp), %rdi
|
||||
// jmp *-8(%rsp)
|
||||
std::vector<MCInst> Insts(4);
|
||||
createPopRegister(Insts[0], TempReg, 8);
|
||||
createStackPointerDecrement(Insts[1], 16, /*NoFlagsClobber=*/false);
|
||||
createSwap(Insts[2], TempReg, X86::RSP, 0);
|
||||
createIndirectBranch(Insts[3], X86::RSP, -8);
|
||||
std::vector<MCInst> Insts(5);
|
||||
createPopFlags(Insts[0], 8);
|
||||
createPopRegister(Insts[1], TempReg, 8);
|
||||
createStackPointerDecrement(Insts[2], 16, /*NoFlagsClobber=*/false);
|
||||
createSwap(Insts[3], TempReg, X86::RSP, 0);
|
||||
createIndirectBranch(Insts[4], X86::RSP, -8);
|
||||
return Insts;
|
||||
}
|
||||
|
||||
std::vector<MCInst>
|
||||
createInstrumentedNoopIndTailCallHandler() const override {
|
||||
createInstrumentedIndTailCallHandlerExitBB() const override {
|
||||
const MCPhysReg TempReg = getIntArgRegister(0);
|
||||
// Same thing as above, but for tail calls
|
||||
// popfq
|
||||
// add $16, %rsp
|
||||
// pop %rdi
|
||||
// jmp *-16(%rsp)
|
||||
std::vector<MCInst> Insts(3);
|
||||
createStackPointerDecrement(Insts[0], 16, /*NoFlagsClobber=*/false);
|
||||
createPopRegister(Insts[1], TempReg, 8);
|
||||
createIndirectBranch(Insts[2], X86::RSP, -16);
|
||||
std::vector<MCInst> Insts(4);
|
||||
createPopFlags(Insts[0], 8);
|
||||
createStackPointerDecrement(Insts[1], 16, /*NoFlagsClobber=*/false);
|
||||
createPopRegister(Insts[2], TempReg, 8);
|
||||
createIndirectBranch(Insts[3], X86::RSP, -16);
|
||||
return Insts;
|
||||
}
|
||||
|
||||
std::vector<MCInst>
|
||||
createInstrumentedIndCallHandlerEntryBB(const MCSymbol *InstrTrampoline,
|
||||
const MCSymbol *IndCallHandler,
|
||||
MCContext *Ctx) override {
|
||||
const MCPhysReg TempReg = getIntArgRegister(0);
|
||||
// Code sequence used to check whether InstrTampoline was initialized
|
||||
// and call it if so, returns via IndCallHandler.
|
||||
// pushfq
|
||||
// mov InstrTrampoline,%rdi
|
||||
// cmp $0x0,%rdi
|
||||
// je IndCallHandler
|
||||
// callq *%rdi
|
||||
// jmpq IndCallHandler
|
||||
std::vector<MCInst> Insts;
|
||||
Insts.emplace_back();
|
||||
createPushFlags(Insts.back(), 8);
|
||||
Insts.emplace_back();
|
||||
createMove(Insts.back(), InstrTrampoline, TempReg, Ctx);
|
||||
std::vector<MCInst> cmpJmp = createCmpJE(TempReg, 0, IndCallHandler, Ctx);
|
||||
Insts.insert(Insts.end(), cmpJmp.begin(), cmpJmp.end());
|
||||
Insts.emplace_back();
|
||||
Insts.back().setOpcode(X86::CALL64r);
|
||||
Insts.back().addOperand(MCOperand::createReg(TempReg));
|
||||
Insts.emplace_back();
|
||||
createDirectCall(Insts.back(), IndCallHandler, Ctx, /*IsTailCall*/ true);
|
||||
return Insts;
|
||||
}
|
||||
|
||||
|
@ -3285,7 +3313,8 @@ public:
|
|||
return Insts;
|
||||
}
|
||||
|
||||
std::vector<MCInst> createInstrLocationsGetter(MCContext *Ctx) const override {
|
||||
std::vector<MCInst>
|
||||
createInstrLocationsGetter(MCContext *Ctx) const override {
|
||||
std::vector<MCInst> Insts(2);
|
||||
MCSymbol *Locs = Ctx->getOrCreateSymbol("__bolt_instr_locations");
|
||||
createLea(Insts[0], Locs, X86::EAX, Ctx);
|
||||
|
|
Loading…
Reference in New Issue