forked from OSchip/llvm-project
[PR] Instrumentation: Generate and use _start and _fini trampolines
Summary: This commit implements a new method for hooking the _start & _fini functions, which allows the use of relative jumps for future PIE & .so library support. Instead of using the absolute addresses of the _start & _fini functions known at the linking stage, we use dynamically created trampoline functions and reference the corresponding symbols in the instrumentation runtime library. Because we would like to use instrumentation for dynamically loaded binaries (PIE & .so), we need to compile the instrumentation library with the "-fPIC" flag to support relative address resolution for functions and data. For shared libraries, we need to handle initialization of the instrumentation library by using the DT_INIT section entry point. This commit also adds detection of whether the binary is an executable or a shared library, based on the existence of a PT_INTERP header. In the case of a shared library, we save the address of the real library init function for later use when creating the instrumentation library's init trampoline function, and we also update DT_INIT to point to the instrumentation library's init function. Functions called from the init/fini functions should be called with forced stack alignment to avoid issues with instructions that rely on it, e.g. optimized string operations. Vasily Leonenko, Advanced Software Technology Lab, Huawei (cherry picked from FBD30092316)
This commit is contained in:
parent
60b10a8ead
commit
ad79d51778
|
@ -22,7 +22,7 @@ add_library(bolt_rt_hugify STATIC
|
|||
)
|
||||
|
||||
# Don't let the compiler think it can create calls to standard libs
|
||||
target_compile_options(bolt_rt_instr PRIVATE -ffreestanding -fno-exceptions -fno-rtti)
|
||||
target_compile_options(bolt_rt_instr PRIVATE -ffreestanding -fno-exceptions -fno-rtti -fPIE)
|
||||
target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
|
||||
target_compile_options(bolt_rt_hugify PRIVATE -ffreestanding -fno-exceptions -fno-rtti)
|
||||
target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
|
|
@ -102,10 +102,10 @@ extern bool __bolt_instr_use_pid;
|
|||
// TODO: We need better linking support to make that happen.
|
||||
extern void (*__bolt_trampoline_ind_call)();
|
||||
extern void (*__bolt_trampoline_ind_tailcall)();
|
||||
// Function pointers to init/fini routines in the binary, so we can resume
|
||||
// regular execution of these functions that we hooked
|
||||
extern void (*__bolt_instr_init_ptr)();
|
||||
extern void (*__bolt_instr_fini_ptr)();
|
||||
// Function pointers to init/fini trampoline routines in the binary, so we can
|
||||
// resume regular execution of these functions that we hooked
|
||||
extern void (*__bolt_start_trampoline)();
|
||||
extern void (*__bolt_fini_trampoline)();
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -1366,7 +1366,8 @@ extern "C" void __bolt_instr_clear_counters() {
|
|||
/// call this function directly to get your profile written to disk
|
||||
/// on demand.
|
||||
///
|
||||
extern "C" void __bolt_instr_data_dump() {
|
||||
extern "C" void __attribute((force_align_arg_pointer))
|
||||
__bolt_instr_data_dump() {
|
||||
// Already dumping
|
||||
if (!GlobalWriteProfileMutex->acquire())
|
||||
return;
|
||||
|
@ -1451,7 +1452,7 @@ extern "C" void __bolt_instr_indirect_call();
|
|||
extern "C" void __bolt_instr_indirect_tailcall();
|
||||
|
||||
/// Initialization code
|
||||
extern "C" void __bolt_instr_setup() {
|
||||
extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() {
|
||||
const uint64_t CountersStart =
|
||||
reinterpret_cast<uint64_t>(&__bolt_instr_locations[0]);
|
||||
const uint64_t CountersEnd = alignTo(
|
||||
|
@ -1526,13 +1527,16 @@ extern "C" __attribute((naked)) void __bolt_instr_start()
|
|||
__asm__ __volatile__(SAVE_ALL
|
||||
"call __bolt_instr_setup\n"
|
||||
RESTORE_ALL
|
||||
"jmp *__bolt_instr_init_ptr(%%rip)\n"
|
||||
"jmp __bolt_start_trampoline\n"
|
||||
:::);
|
||||
}
|
||||
|
||||
/// This is hooking into ELF's DT_FINI
|
||||
extern "C" void __bolt_instr_fini() {
|
||||
__bolt_instr_fini_ptr();
|
||||
// Currently using inline assembly for the trampoline function call
|
||||
// due to issues with function pointer dereferencing in the case of
|
||||
// a C function call.
|
||||
__asm__ __volatile__("call __bolt_fini_trampoline\n" :::);
|
||||
if (__bolt_instr_sleep_time == 0)
|
||||
__bolt_instr_data_dump();
|
||||
DEBUG(report("Finished.\n"));
|
||||
|
|
|
@ -535,6 +535,9 @@ public:
|
|||
/// linked.
|
||||
bool IsStaticExecutable{false};
|
||||
|
||||
/// Set to true if the binary contains a PT_INTERP header.
|
||||
bool HasInterpHeader{false};
|
||||
|
||||
/// Indicates if any of local symbols used for functions or data objects
|
||||
/// have an origin file name available.
|
||||
bool HasSymbolsWithFileName{false};
|
||||
|
|
|
@ -1765,6 +1765,12 @@ public:
|
|||
return {};
|
||||
}
|
||||
|
||||
/// Create the instruction sequence for a trampoline that transfers control
/// to the symbol \p TgtSym, using \p Ctx as the MC context.
///
/// This default implementation is a placeholder: it always hits
/// llvm_unreachable("not implemented"), so a target that needs trampolines
/// has to override it.
///
/// \param TgtSym symbol the trampoline should transfer control to.
/// \param Ctx MC context made available to the implementation.
/// \returns the trampoline instructions; the default never returns normally.
virtual std::vector<MCInst> createSymbolTrampoline(const MCSymbol *TgtSym,
|
||||
MCContext *Ctx) const {
|
||||
llvm_unreachable("not implemented");
|
||||
// NOTE(review): presumably kept only so every path has a return statement
// when llvm_unreachable is not treated as noreturn — confirm.
return std::vector<MCInst>();
|
||||
}
|
||||
|
||||
/// This method takes an indirect call instruction and splits it up into an
|
||||
/// equivalent set of instructions that use direct calls for target
|
||||
/// symbols/addresses that are contained in the Targets vector. This is done
|
||||
|
|
|
@ -632,6 +632,27 @@ void Instrumentation::createAuxiliaryFunctions(BinaryContext &BC) {
|
|||
BC.MIB->createInstrTablesGetter(BC.Ctx.get()));
|
||||
createSimpleFunction("__bolt_instr_num_funcs_getter",
|
||||
BC.MIB->createInstrNumFuncsGetter(BC.Ctx.get()));
|
||||
|
||||
if (BC.isELF()) {
|
||||
if (BC.StartFunctionAddress) {
|
||||
BinaryFunction *Start =
|
||||
BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress);
|
||||
assert(Start && "Entry point function not found");
|
||||
const MCSymbol *StartSym = Start->getSymbol();
|
||||
createSimpleFunction(
|
||||
"__bolt_start_trampoline",
|
||||
BC.MIB->createSymbolTrampoline(StartSym, BC.Ctx.get()));
|
||||
}
|
||||
if (BC.FiniFunctionAddress) {
|
||||
BinaryFunction *Fini =
|
||||
BC.getBinaryFunctionAtAddress(*BC.FiniFunctionAddress);
|
||||
assert(Fini && "Finalization function not found");
|
||||
const MCSymbol *FiniSym = Fini->getSymbol();
|
||||
createSimpleFunction(
|
||||
"__bolt_fini_trampoline",
|
||||
BC.MIB->createSymbolTrampoline(FiniSym, BC.Ctx.get()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Instrumentation::setupRuntimeLibrary(BinaryContext &BC) {
|
||||
|
|
|
@ -567,7 +567,8 @@ void RewriteInstance::discoverStorage() {
|
|||
ELF64LE::PhdrRange PHs =
|
||||
cantFail(Obj.program_headers(), "program_headers() failed");
|
||||
for (const ELF64LE::Phdr &Phdr : PHs) {
|
||||
if (Phdr.p_type == ELF::PT_LOAD) {
|
||||
switch (Phdr.p_type) {
|
||||
case ELF::PT_LOAD:
|
||||
BC->FirstAllocAddress = std::min(BC->FirstAllocAddress,
|
||||
static_cast<uint64_t>(Phdr.p_vaddr));
|
||||
NextAvailableAddress = std::max(NextAvailableAddress,
|
||||
|
@ -580,6 +581,10 @@ void RewriteInstance::discoverStorage() {
|
|||
Phdr.p_offset,
|
||||
Phdr.p_filesz,
|
||||
Phdr.p_align};
|
||||
break;
|
||||
case ELF::PT_INTERP:
|
||||
BC->HasInterpHeader = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5015,6 +5020,15 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
|
|||
}
|
||||
}
|
||||
}
|
||||
if (Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) {
|
||||
if (auto *RtLibrary = BC->getRuntimeLibrary()) {
|
||||
if (auto Addr = RtLibrary->getRuntimeStartAddress()) {
|
||||
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x"
|
||||
<< Twine::utohexstr(Addr) << '\n');
|
||||
NewDE.d_un.d_ptr = Addr;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case ELF::DT_FLAGS:
|
||||
if (BC->RequiresZNow) {
|
||||
|
@ -5074,6 +5088,12 @@ void RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
|
|||
|
||||
for (const Elf_Dyn &Dyn : DynamicEntries) {
|
||||
switch (Dyn.d_tag) {
|
||||
case ELF::DT_INIT:
|
||||
if (!BC->HasInterpHeader) {
|
||||
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n");
|
||||
BC->StartFunctionAddress = Dyn.getPtr();
|
||||
}
|
||||
break;
|
||||
case ELF::DT_FINI:
|
||||
BC->FiniFunctionAddress = Dyn.getPtr();
|
||||
break;
|
||||
|
|
|
@ -74,27 +74,6 @@ void InstrumentationRuntimeLibrary::adjustCommandLineOptions(
|
|||
|
||||
void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
|
||||
MCStreamer &Streamer) {
|
||||
const BinaryFunction *StartFunction =
|
||||
BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress);
|
||||
assert(!StartFunction->isFragment() && "expected main function fragment");
|
||||
if (!StartFunction) {
|
||||
errs() << "BOLT-ERROR: failed to locate function at binary start address\n";
|
||||
exit(1);
|
||||
}
|
||||
|
||||
const BinaryFunction *FiniFunction =
|
||||
BC.FiniFunctionAddress
|
||||
? BC.getBinaryFunctionAtAddress(*BC.FiniFunctionAddress)
|
||||
: nullptr;
|
||||
if (BC.isELF()) {
|
||||
assert(!FiniFunction->isFragment() && "expected main function fragment");
|
||||
if (!FiniFunction) {
|
||||
errs()
|
||||
<< "BOLT-ERROR: failed to locate function at binary fini address\n";
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
MCSection *Section = BC.isELF()
|
||||
? static_cast<MCSection *>(BC.Ctx->getELFSection(
|
||||
".bolt.instr.counters", ELF::SHT_PROGBITS,
|
||||
|
@ -200,12 +179,6 @@ void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
|
|||
emitIntValue("__bolt_instr_num_funcs", Summary->FunctionDescriptions.size());
|
||||
emitString("__bolt_instr_filename", opts::InstrumentationFilename);
|
||||
emitIntValue("__bolt_instr_use_pid", !!opts::InstrumentationFileAppendPID, 1);
|
||||
emitValue(BC.Ctx->getOrCreateSymbol("__bolt_instr_init_ptr"),
|
||||
MCSymbolRefExpr::create(StartFunction->getSymbol(), *BC.Ctx));
|
||||
if (FiniFunction) {
|
||||
emitValue(BC.Ctx->getOrCreateSymbol("__bolt_instr_fini_ptr"),
|
||||
MCSymbolRefExpr::create(FiniFunction->getSymbol(), *BC.Ctx));
|
||||
}
|
||||
|
||||
if (BC.isMachO()) {
|
||||
MCSection *TablesSection = BC.Ctx->getMachOSection(
|
||||
|
|
|
@ -3309,6 +3309,13 @@ public:
|
|||
return Insts;
|
||||
}
|
||||
|
||||
/// Build a trampoline to \p TgtSym consisting of exactly one instruction:
/// the unconditional branch produced by createUncondBranch().
///
/// \param TgtSym symbol the branch targets.
/// \param Ctx MC context forwarded to createUncondBranch().
/// \returns a one-element vector holding the branch instruction.
std::vector<MCInst> createSymbolTrampoline(const MCSymbol *TgtSym,
|
||||
MCContext *Ctx) const override {
|
||||
// Pre-size the vector to one default-constructed MCInst that
// createUncondBranch() then fills in place.
std::vector<MCInst> Insts(1);
|
||||
createUncondBranch(Insts[0], TgtSym, Ctx);
|
||||
return Insts;
|
||||
}
|
||||
|
||||
BlocksVectorTy indirectCallPromotion(
|
||||
const MCInst &CallInst,
|
||||
const std::vector<std::pair<MCSymbol *, uint64_t>> &Targets,
|
||||
|
|
Loading…
Reference in New Issue