From ad79d51778e37293502b7a8246fa387092771969 Mon Sep 17 00:00:00 2001 From: Vasily Leonenko Date: Sat, 19 Jun 2021 04:08:35 +0800 Subject: [PATCH] [PR] Instrumentation: Generate and use _start and _fini trampolines Summary: This commit implements a new method of hooking the _start & _fini functions that allows relative jumps to be used, in preparation for future PIE & .so library support. Instead of using the absolute addresses of the _start & _fini functions known at link time, we use dynamically created trampoline functions and reference the corresponding symbols in the instrumentation runtime library. Because we would like to use instrumentation for dynamically loaded binaries (PIE & .so), we need to compile the instrumentation library as position-independent code (the "-fPIE" flag added to bolt/runtime/CMakeLists.txt below) to support relative address resolution for functions and data. For shared libraries, we need to handle initialization of the instrumentation library via the DT_INIT entry of the dynamic section. This commit also adds detection of whether the binary is an executable or a shared library, based on the existence of a PT_INTERP header. In the case of a shared library, we save the address of the real library init function so that it can later be used to create the instrumentation library init trampoline function, and we update DT_INIT to point to the instrumentation library init function. Functions called from the init/fini functions should be called with forced stack alignment to avoid issues with instructions that rely on it, e.g. optimized string operations. 
Vasily Leonenko, Advanced Software Technology Lab, Huawei (cherry picked from FBD30092316) --- bolt/runtime/CMakeLists.txt | 2 +- bolt/runtime/instr.cpp | 20 ++++++++------ bolt/src/BinaryContext.h | 3 +++ bolt/src/MCPlusBuilder.h | 6 +++++ bolt/src/Passes/Instrumentation.cpp | 21 +++++++++++++++ bolt/src/RewriteInstance.cpp | 22 ++++++++++++++- .../InstrumentationRuntimeLibrary.cpp | 27 ------------------- bolt/src/Target/X86/X86MCPlusBuilder.cpp | 7 +++++ 8 files changed, 71 insertions(+), 37 deletions(-) diff --git a/bolt/runtime/CMakeLists.txt b/bolt/runtime/CMakeLists.txt index 9ea769f88a2a..cc679c31ff6d 100644 --- a/bolt/runtime/CMakeLists.txt +++ b/bolt/runtime/CMakeLists.txt @@ -22,7 +22,7 @@ add_library(bolt_rt_hugify STATIC ) # Don't let the compiler think it can create calls to standard libs -target_compile_options(bolt_rt_instr PRIVATE -ffreestanding -fno-exceptions -fno-rtti) +target_compile_options(bolt_rt_instr PRIVATE -ffreestanding -fno-exceptions -fno-rtti -fPIE) target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) target_compile_options(bolt_rt_hugify PRIVATE -ffreestanding -fno-exceptions -fno-rtti) target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/bolt/runtime/instr.cpp b/bolt/runtime/instr.cpp index 3ad7a8cca6ff..bab860f26274 100644 --- a/bolt/runtime/instr.cpp +++ b/bolt/runtime/instr.cpp @@ -102,10 +102,10 @@ extern bool __bolt_instr_use_pid; // TODO: We need better linking support to make that happen. 
extern void (*__bolt_trampoline_ind_call)(); extern void (*__bolt_trampoline_ind_tailcall)(); -// Function pointers to init/fini routines in the binary, so we can resume -// regular execution of these functions that we hooked -extern void (*__bolt_instr_init_ptr)(); -extern void (*__bolt_instr_fini_ptr)(); +// Function pointers to init/fini trampoline routines in the binary, so we can +// resume regular execution of these functions that we hooked +extern void (*__bolt_start_trampoline)(); +extern void (*__bolt_fini_trampoline)(); #endif @@ -1366,7 +1366,8 @@ extern "C" void __bolt_instr_clear_counters() { /// call this function directly to get your profile written to disk /// on demand. /// -extern "C" void __bolt_instr_data_dump() { +extern "C" void __attribute((force_align_arg_pointer)) +__bolt_instr_data_dump() { // Already dumping if (!GlobalWriteProfileMutex->acquire()) return; @@ -1451,7 +1452,7 @@ extern "C" void __bolt_instr_indirect_call(); extern "C" void __bolt_instr_indirect_tailcall(); /// Initialization code -extern "C" void __bolt_instr_setup() { +extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() { const uint64_t CountersStart = reinterpret_cast(&__bolt_instr_locations[0]); const uint64_t CountersEnd = alignTo( @@ -1526,13 +1527,16 @@ extern "C" __attribute((naked)) void __bolt_instr_start() __asm__ __volatile__(SAVE_ALL "call __bolt_instr_setup\n" RESTORE_ALL - "jmp *__bolt_instr_init_ptr(%%rip)\n" + "jmp __bolt_start_trampoline\n" :::); } /// This is hooking into ELF's DT_FINI extern "C" void __bolt_instr_fini() { - __bolt_instr_fini_ptr(); + // Currently using assembly inline for trampoline function call + // due to issues with function pointer dereferencing in case of + // C function call. 
+ __asm__ __volatile__("call __bolt_fini_trampoline\n" :::); if (__bolt_instr_sleep_time == 0) __bolt_instr_data_dump(); DEBUG(report("Finished.\n")); diff --git a/bolt/src/BinaryContext.h b/bolt/src/BinaryContext.h index b012306a7213..006d5cd71351 100644 --- a/bolt/src/BinaryContext.h +++ b/bolt/src/BinaryContext.h @@ -535,6 +535,9 @@ public: /// linked. bool IsStaticExecutable{false}; + /// Set to true if the binary contains PT_INTERP header. + bool HasInterpHeader{false}; + /// Indicates if any of local symbols used for functions or data objects /// have an origin file name available. bool HasSymbolsWithFileName{false}; diff --git a/bolt/src/MCPlusBuilder.h b/bolt/src/MCPlusBuilder.h index 5d52d9d86462..cd1854d9ea93 100644 --- a/bolt/src/MCPlusBuilder.h +++ b/bolt/src/MCPlusBuilder.h @@ -1765,6 +1765,12 @@ public: return {}; } + virtual std::vector createSymbolTrampoline(const MCSymbol *TgtSym, + MCContext *Ctx) const { + llvm_unreachable("not implemented"); + return std::vector(); + } + /// This method takes an indirect call instruction and splits it up into an /// equivalent set of instructions that use direct calls for target /// symbols/addresses that are contained in the Targets vector. 
This is done diff --git a/bolt/src/Passes/Instrumentation.cpp b/bolt/src/Passes/Instrumentation.cpp index 085fd1627289..b431ec11dcc0 100644 --- a/bolt/src/Passes/Instrumentation.cpp +++ b/bolt/src/Passes/Instrumentation.cpp @@ -632,6 +632,27 @@ void Instrumentation::createAuxiliaryFunctions(BinaryContext &BC) { BC.MIB->createInstrTablesGetter(BC.Ctx.get())); createSimpleFunction("__bolt_instr_num_funcs_getter", BC.MIB->createInstrNumFuncsGetter(BC.Ctx.get())); + + if (BC.isELF()) { + if (BC.StartFunctionAddress) { + BinaryFunction *Start = + BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress); + assert(Start && "Entry point function not found"); + const MCSymbol *StartSym = Start->getSymbol(); + createSimpleFunction( + "__bolt_start_trampoline", + BC.MIB->createSymbolTrampoline(StartSym, BC.Ctx.get())); + } + if (BC.FiniFunctionAddress) { + BinaryFunction *Fini = + BC.getBinaryFunctionAtAddress(*BC.FiniFunctionAddress); + assert(Fini && "Finalization function not found"); + const MCSymbol *FiniSym = Fini->getSymbol(); + createSimpleFunction( + "__bolt_fini_trampoline", + BC.MIB->createSymbolTrampoline(FiniSym, BC.Ctx.get())); + } + } } void Instrumentation::setupRuntimeLibrary(BinaryContext &BC) { diff --git a/bolt/src/RewriteInstance.cpp b/bolt/src/RewriteInstance.cpp index 63388718a49b..1cd36a41cfc3 100644 --- a/bolt/src/RewriteInstance.cpp +++ b/bolt/src/RewriteInstance.cpp @@ -567,7 +567,8 @@ void RewriteInstance::discoverStorage() { ELF64LE::PhdrRange PHs = cantFail(Obj.program_headers(), "program_headers() failed"); for (const ELF64LE::Phdr &Phdr : PHs) { - if (Phdr.p_type == ELF::PT_LOAD) { + switch (Phdr.p_type) { + case ELF::PT_LOAD: BC->FirstAllocAddress = std::min(BC->FirstAllocAddress, static_cast(Phdr.p_vaddr)); NextAvailableAddress = std::max(NextAvailableAddress, @@ -580,6 +581,10 @@ void RewriteInstance::discoverStorage() { Phdr.p_offset, Phdr.p_filesz, Phdr.p_align}; + break; + case ELF::PT_INTERP: + BC->HasInterpHeader = true; + break; } } @@ 
-5015,6 +5020,15 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile *File) { } } } + if (Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) { + if (auto *RtLibrary = BC->getRuntimeLibrary()) { + if (auto Addr = RtLibrary->getRuntimeStartAddress()) { + LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x" + << Twine::utohexstr(Addr) << '\n'); + NewDE.d_un.d_ptr = Addr; + } + } + } break; case ELF::DT_FLAGS: if (BC->RequiresZNow) { @@ -5074,6 +5088,12 @@ void RewriteInstance::readELFDynamic(ELFObjectFile *File) { for (const Elf_Dyn &Dyn : DynamicEntries) { switch (Dyn.d_tag) { + case ELF::DT_INIT: + if (!BC->HasInterpHeader) { + LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n"); + BC->StartFunctionAddress = Dyn.getPtr(); + } + break; case ELF::DT_FINI: BC->FiniFunctionAddress = Dyn.getPtr(); break; diff --git a/bolt/src/RuntimeLibs/InstrumentationRuntimeLibrary.cpp b/bolt/src/RuntimeLibs/InstrumentationRuntimeLibrary.cpp index 00f65a8bc252..ad72ba3fe7f4 100644 --- a/bolt/src/RuntimeLibs/InstrumentationRuntimeLibrary.cpp +++ b/bolt/src/RuntimeLibs/InstrumentationRuntimeLibrary.cpp @@ -74,27 +74,6 @@ void InstrumentationRuntimeLibrary::adjustCommandLineOptions( void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC, MCStreamer &Streamer) { - const BinaryFunction *StartFunction = - BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress); - assert(!StartFunction->isFragment() && "expected main function fragment"); - if (!StartFunction) { - errs() << "BOLT-ERROR: failed to locate function at binary start address\n"; - exit(1); - } - - const BinaryFunction *FiniFunction = - BC.FiniFunctionAddress - ? BC.getBinaryFunctionAtAddress(*BC.FiniFunctionAddress) - : nullptr; - if (BC.isELF()) { - assert(!FiniFunction->isFragment() && "expected main function fragment"); - if (!FiniFunction) { - errs() - << "BOLT-ERROR: failed to locate function at binary fini address\n"; - exit(1); - } - } - MCSection *Section = BC.isELF() ? 
static_cast(BC.Ctx->getELFSection( ".bolt.instr.counters", ELF::SHT_PROGBITS, @@ -200,12 +179,6 @@ void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC, emitIntValue("__bolt_instr_num_funcs", Summary->FunctionDescriptions.size()); emitString("__bolt_instr_filename", opts::InstrumentationFilename); emitIntValue("__bolt_instr_use_pid", !!opts::InstrumentationFileAppendPID, 1); - emitValue(BC.Ctx->getOrCreateSymbol("__bolt_instr_init_ptr"), - MCSymbolRefExpr::create(StartFunction->getSymbol(), *BC.Ctx)); - if (FiniFunction) { - emitValue(BC.Ctx->getOrCreateSymbol("__bolt_instr_fini_ptr"), - MCSymbolRefExpr::create(FiniFunction->getSymbol(), *BC.Ctx)); - } if (BC.isMachO()) { MCSection *TablesSection = BC.Ctx->getMachOSection( diff --git a/bolt/src/Target/X86/X86MCPlusBuilder.cpp b/bolt/src/Target/X86/X86MCPlusBuilder.cpp index fac118be0d36..6495686398e7 100644 --- a/bolt/src/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/src/Target/X86/X86MCPlusBuilder.cpp @@ -3309,6 +3309,13 @@ public: return Insts; } + std::vector createSymbolTrampoline(const MCSymbol *TgtSym, + MCContext *Ctx) const override { + std::vector Insts(1); + createUncondBranch(Insts[0], TgtSym, Ctx); + return Insts; + } + BlocksVectorTy indirectCallPromotion( const MCInst &CallInst, const std::vector> &Targets,