[PR] Instrumentation: Generate and use _start and _fini trampolines

Summary:
This commit implements a new method of hooking the _start & _fini functions
that allows the use of relative jumps for future PIE & .so library support.
Instead of using the absolute addresses of the _start & _fini functions known
at the linking stage, we create trampoline functions dynamically and
reference the corresponding symbols in the instrumentation runtime library.

Since we would like to use instrumentation for dynamically loaded binaries
(PIE & .so), we need to compile the instrumentation library with the
"-fPIC" flag to support relative address resolution for functions and
data.

For shared libraries, we need to handle initialization of the instrumentation
library by using the DT_INIT dynamic section entry point.

This commit also adds detection of whether the binary is an executable or a
shared library, based on the existence of a PT_INTERP header. For a shared
library, we save the address of the real library init function so that it
can later be used to create the instrumentation library's init trampoline
function, and we also update DT_INIT to point to the instrumentation
library's init function.

Functions called from the init/fini functions should be called with forced
stack alignment to avoid issues with instructions that rely on it,
e.g. optimized string operations.

Vasily Leonenko,
Advanced Software Technology Lab, Huawei

(cherry picked from FBD30092316)
This commit is contained in:
Vasily Leonenko 2021-06-19 04:08:35 +08:00 committed by Maksim Panchenko
parent 60b10a8ead
commit ad79d51778
8 changed files with 71 additions and 37 deletions

View File

@ -22,7 +22,7 @@ add_library(bolt_rt_hugify STATIC
)
# Don't let the compiler think it can create calls to standard libs
target_compile_options(bolt_rt_instr PRIVATE -ffreestanding -fno-exceptions -fno-rtti)
target_compile_options(bolt_rt_instr PRIVATE -ffreestanding -fno-exceptions -fno-rtti -fPIE)
target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
target_compile_options(bolt_rt_hugify PRIVATE -ffreestanding -fno-exceptions -fno-rtti)
target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR})

View File

@ -102,10 +102,10 @@ extern bool __bolt_instr_use_pid;
// TODO: We need better linking support to make that happen.
extern void (*__bolt_trampoline_ind_call)();
extern void (*__bolt_trampoline_ind_tailcall)();
// Function pointers to init/fini routines in the binary, so we can resume
// regular execution of these functions that we hooked
extern void (*__bolt_instr_init_ptr)();
extern void (*__bolt_instr_fini_ptr)();
// Function pointers to init/fini trampoline routines in the binary, so we can
// resume regular execution of these functions that we hooked
extern void (*__bolt_start_trampoline)();
extern void (*__bolt_fini_trampoline)();
#endif
@ -1366,7 +1366,8 @@ extern "C" void __bolt_instr_clear_counters() {
/// call this function directly to get your profile written to disk
/// on demand.
///
extern "C" void __bolt_instr_data_dump() {
extern "C" void __attribute((force_align_arg_pointer))
__bolt_instr_data_dump() {
// Already dumping
if (!GlobalWriteProfileMutex->acquire())
return;
@ -1451,7 +1452,7 @@ extern "C" void __bolt_instr_indirect_call();
extern "C" void __bolt_instr_indirect_tailcall();
/// Initialization code
extern "C" void __bolt_instr_setup() {
extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() {
const uint64_t CountersStart =
reinterpret_cast<uint64_t>(&__bolt_instr_locations[0]);
const uint64_t CountersEnd = alignTo(
@ -1526,13 +1527,16 @@ extern "C" __attribute((naked)) void __bolt_instr_start()
__asm__ __volatile__(SAVE_ALL
"call __bolt_instr_setup\n"
RESTORE_ALL
"jmp *__bolt_instr_init_ptr(%%rip)\n"
"jmp __bolt_start_trampoline\n"
:::);
}
/// This is hooking into ELF's DT_FINI
extern "C" void __bolt_instr_fini() {
__bolt_instr_fini_ptr();
// Currently using assembly inline for trampoline function call
// due to issues with function pointer dereferencing in case of
// C function call.
__asm__ __volatile__("call __bolt_fini_trampoline\n" :::);
if (__bolt_instr_sleep_time == 0)
__bolt_instr_data_dump();
DEBUG(report("Finished.\n"));

View File

@ -535,6 +535,9 @@ public:
/// linked.
bool IsStaticExecutable{false};
/// Set to true if the binary contains PT_INTERP header.
bool HasInterpHeader{false};
/// Indicates if any of local symbols used for functions or data objects
/// have an origin file name available.
bool HasSymbolsWithFileName{false};

View File

@ -1765,6 +1765,12 @@ public:
return {};
}
/// Build the instruction sequence for a trampoline that transfers control
/// to \p TgtSym. This base version is only a placeholder: it hits
/// llvm_unreachable, so a target that needs trampolines has to override it.
virtual std::vector<MCInst> createSymbolTrampoline(const MCSymbol *TgtSym,
                                                   MCContext *Ctx) const {
  llvm_unreachable("not implemented");
  // Empty sequence, returned only to satisfy the signature.
  return {};
}
/// This method takes an indirect call instruction and splits it up into an
/// equivalent set of instructions that use direct calls for target
/// symbols/addresses that are contained in the Targets vector. This is done

View File

@ -632,6 +632,27 @@ void Instrumentation::createAuxiliaryFunctions(BinaryContext &BC) {
BC.MIB->createInstrTablesGetter(BC.Ctx.get()));
createSimpleFunction("__bolt_instr_num_funcs_getter",
BC.MIB->createInstrNumFuncsGetter(BC.Ctx.get()));
if (BC.isELF()) {
if (BC.StartFunctionAddress) {
BinaryFunction *Start =
BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress);
assert(Start && "Entry point function not found");
const MCSymbol *StartSym = Start->getSymbol();
createSimpleFunction(
"__bolt_start_trampoline",
BC.MIB->createSymbolTrampoline(StartSym, BC.Ctx.get()));
}
if (BC.FiniFunctionAddress) {
BinaryFunction *Fini =
BC.getBinaryFunctionAtAddress(*BC.FiniFunctionAddress);
assert(Fini && "Finalization function not found");
const MCSymbol *FiniSym = Fini->getSymbol();
createSimpleFunction(
"__bolt_fini_trampoline",
BC.MIB->createSymbolTrampoline(FiniSym, BC.Ctx.get()));
}
}
}
void Instrumentation::setupRuntimeLibrary(BinaryContext &BC) {

View File

@ -567,7 +567,8 @@ void RewriteInstance::discoverStorage() {
ELF64LE::PhdrRange PHs =
cantFail(Obj.program_headers(), "program_headers() failed");
for (const ELF64LE::Phdr &Phdr : PHs) {
if (Phdr.p_type == ELF::PT_LOAD) {
switch (Phdr.p_type) {
case ELF::PT_LOAD:
BC->FirstAllocAddress = std::min(BC->FirstAllocAddress,
static_cast<uint64_t>(Phdr.p_vaddr));
NextAvailableAddress = std::max(NextAvailableAddress,
@ -580,6 +581,10 @@ void RewriteInstance::discoverStorage() {
Phdr.p_offset,
Phdr.p_filesz,
Phdr.p_align};
break;
case ELF::PT_INTERP:
BC->HasInterpHeader = true;
break;
}
}
@ -5015,6 +5020,15 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
}
}
}
if (Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) {
if (auto *RtLibrary = BC->getRuntimeLibrary()) {
if (auto Addr = RtLibrary->getRuntimeStartAddress()) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x"
<< Twine::utohexstr(Addr) << '\n');
NewDE.d_un.d_ptr = Addr;
}
}
}
break;
case ELF::DT_FLAGS:
if (BC->RequiresZNow) {
@ -5074,6 +5088,12 @@ void RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
for (const Elf_Dyn &Dyn : DynamicEntries) {
switch (Dyn.d_tag) {
case ELF::DT_INIT:
if (!BC->HasInterpHeader) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n");
BC->StartFunctionAddress = Dyn.getPtr();
}
break;
case ELF::DT_FINI:
BC->FiniFunctionAddress = Dyn.getPtr();
break;

View File

@ -74,27 +74,6 @@ void InstrumentationRuntimeLibrary::adjustCommandLineOptions(
void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
MCStreamer &Streamer) {
const BinaryFunction *StartFunction =
BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress);
assert(!StartFunction->isFragment() && "expected main function fragment");
if (!StartFunction) {
errs() << "BOLT-ERROR: failed to locate function at binary start address\n";
exit(1);
}
const BinaryFunction *FiniFunction =
BC.FiniFunctionAddress
? BC.getBinaryFunctionAtAddress(*BC.FiniFunctionAddress)
: nullptr;
if (BC.isELF()) {
assert(!FiniFunction->isFragment() && "expected main function fragment");
if (!FiniFunction) {
errs()
<< "BOLT-ERROR: failed to locate function at binary fini address\n";
exit(1);
}
}
MCSection *Section = BC.isELF()
? static_cast<MCSection *>(BC.Ctx->getELFSection(
".bolt.instr.counters", ELF::SHT_PROGBITS,
@ -200,12 +179,6 @@ void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
emitIntValue("__bolt_instr_num_funcs", Summary->FunctionDescriptions.size());
emitString("__bolt_instr_filename", opts::InstrumentationFilename);
emitIntValue("__bolt_instr_use_pid", !!opts::InstrumentationFileAppendPID, 1);
emitValue(BC.Ctx->getOrCreateSymbol("__bolt_instr_init_ptr"),
MCSymbolRefExpr::create(StartFunction->getSymbol(), *BC.Ctx));
if (FiniFunction) {
emitValue(BC.Ctx->getOrCreateSymbol("__bolt_instr_fini_ptr"),
MCSymbolRefExpr::create(FiniFunction->getSymbol(), *BC.Ctx));
}
if (BC.isMachO()) {
MCSection *TablesSection = BC.Ctx->getMachOSection(

View File

@ -3309,6 +3309,13 @@ public:
return Insts;
}
std::vector<MCInst> createSymbolTrampoline(const MCSymbol *TgtSym,
MCContext *Ctx) const override {
std::vector<MCInst> Insts(1);
createUncondBranch(Insts[0], TgtSym, Ctx);
return Insts;
}
BlocksVectorTy indirectCallPromotion(
const MCInst &CallInst,
const std::vector<std::pair<MCSymbol *, uint64_t>> &Targets,