forked from OSchip/llvm-project
[BOLT][PR] Instrumentation: Introduce -no-counters-clear and -wait-forks options
Summary: This PR introduces two new instrumentation options: 1. instrumentation-no-counters-clear: discussed at https://github.com/facebookincubator/BOLT/issues/121 2. instrumentation-wait-forks: since the instrumentation counters are mapped as MAP_SHARED, it is useful to be able to wait until all forks of the parent process have exited, which is done by tracking the process group. The last patch is just an emitBinary code refactor. Vladislav Khmelevsky, Advanced Software Technology Lab, Huawei. Pull Request resolved: https://github.com/facebookincubator/BOLT/pull/125 GitHub Author: Vladislav Khmelevskyi <Vladislav.Khmelevskyi@huawei.com> (cherry picked from FBD26919011)
This commit is contained in:
parent
225a8d7f2c
commit
76d346ca14
|
@ -1,3 +1,13 @@
|
||||||
|
//===-- common.h ------------------------------------------------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
#if !defined(__APPLE__)
|
#if !defined(__APPLE__)
|
||||||
|
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
|
@ -333,6 +343,36 @@ uint64_t __getppid() {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Issues the x86-64 Linux `setpgid` system call (number 109) directly through
// inline assembly.
//   pid  - passed to the kernel in rdi ("D" constraint).
//   pgid - passed to the kernel in rsi ("S" constraint).
// Returns whatever the kernel leaves in the accumulator ("=a"), narrowed to
// int. rcx and r11 are listed as clobbers because the `syscall` instruction
// overwrites them.
int __setpgid(uint64_t pid, uint64_t pgid) {
|
||||||
|
int ret;
|
||||||
|
__asm__ __volatile__("movq $109, %%rax\n"
|
||||||
|
"syscall\n"
|
||||||
|
: "=a"(ret)
|
||||||
|
: "D"(pid), "S"(pgid)
|
||||||
|
: "cc", "rcx", "r11", "memory");
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Issues the x86-64 Linux `getpgid` system call (number 121) directly through
// inline assembly.
//   pid - passed to the kernel in rdi ("D" constraint); watchProcess() passes 0.
// Returns the full 64-bit value the kernel leaves in rax.
// NOTE(review): a failure presumably comes back as a negated errno wrapped
// into uint64_t; nothing here checks for it -- confirm callers can ignore it.
// rcx and r11 are listed as clobbers because the `syscall` instruction
// overwrites them.
uint64_t __getpgid(uint64_t pid) {
|
||||||
|
uint64_t ret;
|
||||||
|
__asm__ __volatile__("movq $121, %%rax\n"
|
||||||
|
"syscall\n"
|
||||||
|
: "=a"(ret)
|
||||||
|
: "D"(pid)
|
||||||
|
: "cc", "rcx", "r11", "memory");
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Issues the x86-64 Linux `kill` system call (number 62) directly through
// inline assembly.
//   pid - passed to the kernel in rdi ("D" constraint).
//   sig - passed to the kernel in rsi ("S" constraint); watchProcess() passes
//         0 and only inspects the result.
// Returns whatever the kernel leaves in the accumulator ("=a"), narrowed to
// int; watchProcess() treats a negative result as failure. rcx and r11 are
// listed as clobbers because the `syscall` instruction overwrites them.
int __kill(uint64_t pid, int sig) {
|
||||||
|
int ret;
|
||||||
|
__asm__ __volatile__("movq $62, %%rax\n"
|
||||||
|
"syscall\n"
|
||||||
|
: "=a"(ret)
|
||||||
|
: "D"(pid), "S"(sig)
|
||||||
|
: "cc", "rcx", "r11", "memory");
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void reportError(const char *Msg, uint64_t Size) {
|
void reportError(const char *Msg, uint64_t Size) {
|
||||||
|
|
|
@ -84,6 +84,10 @@ extern uint32_t __bolt_instr_num_ind_targets;
|
||||||
extern uint32_t __bolt_instr_num_funcs;
|
extern uint32_t __bolt_instr_num_funcs;
|
||||||
// Time to sleep across dumps (when we write the fdata profile to disk)
|
// Time to sleep across dumps (when we write the fdata profile to disk)
|
||||||
extern uint32_t __bolt_instr_sleep_time;
|
extern uint32_t __bolt_instr_sleep_time;
|
||||||
|
// Do not clear counters across dumps, rewrite file with the updated values
|
||||||
|
extern bool __bolt_instr_no_counters_clear;
|
||||||
|
// Wait until all forks of the instrumented process have finished
|
||||||
|
extern bool __bolt_instr_wait_forks;
|
||||||
// Filename to dump data to
|
// Filename to dump data to
|
||||||
extern char __bolt_instr_filename[];
|
extern char __bolt_instr_filename[];
|
||||||
// If true, append current PID to the fdata filename when creating it so
|
// If true, append current PID to the fdata filename when creating it so
|
||||||
|
@ -1402,23 +1406,43 @@ extern "C" void __bolt_instr_data_dump() {
|
||||||
// NOTE: this block is a rendered side-by-side diff of watchProcess(); rows of
// the old and the new version are interleaved and separated by '|' rows.
//
// New-version behaviour: first remember what to watch -- the negated
// process-group id when __bolt_instr_wait_forks is set, otherwise the parent
// pid (exiting straight away if that is already 1). Then loop, requesting a
// one-second sleep per iteration. When __kill(ppid, 0) returns a negative
// value the loop ends (after one last dump if __bolt_instr_no_counters_clear
// is set) and the process exits with status 0. Otherwise, once Ellapsed
// reaches __bolt_instr_sleep_time, the profile is dumped and the counters are
// cleared unless __bolt_instr_no_counters_clear is set.
void watchProcess() {
|
void watchProcess() {
|
||||||
timespec ts, rem;
|
timespec ts, rem;
|
||||||
uint64_t Ellapsed = 0ull;
|
uint64_t Ellapsed = 0ull;
|
||||||
|
uint64_t ppid;
|
||||||
|
if (__bolt_instr_wait_forks) {
|
||||||
|
// Store parent pgid
|
||||||
|
// Negated: a negative pid makes kill() address a whole process group
// (POSIX kill semantics), so the later __kill(ppid, 0) probes the group.
ppid = -__getpgid(0);
|
||||||
|
// And leave parent process group
|
||||||
|
__setpgid(0, 0);
|
||||||
|
} else {
|
||||||
|
// Store parent pid
|
||||||
|
ppid = __getppid();
|
||||||
|
if (ppid == 1) {
|
||||||
|
// Parent already dead
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ts.tv_sec = 1;
|
ts.tv_sec = 1;
|
||||||
ts.tv_nsec = 0;
|
ts.tv_nsec = 0;
|
||||||
while (1) {
|
while (1) {
|
||||||
__nanosleep(&ts, &rem);
|
__nanosleep(&ts, &rem);
|
||||||
// This means our parent process died, so no need for us to keep dumping.
|
// This means our parent process or all its forks are dead,
|
||||||
// Notice that make and some systems will wait until all child processes
|
// so no need for us to keep dumping.
|
||||||
// of a command finishes before proceeding, so it is important to exit as
|
// NOTE(review): every negative result is taken to mean "target gone";
// presumably only -ESRCH should count -- confirm -EPERM cannot occur here.
if (__kill(ppid, 0) < 0) {
|
||||||
// early as possible once our parent dies.
|
if (__bolt_instr_no_counters_clear)
|
||||||
if (__getppid() == 1) {
|
__bolt_instr_data_dump();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (++Ellapsed < __bolt_instr_sleep_time)
|
if (++Ellapsed < __bolt_instr_sleep_time)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
Ellapsed = 0;
|
Ellapsed = 0;
|
||||||
__bolt_instr_data_dump();
|
__bolt_instr_data_dump();
|
||||||
__bolt_instr_clear_counters();
|
if (__bolt_instr_no_counters_clear == false)
|
||||||
|
__bolt_instr_clear_counters();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
out:;
|
||||||
DEBUG(report("My parent process is dead, bye!\n"));
|
DEBUG(report("My parent process is dead, bye!\n"));
|
||||||
__exit(0);
|
__exit(0);
|
||||||
}
|
}
|
||||||
|
@ -1453,6 +1477,10 @@ extern "C" void __bolt_instr_setup() {
|
||||||
new (GlobalAlloc, 0) IndirectCallHashTable[__bolt_instr_num_ind_calls];
|
new (GlobalAlloc, 0) IndirectCallHashTable[__bolt_instr_num_ind_calls];
|
||||||
|
|
||||||
if (__bolt_instr_sleep_time != 0) {
|
if (__bolt_instr_sleep_time != 0) {
|
||||||
|
// Move the instrumented process into its own process group
|
||||||
|
if (__bolt_instr_wait_forks)
|
||||||
|
__setpgid(0, 0);
|
||||||
|
|
||||||
if (auto PID = __fork())
|
if (auto PID = __fork())
|
||||||
return;
|
return;
|
||||||
watchProcess();
|
watchProcess();
|
||||||
|
|
|
@ -49,6 +49,18 @@ cl::opt<uint32_t> InstrumentationSleepTime(
|
||||||
"program and the profile is not being dumped at the end."),
|
"program and the profile is not being dumped at the end."),
|
||||||
cl::init(0), cl::Optional, cl::cat(BoltInstrCategory));
|
cl::init(0), cl::Optional, cl::cat(BoltInstrCategory));
|
||||||
|
|
||||||
|
// Command-line flag -instrumentation-no-counters-clear (default: false).
// emitBinary() writes its value into the output as the one-byte symbol
// __bolt_instr_no_counters_clear, which watchProcess() consults to decide
// whether counters are cleared after each periodic dump.
cl::opt<bool> InstrumentationNoCountersClear(
|
||||||
|
"instrumentation-no-counters-clear",
|
||||||
|
cl::desc("Don't clear counters across dumps "
|
||||||
|
"(use with instrumentation-sleep-time option)"),
|
||||||
|
cl::init(false), cl::Optional, cl::cat(BoltInstrCategory));
|
||||||
|
|
||||||
|
// Command-line flag -instrumentation-wait-forks (default: false).
// emitBinary() writes its value into the output as the one-byte symbol
// __bolt_instr_wait_forks; when set, __bolt_instr_setup() calls
// __setpgid(0, 0) before forking and watchProcess() probes the process group
// instead of the parent pid.
cl::opt<bool> InstrumentationWaitForks(
|
||||||
|
"instrumentation-wait-forks",
|
||||||
|
cl::desc("Wait until all forks of instrumented process will finish "
|
||||||
|
"(use with instrumentation-sleep-time option)"),
|
||||||
|
cl::init(false), cl::Optional, cl::cat(BoltInstrCategory));
|
||||||
|
|
||||||
cl::opt<bool>
|
cl::opt<bool>
|
||||||
InstrumentHotOnly("instrument-hot-only",
|
InstrumentHotOnly("instrument-hot-only",
|
||||||
cl::desc("only insert instrumentation on hot functions "
|
cl::desc("only insert instrumentation on hot functions "
|
||||||
|
|
|
@ -24,6 +24,8 @@ extern cl::OptionCategory BoltOptCategory;
|
||||||
extern cl::opt<bool> InstrumentationFileAppendPID;
|
extern cl::opt<bool> InstrumentationFileAppendPID;
|
||||||
extern cl::opt<std::string> InstrumentationFilename;
|
extern cl::opt<std::string> InstrumentationFilename;
|
||||||
extern cl::opt<uint32_t> InstrumentationSleepTime;
|
extern cl::opt<uint32_t> InstrumentationSleepTime;
|
||||||
|
extern cl::opt<bool> InstrumentationNoCountersClear;
|
||||||
|
extern cl::opt<bool> InstrumentationWaitForks;
|
||||||
|
|
||||||
cl::opt<bool>
|
cl::opt<bool>
|
||||||
Instrument("instrument",
|
Instrument("instrument",
|
||||||
|
@ -93,31 +95,43 @@ void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
|
||||||
"__BOLT", "__counters", MachO::S_REGULAR,
|
"__BOLT", "__counters", MachO::S_REGULAR,
|
||||||
SectionKind::getData()));
|
SectionKind::getData()));
|
||||||
|
|
||||||
|
Section->setAlignment(llvm::Align(BC.RegularPageSize));
|
||||||
|
Streamer.SwitchSection(Section);
|
||||||
|
|
||||||
|
auto EmitLabel = [&](MCSymbol *Symbol, bool IsGlobal = true) {
|
||||||
|
Streamer.emitLabel(Symbol);
|
||||||
|
if (IsGlobal)
|
||||||
|
Streamer.emitSymbolAttribute(Symbol, MCSymbolAttr::MCSA_Global);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto EmitLabelByName = [&](StringRef Name, bool IsGlobal = true) {
|
||||||
|
MCSymbol *Symbol = BC.Ctx->getOrCreateSymbol(Name);
|
||||||
|
EmitLabel(Symbol, IsGlobal);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto EmitValue = [&](MCSymbol *Symbol, const MCExpr *Value) {
|
||||||
|
EmitLabel(Symbol);
|
||||||
|
Streamer.emitValue(Value, /*Size*/ 8);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto EmitIntValue = [&](StringRef Name, uint64_t Value, unsigned Size = 4) {
|
||||||
|
EmitLabelByName(Name);
|
||||||
|
Streamer.emitIntValue(Value, Size);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto EmitString = [&](StringRef Name, StringRef Contents) {
|
||||||
|
EmitLabelByName(Name);
|
||||||
|
Streamer.emitBytes(Contents);
|
||||||
|
Streamer.emitFill(1, 0);
|
||||||
|
};
|
||||||
|
|
||||||
// All of the following symbols will be exported as globals to be used by the
|
// All of the following symbols will be exported as globals to be used by the
|
||||||
// instrumentation runtime library to dump the instrumentation data to disk.
|
// instrumentation runtime library to dump the instrumentation data to disk.
|
||||||
// Label marking start of the memory region containing instrumentation
|
// Label marking start of the memory region containing instrumentation
|
||||||
// counters, total vector size is Counters.size() 8-byte counters
|
// counters, total vector size is Counters.size() 8-byte counters
|
||||||
MCSymbol *Locs = BC.Ctx->getOrCreateSymbol("__bolt_instr_locations");
|
EmitLabelByName("__bolt_instr_locations");
|
||||||
MCSymbol *NumLocs = BC.Ctx->getOrCreateSymbol("__bolt_num_counters");
|
|
||||||
MCSymbol *NumIndCalls =
|
|
||||||
BC.Ctx->getOrCreateSymbol("__bolt_instr_num_ind_calls");
|
|
||||||
MCSymbol *NumIndCallTargets =
|
|
||||||
BC.Ctx->getOrCreateSymbol("__bolt_instr_num_ind_targets");
|
|
||||||
MCSymbol *NumFuncs = BC.Ctx->getOrCreateSymbol("__bolt_instr_num_funcs");
|
|
||||||
/// File name where profile is going to written to after target binary
|
|
||||||
/// finishes a run
|
|
||||||
MCSymbol *FilenameSym = BC.Ctx->getOrCreateSymbol("__bolt_instr_filename");
|
|
||||||
MCSymbol *UsePIDSym = BC.Ctx->getOrCreateSymbol("__bolt_instr_use_pid");
|
|
||||||
MCSymbol *InitPtr = BC.Ctx->getOrCreateSymbol("__bolt_instr_init_ptr");
|
|
||||||
MCSymbol *FiniPtr = BC.Ctx->getOrCreateSymbol("__bolt_instr_fini_ptr");
|
|
||||||
MCSymbol *SleepSym = BC.Ctx->getOrCreateSymbol("__bolt_instr_sleep_time");
|
|
||||||
|
|
||||||
Section->setAlignment(llvm::Align(BC.RegularPageSize));
|
|
||||||
Streamer.SwitchSection(Section);
|
|
||||||
Streamer.emitLabel(Locs);
|
|
||||||
Streamer.emitSymbolAttribute(Locs, MCSymbolAttr::MCSA_Global);
|
|
||||||
for (const auto &Label : Summary->Counters) {
|
for (const auto &Label : Summary->Counters) {
|
||||||
Streamer.emitLabel(Label);
|
EmitLabel(Label, /*IsGlobal*/ false);
|
||||||
Streamer.emitFill(8, 0);
|
Streamer.emitFill(8, 0);
|
||||||
}
|
}
|
||||||
const uint64_t Padding =
|
const uint64_t Padding =
|
||||||
|
@ -125,63 +139,40 @@ void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
|
||||||
8 * Summary->Counters.size();
|
8 * Summary->Counters.size();
|
||||||
if (Padding)
|
if (Padding)
|
||||||
Streamer.emitFill(Padding, 0);
|
Streamer.emitFill(Padding, 0);
|
||||||
Streamer.emitLabel(SleepSym);
|
|
||||||
Streamer.emitSymbolAttribute(SleepSym, MCSymbolAttr::MCSA_Global);
|
|
||||||
Streamer.emitIntValue(opts::InstrumentationSleepTime, /*Size=*/4);
|
|
||||||
Streamer.emitLabel(NumLocs);
|
|
||||||
Streamer.emitSymbolAttribute(NumLocs, MCSymbolAttr::MCSA_Global);
|
|
||||||
Streamer.emitIntValue(Summary->Counters.size(), /*Size=*/4);
|
|
||||||
Streamer.emitLabel(Summary->IndCallHandlerFunc);
|
|
||||||
Streamer.emitSymbolAttribute(Summary->IndCallHandlerFunc,
|
|
||||||
MCSymbolAttr::MCSA_Global);
|
|
||||||
Streamer.emitValue(
|
|
||||||
MCSymbolRefExpr::create(
|
|
||||||
Summary->InitialIndCallHandlerFunction->getSymbol(), *BC.Ctx),
|
|
||||||
/*Size=*/8);
|
|
||||||
Streamer.emitLabel(Summary->IndTailCallHandlerFunc);
|
|
||||||
Streamer.emitSymbolAttribute(Summary->IndTailCallHandlerFunc,
|
|
||||||
MCSymbolAttr::MCSA_Global);
|
|
||||||
Streamer.emitValue(
|
|
||||||
MCSymbolRefExpr::create(
|
|
||||||
Summary->InitialIndTailCallHandlerFunction->getSymbol(), *BC.Ctx),
|
|
||||||
/*Size=*/8);
|
|
||||||
Streamer.emitLabel(NumIndCalls);
|
|
||||||
Streamer.emitSymbolAttribute(NumIndCalls, MCSymbolAttr::MCSA_Global);
|
|
||||||
Streamer.emitIntValue(Summary->IndCallDescriptions.size(), /*Size=*/4);
|
|
||||||
Streamer.emitLabel(NumIndCallTargets);
|
|
||||||
Streamer.emitSymbolAttribute(NumIndCallTargets, MCSymbolAttr::MCSA_Global);
|
|
||||||
Streamer.emitIntValue(Summary->IndCallTargetDescriptions.size(), /*Size=*/4);
|
|
||||||
Streamer.emitLabel(NumFuncs);
|
|
||||||
Streamer.emitSymbolAttribute(NumFuncs, MCSymbolAttr::MCSA_Global);
|
|
||||||
|
|
||||||
Streamer.emitIntValue(Summary->FunctionDescriptions.size(), /*Size=*/4);
|
EmitIntValue("__bolt_instr_sleep_time", opts::InstrumentationSleepTime);
|
||||||
Streamer.emitLabel(FilenameSym);
|
EmitIntValue("__bolt_instr_no_counters_clear",
|
||||||
Streamer.emitBytes(opts::InstrumentationFilename);
|
!!opts::InstrumentationNoCountersClear, 1);
|
||||||
Streamer.emitFill(1, 0);
|
EmitIntValue("__bolt_instr_wait_forks", !!opts::InstrumentationWaitForks, 1);
|
||||||
Streamer.emitLabel(UsePIDSym);
|
EmitIntValue("__bolt_num_counters", Summary->Counters.size());
|
||||||
Streamer.emitIntValue(opts::InstrumentationFileAppendPID ? 1 : 0, /*Size=*/1);
|
EmitValue(Summary->IndCallHandlerFunc,
|
||||||
|
MCSymbolRefExpr::create(
|
||||||
Streamer.emitLabel(InitPtr);
|
Summary->InitialIndCallHandlerFunction->getSymbol(), *BC.Ctx));
|
||||||
Streamer.emitSymbolAttribute(InitPtr, MCSymbolAttr::MCSA_Global);
|
EmitValue(
|
||||||
Streamer.emitValue(
|
Summary->IndTailCallHandlerFunc,
|
||||||
MCSymbolRefExpr::create(StartFunction->getSymbol(), *BC.Ctx), /*Size=*/8);
|
MCSymbolRefExpr::create(
|
||||||
|
Summary->InitialIndTailCallHandlerFunction->getSymbol(), *BC.Ctx));
|
||||||
|
EmitIntValue("__bolt_instr_num_ind_calls",
|
||||||
|
Summary->IndCallDescriptions.size());
|
||||||
|
EmitIntValue("__bolt_instr_num_ind_targets",
|
||||||
|
Summary->IndCallTargetDescriptions.size());
|
||||||
|
EmitIntValue("__bolt_instr_num_funcs", Summary->FunctionDescriptions.size());
|
||||||
|
EmitString("__bolt_instr_filename", opts::InstrumentationFilename);
|
||||||
|
EmitIntValue("__bolt_instr_use_pid", !!opts::InstrumentationFileAppendPID, 1);
|
||||||
|
EmitValue(BC.Ctx->getOrCreateSymbol("__bolt_instr_init_ptr"),
|
||||||
|
MCSymbolRefExpr::create(StartFunction->getSymbol(), *BC.Ctx));
|
||||||
if (FiniFunction) {
|
if (FiniFunction) {
|
||||||
Streamer.emitLabel(FiniPtr);
|
EmitValue(BC.Ctx->getOrCreateSymbol("__bolt_instr_fini_ptr"),
|
||||||
Streamer.emitSymbolAttribute(FiniPtr, MCSymbolAttr::MCSA_Global);
|
MCSymbolRefExpr::create(FiniFunction->getSymbol(), *BC.Ctx));
|
||||||
Streamer.emitValue(
|
|
||||||
MCSymbolRefExpr::create(FiniFunction->getSymbol(), *BC.Ctx), /*Size=*/8);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (BC.isMachO()) {
|
if (BC.isMachO()) {
|
||||||
MCSection *TablesSection = BC.Ctx->getMachOSection(
|
MCSection *TablesSection = BC.Ctx->getMachOSection(
|
||||||
"__BOLT", "__tables", MachO::S_REGULAR,
|
"__BOLT", "__tables", MachO::S_REGULAR,
|
||||||
SectionKind::getData());
|
SectionKind::getData());
|
||||||
MCSymbol *Tables = BC.Ctx->getOrCreateSymbol("__bolt_instr_tables");
|
|
||||||
TablesSection->setAlignment(llvm::Align(BC.RegularPageSize));
|
TablesSection->setAlignment(llvm::Align(BC.RegularPageSize));
|
||||||
Streamer.SwitchSection(TablesSection);
|
Streamer.SwitchSection(TablesSection);
|
||||||
Streamer.emitLabel(Tables);
|
EmitString("__bolt_instr_tables", buildTables(BC));
|
||||||
Streamer.emitSymbolAttribute(Tables, MCSymbolAttr::MCSA_Global);
|
|
||||||
Streamer.emitBytes(buildTables(BC));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue