llvm-project/bolt/lib/Profile/DataAggregator.cpp

2257 lines
71 KiB
C++

//===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This family of functions reads profile data written by perf record,
// aggregate it and then write it back to an output file.
//
//===----------------------------------------------------------------------===//
#include "bolt/Profile/DataAggregator.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Profile/BoltAddressTranslation.h"
#include "bolt/Profile/Heatmap.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "bolt/Utils/Utils.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <unordered_map>
#define DEBUG_TYPE "aggregator"
using namespace llvm;
using namespace bolt;
namespace opts {
static cl::opt<bool>
BasicAggregation("nl",
cl::desc("aggregate basic samples (without LBR info)"),
cl::init(false),
cl::ZeroOrMore,
cl::cat(AggregatorCategory));
static cl::opt<bool>
FilterMemProfile("filter-mem-profile",
cl::desc("if processing a memory profile, filter out stack or heap accesses "
"that won't be useful for BOLT to reduce profile file size"),
cl::init(true),
cl::cat(AggregatorCategory));
static cl::opt<unsigned long long>
FilterPID("pid",
cl::desc("only use samples from process with specified PID"),
cl::init(0),
cl::Optional,
cl::cat(AggregatorCategory));
static cl::opt<bool>
IgnoreBuildID("ignore-build-id",
cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
cl::init(false),
cl::cat(AggregatorCategory));
static cl::opt<bool>
IgnoreInterruptLBR("ignore-interrupt-lbr",
cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
cl::init(true),
cl::ZeroOrMore,
cl::cat(AggregatorCategory));
static cl::opt<unsigned long long>
MaxSamples("max-samples",
cl::init(-1ULL),
cl::desc("maximum number of samples to read from LBR profile"),
cl::Optional,
cl::Hidden,
cl::cat(AggregatorCategory));
static cl::opt<bool>
ReadPreAggregated("pa",
cl::desc("skip perf and read data from a pre-aggregated file format"),
cl::init(false),
cl::ZeroOrMore,
cl::cat(AggregatorCategory));
static cl::opt<bool>
TimeAggregator("time-aggr",
cl::desc("time BOLT aggregator"),
cl::init(false),
cl::ZeroOrMore,
cl::cat(AggregatorCategory));
static cl::opt<bool>
UseEventPC("use-event-pc",
cl::desc("use event PC in combination with LBR sampling"),
cl::init(false),
cl::ZeroOrMore,
cl::cat(AggregatorCategory));
static cl::opt<bool>
WriteAutoFDOData("autofdo",
cl::desc("generate autofdo textual data instead of bolt data"),
cl::init(false),
cl::ZeroOrMore,
cl::cat(AggregatorCategory));
} // namespace opts
namespace {
const char TimerGroupName[] = "aggregator";
const char TimerGroupDesc[] = "Aggregator";
}
constexpr uint64_t DataAggregator::KernelBaseAddr;
DataAggregator::~DataAggregator() { deleteTempFiles(); }
namespace {
void deleteTempFile(const std::string &FileName) {
if (std::error_code Errc = sys::fs::remove(FileName.c_str())) {
errs() << "PERF2BOLT: failed to delete temporary file " << FileName
<< " with error " << Errc.message() << "\n";
}
}
}
void DataAggregator::deleteTempFiles() {
for (std::string &FileName : TempFiles) {
deleteTempFile(FileName);
}
TempFiles.clear();
}
void DataAggregator::findPerfExecutable() {
Optional<std::string> PerfExecutable =
sys::Process::FindInEnvPath("PATH", "perf");
if (!PerfExecutable) {
outs() << "PERF2BOLT: No perf executable found!\n";
exit(1);
}
PerfPath = *PerfExecutable;
}
void DataAggregator::start() {
outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
// Don't launch perf for pre-aggregated files
if (opts::ReadPreAggregated)
return;
findPerfExecutable();
if (opts::BasicAggregation) {
launchPerfProcess("events without LBR",
MainEventsPPI,
"script -F pid,event,ip",
/*Wait = */false);
} else {
launchPerfProcess("branch events",
MainEventsPPI,
"script -F pid,ip,brstack",
/*Wait = */false);
}
// Note: we launch script for mem events regardless of the option, as the
// command fails fairly fast if mem events were not collected.
launchPerfProcess("mem events",
MemEventsPPI,
"script -F pid,event,addr,ip",
/*Wait = */false);
launchPerfProcess("process events",
MMapEventsPPI,
"script --show-mmap-events",
/*Wait = */false);
launchPerfProcess("task events",
TaskEventsPPI,
"script --show-task-events",
/*Wait = */false);
}
void DataAggregator::abort() {
if (opts::ReadPreAggregated)
return;
std::string Error;
// Kill subprocesses in case they are not finished
sys::Wait(TaskEventsPPI.PI, 1, false, &Error);
sys::Wait(MMapEventsPPI.PI, 1, false, &Error);
sys::Wait(MainEventsPPI.PI, 1, false, &Error);
sys::Wait(MemEventsPPI.PI, 1, false, &Error);
deleteTempFiles();
exit(1);
}
void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
const char *ArgsString, bool Wait) {
SmallVector<StringRef, 4> Argv;
outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
Argv.push_back(PerfPath.data());
char *WritableArgsString = strdup(ArgsString);
char *Str = WritableArgsString;
do {
Argv.push_back(Str);
while (*Str && *Str != ' ')
++Str;
if (!*Str)
break;
*Str++ = 0;
} while (true);
Argv.push_back("-f");
Argv.push_back("-i");
Argv.push_back(Filename.c_str());
if (std::error_code Errc =
sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) {
errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
<< " with error " << Errc.message() << "\n";
exit(1);
}
TempFiles.push_back(PPI.StdoutPath.data());
if (std::error_code Errc =
sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) {
errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
<< " with error " << Errc.message() << "\n";
exit(1);
}
TempFiles.push_back(PPI.StderrPath.data());
Optional<StringRef> Redirects[] = {
llvm::None, // Stdin
StringRef(PPI.StdoutPath.data()), // Stdout
StringRef(PPI.StderrPath.data())}; // Stderr
LLVM_DEBUG({
dbgs() << "Launching perf: ";
for (StringRef Arg : Argv)
dbgs() << Arg << " ";
dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data()
<< "\n";
});
if (Wait) {
PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv,
/*envp*/ llvm::None, Redirects);
} else {
PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ llvm::None,
Redirects);
}
free(WritableArgsString);
}
void DataAggregator::processFileBuildID(StringRef FileBuildID) {
PerfProcessInfo BuildIDProcessInfo;
launchPerfProcess("buildid list",
BuildIDProcessInfo,
"buildid-list",
/*Wait = */true);
if (BuildIDProcessInfo.PI.ReturnCode != 0) {
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data());
StringRef ErrBuf = (*MB)->getBuffer();
errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode
<< '\n';
errs() << ErrBuf;
return;
}
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data());
if (std::error_code EC = MB.getError()) {
errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": "
<< EC.message() << "\n";
return;
}
FileBuf.reset(MB->release());
ParsingBuf = FileBuf->getBuffer();
if (ParsingBuf.empty()) {
errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
"data was recorded without it\n";
return;
}
Col = 0;
Line = 1;
Optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
if (!FileName) {
errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
"This indicates the input binary supplied for data aggregation "
"is not the same recorded by perf when collecting profiling "
"data, or there were no samples recorded for the binary. "
"Use -ignore-build-id option to override.\n";
if (!opts::IgnoreBuildID) {
abort();
}
} else if (*FileName != llvm::sys::path::filename(BC->getFilename())) {
errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
BuildIDBinaryName = std::string(*FileName);
} else {
outs() << "PERF2BOLT: matched build-id and file name\n";
}
return;
}
bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
if (opts::ReadPreAggregated)
return true;
Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName);
if (!FD)
return false;
char Buf[7] = {0, 0, 0, 0, 0, 0, 0};
auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); });
Expected<size_t> BytesRead = sys::fs::readNativeFileSlice(
*FD, makeMutableArrayRef(Buf, sizeof(Buf)), 0);
if (!BytesRead || *BytesRead != 7)
return false;
if (strncmp(Buf, "PERFILE", 7) == 0)
return true;
return false;
}
void DataAggregator::parsePreAggregated() {
std::string Error;
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
MemoryBuffer::getFileOrSTDIN(Filename);
if (std::error_code EC = MB.getError()) {
errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
<< EC.message() << "\n";
exit(1);
}
FileBuf.reset(MB->release());
ParsingBuf = FileBuf->getBuffer();
Col = 0;
Line = 1;
if (parsePreAggregatedLBRSamples()) {
errs() << "PERF2BOLT: failed to parse samples\n";
exit(1);
}
}
std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
outs() << "PERF2BOLT: writing data for autofdo tools...\n";
NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);
std::error_code EC;
raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
if (EC)
return EC;
// Format:
// number of unique traces
// from_1-to_1:count_1
// from_2-to_2:count_2
// ......
// from_n-to_n:count_n
// number of unique sample addresses
// addr_1:count_1
// addr_2:count_2
// ......
// addr_n:count_n
// number of unique LBR entries
// src_1->dst_1:count_1
// src_2->dst_2:count_2
// ......
// src_n->dst_n:count_n
const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;
// AutoFDO addresses are relative to the first allocated loadable program
// segment
auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t {
if (Address < FirstAllocAddress)
return 0;
return Address - FirstAllocAddress;
};
OutFile << FallthroughLBRs.size() << "\n";
for (const auto &AggrLBR : FallthroughLBRs) {
const Trace &Trace = AggrLBR.first;
const FTInfo &Info = AggrLBR.second;
OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "-"
<< Twine::utohexstr(filterAddress(Trace.To)) << ":"
<< (Info.InternCount + Info.ExternCount) << "\n";
}
OutFile << BasicSamples.size() << "\n";
for (const auto &Sample : BasicSamples) {
uint64_t PC = Sample.first;
uint64_t HitCount = Sample.second;
OutFile << Twine::utohexstr(filterAddress(PC)) << ":" << HitCount << "\n";
}
OutFile << BranchLBRs.size() << "\n";
for (const auto &AggrLBR : BranchLBRs) {
const Trace &Trace = AggrLBR.first;
const BranchInfo &Info = AggrLBR.second;
OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "->"
<< Twine::utohexstr(filterAddress(Trace.To)) << ":"
<< Info.TakenCount << "\n";
}
outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
<< BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
<< " unique branches to " << OutputFilename << "\n";
return std::error_code();
}
void DataAggregator::filterBinaryMMapInfo() {
if (opts::FilterPID) {
auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
if (MMapInfoIter != BinaryMMapInfo.end()) {
MMapInfo MMap = MMapInfoIter->second;
BinaryMMapInfo.clear();
BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap));
} else {
if (errs().has_colors())
errs().changeColor(raw_ostream::RED);
errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
<< opts::FilterPID << "\""
<< " for binary \"" << BC->getFilename() << "\".";
assert(!BinaryMMapInfo.empty() && "No memory map for matching binary");
errs() << " Profile for the following process is available:\n";
for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo) {
outs() << " " << MMI.second.PID
<< (MMI.second.Forked ? " (forked)\n" : "\n");
}
if (errs().has_colors())
errs().resetColor();
exit(1);
}
}
}
Error DataAggregator::preprocessProfile(BinaryContext &BC) {
this->BC = &BC;
if (opts::ReadPreAggregated) {
parsePreAggregated();
return Error::success();
}
if (Optional<StringRef> FileBuildID = BC.getFileBuildID()) {
outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
processFileBuildID(*FileBuildID);
} else {
errs() << "BOLT-WARNING: build-id will not be checked because we could "
"not read one from input binary\n";
}
auto prepareToParse = [&](StringRef Name, PerfProcessInfo &Process) {
std::string Error;
outs() << "PERF2BOLT: waiting for perf " << Name
<< " collection to finish...\n";
sys::ProcessInfo PI = sys::Wait(Process.PI, 0, true, &Error);
if (!Error.empty()) {
errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
deleteTempFiles();
exit(1);
}
if (PI.ReturnCode != 0) {
ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB =
MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data());
StringRef ErrBuf = (*ErrorMB)->getBuffer();
errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
errs() << ErrBuf;
deleteTempFiles();
exit(1);
}
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data());
if (std::error_code EC = MB.getError()) {
errs() << "Cannot open " << Process.StdoutPath.data() << ": "
<< EC.message() << "\n";
deleteTempFiles();
exit(1);
}
FileBuf.reset(MB->release());
ParsingBuf = FileBuf->getBuffer();
Col = 0;
Line = 1;
};
if (opts::LinuxKernelMode) {
// Current MMap parsing logic does not work with linux kernel.
// MMap entries for linux kernel uses PERF_RECORD_MMAP
// format instead of typical PERF_RECORD_MMAP2 format.
// Since linux kernel address mapping is absolute (same as
// in the ELF file), we avoid parsing MMap in linux kernel mode.
// While generating optimized linux kernel binary, we may need
// to parse MMap entries.
// In linux kernel mode, we analyze and optimize
// all linux kernel binary instructions, irrespective
// of whether they are due to system calls or due to
// interrupts. Therefore, we cannot ignore interrupt
// in Linux kernel mode.
opts::IgnoreInterruptLBR = false;
} else {
prepareToParse("mmap events", MMapEventsPPI);
if (parseMMapEvents()) {
errs() << "PERF2BOLT: failed to parse mmap events\n";
}
}
prepareToParse("task events", TaskEventsPPI);
if (parseTaskEvents()) {
errs() << "PERF2BOLT: failed to parse task events\n";
}
filterBinaryMMapInfo();
prepareToParse("events", MainEventsPPI);
if (opts::HeatmapMode) {
if (std::error_code EC = printLBRHeatMap()) {
errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
exit(1);
}
exit(0);
}
if ((!opts::BasicAggregation && parseBranchEvents()) ||
(opts::BasicAggregation && parseBasicEvents())) {
errs() << "PERF2BOLT: failed to parse samples\n";
}
// We can finish early if the goal is just to generate data for autofdo
if (opts::WriteAutoFDOData) {
if (std::error_code EC = writeAutoFDOData(opts::OutputFilename)) {
errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
}
deleteTempFiles();
exit(0);
}
// Special handling for memory events
std::string Error;
sys::ProcessInfo PI = sys::Wait(MemEventsPPI.PI, 0, true, &Error);
if (PI.ReturnCode != 0) {
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StderrPath.data());
StringRef ErrBuf = (*MB)->getBuffer();
deleteTempFiles();
Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
"Cannot print 'addr' field.");
if (!NoData.match(ErrBuf)) {
errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
errs() << ErrBuf;
exit(1);
}
return Error::success();
}
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StdoutPath.data());
if (std::error_code EC = MB.getError()) {
errs() << "Cannot open " << MemEventsPPI.StdoutPath.data() << ": "
<< EC.message() << "\n";
deleteTempFiles();
exit(1);
}
FileBuf.reset(MB->release());
ParsingBuf = FileBuf->getBuffer();
Col = 0;
Line = 1;
if (const std::error_code EC = parseMemEvents()) {
errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
<< '\n';
}
deleteTempFiles();
return Error::success();
}
Error DataAggregator::readProfile(BinaryContext &BC) {
processProfile(BC);
for (auto &BFI : BC.getBinaryFunctions()) {
BinaryFunction &Function = BFI.second;
convertBranchData(Function);
}
if (opts::AggregateOnly) {
if (std::error_code EC = writeAggregatedFile(opts::OutputFilename)) {
report_error("cannot create output data file", EC);
}
}
return Error::success();
}
bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
return Function.hasProfileAvailable();
}
void DataAggregator::processProfile(BinaryContext &BC) {
if (opts::ReadPreAggregated)
processPreAggregated();
else if (opts::BasicAggregation)
processBasicEvents();
else
processBranchEvents();
processMemEvents();
// Mark all functions with registered events as having a valid profile.
for (auto &BFI : BC.getBinaryFunctions()) {
BinaryFunction &BF = BFI.second;
if (getBranchData(BF)) {
const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
: BinaryFunction::PF_LBR;
BF.markProfiled(Flags);
}
}
// Release intermediate storage.
clear(BranchLBRs);
clear(FallthroughLBRs);
clear(AggregatedLBRs);
clear(BasicSamples);
clear(MemSamples);
}
BinaryFunction *
DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
if (!BC->containsAddress(Address))
return nullptr;
return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
/*UseMaxSize=*/true);
}
StringRef DataAggregator::getLocationName(BinaryFunction &Func,
uint64_t Count) {
if (!BAT)
return Func.getOneName();
const BinaryFunction *OrigFunc = &Func;
if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) {
NumColdSamples += Count;
BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr);
if (HotFunc)
OrigFunc = HotFunc;
}
// If it is a local function, prefer the name containing the file name where
// the local function was declared
for (StringRef AlternativeName : OrigFunc->getNames()) {
size_t FileNameIdx = AlternativeName.find('/');
// Confirm the alternative name has the pattern Symbol/FileName/1 before
// using it
if (FileNameIdx == StringRef::npos ||
AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos)
continue;
return AlternativeName;
}
return OrigFunc->getOneName();
}
bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address,
uint64_t Count) {
auto I = NamesToSamples.find(Func.getOneName());
if (I == NamesToSamples.end()) {
bool Success;
StringRef LocName = getLocationName(Func, Count);
std::tie(I, Success) = NamesToSamples.insert(
std::make_pair(Func.getOneName(),
FuncSampleData(LocName, FuncSampleData::ContainerTy())));
}
Address -= Func.getAddress();
if (BAT)
Address = BAT->translate(Func, Address, /*IsBranchSrc=*/false);
I->second.bumpCount(Address, Count);
return true;
}
bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
uint64_t To, uint64_t Count,
uint64_t Mispreds) {
FuncBranchData *AggrData = getBranchData(Func);
if (!AggrData) {
AggrData = &NamesToBranches[Func.getOneName()];
AggrData->Name = getLocationName(Func, Count);
setBranchData(Func, AggrData);
}
From -= Func.getAddress();
To -= Func.getAddress();
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " << Func.getPrintName()
<< " @ " << Twine::utohexstr(From) << " -> "
<< Func.getPrintName() << " @ " << Twine::utohexstr(To)
<< '\n');
if (BAT) {
From = BAT->translate(Func, From, /*IsBranchSrc=*/true);
To = BAT->translate(Func, To, /*IsBranchSrc=*/false);
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
<< Func.getPrintName() << " @ " << Twine::utohexstr(From)
<< " -> " << Func.getPrintName() << " @ "
<< Twine::utohexstr(To) << '\n');
}
AggrData->bumpBranchCount(From, To, Count, Mispreds);
return true;
}
bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
BinaryFunction *ToFunc, uint64_t From,
uint64_t To, uint64_t Count,
uint64_t Mispreds) {
FuncBranchData *FromAggrData = nullptr;
FuncBranchData *ToAggrData = nullptr;
StringRef SrcFunc;
StringRef DstFunc;
if (FromFunc) {
SrcFunc = getLocationName(*FromFunc, Count);
FromAggrData = getBranchData(*FromFunc);
if (!FromAggrData) {
FromAggrData = &NamesToBranches[FromFunc->getOneName()];
FromAggrData->Name = SrcFunc;
setBranchData(*FromFunc, FromAggrData);
}
From -= FromFunc->getAddress();
if (BAT)
From = BAT->translate(*FromFunc, From, /*IsBranchSrc=*/true);
recordExit(*FromFunc, From, Mispreds, Count);
}
if (ToFunc) {
DstFunc = getLocationName(*ToFunc, 0);
ToAggrData = getBranchData(*ToFunc);
if (!ToAggrData) {
ToAggrData = &NamesToBranches[ToFunc->getOneName()];
ToAggrData->Name = DstFunc;
setBranchData(*ToFunc, ToAggrData);
}
To -= ToFunc->getAddress();
if (BAT)
To = BAT->translate(*ToFunc, To, /*IsBranchSrc=*/false);
recordEntry(*ToFunc, To, Mispreds, Count);
}
if (FromAggrData)
FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To),
Count, Mispreds);
if (ToAggrData)
ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To,
Count, Mispreds);
return true;
}
bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
uint64_t Mispreds) {
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From);
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To);
if (!FromFunc && !ToFunc)
return false;
if (FromFunc == ToFunc) {
recordBranch(*FromFunc, From - FromFunc->getAddress(),
To - FromFunc->getAddress(), Count, Mispreds);
return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
}
return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
}
bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
uint64_t Count) {
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To);
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
if (!FromFunc || !ToFunc) {
LLVM_DEBUG(
dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
<< " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
<< " and ending in " << ToFunc->getPrintName() << " @ "
<< ToFunc->getPrintName() << " @ "
<< Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
NumLongRangeTraces += Count;
return false;
}
if (FromFunc != ToFunc) {
NumInvalidTraces += Count;
LLVM_DEBUG(
dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
<< " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
<< " and ending in " << ToFunc->getPrintName() << " @ "
<< ToFunc->getPrintName() << " @ "
<< Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
return false;
}
Optional<BoltAddressTranslation::FallthroughListTy> FTs =
BAT ? BAT->getFallthroughsInTrace(*FromFunc, First.To, Second.From)
: getFallthroughsInTrace(*FromFunc, First, Second, Count);
if (!FTs) {
LLVM_DEBUG(
dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
<< " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
<< " and ending in " << ToFunc->getPrintName() << " @ "
<< ToFunc->getPrintName() << " @ "
<< Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
NumInvalidTraces += Count;
return false;
}
LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
<< FromFunc->getPrintName() << ":"
<< Twine::utohexstr(First.To) << " to "
<< Twine::utohexstr(Second.From) << ".\n");
for (const std::pair<uint64_t, uint64_t> &Pair : *FTs) {
doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(),
Pair.second + FromFunc->getAddress(), Count, false);
}
return true;
}
bool DataAggregator::recordTrace(
BinaryFunction &BF,
const LBREntry &FirstLBR,
const LBREntry &SecondLBR,
uint64_t Count,
SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) const {
BinaryContext &BC = BF.getBinaryContext();
if (!BF.isSimple())
return false;
assert(BF.hasCFG() && "can only record traces in CFG state");
// Offsets of the trace within this function.
const uint64_t From = FirstLBR.To - BF.getAddress();
const uint64_t To = SecondLBR.From - BF.getAddress();
if (From > To)
return false;
BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
if (!FromBB || !ToBB)
return false;
// Adjust FromBB if the first LBR is a return from the last instruction in
// the previous block (that instruction should be a call).
if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
!FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
BinaryBasicBlock *PrevBB = BF.BasicBlocksLayout[FromBB->getIndex() - 1];
if (PrevBB->getSuccessor(FromBB->getLabel())) {
const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
if (Instr && BC.MIB->isCall(*Instr)) {
FromBB = PrevBB;
} else {
LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
<< '\n');
}
} else {
LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
}
}
// Fill out information for fall-through edges. The From and To could be
// within the same basic block, e.g. when two call instructions are in the
// same block. In this case we skip the processing.
if (FromBB == ToBB) {
return true;
}
// Process blocks in the original layout order.
BinaryBasicBlock *BB = BF.BasicBlocksLayout[FromBB->getIndex()];
assert(BB == FromBB && "index mismatch");
while (BB != ToBB) {
BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[BB->getIndex() + 1];
assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");
// Check for bad LBRs.
if (!BB->getSuccessor(NextBB->getLabel())) {
LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
<< " " << FirstLBR << '\n'
<< " " << SecondLBR << '\n');
return false;
}
// Record fall-through jumps
BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*NextBB);
BI.Count += Count;
if (Branches) {
const MCInst *Instr = BB->getLastNonPseudoInstr();
uint64_t Offset = 0;
if (Instr) {
Offset = BC.MIB->getAnnotationWithDefault<uint32_t>(*Instr, "Offset");
} else {
Offset = BB->getOffset();
}
Branches->emplace_back(Offset, NextBB->getOffset());
}
BB = NextBB;
}
return true;
}
Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
const LBREntry &FirstLBR,
const LBREntry &SecondLBR,
uint64_t Count) const {
SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
if (!recordTrace(BF, FirstLBR, SecondLBR, Count, &Res))
return NoneType();
return Res;
}
bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
uint64_t Count) const {
if (To > BF.getSize())
return false;
if (!BF.hasProfile())
BF.ExecutionCount = 0;
BinaryBasicBlock *EntryBB = nullptr;
if (To == 0) {
BF.ExecutionCount += Count;
if (!BF.empty())
EntryBB = &BF.front();
} else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) {
if (BB->isEntryPoint())
EntryBB = BB;
}
if (EntryBB)
EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);
return true;
}
bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
uint64_t Count) const {
if (!BF.isSimple() || From > BF.getSize())
return false;
if (!BF.hasProfile())
BF.ExecutionCount = 0;
return true;
}
ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
LBREntry Res;
ErrorOr<StringRef> FromStrRes = parseString('/');
if (std::error_code EC = FromStrRes.getError())
return EC;
StringRef OffsetStr = FromStrRes.get();
if (OffsetStr.getAsInteger(0, Res.From)) {
reportError("expected hexadecimal number with From address");
Diag << "Found: " << OffsetStr << "\n";
return make_error_code(llvm::errc::io_error);
}
ErrorOr<StringRef> ToStrRes = parseString('/');
if (std::error_code EC = ToStrRes.getError())
return EC;
OffsetStr = ToStrRes.get();
if (OffsetStr.getAsInteger(0, Res.To)) {
reportError("expected hexadecimal number with To address");
Diag << "Found: " << OffsetStr << "\n";
return make_error_code(llvm::errc::io_error);
}
ErrorOr<StringRef> MispredStrRes = parseString('/');
if (std::error_code EC = MispredStrRes.getError())
return EC;
StringRef MispredStr = MispredStrRes.get();
if (MispredStr.size() != 1 ||
(MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
reportError("expected single char for mispred bit");
Diag << "Found: " << MispredStr << "\n";
return make_error_code(llvm::errc::io_error);
}
Res.Mispred = MispredStr[0] == 'M';
static bool MispredWarning = true;
if (MispredStr[0] == '-' && MispredWarning) {
errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
MispredWarning = false;
}
ErrorOr<StringRef> Rest = parseString(FieldSeparator, true);
if (std::error_code EC = Rest.getError())
return EC;
if (Rest.get().size() < 5) {
reportError("expected rest of LBR entry");
Diag << "Found: " << Rest.get() << "\n";
return make_error_code(llvm::errc::io_error);
}
return Res;
}
bool DataAggregator::checkAndConsumeFS() {
if (ParsingBuf[0] != FieldSeparator) {
return false;
}
ParsingBuf = ParsingBuf.drop_front(1);
Col += 1;
return true;
}
void DataAggregator::consumeRestOfLine() {
size_t LineEnd = ParsingBuf.find_first_of('\n');
if (LineEnd == StringRef::npos) {
ParsingBuf = StringRef();
Col = 0;
Line += 1;
return;
}
ParsingBuf = ParsingBuf.drop_front(LineEnd + 1);
Col = 0;
Line += 1;
}
ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
PerfBranchSample Res;
while (checkAndConsumeFS()) {
}
ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
if (std::error_code EC = PIDRes.getError())
return EC;
auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) {
consumeRestOfLine();
return make_error_code(errc::no_such_process);
}
while (checkAndConsumeFS()) {
}
ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
if (std::error_code EC = PCRes.getError())
return EC;
Res.PC = PCRes.get();
if (checkAndConsumeNewLine())
return Res;
while (!checkAndConsumeNewLine()) {
checkAndConsumeFS();
ErrorOr<LBREntry> LBRRes = parseLBREntry();
if (std::error_code EC = LBRRes.getError())
return EC;
LBREntry LBR = LBRRes.get();
if (ignoreKernelInterrupt(LBR))
continue;
if (!BC->HasFixedLoadAddress)
adjustLBR(LBR, MMapInfoIter->second);
Res.LBR.push_back(LBR);
}
return Res;
}
ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
while (checkAndConsumeFS()) {
}
ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
if (std::error_code EC = PIDRes.getError())
return EC;
auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
if (MMapInfoIter == BinaryMMapInfo.end()) {
consumeRestOfLine();
return PerfBasicSample{StringRef(), 0};
}
while (checkAndConsumeFS()) {
}
ErrorOr<StringRef> Event = parseString(FieldSeparator);
if (std::error_code EC = Event.getError())
return EC;
while (checkAndConsumeFS()) {
}
ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true);
if (std::error_code EC = AddrRes.getError()) {
return EC;
}
if (!checkAndConsumeNewLine()) {
reportError("expected end of line");
return make_error_code(llvm::errc::io_error);
}
uint64_t Address = *AddrRes;
if (!BC->HasFixedLoadAddress)
adjustAddress(Address, MMapInfoIter->second);
return PerfBasicSample{Event.get(), Address};
}
ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
PerfMemSample Res{0, 0};
while (checkAndConsumeFS()) {
}
ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
if (std::error_code EC = PIDRes.getError())
return EC;
auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
if (MMapInfoIter == BinaryMMapInfo.end()) {
consumeRestOfLine();
return Res;
}
while (checkAndConsumeFS()) {
}
ErrorOr<StringRef> Event = parseString(FieldSeparator);
if (std::error_code EC = Event.getError())
return EC;
if (Event.get().find("mem-loads") == StringRef::npos) {
consumeRestOfLine();
return Res;
}
while (checkAndConsumeFS()) {
}
ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator);
if (std::error_code EC = AddrRes.getError()) {
return EC;
}
while (checkAndConsumeFS()) {
}
ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
if (std::error_code EC = PCRes.getError()) {
consumeRestOfLine();
return EC;
}
if (!checkAndConsumeNewLine()) {
reportError("expected end of line");
return make_error_code(llvm::errc::io_error);
}
uint64_t Address = *AddrRes;
if (!BC->HasFixedLoadAddress)
adjustAddress(Address, MMapInfoIter->second);
return PerfMemSample{PCRes.get(), Address};
}
ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
auto parseOffset = [this]() -> ErrorOr<Location> {
ErrorOr<uint64_t> Res = parseHexField(FieldSeparator);
if (std::error_code EC = Res.getError())
return EC;
return Location(Res.get());
};
size_t Sep = ParsingBuf.find_first_of(" \n");
if (Sep == StringRef::npos)
return parseOffset();
StringRef LookAhead = ParsingBuf.substr(0, Sep);
if (LookAhead.find_first_of(":") == StringRef::npos)
return parseOffset();
ErrorOr<StringRef> BuildID = parseString(':');
if (std::error_code EC = BuildID.getError())
return EC;
ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator);
if (std::error_code EC = Offset.getError())
return EC;
return Location(true, BuildID.get(), Offset.get());
}
ErrorOr<DataAggregator::AggregatedLBREntry>
DataAggregator::parseAggregatedLBREntry() {
while (checkAndConsumeFS()) {
}
ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
if (std::error_code EC = TypeOrErr.getError())
return EC;
auto Type = AggregatedLBREntry::BRANCH;
if (TypeOrErr.get() == "B") {
Type = AggregatedLBREntry::BRANCH;
} else if (TypeOrErr.get() == "F") {
Type = AggregatedLBREntry::FT;
} else if (TypeOrErr.get() == "f") {
Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
} else {
reportError("expected B, F or f");
return make_error_code(llvm::errc::io_error);
}
while (checkAndConsumeFS()) {
}
ErrorOr<Location> From = parseLocationOrOffset();
if (std::error_code EC = From.getError())
return EC;
while (checkAndConsumeFS()) {
}
ErrorOr<Location> To = parseLocationOrOffset();
if (std::error_code EC = To.getError())
return EC;
while (checkAndConsumeFS()) {
}
ErrorOr<int64_t> Frequency =
parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
if (std::error_code EC = Frequency.getError())
return EC;
uint64_t Mispreds = 0;
if (Type == AggregatedLBREntry::BRANCH) {
while (checkAndConsumeFS()) {
}
ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
if (std::error_code EC = MispredsOrErr.getError())
return EC;
Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
}
if (!checkAndConsumeNewLine()) {
reportError("expected end of line");
return make_error_code(llvm::errc::io_error);
}
return AggregatedLBREntry{From.get(), To.get(),
static_cast<uint64_t>(Frequency.get()), Mispreds,
Type};
}
bool DataAggregator::hasData() {
if (ParsingBuf.size() == 0)
return false;
return true;
}
bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
return opts::IgnoreInterruptLBR &&
(LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
}
std::error_code DataAggregator::printLBRHeatMap() {
outs() << "PERF2BOLT: parse branch events...\n";
NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);
if (opts::LinuxKernelMode) {
opts::HeatmapMaxAddress = 0xffffffffffffffff;
opts::HeatmapMinAddress = KernelBaseAddr;
}
Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
opts::HeatmapMaxAddress);
uint64_t NumTotalSamples = 0;
while (hasData()) {
ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
if (std::error_code EC = SampleRes.getError()) {
if (EC == errc::no_such_process)
continue;
return EC;
}
PerfBranchSample &Sample = SampleRes.get();
// LBRs are stored in reverse execution order. NextLBR refers to the next
// executed branch record.
const LBREntry *NextLBR = nullptr;
for (const LBREntry &LBR : Sample.LBR) {
if (NextLBR) {
// Record fall-through trace.
const uint64_t TraceFrom = LBR.To;
const uint64_t TraceTo = NextLBR->From;
++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
}
NextLBR = &LBR;
}
if (!Sample.LBR.empty()) {
HM.registerAddress(Sample.LBR.front().To);
HM.registerAddress(Sample.LBR.back().From);
}
NumTotalSamples += Sample.LBR.size();
}
if (!NumTotalSamples) {
errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
"Cannot build heatmap.\n";
exit(1);
}
outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
outs() << "HEATMAP: building heat map...\n";
for (const auto &LBR : FallthroughLBRs) {
const Trace &Trace = LBR.first;
const FTInfo &Info = LBR.second;
HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
}
if (HM.getNumInvalidRanges())
outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
if (!HM.size()) {
errs() << "HEATMAP-ERROR: no valid traces registered\n";
exit(1);
}
HM.print(opts::HeatmapFile);
if (opts::HeatmapFile == "-") {
HM.printCDF(opts::HeatmapFile);
} else {
HM.printCDF(opts::HeatmapFile + ".csv");
}
return std::error_code();
}
std::error_code DataAggregator::parseBranchEvents() {
outs() << "PERF2BOLT: parse branch events...\n";
NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);
uint64_t NumTotalSamples = 0;
uint64_t NumEntries = 0;
uint64_t NumSamples = 0;
uint64_t NumSamplesNoLBR = 0;
uint64_t NumTraces = 0;
bool NeedsSkylakeFix = false;
while (hasData() && NumTotalSamples < opts::MaxSamples) {
++NumTotalSamples;
ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
if (std::error_code EC = SampleRes.getError()) {
if (EC == errc::no_such_process)
continue;
return EC;
}
++NumSamples;
PerfBranchSample &Sample = SampleRes.get();
if (opts::WriteAutoFDOData)
++BasicSamples[Sample.PC];
if (Sample.LBR.empty()) {
++NumSamplesNoLBR;
continue;
}
NumEntries += Sample.LBR.size();
if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
NeedsSkylakeFix = true;
}
// LBRs are stored in reverse execution order. NextPC refers to the next
// recorded executed PC.
uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
uint32_t NumEntry = 0;
for (const LBREntry &LBR : Sample.LBR) {
++NumEntry;
// Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
// sometimes record entry 32 as an exact copy of entry 31. This will cause
// us to likely record an invalid trace and generate a stale function for
// BAT mode (non BAT disassembles the function and is able to ignore this
// trace at aggregation time). Drop first 2 entries (last two, in
// chronological order)
if (NeedsSkylakeFix && NumEntry <= 2)
continue;
if (NextPC) {
// Record fall-through trace.
const uint64_t TraceFrom = LBR.To;
const uint64_t TraceTo = NextPC;
const BinaryFunction *TraceBF =
getBinaryFunctionContainingAddress(TraceFrom);
if (TraceBF && TraceBF->containsAddress(TraceTo)) {
FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
if (TraceBF->containsAddress(LBR.From)) {
++Info.InternCount;
} else {
++Info.ExternCount;
}
} else {
if (TraceBF && getBinaryFunctionContainingAddress(TraceTo)) {
LLVM_DEBUG(dbgs()
<< "Invalid trace starting in "
<< TraceBF->getPrintName() << " @ "
<< Twine::utohexstr(TraceFrom - TraceBF->getAddress())
<< " and ending @ " << Twine::utohexstr(TraceTo)
<< '\n');
++NumInvalidTraces;
} else {
LLVM_DEBUG(dbgs()
<< "Out of range trace starting in "
<< (TraceBF ? TraceBF->getPrintName() : "None") << " @ "
<< Twine::utohexstr(
TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
<< " and ending in "
<< (getBinaryFunctionContainingAddress(TraceTo)
? getBinaryFunctionContainingAddress(TraceTo)
->getPrintName()
: "None")
<< " @ "
<< Twine::utohexstr(
TraceTo -
(getBinaryFunctionContainingAddress(TraceTo)
? getBinaryFunctionContainingAddress(TraceTo)
->getAddress()
: 0))
<< '\n');
++NumLongRangeTraces;
}
}
++NumTraces;
}
NextPC = LBR.From;
uint64_t From = LBR.From;
if (!getBinaryFunctionContainingAddress(From))
From = 0;
uint64_t To = LBR.To;
if (!getBinaryFunctionContainingAddress(To))
To = 0;
if (!From && !To)
continue;
BranchInfo &Info = BranchLBRs[Trace(From, To)];
++Info.TakenCount;
Info.MispredCount += LBR.Mispred;
}
}
for (const auto &LBR : BranchLBRs) {
const Trace &Trace = LBR.first;
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.From))
BF->setHasProfileAvailable();
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.To))
BF->setHasProfileAvailable();
}
auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
OS << " (";
if (OS.has_colors()) {
if (Percent > T2) {
OS.changeColor(raw_ostream::RED);
} else if (Percent > T1) {
OS.changeColor(raw_ostream::YELLOW);
} else {
OS.changeColor(raw_ostream::GREEN);
}
}
OS << format("%.1f%%", Percent);
if (OS.has_colors())
OS.resetColor();
OS << ")";
};
outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
<< " LBR entries\n";
if (NumTotalSamples) {
if (NumSamples && NumSamplesNoLBR == NumSamples) {
// Note: we don't know if perf2bolt is being used to parse memory samples
// at this point. In this case, it is OK to parse zero LBRs.
errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
"LBR. Record profile with perf record -j any or run perf2bolt "
"in no-LBR mode with -nl (the performance improvement in -nl "
"mode may be limited)\n";
} else {
const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
printColored(outs(), PercentIgnored, 20, 50);
outs() << " were ignored\n";
if (PercentIgnored > 50.0f) {
errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
"were attributed to the input binary\n";
}
}
}
outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
<< NumInvalidTraces;
float Perc = 0.0f;
if (NumTraces > 0) {
Perc = NumInvalidTraces * 100.0f / NumTraces;
printColored(outs(), Perc, 5, 10);
}
outs() << "\n";
if (Perc > 10.0f) {
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
"binary is probably not the same binary used during profiling "
"collection. The generated data may be ineffective for improving "
"performance.\n\n";
}
outs() << "PERF2BOLT: out of range traces involving unknown regions: "
<< NumLongRangeTraces;
if (NumTraces > 0) {
outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
}
outs() << "\n";
if (NumColdSamples > 0) {
const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
outs() << "PERF2BOLT: " << NumColdSamples
<< format(" (%.1f%%)", ColdSamples)
<< " samples recorded in cold regions of split functions.\n";
if (ColdSamples > 5.0f) {
outs()
<< "WARNING: The BOLT-processed binary where samples were collected "
"likely used bad data or your service observed a large shift in "
"profile. You may want to audit this.\n";
}
}
return std::error_code();
}
void DataAggregator::processBranchEvents() {
outs() << "PERF2BOLT: processing branch events...\n";
NamedRegionTimer T("processBranch", "Processing branch events",
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
for (const auto &AggrLBR : FallthroughLBRs) {
const Trace &Loc = AggrLBR.first;
const FTInfo &Info = AggrLBR.second;
LBREntry First{Loc.From, Loc.From, false};
LBREntry Second{Loc.To, Loc.To, false};
if (Info.InternCount) {
doTrace(First, Second, Info.InternCount);
}
if (Info.ExternCount) {
First.From = 0;
doTrace(First, Second, Info.ExternCount);
}
}
for (const auto &AggrLBR : BranchLBRs) {
const Trace &Loc = AggrLBR.first;
const BranchInfo &Info = AggrLBR.second;
doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
}
}
std::error_code DataAggregator::parseBasicEvents() {
outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);
while (hasData()) {
ErrorOr<PerfBasicSample> Sample = parseBasicSample();
if (std::error_code EC = Sample.getError())
return EC;
if (!Sample->PC)
continue;
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
BF->setHasProfileAvailable();
++BasicSamples[Sample->PC];
EventNames.insert(Sample->EventName);
}
return std::error_code();
}
void DataAggregator::processBasicEvents() {
outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);
uint64_t OutOfRangeSamples = 0;
uint64_t NumSamples = 0;
for (auto &Sample : BasicSamples) {
const uint64_t PC = Sample.first;
const uint64_t HitCount = Sample.second;
NumSamples += HitCount;
BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
if (!Func) {
OutOfRangeSamples += HitCount;
continue;
}
doSample(*Func, PC, HitCount);
}
outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
<< OutOfRangeSamples;
float Perc = 0.0f;
if (NumSamples > 0) {
outs() << " (";
Perc = OutOfRangeSamples * 100.0f / NumSamples;
if (outs().has_colors()) {
if (Perc > 60.0f) {
outs().changeColor(raw_ostream::RED);
} else if (Perc > 40.0f) {
outs().changeColor(raw_ostream::YELLOW);
} else {
outs().changeColor(raw_ostream::GREEN);
}
}
outs() << format("%.1f%%", Perc);
if (outs().has_colors())
outs().resetColor();
outs() << ")";
}
outs() << "\n";
if (Perc > 80.0f) {
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
"binary is probably not the same binary used during profiling "
"collection. The generated data may be ineffective for improving "
"performance.\n\n";
}
}
std::error_code DataAggregator::parseMemEvents() {
outs() << "PERF2BOLT: parsing memory events...\n";
NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);
while (hasData()) {
ErrorOr<PerfMemSample> Sample = parseMemSample();
if (std::error_code EC = Sample.getError())
return EC;
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
BF->setHasProfileAvailable();
MemSamples.emplace_back(std::move(Sample.get()));
}
return std::error_code();
}
void DataAggregator::processMemEvents() {
NamedRegionTimer T("ProcessMemEvents", "Processing mem events",
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
for (const PerfMemSample &Sample : MemSamples) {
uint64_t PC = Sample.PC;
uint64_t Addr = Sample.Addr;
StringRef FuncName;
StringRef MemName;
// Try to resolve symbol for PC
BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
if (!Func) {
LLVM_DEBUG(if (PC != 0) {
dbgs() << "Skipped mem event: 0x" << Twine::utohexstr(PC) << " => 0x"
<< Twine::utohexstr(Addr) << "\n";
});
continue;
}
FuncName = Func->getOneName();
PC -= Func->getAddress();
// Try to resolve symbol for memory load
if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) {
MemName = BD->getName();
Addr -= BD->getAddress();
} else if (opts::FilterMemProfile) {
// Filter out heap/stack accesses
continue;
}
const Location FuncLoc(!FuncName.empty(), FuncName, PC);
const Location AddrLoc(!MemName.empty(), MemName, Addr);
FuncMemData *MemData = &NamesToMemEvents[FuncName];
setMemData(*Func, MemData);
MemData->update(FuncLoc, AddrLoc);
LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
}
}
std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
while (hasData()) {
ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry();
if (std::error_code EC = AggrEntry.getError())
return EC;
if (BinaryFunction *BF =
getBinaryFunctionContainingAddress(AggrEntry->From.Offset))
BF->setHasProfileAvailable();
if (BinaryFunction *BF =
getBinaryFunctionContainingAddress(AggrEntry->To.Offset))
BF->setHasProfileAvailable();
AggregatedLBRs.emplace_back(std::move(AggrEntry.get()));
}
return std::error_code();
}
void DataAggregator::processPreAggregated() {
outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
uint64_t NumTraces = 0;
for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
switch (AggrEntry.EntryType) {
case AggregatedLBREntry::BRANCH:
doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
AggrEntry.Mispreds);
break;
case AggregatedLBREntry::FT:
case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
? AggrEntry.From.Offset
: 0,
AggrEntry.From.Offset, false};
LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
doTrace(First, Second, AggrEntry.Count);
NumTraces += AggrEntry.Count;
break;
}
}
}
outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
<< " aggregated LBR entries\n";
outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
<< NumInvalidTraces;
float Perc = 0.0f;
if (NumTraces > 0) {
outs() << " (";
Perc = NumInvalidTraces * 100.0f / NumTraces;
if (outs().has_colors()) {
if (Perc > 10.0f) {
outs().changeColor(raw_ostream::RED);
} else if (Perc > 5.0f) {
outs().changeColor(raw_ostream::YELLOW);
} else {
outs().changeColor(raw_ostream::GREEN);
}
}
outs() << format("%.1f%%", Perc);
if (outs().has_colors())
outs().resetColor();
outs() << ")";
}
outs() << "\n";
if (Perc > 10.0f) {
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
"binary is probably not the same binary used during profiling "
"collection. The generated data may be ineffective for improving "
"performance.\n\n";
}
outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
<< NumLongRangeTraces;
if (NumTraces > 0) {
outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
}
outs() << "\n";
}
Optional<int32_t> DataAggregator::parseCommExecEvent() {
size_t LineEnd = ParsingBuf.find_first_of("\n");
if (LineEnd == StringRef::npos) {
reportError("expected rest of line");
Diag << "Found: " << ParsingBuf << "\n";
return NoneType();
}
StringRef Line = ParsingBuf.substr(0, LineEnd);
size_t Pos = Line.find("PERF_RECORD_COMM exec");
if (Pos == StringRef::npos) {
return NoneType();
}
Line = Line.drop_front(Pos);
// Line:
// PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
StringRef PIDStr = Line.rsplit(':').second.split('/').first;
int32_t PID;
if (PIDStr.getAsInteger(10, PID)) {
reportError("expected PID");
Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
return NoneType();
}
return PID;
}
namespace {
Optional<uint64_t> parsePerfTime(const StringRef TimeStr) {
const StringRef SecTimeStr = TimeStr.split('.').first;
const StringRef USecTimeStr = TimeStr.split('.').second;
uint64_t SecTime;
uint64_t USecTime;
if (SecTimeStr.getAsInteger(10, SecTime) ||
USecTimeStr.getAsInteger(10, USecTime)) {
return NoneType();
}
return SecTime * 1000000ULL + USecTime;
}
}
Optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() {
while (checkAndConsumeFS()) {
}
size_t LineEnd = ParsingBuf.find_first_of("\n");
if (LineEnd == StringRef::npos) {
reportError("expected rest of line");
Diag << "Found: " << ParsingBuf << "\n";
return NoneType();
}
StringRef Line = ParsingBuf.substr(0, LineEnd);
size_t Pos = Line.find("PERF_RECORD_FORK");
if (Pos == StringRef::npos) {
consumeRestOfLine();
return NoneType();
}
ForkInfo FI;
const StringRef TimeStr =
Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
FI.Time = *TimeRes;
}
Line = Line.drop_front(Pos);
// Line:
// PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
const StringRef ChildPIDStr = Line.split('(').second.split(':').first;
if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) {
reportError("expected PID");
Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n";
return NoneType();
}
const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first;
if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) {
reportError("expected PID");
Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n";
return NoneType();
}
consumeRestOfLine();
return FI;
}
ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>>
DataAggregator::parseMMapEvent() {
while (checkAndConsumeFS()) {
}
MMapInfo ParsedInfo;
size_t LineEnd = ParsingBuf.find_first_of("\n");
if (LineEnd == StringRef::npos) {
reportError("expected rest of line");
Diag << "Found: " << ParsingBuf << "\n";
return make_error_code(llvm::errc::io_error);
}
StringRef Line = ParsingBuf.substr(0, LineEnd);
size_t Pos = Line.find("PERF_RECORD_MMAP2");
if (Pos == StringRef::npos) {
consumeRestOfLine();
return std::make_pair(StringRef(), ParsedInfo);
}
// Line:
// {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
const StringRef TimeStr =
Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
ParsedInfo.Time = *TimeRes;
}
Line = Line.drop_front(Pos);
// Line:
// PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
StringRef FileName = Line.rsplit(FieldSeparator).second;
if (FileName.startswith("//") || FileName.startswith("[")) {
consumeRestOfLine();
return std::make_pair(StringRef(), ParsedInfo);
}
FileName = sys::path::filename(FileName);
const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first;
if (PIDStr.getAsInteger(10, ParsedInfo.PID)) {
reportError("expected PID");
Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
return make_error_code(llvm::errc::io_error);
}
const StringRef BaseAddressStr = Line.split('[').second.split('(').first;
if (BaseAddressStr.getAsInteger(0, ParsedInfo.BaseAddress)) {
reportError("expected base address");
Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
return make_error_code(llvm::errc::io_error);
}
const StringRef SizeStr = Line.split('(').second.split(')').first;
if (SizeStr.getAsInteger(0, ParsedInfo.Size)) {
reportError("expected mmaped size");
Diag << "Found: " << SizeStr << "in '" << Line << "'\n";
return make_error_code(llvm::errc::io_error);
}
const StringRef OffsetStr =
Line.split('@').second.ltrim().split(FieldSeparator).first;
if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) {
reportError("expected mmaped page-aligned offset");
Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
return make_error_code(llvm::errc::io_error);
}
consumeRestOfLine();
return std::make_pair(FileName, ParsedInfo);
}
std::error_code DataAggregator::parseMMapEvents() {
outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);
std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
while (hasData()) {
ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
if (std::error_code EC = FileMMapInfoRes.getError())
return EC;
std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
if (FileMMapInfo.second.PID == -1)
continue;
// Consider only the first mapping of the file for any given PID
bool PIDExists = false;
auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first);
for (auto MI = Range.first; MI != Range.second; ++MI) {
if (MI->second.PID == FileMMapInfo.second.PID) {
PIDExists = true;
break;
}
}
if (PIDExists)
continue;
GlobalMMapInfo.insert(FileMMapInfo);
}
LLVM_DEBUG({
dbgs() << "FileName -> mmap info:\n";
for (const std::pair<const StringRef, MMapInfo> &Pair : GlobalMMapInfo) {
dbgs() << " " << Pair.first << " : " << Pair.second.PID << " [0x"
<< Twine::utohexstr(Pair.second.BaseAddress) << ", "
<< Twine::utohexstr(Pair.second.Size) << " @ "
<< Twine::utohexstr(Pair.second.Offset) << "]\n";
}
});
StringRef NameToUse = llvm::sys::path::filename(BC->getFilename());
if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) {
errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
<< "\" for profile matching\n";
NameToUse = BuildIDBinaryName;
}
auto Range = GlobalMMapInfo.equal_range(NameToUse);
for (auto I = Range.first; I != Range.second; ++I) {
const MMapInfo &MMapInfo = I->second;
if (BC->HasFixedLoadAddress && MMapInfo.BaseAddress) {
// Check that the binary mapping matches one of the segments.
bool MatchFound = false;
for (auto &KV : BC->SegmentMapInfo) {
SegmentInfo &SegInfo = KV.second;
// The mapping is page-aligned and hence the BaseAddress could be
// different from the segment start address. We cannot know the page
// size of the mapping, but we know it should not exceed the segment
// alignment value. Hence we are performing an approximate check.
if (SegInfo.Address >= MMapInfo.BaseAddress &&
SegInfo.Address - MMapInfo.BaseAddress < SegInfo.Alignment) {
MatchFound = true;
break;
}
}
if (!MatchFound) {
errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
<< " at 0x" << Twine::utohexstr(MMapInfo.BaseAddress) << '\n';
continue;
}
}
BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
}
if (BinaryMMapInfo.empty()) {
if (errs().has_colors())
errs().changeColor(raw_ostream::RED);
errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
<< BC->getFilename() << "\".";
if (!GlobalMMapInfo.empty()) {
errs() << " Profile for the following binary name(s) is available:\n";
for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
I = GlobalMMapInfo.upper_bound(I->first)) {
errs() << " " << I->first << '\n';
}
errs() << "Please rename the input binary.\n";
} else {
errs() << " Failed to extract any binary name from a profile.\n";
}
if (errs().has_colors())
errs().resetColor();
exit(1);
}
return std::error_code();
}
std::error_code DataAggregator::parseTaskEvents() {
outs() << "PERF2BOLT: parsing perf-script task events output\n";
NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);
while (hasData()) {
if (Optional<int32_t> CommInfo = parseCommExecEvent()) {
// Remove forked child that ran execve
auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo);
if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked) {
BinaryMMapInfo.erase(MMapInfoIter);
}
consumeRestOfLine();
continue;
}
Optional<ForkInfo> ForkInfo = parseForkEvent();
if (!ForkInfo)
continue;
if (ForkInfo->ParentPID == ForkInfo->ChildPID)
continue;
if (ForkInfo->Time == 0) {
// Process was forked and mmaped before perf ran. In this case the child
// should have its own mmap entry unless it was execve'd.
continue;
}
auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID);
if (MMapInfoIter == BinaryMMapInfo.end())
continue;
MMapInfo MMapInfo = MMapInfoIter->second;
MMapInfo.PID = ForkInfo->ChildPID;
MMapInfo.Forked = true;
BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
}
outs() << "PERF2BOLT: input binary is associated with "
<< BinaryMMapInfo.size() << " PID(s)\n";
LLVM_DEBUG({
for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo) {
outs() << " " << MMI.second.PID << (MMI.second.Forked ? " (forked)" : "")
<< ": (0x" << Twine::utohexstr(MMI.second.BaseAddress) << ": 0x"
<< Twine::utohexstr(MMI.second.Size) << ")\n";
}
});
return std::error_code();
}
Optional<std::pair<StringRef, StringRef>>
DataAggregator::parseNameBuildIDPair() {
while (checkAndConsumeFS()) {
}
ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true);
if (std::error_code EC = BuildIDStr.getError())
return NoneType();
ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true);
if (std::error_code EC = NameStr.getError())
return NoneType();
consumeRestOfLine();
return std::make_pair(NameStr.get(), BuildIDStr.get());
}
Optional<StringRef>
DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
while (hasData()) {
Optional<std::pair<StringRef, StringRef>> IDPair = parseNameBuildIDPair();
if (!IDPair)
return NoneType();
if (IDPair->second.startswith(FileBuildID))
return sys::path::filename(IDPair->first);
}
return NoneType();
}
std::error_code
DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
std::error_code EC;
raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
if (EC)
return EC;
bool WriteMemLocs = false;
auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) {
if (WriteMemLocs)
OutFile << (Loc.IsSymbol ? "4 " : "3 ");
else
OutFile << (Loc.IsSymbol ? "1 " : "0 ");
OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name))
<< " " << Twine::utohexstr(Loc.Offset) << FieldSeparator;
};
uint64_t BranchValues = 0;
uint64_t MemValues = 0;
if (BAT)
OutFile << "boltedcollection\n";
if (opts::BasicAggregation) {
OutFile << "no_lbr";
for (const StringMapEntry<NoneType> &Entry : EventNames) {
OutFile << " " << Entry.getKey();
}
OutFile << "\n";
for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) {
for (const SampleInfo &SI : Func.getValue().Data) {
writeLocation(SI.Loc);
OutFile << SI.Hits << "\n";
++BranchValues;
}
}
} else {
for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
for (const llvm::bolt::BranchInfo &BI : Func.getValue().Data) {
writeLocation(BI.From);
writeLocation(BI.To);
OutFile << BI.Mispreds << " " << BI.Branches << "\n";
++BranchValues;
}
for (const llvm::bolt::BranchInfo &BI : Func.getValue().EntryData) {
// Do not output if source is a known symbol, since this was already
// accounted for in the source function
if (BI.From.IsSymbol)
continue;
writeLocation(BI.From);
writeLocation(BI.To);
OutFile << BI.Mispreds << " " << BI.Branches << "\n";
++BranchValues;
}
}
WriteMemLocs = true;
for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) {
for (const MemInfo &MemEvent : Func.getValue().Data) {
writeLocation(MemEvent.Offset);
writeLocation(MemEvent.Addr);
OutFile << MemEvent.Count << "\n";
++MemValues;
}
}
}
outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues
<< " memory objects to " << OutputFilename << "\n";
return std::error_code();
}
void DataAggregator::dump() const { DataReader::dump(); }
void DataAggregator::dump(const LBREntry &LBR) const {
Diag << "From: " << Twine::utohexstr(LBR.From)
<< " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred
<< "\n";
}
void DataAggregator::dump(const PerfBranchSample &Sample) const {
Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
for (const LBREntry &LBR : Sample.LBR) {
dump(LBR);
}
}
void DataAggregator::dump(const PerfMemSample &Sample) const {
Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n";
}