[BOLT] Add value profiling to BOLT

Summary:
Add support for reading value profiling info from perf data.  This diff adds support in DataReader/DataAggregator for value profiling data.  Each event is recorded as two Locations (a PC and an address/value) and a count.

For now, I'm assuming that the value profiling data is in the same file as the usual BOLT profiling data.  Collecting both at the same time seems to work.

(cherry picked from FBD6076877)
This commit is contained in:
Bill Nell 2017-10-16 13:09:43 -07:00 committed by Maksim Panchenko
parent 1288c81c9b
commit 9e42885d04
6 changed files with 699 additions and 104 deletions

View File

@ -369,6 +369,16 @@ void BinaryContext::printInstruction(raw_ostream &OS,
}
}
auto *MD = Function ? DR.getFuncMemData(Function->getNames()) : nullptr;
if (MD) {
bool DidPrint = false;
for (auto &MI : MD->getMemInfoRange(Offset)) {
OS << (DidPrint ? ", " : " # Loads: ");
OS << MI.Addr << "/" << MI.Count;
DidPrint = true;
}
}
OS << "\n";
if (printMCInst) {

View File

@ -21,6 +21,7 @@
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/Timer.h"
#include <unistd.h>
@ -64,7 +65,8 @@ void DataAggregator::start(StringRef PerfDataFilename) {
outs() << "PERF2BOLT: Starting data aggregation job for " << PerfDataFilename
<< "\n";
findPerfExecutable();
launchPerfEventsNoWait();
launchPerfBranchEventsNoWait();
launchPerfMemEventsNoWait();
launchPerfTasksNoWait();
}
@ -73,17 +75,18 @@ void DataAggregator::abort() {
// Kill subprocesses in case they are not finished
sys::Wait(TasksPI, 1, false, &Error);
sys::Wait(EventsPI, 1, false, &Error);
sys::Wait(BranchEventsPI, 1, false, &Error);
sys::Wait(MemEventsPI, 1, false, &Error);
deleteTempFiles();
}
bool DataAggregator::launchPerfEventsNoWait() {
bool DataAggregator::launchPerfBranchEventsNoWait() {
SmallVector<const char*, 4> Argv;
SmallVector<StringRef, 3> Redirects;
SmallVector<const StringRef*, 3> RedirectPtrs;
outs() << "PERF2BOLT: Spawning perf-script job to read events\n";
outs() << "PERF2BOLT: Spawning perf-script job to read branch events\n";
Argv.push_back(PerfPath.data());
Argv.push_back("script");
Argv.push_back("-F");
@ -93,32 +96,77 @@ bool DataAggregator::launchPerfEventsNoWait() {
Argv.push_back(nullptr);
if (auto Errc = sys::fs::createTemporaryFile("perf.script", "out",
PerfEventsOutputPath)) {
PerfBranchEventsOutputPath)) {
outs() << "PERF2BOLT: Failed to create temporary file "
<< PerfEventsOutputPath << " with error " << Errc.message() << "\n";
<< PerfBranchEventsOutputPath << " with error " << Errc.message() << "\n";
exit(1);
}
if (auto Errc = sys::fs::createTemporaryFile("perf.script", "err",
PerfEventsErrPath)) {
PerfBranchEventsErrPath)) {
outs() << "PERF2BOLT: Failed to create temporary file "
<< PerfEventsErrPath << " with error " << Errc.message() << "\n";
<< PerfBranchEventsErrPath << " with error " << Errc.message() << "\n";
exit(1);
}
Redirects.push_back(""); // Stdin
Redirects.push_back(StringRef(PerfEventsOutputPath.data())); // Stdout
Redirects.push_back(StringRef(PerfEventsErrPath.data())); // Stderr
Redirects.push_back(""); // Stdin
Redirects.push_back(StringRef(PerfBranchEventsOutputPath.data())); // Stdout
Redirects.push_back(StringRef(PerfBranchEventsErrPath.data())); // Stderr
RedirectPtrs.push_back(&Redirects[0]);
RedirectPtrs.push_back(&Redirects[1]);
RedirectPtrs.push_back(&Redirects[2]);
DEBUG(dbgs() << "Launching perf: " << PerfPath.data() << " 1> "
<< PerfEventsOutputPath.data() << " 2> "
<< PerfEventsErrPath.data() << "\n");
<< PerfBranchEventsOutputPath.data() << " 2> "
<< PerfBranchEventsErrPath.data() << "\n");
EventsPI = sys::ExecuteNoWait(PerfPath.data(), Argv.data(),
/*envp*/ nullptr, &RedirectPtrs[0]);
BranchEventsPI = sys::ExecuteNoWait(PerfPath.data(), Argv.data(),
/*envp*/ nullptr, &RedirectPtrs[0]);
return true;
}
bool DataAggregator::launchPerfMemEventsNoWait() {
SmallVector<const char*, 4> Argv;
SmallVector<StringRef, 3> Redirects;
SmallVector<const StringRef*, 3> RedirectPtrs;
outs() << "PERF2BOLT: Spawning perf-script job to read mem events\n";
Argv.push_back(PerfPath.data());
Argv.push_back("script");
Argv.push_back("-F");
Argv.push_back("pid,event,addr,ip");
Argv.push_back("-i");
Argv.push_back(PerfDataFilename.data());
Argv.push_back(nullptr);
if (auto Errc = sys::fs::createTemporaryFile("perf.script", "out",
PerfMemEventsOutputPath)) {
outs() << "PERF2BOLT: Failed to create temporary file "
<< PerfMemEventsOutputPath << " with error " << Errc.message() << "\n";
exit(1);
}
if (auto Errc = sys::fs::createTemporaryFile("perf.script", "err",
PerfMemEventsErrPath)) {
outs() << "PERF2BOLT: Failed to create temporary file "
<< PerfMemEventsErrPath << " with error " << Errc.message() << "\n";
exit(1);
}
Redirects.push_back(""); // Stdin
Redirects.push_back(StringRef(PerfMemEventsOutputPath.data())); // Stdout
Redirects.push_back(StringRef(PerfMemEventsErrPath.data())); // Stderr
RedirectPtrs.push_back(&Redirects[0]);
RedirectPtrs.push_back(&Redirects[1]);
RedirectPtrs.push_back(&Redirects[2]);
DEBUG(dbgs() << "Launching perf: " << PerfPath.data() << " 1> "
<< PerfMemEventsOutputPath.data() << " 2> "
<< PerfMemEventsErrPath.data() << "\n");
MemEventsPI = sys::ExecuteNoWait(PerfPath.data(), Argv.data(),
/*envp*/ nullptr, &RedirectPtrs[0]);
return true;
}
@ -276,8 +324,10 @@ void DataAggregator::deleteTempFile(StringRef File) {
}
void DataAggregator::deleteTempFiles() {
deleteTempFile(PerfEventsErrPath.data());
deleteTempFile(PerfEventsOutputPath.data());
deleteTempFile(PerfBranchEventsErrPath.data());
deleteTempFile(PerfBranchEventsOutputPath.data());
deleteTempFile(PerfMemEventsErrPath.data());
deleteTempFile(PerfMemEventsOutputPath.data());
deleteTempFile(PerfTasksErrPath.data());
deleteTempFile(PerfTasksOutputPath.data());
}
@ -328,7 +378,7 @@ bool DataAggregator::aggregate(BinaryContext &BC,
outs()
<< "PERF2BOLT: Waiting for perf events collection to finish...\n";
auto PI2 = sys::Wait(EventsPI, 0, true, &Error);
auto PI2 = sys::Wait(BranchEventsPI, 0, true, &Error);
if (!Error.empty()) {
errs() << "PERF-ERROR: " << Error << "\n";
@ -338,7 +388,7 @@ bool DataAggregator::aggregate(BinaryContext &BC,
if (PI2.ReturnCode != 0) {
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
MemoryBuffer::getFileOrSTDIN(PerfEventsErrPath.data());
MemoryBuffer::getFileOrSTDIN(PerfBranchEventsErrPath.data());
StringRef ErrBuf = (*MB)->getBuffer();
errs() << "PERF-ERROR: Return code " << PI2.ReturnCode << "\n";
@ -348,23 +398,59 @@ bool DataAggregator::aggregate(BinaryContext &BC,
}
ErrorOr<std::unique_ptr<MemoryBuffer>> MB2 =
MemoryBuffer::getFileOrSTDIN(PerfEventsOutputPath.data());
MemoryBuffer::getFileOrSTDIN(PerfBranchEventsOutputPath.data());
if (std::error_code EC = MB2.getError()) {
errs() << "Cannot open " << PerfEventsOutputPath.data() << ": "
errs() << "Cannot open " << PerfBranchEventsOutputPath.data() << ": "
<< EC.message() << "\n";
deleteTempFiles();
exit(1);
}
FileBuf.reset(MB2->release());
deleteTempFiles();
ParsingBuf = FileBuf->getBuffer();
Col = 0;
Line = 1;
if (parseEvents()) {
outs() << "PERF2BOLT: Failed to parse events\n";
if (parseBranchEvents()) {
outs() << "PERF2BOLT: Failed to parse branch events\n";
}
auto PI3 = sys::Wait(MemEventsPI, 0, true, &Error);
if (PI3.ReturnCode != 0) {
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
MemoryBuffer::getFileOrSTDIN(PerfMemEventsErrPath.data());
StringRef ErrBuf = (*MB)->getBuffer();
deleteTempFiles();
Regex NoData("Samples for '.*' event do not have ADDR attribute set. Cannot print 'addr' field.");
if (!NoData.match(ErrBuf)) {
errs() << "PERF-ERROR: Return code " << PI3.ReturnCode << "\n";
errs() << ErrBuf;
exit(1);
}
return true;
}
ErrorOr<std::unique_ptr<MemoryBuffer>> MB3 =
MemoryBuffer::getFileOrSTDIN(PerfMemEventsOutputPath.data());
if (std::error_code EC = MB3.getError()) {
errs() << "Cannot open " << PerfMemEventsOutputPath.data() << ": "
<< EC.message() << "\n";
deleteTempFiles();
exit(1);
}
FileBuf.reset(MB3->release());
ParsingBuf = FileBuf->getBuffer();
Col = 0;
Line = 1;
if (parseMemEvents()) {
outs() << "PERF2BOLT: Failed to parse memory events\n";
}
deleteTempFiles();
return true;
}
@ -547,8 +633,8 @@ void DataAggregator::consumeRestOfLine() {
Line += 1;
}
ErrorOr<PerfSample> DataAggregator::parseSample() {
PerfSample Res;
ErrorOr<PerfBranchSample> DataAggregator::parseBranchSample() {
PerfBranchSample Res;
while (checkAndConsumeFS()) {}
@ -572,6 +658,49 @@ ErrorOr<PerfSample> DataAggregator::parseSample() {
return Res;
}
/// Parse one line of the memory-event perf script output, whose fields are
/// read in the order: PID, event name, address (hex), PC (hex).
///
/// Lines whose PID is not in a non-empty PIDs set, or whose event name does
/// not contain "mem-loads", are consumed and reported as a {0, 0} sample.
/// A parse failure of any field is returned as the corresponding error code.
ErrorOr<PerfMemSample> DataAggregator::parseMemSample() {
  // Placeholder handed back for lines that are filtered out.
  PerfMemSample Skipped{0, 0};

  // Field 1: PID.
  while (checkAndConsumeFS()) {}
  auto PID = parseNumberField(FieldSeparator, true);
  if (std::error_code EC = PID.getError())
    return EC;
  const bool ForeignPID = !PIDs.empty() && !PIDs.count(PID.get());
  if (ForeignPID) {
    consumeRestOfLine();
    return Skipped;
  }

  // Field 2: event name; only memory load events are of interest.
  while (checkAndConsumeFS()) {}
  auto EventName = parseString(FieldSeparator);
  if (std::error_code EC = EventName.getError())
    return EC;
  const bool IsMemLoad = EventName.get().find("mem-loads") != StringRef::npos;
  if (!IsMemLoad) {
    consumeRestOfLine();
    return Skipped;
  }

  // Field 3: the address that was accessed.
  while (checkAndConsumeFS()) {}
  auto LoadAddr = parseHexField(FieldSeparator);
  if (std::error_code EC = LoadAddr.getError())
    return EC;

  // Field 4: the PC of the sampled instruction; it may end the line.
  while (checkAndConsumeFS()) {}
  auto SamplePC = parseHexField(FieldSeparator, true);
  if (std::error_code EC = SamplePC.getError()) {
    consumeRestOfLine();
    return EC;
  }

  checkAndConsumeNewLine();

  return PerfMemSample{SamplePC.get(), LoadAddr.get()};
}
bool DataAggregator::hasData() {
if (ParsingBuf.size() == 0)
return false;
@ -579,14 +708,14 @@ bool DataAggregator::hasData() {
return true;
}
std::error_code DataAggregator::parseEvents() {
outs() << "PERF2BOLT: Aggregating...\n";
NamedRegionTimer T("Samples parsing", TimerGroupName, opts::TimeAggregator);
std::error_code DataAggregator::parseBranchEvents() {
outs() << "PERF2BOLT: Aggregating branch events...\n";
NamedRegionTimer T("Branch samples parsing", TimerGroupName, opts::TimeAggregator);
uint64_t NumEntries{0};
uint64_t NumSamples{0};
uint64_t NumTraces{0};
while (hasData()) {
auto SampleRes = parseSample();
auto SampleRes = parseBranchSample();
if (std::error_code EC = SampleRes.getError())
return EC;
@ -648,6 +777,58 @@ std::error_code DataAggregator::parseEvents() {
return std::error_code();
}
std::error_code DataAggregator::parseMemEvents() {
outs() << "PERF2BOLT: Aggregating memory events...\n";
NamedRegionTimer T("Mem samples parsing", TimerGroupName, opts::TimeAggregator);
while (hasData()) {
auto SampleRes = parseMemSample();
if (std::error_code EC = SampleRes.getError())
return EC;
auto PC = SampleRes.get().PC;
auto Addr = SampleRes.get().Addr;
StringRef FuncName;
StringRef MemName;
// Try to resolve symbol for PC
auto *Func = getBinaryFunctionContainingAddress(PC);
if (Func) {
FuncName = Func->getNames()[0];
PC -= Func->getAddress();
}
// Try to resolve symbol for memory load
auto *MemFunc = getBinaryFunctionContainingAddress(Addr);
if (MemFunc) {
MemName = MemFunc->getNames()[0];
Addr -= MemFunc->getAddress();
} else {
// TODO: global symbol size?
auto Sym = BC->getGlobalSymbolAtAddress(Addr);
if (Sym) {
MemName = Sym->getName();
Addr = 0;
}
}
const Location FuncLoc(!FuncName.empty(), FuncName, PC);
const Location AddrLoc(!MemName.empty(), MemName, Addr);
// TODO what does it mean when PC is 0 (or not a known function)?
DEBUG(if (!Func && PC != 0) {
dbgs() << "Skipped mem event: " << FuncLoc << " = " << AddrLoc << "\n";
});
if (Func) {
FuncsToMemEvents[FuncName].update(FuncLoc, AddrLoc);
DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
}
}
return std::error_code();
}
ErrorOr<int64_t> DataAggregator::parseTaskPID() {
while (checkAndConsumeFS()) {}
@ -745,35 +926,52 @@ std::error_code DataAggregator::writeAggregatedFile() const {
if (EC)
return EC;
uint64_t Values{0};
bool WriteMemLocs = false;
auto writeLocation = [&OutFile,&WriteMemLocs](const Location &Loc) {
if (WriteMemLocs)
OutFile << (Loc.IsSymbol ? "4 " : "3 ");
else
OutFile << (Loc.IsSymbol ? "1 " : "0 ");
OutFile << (Loc.Name.empty() ? "[unknown]" : Loc.Name) << " "
<< Twine::utohexstr(Loc.Offset)
<< FieldSeparator;
};
uint64_t BranchValues{0};
uint64_t MemValues{0};
for (const auto &Func : FuncsToBranches) {
for (const auto &BI : Func.getValue().Data) {
OutFile << (BI.From.IsSymbol ? "1 " : "0 ")
<< (BI.From.Name.empty() ? "[unknown]" : BI.From.Name) << " "
<< Twine::utohexstr(BI.From.Offset) << " "
<< (BI.To.IsSymbol ? "1 " : "0 ")
<< (BI.To.Name.empty() ? "[unknown]" : BI.To.Name) << " "
<< Twine::utohexstr(BI.To.Offset) << " " << BI.Mispreds << " "
<< BI.Branches << "\n";
++Values;
writeLocation(BI.From);
writeLocation(BI.To);
OutFile << BI.Mispreds << " " << BI.Branches << "\n";
++BranchValues;
}
for (const auto &BI : Func.getValue().EntryData) {
// Do not output if source is a known symbol, since this was already
// accounted for in the source function
if (BI.From.IsSymbol)
continue;
OutFile << (BI.From.IsSymbol ? "1 " : "0 ")
<< (BI.From.Name.empty() ? "[unknown]" : BI.From.Name) << " "
<< Twine::utohexstr(BI.From.Offset) << " "
<< (BI.To.IsSymbol ? "1 " : "0 ")
<< (BI.To.Name.empty() ? "[unknown]" : BI.To.Name) << " "
<< Twine::utohexstr(BI.To.Offset) << " " << BI.Mispreds << " "
<< BI.Branches << "\n";
++Values;
writeLocation(BI.From);
writeLocation(BI.To);
OutFile << BI.Mispreds << " " << BI.Branches << "\n";
++BranchValues;
}
}
outs() << "PERF2BOLT: Wrote " << Values << " objects to "
<< OutputFDataName << "\n";
WriteMemLocs = true;
for (const auto &Func : FuncsToMemEvents) {
for (const auto &MemEvent : Func.getValue().Data) {
writeLocation(MemEvent.Offset);
writeLocation(MemEvent.Addr);
OutFile << MemEvent.Count << "\n";
++MemValues;
}
}
outs() << "PERF2BOLT: Wrote " << BranchValues << " branch objects and "
<< MemValues << " memory objects to " << OutputFDataName << "\n";
return std::error_code();
}
@ -788,9 +986,13 @@ void DataAggregator::dump(const LBREntry &LBR) const {
<< "\n";
}
void DataAggregator::dump(const PerfSample &Sample) const {
void DataAggregator::dump(const PerfBranchSample &Sample) const {
Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
for (const auto &LBR : Sample.LBR) {
dump(LBR);
}
}
/// Debugging aid: write the PC and accessed address of \p Sample to Diag.
void DataAggregator::dump(const PerfMemSample &Sample) const {
  Diag << "Sample mem entries: " << Sample.PC;
  Diag << ": " << Sample.Addr << "\n";
}

View File

@ -34,10 +34,15 @@ struct LBREntry {
bool Mispred;
};
struct PerfSample {
struct PerfBranchSample {
SmallVector<LBREntry, 16> LBR;
};
/// A single memory sample as read from the perf script output: the program
/// counter of the sampled instruction and the address reported with it.
/// Kept as a plain aggregate; it is brace-initialized by the parser.
struct PerfMemSample {
// Program counter ("ip" field) of the sample.
uint64_t PC;
// Address ("addr" field) recorded for the sample.
uint64_t Addr;
};
/// DataAggregator inherits all parsing logic from DataReader as well as
/// its data structures used to represent aggregated profile data in memory.
///
@ -61,10 +66,13 @@ struct PerfSample {
class DataAggregator : public DataReader {
// Perf process spawning bookkeeping
std::string PerfPath;
sys::ProcessInfo EventsPI;
sys::ProcessInfo BranchEventsPI;
sys::ProcessInfo MemEventsPI;
sys::ProcessInfo TasksPI;
SmallVector<char, 256> PerfEventsOutputPath;
SmallVector<char, 256> PerfEventsErrPath;
SmallVector<char, 256> PerfBranchEventsOutputPath;
SmallVector<char, 256> PerfBranchEventsErrPath;
SmallVector<char, 256> PerfMemEventsOutputPath;
SmallVector<char, 256> PerfMemEventsErrPath;
SmallVector<char, 256> PerfTasksOutputPath;
SmallVector<char, 256> PerfTasksErrPath;
@ -93,9 +101,13 @@ class DataAggregator : public DataReader {
/// Looks into system PATH for Linux Perf and set up the aggregator to use it
void findPerfExecutable();
/// Launch a subprocess to read all perf samples and write them to an output
/// file we will parse later
bool launchPerfEventsNoWait();
/// Launch a subprocess to read all perf branch samples and write them to an
/// output file we will parse later
bool launchPerfBranchEventsNoWait();
/// Launch a subprocess to read all perf memory event samples and write them
/// to an output file we will parse later
bool launchPerfMemEventsNoWait();
/// Launch a subprocess to read all perf task events. They contain the mapping
/// of binary file name to PIDs used during data collection time. We later use
@ -139,7 +151,11 @@ class DataAggregator : public DataReader {
/// Parse a single perf sample containing a PID associated with a sequence of
/// LBR entries
ErrorOr<PerfSample> parseSample();
ErrorOr<PerfBranchSample> parseBranchSample();
/// Parse a single perf sample containing a PID associated with an IP and
/// address.
ErrorOr<PerfMemSample> parseMemSample();
/// Check if a field separator is the next char to parse and, if yes, consume
/// it and return true
@ -151,8 +167,11 @@ class DataAggregator : public DataReader {
/// Parse a single LBR entry as output by perf script -Fbrstack
ErrorOr<LBREntry> parseLBREntry();
/// Parse the full output generated by perf script to report LBR samples
std::error_code parseEvents();
/// Parse the full output generated by perf script to report LBR samples.
std::error_code parseBranchEvents();
/// Parse the full output generated by perf script to report memory events.
std::error_code parseMemEvents();
/// Parse a single line of a PERF_RECORD_COMM event looking for an association
/// between the binary name and its PID. Return -1 if binary name is not
@ -207,7 +226,8 @@ public:
/// Debugging dump methods
void dump() const;
void dump(const LBREntry &LBR) const;
void dump(const PerfSample &Sample) const;
void dump(const PerfBranchSample &Sample) const;
void dump(const PerfMemSample &Sample) const;
};
}
}

View File

@ -41,6 +41,17 @@ StringRef normalizeName(StringRef Name) {
} // anonymous namespace
/// Print \p Loc in human readable form: "<name>" or "<name>+<hex offset>" for
/// symbol locations (the offset is omitted when it is zero), and just the hex
/// offset for non-symbol locations.
raw_ostream &operator<<(raw_ostream &OS, const Location &Loc) {
  if (!Loc.IsSymbol) {
    OS << Twine::utohexstr(Loc.Offset);
    return OS;
  }

  OS << Loc.Name;
  if (Loc.Offset)
    OS << "+" << Twine::utohexstr(Loc.Offset);
  return OS;
}
iterator_range<FuncBranchData::ContainerTy::const_iterator>
FuncBranchData::getBranchRange(uint64_t From) const {
assert(std::is_sorted(Data.begin(), Data.end()));
@ -285,6 +296,39 @@ FuncBranchData::getDirectCallBranch(uint64_t From) const {
return make_error_code(llvm::errc::invalid_argument);
}
/// Write this record as one line: the Offset location, the Addr location and
/// the count, separated by spaces. Each location is emitted as a numeric tag
/// (IsSymbol + 3), its name and its hexadecimal offset.
void MemInfo::print(raw_ostream &OS) const {
  auto PrintLoc = [&OS](const Location &Loc) {
    OS << (Loc.IsSymbol + 3) << " " << Loc.Name << " "
       << Twine::utohexstr(Loc.Offset) << " ";
  };

  PrintLoc(Offset);
  PrintLoc(Addr);
  OS << Count << "\n";
}
/// Return the sub-range of Data whose records have an Offset.Offset equal to
/// \p Offset. Data is expected to be sorted (checked by assertion).
iterator_range<FuncMemData::ContainerTy::const_iterator>
FuncMemData::getMemInfoRange(uint64_t Offset) const {
  assert(std::is_sorted(Data.begin(), Data.end()));

  // Orders a record against a raw offset (in both argument orders, as
  // required by std::equal_range) using only the numeric load offset.
  struct ByLoadOffset {
    bool operator()(const uint64_t Key, const MemInfo &Entry) const {
      return Key < Entry.Offset.Offset;
    }
    bool operator()(const MemInfo &Entry, const uint64_t Key) const {
      return Entry.Offset.Offset < Key;
    }
  };

  const auto Bounds =
      std::equal_range(Data.begin(), Data.end(), Offset, ByLoadOffset());
  return iterator_range<ContainerTy::const_iterator>(Bounds.first,
                                                     Bounds.second);
}
/// Record one memory event at \p Offset touching \p Addr.
///
/// EventIndex maps (Offset.Offset, Addr) to the position of the matching
/// record in Data. If the pair has been seen before, its Count is bumped;
/// otherwise a new record with a count of 1 is appended and indexed.
void FuncMemData::update(const Location &Offset, const Location &Addr) {
  // Resolve the per-offset map once instead of re-hashing the offset for
  // every access. Inserting into the inner map does not touch the outer one,
  // so this reference stays valid for the rest of the function.
  auto &AddrIndex = EventIndex[Offset.Offset];

  auto Iter = AddrIndex.find(Addr);
  if (Iter != AddrIndex.end()) {
    ++Data[Iter->second].Count;
    return;
  }

  // First occurrence: construct the record in place and remember its slot.
  Data.emplace_back(Offset, Addr, 1);
  AddrIndex[Addr] = Data.size() - 1;
}
ErrorOr<std::unique_ptr<DataReader>>
DataReader::readPerfData(StringRef Path, raw_ostream &Diag) {
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
@ -295,7 +339,7 @@ DataReader::readPerfData(StringRef Path, raw_ostream &Diag) {
}
auto DR = make_unique<DataReader>(std::move(MB.get()), Diag);
DR->parse();
DR->buildLTONameMap();
DR->buildLTONameMaps();
return std::move(DR);
}
@ -366,16 +410,43 @@ ErrorOr<int64_t> DataReader::parseNumberField(char EndChar, bool EndNl) {
return Num;
}
ErrorOr<Location> DataReader::parseLocation(char EndChar, bool EndNl) {
/// Parse the next field, delimited by \p EndChar (and, per \p EndNl, passed
/// through to parseString), as a base-16 unsigned integer.
///
/// Returns the parsed value, the error produced by parseString, or
/// errc::io_error (after reporting a diagnostic) when the field is not a
/// valid hexadecimal number.
ErrorOr<uint64_t> DataReader::parseHexField(char EndChar, bool EndNl) {
  auto NumStrRes = parseString(EndChar, EndNl);
  if (std::error_code EC = NumStrRes.getError())
    return EC;

  StringRef NumStr = NumStrRes.get();
  uint64_t Num;
  // A true result from getAsInteger is treated as a parse failure.
  if (NumStr.getAsInteger(16, Num)) {
    reportError("expected hexadecimal number");
    Diag << "Found: " << NumStr << "\n";
    return make_error_code(llvm::errc::io_error);
  }
  return Num;
}
ErrorOr<Location> DataReader::parseLocation(char EndChar,
bool EndNl,
bool ExpectMemLoc) {
// Read whether the location of the branch should be DSO or a symbol
// 0 means it is a DSO. 1 means it is a global symbol. 2 means it is a local
// symbol.
if (ParsingBuf[0] != '0' && ParsingBuf[0] != '1' && ParsingBuf[0] != '2') {
// The symbol flag is also used to tag memory load events by adding 3 to the
// base values, i.e. 3 not a symbol, 4 global symbol and 5 local symbol.
if (!ExpectMemLoc &&
ParsingBuf[0] != '0' && ParsingBuf[0] != '1' && ParsingBuf[0] != '2') {
reportError("expected 0, 1 or 2");
return make_error_code(llvm::errc::io_error);
}
bool IsSymbol = ParsingBuf[0] == '1' || ParsingBuf[0] == '2';
if (ExpectMemLoc &&
ParsingBuf[0] != '3' && ParsingBuf[0] != '4' && ParsingBuf[0] != '5') {
reportError("expected 3, 4 or 5");
return make_error_code(llvm::errc::io_error);
}
bool IsSymbol =
(!ExpectMemLoc && (ParsingBuf[0] == '1' || ParsingBuf[0] == '2')) ||
(ExpectMemLoc && (ParsingBuf[0] == '4' || ParsingBuf[0] == '5'));
ParsingBuf = ParsingBuf.drop_front(1);
Col += 1;
@ -389,18 +460,11 @@ ErrorOr<Location> DataReader::parseLocation(char EndChar, bool EndNl) {
StringRef Name = NameRes.get();
// Read the offset
auto OffsetStrRes = parseString(EndChar, EndNl);
if (std::error_code EC = OffsetStrRes.getError())
auto Offset = parseHexField(EndChar, EndNl);
if (std::error_code EC = Offset.getError())
return EC;
StringRef OffsetStr = OffsetStrRes.get();
uint64_t Offset;
if (OffsetStr.getAsInteger(16, Offset)) {
reportError("expected hexadecimal number");
Diag << "Found: " << OffsetStr << "\n";
return make_error_code(llvm::errc::io_error);
}
return Location(IsSymbol, Name, Offset);
return Location(IsSymbol, Name, Offset.get());
}
ErrorOr<BranchHistory> DataReader::parseBranchHistory() {
@ -483,6 +547,26 @@ ErrorOr<BranchInfo> DataReader::parseBranchInfo() {
std::move(Histories));
}
/// Parse one memory event record: two memory locations (the load's offset
/// location and the accessed address location) followed by a count that may
/// end the line. Returns the first parsing error encountered, if any.
ErrorOr<MemInfo> DataReader::parseMemInfo() {
  auto OffsetRes = parseMemLocation(FieldSeparator);
  if (std::error_code EC = OffsetRes.getError())
    return EC;

  auto AddrRes = parseMemLocation(FieldSeparator);
  if (std::error_code EC = AddrRes.getError())
    return EC;

  auto CountRes = parseNumberField(FieldSeparator, true);
  if (std::error_code EC = CountRes.getError())
    return EC;

  checkAndConsumeNewLine();

  const Location LoadOffset = OffsetRes.get();
  const Location LoadAddr = AddrRes.get();
  return MemInfo(LoadOffset, LoadAddr, CountRes.get());
}
ErrorOr<SampleInfo> DataReader::parseSampleInfo() {
auto Res = parseLocation(FieldSeparator);
if (std::error_code EC = Res.getError())
@ -525,7 +609,7 @@ ErrorOr<bool> DataReader::maybeParseNoLBRFlag() {
return true;
}
bool DataReader::hasData() {
bool DataReader::hasBranchData() {
if (ParsingBuf.size() == 0)
return false;
@ -534,6 +618,15 @@ bool DataReader::hasData() {
return false;
}
/// Return true if the parsing buffer is non-empty and starts with one of the
/// memory-location tags '3', '4' or '5'.
bool DataReader::hasMemData() {
  if (ParsingBuf.size() == 0)
    return false;

  const char Tag = ParsingBuf[0];
  return Tag == '3' || Tag == '4' || Tag == '5';
}
std::error_code DataReader::parseInNoLBRMode() {
auto GetOrCreateFuncEntry = [&](StringRef Name) {
auto I = FuncsToSamples.find(Name);
@ -547,7 +640,18 @@ std::error_code DataReader::parseInNoLBRMode() {
return I;
};
while (hasData()) {
auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
auto I = FuncsToMemEvents.find(Name);
if (I == FuncsToMemEvents.end()) {
bool success;
std::tie(I, success) = FuncsToMemEvents.insert(
std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
assert(success && "unexpected result of insert");
}
return I;
};
while (hasBranchData()) {
auto Res = parseSampleInfo();
if (std::error_code EC = Res.getError())
return EC;
@ -562,11 +666,31 @@ std::error_code DataReader::parseInNoLBRMode() {
I->getValue().Data.emplace_back(std::move(SI));
}
while (hasMemData()) {
auto Res = parseMemInfo();
if (std::error_code EC = Res.getError())
return EC;
MemInfo MI = Res.get();
// Ignore memory events not involving known pc.
if (!MI.Offset.IsSymbol)
continue;
auto I = GetOrCreateFuncMemEntry(MI.Offset.Name);
I->getValue().Data.emplace_back(std::move(MI));
}
for (auto &FuncSamples : FuncsToSamples) {
std::stable_sort(FuncSamples.second.Data.begin(),
FuncSamples.second.Data.end());
}
for (auto &MemEvents : FuncsToMemEvents) {
std::stable_sort(MemEvents.second.Data.begin(),
MemEvents.second.Data.end());
}
return std::error_code();
}
@ -584,6 +708,17 @@ std::error_code DataReader::parse() {
return I;
};
auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
auto I = FuncsToMemEvents.find(Name);
if (I == FuncsToMemEvents.end()) {
bool success;
std::tie(I, success) = FuncsToMemEvents.insert(
std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
assert(success && "unexpected result of insert");
}
return I;
};
Col = 0;
Line = 1;
auto FlagOrErr = maybeParseNoLBRFlag();
@ -593,7 +728,7 @@ std::error_code DataReader::parse() {
if (NoLBRMode)
return parseInNoLBRMode();
while (hasData()) {
while (hasBranchData()) {
auto Res = parseBranchInfo();
if (std::error_code EC = Res.getError())
return EC;
@ -624,21 +759,48 @@ std::error_code DataReader::parse() {
}
}
while (hasMemData()) {
auto Res = parseMemInfo();
if (std::error_code EC = Res.getError())
return EC;
MemInfo MI = Res.get();
// Ignore memory events not involving known pc.
if (!MI.Offset.IsSymbol)
continue;
auto I = GetOrCreateFuncMemEntry(MI.Offset.Name);
I->getValue().Data.emplace_back(std::move(MI));
}
for (auto &FuncBranches : FuncsToBranches) {
std::stable_sort(FuncBranches.second.Data.begin(),
FuncBranches.second.Data.end());
}
for (auto &MemEvents : FuncsToMemEvents) {
std::stable_sort(MemEvents.second.Data.begin(),
MemEvents.second.Data.end());
}
return std::error_code();
}
void DataReader::buildLTONameMap() {
void DataReader::buildLTONameMaps() {
for (auto &FuncData : FuncsToBranches) {
const auto FuncName = FuncData.getKey();
const auto CommonName = getLTOCommonName(FuncName);
if (CommonName)
LTOCommonNameMap[*CommonName].push_back(&FuncData.getValue());
}
for (auto &FuncData : FuncsToMemEvents) {
const auto FuncName = FuncData.getKey();
const auto CommonName = getLTOCommonName(FuncName);
if (CommonName)
LTOCommonNameMemMap[*CommonName].push_back(&FuncData.getValue());
}
}
namespace {
@ -654,21 +816,14 @@ fetchMapEntry(MapTy &Map, const std::vector<std::string> &FuncNames) {
}
return nullptr;
}
}
FuncBranchData *
DataReader::getFuncBranchData(const std::vector<std::string> &FuncNames) {
return fetchMapEntry<FuncsToBranchesMapTy>(FuncsToBranches, FuncNames);
}
FuncSampleData *
DataReader::getFuncSampleData(const std::vector<std::string> &FuncNames) {
return fetchMapEntry<FuncsToSamplesMapTy>(FuncsToSamples, FuncNames);
}
std::vector<FuncBranchData *>
DataReader::getFuncBranchDataRegex(const std::vector<std::string> &FuncNames) {
std::vector<FuncBranchData *> AllData;
template <typename MapTy>
std::vector<decltype(MapTy::MapEntryTy::second) *>
fetchMapEntriesRegex(
MapTy &Map,
const StringMap<std::vector<decltype(MapTy::MapEntryTy::second) *>> &LTOCommonNameMap,
const std::vector<std::string> &FuncNames) {
std::vector<decltype(MapTy::MapEntryTy::second) *> AllData;
// Do a reverse order iteration since the name in profile has a higher chance
// of matching a name at the end of the list.
for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
@ -682,8 +837,8 @@ DataReader::getFuncBranchDataRegex(const std::vector<std::string> &FuncNames) {
AllData.insert(AllData.end(), CommonData.begin(), CommonData.end());
}
} else {
auto I = FuncsToBranches.find(Name);
if (I != FuncsToBranches.end()) {
auto I = Map.find(Name);
if (I != Map.end()) {
return {&I->getValue()};
}
}
@ -691,6 +846,33 @@ DataReader::getFuncBranchDataRegex(const std::vector<std::string> &FuncNames) {
return AllData;
}
}
/// Look up branch profile data for a function known under \p FuncNames.
/// Delegates to fetchMapEntry over the FuncsToBranches map and returns
/// whatever pointer it yields.
FuncBranchData *
DataReader::getFuncBranchData(const std::vector<std::string> &FuncNames) {
return fetchMapEntry<FuncsToBranchesMapTy>(FuncsToBranches, FuncNames);
}
/// Look up memory event data for a function known under \p FuncNames.
/// Delegates to fetchMapEntry over the FuncsToMemEvents map and returns
/// whatever pointer it yields.
FuncMemData *
DataReader::getFuncMemData(const std::vector<std::string> &FuncNames) {
return fetchMapEntry<FuncsToMemEventsMapTy>(FuncsToMemEvents, FuncNames);
}
/// Look up sample data for a function known under \p FuncNames.
/// Delegates to fetchMapEntry over the FuncsToSamples map and returns
/// whatever pointer it yields.
FuncSampleData *
DataReader::getFuncSampleData(const std::vector<std::string> &FuncNames) {
return fetchMapEntry<FuncsToSamplesMapTy>(FuncsToSamples, FuncNames);
}
/// Collect all branch profile entries matching the names in \p FuncNames.
/// Delegates to fetchMapEntriesRegex with the FuncsToBranches map and the
/// LTOCommonNameMap built for branch data.
std::vector<FuncBranchData *>
DataReader::getFuncBranchDataRegex(const std::vector<std::string> &FuncNames) {
return fetchMapEntriesRegex(FuncsToBranches, LTOCommonNameMap, FuncNames);
}
/// Collect all memory event entries matching the names in \p FuncNames.
/// Delegates to fetchMapEntriesRegex with the FuncsToMemEvents map and the
/// LTOCommonNameMemMap built for memory event data.
std::vector<FuncMemData *>
DataReader::getFuncMemDataRegex(const std::vector<std::string> &FuncNames) {
return fetchMapEntriesRegex(FuncsToMemEvents, LTOCommonNameMemMap, FuncNames);
}
bool DataReader::hasLocalsWithFileName() const {
for (const auto &Func : FuncsToBranches) {
const auto &FuncName = Func.getKey();
@ -739,6 +921,20 @@ void DataReader::dump() const {
<< SI.Occurrences << "\n";
}
}
for (const auto &Func : FuncsToMemEvents) {
Diag << "Memory events for " << Func.getValue().Name;
Location LastOffset(0);
for (auto &MI : Func.getValue().Data) {
if (MI.Offset == LastOffset) {
Diag << ", " << MI.Addr << "/" << MI.Count;
} else {
Diag << "\n" << MI.Offset << ": " << MI.Addr << "/" << MI.Count;
}
LastOffset = MI.Offset;
}
Diag << "\n";
}
}
} // namespace bolt

View File

@ -32,7 +32,7 @@ namespace llvm {
namespace bolt {
/// LTO-generated function names take a form:
//
///
/// <function_name>.lto_priv.<decimal_number>/...
/// or
/// <function_name>.constprop.<decimal_number>/...
@ -62,6 +62,9 @@ struct Location {
StringRef Name;
uint64_t Offset;
explicit Location(uint64_t Offset)
: IsSymbol(false), Name("[unknown]"), Offset(Offset) {}
Location(bool IsSymbol, StringRef Name, uint64_t Offset)
: IsSymbol(IsSymbol), Name(Name), Offset(Offset) {}
@ -80,6 +83,8 @@ struct Location {
return Name != "[heap]" && Offset < RHS.Offset;
}
friend raw_ostream &operator<<(raw_ostream &OS, const Location &Loc);
};
typedef std::vector<std::pair<Location, Location>> BranchContext;
@ -123,7 +128,7 @@ struct BranchInfo {
}
/// Merges the branch and misprediction counts as well as the histories of BI
/// with those of this objetc.
/// with those of this object.
void mergeWith(const BranchInfo &BI);
void print(raw_ostream &OS) const;
@ -145,10 +150,10 @@ struct FuncBranchData {
FuncBranchData() {}
FuncBranchData(StringRef Name, ContainerTy Data)
: Name(Name), Data(std::move(Data)) {}
: Name(Name), Data(std::move(Data)) {}
FuncBranchData(StringRef Name, ContainerTy Data, ContainerTy EntryData)
: Name(Name), Data(std::move(Data)), EntryData(std::move(EntryData)) {}
: Name(Name), Data(std::move(Data)), EntryData(std::move(EntryData)) {}
ErrorOr<const BranchInfo &> getBranch(uint64_t From, uint64_t To) const;
@ -176,6 +181,63 @@ struct FuncBranchData {
void bumpEntryCount(const Location &From, uint64_t OffsetTo, bool Mispred);
};
/// One memory-load record: a load located at \p Offset inside a function that
/// accessed location \p Addr, together with \p Count, the number of times
/// this particular address was observed.
struct MemInfo {
  Location Offset;
  Location Addr;
  uint64_t Count;

  MemInfo(const Location &Offset, const Location &Addr, uint64_t Count = 0)
      : Offset(Offset), Addr(Addr), Count(Count) {}

  /// Records are equal when both locations match; Count is not compared.
  bool operator==(const MemInfo &RHS) const {
    return Offset == RHS.Offset && Addr == RHS.Addr;
  }

  /// Order by Offset first and by Addr among records with equal Offset.
  bool operator<(const MemInfo &RHS) const {
    if (Offset < RHS.Offset)
      return true;
    return Offset == RHS.Offset && Addr < RHS.Addr;
  }

  /// Accumulate the count of \p MI into this record.
  void mergeWith(const MemInfo &MI) { Count += MI.Count; }

  void print(raw_ostream &OS) const;
};
/// Helper class to store memory load events recorded in the address space of
/// a given function, analogous to FuncBranchData but for memory load events
/// instead of branches.
struct FuncMemData {
typedef std::vector<MemInfo> ContainerTy;
/// Name under which this data is stored. Left empty by the default
/// constructor.
StringRef Name;
/// The recorded memory events. getMemInfoRange() asserts that this is sorted.
ContainerTy Data;
/// Lookup table maintained by update(): maps a load's numeric offset and the
/// accessed location to the index of the matching record in \p Data.
DenseMap<uint64_t, DenseMap<Location, size_t>> EventIndex;
/// Find all the memory events originating at Offset.
iterator_range<ContainerTy::const_iterator> getMemInfoRange(
uint64_t Offset) const;
/// Update \p Data with a memory event. Events with the same
/// \p Offset and \p Addr will be coalesced.
void update(const Location &Offset, const Location &Addr);
/// Create an empty container with no name.
FuncMemData() {}
/// Create a container named \p Name that takes ownership of \p Data.
/// EventIndex starts out empty.
FuncMemData(StringRef Name, ContainerTy Data)
: Name(Name), Data(std::move(Data)) {}
};
/// Similar to BranchInfo, but instead of recording from-to address (an edge),
/// it records the address of a perf event and the number of times samples hit
/// this address.
@ -297,6 +359,9 @@ public:
FuncBranchData *
getFuncBranchData(const std::vector<std::string> &FuncNames);
/// Return mem data matching one of the names in \p FuncNames.
FuncMemData *getFuncMemData(const std::vector<std::string> &FuncNames);
FuncSampleData *
getFuncSampleData(const std::vector<std::string> &FuncNames);
@ -306,10 +371,18 @@ public:
std::vector<FuncBranchData *>
getFuncBranchDataRegex(const std::vector<std::string> &FuncNames);
/// Return a vector of all FuncMemData matching the list of names.
/// Internally use fuzzy matching to match special names like LTO-generated
/// function names.
std::vector<FuncMemData *>
getFuncMemDataRegex(const std::vector<std::string> &FuncNames);
using FuncsToBranchesMapTy = StringMap<FuncBranchData>;
using FuncsToSamplesMapTy = StringMap<FuncSampleData>;
using FuncsToMemEventsMapTy = StringMap<FuncMemData>;
/// Mutable access to FuncsToBranches, the StringMap of FuncBranchData.
FuncsToBranchesMapTy &getAllFuncsBranchData() { return FuncsToBranches; }
/// Mutable access to FuncsToMemEvents, the StringMap of FuncMemData.
FuncsToMemEventsMapTy &getAllFuncsMemData() { return FuncsToMemEvents; }
/// Mutable access to FuncsToSamples, the StringMap of FuncSampleData.
FuncsToSamplesMapTy &getAllFuncsSampleData() { return FuncsToSamples; }
const FuncsToBranchesMapTy &getAllFuncsData() const {
@ -348,15 +421,24 @@ protected:
bool checkAndConsumeNewLine();
ErrorOr<StringRef> parseString(char EndChar, bool EndNl=false);
ErrorOr<int64_t> parseNumberField(char EndChar, bool EndNl=false);
ErrorOr<Location> parseLocation(char EndChar, bool EndNl=false);
ErrorOr<uint64_t> parseHexField(char EndChar, bool EndNl=false);
ErrorOr<Location> parseLocation(char EndChar, bool EndNl, bool ExpectMemLoc);
/// Convenience overload: forwards to the three-argument parseLocation with
/// ExpectMemLoc set to false. \p EndNl defaults to false.
ErrorOr<Location> parseLocation(char EndChar, bool EndNl=false) {
return parseLocation(EndChar, EndNl, false);
}
/// Convenience wrapper: forwards to the three-argument parseLocation with
/// ExpectMemLoc set to true. \p EndNl defaults to false.
ErrorOr<Location> parseMemLocation(char EndChar, bool EndNl=false) {
return parseLocation(EndChar, EndNl, true);
}
ErrorOr<BranchHistory> parseBranchHistory();
ErrorOr<BranchInfo> parseBranchInfo();
ErrorOr<SampleInfo> parseSampleInfo();
ErrorOr<MemInfo> parseMemInfo();
ErrorOr<bool> maybeParseNoLBRFlag();
bool hasData();
bool hasBranchData();
bool hasMemData();
/// Build suffix map once the profile data is parsed.
void buildLTONameMap();
void buildLTONameMaps();
/// An in-memory copy of the input data file - owns strings used in reader.
std::unique_ptr<MemoryBuffer> FileBuf;
@ -366,12 +448,14 @@ protected:
unsigned Col;
FuncsToBranchesMapTy FuncsToBranches;
FuncsToSamplesMapTy FuncsToSamples;
FuncsToMemEventsMapTy FuncsToMemEvents;
bool NoLBRMode{false};
StringSet<> EventNames;
static const char FieldSeparator = ' ';
/// Map of common LTO names to possible matching profiles.
/// Maps of common LTO names to possible matching profiles.
StringMap<std::vector<FuncBranchData *>> LTOCommonNameMap;
StringMap<std::vector<FuncMemData *>> LTOCommonNameMemMap;
};
}

View File

@ -94,6 +94,7 @@ int main(int argc, char **argv) {
// All merged data.
DataReader::FuncsToBranchesMapTy MergedFunctionsBranchData;
DataReader::FuncsToSamplesMapTy MergedFunctionsSampleData;
DataReader::FuncsToMemEventsMapTy MergedFunctionsMemData;
StringSet<> EventNames;
// Merged functions data has to replace strings refs with strings from the
@ -143,6 +144,22 @@ int main(int argc, char **argv) {
AllStrings.emplace_back(ToNamePtr); // keep the reference
};
// Append a copy of MI to MIData whose two location names refer to strings
// interned in MergedStringPool rather than to the strings MI refers to.
auto CopyMemInfo = [&](const MemInfo &MI, std::vector<MemInfo> &MIData) {
  auto InternedOffsetName = MergedStringPool.intern(MI.Offset.Name);
  auto InternedAddrName = MergedStringPool.intern(MI.Addr.Name);

  const Location NewOffset(MI.Offset.IsSymbol, *InternedOffsetName,
                           MI.Offset.Offset);
  const Location NewAddr(MI.Addr.IsSymbol, *InternedAddrName,
                         MI.Addr.Offset);
  MIData.emplace_back(NewOffset, NewAddr, MI.Count);

  // Hold on to both interned handles so the referenced strings stay alive.
  AllStrings.emplace_back(InternedOffsetName);
  AllStrings.emplace_back(InternedAddrName);
};
auto CopySampleInfo = [&](const SampleInfo &SI,
std::vector<SampleInfo> &SIData) {
auto NamePtr = MergedStringPool.intern(SI.Address.Name);
@ -183,6 +200,16 @@ int main(int argc, char **argv) {
AllStrings.emplace_back(NamePtr); // keep the reference
};
auto replaceMIStringRefs = [&] (MemInfo &MI) {
auto OffsetNamePtr = MergedStringPool.intern(MI.Offset.Name);
MI.Offset.Name = *OffsetNamePtr;
AllStrings.emplace_back(OffsetNamePtr); // keep the reference
auto AddrNamePtr = MergedStringPool.intern(MI.Addr.Name);
MI.Addr.Name = *AddrNamePtr;
AllStrings.emplace_back(AddrNamePtr); // keep the reference
};
for (auto &InputDataFilename : opts::InputDataFilenames) {
if (!sys::fs::exists(InputDataFilename))
report_error(InputDataFilename, errc::no_such_file_or_directory);
@ -313,6 +340,56 @@ int main(int argc, char **argv) {
}
}
}
// Merge the memory events of every function read from the current input into
// MergedFunctionsMemData, coalescing events that compare equal (MemInfo
// equality looks at Offset and Addr only) by summing their counts.
for (auto &FI : ReaderOrErr.get()->getAllFuncsMemData()) {
auto MI = MergedFunctionsMemData.find(FI.second.Name);
if (MI != MergedFunctionsMemData.end()) {
// The function already has merged data. Events that match an existing entry
// are folded into it; the rest are collected in TmpMI and appended below.
std::vector<MemInfo> TmpMI;
for (auto &MMI : FI.second.Data) {
// Find and merge a corresponding entry or copy data.
// std::lower_bound needs MI->second.Data to be sorted; it is not modified
// inside this loop, and the std::sort calls further down re-establish the
// order after each merge.
auto TI = std::lower_bound(MI->second.Data.begin(),
MI->second.Data.end(),
MMI);
if (TI != MI->second.Data.end() && *TI == MMI) {
// NOTE(review): mergeWith only adds Count, so re-interning the names of MMI
// before merging looks unnecessary -- confirm before removing.
replaceMIStringRefs(MMI);
TI->mergeWith(MMI);
} else {
CopyMemInfo(MMI, TmpMI);
}
}
// Merge in the temp vector making sure it doesn't contain duplicates.
std::sort(TmpMI.begin(), TmpMI.end());
// PrevMI points at the entry most recently appended to the merged data. It is
// reassigned right after every emplace_back, so it is never used across a
// reallocation of the vector.
MemInfo *PrevMI = nullptr;
for (auto &MMI : TmpMI) {
if (PrevMI && *PrevMI == MMI) {
PrevMI->mergeWith(MMI);
} else {
MI->second.Data.emplace_back(MMI);
PrevMI = &MI->second.Data.back();
}
}
// Restore the ordering after appending the new entries at the end.
std::sort(MI->second.Data.begin(), MI->second.Data.end());
} else {
// First time this function is seen: create its entry under an interned name.
auto NamePtr = MergedStringPool.intern(FI.second.Name);
AllStrings.emplace_back(NamePtr); // keep the ref
bool Success;
std::tie(MI, Success) = MergedFunctionsMemData.insert(
std::make_pair(*NamePtr,
FuncMemData(*NamePtr, FuncMemData::ContainerTy())));
// Copy with string conversion while eliminating duplicates.
// Sorting the input first makes equal events adjacent, so comparing against
// the previously copied entry is enough to detect duplicates.
std::sort(FI.second.Data.begin(), FI.second.Data.end());
MemInfo *PrevMI = nullptr;
for (auto &MMI : FI.second.Data) {
if (PrevMI && *PrevMI == MMI) {
replaceMIStringRefs(MMI);
PrevMI->mergeWith(MMI);
} else {
CopyMemInfo(MMI, MI->second.Data);
PrevMI = &MI->second.Data.back();
}
}
}
}
}
if (!opts::SuppressMergedDataOutput) {
@ -336,11 +413,17 @@ int main(int argc, char **argv) {
SD.print(outs());
}
}
// Print every merged memory event of every function to outs().
for (const auto &FDI : MergedFunctionsMemData) {
for (const auto &MD : FDI.second.Data) {
MD.print(outs());
}
}
}
errs() << "Data for "
<< (MergedFunctionsBranchData.size() +
MergedFunctionsSampleData.size())
MergedFunctionsSampleData.size() +
MergedFunctionsMemData.size())
<< " unique objects successfully merged.\n";
if (opts::PrintFunctionList != opts::ST_NONE) {