[perf2bolt] Add support for generating autofdo input

Summary:
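Add AutoFDO tools support: a new "autofdo" option makes perf2bolt write
AutoFDO textual profile data instead of BOLT data.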

(cherry picked from FBD13779026)
This commit is contained in:
Rafael Auler 2019-01-22 17:21:45 -08:00 committed by Maksim Panchenko
parent c6ce2abb7d
commit af81c7ff80
4 changed files with 121 additions and 16 deletions

View File

@ -45,6 +45,13 @@ BasicAggregation("nl",
cl::ZeroOrMore, cl::ZeroOrMore,
cl::cat(AggregatorCategory)); cl::cat(AggregatorCategory));
// Boolean command-line option "autofdo" (off by default). When set, branch
// event parsing also records each sample's PC, and parseProfile() writes the
// AutoFDO textual data via writeAutoFDOData() and then exits instead of
// continuing with the regular BOLT profile processing.
static cl::opt<bool>
WriteAutoFDOData("autofdo",
cl::desc("generate autofdo textual data instead of bolt data"),
cl::init(false),
cl::ZeroOrMore,
cl::cat(AggregatorCategory));
static cl::opt<bool> static cl::opt<bool>
ReadPreAggregated("pa", ReadPreAggregated("pa",
cl::desc("skip perf and read data from a pre-aggregated file format"), cl::desc("skip perf and read data from a pre-aggregated file format"),
@ -123,7 +130,7 @@ void DataAggregator::start(StringRef PerfDataFilename) {
} else { } else {
launchPerfProcess("branch events", launchPerfProcess("branch events",
MainEventsPPI, MainEventsPPI,
"script -F pid,brstack", "script -F pid,ip,brstack",
/*Wait = */false); /*Wait = */false);
} }
@ -323,6 +330,75 @@ void DataAggregator::parsePreAggregated() {
} }
} }
/// Write the aggregated profile to OutputFDataName in the textual format
/// consumed by AutoFDO tools.
///
/// Three sections are emitted, each preceded by its number of entries:
/// fall-through traces (FallthroughLBRs), sampled addresses (BasicSamples)
/// and taken branches (BranchLBRs). Every address is rebased against the
/// binary's first allocated address before being printed in hexadecimal.
///
/// \returns the error reported while opening the output file or while
/// writing to it; a default-constructed (success) std::error_code otherwise.
std::error_code DataAggregator::writeAutoFDOData() {
  outs() << "PERF2BOLT: writing data for autofdo tools...\n";
  // NOTE(review): the timer description below reads "Processing branch
  // events" although this timer covers writing the autofdo file -- looks
  // like a copy-paste; kept unchanged so timing reports stay the same.
  NamedRegionTimer T("writeAutoFDO", "Processing branch events",
                     TimerGroupName, TimerGroupDesc, opts::TimeAggregator);

  std::error_code EC;
  raw_fd_ostream OutFile(OutputFDataName, EC, sys::fs::OpenFlags::F_None);
  if (EC)
    return EC;

  // Format:
  // number of unique traces
  // from_1-to_1:count_1
  // from_2-to_2:count_2
  // ......
  // from_n-to_n:count_n
  // number of unique sample addresses
  // addr_1:count_1
  // addr_2:count_2
  // ......
  // addr_n:count_n
  // number of unique LBR entries
  // src_1->dst_1:count_1
  // src_2->dst_2:count_2
  // ......
  // src_n->dst_n:count_n

  const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;

  // AutoFDO addresses are relative to the first allocated loadable program
  // segment. Addresses below that base are all reported as 0.
  auto filterAddress = [FirstAllocAddress](uint64_t Address) -> uint64_t {
    if (Address < FirstAllocAddress)
      return 0;
    return Address - FirstAllocAddress;
  };

  // Section 1: fall-through traces, counting internal and external hits
  // together.
  OutFile << FallthroughLBRs.size() << "\n";
  for (const auto &AggrLBR : FallthroughLBRs) {
    const auto &Trace = AggrLBR.first;
    const auto &Info = AggrLBR.second;
    OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "-"
            << Twine::utohexstr(filterAddress(Trace.To)) << ":"
            << (Info.InternCount + Info.ExternCount) << "\n";
  }

  // Section 2: sampled addresses with their hit counts.
  OutFile << BasicSamples.size() << "\n";
  for (const auto &Sample : BasicSamples) {
    const auto PC = Sample.first;
    const auto HitCount = Sample.second;
    OutFile << Twine::utohexstr(filterAddress(PC)) << ":" << HitCount << "\n";
  }

  // Section 3: taken branches.
  OutFile << BranchLBRs.size() << "\n";
  for (const auto &AggrLBR : BranchLBRs) {
    const auto &Trace = AggrLBR.first;
    const auto &Info = AggrLBR.second;
    OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "->"
            << Twine::utohexstr(filterAddress(Trace.To)) << ":"
            << Info.TakenCount << "\n";
  }

  // Push any buffered output to the file now so that a write failure is
  // reported to the caller instead of being announced as a success below.
  OutFile.flush();
  if (OutFile.has_error()) {
    const std::error_code WriteEC = OutFile.error();
    // The error is handed to the caller; mark it as handled on the stream so
    // it is not reported a second time when OutFile is destroyed.
    OutFile.clear_error();
    return WriteEC;
  }

  outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
         << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
         << " unique branches to " << OutputFDataName << "\n";

  return std::error_code();
}
void DataAggregator::parseProfile( void DataAggregator::parseProfile(
BinaryContext &BC, BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs) { std::map<uint64_t, BinaryFunction> &BFs) {
@ -388,6 +464,15 @@ void DataAggregator::parseProfile(
errs() << "PERF2BOLT: failed to parse samples\n"; errs() << "PERF2BOLT: failed to parse samples\n";
} }
// We can finish early if the goal is just to generate data for autofdo
if (opts::WriteAutoFDOData) {
if (std::error_code EC = writeAutoFDOData()) {
errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
}
deleteTempFiles();
exit(0);
}
// Special handling for memory events // Special handling for memory events
std::string Error; std::string Error;
auto PI = sys::Wait(MemEventsPPI.PI, 0, true, &Error); auto PI = sys::Wait(MemEventsPPI.PI, 0, true, &Error);
@ -475,8 +560,8 @@ DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) {
return &FI->second; return &FI->second;
} }
bool bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address,
DataAggregator::doSample(BinaryFunction &Func, uint64_t Address) { uint64_t Count) {
auto I = FuncsToSamples.find(Func.getNames()[0]); auto I = FuncsToSamples.find(Func.getNames()[0]);
if (I == FuncsToSamples.end()) { if (I == FuncsToSamples.end()) {
bool Success; bool Success;
@ -485,7 +570,7 @@ DataAggregator::doSample(BinaryFunction &Func, uint64_t Address) {
FuncSampleData(Func.getNames()[0], FuncSampleData::ContainerTy()))); FuncSampleData(Func.getNames()[0], FuncSampleData::ContainerTy())));
} }
I->second.bumpCount(Address - Func.getAddress()); I->second.bumpCount(Address - Func.getAddress(), Count);
return true; return true;
} }
@ -682,6 +767,16 @@ ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
return Res; return Res;
} }
while (checkAndConsumeFS()) {}
auto PCRes = parseHexField(FieldSeparator, true);
if (std::error_code EC = PCRes.getError())
return EC;
Res.PC = PCRes.get();
if (checkAndConsumeNewLine())
return Res;
while (!checkAndConsumeNewLine()) { while (!checkAndConsumeNewLine()) {
checkAndConsumeFS(); checkAndConsumeFS();
@ -890,6 +985,9 @@ std::error_code DataAggregator::parseBranchEvents() {
return EC; return EC;
auto &Sample = SampleRes.get(); auto &Sample = SampleRes.get();
if (opts::WriteAutoFDOData)
++BasicSamples[Sample.PC];
if (Sample.LBR.empty()) if (Sample.LBR.empty())
continue; continue;
@ -1041,7 +1139,8 @@ std::error_code DataAggregator::parseBasicEvents() {
if (auto *BF = getBinaryFunctionContainingAddress(Sample->PC)) if (auto *BF = getBinaryFunctionContainingAddress(Sample->PC))
BF->setHasProfileAvailable(); BF->setHasProfileAvailable();
BasicSamples.emplace_back(std::move(Sample.get())); ++BasicSamples[Sample->PC];
EventNames.insert(Sample->EventName);
} }
return std::error_code(); return std::error_code();
@ -1052,17 +1151,19 @@ void DataAggregator::processBasicEvents() {
NamedRegionTimer T("processBasic", "Processing basic events", NamedRegionTimer T("processBasic", "Processing basic events",
TimerGroupName, TimerGroupDesc, opts::TimeAggregator); TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
uint64_t OutOfRangeSamples{0}; uint64_t OutOfRangeSamples{0};
uint64_t NumSamples{0};
for (auto &Sample : BasicSamples) { for (auto &Sample : BasicSamples) {
auto *Func = getBinaryFunctionContainingAddress(Sample.PC); const auto PC = Sample.first;
const auto HitCount = Sample.second;
NumSamples += HitCount;
auto *Func = getBinaryFunctionContainingAddress(PC);
if (!Func) { if (!Func) {
++OutOfRangeSamples; OutOfRangeSamples += HitCount;
continue; continue;
} }
doSample(*Func, Sample.PC); doSample(*Func, PC, HitCount);
EventNames.insert(Sample.EventName);
} }
const auto NumSamples = BasicSamples.size();
outs() << "PERF2BOLT: read " << NumSamples << " samples\n"; outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
outs() << "PERF2BOLT: out of range samples recorded in unknown regions: " outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "

View File

@ -53,6 +53,7 @@ class DataAggregator : public DataReader {
struct PerfBranchSample { struct PerfBranchSample {
SmallVector<LBREntry, 16> LBR; SmallVector<LBREntry, 16> LBR;
uint64_t PC;
}; };
struct PerfBasicSample { struct PerfBasicSample {
@ -106,7 +107,7 @@ class DataAggregator : public DataReader {
std::unordered_map<Trace, BranchInfo, TraceHash> BranchLBRs; std::unordered_map<Trace, BranchInfo, TraceHash> BranchLBRs;
std::unordered_map<Trace, FTInfo, TraceHash> FallthroughLBRs; std::unordered_map<Trace, FTInfo, TraceHash> FallthroughLBRs;
std::vector<AggregatedLBREntry> AggregatedLBRs; std::vector<AggregatedLBREntry> AggregatedLBRs;
std::vector<PerfBasicSample> BasicSamples; std::unordered_map<uint64_t, uint64_t> BasicSamples;
std::vector<PerfMemSample> MemSamples; std::vector<PerfMemSample> MemSamples;
template<typename T> void clear(T& Container) { template<typename T> void clear(T& Container) {
@ -197,7 +198,7 @@ class DataAggregator : public DataReader {
/// Semantic actions - parser hooks to interpret parsed perf samples /// Semantic actions - parser hooks to interpret parsed perf samples
/// Register a sample (non-LBR mode), i.e. a new hit at \p Address /// Register a sample (non-LBR mode), i.e. a new hit at \p Address
bool doSample(BinaryFunction &Func, const uint64_t Address); bool doSample(BinaryFunction &Func, const uint64_t Address, uint64_t Count);
/// Register an intraprocedural branch \p Branch. /// Register an intraprocedural branch \p Branch.
bool doIntraBranch(BinaryFunction &Func, uint64_t From, uint64_t To, bool doIntraBranch(BinaryFunction &Func, uint64_t From, uint64_t To,
@ -256,6 +257,9 @@ class DataAggregator : public DataReader {
/// Process all branch events. /// Process all branch events.
void processBranchEvents(); void processBranchEvents();
/// This member function supports generating data for AutoFDO LLVM tools.
std::error_code writeAutoFDOData();
/// Parse the full output generated by perf script to report non-LBR samples. /// Parse the full output generated by perf script to report non-LBR samples.
std::error_code parseBasicEvents(); std::error_code parseBasicEvents();

View File

@ -118,15 +118,15 @@ FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
return Result; return Result;
} }
void FuncSampleData::bumpCount(uint64_t Offset) { void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
auto Iter = Index.find(Offset); auto Iter = Index.find(Offset);
if (Iter == Index.end()) { if (Iter == Index.end()) {
Data.emplace_back(Location(true, Name, Offset), 1); Data.emplace_back(Location(true, Name, Offset), Count);
Index[Offset] = Data.size() - 1; Index[Offset] = Data.size() - 1;
return; return;
} }
auto &SI = Data[Iter->second]; auto &SI = Data[Iter->second];
++SI.Hits; SI.Hits += Count;
} }
void FuncBranchData::bumpBranchCount(uint64_t OffsetFrom, uint64_t OffsetTo, void FuncBranchData::bumpBranchCount(uint64_t OffsetFrom, uint64_t OffsetTo,

View File

@ -285,7 +285,7 @@ struct FuncSampleData {
/// Aggregation helper /// Aggregation helper
DenseMap<uint64_t, size_t> Index; DenseMap<uint64_t, size_t> Index;
void bumpCount(uint64_t Offset); void bumpCount(uint64_t Offset, uint64_t Count);
}; };
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//