forked from OSchip/llvm-project
[llvm-profgen] Refactor PerfReader to allow different types of perf scripts
In order to support different types of perf scripts, this change refactors `PerfReader` by adding the base class `PerfReaderBase`; the current `HybridPerfReader` is derived from it for CS profile generation. Common functions such as parseMMap2Event, extractLBRStack, and extractCallstack can be reused. The next step is to add an LBR-only reader (for non-CS profiles) and an aggregated perf script reader (which pre-aggregates the scripts). Reviewed By: hoy, wenlei Differential Revision: https://reviews.llvm.org/D107014
This commit is contained in:
parent
9205143f07
commit
6da9241aab
|
@ -241,7 +241,7 @@ bool VirtualUnwinder::unwind(const HybridSample *Sample, uint64_t Repeat) {
|
|||
return true;
|
||||
}
|
||||
|
||||
void PerfReader::validateCommandLine(
|
||||
void PerfReaderBase::validateCommandLine(
|
||||
cl::list<std::string> &BinaryFilenames,
|
||||
cl::list<std::string> &PerfTraceFilenames) {
|
||||
// Allow an invalid perfscript if we are only showing the binary disassembly
|
||||
|
@ -276,15 +276,32 @@ void PerfReader::validateCommandLine(
|
|||
}
|
||||
}
|
||||
|
||||
PerfReader::PerfReader(cl::list<std::string> &BinaryFilenames,
|
||||
std::unique_ptr<PerfReaderBase>
|
||||
PerfReaderBase::create(cl::list<std::string> &BinaryFilenames,
|
||||
cl::list<std::string> &PerfTraceFilenames) {
|
||||
validateCommandLine(BinaryFilenames, PerfTraceFilenames);
|
||||
|
||||
PerfScriptType PerfType = extractPerfType(PerfTraceFilenames);
|
||||
std::unique_ptr<PerfReaderBase> PerfReader;
|
||||
if (PerfType == PERF_LBR_STACK) {
|
||||
PerfReader.reset(new HybridPerfReader(BinaryFilenames));
|
||||
} else if (PerfType == PERF_LBR) {
|
||||
// TODO:
|
||||
exitWithError("Unsupported perfscript!");
|
||||
} else {
|
||||
exitWithError("Unsupported perfscript!");
|
||||
}
|
||||
|
||||
return PerfReader;
|
||||
}
|
||||
|
||||
/// Construct the reader by loading every binary in \p BinaryFilenames.
/// Each binary is loaded with AllowNameConflict set to false.
PerfReaderBase::PerfReaderBase(cl::list<std::string> &BinaryFilenames) {
  // Load the binaries. Bind by const reference: a plain `auto` loop variable
  // would copy each std::string on every iteration.
  for (const auto &Filename : BinaryFilenames)
    loadBinary(Filename, /*AllowNameConflict*/ false);
}
|
||||
|
||||
ProfiledBinary &PerfReader::loadBinary(const StringRef BinaryPath,
|
||||
ProfiledBinary &PerfReaderBase::loadBinary(const StringRef BinaryPath,
|
||||
bool AllowNameConflict) {
|
||||
// The binary table is currently indexed by the binary name not the full
|
||||
// binary path. This is because the user-given path may not match the one
|
||||
|
@ -303,7 +320,7 @@ ProfiledBinary &PerfReader::loadBinary(const StringRef BinaryPath,
|
|||
return Ret.first->second;
|
||||
}
|
||||
|
||||
void PerfReader::updateBinaryAddress(const MMapEvent &Event) {
|
||||
void PerfReaderBase::updateBinaryAddress(const MMapEvent &Event) {
|
||||
// Load the binary.
|
||||
StringRef BinaryPath = Event.BinaryPath;
|
||||
StringRef BinaryName = llvm::sys::path::filename(BinaryPath);
|
||||
|
@ -353,7 +370,7 @@ void PerfReader::updateBinaryAddress(const MMapEvent &Event) {
|
|||
}
|
||||
}
|
||||
|
||||
ProfiledBinary *PerfReader::getBinary(uint64_t Address) {
|
||||
ProfiledBinary *PerfReaderBase::getBinary(uint64_t Address) {
|
||||
auto Iter = AddrToBinaryMap.lower_bound(Address);
|
||||
if (Iter == AddrToBinaryMap.end() || Iter->first != Address) {
|
||||
if (Iter == AddrToBinaryMap.begin())
|
||||
|
@ -415,7 +432,7 @@ static void printBranchCounter(ContextSampleCounterMap &Counter,
|
|||
printSampleCounter(OrderedCounter);
|
||||
}
|
||||
|
||||
void PerfReader::printUnwinderOutput() {
|
||||
void HybridPerfReader::printUnwinderOutput() {
|
||||
for (auto I : BinarySampleCounters) {
|
||||
const ProfiledBinary *Binary = I.first;
|
||||
outs() << "Binary(" << Binary->getName().str() << ")'s Range Counter:\n";
|
||||
|
@ -425,7 +442,7 @@ void PerfReader::printUnwinderOutput() {
|
|||
}
|
||||
}
|
||||
|
||||
void PerfReader::unwindSamples() {
|
||||
void HybridPerfReader::unwindSamples() {
|
||||
for (const auto &Item : AggregatedSamples) {
|
||||
const HybridSample *Sample = dyn_cast<HybridSample>(Item.first.getPtr());
|
||||
VirtualUnwinder Unwinder(&BinarySampleCounters[Sample->Binary],
|
||||
|
@ -437,7 +454,7 @@ void PerfReader::unwindSamples() {
|
|||
printUnwinderOutput();
|
||||
}
|
||||
|
||||
bool PerfReader::extractLBRStack(TraceStream &TraceIt,
|
||||
bool PerfReaderBase::extractLBRStack(TraceStream &TraceIt,
|
||||
SmallVectorImpl<LBREntry> &LBRStack,
|
||||
ProfiledBinary *Binary) {
|
||||
// The raw format of LBR stack is like:
|
||||
|
@ -531,7 +548,7 @@ bool PerfReader::extractLBRStack(TraceStream &TraceIt,
|
|||
return !LBRStack.empty();
|
||||
}
|
||||
|
||||
bool PerfReader::extractCallstack(TraceStream &TraceIt,
|
||||
bool PerfReaderBase::extractCallstack(TraceStream &TraceIt,
|
||||
SmallVectorImpl<uint64_t> &CallStack) {
|
||||
// The raw format of call stack is like:
|
||||
// 4005dc # leaf frame
|
||||
|
@ -593,7 +610,7 @@ bool PerfReader::extractCallstack(TraceStream &TraceIt,
|
|||
!Binary->addressInPrologEpilog(CallStack.front());
|
||||
}
|
||||
|
||||
void PerfReader::parseHybridSample(TraceStream &TraceIt) {
|
||||
void HybridPerfReader::parseSample(TraceStream &TraceIt) {
|
||||
// The raw hybrid sample starts with the call stack in FILO order and is followed
|
||||
// immediately by the LBR sample
|
||||
// e.g.
|
||||
|
@ -631,7 +648,7 @@ void PerfReader::parseHybridSample(TraceStream &TraceIt) {
|
|||
}
|
||||
}
|
||||
|
||||
void PerfReader::parseMMap2Event(TraceStream &TraceIt) {
|
||||
void PerfReaderBase::parseMMap2Event(TraceStream &TraceIt) {
|
||||
// Parse a line like:
|
||||
// PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0
|
||||
// 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so
|
||||
|
@ -677,26 +694,23 @@ void PerfReader::parseMMap2Event(TraceStream &TraceIt) {
|
|||
TraceIt.advance();
|
||||
}
|
||||
|
||||
void PerfReader::parseEventOrSample(TraceStream &TraceIt) {
|
||||
void PerfReaderBase::parseEventOrSample(TraceStream &TraceIt) {
|
||||
if (TraceIt.getCurrentLine().startswith("PERF_RECORD_MMAP2"))
|
||||
parseMMap2Event(TraceIt);
|
||||
else if (getPerfScriptType() == PERF_LBR_STACK)
|
||||
parseHybridSample(TraceIt);
|
||||
else {
|
||||
// TODO: parse other type sample
|
||||
TraceIt.advance();
|
||||
}
|
||||
else
|
||||
parseSample(TraceIt);
|
||||
}
|
||||
|
||||
void PerfReader::parseAndAggregateTrace(StringRef Filename) {
|
||||
// Run one perf script file through the parser: open a line stream over
// Filename and hand it to parseEventOrSample until the stream reports EoF.
void PerfReaderBase::parseAndAggregateTrace(StringRef Filename) {
  for (TraceStream Lines(Filename); !Lines.isAtEoF();)
    parseEventOrSample(Lines);
}
|
||||
|
||||
void PerfReader::checkAndSetPerfType(
|
||||
cl::list<std::string> &PerfTraceFilenames) {
|
||||
PerfScriptType
|
||||
PerfReaderBase::extractPerfType(cl::list<std::string> &PerfTraceFilenames) {
|
||||
PerfScriptType PerfType = PERF_UNKNOWN;
|
||||
for (auto FileName : PerfTraceFilenames) {
|
||||
PerfScriptType Type = checkPerfScriptType(FileName);
|
||||
if (Type == PERF_INVALID)
|
||||
|
@ -705,20 +719,13 @@ void PerfReader::checkAndSetPerfType(
|
|||
exitWithError("Inconsistent sample among different perf scripts");
|
||||
PerfType = Type;
|
||||
}
|
||||
return PerfType;
|
||||
}
|
||||
|
||||
void PerfReader::generateRawProfile() {
|
||||
if (getPerfScriptType() == PERF_LBR_STACK) {
|
||||
// Unwind samples if it's a hybrid sample
|
||||
unwindSamples();
|
||||
} else if (getPerfScriptType() == PERF_LBR) {
|
||||
// TODO: range overlap computation for regular AutoFDO
|
||||
}
|
||||
}
|
||||
// For hybrid samples, generating the raw profile consists of the unwinding
// step alone.
void HybridPerfReader::generateRawProfile() {
  unwindSamples();
}
|
||||
|
||||
void PerfReader::parsePerfTraces(cl::list<std::string> &PerfTraceFilenames) {
|
||||
// Check and set current perfscript type
|
||||
checkAndSetPerfType(PerfTraceFilenames);
|
||||
void PerfReaderBase::parsePerfTraces(
|
||||
cl::list<std::string> &PerfTraceFilenames) {
|
||||
// Parse perf traces and do aggregation.
|
||||
for (auto Filename : PerfTraceFilenames)
|
||||
parseAndAggregateTrace(Filename);
|
||||
|
|
|
@ -541,10 +541,12 @@ using BinarySampleCounterMap =
|
|||
std::unordered_map<ProfiledBinary *, ContextSampleCounterMap>;
|
||||
|
||||
// Load binaries and read perf trace to parse the events and samples
|
||||
class PerfReader {
|
||||
|
||||
class PerfReaderBase {
|
||||
public:
|
||||
PerfReader(cl::list<std::string> &BinaryFilenames,
|
||||
PerfReaderBase(cl::list<std::string> &BinaryFilenames);
|
||||
virtual ~PerfReaderBase() = default;
|
||||
static std::unique_ptr<PerfReaderBase>
|
||||
create(cl::list<std::string> &BinaryFilenames,
|
||||
cl::list<std::string> &PerfTraceFilenames);
|
||||
|
||||
// A LBR sample is like:
|
||||
|
@ -614,10 +616,12 @@ public:
|
|||
return BinarySampleCounters;
|
||||
}
|
||||
|
||||
private:
|
||||
protected:
|
||||
/// Validate the command line input
|
||||
void validateCommandLine(cl::list<std::string> &BinaryFilenames,
|
||||
static void validateCommandLine(cl::list<std::string> &BinaryFilenames,
|
||||
cl::list<std::string> &PerfTraceFilenames);
|
||||
static PerfScriptType
|
||||
extractPerfType(cl::list<std::string> &PerfTraceFilenames);
|
||||
/// Parse a single line of a PERF_RECORD_MMAP2 event looking for a
|
||||
/// mapping between the binary name and its memory layout.
|
||||
///
|
||||
|
@ -626,8 +630,6 @@ private:
|
|||
void parseAndAggregateTrace(StringRef Filename);
|
||||
// Parse either an MMAP event or a perf sample
|
||||
void parseEventOrSample(TraceStream &TraceIt);
|
||||
// Parse the hybrid sample including the call and LBR line
|
||||
void parseHybridSample(TraceStream &TraceIt);
|
||||
// Extract call stack from the perf trace lines
|
||||
bool extractCallstack(TraceStream &TraceIt,
|
||||
SmallVectorImpl<uint64_t> &CallStack);
|
||||
|
@ -635,13 +637,12 @@ private:
|
|||
bool extractLBRStack(TraceStream &TraceIt,
|
||||
SmallVectorImpl<LBREntry> &LBRStack,
|
||||
ProfiledBinary *Binary);
|
||||
void checkAndSetPerfType(cl::list<std::string> &PerfTraceFilenames);
|
||||
// Parse one sample from multiple perf lines, override this for different
|
||||
// sample type
|
||||
virtual void parseSample(TraceStream &TraceIt) = 0;
|
||||
// Post process the profile after trace aggregation, we will do simple range
|
||||
// overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
|
||||
void generateRawProfile();
|
||||
// Unwind the hybrid samples after aggregation
|
||||
void unwindSamples();
|
||||
void printUnwinderOutput();
|
||||
virtual void generateRawProfile() = 0;
|
||||
// Helper function for looking up binary in AddressBinaryMap
|
||||
ProfiledBinary *getBinary(uint64_t Address);
|
||||
|
||||
|
@ -654,6 +655,31 @@ private:
|
|||
PerfScriptType PerfType = PERF_UNKNOWN;
|
||||
};
|
||||
|
||||
/*
|
||||
Hybrid perf script includes a group of hybrid samples(LBRs + call stack),
|
||||
which is used to generate CS profile. An example of hybrid sample:
|
||||
4005dc # call stack leaf
|
||||
400634
|
||||
400684 # call stack root
|
||||
0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
|
||||
... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
|
||||
*/
|
||||
class HybridPerfReader : public PerfReaderBase {
|
||||
public:
|
||||
HybridPerfReader(cl::list<std::string> &BinaryFilenames)
|
||||
: PerfReaderBase(BinaryFilenames) {
|
||||
PerfType = PERF_LBR_STACK;
|
||||
};
|
||||
// Parse the hybrid sample including the call and LBR line
|
||||
void parseSample(TraceStream &TraceIt) override;
|
||||
void generateRawProfile() override;
|
||||
|
||||
private:
|
||||
// Unwind the hybrid samples after aggregration
|
||||
void unwindSamples();
|
||||
void printUnwinderOutput();
|
||||
};
|
||||
|
||||
} // end namespace sampleprof
|
||||
} // end namespace llvm
|
||||
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include "llvm/MC/MCTargetOptions.h"
|
||||
#include "llvm/Object/ELFObjectFile.h"
|
||||
#include "llvm/ProfileData/SampleProf.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Path.h"
|
||||
#include <list>
|
||||
#include <set>
|
||||
|
|
|
@ -49,14 +49,20 @@ int main(int argc, const char *argv[]) {
|
|||
cl::HideUnrelatedOptions({&ProfGenCategory, &getColorCategory()});
|
||||
cl::ParseCommandLineOptions(argc, argv, "llvm SPGO profile generator\n");
|
||||
|
||||
// Load binaries and parse perf events and samples
|
||||
PerfReader Reader(BinaryFilenames, PerfTraceFilenames);
|
||||
if (ShowDisassemblyOnly)
|
||||
if (ShowDisassemblyOnly) {
|
||||
for (auto BinaryPath : BinaryFilenames) {
|
||||
(void)ProfiledBinary(BinaryPath);
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
Reader.parsePerfTraces(PerfTraceFilenames);
|
||||
}
|
||||
|
||||
// Load binaries and parse perf events and samples
|
||||
std::unique_ptr<PerfReaderBase> Reader =
|
||||
PerfReaderBase::create(BinaryFilenames, PerfTraceFilenames);
|
||||
Reader->parsePerfTraces(PerfTraceFilenames);
|
||||
|
||||
std::unique_ptr<ProfileGenerator> Generator = ProfileGenerator::create(
|
||||
Reader.getBinarySampleCounters(), Reader.getPerfScriptType());
|
||||
Reader->getBinarySampleCounters(), Reader->getPerfScriptType());
|
||||
Generator->generateProfile();
|
||||
Generator->write();
|
||||
|
||||
|
|
Loading…
Reference in New Issue