[llvm-profgen] Read sample profiles for post-processing.

Sometimes we would like to run post-processing repeatedly on the original sample profile for tuning. To avoid regenerating the original profile from scratch every time, this change adds support for reading in the original profile (called the symbolized profile) and running the post-processor on it.

Reviewed By: wenlei

Differential Revision: https://reviews.llvm.org/D121655
This commit is contained in:
Hongtao Yu 2022-03-30 12:27:10 -07:00
parent 10cda6e36c
commit 937924eb49
5 changed files with 240 additions and 62 deletions

View File

@ -0,0 +1,63 @@
; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/cold-profile-trimming.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0
; RUN: llvm-profgen --format=text --llvm-sample-profile=%t1 --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t2 --trim-cold-profile=1 --profile-summary-cold-count=1000
; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-TRIM
;CHECK-TRIM: partition_pivot_last:5187:7
;CHECK-TRIM: partition_pivot_first:3010:5
;CHECK-TRIM-NOT: quick_sort:903:25
;CHECK-TRIM-NOT: main:820:0
; original code:
; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out
#include <stdio.h>
#include <stdlib.h>
// Exchanges the two int values that a and b point to.
void swap(int *a, int *b) {
int t = *a;
*a = *b;
*b = t;
}
// Partitions array[low..high] in place around the pivot value array[high].
// Elements smaller than the pivot are swapped toward the front; the pivot is
// then swapped into the slot right after them. Returns the pivot's final index.
int partition_pivot_last(int* array, int low, int high) {
int pivot = array[high];
int i = low - 1;
for (int j = low; j < high; j++)
if (array[j] < pivot)
swap(&array[++i], &array[j]);
swap(&array[i + 1], &array[high]);
return (i + 1);
}
// Partitions array[low..high] in place around the pivot value array[low].
// Elements smaller than the pivot are gathered directly after position low
// (the swap is skipped when the element is already in place); the pivot is
// then swapped behind the last of them. Returns the pivot's final index.
int partition_pivot_first(int* array, int low, int high) {
int pivot = array[low];
int i = low + 1;
for (int j = low + 1; j <= high; j++)
if (array[j] < pivot) { if (j != i) swap(&array[i], &array[j]); i++;}
swap(&array[i - 1], &array[low]);
return i - 1;
}
// Recursively sorts array[low..high] in place. partition_func is called
// indirectly to split the range and must return the pivot's final index; the
// sub-ranges on either side of that index are then sorted with the same
// partition function. Does nothing when low >= high.
void quick_sort(int* array, int low, int high, int (*partition_func)(int *, int, int)) {
if (low < high) {
int pi = (*partition_func)(array, low, high);
quick_sort(array, low, pi - 1, partition_func);
quick_sort(array, pi + 1, high, partition_func);
}
}
// Workload driver: 100 * 1000 times, refills a 200-element array (every tenth
// slot holds its own index, the others hold rand() % size), sorts it with
// quick_sort, and adds array[i % size] to a running sum that is printed at the
// end. partition_pivot_first is used when i is a multiple of 3 and
// partition_pivot_last otherwise.
// NOTE(review): the malloc result is not checked and the buffer is never freed
// in the code shown here.
int main() {
const int size = 200;
int sum = 0;
int *array = malloc(size * sizeof(int));
for(int i = 0; i < 100 * 1000; i++) {
for(int j = 0; j < size; j++)
array[j] = j % 10 ? rand() % size: j;
int (*fptr)(int *, int, int) = i % 3 ? partition_pivot_last : partition_pivot_first;
quick_sort(array, 0, size - 1, fptr);
sum += array[i % size];
}
printf("sum=%d\n", sum);
return 0;
}

View File

@ -0,0 +1,43 @@
; Test default llvm-profgen with preinline off
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --csspgo-preinliner=0 --gen-cs-nested-profile=0 --output=%t1
; Test that llvm-profgen with the preinliner on merges non-inlinable profiles into the base profile.
; RUN: llvm-profgen --format=text --llvm-sample-profile=%t1 --binary=%S/Inputs/inline-cs-noprobe.perfbin --csspgo-preinliner=1 --gen-cs-nested-profile=0 --sample-profile-hot-inline-threshold=3000 --sample-profile-cold-inline-threshold=45 --output=%t2
; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-PREINL
; Test default llvm-profgen with preinline off
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --csspgo-preinliner=0 --gen-cs-nested-profile=0 --output=%t3
; Test that llvm-profgen with the preinliner on merges non-inlinable profiles into the base profile.
; RUN: llvm-profgen --format=text --llvm-sample-profile=%t3 --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --csspgo-preinliner=1 --gen-cs-nested-profile=0 --sample-profile-hot-inline-threshold=3000 --sample-profile-cold-inline-threshold=45 --output=%t4
; RUN: FileCheck %s --input-file %t4 --check-prefix=CHECK-PREINL-PROBE
; CHECK-PREINL: [foo]:309:0
; CHECK-PREINL-NEXT: 2.1: 14
; CHECK-PREINL-NEXT: 3: 15
; CHECK-PREINL-NEXT: 3.1: 14 bar:14
; CHECK-PREINL-NEXT: 3.2: 1
; CHECK-PREINL-NEXT: 65526: 14
; CHECK-PREINL-NEXT: !Attributes: 1
; CHECK-PREINL-NEXT:[foo:3.1 @ bar]:84:0
; CHECK-PREINL-NEXT: 1: 14
; CHECK-PREINL-NEXT: !Attributes: 3
; CHECK-PREINL-PROBE: [foo]:74:0
; CHECK-PREINL-PROBE-NEXT: 1: 0
; CHECK-PREINL-PROBE-NEXT: 2: 15
; CHECK-PREINL-PROBE-NEXT: 3: 15
; CHECK-PREINL-PROBE-NEXT: 4: 14
; CHECK-PREINL-PROBE-NEXT: 5: 1
; CHECK-PREINL-PROBE-NEXT: 6: 15
; CHECK-PREINL-PROBE-NEXT: 7: 0
; CHECK-PREINL-PROBE-NEXT: 8: 14 bar:14
; CHECK-PREINL-PROBE-NEXT: 9: 0
; CHECK-PREINL-PROBE-NEXT: !CFGChecksum: 563088904013236
; CHECK-PREINL-PROBE-NEXT: !Attributes: 1
; CHECK-PREINL-PROBE-NEXT:[foo:8 @ bar]:28:14
; CHECK-PREINL-PROBE-NEXT: 1: 14
; CHECK-PREINL-PROBE-NEXT: 4: 14
; CHECK-PREINL-PROBE-NEXT: !CFGChecksum: 72617220756
; CHECK-PREINL-PROBE-NEXT: !Attributes: 3

View File

@ -7,12 +7,14 @@
//===----------------------------------------------------------------------===//
#include "ProfileGenerator.h"
#include "ErrorHandling.h"
#include "PerfReader.h"
#include "ProfiledBinary.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include <algorithm>
#include <float.h>
#include <unordered_set>
#include <utility>
cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
cl::Required,
@ -109,7 +111,7 @@ bool ProfileGeneratorBase::UseFSDiscriminator = false;
std::unique_ptr<ProfileGeneratorBase>
ProfileGeneratorBase::create(ProfiledBinary *Binary,
const ContextSampleCounterMap &SampleCounters,
const ContextSampleCounterMap *SampleCounters,
bool ProfileIsCSFlat) {
std::unique_ptr<ProfileGeneratorBase> Generator;
if (ProfileIsCSFlat) {
@ -125,6 +127,24 @@ ProfileGeneratorBase::create(ProfiledBinary *Binary,
return Generator;
}
// Factory overload used when the input is an existing sample profile map
// rather than raw sample counters. A CS-flat profile is handled by a
// CSProfileGenerator (an FS-discriminator binary is reported as an error in
// that case); every other profile is handled by a ProfileGenerator. The
// FS-discriminator setting of \p Binary is then copied into
// ProfileGeneratorBase::UseFSDiscriminator and FunctionSamples::ProfileIsFS.
// NOTE(review): \p Profiles is a const rvalue reference, so std::move() on it
// still yields a const rvalue; presumably the map ends up copied rather than
// moved. Confirm whether the const qualifier is intended.
std::unique_ptr<ProfileGeneratorBase>
ProfileGeneratorBase::create(ProfiledBinary *Binary,
                             const SampleProfileMap &&Profiles,
                             bool ProfileIsCSFlat) {
  std::unique_ptr<ProfileGeneratorBase> Result;
  if (!ProfileIsCSFlat) {
    Result = std::make_unique<ProfileGenerator>(Binary, std::move(Profiles));
  } else {
    // Context-sensitive profiles cannot be combined with FS discriminators.
    if (Binary->useFSDiscriminator())
      exitWithError("FS discriminator is not supported in CS profile.");
    Result = std::make_unique<CSProfileGenerator>(Binary, std::move(Profiles));
  }

  // Publish the binary's discriminator mode through the two global flags.
  ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
  FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
  return Result;
}
void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
SampleProfileMap &ProfileMap) {
// Populate profile symbol list if extended binary format is used.
@ -372,31 +392,39 @@ void ProfileGeneratorBase::updateTotalSamples() {
void ProfileGeneratorBase::collectProfiledFunctions() {
std::unordered_set<const BinaryFunction *> ProfiledFunctions;
// Go through all the stacks, ranges and branches in sample counters, use the
// start of the range to look up the function it belongs and record the
// function.
for (const auto &CI : SampleCounters) {
if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
for (auto Addr : CtxKey->Context) {
if (FuncRange *FRange = Binary->findFuncRangeForOffset(
Binary->virtualAddrToOffset(Addr)))
if (SampleCounters) {
// Go through all the stacks, ranges and branches in sample counters, use
// the start of the range to look up the function it belongs and record the
// function.
for (const auto &CI : *SampleCounters) {
if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
for (auto Addr : CtxKey->Context) {
if (FuncRange *FRange = Binary->findFuncRangeForOffset(
Binary->virtualAddrToOffset(Addr)))
ProfiledFunctions.insert(FRange->Func);
}
}
for (auto Item : CI.second.RangeCounter) {
uint64_t StartOffset = Item.first.first;
if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset))
ProfiledFunctions.insert(FRange->Func);
}
for (auto Item : CI.second.BranchCounter) {
uint64_t SourceOffset = Item.first.first;
uint64_t TargetOffset = Item.first.first;
if (FuncRange *FRange = Binary->findFuncRangeForOffset(SourceOffset))
ProfiledFunctions.insert(FRange->Func);
if (FuncRange *FRange = Binary->findFuncRangeForOffset(TargetOffset))
ProfiledFunctions.insert(FRange->Func);
}
}
for (auto Item : CI.second.RangeCounter) {
uint64_t StartOffset = Item.first.first;
if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset))
ProfiledFunctions.insert(FRange->Func);
}
for (auto Item : CI.second.BranchCounter) {
uint64_t SourceOffset = Item.first.first;
uint64_t TargetOffset = Item.first.first;
if (FuncRange *FRange = Binary->findFuncRangeForOffset(SourceOffset))
ProfiledFunctions.insert(FRange->Func);
if (FuncRange *FRange = Binary->findFuncRangeForOffset(TargetOffset))
ProfiledFunctions.insert(FRange->Func);
} else {
// This is for the case the input is a llvm sample profile.
for (const auto &FS : ProfileMap) {
if (auto *Func = Binary->getBinaryFunction(FS.first.getName()))
ProfiledFunctions.insert(Func);
}
}
@ -416,11 +444,18 @@ ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
void ProfileGenerator::generateProfile() {
collectProfiledFunctions();
if (Binary->usePseudoProbes()) {
generateProbeBasedProfile();
} else {
generateLineNumBasedProfile();
if (Binary->usePseudoProbes())
Binary->decodePseudoProbe();
if (SampleCounters) {
if (Binary->usePseudoProbes()) {
generateProbeBasedProfile();
} else {
generateLineNumBasedProfile();
}
}
postProcessProfiles();
}
@ -448,9 +483,9 @@ void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
}
void ProfileGenerator::generateLineNumBasedProfile() {
assert(SampleCounters.size() == 1 &&
assert(SampleCounters->size() == 1 &&
"Must have one entry for profile generation.");
const SampleCounter &SC = SampleCounters.begin()->second;
const SampleCounter &SC = SampleCounters->begin()->second;
// Fill in function body samples
populateBodySamplesForAllFunctions(SC.RangeCounter);
// Fill in boundary sample counts as well as call site samples for calls
@ -460,12 +495,11 @@ void ProfileGenerator::generateLineNumBasedProfile() {
}
void ProfileGenerator::generateProbeBasedProfile() {
assert(SampleCounters.size() == 1 &&
assert(SampleCounters->size() == 1 &&
"Must have one entry for profile generation.");
Binary->decodePseudoProbe();
// Enable pseudo probe functionalities in SampleProf
FunctionSamples::ProfileIsProbeBased = true;
const SampleCounter &SC = SampleCounters.begin()->second;
const SampleCounter &SC = SampleCounters->begin()->second;
// Fill in function body samples
populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter);
// Fill in boundary sample counts as well as call site samples for calls
@ -687,10 +721,15 @@ void CSProfileGenerator::generateProfile() {
collectProfiledFunctions();
if (Binary->usePseudoProbes()) {
generateProbeBasedProfile();
} else {
generateLineNumBasedProfile();
if (Binary->usePseudoProbes())
Binary->decodePseudoProbe();
if (SampleCounters) {
if (Binary->usePseudoProbes()) {
generateProbeBasedProfile();
} else {
generateLineNumBasedProfile();
}
}
if (Binary->getTrackFuncContextSize())
@ -709,7 +748,7 @@ void CSProfileGenerator::computeSizeForProfiledFunctions() {
}
void CSProfileGenerator::generateLineNumBasedProfile() {
for (const auto &CI : SampleCounters) {
for (const auto &CI : *SampleCounters) {
const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
// Get or create function profile for the range
@ -967,10 +1006,9 @@ extractPrefixContextStack(SampleContextFrameVector &ContextStack,
}
void CSProfileGenerator::generateProbeBasedProfile() {
Binary->decodePseudoProbe();
// Enable pseudo probe functionalities in SampleProf
FunctionSamples::ProfileIsProbeBased = true;
for (const auto &CI : SampleCounters) {
for (const auto &CI : *SampleCounters) {
const AddrBasedCtxKey *CtxKey =
dyn_cast<AddrBasedCtxKey>(CI.first.getPtr());
SampleContextFrameVector ContextStack;

View File

@ -33,11 +33,18 @@ class ProfileGeneratorBase {
public:
ProfileGeneratorBase(ProfiledBinary *Binary,
const ContextSampleCounterMap &Counters)
const ContextSampleCounterMap *Counters)
: Binary(Binary), SampleCounters(Counters){};
ProfileGeneratorBase(ProfiledBinary *Binary,
const SampleProfileMap &&Profiles)
: Binary(Binary), ProfileMap(std::move(Profiles)){};
virtual ~ProfileGeneratorBase() = default;
static std::unique_ptr<ProfileGeneratorBase>
create(ProfiledBinary *Binary, const ContextSampleCounterMap &SampleCounters,
create(ProfiledBinary *Binary, const ContextSampleCounterMap *Counters,
bool ProfileIsCSFlat);
static std::unique_ptr<ProfileGeneratorBase>
create(ProfiledBinary *Binary, const SampleProfileMap &&ProfileMap,
bool ProfileIsCSFlat);
virtual void generateProfile() = 0;
void write();
@ -113,20 +120,22 @@ protected:
uint64_t ColdCountThreshold;
ProfiledBinary *Binary = nullptr;
// Used by SampleProfileWriter
SampleProfileMap ProfileMap;
ProfiledBinary *Binary = nullptr;
const ContextSampleCounterMap &SampleCounters;
const ContextSampleCounterMap *SampleCounters = nullptr;
};
class ProfileGenerator : public ProfileGeneratorBase {
public:
ProfileGenerator(ProfiledBinary *Binary,
const ContextSampleCounterMap &Counters)
const ContextSampleCounterMap *Counters)
: ProfileGeneratorBase(Binary, Counters){};
ProfileGenerator(ProfiledBinary *Binary, const SampleProfileMap &&Profiles)
: ProfileGeneratorBase(Binary, std::move(Profiles)){};
void generateProfile() override;
private:
@ -143,9 +152,10 @@ private:
void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter);
void
populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
void populateBodySamplesWithProbesForAllFunctions(const RangeSample &RangeCounter);
void
populateBoundarySamplesWithProbesForAllFunctions(const BranchSample &BranchCounters);
populateBodySamplesWithProbesForAllFunctions(const RangeSample &RangeCounter);
void populateBoundarySamplesWithProbesForAllFunctions(
const BranchSample &BranchCounters);
void postProcessProfiles();
void trimColdProfiles(const SampleProfileMap &Profiles,
uint64_t ColdCntThreshold);
@ -154,9 +164,10 @@ private:
class CSProfileGenerator : public ProfileGeneratorBase {
public:
CSProfileGenerator(ProfiledBinary *Binary,
const ContextSampleCounterMap &Counters)
const ContextSampleCounterMap *Counters)
: ProfileGeneratorBase(Binary, Counters){};
CSProfileGenerator(ProfiledBinary *Binary, const SampleProfileMap &&Profiles)
: ProfileGeneratorBase(Binary, std::move(Profiles)){};
void generateProfile() override;
// Trim the context stack at a given depth.

View File

@ -49,6 +49,12 @@ static cl::opt<std::string> UnsymbolizedProfFilename(
static cl::alias UPA("up", cl::desc("Alias for --unsymbolized-profile"),
cl::aliasopt(UnsymbolizedProfFilename));
// --llvm-sample-profile: path of an existing LLVM sample profile to use as the
// input. validateCommandLine() adds this option's occurrence count to those of
// the perf data, perf script and unsymbolized profile inputs and checks that
// the total is exactly one.
static cl::opt<std::string>
SampleProfFilename("llvm-sample-profile",
cl::value_desc("llvm sample profile"), cl::ZeroOrMore,
cl::desc("Path of the LLVM sample profile"),
cl::cat(ProfGenCategory));
static cl::opt<std::string>
BinaryPath("binary", cl::value_desc("binary"), cl::Required,
cl::desc("Path of profiled executable binary."),
@ -76,7 +82,9 @@ static void validateCommandLine() {
uint16_t HasPerfScript = PerfScriptFilename.getNumOccurrences();
uint16_t HasUnsymbolizedProfile =
UnsymbolizedProfFilename.getNumOccurrences();
uint16_t S = HasPerfData + HasPerfScript + HasUnsymbolizedProfile;
uint16_t HasSampleProfile = SampleProfFilename.getNumOccurrences();
uint16_t S =
HasPerfData + HasPerfScript + HasUnsymbolizedProfile + HasSampleProfile;
if (S != 1) {
std::string Msg =
S > 1
@ -97,6 +105,7 @@ static void validateCommandLine() {
CheckFileExists(HasPerfData, PerfDataFilename);
CheckFileExists(HasPerfScript, PerfScriptFilename);
CheckFileExists(HasUnsymbolizedProfile, UnsymbolizedProfFilename);
CheckFileExists(HasSampleProfile, SampleProfFilename);
}
if (!llvm::sys::fs::exists(BinaryPath)) {
@ -146,20 +155,34 @@ int main(int argc, const char *argv[]) {
if (ShowDisassemblyOnly)
return EXIT_SUCCESS;
PerfInputFile PerfFile = getPerfInputFile();
std::unique_ptr<PerfReaderBase> Reader =
PerfReaderBase::create(Binary.get(), PerfFile);
// Parse perf events and samples
Reader->parsePerfTraces();
if (SampleProfFilename.getNumOccurrences()) {
LLVMContext Context;
auto ReaderOrErr = SampleProfileReader::create(SampleProfFilename, Context);
std::unique_ptr<sampleprof::SampleProfileReader> Reader =
std::move(ReaderOrErr.get());
Reader->read();
std::unique_ptr<ProfileGeneratorBase> Generator =
ProfileGeneratorBase::create(Binary.get(),
std::move(Reader->getProfiles()),
Reader->profileIsCSFlat());
Generator->generateProfile();
Generator->write();
} else {
PerfInputFile PerfFile = getPerfInputFile();
std::unique_ptr<PerfReaderBase> Reader =
PerfReaderBase::create(Binary.get(), PerfFile);
// Parse perf events and samples
Reader->parsePerfTraces();
if (SkipSymbolization)
return EXIT_SUCCESS;
if (SkipSymbolization)
return EXIT_SUCCESS;
std::unique_ptr<ProfileGeneratorBase> Generator =
ProfileGeneratorBase::create(Binary.get(), Reader->getSampleCounters(),
Reader->profileIsCSFlat());
Generator->generateProfile();
Generator->write();
std::unique_ptr<ProfileGeneratorBase> Generator =
ProfileGeneratorBase::create(Binary.get(), &Reader->getSampleCounters(),
Reader->profileIsCSFlat());
Generator->generateProfile();
Generator->write();
}
return EXIT_SUCCESS;
}