forked from OSchip/llvm-project
[llvm-profgen] Trim cold function profiles for non-CS AutoFDO
This change allows trimming a function's profile when it is considered cold for baseline (non-CS) AutoFDO. We reuse the cold threshold from `ProfileSummaryBuilder::getColdCountThreshold(..)`, which can be set by percentage (--profile-summary-cutoff-cold) or by value (--profile-summary-cold-count). Reviewed By: hoy, wenlei Differential Revision: https://reviews.llvm.org/D113785
This commit is contained in:
parent
277f86d610
commit
27cb3707db
|
@ -0,0 +1,55 @@
|
|||
27
|
||||
400540-400540:10
|
||||
400650-40066d:31
|
||||
400686-400689:3
|
||||
40068b-4006a2:30
|
||||
4006b0-4006b7:3
|
||||
4006b0-4006bf:60
|
||||
4006b0-4006c8:6
|
||||
4006d0-4006ea:51
|
||||
4006d0-400700:4
|
||||
4006ec-400700:30
|
||||
400710-40072f:5
|
||||
400740-400753:3
|
||||
400740-40075b:9
|
||||
400740-40076e:14
|
||||
400743-400753:3
|
||||
400743-40075b:43
|
||||
400743-40076e:11
|
||||
400755-40075b:4
|
||||
400770-400788:6
|
||||
400790-400792:12
|
||||
400790-4007a6:12
|
||||
4007a8-4007b8:11
|
||||
4007bd-4007ca:12
|
||||
4007cf-4007d7:12
|
||||
4007d7-4007d7:12
|
||||
400870-400870:12
|
||||
400875-4008bf:10
|
||||
26
|
||||
40066d->400686:3
|
||||
400675->400682:1
|
||||
400689->4006b9:4
|
||||
4006a2->4007a8:6
|
||||
4006b7->40068b:3
|
||||
4006bf->4006d0:9
|
||||
4006c8->4006b0:7
|
||||
4006ca->4006ec:3
|
||||
4006ea->4006b0:5
|
||||
400700->4006b0:7
|
||||
40072f->400755:5
|
||||
400753->400770:6
|
||||
40075b->400743:58
|
||||
40075f->400740:2
|
||||
40076e->400740:25
|
||||
400788->4007a8:6
|
||||
400792->4007d7:12
|
||||
4007a6->400650:7
|
||||
4007a6->400710:5
|
||||
4007b8->400790:12
|
||||
4007ca->400790:12
|
||||
4007d7->4007bd:12
|
||||
4007d7->4007cf:13
|
||||
40082f->400790:1
|
||||
400870->400540:12
|
||||
4008bf->400870:15
|
|
@ -0,0 +1,68 @@
|
|||
; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/cold-profile-trimming.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --trim-cold-profile=0
|
||||
; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-NOTRIM
|
||||
; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/cold-profile-trimming.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --trim-cold-profile=1 --profile-summary-cold-count=100
|
||||
; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-TRIM
|
||||
|
||||
;CHECK-NOTRIM: partition_pivot_last:1091:7
|
||||
;CHECK-NOTRIM: partition_pivot_first:365:5
|
||||
;CHECK-NOTRIM: quick_sort:83:25
|
||||
;CHECK-NOTRIM: main:52:0
|
||||
|
||||
;CHECK-TRIM: partition_pivot_last:1091:7
|
||||
;CHECK-TRIM: partition_pivot_first:365:5
|
||||
;CHECK-TRIM-NOT: quick_sort:83:25
|
||||
;CHECK-TRIM-NOT: main:52:0
|
||||
|
||||
; original code:
|
||||
; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
void swap(int *a, int *b) {
|
||||
int t = *a;
|
||||
*a = *b;
|
||||
*b = t;
|
||||
}
|
||||
|
||||
int partition_pivot_last(int* array, int low, int high) {
|
||||
int pivot = array[high];
|
||||
int i = low - 1;
|
||||
for (int j = low; j < high; j++)
|
||||
if (array[j] < pivot)
|
||||
swap(&array[++i], &array[j]);
|
||||
swap(&array[i + 1], &array[high]);
|
||||
return (i + 1);
|
||||
}
|
||||
|
||||
int partition_pivot_first(int* array, int low, int high) {
|
||||
int pivot = array[low];
|
||||
int i = low + 1;
|
||||
for (int j = low + 1; j <= high; j++)
|
||||
if (array[j] < pivot) { if (j != i) swap(&array[i], &array[j]); i++;}
|
||||
swap(&array[i - 1], &array[low]);
|
||||
return i - 1;
|
||||
}
|
||||
|
||||
void quick_sort(int* array, int low, int high, int (*partition_func)(int *, int, int)) {
|
||||
if (low < high) {
|
||||
int pi = (*partition_func)(array, low, high);
|
||||
quick_sort(array, low, pi - 1, partition_func);
|
||||
quick_sort(array, pi + 1, high, partition_func);
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
const int size = 200;
|
||||
int sum = 0;
|
||||
int *array = malloc(size * sizeof(int));
|
||||
for(int i = 0; i < 100 * 1000; i++) {
|
||||
for(int j = 0; j < size; j++)
|
||||
array[j] = j % 10 ? rand() % size: j;
|
||||
int (*fptr)(int *, int, int) = i % 3 ? partition_pivot_last : partition_pivot_first;
|
||||
quick_sort(array, 0, size - 1, fptr);
|
||||
sum += array[i % size];
|
||||
}
|
||||
printf("sum=%d\n", sum);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -11,7 +11,7 @@
|
|||
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-NO-PREINL
|
||||
|
||||
; Test cold profile trimming. Only base profiles should be dropped.
|
||||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 --csprof-trim-cold-context=1 --profile-summary-hot-count=250
|
||||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 --trim-cold-profile=1 --profile-summary-hot-count=250
|
||||
|
||||
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-TRIM
|
||||
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t1 --compress-recursion=-1 --profile-summary-hot-count=8
|
||||
; RUN: FileCheck %s --input-file %t1
|
||||
|
||||
; Test --csprof-trim-cold-context=0
|
||||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --csprof-trim-cold-context=0
|
||||
; Test --trim-cold-profile=0
|
||||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --trim-cold-profile=0
|
||||
; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-KEEP-COLD
|
||||
|
||||
; Test --csprof-merge-cold-context=0
|
||||
|
@ -11,7 +11,7 @@
|
|||
; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK-UNMERGED
|
||||
|
||||
; Test --csprof-max-cold-context-depth
|
||||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --csprof-trim-cold-context=0 --csprof-max-cold-context-depth=2
|
||||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --trim-cold-profile=0 --csprof-max-cold-context-depth=2
|
||||
; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-COLD-CONTEXT-LENGTH
|
||||
|
||||
; CHECK: [fa]:14:4
|
||||
|
|
|
@ -50,17 +50,17 @@ static cl::opt<int32_t, true> RecursionCompression(
|
|||
cl::Hidden,
|
||||
cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
|
||||
|
||||
// Command-line switch "trim-cold-profile"; off by default.
// Read by ProfileGenerator::trimColdProfiles, which returns early when it is
// unset, and passed to the context trimmer in
// CSProfileGenerator::postProcessProfiles.
static cl::opt<bool>
|
||||
TrimColdProfile("trim-cold-profile", cl::init(false), cl::ZeroOrMore,
|
||||
cl::desc("If the total count of the profile is smaller "
|
||||
"than threshold, it will be trimmed."));
|
||||
|
||||
// Command-line switch "csprof-merge-cold-context"; on by default.
// Forwarded to SampleContextTrimmer::trimAndMergeColdContextProfiles from
// CSProfileGenerator::postProcessProfiles.
static cl::opt<bool> CSProfMergeColdContext(
|
||||
"csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
|
||||
cl::desc("If the total count of context profile is smaller than "
|
||||
"the threshold, it will be merged into context-less base "
|
||||
"profile."));
|
||||
|
||||
static cl::opt<bool> CSProfTrimColdContext(
|
||||
"csprof-trim-cold-context", cl::init(false), cl::ZeroOrMore,
|
||||
cl::desc("If the total count of the profile after all merge is done "
|
||||
"is still smaller than threshold, it will be trimmed."));
|
||||
|
||||
static cl::opt<uint32_t> CSProfMaxColdContextDepth(
|
||||
"csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore,
|
||||
cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
|
||||
|
@ -378,9 +378,27 @@ void ProfileGenerator::generateProfile() {
|
|||
|
||||
// Runs the post-generation steps on ProfileMap in a fixed order: compute the
// summary and threshold, trim cold profiles, then calculate and show density.
// Density is therefore computed on the map after any trimming has happened.
void ProfileGenerator::postProcessProfiles() {
|
||||
// Presumably this is what populates ColdCountThreshold used below — confirm.
computeSummaryAndThreshold();
|
||||
// Returns without changing anything unless the trim-cold-profile option is set.
trimColdProfiles(ProfileMap, ColdCountThreshold);
|
||||
calculateAndShowDensity(ProfileMap);
|
||||
}
|
||||
|
||||
// Erases from the ProfileMap member every entry whose total sample count is
// strictly below ColdCntThreshold. Entries exactly at the threshold are kept.
// Does nothing unless the trim-cold-profile option is set.
//
// NOTE(review): the Profiles parameter is never read. Both the scan and the
// erase operate on the ProfileMap member. The only call visible in this file
// passes ProfileMap, so the two coincide there — confirm before passing any
// other map, since it would be silently ignored.
void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
|
||||
uint64_t ColdCntThreshold) {
|
||||
if (!TrimColdProfile)
|
||||
return;
|
||||
|
||||
// Collect the contexts (map keys) of cold profiles into a temporary vector.
// Erasing is deferred so ProfileMap is not modified while it is iterated.
|
||||
std::vector<SampleContext> ColdProfiles;
|
||||
for (const auto &I : ProfileMap) {
|
||||
if (I.second.getTotalSamples() < ColdCntThreshold)
|
||||
ColdProfiles.emplace_back(I.first);
|
||||
}
|
||||
|
||||
// Remove each collected cold profile from ProfileMap by its context key.
|
||||
for (const auto &I : ColdProfiles)
|
||||
ProfileMap.erase(I);
|
||||
}
|
||||
|
||||
void ProfileGenerator::generateLineNumBasedProfile() {
|
||||
assert(SampleCounters.size() == 1 &&
|
||||
"Must have one entry for profile generation.");
|
||||
|
@ -732,10 +750,10 @@ void CSProfileGenerator::postProcessProfiles() {
|
|||
}
|
||||
|
||||
// Trim and merge cold context profile using cold threshold above.
|
||||
if (CSProfTrimColdContext || CSProfMergeColdContext) {
|
||||
if (TrimColdProfile || CSProfMergeColdContext) {
|
||||
SampleContextTrimmer(ProfileMap)
|
||||
.trimAndMergeColdContextProfiles(
|
||||
HotCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext,
|
||||
HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
|
||||
CSProfMaxColdContextDepth, EnableCSPreInliner);
|
||||
}
|
||||
|
||||
|
|
|
@ -129,6 +129,8 @@ private:
|
|||
void
|
||||
populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
|
||||
void postProcessProfiles();
|
||||
void trimColdProfiles(const SampleProfileMap &Profiles,
|
||||
uint64_t ColdCntThreshold);
|
||||
};
|
||||
|
||||
using ProbeCounterMap =
|
||||
|
|
Loading…
Reference in New Issue