[llvm-profgen] Trim cold function profiles for non-CS AutoFDO

This change allows trimming a function profile when it is considered cold for baseline (non-CS) AutoFDO. We reuse the cold threshold from `ProfileSummaryBuilder::getColdCountThreshold(..)`, which can be set by percent (--profile-summary-cutoff-cold) or by value (--profile-summary-cold-count).

Reviewed By: hoy, wenlei

Differential Revision: https://reviews.llvm.org/D113785
This commit is contained in:
wlei 2021-11-28 23:43:11 -08:00
parent 277f86d610
commit 27cb3707db
6 changed files with 154 additions and 11 deletions

View File

@ -0,0 +1,55 @@
27
400540-400540:10
400650-40066d:31
400686-400689:3
40068b-4006a2:30
4006b0-4006b7:3
4006b0-4006bf:60
4006b0-4006c8:6
4006d0-4006ea:51
4006d0-400700:4
4006ec-400700:30
400710-40072f:5
400740-400753:3
400740-40075b:9
400740-40076e:14
400743-400753:3
400743-40075b:43
400743-40076e:11
400755-40075b:4
400770-400788:6
400790-400792:12
400790-4007a6:12
4007a8-4007b8:11
4007bd-4007ca:12
4007cf-4007d7:12
4007d7-4007d7:12
400870-400870:12
400875-4008bf:10
26
40066d->400686:3
400675->400682:1
400689->4006b9:4
4006a2->4007a8:6
4006b7->40068b:3
4006bf->4006d0:9
4006c8->4006b0:7
4006ca->4006ec:3
4006ea->4006b0:5
400700->4006b0:7
40072f->400755:5
400753->400770:6
40075b->400743:58
40075f->400740:2
40076e->400740:25
400788->4007a8:6
400792->4007d7:12
4007a6->400650:7
4007a6->400710:5
4007b8->400790:12
4007ca->400790:12
4007d7->4007bd:12
4007d7->4007cf:13
40082f->400790:1
400870->400540:12
4008bf->400870:15

View File

@ -0,0 +1,68 @@
; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/cold-profile-trimming.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --trim-cold-profile=0
; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-NOTRIM
; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/cold-profile-trimming.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --trim-cold-profile=1 --profile-summary-cold-count=100
; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-TRIM
;CHECK-NOTRIM: partition_pivot_last:1091:7
;CHECK-NOTRIM: partition_pivot_first:365:5
;CHECK-NOTRIM: quick_sort:83:25
;CHECK-NOTRIM: main:52:0
;CHECK-TRIM: partition_pivot_last:1091:7
;CHECK-TRIM: partition_pivot_first:365:5
;CHECK-TRIM-NOT: quick_sort:83:25
;CHECK-TRIM-NOT: main:52:0
; original code:
; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out
#include <stdio.h>
#include <stdlib.h>
// Exchanges the two ints that a and b point to.
void swap(int *a, int *b) {
int t = *a;
*a = *b;
*b = t;
}
// Partitions array[low..high] using array[high] as the pivot: every element
// in [low, high) that is less than the pivot is swapped toward the front,
// then the pivot is swapped into index i + 1. Returns that index.
int partition_pivot_last(int* array, int low, int high) {
int pivot = array[high];
int i = low - 1;
for (int j = low; j < high; j++)
if (array[j] < pivot)
swap(&array[++i], &array[j]);
swap(&array[i + 1], &array[high]);
return (i + 1);
}
// Partitions array[low..high] using array[low] as the pivot: every element
// in (low, high] that is less than the pivot is moved to index i (the swap
// is skipped when j == i), then the pivot is swapped into index i - 1.
// Returns that index.
int partition_pivot_first(int* array, int low, int high) {
int pivot = array[low];
int i = low + 1;
for (int j = low + 1; j <= high; j++)
if (array[j] < pivot) { if (j != i) swap(&array[i], &array[j]); i++;}
swap(&array[i - 1], &array[low]);
return i - 1;
}
// Recursive quicksort over array[low..high]. The partition step is called
// through the partition_func pointer; the two sub-ranges on either side of
// the returned pivot index are then sorted with the same partition_func.
// Does nothing when low >= high.
void quick_sort(int* array, int low, int high, int (*partition_func)(int *, int, int)) {
if (low < high) {
int pi = (*partition_func)(array, low, high);
quick_sort(array, low, pi - 1, partition_func);
quick_sort(array, pi + 1, high, partition_func);
}
}
// Driver: repeats 100 * 1000 times -- refill a 200-element array (each index
// that is a multiple of 10 gets its own index, all others get rand() % size),
// quick_sort it using partition_pivot_first when i % 3 == 0 and
// partition_pivot_last otherwise, then add array[i % size] to sum.
// Prints the sum at the end. The malloc result is neither checked nor freed.
int main() {
const int size = 200;
int sum = 0;
int *array = malloc(size * sizeof(int));
for(int i = 0; i < 100 * 1000; i++) {
for(int j = 0; j < size; j++)
array[j] = j % 10 ? rand() % size: j;
int (*fptr)(int *, int, int) = i % 3 ? partition_pivot_last : partition_pivot_first;
quick_sort(array, 0, size - 1, fptr);
sum += array[i % size];
}
printf("sum=%d\n", sum);
return 0;
}

View File

@ -11,7 +11,7 @@
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-NO-PREINL
; Test cold profile trimming. Only base profiles should be dropped.
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 --csprof-trim-cold-context=1 --profile-summary-hot-count=250
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 --trim-cold-profile=1 --profile-summary-hot-count=250
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-TRIM

View File

@ -2,8 +2,8 @@
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t1 --compress-recursion=-1 --profile-summary-hot-count=8
; RUN: FileCheck %s --input-file %t1
; Test --csprof-trim-cold-context=0
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --csprof-trim-cold-context=0
; Test --trim-cold-profile=0
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --trim-cold-profile=0
; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-KEEP-COLD
; Test --csprof-merge-cold-context=0
@ -11,7 +11,7 @@
; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK-UNMERGED
; Test --csprof-max-cold-context-depth
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --csprof-trim-cold-context=0 --csprof-max-cold-context-depth=2
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --trim-cold-profile=0 --csprof-max-cold-context-depth=2
; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-COLD-CONTEXT-LENGTH
; CHECK: [fa]:14:4

View File

@ -50,17 +50,17 @@ static cl::opt<int32_t, true> RecursionCompression(
cl::Hidden,
cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
// --trim-cold-profile: boolean switch, off by default. When set, profiles
// whose total count is below the cold threshold are dropped.
static cl::opt<bool>
TrimColdProfile("trim-cold-profile", cl::init(false), cl::ZeroOrMore,
cl::desc("If the total count of the profile is smaller "
"than threshold, it will be trimmed."));
// --csprof-merge-cold-context: boolean switch, on by default. When set,
// context profiles whose total count is below the threshold are merged into
// the context-less base profile.
static cl::opt<bool> CSProfMergeColdContext(
"csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
cl::desc("If the total count of context profile is smaller than "
"the threshold, it will be merged into context-less base "
"profile."));
// --csprof-trim-cold-context: boolean switch, off by default. When set,
// profiles that are still below the threshold after merging are trimmed.
static cl::opt<bool> CSProfTrimColdContext(
"csprof-trim-cold-context", cl::init(false), cl::ZeroOrMore,
cl::desc("If the total count of the profile after all merge is done "
"is still smaller than threshold, it will be trimmed."));
static cl::opt<uint32_t> CSProfMaxColdContextDepth(
"csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore,
cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
@ -378,9 +378,27 @@ void ProfileGenerator::generateProfile() {
// Post-processing steps for the non-CS profile generator. The call order
// matters: computeSummaryAndThreshold() runs first (presumably populating
// ColdCountThreshold -- confirm in its definition), trimColdProfiles() then
// removes cold entries from ProfileMap when --trim-cold-profile is set, and
// calculateAndShowDensity() runs last, on whatever remains in ProfileMap.
void ProfileGenerator::postProcessProfiles() {
computeSummaryAndThreshold();
trimColdProfiles(ProfileMap, ColdCountThreshold);
calculateAndShowDensity(ProfileMap);
}
void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
uint64_t ColdCntThreshold) {
if (!TrimColdProfile)
return;
// Move cold profiles into a tmp container.
std::vector<SampleContext> ColdProfiles;
for (const auto &I : ProfileMap) {
if (I.second.getTotalSamples() < ColdCntThreshold)
ColdProfiles.emplace_back(I.first);
}
// Remove the cold profile from ProfileMap.
for (const auto &I : ColdProfiles)
ProfileMap.erase(I);
}
void ProfileGenerator::generateLineNumBasedProfile() {
assert(SampleCounters.size() == 1 &&
"Must have one entry for profile generation.");
@ -732,10 +750,10 @@ void CSProfileGenerator::postProcessProfiles() {
}
// Trim and merge cold context profile using cold threshold above.
if (CSProfTrimColdContext || CSProfMergeColdContext) {
if (TrimColdProfile || CSProfMergeColdContext) {
SampleContextTrimmer(ProfileMap)
.trimAndMergeColdContextProfiles(
HotCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext,
HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
CSProfMaxColdContextDepth, EnableCSPreInliner);
}

View File

@ -129,6 +129,8 @@ private:
void
populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
void postProcessProfiles();
void trimColdProfiles(const SampleProfileMap &Profiles,
uint64_t ColdCntThreshold);
};
using ProbeCounterMap =