forked from OSchip/llvm-project
[llvm-profgen] Update callsite body samples by summing up all call target samples.
Current profile generation calculates callsite body samples and call target samples separately. The former is done based on LBR range samples while the latter is done based on branch samples. Note that there's a subtle difference: an LBR range is formed from two consecutive branch samples. Therefore the last entry in an LBR record will not be counted towards body samples, while there's still a chance for it to be counted towards call targets if it is a function call. I'm making sense of the callsite body samples by updating them to the aggregation of call targets. Reviewed By: wenlei Differential Revision: https://reviews.llvm.org/D122609
This commit is contained in:
parent
f287da8a15
commit
acfd0a3456
|
@ -387,6 +387,13 @@ public:
|
|||
return SortCallTargets(CallTargets);
|
||||
}
|
||||
|
||||
uint64_t getCallTargetSum() const {
|
||||
uint64_t Sum = 0;
|
||||
for (const auto &I : CallTargets)
|
||||
Sum += I.second;
|
||||
return Sum;
|
||||
}
|
||||
|
||||
/// Sort call targets in descending order of call frequency.
|
||||
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) {
|
||||
SortedCallTargetSet SortedTargets;
|
||||
|
@ -779,6 +786,19 @@ public:
|
|||
return BodySamples[LineLocation(Index, 0)].merge(S, Weight);
|
||||
}
|
||||
|
||||
// Accumulate all call target samples to update the body samples.
|
||||
void updateCallsiteSamples() {
|
||||
for (auto &I : BodySamples) {
|
||||
uint64_t TargetSamples = I.second.getCallTargetSum();
|
||||
// It's possible that the body sample count can be greater than the call
|
||||
// target sum. E.g, if some call targets are external targets, they won't
|
||||
// be considered valid call targets, but the body sample count which is
|
||||
// from lbr ranges can actually include them.
|
||||
if (TargetSamples > I.second.getSamples())
|
||||
I.second.addSamples(TargetSamples - I.second.getSamples());
|
||||
}
|
||||
}
|
||||
|
||||
// Accumulate all body samples to set total samples.
|
||||
void updateTotalSamples() {
|
||||
setTotalSamples(0);
|
||||
|
|
|
@ -59,7 +59,7 @@
|
|||
;CHECK: 6.1: 17
|
||||
;CHECK: 6.3: 17
|
||||
;CHECK: 7: 0
|
||||
;CHECK: 8: 0 quick_sort:1
|
||||
;CHECK: 8: 1 quick_sort:1
|
||||
;CHECK: 9: 0
|
||||
;CHECK: 11: 0
|
||||
;CHECK: 14: 0
|
||||
|
@ -97,7 +97,7 @@
|
|||
;CHECK: quick_sort:903:25
|
||||
;CHECK: 1: 24
|
||||
;CHECK: 2: 12 partition_pivot_last:7 partition_pivot_first:5
|
||||
;CHECK: 3: 11 quick_sort:12
|
||||
;CHECK: 3: 12 quick_sort:12
|
||||
;CHECK: 4: 12 quick_sort:12
|
||||
;CHECK: 6: 24
|
||||
;CHECK: 65507: 12
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
;CHECK: 0: 0
|
||||
;CHECK: 1: 0
|
||||
;CHECK: 2: 19
|
||||
;CHECK: 3: 19 bar:21
|
||||
;CHECK: 3: 21 bar:21
|
||||
;CHECK: 4: 0
|
||||
;CHECK: 5: 0
|
||||
;CHECK: bar:926:21
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t1
|
||||
; RUN: FileCheck %s --input-file %t1 --check-prefix=CALLSITE
|
||||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t2 --update-total-samples=1
|
||||
; RUN: FileCheck %s --input-file %t2 --check-prefix=TOTAL
|
||||
|
||||
|
||||
;CALLSITE: foo:1241:0
|
||||
;CALLSITE: 0: 0
|
||||
;CALLSITE: 1: 0
|
||||
;CALLSITE: 2: 19
|
||||
;CALLSITE: 3: 21 bar:21
|
||||
;CALLSITE: 4: 0
|
||||
;CALLSITE: 5: 0
|
||||
|
||||
;TOTAL: foo:40:0
|
||||
;TOTAL: 0: 0
|
||||
;TOTAL: 1: 0
|
||||
;TOTAL: 2: 19
|
||||
;TOTAL: 3: 21 bar:21
|
||||
;TOTAL: 4: 0
|
||||
;TOTAL: 5: 0
|
||||
|
||||
|
||||
; original code:
|
||||
; clang -O3 -g -fdebug-info-for-profiling test.c -fno-inline -o a.out
|
||||
#include <stdio.h>
|
||||
|
||||
// Returns x - y when x is not a multiple of 3, and x + y otherwise.
int bar(int x, int y) {
|
||||
if (x % 3) {
|
||||
return x - y;
|
||||
}
|
||||
return x + y;
|
||||
}
|
||||
|
||||
// Loops i over [1, 4000 * 4000]: sets s = bar(i, s) when i is not a multiple
// of 91 and adds 30 to s otherwise, then prints s.
// NOTE(review): s is declared without an initializer and is read by the very
// first bar(i, s) call (i == 1), so the printed sum is indeterminate.
void foo() {
|
||||
int s, i = 0;
|
||||
while (i++ < 4000 * 4000)
|
||||
if (i % 91) s = bar(i, s); else s += 30;
|
||||
printf("sum is %d\n", s);
|
||||
}
|
||||
|
||||
// Entry point: runs foo() once and returns 0.
int main() {
|
||||
foo();
|
||||
return 0;
|
||||
}
|
|
@ -381,15 +381,26 @@ void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
|
|||
}
|
||||
|
||||
void ProfileGeneratorBase::updateTotalSamples() {
|
||||
if (!UpdateTotalSamples)
|
||||
return;
|
||||
|
||||
for (auto &Item : ProfileMap) {
|
||||
FunctionSamples &FunctionProfile = Item.second;
|
||||
FunctionProfile.updateTotalSamples();
|
||||
}
|
||||
}
|
||||
|
||||
void ProfileGeneratorBase::updateCallsiteSamples() {
|
||||
for (auto &Item : ProfileMap) {
|
||||
FunctionSamples &FunctionProfile = Item.second;
|
||||
FunctionProfile.updateCallsiteSamples();
|
||||
}
|
||||
}
|
||||
|
||||
void ProfileGeneratorBase::updateFunctionSamples() {
|
||||
updateCallsiteSamples();
|
||||
|
||||
if (UpdateTotalSamples)
|
||||
updateTotalSamples();
|
||||
}
|
||||
|
||||
void ProfileGeneratorBase::collectProfiledFunctions() {
|
||||
std::unordered_set<const BinaryFunction *> ProfiledFunctions;
|
||||
if (SampleCounters) {
|
||||
|
@ -491,7 +502,7 @@ void ProfileGenerator::generateLineNumBasedProfile() {
|
|||
// Fill in boundary sample counts as well as call site samples for calls
|
||||
populateBoundarySamplesForAllFunctions(SC.BranchCounter);
|
||||
|
||||
updateTotalSamples();
|
||||
updateFunctionSamples();
|
||||
}
|
||||
|
||||
void ProfileGenerator::generateProbeBasedProfile() {
|
||||
|
@ -505,7 +516,7 @@ void ProfileGenerator::generateProbeBasedProfile() {
|
|||
// Fill in boundary sample counts as well as call site samples for calls
|
||||
populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter);
|
||||
|
||||
updateTotalSamples();
|
||||
updateFunctionSamples();
|
||||
}
|
||||
|
||||
void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
|
||||
|
@ -785,7 +796,7 @@ void CSProfileGenerator::generateLineNumBasedProfile() {
|
|||
// body sample.
|
||||
populateInferredFunctionSamples();
|
||||
|
||||
updateTotalSamples();
|
||||
updateFunctionSamples();
|
||||
}
|
||||
|
||||
void CSProfileGenerator::populateBodySamplesForFunction(
|
||||
|
|
|
@ -100,8 +100,13 @@ protected:
|
|||
void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
|
||||
const SampleContextFrame &LeafLoc,
|
||||
uint64_t Count);
|
||||
|
||||
// Run updateCallsiteSamples() and then, when UpdateTotalSamples is set,
// updateTotalSamples().
void updateFunctionSamples();
|
||||
|
||||
// Call updateTotalSamples() on every function profile in ProfileMap.
void updateTotalSamples();
|
||||
|
||||
// Call updateCallsiteSamples() on every function profile in ProfileMap.
void updateCallsiteSamples();
|
||||
|
||||
StringRef getCalleeNameForOffset(uint64_t TargetOffset);
|
||||
|
||||
void computeSummaryAndThreshold();
|
||||
|
|
Loading…
Reference in New Issue