forked from OSchip/llvm-project
[CSSPGO] Do not recount callee samples when computing profile summary for nested CS profile.
When generating a nested CS profile with all calling contexts of a function duplicated into a base profile under `--generate-merged-base-profiles`, do not recount callee samples when computing the profile summary. This fixes the profile summary mismatch between the flat CS profile and the nested CS profile, for both the extbinary and text formats. Reviewed By: wenlei. Differential Revision: https://reviews.llvm.org/D119494
This commit is contained in:
parent
088fbc0cb4
commit
f0f70ae674
|
@ -413,6 +413,8 @@ enum ContextAttributeMask {
|
|||
ContextNone = 0x0,
|
||||
ContextWasInlined = 0x1, // Leaf of context was inlined in previous build
|
||||
ContextShouldBeInlined = 0x2, // Leaf of context should be inlined
|
||||
ContextDuplicatedIntoBase =
|
||||
0x4, // Leaf of context is duplicated into the base profile
|
||||
};
|
||||
|
||||
// Represents a context frame with function name and line location
|
||||
|
|
|
@ -110,7 +110,13 @@ void SampleProfileSummaryBuilder::addRecord(
|
|||
NumFunctions++;
|
||||
if (FS.getHeadSamples() > MaxFunctionCount)
|
||||
MaxFunctionCount = FS.getHeadSamples();
|
||||
} else if (FS.getContext().hasAttribute(
|
||||
sampleprof::ContextDuplicatedIntoBase)) {
|
||||
// Do not recount callee samples if they are already merged into their base
|
||||
// profiles. This can happen to CS nested profile.
|
||||
return;
|
||||
}
|
||||
|
||||
for (const auto &I : FS.getBodySamples()) {
|
||||
uint64_t Count = I.second.getSamples();
|
||||
addCount(Count);
|
||||
|
|
|
@ -531,8 +531,14 @@ void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) {
|
|||
// thus done optionally. It is seen that duplicating context profiles into
|
||||
// base profiles improves the code quality for thinlto build by allowing a
|
||||
// profile in the prelink phase for to-be-fully-inlined functions.
|
||||
if (!NodeProfile || GenerateMergedBaseProfiles)
|
||||
if (!NodeProfile) {
|
||||
ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
|
||||
} else if (GenerateMergedBaseProfiles) {
|
||||
ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
|
||||
auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
|
||||
SamplesMap[ChildProfile->getName().str()].getContext().setAttribute(
|
||||
ContextDuplicatedIntoBase);
|
||||
}
|
||||
|
||||
// Contexts coming with a `ContextShouldBeInlined` attribute indicate this
|
||||
// is a preinliner-computed profile.
|
||||
|
|
|
@ -6,8 +6,12 @@ RUN: llvm-profdata merge --sample --extbinary -output=%t.profbin %S/Inputs/cs-sa
|
|||
RUN: llvm-profdata merge --sample --text -output=%t2.proftext %t.profbin
|
||||
RUN: FileCheck %s < %t2.proftext --match-full-lines --strict-whitespace
|
||||
RUN: llvm-profdata show --sample -show-sec-info-only %t.profbin | FileCheck %s -check-prefix=PREINLINE
|
||||
RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1
|
||||
RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -check-prefix=RECOUNT
|
||||
RUN: llvm-profdata merge --sample --text -output=%t3.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1
|
||||
RUN: FileCheck %s < %t3.proftext --match-full-lines --strict-whitespace -check-prefix=RECOUNT
|
||||
RUN: llvm-profdata merge --sample --extbinary -output=%t2.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1
|
||||
RUN: llvm-profdata show -sample -detailed-summary %S/Inputs/cs-sample-preinline.proftext | FileCheck %s -check-prefix=SUMMARY
|
||||
RUN: llvm-profdata show -sample -detailed-summary %t2.profbin | FileCheck %s -check-prefix=SUMMARY
|
||||
RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -check-prefix=SUMMARY
|
||||
|
||||
|
||||
; CHECK:main:1968679:12
|
||||
|
@ -60,8 +64,8 @@ RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -check-pr
|
|||
; RECOUNT-NEXT: 3: 287884
|
||||
; RECOUNT-NEXT: 4: 287864 _Z3fibi:315608
|
||||
; RECOUNT-NEXT: 15: 23
|
||||
; RECOUNT-NEXT: !Attributes: 2
|
||||
; RECOUNT-NEXT: !Attributes: 2
|
||||
; RECOUNT-NEXT: !Attributes: 6
|
||||
; RECOUNT-NEXT: !Attributes: 6
|
||||
; RECOUNT-NEXT: 3.1: _Z5funcBi:500973
|
||||
; RECOUNT-NEXT: 0: 19
|
||||
; RECOUNT-NEXT: 1: 19 _Z8funcLeafi:20
|
||||
|
@ -74,8 +78,8 @@ RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -check-pr
|
|||
; RECOUNT-NEXT: 10: 23324
|
||||
; RECOUNT-NEXT: 11: 23327 _Z3fibi:25228
|
||||
; RECOUNT-NEXT: 15: 11
|
||||
; RECOUNT-NEXT: !Attributes: 2
|
||||
; RECOUNT-NEXT: !Attributes: 2
|
||||
; RECOUNT-NEXT: !Attributes: 6
|
||||
; RECOUNT-NEXT: !Attributes: 6
|
||||
; RECOUNT-NEXT:_Z8funcLeafi:1968152:31
|
||||
; RECOUNT-NEXT: 0: 21
|
||||
; RECOUNT-NEXT: 1: 21
|
||||
|
@ -95,7 +99,7 @@ RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -check-pr
|
|||
; RECOUNT-NEXT: 3: 287884
|
||||
; RECOUNT-NEXT: 4: 287864 _Z3fibi:315608
|
||||
; RECOUNT-NEXT: 15: 23
|
||||
; RECOUNT-NEXT: !Attributes: 2
|
||||
; RECOUNT-NEXT: !Attributes: 6
|
||||
; RECOUNT-NEXT: !Attributes: 2
|
||||
; RECOUNT-NEXT:_Z5funcBi:501213:32
|
||||
; RECOUNT-NEXT: 0: 32
|
||||
|
@ -109,7 +113,7 @@ RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -check-pr
|
|||
; RECOUNT-NEXT: 10: 23324
|
||||
; RECOUNT-NEXT: 11: 23327 _Z3fibi:25228
|
||||
; RECOUNT-NEXT: 15: 11
|
||||
; RECOUNT-NEXT: !Attributes: 2
|
||||
; RECOUNT-NEXT: !Attributes: 6
|
||||
|
||||
; PROBE:main:1968679:12
|
||||
; PROBE-NEXT: 2: 24
|
||||
|
@ -153,3 +157,27 @@ RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -check-pr
|
|||
|
||||
|
||||
; PREINLINE: ProfileSummarySection {{.*}} Flags: {context-nested}
|
||||
|
||||
|
||||
; SUMMARY: Total functions: 4
|
||||
; SUMMARY-NEXT: Maximum function count: 32
|
||||
; SUMMARY-NEXT: Maximum block count: 362830
|
||||
; SUMMARY-NEXT: Total number of blocks: 16
|
||||
; SUMMARY-NEXT: Total count: 772562
|
||||
; SUMMARY-NEXT: Detailed summary:
|
||||
; SUMMARY-NEXT: 1 blocks with count >= 362830 account for 1 percentage of the total counts.
|
||||
; SUMMARY-NEXT: 1 blocks with count >= 362830 account for 10 percentage of the total counts.
|
||||
; SUMMARY-NEXT: 1 blocks with count >= 362830 account for 20 percentage of the total counts.
|
||||
; SUMMARY-NEXT: 1 blocks with count >= 362830 account for 30 percentage of the total counts.
|
||||
; SUMMARY-NEXT: 1 blocks with count >= 362830 account for 40 percentage of the total counts.
|
||||
; SUMMARY-NEXT: 2 blocks with count >= 362805 account for 50 percentage of the total counts.
|
||||
; SUMMARY-NEXT: 2 blocks with count >= 362805 account for 60 percentage of the total counts.
|
||||
; SUMMARY-NEXT: 2 blocks with count >= 362805 account for 70 percentage of the total counts.
|
||||
; SUMMARY-NEXT: 2 blocks with count >= 362805 account for 80 percentage of the total counts.
|
||||
; SUMMARY-NEXT: 2 blocks with count >= 362805 account for 90 percentage of the total counts.
|
||||
; SUMMARY-NEXT: 3 blocks with count >= 23327 account for 95 percentage of the total counts.
|
||||
; SUMMARY-NEXT: 4 blocks with count >= 23324 account for 99 percentage of the total counts.
|
||||
; SUMMARY-NEXT: 4 blocks with count >= 23324 account for 99.9 percentage of the total counts.
|
||||
; SUMMARY-NEXT: 11 blocks with count >= 24 account for 99.99 percentage of the total counts.
|
||||
; SUMMARY-NEXT: 16 blocks with count >= 10 account for 99.999 percentage of the total counts.
|
||||
; SUMMARY-NEXT: 16 blocks with count >= 10 account for 99.9999 percentage of the total counts.
|
||||
|
|
|
@ -65,4 +65,4 @@
|
|||
; CHECK-PREINL-NEST-NEXT: 65526: 14
|
||||
; CHECK-PREINL-NEST-NEXT: 3.1: bar:84
|
||||
; CHECK-PREINL-NEST-NEXT: 1: 14
|
||||
; CHECK-PREINL-NEST-NEXT: !Attributes: 3
|
||||
; CHECK-PREINL-NEST-NEXT: !Attributes: 7
|
||||
|
|
Loading…
Reference in New Issue