[llvm-profgen] Skip duplication factor outside of body sample computation

We incorrectly apply the duplication factor to total samples even though we already accumulate those samples instead of taking the MAX. This causes the profile to have bloated total samples for functions with unrolled or vectorized loops. This change fixes the issue for total samples, head samples, and call target samples.

Differential Revision: https://reviews.llvm.org/D112042
This commit is contained in:
Wenlei He 2021-10-18 17:44:45 -07:00
parent 063c2f89aa
commit e8c245dcd3
2 changed files with 12 additions and 12 deletions

View File

@ -46,7 +46,7 @@
;CHECK-NEXT: 1: 6
;CHECK-NEXT: 2: 6
;CHECK-NEXT: 3: 6
;CHECK-NEXT: partition_pivot_last:647:7
;CHECK-NEXT: partition_pivot_last:389:7
;CHECK-NEXT: 1: 6
;CHECK-NEXT: 2: 6
;CHECK-NEXT: 3: 6
@ -62,7 +62,7 @@
;CHECK-NEXT: 6: 5
;CHECK-NEXT: 7: 5
;CHECK-NEXT: 5: swap:116
;CHECK-NEXT: 5: swap:61
;w/o duplication factor : 1: 9
;w/o duplication factor : 2: 9

View File

@ -274,6 +274,12 @@ void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
uint64_t Count) {
// Use the maximum count of samples with same line location
uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
// Use duplication factor to compensate for loop unroll/vectorization.
// Note that this is only needed when we're taking MAX of the counts at
// the location instead of SUM.
Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
ErrorOr<uint64_t> R =
FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
@ -384,12 +390,10 @@ void ProfileGenerator::populateBodySamplesForAllFunctions(
const SampleContextFrameVector &FrameVec =
Binary->getFrameLocationStack(Offset);
if (!FrameVec.empty()) {
uint64_t DC = Count * getDuplicationFactor(
FrameVec.back().Location.Discriminator);
FunctionSamples &FunctionProfile =
getLeafProfileAndAddTotalSamples(FrameVec, DC);
getLeafProfileAndAddTotalSamples(FrameVec, Count);
updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
DC);
Count);
}
// Move to next IP within the range.
IP.advance();
@ -430,7 +434,6 @@ void ProfileGenerator::populateBoundarySamplesForAllFunctions(
const SampleContextFrameVector &FrameVec =
Binary->getFrameLocationStack(SourceOffset);
if (!FrameVec.empty()) {
Count *= getDuplicationFactor(FrameVec.back().Location.Discriminator);
FunctionSamples &FunctionProfile =
getLeafProfileAndAddTotalSamples(FrameVec, Count);
FunctionProfile.addCalledTargetSamples(
@ -545,10 +548,8 @@ void CSProfileGenerator::populateBodySamplesForFunction(
auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
if (LeafLoc.hasValue()) {
// Recording body sample for this specific context
uint64_t DC =
Count * getDuplicationFactor(LeafLoc->Location.Discriminator);
updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, DC);
FunctionProfile.addTotalSamples(DC);
updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
FunctionProfile.addTotalSamples(Count);
}
// Move to next IP within the range
@ -575,7 +576,6 @@ void CSProfileGenerator::populateBoundarySamplesForFunction(
auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
if (!LeafLoc.hasValue())
continue;
Count *= getDuplicationFactor(LeafLoc->Location.Discriminator);
FunctionProfile.addCalledTargetSamples(
LeafLoc->Location.LineOffset,
getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,