forked from OSchip/llvm-project
[llvm-profgen] Skip duplication factor outside of body sample computation
We incorrectly applied the duplication factor to total samples even though those samples are already accumulated (SUM) rather than taken as MAX. This caused the profile to have bloated total samples for functions with unrolled or vectorized loops. This change fixes the issue for total samples, head samples and call target samples. Differential Revision: https://reviews.llvm.org/D112042
This commit is contained in:
parent
063c2f89aa
commit
e8c245dcd3
|
@ -46,7 +46,7 @@
|
|||
;CHECK-NEXT: 1: 6
|
||||
;CHECK-NEXT: 2: 6
|
||||
;CHECK-NEXT: 3: 6
|
||||
;CHECK-NEXT: partition_pivot_last:647:7
|
||||
;CHECK-NEXT: partition_pivot_last:389:7
|
||||
;CHECK-NEXT: 1: 6
|
||||
;CHECK-NEXT: 2: 6
|
||||
;CHECK-NEXT: 3: 6
|
||||
|
@ -62,7 +62,7 @@
|
|||
|
||||
;CHECK-NEXT: 6: 5
|
||||
;CHECK-NEXT: 7: 5
|
||||
;CHECK-NEXT: 5: swap:116
|
||||
;CHECK-NEXT: 5: swap:61
|
||||
|
||||
;w/o duplication factor : 1: 9
|
||||
;w/o duplication factor : 2: 9
|
||||
|
|
|
@ -274,6 +274,12 @@ void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
|
|||
uint64_t Count) {
|
||||
// Use the maximum count of samples with same line location
|
||||
uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
|
||||
|
||||
// Use duplication factor to compensate for loop unroll/vectorization.
|
||||
// Note that this is only needed when we're taking MAX of the counts at
|
||||
// the location instead of SUM.
|
||||
Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
|
||||
|
||||
ErrorOr<uint64_t> R =
|
||||
FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
|
||||
|
||||
|
@ -384,12 +390,10 @@ void ProfileGenerator::populateBodySamplesForAllFunctions(
|
|||
const SampleContextFrameVector &FrameVec =
|
||||
Binary->getFrameLocationStack(Offset);
|
||||
if (!FrameVec.empty()) {
|
||||
uint64_t DC = Count * getDuplicationFactor(
|
||||
FrameVec.back().Location.Discriminator);
|
||||
FunctionSamples &FunctionProfile =
|
||||
getLeafProfileAndAddTotalSamples(FrameVec, DC);
|
||||
getLeafProfileAndAddTotalSamples(FrameVec, Count);
|
||||
updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
|
||||
DC);
|
||||
Count);
|
||||
}
|
||||
// Move to next IP within the range.
|
||||
IP.advance();
|
||||
|
@ -430,7 +434,6 @@ void ProfileGenerator::populateBoundarySamplesForAllFunctions(
|
|||
const SampleContextFrameVector &FrameVec =
|
||||
Binary->getFrameLocationStack(SourceOffset);
|
||||
if (!FrameVec.empty()) {
|
||||
Count *= getDuplicationFactor(FrameVec.back().Location.Discriminator);
|
||||
FunctionSamples &FunctionProfile =
|
||||
getLeafProfileAndAddTotalSamples(FrameVec, Count);
|
||||
FunctionProfile.addCalledTargetSamples(
|
||||
|
@ -545,10 +548,8 @@ void CSProfileGenerator::populateBodySamplesForFunction(
|
|||
auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
|
||||
if (LeafLoc.hasValue()) {
|
||||
// Recording body sample for this specific context
|
||||
uint64_t DC =
|
||||
Count * getDuplicationFactor(LeafLoc->Location.Discriminator);
|
||||
updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, DC);
|
||||
FunctionProfile.addTotalSamples(DC);
|
||||
updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
|
||||
FunctionProfile.addTotalSamples(Count);
|
||||
}
|
||||
|
||||
// Move to next IP within the range
|
||||
|
@ -575,7 +576,6 @@ void CSProfileGenerator::populateBoundarySamplesForFunction(
|
|||
auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
|
||||
if (!LeafLoc.hasValue())
|
||||
continue;
|
||||
Count *= getDuplicationFactor(LeafLoc->Location.Discriminator);
|
||||
FunctionProfile.addCalledTargetSamples(
|
||||
LeafLoc->Location.LineOffset,
|
||||
getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
|
||||
|
|
Loading…
Reference in New Issue