[SampleFDO] Don't mix up the existing indirect call value profile with the new

value profile annotated after inlining.

In https://reviews.llvm.org/D96806 and https://reviews.llvm.org/D97350, we
use the magic number -1 in the value profile to avoid repeated indirect call
promotion to the same target for an indirect call. Function updateIDTMetaData
is used to mark an target as being promoted in the value profile with the
magic number. updateIDTMetaData is also used to update the value profile
when an indirect call is inlined and new inline instance profile should be
applied. For the second case, currently updateIDTMetaData mixes up the
existing value profile of the indirect call with the new profile, leading
to the problematic senario that a target count is larger than the total count
in the value profile.

The patch fixes the problem. When updateIDTMetaData is used to update the
value profile after inlining, all the values in the existing value profile
will be dropped except the values with the magic number counts.

Differential Revision: https://reviews.llvm.org/D98835
This commit is contained in:
Wei Mi 2021-03-17 17:51:27 -07:00
parent 92ccc6cb17
commit 14756b70ee
3 changed files with 112 additions and 32 deletions

View File

@ -755,14 +755,8 @@ static void
updateIDTMetaData(Instruction &Inst, updateIDTMetaData(Instruction &Inst,
const SmallVectorImpl<InstrProfValueData> &CallTargets, const SmallVectorImpl<InstrProfValueData> &CallTargets,
uint64_t Sum) { uint64_t Sum) {
assert((Sum != 0 || (CallTargets.size() == 1 &&
CallTargets[0].Count == NOMORE_ICP_MAGICNUM)) &&
"If sum is 0, assume only one element in CallTargets with count "
"being NOMORE_ICP_MAGICNUM");
uint32_t NumVals = 0; uint32_t NumVals = 0;
// OldSum is the existing total count in the value profile data. // OldSum is the existing total count in the value profile data.
// It will be replaced by Sum if Sum is not 0.
uint64_t OldSum = 0; uint64_t OldSum = 0;
std::unique_ptr<InstrProfValueData[]> ValueData = std::unique_ptr<InstrProfValueData[]> ValueData =
std::make_unique<InstrProfValueData[]>(MaxNumPromotions); std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
@ -771,34 +765,44 @@ updateIDTMetaData(Instruction &Inst,
ValueData.get(), NumVals, OldSum, true); ValueData.get(), NumVals, OldSum, true);
DenseMap<uint64_t, uint64_t> ValueCountMap; DenseMap<uint64_t, uint64_t> ValueCountMap;
// Initialize ValueCountMap with existing value profile data. if (Sum == 0) {
if (Valid) { assert((CallTargets.size() == 1 &&
for (uint32_t I = 0; I < NumVals; I++) CallTargets[0].Count == NOMORE_ICP_MAGICNUM) &&
ValueCountMap[ValueData[I].Value] = ValueData[I].Count; "If sum is 0, assume only one element in CallTargets "
} "with count being NOMORE_ICP_MAGICNUM");
// Initialize ValueCountMap with existing value profile data.
for (const auto &Data : CallTargets) { if (Valid) {
auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count); for (uint32_t I = 0; I < NumVals; I++)
if (Pair.second) ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
continue; }
// Whenever the count is NOMORE_ICP_MAGICNUM for a value, keep it auto Pair =
// in the ValueCountMap. If both the count in CallTargets and the ValueCountMap.try_emplace(CallTargets[0].Value, CallTargets[0].Count);
// count in ValueCountMap is not NOMORE_ICP_MAGICNUM, keep the // If the target already exists in value profile, decrease the total
// count in CallTargets. // count OldSum and reset the target's count to NOMORE_ICP_MAGICNUM.
if (Pair.first->second != NOMORE_ICP_MAGICNUM && if (!Pair.second) {
Data.Count == NOMORE_ICP_MAGICNUM) {
OldSum -= Pair.first->second; OldSum -= Pair.first->second;
Pair.first->second = NOMORE_ICP_MAGICNUM; Pair.first->second = NOMORE_ICP_MAGICNUM;
} else if (Pair.first->second == NOMORE_ICP_MAGICNUM && }
Data.Count != NOMORE_ICP_MAGICNUM) { Sum = OldSum;
} else {
// Initialize ValueCountMap with existing NOMORE_ICP_MAGICNUM
// counts in the value profile.
if (Valid) {
for (uint32_t I = 0; I < NumVals; I++) {
if (ValueData[I].Count == NOMORE_ICP_MAGICNUM)
ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
}
}
for (const auto &Data : CallTargets) {
auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
if (Pair.second)
continue;
// The target represented by Data.Value has already been promoted.
// Keep the count as NOMORE_ICP_MAGICNUM in the profile and decrease
// Sum by Data.Count.
assert(Sum >= Data.Count && "Sum should never be less than Data.Count"); assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
Sum -= Data.Count; Sum -= Data.Count;
} else if (Pair.first->second != NOMORE_ICP_MAGICNUM &&
Data.Count != NOMORE_ICP_MAGICNUM) {
// Sum will be used in this case. Although the existing count
// for the current value in value profile will be overriden,
// no need to update OldSum.
Pair.first->second = Data.Count;
} }
} }
@ -818,8 +822,7 @@ updateIDTMetaData(Instruction &Inst,
uint32_t MaxMDCount = uint32_t MaxMDCount =
std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions)); std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions));
annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst, annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
NewCallTargets, Sum ? Sum : OldSum, IPVK_IndirectCallTarget, NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
MaxMDCount);
} }
/// Attempt to promote indirect call and also inline the promoted call. /// Attempt to promote indirect call and also inline the promoted call.

View File

@ -0,0 +1,6 @@
_Z3foov:225715:1
2: 5553
3: 5391
1: _Z3goov:5860
1: 5279 _Z3hoov:5860 _Z3moov:210
2: 5279

View File

@ -0,0 +1,71 @@
; RUN: opt < %s -passes=sample-profile -sample-profile-icp-max-prom=4 -sample-profile-file=%S/Inputs/norepeated-icp-3.prof -S | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@.str = private unnamed_addr constant [5 x i8] c"hoo\0A\00", align 1
@p = dso_local global void ()* null, align 8
@str = private unnamed_addr constant [4 x i8] c"hoo\00", align 1
; Function Attrs: nofree nounwind
declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) #1
; Function Attrs: uwtable mustprogress
define dso_local void @_Z3goov() #0 !dbg !11 {
entry:
%0 = load void ()*, void ()** @p, align 8, !dbg !12, !tbaa !13
call void %0(), !dbg !17, !prof !22
ret void, !dbg !18
}
; After the indirect call in _Z3goov is inlined into _Z3foov, it will be
; annotated with new inline instance profile. The existing value profile
; associated with the indirect call should be dropped except those values
; wth NOMORE_ICP_MAGICNUM magic number indicating promoted targets.
; CHECK-LABEL: @_Z3foov(
; CHECK: call void %0(), {{.*}} !prof ![[PROF_ID:[0-9]+]]
; CHECK-NEXT: ret void
; Function Attrs: uwtable mustprogress
define dso_local void @_Z3foov() #0 !dbg !19 {
entry:
call void @_Z3goov(), !dbg !20
ret void, !dbg !21
}
; Function Attrs: nofree nounwind
declare noundef i32 @puts(i8* nocapture noundef readonly) #2
attributes #0 = { uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" }
attributes #1 = { nofree nounwind "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nofree nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
!llvm.ident = !{!6}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
!1 = !DIFile(filename: "1.cc", directory: "")
!2 = !{}
!3 = !{i32 7, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 4}
!6 = !{!""}
!8 = !DISubroutineType(types: !2)
!11 = distinct !DISubprogram(name: "goo", linkageName: "_Z3goov", scope: !1, file: !1, line: 6, type: !8, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
!12 = !DILocation(line: 7, column: 5, scope: !11)
!13 = !{!14, !14, i64 0}
!14 = !{!"any pointer", !15, i64 0}
!15 = !{!"omnipotent char", !16, i64 0}
!16 = !{!"Simple C++ TBAA"}
!17 = !DILocation(line: 7, column: 3, scope: !11)
!18 = !DILocation(line: 8, column: 1, scope: !11)
!19 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 10, type: !8, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
!20 = !DILocation(line: 11, column: 3, scope: !19)
!21 = !DILocation(line: 12, column: 3, scope: !19)
; The original value 125292384912345234234 and its count 8000 should
; be dropped when the indirect call is annotated with new profile.
; The original value -7383239051784516332 and its count -1 should be kept
; because -1 is NOMORE_ICP_MAGICNUM.
; CHECK: ![[PROF_ID]] = !{!"VP", i32 0, i64 5860, i64 -7383239051784516332, i64 -1, i64 -7701940972712279918, i64 5860}
!22 = !{!"VP", i32 0, i64 8000, i64 -7383239051784516332, i64 -1, i64 125292384912345234234, i64 8000}