forked from OSchip/llvm-project
[SampleFDO] Don't mix up the existing indirect call value profile with the new
value profile annotated after inlining. In https://reviews.llvm.org/D96806 and https://reviews.llvm.org/D97350, we use the magic number -1 in the value profile to avoid repeated indirect call promotion to the same target for an indirect call. Function updateIDTMetaData is used to mark a target as being promoted in the value profile with the magic number. updateIDTMetaData is also used to update the value profile when an indirect call is inlined and a new inline instance profile should be applied. For the second case, currently updateIDTMetaData mixes up the existing value profile of the indirect call with the new profile, leading to the problematic scenario that a target count is larger than the total count in the value profile. The patch fixes the problem. When updateIDTMetaData is used to update the value profile after inlining, all the values in the existing value profile will be dropped except the values with the magic number counts. Differential Revision: https://reviews.llvm.org/D98835
This commit is contained in:
parent
92ccc6cb17
commit
14756b70ee
|
@ -755,14 +755,8 @@ static void
|
||||||
updateIDTMetaData(Instruction &Inst,
|
updateIDTMetaData(Instruction &Inst,
|
||||||
const SmallVectorImpl<InstrProfValueData> &CallTargets,
|
const SmallVectorImpl<InstrProfValueData> &CallTargets,
|
||||||
uint64_t Sum) {
|
uint64_t Sum) {
|
||||||
assert((Sum != 0 || (CallTargets.size() == 1 &&
|
|
||||||
CallTargets[0].Count == NOMORE_ICP_MAGICNUM)) &&
|
|
||||||
"If sum is 0, assume only one element in CallTargets with count "
|
|
||||||
"being NOMORE_ICP_MAGICNUM");
|
|
||||||
|
|
||||||
uint32_t NumVals = 0;
|
uint32_t NumVals = 0;
|
||||||
// OldSum is the existing total count in the value profile data.
|
// OldSum is the existing total count in the value profile data.
|
||||||
// It will be replaced by Sum if Sum is not 0.
|
|
||||||
uint64_t OldSum = 0;
|
uint64_t OldSum = 0;
|
||||||
std::unique_ptr<InstrProfValueData[]> ValueData =
|
std::unique_ptr<InstrProfValueData[]> ValueData =
|
||||||
std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
|
std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
|
||||||
|
@ -771,34 +765,44 @@ updateIDTMetaData(Instruction &Inst,
|
||||||
ValueData.get(), NumVals, OldSum, true);
|
ValueData.get(), NumVals, OldSum, true);
|
||||||
|
|
||||||
DenseMap<uint64_t, uint64_t> ValueCountMap;
|
DenseMap<uint64_t, uint64_t> ValueCountMap;
|
||||||
// Initialize ValueCountMap with existing value profile data.
|
if (Sum == 0) {
|
||||||
if (Valid) {
|
assert((CallTargets.size() == 1 &&
|
||||||
for (uint32_t I = 0; I < NumVals; I++)
|
CallTargets[0].Count == NOMORE_ICP_MAGICNUM) &&
|
||||||
ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
|
"If sum is 0, assume only one element in CallTargets "
|
||||||
}
|
"with count being NOMORE_ICP_MAGICNUM");
|
||||||
|
// Initialize ValueCountMap with existing value profile data.
|
||||||
for (const auto &Data : CallTargets) {
|
if (Valid) {
|
||||||
auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
|
for (uint32_t I = 0; I < NumVals; I++)
|
||||||
if (Pair.second)
|
ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
|
||||||
continue;
|
}
|
||||||
// Whenever the count is NOMORE_ICP_MAGICNUM for a value, keep it
|
auto Pair =
|
||||||
// in the ValueCountMap. If both the count in CallTargets and the
|
ValueCountMap.try_emplace(CallTargets[0].Value, CallTargets[0].Count);
|
||||||
// count in ValueCountMap is not NOMORE_ICP_MAGICNUM, keep the
|
// If the target already exists in value profile, decrease the total
|
||||||
// count in CallTargets.
|
// count OldSum and reset the target's count to NOMORE_ICP_MAGICNUM.
|
||||||
if (Pair.first->second != NOMORE_ICP_MAGICNUM &&
|
if (!Pair.second) {
|
||||||
Data.Count == NOMORE_ICP_MAGICNUM) {
|
|
||||||
OldSum -= Pair.first->second;
|
OldSum -= Pair.first->second;
|
||||||
Pair.first->second = NOMORE_ICP_MAGICNUM;
|
Pair.first->second = NOMORE_ICP_MAGICNUM;
|
||||||
} else if (Pair.first->second == NOMORE_ICP_MAGICNUM &&
|
}
|
||||||
Data.Count != NOMORE_ICP_MAGICNUM) {
|
Sum = OldSum;
|
||||||
|
} else {
|
||||||
|
// Initialize ValueCountMap with existing NOMORE_ICP_MAGICNUM
|
||||||
|
// counts in the value profile.
|
||||||
|
if (Valid) {
|
||||||
|
for (uint32_t I = 0; I < NumVals; I++) {
|
||||||
|
if (ValueData[I].Count == NOMORE_ICP_MAGICNUM)
|
||||||
|
ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const auto &Data : CallTargets) {
|
||||||
|
auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
|
||||||
|
if (Pair.second)
|
||||||
|
continue;
|
||||||
|
// The target represented by Data.Value has already been promoted.
|
||||||
|
// Keep the count as NOMORE_ICP_MAGICNUM in the profile and decrease
|
||||||
|
// Sum by Data.Count.
|
||||||
assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
|
assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
|
||||||
Sum -= Data.Count;
|
Sum -= Data.Count;
|
||||||
} else if (Pair.first->second != NOMORE_ICP_MAGICNUM &&
|
|
||||||
Data.Count != NOMORE_ICP_MAGICNUM) {
|
|
||||||
// Sum will be used in this case. Although the existing count
|
|
||||||
// for the current value in value profile will be overridden,
|
|
||||||
// no need to update OldSum.
|
|
||||||
Pair.first->second = Data.Count;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -818,8 +822,7 @@ updateIDTMetaData(Instruction &Inst,
|
||||||
uint32_t MaxMDCount =
|
uint32_t MaxMDCount =
|
||||||
std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions));
|
std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions));
|
||||||
annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
|
annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
|
||||||
NewCallTargets, Sum ? Sum : OldSum, IPVK_IndirectCallTarget,
|
NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
|
||||||
MaxMDCount);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Attempt to promote indirect call and also inline the promoted call.
|
/// Attempt to promote indirect call and also inline the promoted call.
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
_Z3foov:225715:1
|
||||||
|
2: 5553
|
||||||
|
3: 5391
|
||||||
|
1: _Z3goov:5860
|
||||||
|
1: 5279 _Z3hoov:5860 _Z3moov:210
|
||||||
|
2: 5279
|
|
@ -0,0 +1,71 @@
|
||||||
|
; RUN: opt < %s -passes=sample-profile -sample-profile-icp-max-prom=4 -sample-profile-file=%S/Inputs/norepeated-icp-3.prof -S | FileCheck %s
|
||||||
|
|
||||||
|
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||||
|
target triple = "x86_64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
@.str = private unnamed_addr constant [5 x i8] c"hoo\0A\00", align 1
|
||||||
|
@p = dso_local global void ()* null, align 8
|
||||||
|
@str = private unnamed_addr constant [4 x i8] c"hoo\00", align 1
|
||||||
|
|
||||||
|
; Function Attrs: nofree nounwind
|
||||||
|
declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) #1
|
||||||
|
|
||||||
|
; Function Attrs: uwtable mustprogress
|
||||||
|
define dso_local void @_Z3goov() #0 !dbg !11 {
|
||||||
|
entry:
|
||||||
|
%0 = load void ()*, void ()** @p, align 8, !dbg !12, !tbaa !13
|
||||||
|
call void %0(), !dbg !17, !prof !22
|
||||||
|
ret void, !dbg !18
|
||||||
|
}
|
||||||
|
|
||||||
|
; After the indirect call in _Z3goov is inlined into _Z3foov, it will be
|
||||||
|
; annotated with new inline instance profile. The existing value profile
|
||||||
|
; associated with the indirect call should be dropped except those values
|
||||||
|
; with the NOMORE_ICP_MAGICNUM magic number indicating promoted targets.
|
||||||
|
; CHECK-LABEL: @_Z3foov(
|
||||||
|
; CHECK: call void %0(), {{.*}} !prof ![[PROF_ID:[0-9]+]]
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
|
||||||
|
; Function Attrs: uwtable mustprogress
|
||||||
|
define dso_local void @_Z3foov() #0 !dbg !19 {
|
||||||
|
entry:
|
||||||
|
call void @_Z3goov(), !dbg !20
|
||||||
|
ret void, !dbg !21
|
||||||
|
}
|
||||||
|
|
||||||
|
; Function Attrs: nofree nounwind
|
||||||
|
declare noundef i32 @puts(i8* nocapture noundef readonly) #2
|
||||||
|
|
||||||
|
attributes #0 = { uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" }
|
||||||
|
attributes #1 = { nofree nounwind "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||||
|
attributes #2 = { nofree nounwind }
|
||||||
|
|
||||||
|
!llvm.dbg.cu = !{!0}
|
||||||
|
!llvm.module.flags = !{!3, !4, !5}
|
||||||
|
!llvm.ident = !{!6}
|
||||||
|
|
||||||
|
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
|
||||||
|
!1 = !DIFile(filename: "1.cc", directory: "")
|
||||||
|
!2 = !{}
|
||||||
|
!3 = !{i32 7, !"Dwarf Version", i32 4}
|
||||||
|
!4 = !{i32 2, !"Debug Info Version", i32 3}
|
||||||
|
!5 = !{i32 1, !"wchar_size", i32 4}
|
||||||
|
!6 = !{!""}
|
||||||
|
!8 = !DISubroutineType(types: !2)
|
||||||
|
!11 = distinct !DISubprogram(name: "goo", linkageName: "_Z3goov", scope: !1, file: !1, line: 6, type: !8, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
||||||
|
!12 = !DILocation(line: 7, column: 5, scope: !11)
|
||||||
|
!13 = !{!14, !14, i64 0}
|
||||||
|
!14 = !{!"any pointer", !15, i64 0}
|
||||||
|
!15 = !{!"omnipotent char", !16, i64 0}
|
||||||
|
!16 = !{!"Simple C++ TBAA"}
|
||||||
|
!17 = !DILocation(line: 7, column: 3, scope: !11)
|
||||||
|
!18 = !DILocation(line: 8, column: 1, scope: !11)
|
||||||
|
!19 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 10, type: !8, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
||||||
|
!20 = !DILocation(line: 11, column: 3, scope: !19)
|
||||||
|
!21 = !DILocation(line: 12, column: 3, scope: !19)
|
||||||
|
; The original value 125292384912345234234 and its count 8000 should
|
||||||
|
; be dropped when the indirect call is annotated with new profile.
|
||||||
|
; The original value -7383239051784516332 and its count -1 should be kept
|
||||||
|
; because -1 is NOMORE_ICP_MAGICNUM.
|
||||||
|
; CHECK: ![[PROF_ID]] = !{!"VP", i32 0, i64 5860, i64 -7383239051784516332, i64 -1, i64 -7701940972712279918, i64 5860}
|
||||||
|
!22 = !{!"VP", i32 0, i64 8000, i64 -7383239051784516332, i64 -1, i64 125292384912345234234, i64 8000}
|
Loading…
Reference in New Issue