forked from OSchip/llvm-project
[SampleFDO] Do not scale the magic number NOMORE_ICP_MAGICNUM in value profile
during profile update. When we inline a function and update the profile, the value profiles of the indirect calls in the caller and the inlinee are scaled. In https://reviews.llvm.org/D96806 and https://reviews.llvm.org/D97350, we started using the magic number NOMORE_ICP_MAGICNUM (-1) to mark targets which have already been promoted. The magic number shouldn't be scaled during the profile update. Although the problem has been suppressed by https://reviews.llvm.org/D98187, which stops the profile update for inlining in SampleFDO, this patch is still worthwhile because it makes profile update handle the magic number consistently. Differential Revision: https://reviews.llvm.org/D99394
This commit is contained in:
parent
9320ac9b49
commit
3cbf44190b
|
@ -52,6 +52,10 @@ enum LLVMConstants : uint32_t {
|
|||
DEBUG_METADATA_VERSION = 3 // Current debug info version number.
|
||||
};
|
||||
|
||||
/// Magic number in the value profile metadata showing a target has been
|
||||
/// promoted for the instruction and shouldn't be promoted again.
|
||||
const uint64_t NOMORE_ICP_MAGICNUM = -1;
|
||||
|
||||
/// Root of the metadata hierarchy.
|
||||
///
|
||||
/// This is a root class for typeless data in the IR.
|
||||
|
|
|
@ -253,10 +253,6 @@ void annotateValueSite(Module &M, Instruction &Inst,
|
|||
ArrayRef<InstrProfValueData> VDs, uint64_t Sum,
|
||||
InstrProfValueKind ValueKind, uint32_t MaxMDCount);
|
||||
|
||||
/// Magic number in the value profile data showing a target has been
|
||||
/// promoted for the instruction and shouldn't be promoted again.
|
||||
const uint64_t NOMORE_ICP_MAGICNUM = -1;
|
||||
|
||||
/// Extract the value profile data from \p Inst which is annotated with
|
||||
/// value profile meta data. Return false if there is no value data annotated,
|
||||
/// otherwise return true.
|
||||
|
|
|
@ -595,11 +595,17 @@ void CallInst::updateProfWeight(uint64_t S, uint64_t T) {
|
|||
for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) {
|
||||
// The first value is the key of the value profile, which will not change.
|
||||
Vals.push_back(ProfileData->getOperand(i));
|
||||
// Using APInt::div may be expensive, but most cases should fit 64 bits.
|
||||
APInt Val(128,
|
||||
uint64_t Count =
|
||||
mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i + 1))
|
||||
->getValue()
|
||||
.getZExtValue());
|
||||
.getZExtValue();
|
||||
// Don't scale the magic number.
|
||||
if (Count == NOMORE_ICP_MAGICNUM) {
|
||||
Vals.push_back(ProfileData->getOperand(i + 1));
|
||||
continue;
|
||||
}
|
||||
// Using APInt::div may be expensive, but most cases should fit 64 bits.
|
||||
APInt Val(128, Count);
|
||||
Val *= APS;
|
||||
Vals.push_back(MDB.createConstant(
|
||||
ConstantInt::get(Type::getInt64Ty(getContext()),
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
@p = dso_local global void ()* null, align 8
|
||||
|
||||
; After _Z3goov is inlined into _Z3foov, the value profiles of the indirect
|
||||
; calls in _Z3goov and _Z3foov need to be scaled. The test makes sure that
|
||||
; the magic number NOMORE_ICP_MAGICNUM, which marks a target as already
|
||||
; promoted, is kept unchanged during the scaling.
|
||||
;
|
||||
; CHECK-LABEL: @_Z3goov(
|
||||
; CHECK: call void %t0(), {{.*}} !prof ![[PROF_ID1:[0-9]+]]
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; CHECK-LABEL: @_Z3foov(
|
||||
; CHECK: call void %t0.i(), {{.*}} !prof ![[PROF_ID2:[0-9]+]]
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; Function Attrs: uwtable mustprogress
|
||||
define dso_local void @_Z3goov() #0 !dbg !11 !prof !23 {
|
||||
entry:
|
||||
%t0 = load void ()*, void ()** @p, align 8, !dbg !12, !tbaa !13
|
||||
call void %t0(), !dbg !17, !prof !22
|
||||
ret void, !dbg !18
|
||||
}
|
||||
|
||||
; Function Attrs: uwtable mustprogress
|
||||
define dso_local void @_Z3foov() #0 !dbg !19 {
|
||||
entry:
|
||||
call void @_Z3goov(), !dbg !20, !prof !24
|
||||
ret void, !dbg !21
|
||||
}
|
||||
|
||||
attributes #0 = { uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" }
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!3, !4, !5, !25}
|
||||
!llvm.ident = !{!6}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
|
||||
!1 = !DIFile(filename: "1.cc", directory: "")
|
||||
!2 = !{}
|
||||
!3 = !{i32 7, !"Dwarf Version", i32 4}
|
||||
!4 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!5 = !{i32 1, !"wchar_size", i32 4}
|
||||
!6 = !{!""}
|
||||
!8 = !DISubroutineType(types: !2)
|
||||
!11 = distinct !DISubprogram(name: "goo", linkageName: "_Z3goov", scope: !1, file: !1, line: 6, type: !8, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
||||
!12 = !DILocation(line: 7, column: 5, scope: !11)
|
||||
!13 = !{!14, !14, i64 0}
|
||||
!14 = !{!"any pointer", !15, i64 0}
|
||||
!15 = !{!"omnipotent char", !16, i64 0}
|
||||
!16 = !{!"Simple C++ TBAA"}
|
||||
!17 = !DILocation(line: 7, column: 3, scope: !11)
|
||||
!18 = !DILocation(line: 8, column: 1, scope: !11)
|
||||
!19 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 10, type: !8, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
||||
!20 = !DILocation(line: 11, column: 3, scope: !19)
|
||||
!21 = !DILocation(line: 12, column: 3, scope: !19)
|
||||
; CHECK: ![[PROF_ID1]] = !{!"VP", i32 0, i64 7200, i64 -7383239051784516332, i64 -1, i64 -3834823603621627078, i64 7200}
|
||||
; CHECK: ![[PROF_ID2]] = !{!"VP", i32 0, i64 800, i64 -7383239051784516332, i64 -1, i64 -3834823603621627078, i64 800}
|
||||
!22 = !{!"VP", i32 0, i64 8000, i64 -7383239051784516332, i64 -1, i64 125292384912345234234, i64 8000}
|
||||
!23 = !{!"function_entry_count", i64 1000}
|
||||
!24 = !{!"branch_weights", i32 100}
|
||||
!25 = !{i32 1, !"ProfileSummary", !26}
|
||||
!26 = !{!27, !28, !29, !30, !31, !32, !33, !34}
|
||||
!27 = !{!"ProfileFormat", !"SampleProfile"}
|
||||
!28 = !{!"TotalCount", i64 10000}
|
||||
!29 = !{!"MaxCount", i64 1000}
|
||||
!30 = !{!"MaxInternalCount", i64 1}
|
||||
!31 = !{!"MaxFunctionCount", i64 1000}
|
||||
!32 = !{!"NumCounts", i64 3}
|
||||
!33 = !{!"NumFunctions", i64 3}
|
||||
!34 = !{!"DetailedSummary", !35}
|
||||
!35 = !{!36, !37, !38}
|
||||
!36 = !{i32 10000, i64 100, i32 1}
|
||||
!37 = !{i32 999000, i64 100, i32 1}
|
||||
!38 = !{i32 999999, i64 1, i32 2}
|
Loading…
Reference in New Issue