forked from OSchip/llvm-project
[ThinLTO] Ensure callees get hot threshold when first seen on cold path
This is split out from D27696, since it turned out to be a bug fix and not part of the NFC efficiency change. Keep the same adjusted (possibly decayed) threshold in both the worklist and the ImportList. Otherwise if we encountered it first along a cold path, the callee would be added to the worklist with a lower decayed threshold than when it is later encountered along a hot path. But the logic uses the threshold recorded in the ImportList entry to check if we should re-add it, and without this patch the threshold recorded there is the same along both paths so we don't re-add it. Using the same possibly decayed threshold in the ImportList ensures we re-add it later with the higher non-decayed hot path threshold. llvm-svn: 289843
This commit is contained in:
parent
1662da2832
commit
1b859a2306
|
@ -316,35 +316,6 @@ static void computeImportForFunction(
|
|||
assert(ResolvedCalleeSummary->instCount() <= NewThreshold &&
|
||||
"selectCallee() didn't honor the threshold");
|
||||
|
||||
auto ExportModulePath = ResolvedCalleeSummary->modulePath();
|
||||
auto &ProcessedThreshold = ImportList[ExportModulePath][GUID];
|
||||
/// Since the traversal of the call graph is DFS, we can revisit a function
|
||||
/// a second time with a higher threshold. In this case, it is added back to
|
||||
/// the worklist with the new threshold.
|
||||
if (ProcessedThreshold && ProcessedThreshold >= Threshold) {
|
||||
DEBUG(dbgs() << "ignored! Target was already seen with Threshold "
|
||||
<< ProcessedThreshold << "\n");
|
||||
continue;
|
||||
}
|
||||
// Mark this function as imported in this module, with the current Threshold
|
||||
ProcessedThreshold = Threshold;
|
||||
|
||||
// Make exports in the source module.
|
||||
if (ExportLists) {
|
||||
auto &ExportList = (*ExportLists)[ExportModulePath];
|
||||
ExportList.insert(GUID);
|
||||
// Mark all functions and globals referenced by this function as exported
|
||||
// to the outside if they are defined in the same source module.
|
||||
for (auto &Edge : ResolvedCalleeSummary->calls()) {
|
||||
auto CalleeGUID = Edge.first.getGUID();
|
||||
exportGlobalInModule(Index, ExportModulePath, CalleeGUID, ExportList);
|
||||
}
|
||||
for (auto &Ref : ResolvedCalleeSummary->refs()) {
|
||||
auto GUID = Ref.getGUID();
|
||||
exportGlobalInModule(Index, ExportModulePath, GUID, ExportList);
|
||||
}
|
||||
}
|
||||
|
||||
auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
|
||||
// Adjust the threshold for next level of imported functions.
|
||||
// The threshold is different for hot callsites because we can then
|
||||
|
@ -355,10 +326,43 @@ static void computeImportForFunction(
|
|||
};
|
||||
|
||||
bool IsHotCallsite = Edge.second.Hotness == CalleeInfo::HotnessType::Hot;
|
||||
const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite);
|
||||
|
||||
auto ExportModulePath = ResolvedCalleeSummary->modulePath();
|
||||
auto &ProcessedThreshold = ImportList[ExportModulePath][GUID];
|
||||
/// Since the traversal of the call graph is DFS, we can revisit a function
|
||||
/// a second time with a higher threshold. In this case, it is added back to
|
||||
/// the worklist with the new threshold.
|
||||
if (ProcessedThreshold && ProcessedThreshold >= AdjThreshold) {
|
||||
DEBUG(dbgs() << "ignored! Target was already seen with Threshold "
|
||||
<< ProcessedThreshold << "\n");
|
||||
continue;
|
||||
}
|
||||
bool PreviouslyImported = ProcessedThreshold != 0;
|
||||
// Mark this function as imported in this module, with the adjusted (possibly decayed) threshold
|
||||
ProcessedThreshold = AdjThreshold;
|
||||
|
||||
// Make exports in the source module.
|
||||
if (ExportLists) {
|
||||
auto &ExportList = (*ExportLists)[ExportModulePath];
|
||||
ExportList.insert(GUID);
|
||||
if (!PreviouslyImported) {
|
||||
// This is the first time this function was exported from its source
|
||||
// module, so mark all functions and globals it references as exported
|
||||
// to the outside if they are defined in the same source module.
|
||||
for (auto &Edge : ResolvedCalleeSummary->calls()) {
|
||||
auto CalleeGUID = Edge.first.getGUID();
|
||||
exportGlobalInModule(Index, ExportModulePath, CalleeGUID, ExportList);
|
||||
}
|
||||
for (auto &Ref : ResolvedCalleeSummary->refs()) {
|
||||
auto GUID = Ref.getGUID();
|
||||
exportGlobalInModule(Index, ExportModulePath, GUID, ExportList);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Insert the newly imported function to the worklist.
|
||||
Worklist.emplace_back(ResolvedCalleeSummary,
|
||||
GetAdjustedThreshold(Threshold, IsHotCallsite));
|
||||
Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
; ModuleID = 'thinlto-function-summary-callgraph-profile-summary2.ll'
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
|
||||
define void @hot() #1 !prof !28 {
|
||||
call void @calledFromHot()
|
||||
ret void
|
||||
}
|
||||
|
||||
; 9 instructions so it is above decayed cold threshold of 7 and below
|
||||
; decayed hot threshold of 10.
|
||||
define void @calledFromHot() !prof !28 {
|
||||
%b = alloca i32, align 4
|
||||
store i32 1, i32* %b, align 4
|
||||
store i32 1, i32* %b, align 4
|
||||
store i32 1, i32* %b, align 4
|
||||
store i32 1, i32* %b, align 4
|
||||
store i32 1, i32* %b, align 4
|
||||
store i32 1, i32* %b, align 4
|
||||
store i32 1, i32* %b, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!1}
|
||||
|
||||
!1 = !{i32 1, !"ProfileSummary", !2}
|
||||
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
|
||||
!3 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!4 = !{!"TotalCount", i64 222}
|
||||
!5 = !{!"MaxCount", i64 110}
|
||||
!6 = !{!"MaxInternalCount", i64 1}
|
||||
!7 = !{!"MaxFunctionCount", i64 110}
|
||||
!8 = !{!"NumCounts", i64 4}
|
||||
!9 = !{!"NumFunctions", i64 3}
|
||||
!10 = !{!"DetailedSummary", !11}
|
||||
!11 = !{!12, !13, !14}
|
||||
!12 = !{i32 10000, i64 110, i32 2}
|
||||
!13 = !{i32 999000, i64 2, i32 4}
|
||||
!14 = !{i32 999999, i64 2, i32 4}
|
||||
!28 = !{!"function_entry_count", i64 110}
|
||||
!29 = !{!"function_entry_count", i64 1}
|
|
@ -0,0 +1,53 @@
|
|||
; Test to check that callee reached from cold and then hot path gets
|
||||
; hot thresholds.
|
||||
; RUN: opt -module-summary %s -o %t.bc
|
||||
; RUN: opt -module-summary %p/Inputs/hotness_based_import2.ll -o %t2.bc
|
||||
; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
|
||||
|
||||
; Test with limit set to 10 and multipliers set to 1. Since cold call to
|
||||
; hot is first in the other module, we'll first add calledFromHot to worklist
|
||||
; with threshold decayed by default 0.7 factor. Test ensures that when we
|
||||
; encounter it again from hot path, we re-enqueue with higher non-decayed
|
||||
; threshold which will allow it to be imported.
|
||||
; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -import-instr-limit=10 -import-hot-multiplier=1.0 -import-cold-multiplier=1.0 -S | FileCheck %s --check-prefix=CHECK
|
||||
; CHECK-DAG: define available_externally void @hot()
|
||||
; CHECK-DAG: define available_externally void @calledFromHot()
|
||||
|
||||
; ModuleID = 'thinlto-function-summary-callgraph.ll'
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; This function has a high profile count, so entry block is hot.
|
||||
define void @hot_function(i1 %a, i1 %a2) !prof !28 {
|
||||
entry:
|
||||
call void @hot()
|
||||
ret void
|
||||
}
|
||||
|
||||
; This function has a low profile count, so entry block is cold.
|
||||
define void @cold_function(i1 %a, i1 %a2) !prof !29 {
|
||||
entry:
|
||||
call void @hot()
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @hot() #1
|
||||
|
||||
!llvm.module.flags = !{!1}
|
||||
|
||||
!1 = !{i32 1, !"ProfileSummary", !2}
|
||||
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
|
||||
!3 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!4 = !{!"TotalCount", i64 222}
|
||||
!5 = !{!"MaxCount", i64 110}
|
||||
!6 = !{!"MaxInternalCount", i64 1}
|
||||
!7 = !{!"MaxFunctionCount", i64 110}
|
||||
!8 = !{!"NumCounts", i64 4}
|
||||
!9 = !{!"NumFunctions", i64 3}
|
||||
!10 = !{!"DetailedSummary", !11}
|
||||
!11 = !{!12, !13, !14}
|
||||
!12 = !{i32 10000, i64 110, i32 2}
|
||||
!13 = !{i32 999000, i64 2, i32 4}
|
||||
!14 = !{i32 999999, i64 2, i32 4}
|
||||
!28 = !{!"function_entry_count", i64 110}
|
||||
!29 = !{!"function_entry_count", i64 1}
|
Loading…
Reference in New Issue