From 3bcccdfe38f28f8d19faa957cd56ed4d03cd40fb Mon Sep 17 00:00:00 2001 From: Wei Mi Date: Thu, 17 Jan 2019 20:48:34 +0000 Subject: [PATCH] [SampleFDO] Skip profile reading when flattened profile used in ThinLTO postlink If the sample profile has no inlining hierarchy information included, we say the sample profile is flattened. For a flattened profile, in the ThinLTO postlink phase, SampleProfileLoader's hot function inlining and profile annotation will do nothing, so it is better to save the effort of reading in the profile and running the sample profile loader pass. This helps reduce compile time when the flattened profile is huge. Differential Revision: https://reviews.llvm.org/D54819 llvm-svn: 351476 --- llvm/lib/Passes/PassBuilder.cpp | 12 ++++-- .../lib/Transforms/IPO/PassManagerBuilder.cpp | 17 +++++++- .../SampleProfile/Inputs/flattened.prof | 2 + .../Transforms/SampleProfile/flattened.ll | 39 +++++++++++++++++++ 4 files changed, 65 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/flattened.prof create mode 100644 llvm/test/Transforms/SampleProfile/flattened.ll diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 5ec94ea6f40a..e56c2d4c8fb3 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -211,6 +211,8 @@ static cl::opt extern cl::opt EnableHotColdSplit; +extern cl::opt FlattenedProfileUsed; + static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) { switch (Level) { case PassBuilder::O0: @@ -615,9 +617,13 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) { // Annotate sample profile right after early FPM to ensure freshness of // the debug info. 
- MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile, - PGOOpt->ProfileRemappingFile, - Phase == ThinLTOPhase::PreLink)); + // In ThinLTO mode, when flattened profile is used, all the available + // profile information will be annotated in PreLink phase so there is + // no need to load the profile again in PostLink. + if (!(FlattenedProfileUsed && Phase == ThinLTOPhase::PostLink)) + MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile, + PGOOpt->ProfileRemappingFile, + Phase == ThinLTOPhase::PreLink)); // Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard // for the profile annotation to be accurate in the ThinLTO backend. if (Phase != ThinLTOPhase::PreLink) diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index b92d3ba4f48e..1dd07fa026ac 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -104,6 +104,10 @@ static cl::opt EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden, cl::desc("Enable preparation for ThinLTO.")); +static cl::opt + EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden, + cl::desc("Enable performing ThinLTO.")); + cl::opt EnableHotColdSplit("hot-cold-split", cl::init(false), cl::Hidden, cl::desc("Enable hot-cold splitting pass")); @@ -146,6 +150,11 @@ static cl::opt EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)")); +cl::opt FlattenedProfileUsed( + "flattened-profile-used", cl::init(false), cl::Hidden, + cl::desc("Indicate the sample profile being used is flattened, i.e., " + "no inline hierachy exists in the profile. 
")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -166,7 +175,7 @@ PassManagerBuilder::PassManagerBuilder() { PGOInstrUse = ""; PGOSampleUse = ""; PrepareForThinLTO = EnablePrepareForThinLTO; - PerformThinLTO = false; + PerformThinLTO = EnablePerformThinLTO; DivergentTarget = false; } @@ -414,7 +423,11 @@ void PassManagerBuilder::populateModulePassManager( legacy::PassManagerBase &MPM) { if (!PGOSampleUse.empty()) { MPM.add(createPruneEHPass()); - MPM.add(createSampleProfileLoaderPass(PGOSampleUse)); + // In ThinLTO mode, when flattened profile is used, all the available + // profile information will be annotated in PreLink phase so there is + // no need to load the profile again in PostLink. + if (!(FlattenedProfileUsed && PerformThinLTO)) + MPM.add(createSampleProfileLoaderPass(PGOSampleUse)); } // Allow forcing function attributes as a debugging and tuning aid. diff --git a/llvm/test/Transforms/SampleProfile/Inputs/flattened.prof b/llvm/test/Transforms/SampleProfile/Inputs/flattened.prof new file mode 100644 index 000000000000..962bc6e58e55 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/flattened.prof @@ -0,0 +1,2 @@ +foo:100:100 + 1: 100 diff --git a/llvm/test/Transforms/SampleProfile/flattened.ll b/llvm/test/Transforms/SampleProfile/flattened.ll new file mode 100644 index 000000000000..7a1e53b966c7 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/flattened.ll @@ -0,0 +1,39 @@ +; Check flattened profile will not be read in thinlto postlink. +; RUN: opt < %s -O2 -flattened-profile-used -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -enable-chr=false -perform-thinlto=true -S | FileCheck %s +; RUN: opt < %s -passes='thinlto' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -flattened-profile-used -S | FileCheck %s +; +; Check flattened profile will be read in thinlto prelink. 
+; RUN: opt < %s -O2 -flattened-profile-used -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -enable-chr=false -prepare-for-thinlto=true -S | FileCheck %s --check-prefix=PRELINK +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -flattened-profile-used -S | FileCheck %s --check-prefix=PRELINK +; +; Check flattened profile will be read in non-thinlto mode. +; RUN: opt < %s -O2 -flattened-profile-used -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -enable-chr=false -S | FileCheck %s --check-prefix=NOTHINLTO +; RUN: opt < %s -passes='default' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -flattened-profile-used -S | FileCheck %s --check-prefix=NOTHINLTO +; +; CHECK-NOT: !{!"ProfileFormat", !"SampleProfile"} +; PRELINK: !{!"ProfileFormat", !"SampleProfile"} +; NOTHINLTO: !{!"ProfileFormat", !"SampleProfile"} + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: norecurse nounwind readnone uwtable +define dso_local i32 @foo() local_unnamed_addr !dbg !7 { +entry: + ret i32 -1, !dbg !9 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.0 (trunk 345241)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "a.c", directory: "") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 8.0.0 (trunk 345241)"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !2) +!9 = !DILocation(line: 2, column: 3, scope: !7)