Move the SampleProfileLoader right after EarlyFPM.

Summary: The SampleProfileLoader pass needs to run after some early cleanup passes so that inlining can happen correctly inside the SampleProfileLoader pass.

Reviewers: chandlerc, davidxl, tejohnson

Reviewed By: chandlerc, tejohnson

Subscribers: sanjoy, mehdi_amini, eraman, llvm-commits

Differential Revision: https://reviews.llvm.org/D36333

llvm-svn: 310296
This commit is contained in:
Dehao Chen 2017-08-07 20:23:20 +00:00
parent 53431bc046
commit 08f8831e57
3 changed files with 49 additions and 25 deletions

View File

@ -540,8 +540,32 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
EarlyFPM.addPass(SROA());
EarlyFPM.addPass(EarlyCSEPass());
EarlyFPM.addPass(LowerExpectIntrinsicPass());
// In SamplePGO ThinLTO backend, we need instcombine before profile annotation
// to convert bitcast to direct calls so that they can be inlined during the
// profile annotation preparation step.
// More details about SamplePGO design can be found in:
// https://research.google.com/pubs/pub45290.html
// FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
if (PGOOpt && !PGOOpt->SampleProfileFile.empty() &&
Phase == ThinLTOPhase::PostLink)
EarlyFPM.addPass(InstCombinePass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) {
// Annotate sample profile right after early FPM to ensure freshness of
// the debug info.
MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile));
// Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard
// for the profile annotation to be accurate in the ThinLTO backend.
if (Phase != ThinLTOPhase::PreLink)
// We perform early indirect call promotion here, before globalopt.
// This is important for the ThinLTO backend phase because otherwise
// imported available_externally functions look unreferenced and are
// removed.
MPM.addPass(PGOIndirectCallPromotion(Phase == ThinLTOPhase::PostLink,
true));
}
// Interprocedural constant propagation now that basic cleanup has occurred
// and prior to optimizing globals.
// FIXME: This position in the pipeline hasn't been carefully considered in
@ -768,13 +792,8 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
// Force any function attributes we want the rest of the pipeline to observe.
MPM.addPass(ForceFunctionAttrsPass());
if (PGOOpt && PGOOpt->SamplePGOSupport) {
if (PGOOpt && PGOOpt->SamplePGOSupport)
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
if (!PGOOpt->SampleProfileFile.empty()) {
MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile));
MPM.addPass(PGOIndirectCallPromotion(false, true));
}
}
// Add the core simplification pipeline.
MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::None,
@ -796,14 +815,8 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level,
// Force any function attributes we want the rest of the pipeline to observe.
MPM.addPass(ForceFunctionAttrsPass());
// Invoke the SamplePGO annotation pass for the first time to annotate
// profile for functions in the current module to give ThinLink info
// about module grouping.
if (PGOOpt && PGOOpt->SamplePGOSupport) {
if (PGOOpt && PGOOpt->SamplePGOSupport)
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
if (!PGOOpt->SampleProfileFile.empty())
MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile));
}
// If we are planning to perform ThinLTO later, we don't bloat the code with
// unrolling/vectorization/... now. Just simplify the module as much as we
@ -839,16 +852,14 @@ PassBuilder::buildThinLTODefaultPipeline(OptimizationLevel Level,
// Force any function attributes we want the rest of the pipeline to observe.
MPM.addPass(ForceFunctionAttrsPass());
// Invoke the SamplePGO annotation pass for the second time to annotate on
// functions imported from other modules.
if (PGOOpt && !PGOOpt->SampleProfileFile.empty())
MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile));
// During the ThinLTO backend phase we perform early indirect call promotion
// here, before globalopt. Otherwise imported available_externally functions
// look unreferenced and are removed.
MPM.addPass(PGOIndirectCallPromotion(
true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty()));
// FIXME: move this into buildModuleSimplificationPipeline to merge the logic
// with SamplePGO.
if (PGOOpt && !PGOOpt->ProfileUseFile.empty())
MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */,
false /* SamplePGO */));
// Add the core simplification pipeline.
MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::PostLink,

View File

@ -1,13 +1,27 @@
; RUN: opt -debug-pass-manager -passes='default<O2>' -pgo-kind=new-pm-pgo-instr-gen-pipeline -profile-file='temp' %s 2>&1 |FileCheck %s --check-prefixes=GEN
; RUN: llvm-profdata merge %S/Inputs/new-pm-pgo.proftext -o %t.profdata
; RUN: opt -debug-pass-manager -passes='default<O2>' -pgo-kind=new-pm-pgo-instr-use-pipeline -profile-file='%t.profdata' %s 2>&1 |FileCheck %s --check-prefixes=USE
; RUN: opt -debug-pass-manager -passes='default<O2>' -pgo-kind=new-pm-pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' %s 2>&1 |FileCheck %s --check-prefixes=SAMPLE_USE
; RUN: opt -debug-pass-manager -passes='default<O2>' -pgo-kind=new-pm-pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' %s 2>&1 \
; RUN: |FileCheck %s --check-prefixes=SAMPLE_USE,SAMPLE_USE_O
; RUN: opt -debug-pass-manager -passes='thinlto-pre-link<O2>' -pgo-kind=new-pm-pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' %s 2>&1 \
; RUN: |FileCheck %s --check-prefixes=SAMPLE_USE,SAMPLE_USE_PRE_LINK
; RUN: opt -debug-pass-manager -passes='thinlto<O2>' -pgo-kind=new-pm-pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' %s 2>&1 \
; RUN: |FileCheck %s --check-prefixes=SAMPLE_USE,SAMPLE_USE_POST_LINK
; RUN: opt -debug-pass-manager -passes='default<O2>' -new-pm-debug-info-for-profiling %s 2>&1 |FileCheck %s --check-prefixes=SAMPLE_GEN
;
; GEN: Running pass: PGOInstrumentationGen
; USE: Running pass: PGOInstrumentationUse
; SAMPLE_USE: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}>
; SAMPLE_USE_O: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}>
; SAMPLE_USE_PRE_LINK: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}>
; SAMPLE_USE: Running pass: SimplifyCFGPass
; SAMPLE_USE: Running pass: SROA
; SAMPLE_USE: Running pass: EarlyCSEPass
; SAMPLE_USE: Running pass: LowerExpectIntrinsicPass
; SAMPLE_USE_POST_LINK: Running pass: InstCombinePass
; SAMPLE_USE: Running pass: SampleProfileLoaderPass
; SAMPLE_USE_O: Running pass: PGOIndirectCallPromotion
; SAMPLE_USE_POST_LINK-NOT: Running pass: GlobalOptPass
; SAMPLE_USE_POST_LINK: Running pass: PGOIndirectCallPromotion
; SAMPLE_GEN: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}>
define void @foo() {

View File

@ -53,15 +53,13 @@
; CHECK-O-NEXT: Running pass: ForceFunctionAttrsPass
; CHECK-DIS-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}>
; CHECK-DIS-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-POSTLINK-O-NEXT: Running pass: PGOIndirectCallPromotion
; CHECK-POSTLINK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Function
; CHECK-POSTLINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
; CHECK-O-NEXT: Starting llvm::Module pass manager run.
; CHECK-O-NEXT: Running pass: InferFunctionAttrsPass
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
; CHECK-PRELINK-O-NODIS-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-POSTLINK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-O-NEXT: Starting llvm::Function pass manager run.
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
@ -80,6 +78,7 @@
; CHECK-O-NEXT: Starting llvm::Function pass manager run.
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-PRELINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
; CHECK-POSTLINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Finished llvm::Function pass manager run.
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA