forked from OSchip/llvm-project
[llvm][NFC] Factor out inlining pipeline as a module pipeline.
Summary: This simplifies testing in scenarios where we want to set up module-wide analyses for inlining. The patch enables treating inlining, together with its function cleanups, as a module pass. The alternative would be for tests to describe the pipeline themselves, which is tedious and adds maintenance overhead. Reviewers: davidxl, dblaikie, jdoerfert, sstefan1 Subscribers: hiraditya, steven_wu, dexonsmith, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78512
This commit is contained in:
parent
44ce588670
commit
c3770c5d6d
|
@ -343,6 +343,12 @@ public:
|
|||
ThinLTOPhase Phase,
|
||||
bool DebugLogging = false);
|
||||
|
||||
/// Construct the module pipeline that performs inlining as well as
|
||||
/// the inlining-driven cleanups.
|
||||
ModulePassManager buildInlinerPipeline(OptimizationLevel Level,
|
||||
ThinLTOPhase Phase,
|
||||
bool DebugLogging = false);
|
||||
|
||||
/// Construct the core LLVM module optimization pipeline.
|
||||
///
|
||||
/// This pipeline focuses on optimizing the execution speed of the IR. It
|
||||
|
|
|
@ -690,10 +690,73 @@ getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) {
|
|||
return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
|
||||
}
|
||||
|
||||
ModulePassManager
|
||||
PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
|
||||
ThinLTOPhase Phase,
|
||||
bool DebugLogging) {
|
||||
ModulePassManager PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
|
||||
ThinLTOPhase Phase,
|
||||
bool DebugLogging) {
|
||||
ModulePassManager MPM(DebugLogging);
|
||||
|
||||
// Now begin the main postorder CGSCC pipeline.
|
||||
// FIXME: The current CGSCC pipeline has its origins in the legacy pass
|
||||
// manager and trying to emulate its precise behavior. Much of this doesn't
|
||||
// make a lot of sense and we should revisit the core CGSCC structure.
|
||||
CGSCCPassManager MainCGPipeline(DebugLogging);
|
||||
|
||||
// Note: historically, the PruneEH pass was run first to deduce nounwind and
|
||||
// generally clean up exception handling overhead. It isn't clear this is
|
||||
// valuable as the inliner doesn't currently care whether it is inlining an
|
||||
// invoke or a call.
|
||||
|
||||
// Run the inliner first. The theory is that we are walking bottom-up and so
|
||||
// the callees have already been fully optimized, and we want to inline them
|
||||
// into the callers so that our optimizations can reflect that.
|
||||
// For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO
|
||||
// because it makes profile annotation in the backend inaccurate.
|
||||
InlineParams IP = getInlineParamsFromOptLevel(Level);
|
||||
if (Phase == ThinLTOPhase::PreLink && PGOOpt &&
|
||||
PGOOpt->Action == PGOOptions::SampleUse)
|
||||
IP.HotCallSiteThreshold = 0;
|
||||
MainCGPipeline.addPass(InlinerPass(IP));
|
||||
|
||||
if (AttributorRun & AttributorRunOption::CGSCC)
|
||||
MainCGPipeline.addPass(AttributorCGSCCPass());
|
||||
|
||||
if (PTO.Coroutines)
|
||||
MainCGPipeline.addPass(CoroSplitPass());
|
||||
|
||||
// Now deduce any function attributes based on the current code.
|
||||
MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
|
||||
|
||||
// When at O3 add argument promotion to the pass pipeline.
|
||||
// FIXME: It isn't at all clear why this should be limited to O3.
|
||||
if (Level == OptimizationLevel::O3)
|
||||
MainCGPipeline.addPass(ArgumentPromotionPass());
|
||||
|
||||
// Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
|
||||
// there are no OpenMP runtime calls present in the module.
|
||||
if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
|
||||
MainCGPipeline.addPass(OpenMPOptPass());
|
||||
|
||||
// Lastly, add the core function simplification pipeline nested inside the
|
||||
// CGSCC walk.
|
||||
MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
|
||||
buildFunctionSimplificationPipeline(Level, Phase, DebugLogging)));
|
||||
|
||||
for (auto &C : CGSCCOptimizerLateEPCallbacks)
|
||||
C(MainCGPipeline, Level);
|
||||
|
||||
// We wrap the CGSCC pipeline in a devirtualization repeater. This will try
|
||||
// to detect when we devirtualize indirect calls and iterate the SCC passes
|
||||
// in that case to try and catch knock-on inlining or function attrs
|
||||
// opportunities. Then we add it to the module pipeline by walking the SCCs
|
||||
// in postorder (or bottom-up).
|
||||
MPM.addPass(
|
||||
createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass(
|
||||
std::move(MainCGPipeline), MaxDevirtIterations)));
|
||||
return MPM;
|
||||
}
|
||||
|
||||
ModulePassManager PassBuilder::buildModuleSimplificationPipeline(
|
||||
OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging) {
|
||||
ModulePassManager MPM(DebugLogging);
|
||||
|
||||
bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
|
||||
|
@ -830,64 +893,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
|
|||
// the inliner pass.
|
||||
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
|
||||
|
||||
// Now begin the main postorder CGSCC pipeline.
|
||||
// FIXME: The current CGSCC pipeline has its origins in the legacy pass
|
||||
// manager and trying to emulate its precise behavior. Much of this doesn't
|
||||
// make a lot of sense and we should revisit the core CGSCC structure.
|
||||
CGSCCPassManager MainCGPipeline(DebugLogging);
|
||||
|
||||
// Note: historically, the PruneEH pass was run first to deduce nounwind and
|
||||
// generally clean up exception handling overhead. It isn't clear this is
|
||||
// valuable as the inliner doesn't currently care whether it is inlining an
|
||||
// invoke or a call.
|
||||
|
||||
// Run the inliner first. The theory is that we are walking bottom-up and so
|
||||
// the callees have already been fully optimized, and we want to inline them
|
||||
// into the callers so that our optimizations can reflect that.
|
||||
// For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO
|
||||
// because it makes profile annotation in the backend inaccurate.
|
||||
InlineParams IP = getInlineParamsFromOptLevel(Level);
|
||||
if (Phase == ThinLTOPhase::PreLink && PGOOpt &&
|
||||
PGOOpt->Action == PGOOptions::SampleUse)
|
||||
IP.HotCallSiteThreshold = 0;
|
||||
MainCGPipeline.addPass(InlinerPass(IP));
|
||||
|
||||
if (AttributorRun & AttributorRunOption::CGSCC)
|
||||
MainCGPipeline.addPass(AttributorCGSCCPass());
|
||||
|
||||
if (PTO.Coroutines)
|
||||
MainCGPipeline.addPass(CoroSplitPass());
|
||||
|
||||
// Now deduce any function attributes based on the current code.
|
||||
MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
|
||||
|
||||
// When at O3 add argument promotion to the pass pipeline.
|
||||
// FIXME: It isn't at all clear why this should be limited to O3.
|
||||
if (Level == OptimizationLevel::O3)
|
||||
MainCGPipeline.addPass(ArgumentPromotionPass());
|
||||
|
||||
// Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
|
||||
// there are no OpenMP runtime calls present in the module.
|
||||
if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
|
||||
MainCGPipeline.addPass(OpenMPOptPass());
|
||||
|
||||
// Lastly, add the core function simplification pipeline nested inside the
|
||||
// CGSCC walk.
|
||||
MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
|
||||
buildFunctionSimplificationPipeline(Level, Phase, DebugLogging)));
|
||||
|
||||
for (auto &C : CGSCCOptimizerLateEPCallbacks)
|
||||
C(MainCGPipeline, Level);
|
||||
|
||||
// We wrap the CGSCC pipeline in a devirtualization repeater. This will try
|
||||
// to detect when we devirtualize indirect calls and iterate the SCC passes
|
||||
// in that case to try and catch knock-on inlining or function attrs
|
||||
// opportunities. Then we add it to the module pipeline by walking the SCCs
|
||||
// in postorder (or bottom-up).
|
||||
MPM.addPass(
|
||||
createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass(
|
||||
std::move(MainCGPipeline), MaxDevirtIterations)));
|
||||
|
||||
MPM.addPass(buildInlinerPipeline(Level, Phase, DebugLogging));
|
||||
return MPM;
|
||||
}
|
||||
|
||||
|
@ -1260,11 +1266,11 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
|
|||
// Propagate constants at call sites into the functions they call. This
|
||||
// opens opportunities for globalopt (and inlining) by substituting function
|
||||
// pointers passed as arguments to direct uses of functions.
|
||||
MPM.addPass(IPSCCPPass());
|
||||
MPM.addPass(IPSCCPPass());
|
||||
|
||||
// Attach metadata to indirect call sites indicating the set of functions
|
||||
// they may target at run-time. This should follow IPSCCP.
|
||||
MPM.addPass(CalledValuePropagationPass());
|
||||
// Attach metadata to indirect call sites indicating the set of functions
|
||||
// they may target at run-time. This should follow IPSCCP.
|
||||
MPM.addPass(CalledValuePropagationPass());
|
||||
}
|
||||
|
||||
// Now deduce any function attributes based on the current code.
|
||||
|
|
|
@ -82,6 +82,8 @@ MODULE_PASS("rewrite-statepoints-for-gc", RewriteStatepointsForGC())
|
|||
MODULE_PASS("rewrite-symbols", RewriteSymbolPass())
|
||||
MODULE_PASS("rpo-functionattrs", ReversePostOrderFunctionAttrsPass())
|
||||
MODULE_PASS("sample-profile", SampleProfileLoaderPass())
|
||||
MODULE_PASS("scc-oz-module-inliner",
|
||||
buildInlinerPipeline(OptimizationLevel::Oz, ThinLTOPhase::None, DebugLogging))
|
||||
MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass())
|
||||
MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
|
||||
MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr))
|
||||
|
|
|
@ -132,6 +132,8 @@
|
|||
; CHECK-O-NEXT: Running analysis: CallGraphAnalysis
|
||||
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
|
||||
; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
|
||||
; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
|
||||
; CHECK-O-NEXT: Starting llvm::Module pass manager run.
|
||||
; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
|
||||
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
|
||||
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
|
||||
|
@ -226,6 +228,7 @@
|
|||
; CHECK-EP-CGSCC-LATE-NEXT: Running pass: NoOpCGSCCPass
|
||||
; CHECK-O-NEXT: Finished CGSCC pass manager run.
|
||||
; CHECK-O-NEXT: Finished llvm::Module pass manager run.
|
||||
; CHECK-O-NEXT: Finished llvm::Module pass manager run.
|
||||
; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
|
||||
; CHECK-O-NEXT: Starting llvm::Module pass manager run.
|
||||
; CHECK-O-NEXT: Running pass: GlobalOptPass
|
||||
|
|
|
@ -97,6 +97,8 @@
|
|||
; CHECK-O-NEXT: Running analysis: CallGraphAnalysis
|
||||
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
|
||||
; CHECK-PRELINK-O-NEXT: Running analysis: ProfileSummaryAnalysis
|
||||
; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
|
||||
; CHECK-O-NEXT: Starting llvm::Module pass manager run.
|
||||
; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
|
||||
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
|
||||
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
|
||||
|
@ -196,6 +198,7 @@
|
|||
; CHECK-O-NEXT: Finished llvm::Function pass manager run.
|
||||
; CHECK-O-NEXT: Finished CGSCC pass manager run.
|
||||
; CHECK-O-NEXT: Finished llvm::Module pass manager run.
|
||||
; CHECK-O-NEXT: Finished llvm::Module pass manager run.
|
||||
; CHECK-PRELINK-O-NEXT: Running pass: GlobalOptPass
|
||||
; CHECK-POSTLINK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
|
||||
; CHECK-POSTLINK-O-NEXT: Starting llvm::Module pass manager run.
|
||||
|
|
|
@ -69,6 +69,8 @@
|
|||
; CHECK-O-NEXT: Running analysis: GlobalsAA
|
||||
; CHECK-O-NEXT: Running analysis: CallGraphAnalysis
|
||||
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
|
||||
; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
|
||||
; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
|
||||
; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
|
||||
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
|
||||
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
|
||||
|
@ -167,6 +169,7 @@
|
|||
; CHECK-O-NEXT: Finished {{.*}}Function pass manager run.
|
||||
; CHECK-O-NEXT: Finished CGSCC pass manager run.
|
||||
; CHECK-O-NEXT: Finished {{.*}}Module pass manager run.
|
||||
; CHECK-O-NEXT: Finished {{.*}}Module pass manager run.
|
||||
; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
|
||||
; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
|
||||
; CHECK-O-NEXT: Running pass: GlobalOptPass
|
||||
|
|
|
@ -77,6 +77,8 @@
|
|||
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
|
||||
; CHECK-O-NEXT: Running analysis: GlobalsAA
|
||||
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
|
||||
; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
|
||||
; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
|
||||
; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
|
||||
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
|
||||
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
|
||||
|
@ -178,6 +180,7 @@
|
|||
; CHECK-O-NEXT: Finished {{.*}}Function pass manager run.
|
||||
; CHECK-O-NEXT: Finished CGSCC pass manager run.
|
||||
; CHECK-O-NEXT: Finished {{.*}}Module pass manager run.
|
||||
; CHECK-O-NEXT: Finished {{.*}}Module pass manager run.
|
||||
; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
|
||||
; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
|
||||
; CHECK-O-NEXT: Running pass: GlobalOptPass
|
||||
|
|
|
@ -96,6 +96,8 @@
|
|||
; CHECK-O-NEXT: Running analysis: GlobalsAA
|
||||
; CHECK-O-NEXT: Running analysis: CallGraphAnalysis
|
||||
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
|
||||
; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
|
||||
; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
|
||||
; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
|
||||
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
|
||||
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
|
||||
|
@ -216,6 +218,7 @@
|
|||
; CHECK-O-NEXT: Finished {{.*}}Function pass manager run.
|
||||
; CHECK-O-NEXT: Finished CGSCC pass manager run.
|
||||
; CHECK-O-NEXT: Finished {{.*}}Module pass manager run.
|
||||
; CHECK-O-NEXT: Finished {{.*}}Module pass manager run.
|
||||
; CHECK-O-NEXT: Running pass: GlobalOptPass
|
||||
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis on bar
|
||||
; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis on bar
|
||||
|
|
|
@ -77,6 +77,8 @@
|
|||
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
|
||||
; CHECK-O-NEXT: Running analysis: GlobalsAA
|
||||
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
|
||||
; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
|
||||
; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
|
||||
; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
|
||||
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
|
||||
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
|
||||
|
@ -177,6 +179,7 @@
|
|||
; CHECK-O-NEXT: Finished {{.*}}Function pass manager run.
|
||||
; CHECK-O-NEXT: Finished CGSCC pass manager run.
|
||||
; CHECK-O-NEXT: Finished {{.*}}Module pass manager run.
|
||||
; CHECK-O-NEXT: Finished {{.*}}Module pass manager run.
|
||||
; CHECK-O-NEXT: Running pass: GlobalOptPass
|
||||
; CHECK-O-NEXT: Finished {{.*}}Module pass manager run.
|
||||
; CHECK-O-NEXT: Running pass: NameAnonGlobalPass
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
; modify_value will be inlined into main. With just the inliner pass, at most
|
||||
; some trivial DCE would happen, which in this case doesn't modify post-inlined
|
||||
; main much.
|
||||
; In contrast, with the full set of module inliner-related passes, at the end of
|
||||
; inlining (incl. function cleanups run after inlining), main will be reduced to
|
||||
; a 'ret 10'
|
||||
;
|
||||
; RUN: opt -passes=inline -S < %s | FileCheck %s --check-prefix=INLINE --check-prefix=CHECK
|
||||
; RUN: opt -passes=scc-oz-module-inliner -S < %s | FileCheck %s --check-prefix=MODULE --check-prefix=CHECK
|
||||
|
||||
define void @modify_value({i32, float}* %v) {
|
||||
%f = getelementptr { i32, float }, { i32, float }* %v, i64 0, i32 0
|
||||
store i32 10, i32* %f
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @main() {
|
||||
%my_val = alloca {i32, float}
|
||||
call void @modify_value({i32, float}* %my_val)
|
||||
%f = getelementptr { i32, float }, { i32, float }* %my_val, i64 0, i32 0
|
||||
%ret = load i32, i32* %f
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @main
|
||||
; INLINE-NEXT: %my_val = alloca
|
||||
; MODULE-NEXT: ret i32 10
|
Loading…
Reference in New Issue