forked from OSchip/llvm-project
[PGO] IRPGO pre-cleanup pass changes
This patch adds a selected set of cleanup passes, including a pre-inline pass, that run before LLVM IR PGO instrumentation. The pre-inliner is only intended to inline the obvious/trivial call sites before instrumentation, so that much less instrumentation is needed to get better profiling information. This will drastically improve the performance of the instrumented code for large C++ applications. Another benefit is context-sensitive counts, which can potentially improve the PGO optimization. Differential Revision: http://reviews.llvm.org/D21405 llvm-svn: 275588
This commit is contained in:
parent
32f900730c
commit
96a19d35ae
|
@ -125,6 +125,15 @@ static cl::opt<bool> UseLoopVersioningLICM(
|
|||
"enable-loop-versioning-licm", cl::init(false), cl::Hidden,
|
||||
cl::desc("Enable the experimental Loop Versioning LICM pass"));
|
||||
|
||||
static cl::opt<bool>
|
||||
DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
|
||||
cl::desc("Disable pre-instrumentation inliner"));
|
||||
|
||||
static cl::opt<int> PreInlineThreshold(
|
||||
"preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore,
|
||||
cl::desc("Control the amount of inlining in pre-instrumentation inliner "
|
||||
"(default = 75)"));
|
||||
|
||||
PassManagerBuilder::PassManagerBuilder() {
|
||||
OptLevel = 2;
|
||||
SizeLevel = 0;
|
||||
|
@ -229,6 +238,19 @@ void PassManagerBuilder::populateFunctionPassManager(
|
|||
|
||||
// Do PGO instrumentation generation or use pass as the option specified.
|
||||
void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
|
||||
if (PGOInstrGen.empty() && PGOInstrUse.empty())
|
||||
return;
|
||||
// Perform the preinline and cleanup passes for O1 and above.
|
||||
// And avoid doing them if optimizing for size.
|
||||
if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner) {
|
||||
// Create preinline pass.
|
||||
MPM.add(createFunctionInliningPass(PreInlineThreshold));
|
||||
MPM.add(createSROAPass());
|
||||
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
|
||||
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
|
||||
MPM.add(createInstructionCombiningPass()); // Combine silly seq's
|
||||
addExtensionsToPM(EP_Peephole, MPM);
|
||||
}
|
||||
if (!PGOInstrGen.empty()) {
|
||||
MPM.add(createPGOInstrumentationGenLegacyPass());
|
||||
// Add the profile lowering pass.
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
; RUN: opt < %s -O2 -profile-generate=default.profraw -S | FileCheck %s --check-prefix=GEN
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define i32 @foo(i32 %i) {
|
||||
entry:
|
||||
; GEN: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_foo
|
||||
; GEN-NOT: %pgocount.i = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__bar
|
||||
%call = call i32 @bar()
|
||||
%add = add nsw i32 %i, %call
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
define internal i32 @bar() {
|
||||
; Check that bar is inlined into foo and eliminated from the IR.
|
||||
; GEN-NOT: define internal i32 @bar
|
||||
entry:
|
||||
%call = call i32 (...) @bar1()
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
declare i32 @bar1(...)
|
Loading…
Reference in New Issue