[PGO] IRPGO pre-cleanup pass changes

This patch adds a selected set of cleanup passes, including a pre-inline pass,
before LLVM IR PGO instrumentation. The pre-inliner is only intended to inline
the obvious/trivial call sites before instrumentation, so that much less
instrumentation is needed to get better profiling information. This drastically
improves the performance of the instrumented code for large C++ applications.
Another benefit is context-sensitive counts, which can potentially improve the
PGO optimization.

Differential Revision: http://reviews.llvm.org/D21405

llvm-svn: 275588
This commit is contained in:
Rong Xu 2016-07-15 18:10:49 +00:00
parent 32f900730c
commit 96a19d35ae
2 changed files with 44 additions and 0 deletions

View File

@ -125,6 +125,15 @@ static cl::opt<bool> UseLoopVersioningLICM(
"enable-loop-versioning-licm", cl::init(false), cl::Hidden,
cl::desc("Enable the experimental Loop Versioning LICM pass"));
// Hidden command-line flag "disable-preinline" (default: false). When set,
// addPGOInstrPasses skips the pre-instrumentation inliner and the cleanup
// passes that accompany it.
static cl::opt<bool>
DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
cl::desc("Disable pre-instrumentation inliner"));
// Hidden command-line flag "preinline-threshold" (default: 75). Its value is
// passed to createFunctionInliningPass when addPGOInstrPasses builds the
// pre-instrumentation inliner.
static cl::opt<int> PreInlineThreshold(
"preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore,
cl::desc("Control the amount of inlining in pre-instrumentation inliner "
"(default = 75)"));
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@ -229,6 +238,19 @@ void PassManagerBuilder::populateFunctionPassManager(
// Do PGO instrumentation generation or use pass as the option specified.
void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
if (PGOInstrGen.empty() && PGOInstrUse.empty())
return;
// Perform the pre-inline and cleanup passes at O1 and above, but skip them
// when optimizing for size or when the pre-inliner is disabled.
if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner) {
// Create preinline pass.
MPM.add(createFunctionInliningPass(PreInlineThreshold));
MPM.add(createSROAPass());
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createInstructionCombiningPass()); // Combine silly seq's
addExtensionsToPM(EP_Peephole, MPM);
}
if (!PGOInstrGen.empty()) {
MPM.add(createPGOInstrumentationGenLegacyPass());
// Add the profile lowering pass.

View File

@ -0,0 +1,22 @@
; RUN: opt < %s -O2 -profile-generate=default.profraw -S | FileCheck %s --check-prefix=GEN
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; @foo calls the internal function @bar and adds its result to %i. The checks
; below expect a profile counter load for @foo itself and no separate counter
; load attributed to an inlined copy of @bar.
define i32 @foo(i32 %i) {
entry:
; GEN: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_foo
; GEN-NOT: %pgocount.i = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__bar
%call = call i32 @bar()
%add = add nsw i32 %i, %call
ret i32 %add
}
; @bar is a trivial internal wrapper that forwards to the external @bar1.
define internal i32 @bar() {
; Check that bar is inlined into foo and eliminated from the IR.
; GEN-NOT: define internal i32 @bar
entry:
%call = call i32 (...) @bar1()
ret i32 %call
}
declare i32 @bar1(...)