[PartialInlining] Hook up inline cost analysis

Differential Revision: http://reviews.llvm.org/D32666

llvm-svn: 301894
This commit is contained in:
Xinliang David Li 2017-05-02 02:44:14 +00:00
parent 28355efdad
commit 6133846be1
3 changed files with 141 additions and 14 deletions

View File

@ -16,8 +16,12 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
@ -31,7 +35,8 @@ using namespace llvm;
#define DEBUG_TYPE "partial-inlining"
STATISTIC(NumPartialInlined, "Number of functions partially inlined");
STATISTIC(NumPartialInlined,
"Number of callsites functions partially inlined into.");
// Command line option to disable partial-inlining. The default is false:
static cl::opt<bool>
@ -70,16 +75,25 @@ struct FunctionOutliningInfo {
};
struct PartialInlinerImpl {
PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(std::move(IFI)) {}
PartialInlinerImpl(
std::function<AssumptionCache &(Function &)> *GetAC,
std::function<TargetTransformInfo &(Function &)> *GTTI,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
ProfileSummaryInfo *ProfSI)
: GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
bool run(Module &M);
Function *unswitchFunction(Function *F);
std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
private:
InlineFunctionInfo IFI;
int NumPartialInlining = 0;
std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
std::function<TargetTransformInfo &(Function &)> *GetTTI;
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
ProfileSummaryInfo *PSI;
bool shouldPartialInline(CallSite CS, OptimizationRemarkEmitter &ORE);
bool IsLimitReached() {
return (MaxNumPartialInlining != -1 &&
NumPartialInlining >= MaxNumPartialInlining);
@ -94,18 +108,30 @@ struct PartialInlinerLegacyPass : public ModulePass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
}
bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
TargetTransformInfoWrapperPass *TTIWP =
&getAnalysis<TargetTransformInfoWrapperPass>();
ProfileSummaryInfo *PSI =
getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
[&ACT](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
};
InlineFunctionInfo IFI(nullptr, &GetAssumptionCache);
return PartialInlinerImpl(IFI).run(M);
std::function<TargetTransformInfo &(Function &)> GetTTI =
[&TTIWP](Function &F) -> TargetTransformInfo & {
return TTIWP->getTTI(F);
};
return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M);
}
};
}
@ -263,6 +289,49 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) {
return OutliningInfo;
}
bool PartialInlinerImpl::shouldPartialInline(CallSite CS,
OptimizationRemarkEmitter &ORE) {
// TODO : more sharing with shouldInline in Inliner.cpp
using namespace ore;
Instruction *Call = CS.getInstruction();
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
auto &CalleeTTI = (*GetTTI)(*Callee);
InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
*GetAssumptionCache, GetBFI, PSI);
if (IC.isAlways()) {
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
<< NV("Callee", Callee)
<< " should always be fully inlined, not partially");
return false;
}
if (IC.isNever()) {
ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
<< NV("Callee", Callee) << " not partially inlined into "
<< NV("Caller", Caller)
<< " because it should never be inlined (cost=never)");
return false;
}
if (!IC) {
ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
<< NV("Callee", Callee) << " not partially inlined into "
<< NV("Caller", Caller) << " because too costly to inline (cost="
<< NV("Cost", IC.getCost()) << ", threshold="
<< NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
return false;
}
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
<< NV("Callee", Callee) << " can be partially inlined into "
<< NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
<< " (threshold="
<< NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
return true;
}
Function *PartialInlinerImpl::unswitchFunction(Function *F) {
if (F->hasAddressTaken())
@ -277,7 +346,6 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
// Clone the function, so that we can hack away on it.
ValueToValueMapTy VMap;
Function *DuplicateFunction = CloneFunction(F, VMap);
DuplicateFunction->setLinkage(GlobalValue::InternalLinkage);
BasicBlock *NewReturnBlock =
cast<BasicBlock>(VMap[OutliningInfo->ReturnBlock]);
BasicBlock *NewNonReturnBlock =
@ -385,16 +453,21 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
if (IsLimitReached())
continue;
NumPartialInlining++;
OptimizationRemarkEmitter ORE(CS.getCaller());
if (!shouldPartialInline(CS, ORE))
continue;
DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
BasicBlock *Block = CS.getParent();
ORE.emit(OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", DLoc, Block)
<< ore::NV("Callee", F) << " partially inlined into "
<< ore::NV("Caller", CS.getCaller()));
InlineFunctionInfo IFI(nullptr, GetAssumptionCache);
InlineFunction(CS, IFI);
NumPartialInlining++;
// update stats
NumPartialInlined++;
}
// Ditch the duplicate, since we're done with it, and rewrite all remaining
@ -402,7 +475,6 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
DuplicateFunction->replaceAllUsesWith(F);
DuplicateFunction->eraseFromParent();
++NumPartialInlined;
return ExtractedFunction;
}
@ -448,6 +520,8 @@ char PartialInlinerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
"Partial Inliner", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
"Partial Inliner", false, false)
@ -458,12 +532,25 @@ ModulePass *llvm::createPartialInliningPass() {
PreservedAnalyses PartialInlinerPass::run(Module &M,
ModuleAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
[&FAM](Function &F) -> AssumptionCache & {
return FAM.getResult<AssumptionAnalysis>(F);
};
InlineFunctionInfo IFI(nullptr, &GetAssumptionCache);
if (PartialInlinerImpl(IFI).run(M))
std::function<BlockFrequencyInfo &(Function &)> GetBFI =
[&FAM](Function &F) -> BlockFrequencyInfo & {
return FAM.getResult<BlockFrequencyAnalysis>(F);
};
std::function<TargetTransformInfo &(Function &)> GetTTI =
[&FAM](Function &F) -> TargetTransformInfo & {
return FAM.getResult<TargetIRAnalysis>(F);
};
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}

View File

@ -7,6 +7,8 @@
; RUN: opt -S -passes=partial-inliner -pass-remarks=partial-inlining --disable-partial-inlining < %s 2>&1 | FileCheck --check-prefix=LIMIT %s
; RUN: opt -S -partial-inliner -pass-remarks=partial-inlining -max-partial-inlining=0 < %s 2>&1 | FileCheck --check-prefix=LIMIT %s
; RUN: opt -S -passes=partial-inliner -pass-remarks=partial-inlining -max-partial-inlining=0 < %s 2>&1 | FileCheck --check-prefix=LIMIT %s
; RUN: opt -S -partial-inliner -pass-remarks=partial-inlining -inline-threshold=0 < %s 2>&1 | FileCheck --check-prefix=LIMIT %s
; RUN: opt -S -passes=partial-inliner -pass-remarks=partial-inlining -inline-threshold=0 < %s 2>&1 | FileCheck --check-prefix=LIMIT %s
define i32 @bar(i32 %arg) local_unnamed_addr #0 !dbg !5 {
bb:
@ -30,6 +32,38 @@ bb2: ; preds = %bb1, %bb
ret i32 %tmp3, !dbg !19
}
define i32 @bar_noinline(i32 %arg) local_unnamed_addr #1 !dbg !5 {
bb:
%tmp = icmp slt i32 %arg, 0, !dbg !7
br i1 %tmp, label %bb1, label %bb2, !dbg !8
bb1: ; preds = %bb
tail call void (...) @foo() #0, !dbg !9
tail call void (...) @foo() #0, !dbg !10
tail call void (...) @foo() #0, !dbg !11
br label %bb2, !dbg !18
bb2: ; preds = %bb1, %bb
%tmp3 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
ret i32 %tmp3, !dbg !19
}
define i32 @bar_alwaysinline(i32 %arg) local_unnamed_addr #2 !dbg !5 {
bb:
%tmp = icmp slt i32 %arg, 0, !dbg !7
br i1 %tmp, label %bb1, label %bb2, !dbg !8
bb1: ; preds = %bb
tail call void (...) @foo() #0, !dbg !9
tail call void (...) @foo() #0, !dbg !10
tail call void (...) @foo() #0, !dbg !11
br label %bb2, !dbg !18
bb2: ; preds = %bb1, %bb
%tmp3 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
ret i32 %tmp3, !dbg !19
}
; Function Attrs: nounwind
declare void @foo(...) local_unnamed_addr #0
@ -37,12 +71,18 @@ declare void @foo(...) local_unnamed_addr #0
define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 !dbg !20 {
bb:
; CHECK:remark{{.*}}bar partially inlined into dummy_caller
; CHECK-NOT:remark{{.*}}bar_noinline partially inlined into dummy_caller
; CHECK-NOT:remark{{.*}}bar_alwaysinline partially inlined into dummy_caller
; LIMIT-NOT:remark{{.*}}bar partially inlined into dummy_caller
%tmp = tail call i32 @bar(i32 %arg), !dbg !21
%tmp2 = tail call i32 @bar_noinline(i32 %arg), !dbg !21
%tmp3 = tail call i32 @bar_alwaysinline(i32 %arg), !dbg !21
ret i32 %tmp, !dbg !22
}
attributes #0 = { nounwind }
attributes #1 = { noinline nounwind }
attributes #2 = { alwaysinline nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3}

View File

@ -3,7 +3,7 @@
; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
; Function Attrs: noinline nounwind uwtable
; Function Attrs: nounwind uwtable
define i32 @bar(i32 %arg) local_unnamed_addr #0 {
bb:
%tmp = icmp slt i32 %arg, 0
@ -35,7 +35,7 @@ declare i32 @channels(...) local_unnamed_addr
declare void @foo(...) local_unnamed_addr
; Function Attrs: noinline nounwind uwtable
; Function Attrs: nounwind uwtable
define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 {
bb:
; CHECK-LABEL: @dummy_caller
@ -89,7 +89,7 @@ define i32 @dummy_caller2(i32 %arg) local_unnamed_addr #0 {
ret i32 %tmp
}
attributes #0 = { noinline nounwind uwtable }
attributes #0 = { nounwind uwtable }
attributes #1 = { nounwind }
!llvm.ident = !{!0}