forked from OSchip/llvm-project
[PartialInlining] Hook up inline cost analysis
Differential Revision: http://reviews.llvm.org/D32666
llvm-svn: 301894
This commit is contained in:
parent
28355efdad
commit
6133846be1
|
@ -16,8 +16,12 @@
|
|||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/BranchProbabilityInfo.h"
|
||||
#include "llvm/Analysis/InlineCost.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/IR/CFG.h"
|
||||
#include "llvm/IR/DiagnosticInfo.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
|
@ -31,7 +35,8 @@ using namespace llvm;
|
|||
|
||||
#define DEBUG_TYPE "partial-inlining"
|
||||
|
||||
STATISTIC(NumPartialInlined, "Number of functions partially inlined");
|
||||
STATISTIC(NumPartialInlined,
|
||||
"Number of callsites functions partially inlined into.");
|
||||
|
||||
// Command line option to disable partial-inlining. The default is false:
|
||||
static cl::opt<bool>
|
||||
|
@ -70,16 +75,25 @@ struct FunctionOutliningInfo {
|
|||
};
|
||||
|
||||
struct PartialInlinerImpl {
|
||||
PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(std::move(IFI)) {}
|
||||
PartialInlinerImpl(
|
||||
std::function<AssumptionCache &(Function &)> *GetAC,
|
||||
std::function<TargetTransformInfo &(Function &)> *GTTI,
|
||||
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
|
||||
ProfileSummaryInfo *ProfSI)
|
||||
: GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
|
||||
bool run(Module &M);
|
||||
Function *unswitchFunction(Function *F);
|
||||
|
||||
std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
|
||||
|
||||
private:
|
||||
InlineFunctionInfo IFI;
|
||||
int NumPartialInlining = 0;
|
||||
std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
|
||||
std::function<TargetTransformInfo &(Function &)> *GetTTI;
|
||||
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
|
||||
ProfileSummaryInfo *PSI;
|
||||
|
||||
bool shouldPartialInline(CallSite CS, OptimizationRemarkEmitter &ORE);
|
||||
bool IsLimitReached() {
|
||||
return (MaxNumPartialInlining != -1 &&
|
||||
NumPartialInlining >= MaxNumPartialInlining);
|
||||
|
@ -94,18 +108,30 @@ struct PartialInlinerLegacyPass : public ModulePass {
|
|||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<AssumptionCacheTracker>();
|
||||
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||
AU.addRequired<TargetTransformInfoWrapperPass>();
|
||||
}
|
||||
bool runOnModule(Module &M) override {
|
||||
if (skipModule(M))
|
||||
return false;
|
||||
|
||||
AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
|
||||
TargetTransformInfoWrapperPass *TTIWP =
|
||||
&getAnalysis<TargetTransformInfoWrapperPass>();
|
||||
ProfileSummaryInfo *PSI =
|
||||
getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
|
||||
|
||||
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
|
||||
[&ACT](Function &F) -> AssumptionCache & {
|
||||
return ACT->getAssumptionCache(F);
|
||||
};
|
||||
InlineFunctionInfo IFI(nullptr, &GetAssumptionCache);
|
||||
return PartialInlinerImpl(IFI).run(M);
|
||||
|
||||
std::function<TargetTransformInfo &(Function &)> GetTTI =
|
||||
[&TTIWP](Function &F) -> TargetTransformInfo & {
|
||||
return TTIWP->getTTI(F);
|
||||
};
|
||||
|
||||
return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -263,6 +289,49 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) {
|
|||
return OutliningInfo;
|
||||
}
|
||||
|
||||
// Decide whether the call site CS should be partially inlined, based on the
// inline cost computed for it, and report the decision through ORE.
//
// Returns true only when the computed InlineCost is neither "always" nor
// "never" and converts to true; in every other case a remark explaining the
// rejection is emitted and false is returned.
bool PartialInlinerImpl::shouldPartialInline(CallSite CS,
|
||||
OptimizationRemarkEmitter &ORE) {
|
||||
// TODO: Share more of this logic with shouldInline in Inliner.cpp.
|
||||
using namespace ore;
|
||||
Instruction *Call = CS.getInstruction();
|
||||
Function *Callee = CS.getCalledFunction();
|
||||
Function *Caller = CS.getCaller();
|
||||
// The target transform info is looked up for the callee, not the caller.
auto &CalleeTTI = (*GetTTI)(*Callee);
|
||||
// Cost is computed with the default inline parameters (getInlineParams()).
InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
|
||||
*GetAssumptionCache, GetBFI, PSI);
|
||||

||||
// An "always inline" cost is rejected here: the remark states the callee
// should be fully inlined rather than partially.
if (IC.isAlways()) {
|
||||
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
|
||||
<< NV("Callee", Callee)
|
||||
<< " should always be fully inlined, not partially");
|
||||
return false;
|
||||
}
|
||||

||||
// A "never inline" cost is rejected with a missed-optimization remark.
if (IC.isNever()) {
|
||||
ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
|
||||
<< NV("Callee", Callee) << " not partially inlined into "
|
||||
<< NV("Caller", Caller)
|
||||
<< " because it should never be inlined (cost=never)");
|
||||
return false;
|
||||
}
|
||||

||||
// The cost object converts to false: report the call site as too costly.
// NOTE(review): the threshold is reported as getCostDelta() + getCost();
// this presumes getCostDelta() is (threshold - cost) -- confirm against
// InlineCost.h.
if (!IC) {
|
||||
ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
|
||||
<< NV("Callee", Callee) << " not partially inlined into "
|
||||
<< NV("Caller", Caller) << " because too costly to inline (cost="
|
||||
<< NV("Cost", IC.getCost()) << ", threshold="
|
||||
<< NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
|
||||
return false;
|
||||
}
|
||||

||||
// Accepted: emit an analysis remark carrying the cost and threshold.
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
|
||||
<< NV("Callee", Callee) << " can be partially inlined into "
|
||||
<< NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
|
||||
<< " (threshold="
|
||||
<< NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
|
||||
return true;
|
||||
}
|
||||
|
||||
Function *PartialInlinerImpl::unswitchFunction(Function *F) {
|
||||
|
||||
if (F->hasAddressTaken())
|
||||
|
@ -277,7 +346,6 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
|
|||
// Clone the function, so that we can hack away on it.
|
||||
ValueToValueMapTy VMap;
|
||||
Function *DuplicateFunction = CloneFunction(F, VMap);
|
||||
DuplicateFunction->setLinkage(GlobalValue::InternalLinkage);
|
||||
BasicBlock *NewReturnBlock =
|
||||
cast<BasicBlock>(VMap[OutliningInfo->ReturnBlock]);
|
||||
BasicBlock *NewNonReturnBlock =
|
||||
|
@ -385,16 +453,21 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
|
|||
if (IsLimitReached())
|
||||
continue;
|
||||
|
||||
NumPartialInlining++;
|
||||
|
||||
OptimizationRemarkEmitter ORE(CS.getCaller());
|
||||
if (!shouldPartialInline(CS, ORE))
|
||||
continue;
|
||||
|
||||
DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
|
||||
BasicBlock *Block = CS.getParent();
|
||||
ORE.emit(OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", DLoc, Block)
|
||||
<< ore::NV("Callee", F) << " partially inlined into "
|
||||
<< ore::NV("Caller", CS.getCaller()));
|
||||
|
||||
InlineFunctionInfo IFI(nullptr, GetAssumptionCache);
|
||||
InlineFunction(CS, IFI);
|
||||
NumPartialInlining++;
|
||||
// update stats
|
||||
NumPartialInlined++;
|
||||
}
|
||||
|
||||
// Ditch the duplicate, since we're done with it, and rewrite all remaining
|
||||
|
@ -402,7 +475,6 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
|
|||
DuplicateFunction->replaceAllUsesWith(F);
|
||||
DuplicateFunction->eraseFromParent();
|
||||
|
||||
++NumPartialInlined;
|
||||
|
||||
return ExtractedFunction;
|
||||
}
|
||||
|
@ -448,6 +520,8 @@ char PartialInlinerLegacyPass::ID = 0;
|
|||
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
|
||||
"Partial Inliner", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
|
||||
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
|
||||
INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
|
||||
"Partial Inliner", false, false)
|
||||
|
||||
|
@ -458,12 +532,25 @@ ModulePass *llvm::createPartialInliningPass() {
|
|||
PreservedAnalyses PartialInlinerPass::run(Module &M,
|
||||
ModuleAnalysisManager &AM) {
|
||||
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
|
||||
|
||||
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
|
||||
[&FAM](Function &F) -> AssumptionCache & {
|
||||
return FAM.getResult<AssumptionAnalysis>(F);
|
||||
};
|
||||
InlineFunctionInfo IFI(nullptr, &GetAssumptionCache);
|
||||
if (PartialInlinerImpl(IFI).run(M))
|
||||
|
||||
std::function<BlockFrequencyInfo &(Function &)> GetBFI =
|
||||
[&FAM](Function &F) -> BlockFrequencyInfo & {
|
||||
return FAM.getResult<BlockFrequencyAnalysis>(F);
|
||||
};
|
||||
|
||||
std::function<TargetTransformInfo &(Function &)> GetTTI =
|
||||
[&FAM](Function &F) -> TargetTransformInfo & {
|
||||
return FAM.getResult<TargetIRAnalysis>(F);
|
||||
};
|
||||
|
||||
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
|
||||
|
||||
if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M))
|
||||
return PreservedAnalyses::none();
|
||||
return PreservedAnalyses::all();
|
||||
}
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
; RUN: opt -S -passes=partial-inliner -pass-remarks=partial-inlining --disable-partial-inlining < %s 2>&1 | FileCheck --check-prefix=LIMIT %s
|
||||
; RUN: opt -S -partial-inliner -pass-remarks=partial-inlining -max-partial-inlining=0 < %s 2>&1 | FileCheck --check-prefix=LIMIT %s
|
||||
; RUN: opt -S -passes=partial-inliner -pass-remarks=partial-inlining -max-partial-inlining=0 < %s 2>&1 | FileCheck --check-prefix=LIMIT %s
|
||||
; RUN: opt -S -partial-inliner -pass-remarks=partial-inlining -inline-threshold=0 < %s 2>&1 | FileCheck --check-prefix=LIMIT %s
|
||||
; RUN: opt -S -passes=partial-inliner -pass-remarks=partial-inlining -inline-threshold=0 < %s 2>&1 | FileCheck --check-prefix=LIMIT %s
|
||||
|
||||
define i32 @bar(i32 %arg) local_unnamed_addr #0 !dbg !5 {
|
||||
bb:
|
||||
|
@ -30,6 +32,38 @@ bb2: ; preds = %bb1, %bb
|
|||
ret i32 %tmp3, !dbg !19
|
||||
}
|
||||
|
||||
; Test callee carrying attribute group #1 (defined in this file as
; "noinline nounwind"). Returns 1 when %arg is not negative; otherwise calls
; @foo three times and returns 0. The remark expectations in @dummy_caller
; require that no "partially inlined" remark is emitted for this function.
define i32 @bar_noinline(i32 %arg) local_unnamed_addr #1 !dbg !5 {
|
||||
bb:
|
||||
%tmp = icmp slt i32 %arg, 0, !dbg !7
|
||||
br i1 %tmp, label %bb1, label %bb2, !dbg !8
|
||||

||||
bb1: ; preds = %bb
|
||||
tail call void (...) @foo() #0, !dbg !9
|
||||
tail call void (...) @foo() #0, !dbg !10
|
||||
tail call void (...) @foo() #0, !dbg !11
|
||||
br label %bb2, !dbg !18
|
||||

||||
bb2: ; preds = %bb1, %bb
|
||||
%tmp3 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
|
||||
ret i32 %tmp3, !dbg !19
|
||||
}
|
||||
|
||||
; Test callee carrying attribute group #2 (defined in this file as
; "alwaysinline nounwind"). Returns 1 when %arg is not negative; otherwise
; calls @foo three times and returns 0. The remark expectations in
; @dummy_caller require that no "partially inlined" remark is emitted for
; this function.
define i32 @bar_alwaysinline(i32 %arg) local_unnamed_addr #2 !dbg !5 {
|
||||
bb:
|
||||
%tmp = icmp slt i32 %arg, 0, !dbg !7
|
||||
br i1 %tmp, label %bb1, label %bb2, !dbg !8
|
||||

||||
bb1: ; preds = %bb
|
||||
tail call void (...) @foo() #0, !dbg !9
|
||||
tail call void (...) @foo() #0, !dbg !10
|
||||
tail call void (...) @foo() #0, !dbg !11
|
||||
br label %bb2, !dbg !18
|
||||

||||
bb2: ; preds = %bb1, %bb
|
||||
%tmp3 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
|
||||
ret i32 %tmp3, !dbg !19
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare void @foo(...) local_unnamed_addr #0
|
||||
|
||||
|
@ -37,12 +71,18 @@ declare void @foo(...) local_unnamed_addr #0
|
|||
; Caller used to exercise the partial inliner: it calls @bar, @bar_noinline
; and @bar_alwaysinline with the same argument and returns only the result of
; the @bar call. The FileCheck directives inside the body state which
; "partially inlined" remarks are expected for each prefix.
define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 !dbg !20 {
|
||||
bb:
|
||||
; CHECK:remark{{.*}}bar partially inlined into dummy_caller
|
||||
; CHECK-NOT:remark{{.*}}bar_noinline partially inlined into dummy_caller
|
||||
; CHECK-NOT:remark{{.*}}bar_alwaysinline partially inlined into dummy_caller
|
||||
; LIMIT-NOT:remark{{.*}}bar partially inlined into dummy_caller
|
||||
%tmp = tail call i32 @bar(i32 %arg), !dbg !21
|
||||
%tmp2 = tail call i32 @bar_noinline(i32 %arg), !dbg !21
|
||||
%tmp3 = tail call i32 @bar_alwaysinline(i32 %arg), !dbg !21
|
||||
ret i32 %tmp, !dbg !22
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { noinline nounwind }
|
||||
attributes #2 = { alwaysinline nounwind }
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!3}
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
|
||||
; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
|
||||
|
||||
; Function Attrs: noinline nounwind uwtable
|
||||
; Function Attrs: nounwind uwtable
|
||||
define i32 @bar(i32 %arg) local_unnamed_addr #0 {
|
||||
bb:
|
||||
%tmp = icmp slt i32 %arg, 0
|
||||
|
@ -35,7 +35,7 @@ declare i32 @channels(...) local_unnamed_addr
|
|||
|
||||
declare void @foo(...) local_unnamed_addr
|
||||
|
||||
; Function Attrs: noinline nounwind uwtable
|
||||
; Function Attrs: nounwind uwtable
|
||||
define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 {
|
||||
bb:
|
||||
; CHECK-LABEL: @dummy_caller
|
||||
|
@ -89,7 +89,7 @@ define i32 @dummy_caller2(i32 %arg) local_unnamed_addr #0 {
|
|||
ret i32 %tmp
|
||||
}
|
||||
|
||||
attributes #0 = { noinline nounwind uwtable }
|
||||
attributes #0 = { nounwind uwtable }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
!llvm.ident = !{!0}
|
||||
|
|
Loading…
Reference in New Issue