forked from OSchip/llvm-project
[PartialInlining] Profile based cost analysis
Implemented frequency based cost/saving analysis and related options. The pass is now in a state ready to be turned on in the pipeline (in follow up). Differential Revision: http://reviews.llvm.org/D32783 llvm-svn: 302967
This commit is contained in:
parent
3f6dd7a86c
commit
66bdfca77a
|
@ -16,6 +16,7 @@
|
||||||
#include "llvm/ADT/Statistic.h"
|
#include "llvm/ADT/Statistic.h"
|
||||||
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||||
#include "llvm/Analysis/BranchProbabilityInfo.h"
|
#include "llvm/Analysis/BranchProbabilityInfo.h"
|
||||||
|
#include "llvm/Analysis/CodeMetrics.h"
|
||||||
#include "llvm/Analysis/InlineCost.h"
|
#include "llvm/Analysis/InlineCost.h"
|
||||||
#include "llvm/Analysis/LoopInfo.h"
|
#include "llvm/Analysis/LoopInfo.h"
|
||||||
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
|
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
|
||||||
|
@ -42,6 +43,11 @@ STATISTIC(NumPartialInlined,
|
||||||
static cl::opt<bool>
|
static cl::opt<bool>
|
||||||
DisablePartialInlining("disable-partial-inlining", cl::init(false),
|
DisablePartialInlining("disable-partial-inlining", cl::init(false),
|
||||||
cl::Hidden, cl::desc("Disable partial ininling"));
|
cl::Hidden, cl::desc("Disable partial ininling"));
|
||||||
|
// This is an option used by testing:
|
||||||
|
static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
|
||||||
|
cl::init(false), cl::ZeroOrMore,
|
||||||
|
cl::ReallyHidden,
|
||||||
|
cl::desc("Skip Cost Analysis"));
|
||||||
|
|
||||||
static cl::opt<unsigned> MaxNumInlineBlocks(
|
static cl::opt<unsigned> MaxNumInlineBlocks(
|
||||||
"max-num-inline-blocks", cl::init(5), cl::Hidden,
|
"max-num-inline-blocks", cl::init(5), cl::Hidden,
|
||||||
|
@ -53,6 +59,15 @@ static cl::opt<int> MaxNumPartialInlining(
|
||||||
"max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
|
"max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
|
||||||
cl::desc("Max number of partial inlining. The default is unlimited"));
|
cl::desc("Max number of partial inlining. The default is unlimited"));
|
||||||
|
|
||||||
|
// Used only when PGO or user annotated branch data is absent. It is
|
||||||
|
// the least value that is used to weigh the outline region. If BFI
|
||||||
|
// produces larger value, the BFI value will be used.
|
||||||
|
static cl::opt<int>
|
||||||
|
OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
|
||||||
|
cl::Hidden, cl::ZeroOrMore,
|
||||||
|
cl::desc("Relative frequency of outline region to "
|
||||||
|
"the entry block"));
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
struct FunctionOutliningInfo {
|
struct FunctionOutliningInfo {
|
||||||
|
@ -84,8 +99,6 @@ struct PartialInlinerImpl {
|
||||||
bool run(Module &M);
|
bool run(Module &M);
|
||||||
Function *unswitchFunction(Function *F);
|
Function *unswitchFunction(Function *F);
|
||||||
|
|
||||||
std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int NumPartialInlining = 0;
|
int NumPartialInlining = 0;
|
||||||
std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
|
std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
|
||||||
|
@ -93,11 +106,84 @@ private:
|
||||||
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
|
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
|
||||||
ProfileSummaryInfo *PSI;
|
ProfileSummaryInfo *PSI;
|
||||||
|
|
||||||
bool shouldPartialInline(CallSite CS, OptimizationRemarkEmitter &ORE);
|
// Return the frequency of the OutliningCallBB relative to F's entry point.
|
||||||
|
// The result is no larger than 1 and is represented using BP.
|
||||||
|
// (Note that the outlined region's 'head' block can only have incoming
|
||||||
|
// edges from the guarding entry blocks).
|
||||||
|
BranchProbability getOutliningCallBBRelativeFreq(Function *F,
|
||||||
|
FunctionOutliningInfo *OI,
|
||||||
|
Function *DuplicateFunction,
|
||||||
|
BlockFrequencyInfo *BFI,
|
||||||
|
BasicBlock *OutliningCallBB);
|
||||||
|
|
||||||
|
// Return true if the callee of CS should be partially inlined with
|
||||||
|
// profit.
|
||||||
|
bool shouldPartialInline(CallSite CS, Function *F, FunctionOutliningInfo *OI,
|
||||||
|
BlockFrequencyInfo *CalleeBFI,
|
||||||
|
BasicBlock *OutliningCallBB,
|
||||||
|
int OutliningCallOverhead,
|
||||||
|
OptimizationRemarkEmitter &ORE);
|
||||||
|
|
||||||
|
// Try to inline DuplicateFunction (cloned from F with call to
|
||||||
|
// the OutlinedFunction) into its callers. Return true
|
||||||
|
// if there is any successful inlining.
|
||||||
|
bool tryPartialInline(Function *DuplicateFunction,
|
||||||
|
Function *F, /* original function */
|
||||||
|
FunctionOutliningInfo *OI, Function *OutlinedFunction,
|
||||||
|
BlockFrequencyInfo *CalleeBFI);
|
||||||
|
|
||||||
|
// Compute the mapping from use site of DuplicateFunction to the enclosing
|
||||||
|
// BB's profile count.
|
||||||
|
void computeCallsiteToProfCountMap(Function *DuplicateFunction,
|
||||||
|
DenseMap<User *, uint64_t> &SiteCountMap);
|
||||||
|
|
||||||
bool IsLimitReached() {
|
bool IsLimitReached() {
|
||||||
return (MaxNumPartialInlining != -1 &&
|
return (MaxNumPartialInlining != -1 &&
|
||||||
NumPartialInlining >= MaxNumPartialInlining);
|
NumPartialInlining >= MaxNumPartialInlining);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CallSite getCallSite(User *U) {
|
||||||
|
CallSite CS;
|
||||||
|
if (CallInst *CI = dyn_cast<CallInst>(U))
|
||||||
|
CS = CallSite(CI);
|
||||||
|
else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
|
||||||
|
CS = CallSite(II);
|
||||||
|
else
|
||||||
|
llvm_unreachable("All uses must be calls");
|
||||||
|
return CS;
|
||||||
|
}
|
||||||
|
|
||||||
|
CallSite getOneCallSiteTo(Function *F) {
|
||||||
|
User *User = *F->user_begin();
|
||||||
|
return getCallSite(User);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) {
|
||||||
|
CallSite CS = getOneCallSiteTo(F);
|
||||||
|
DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
|
||||||
|
BasicBlock *Block = CS.getParent();
|
||||||
|
return std::make_tuple(DLoc, Block);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the costs associated with function outlining:
|
||||||
|
// - The first value is the non-weighted runtime cost for making the call
|
||||||
|
// to the outlined function 'OutlinedFunction', including the additional
|
||||||
|
// setup cost in the outlined function itself;
|
||||||
|
// - The second value is the estimated size of the new call sequence in
|
||||||
|
// basic block 'OutliningCallBB';
|
||||||
|
// - The third value is the estimated size of the original code from
|
||||||
|
// function 'F' that is extracted into the outlined function.
|
||||||
|
std::tuple<int, int, int>
|
||||||
|
computeOutliningCosts(Function *F, const FunctionOutliningInfo *OutliningInfo,
|
||||||
|
Function *OutlinedFunction,
|
||||||
|
BasicBlock *OutliningCallBB);
|
||||||
|
// Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
|
||||||
|
// approximate both the size and runtime cost (Note that in the current
|
||||||
|
// inline cost analysis, there is no clear distinction there either).
|
||||||
|
int computeBBInlineCost(BasicBlock *BB);
|
||||||
|
|
||||||
|
std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct PartialInlinerLegacyPass : public ModulePass {
|
struct PartialInlinerLegacyPass : public ModulePass {
|
||||||
|
@ -223,7 +309,8 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) {
|
||||||
// Do sanity check of the entries: threre should not
|
// Do sanity check of the entries: threre should not
|
||||||
// be any successors (not in the entry set) other than
|
// be any successors (not in the entry set) other than
|
||||||
// {ReturnBlock, NonReturnBlock}
|
// {ReturnBlock, NonReturnBlock}
|
||||||
assert(OutliningInfo->Entries[0] == &F->front());
|
assert(OutliningInfo->Entries[0] == &F->front() &&
|
||||||
|
"Function Entry must be the first in Entries vector");
|
||||||
DenseSet<BasicBlock *> Entries;
|
DenseSet<BasicBlock *> Entries;
|
||||||
for (BasicBlock *E : OutliningInfo->Entries)
|
for (BasicBlock *E : OutliningInfo->Entries)
|
||||||
Entries.insert(E);
|
Entries.insert(E);
|
||||||
|
@ -289,10 +376,54 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) {
|
||||||
return OutliningInfo;
|
return OutliningInfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool PartialInlinerImpl::shouldPartialInline(CallSite CS,
|
// Check if there is PGO data or user annotated branch data:
|
||||||
OptimizationRemarkEmitter &ORE) {
|
static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
|
||||||
// TODO : more sharing with shouldInline in Inliner.cpp
|
if (F->getEntryCount())
|
||||||
|
return true;
|
||||||
|
// Now check if any of the entry block has MD_prof data:
|
||||||
|
for (auto *E : OI->Entries) {
|
||||||
|
BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
|
||||||
|
if (!BR || BR->isUnconditional())
|
||||||
|
continue;
|
||||||
|
uint64_t T, F;
|
||||||
|
if (BR->extractProfMetadata(T, F))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
|
||||||
|
Function *F, FunctionOutliningInfo *OI, Function *DuplicateFunction,
|
||||||
|
BlockFrequencyInfo *BFI, BasicBlock *OutliningCallBB) {
|
||||||
|
|
||||||
|
auto EntryFreq =
|
||||||
|
BFI->getBlockFreq(&DuplicateFunction->getEntryBlock());
|
||||||
|
auto OutliningCallFreq = BFI->getBlockFreq(OutliningCallBB);
|
||||||
|
|
||||||
|
auto OutlineRegionRelFreq =
|
||||||
|
BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(),
|
||||||
|
EntryFreq.getFrequency());
|
||||||
|
|
||||||
|
if (hasProfileData(F, OI))
|
||||||
|
return OutlineRegionRelFreq;
|
||||||
|
|
||||||
|
// When profile data is not available, we need to be very
|
||||||
|
// conservative in estimating the overall savings. We need to make sure
|
||||||
|
// the outline region relative frequency is not below the threshold
|
||||||
|
// specified by the option.
|
||||||
|
OutlineRegionRelFreq = std::max(OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
|
||||||
|
|
||||||
|
return OutlineRegionRelFreq;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool PartialInlinerImpl::shouldPartialInline(
|
||||||
|
CallSite CS, Function *F /* Original Callee */, FunctionOutliningInfo *OI,
|
||||||
|
BlockFrequencyInfo *CalleeBFI, BasicBlock *OutliningCallBB,
|
||||||
|
int NonWeightedOutliningRcost, OptimizationRemarkEmitter &ORE) {
|
||||||
using namespace ore;
|
using namespace ore;
|
||||||
|
if (SkipCostAnalysis)
|
||||||
|
return true;
|
||||||
|
|
||||||
Instruction *Call = CS.getInstruction();
|
Instruction *Call = CS.getInstruction();
|
||||||
Function *Callee = CS.getCalledFunction();
|
Function *Callee = CS.getCalledFunction();
|
||||||
Function *Caller = CS.getCaller();
|
Function *Caller = CS.getCaller();
|
||||||
|
@ -302,36 +433,166 @@ bool PartialInlinerImpl::shouldPartialInline(CallSite CS,
|
||||||
|
|
||||||
if (IC.isAlways()) {
|
if (IC.isAlways()) {
|
||||||
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
|
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
|
||||||
<< NV("Callee", Callee)
|
<< NV("Callee", F)
|
||||||
<< " should always be fully inlined, not partially");
|
<< " should always be fully inlined, not partially");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IC.isNever()) {
|
if (IC.isNever()) {
|
||||||
ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
|
ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
|
||||||
<< NV("Callee", Callee) << " not partially inlined into "
|
<< NV("Callee", F) << " not partially inlined into "
|
||||||
<< NV("Caller", Caller)
|
<< NV("Caller", Caller)
|
||||||
<< " because it should never be inlined (cost=never)");
|
<< " because it should never be inlined (cost=never)");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!IC) {
|
if (!IC) {
|
||||||
ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
|
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
|
||||||
<< NV("Callee", Callee) << " not partially inlined into "
|
<< NV("Callee", F) << " not partially inlined into "
|
||||||
<< NV("Caller", Caller) << " because too costly to inline (cost="
|
<< NV("Caller", Caller) << " because too costly to inline (cost="
|
||||||
<< NV("Cost", IC.getCost()) << ", threshold="
|
<< NV("Cost", IC.getCost()) << ", threshold="
|
||||||
<< NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
|
<< NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
const DataLayout &DL = Caller->getParent()->getDataLayout();
|
||||||
|
// The savings of eliminating the call:
|
||||||
|
int NonWeightedSavings = getCallsiteCost(CS, DL);
|
||||||
|
BlockFrequency NormWeightedSavings(NonWeightedSavings);
|
||||||
|
|
||||||
|
auto RelativeFreq =
|
||||||
|
getOutliningCallBBRelativeFreq(F, OI, Callee, CalleeBFI, OutliningCallBB);
|
||||||
|
auto NormWeightedRcost =
|
||||||
|
BlockFrequency(NonWeightedOutliningRcost) * RelativeFreq;
|
||||||
|
|
||||||
|
// Weighted saving is smaller than weighted cost, return false
|
||||||
|
if (NormWeightedSavings < NormWeightedRcost) {
|
||||||
|
ORE.emit(
|
||||||
|
OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", Call)
|
||||||
|
<< NV("Callee", F) << " not partially inlined into "
|
||||||
|
<< NV("Caller", Caller) << " runtime overhead (overhead="
|
||||||
|
<< NV("Overhead", (unsigned)NormWeightedRcost.getFrequency())
|
||||||
|
<< ", savings="
|
||||||
|
<< NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) << ")"
|
||||||
|
<< " of making the outlined call is too high");
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
|
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
|
||||||
<< NV("Callee", Callee) << " can be partially inlined into "
|
<< NV("Callee", F) << " can be partially inlined into "
|
||||||
<< NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
|
<< NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
|
||||||
<< " (threshold="
|
<< " (threshold="
|
||||||
<< NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
|
<< NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Ideally we should share Inliner's InlineCost Analysis code.
|
||||||
|
// For now use a simplified version. The returned 'InlineCost' will be used
|
||||||
|
// to estimate the size cost as well as runtime cost of the BB.
|
||||||
|
int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
|
||||||
|
int InlineCost = 0;
|
||||||
|
const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
|
||||||
|
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
|
||||||
|
if (isa<DbgInfoIntrinsic>(I))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (CallInst *CI = dyn_cast<CallInst>(I)) {
|
||||||
|
InlineCost += getCallsiteCost(CallSite(CI), DL);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
|
||||||
|
InlineCost += getCallsiteCost(CallSite(II), DL);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
|
||||||
|
InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
InlineCost += InlineConstants::InstrCost;
|
||||||
|
}
|
||||||
|
return InlineCost;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts(
|
||||||
|
Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction,
|
||||||
|
BasicBlock *OutliningCallBB) {
|
||||||
|
// First compute the cost of the outlined region 'OI' in the original
|
||||||
|
// function 'F':
|
||||||
|
int OutlinedRegionCost = 0;
|
||||||
|
for (BasicBlock &BB : *F) {
|
||||||
|
if (&BB != OI->ReturnBlock &&
|
||||||
|
// Assuming Entry set is small -- do a linear search here:
|
||||||
|
std::find(OI->Entries.begin(), OI->Entries.end(), &BB) ==
|
||||||
|
OI->Entries.end()) {
|
||||||
|
OutlinedRegionCost += computeBBInlineCost(&BB);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now compute the cost of the call sequence to the outlined function
|
||||||
|
// 'OutlinedFunction' in BB 'OutliningCallBB':
|
||||||
|
int OutliningFuncCallCost = computeBBInlineCost(OutliningCallBB);
|
||||||
|
|
||||||
|
// Now compute the cost of the extracted/outlined function itself:
|
||||||
|
int OutlinedFunctionCost = 0;
|
||||||
|
for (BasicBlock &BB : *OutlinedFunction) {
|
||||||
|
OutlinedFunctionCost += computeBBInlineCost(&BB);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(OutlinedFunctionCost >= OutlinedRegionCost &&
|
||||||
|
"Outlined function cost should be no less than the outlined region");
|
||||||
|
int OutliningRuntimeOverhead =
|
||||||
|
OutliningFuncCallCost + (OutlinedFunctionCost - OutlinedRegionCost);
|
||||||
|
|
||||||
|
return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead,
|
||||||
|
OutlinedRegionCost);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the callsite to profile count map which is
|
||||||
|
// used to update the original function's entry count,
|
||||||
|
// after the function is partially inlined into the callsite.
|
||||||
|
void PartialInlinerImpl::computeCallsiteToProfCountMap(
|
||||||
|
Function *DuplicateFunction,
|
||||||
|
DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
|
||||||
|
std::vector<User *> Users(DuplicateFunction->user_begin(),
|
||||||
|
DuplicateFunction->user_end());
|
||||||
|
Function *CurrentCaller = nullptr;
|
||||||
|
BlockFrequencyInfo *CurrentCallerBFI = nullptr;
|
||||||
|
|
||||||
|
auto ComputeCurrBFI = [&,this](Function *Caller) {
|
||||||
|
// For the old pass manager:
|
||||||
|
if (!GetBFI) {
|
||||||
|
if (CurrentCallerBFI)
|
||||||
|
delete CurrentCallerBFI;
|
||||||
|
DominatorTree DT(*Caller);
|
||||||
|
LoopInfo LI(DT);
|
||||||
|
BranchProbabilityInfo BPI(*Caller, LI);
|
||||||
|
CurrentCallerBFI = new BlockFrequencyInfo(*Caller, BPI, LI);
|
||||||
|
} else {
|
||||||
|
// New pass manager:
|
||||||
|
CurrentCallerBFI = &(*GetBFI)(*Caller);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
for (User *User : Users) {
|
||||||
|
CallSite CS = getCallSite(User);
|
||||||
|
Function *Caller = CS.getCaller();
|
||||||
|
if (CurrentCaller != Caller) {
|
||||||
|
CurrentCaller = Caller;
|
||||||
|
ComputeCurrBFI(Caller);
|
||||||
|
} else {
|
||||||
|
assert(CurrentCallerBFI && "CallerBFI is not set");
|
||||||
|
}
|
||||||
|
BasicBlock *CallBB = CS.getInstruction()->getParent();
|
||||||
|
auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
|
||||||
|
if (Count)
|
||||||
|
CallSiteToProfCountMap[User] = *Count;
|
||||||
|
else
|
||||||
|
CallSiteToProfCountMap[User] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Function *PartialInlinerImpl::unswitchFunction(Function *F) {
|
Function *PartialInlinerImpl::unswitchFunction(Function *F) {
|
||||||
|
|
||||||
if (F->hasAddressTaken())
|
if (F->hasAddressTaken())
|
||||||
|
@ -347,21 +608,21 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
|
||||||
if (PSI->isFunctionEntryCold(F))
|
if (PSI->isFunctionEntryCold(F))
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
|
if (F->user_begin() == F->user_end())
|
||||||
computeOutliningInfo(F);
|
return nullptr;
|
||||||
|
|
||||||
if (!OutliningInfo)
|
std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
|
||||||
|
|
||||||
|
if (!OI)
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
// Clone the function, so that we can hack away on it.
|
// Clone the function, so that we can hack away on it.
|
||||||
ValueToValueMapTy VMap;
|
ValueToValueMapTy VMap;
|
||||||
Function *DuplicateFunction = CloneFunction(F, VMap);
|
Function *DuplicateFunction = CloneFunction(F, VMap);
|
||||||
BasicBlock *NewReturnBlock =
|
BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
|
||||||
cast<BasicBlock>(VMap[OutliningInfo->ReturnBlock]);
|
BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
|
||||||
BasicBlock *NewNonReturnBlock =
|
|
||||||
cast<BasicBlock>(VMap[OutliningInfo->NonReturnBlock]);
|
|
||||||
DenseSet<BasicBlock *> NewEntries;
|
DenseSet<BasicBlock *> NewEntries;
|
||||||
for (BasicBlock *BB : OutliningInfo->Entries) {
|
for (BasicBlock *BB : OI->Entries) {
|
||||||
NewEntries.insert(cast<BasicBlock>(VMap[BB]));
|
NewEntries.insert(cast<BasicBlock>(VMap[BB]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -390,7 +651,7 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
|
||||||
BasicBlock *PreReturn = NewReturnBlock;
|
BasicBlock *PreReturn = NewReturnBlock;
|
||||||
// only split block when necessary:
|
// only split block when necessary:
|
||||||
PHINode *FirstPhi = getFirstPHI(PreReturn);
|
PHINode *FirstPhi = getFirstPHI(PreReturn);
|
||||||
unsigned NumPredsFromEntries = OutliningInfo->ReturnBlockPreds.size();
|
unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size();
|
||||||
if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) {
|
if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) {
|
||||||
|
|
||||||
NewReturnBlock = NewReturnBlock->splitBasicBlock(
|
NewReturnBlock = NewReturnBlock->splitBasicBlock(
|
||||||
|
@ -408,14 +669,14 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
|
||||||
Ins = NewReturnBlock->getFirstNonPHI();
|
Ins = NewReturnBlock->getFirstNonPHI();
|
||||||
|
|
||||||
RetPhi->addIncoming(&*I, PreReturn);
|
RetPhi->addIncoming(&*I, PreReturn);
|
||||||
for (BasicBlock *E : OutliningInfo->ReturnBlockPreds) {
|
for (BasicBlock *E : OI->ReturnBlockPreds) {
|
||||||
BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
|
BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
|
||||||
RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE);
|
RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE);
|
||||||
OldPhi->removeIncomingValue(NewE);
|
OldPhi->removeIncomingValue(NewE);
|
||||||
}
|
}
|
||||||
++I;
|
++I;
|
||||||
}
|
}
|
||||||
for (auto E : OutliningInfo->ReturnBlockPreds) {
|
for (auto E : OI->ReturnBlockPreds) {
|
||||||
BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
|
BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
|
||||||
NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);
|
NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);
|
||||||
}
|
}
|
||||||
|
@ -443,50 +704,107 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
|
||||||
BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI);
|
BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI);
|
||||||
|
|
||||||
// Extract the body of the if.
|
// Extract the body of the if.
|
||||||
Function *ExtractedFunction =
|
Function *OutlinedFunction =
|
||||||
CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI)
|
CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI)
|
||||||
.extractCodeRegion();
|
.extractCodeRegion();
|
||||||
|
|
||||||
// Inline the top-level if test into all callers.
|
bool AnyInline =
|
||||||
std::vector<User *> Users(DuplicateFunction->user_begin(),
|
tryPartialInline(DuplicateFunction, F, OI.get(), OutlinedFunction, &BFI);
|
||||||
DuplicateFunction->user_end());
|
|
||||||
|
|
||||||
for (User *User : Users) {
|
|
||||||
CallSite CS;
|
|
||||||
if (CallInst *CI = dyn_cast<CallInst>(User))
|
|
||||||
CS = CallSite(CI);
|
|
||||||
else if (InvokeInst *II = dyn_cast<InvokeInst>(User))
|
|
||||||
CS = CallSite(II);
|
|
||||||
else
|
|
||||||
llvm_unreachable("All uses must be calls");
|
|
||||||
|
|
||||||
if (IsLimitReached())
|
|
||||||
continue;
|
|
||||||
|
|
||||||
OptimizationRemarkEmitter ORE(CS.getCaller());
|
|
||||||
if (!shouldPartialInline(CS, ORE))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
|
|
||||||
BasicBlock *Block = CS.getParent();
|
|
||||||
ORE.emit(OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", DLoc, Block)
|
|
||||||
<< ore::NV("Callee", F) << " partially inlined into "
|
|
||||||
<< ore::NV("Caller", CS.getCaller()));
|
|
||||||
|
|
||||||
InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
|
|
||||||
InlineFunction(CS, IFI);
|
|
||||||
NumPartialInlining++;
|
|
||||||
// update stats
|
|
||||||
NumPartialInlined++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ditch the duplicate, since we're done with it, and rewrite all remaining
|
// Ditch the duplicate, since we're done with it, and rewrite all remaining
|
||||||
// users (function pointers, etc.) back to the original function.
|
// users (function pointers, etc.) back to the original function.
|
||||||
DuplicateFunction->replaceAllUsesWith(F);
|
DuplicateFunction->replaceAllUsesWith(F);
|
||||||
DuplicateFunction->eraseFromParent();
|
DuplicateFunction->eraseFromParent();
|
||||||
|
if (!AnyInline && OutlinedFunction)
|
||||||
|
OutlinedFunction->eraseFromParent();
|
||||||
|
return OutlinedFunction;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction,
|
||||||
|
Function *F,
|
||||||
|
FunctionOutliningInfo *OI,
|
||||||
|
Function *OutlinedFunction,
|
||||||
|
BlockFrequencyInfo *CalleeBFI) {
|
||||||
|
if (OutlinedFunction == nullptr)
|
||||||
|
return false;
|
||||||
|
|
||||||
return ExtractedFunction;
|
int NonWeightedRcost;
|
||||||
|
int SizeCost;
|
||||||
|
int OutlinedRegionSizeCost;
|
||||||
|
|
||||||
|
auto OutliningCallBB =
|
||||||
|
getOneCallSiteTo(OutlinedFunction).getInstruction()->getParent();
|
||||||
|
|
||||||
|
std::tie(SizeCost, NonWeightedRcost, OutlinedRegionSizeCost) =
|
||||||
|
computeOutliningCosts(F, OI, OutlinedFunction, OutliningCallBB);
|
||||||
|
|
||||||
|
// If the call sequence to the outlined function is larger than the original
|
||||||
|
// outlined region size, it does not increase the chances of inlining
|
||||||
|
// 'F' with outlining (The inliner uses the size increase to model the
|
||||||
|
// cost of inlining a callee).
|
||||||
|
if (!SkipCostAnalysis && OutlinedRegionSizeCost < SizeCost) {
|
||||||
|
OptimizationRemarkEmitter ORE(F);
|
||||||
|
DebugLoc DLoc;
|
||||||
|
BasicBlock *Block;
|
||||||
|
std::tie(DLoc, Block) = getOneDebugLoc(DuplicateFunction);
|
||||||
|
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
|
||||||
|
DLoc, Block)
|
||||||
|
<< ore::NV("Function", F)
|
||||||
|
<< " not partially inlined into callers (Original Size = "
|
||||||
|
<< ore::NV("OutlinedRegionOriginalSize", OutlinedRegionSizeCost)
|
||||||
|
<< ", Size of call sequence to outlined function = "
|
||||||
|
<< ore::NV("NewSize", SizeCost) << ")");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(F->user_begin() == F->user_end() &&
|
||||||
|
"F's users should all be replaced!");
|
||||||
|
std::vector<User *> Users(DuplicateFunction->user_begin(),
|
||||||
|
DuplicateFunction->user_end());
|
||||||
|
|
||||||
|
DenseMap<User *, uint64_t> CallSiteToProfCountMap;
|
||||||
|
if (F->getEntryCount())
|
||||||
|
computeCallsiteToProfCountMap(DuplicateFunction, CallSiteToProfCountMap);
|
||||||
|
|
||||||
|
auto CalleeEntryCount = F->getEntryCount();
|
||||||
|
uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);
|
||||||
|
bool AnyInline = false;
|
||||||
|
for (User *User : Users) {
|
||||||
|
CallSite CS = getCallSite(User);
|
||||||
|
|
||||||
|
if (IsLimitReached())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
OptimizationRemarkEmitter ORE(CS.getCaller());
|
||||||
|
|
||||||
|
if (!shouldPartialInline(CS, F, OI, CalleeBFI, OutliningCallBB,
|
||||||
|
NonWeightedRcost, ORE))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
ORE.emit(
|
||||||
|
OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction())
|
||||||
|
<< ore::NV("Callee", F) << " partially inlined into "
|
||||||
|
<< ore::NV("Caller", CS.getCaller()));
|
||||||
|
|
||||||
|
InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
|
||||||
|
InlineFunction(CS, IFI);
|
||||||
|
|
||||||
|
// Now update the entry count:
|
||||||
|
if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
|
||||||
|
uint64_t CallSiteCount = CallSiteToProfCountMap[User];
|
||||||
|
CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
AnyInline = true;
|
||||||
|
NumPartialInlining++;
|
||||||
|
// Update the stats
|
||||||
|
NumPartialInlined++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (AnyInline && CalleeEntryCount)
|
||||||
|
F->setEntryCount(CalleeEntryCountV);
|
||||||
|
|
||||||
|
return AnyInline;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool PartialInlinerImpl::run(Module &M) {
|
bool PartialInlinerImpl::run(Module &M) {
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
; RUN: opt < %s -partial-inliner -S | FileCheck %s
|
; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
|
||||||
|
|
||||||
; This test checks to make sure that the CodeExtractor
|
; This test checks to make sure that the CodeExtractor
|
||||||
; properly sets the entry count for the function that is
|
; properly sets the entry count for the function that is
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck %s
|
; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -skip-partial-inlining-cost-analysis -S | FileCheck %s
|
||||||
|
|
||||||
; This test checks to make sure that CodeExtractor updates
|
; This test checks to make sure that CodeExtractor updates
|
||||||
; the exit branch probabilities for multiple exit blocks.
|
; the exit branch probabilities for multiple exit blocks.
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
; RUN: opt < %s -partial-inliner -S | FileCheck %s
|
; RUN: opt < %s -partial-inliner -S | FileCheck %s
|
||||||
; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s
|
; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s
|
||||||
; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
|
; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
|
||||||
; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
|
; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
|
||||||
|
|
||||||
; Function Attrs: nounwind uwtable
|
; Function Attrs: nounwind uwtable
|
||||||
define i32 @bar(i32 %arg) local_unnamed_addr #0 {
|
define i32 @bar(i32 %arg) local_unnamed_addr #0 {
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
; RUN: opt < %s -skip-partial-inlining-cost-analysis -partial-inliner -S | FileCheck %s
|
||||||
|
; RUN: opt < %s -skip-partial-inlining-cost-analysis -passes=partial-inliner -S | FileCheck %s
|
||||||
|
|
||||||
|
define i32 @Func(i1 %cond, i32* align 4 %align.val) !prof !1 {
|
||||||
|
; CHECK: @Func({{.*}}) !prof [[REMAINCOUNT:![0-9]+]]
|
||||||
|
entry:
|
||||||
|
br i1 %cond, label %if.then, label %return
|
||||||
|
if.then:
|
||||||
|
; Dummy store to have more than 0 uses
|
||||||
|
store i32 10, i32* %align.val, align 4
|
||||||
|
br label %return
|
||||||
|
return: ; preds = %if.then, %entry
|
||||||
|
ret i32 0
|
||||||
|
}
|
||||||
|
|
||||||
|
define internal i32 @Caller1(i1 %cond, i32* align 2 %align.val) !prof !3{
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: @Caller1
|
||||||
|
; CHECK: br
|
||||||
|
; CHECK: call void @Func.1_
|
||||||
|
; CHECK: br
|
||||||
|
; CHECK: call void @Func.1_
|
||||||
|
%val = call i32 @Func(i1 %cond, i32* %align.val)
|
||||||
|
%val2 = call i32 @Func(i1 %cond, i32* %align.val)
|
||||||
|
ret i32 %val
|
||||||
|
}
|
||||||
|
|
||||||
|
define internal i32 @Caller2(i1 %cond, i32* align 2 %align.val) !prof !2{
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: @Caller2
|
||||||
|
; CHECK: br
|
||||||
|
; CHECK: call void @Func.1_
|
||||||
|
%val = call i32 @Func(i1 %cond, i32* %align.val)
|
||||||
|
ret i32 %val
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: [[REMAINCOUNT]] = !{!"function_entry_count", i64 150}
|
||||||
|
!1 = !{!"function_entry_count", i64 200}
|
||||||
|
!2 = !{!"function_entry_count", i64 10}
|
||||||
|
!3 = !{!"function_entry_count", i64 20}
|
||||||
|
|
|
@ -0,0 +1,107 @@
|
||||||
|
; The outlined region has high frequency and the outlining
|
||||||
|
; call sequence is expensive (inputs, outputs, multiple exits, etc.)
|
||||||
|
; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck %s
|
||||||
|
; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck %s
|
||||||
|
; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -max-num-inline-blocks=2 -S | FileCheck --check-prefix=NOCOST %s
|
||||||
|
; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -max-num-inline-blocks=2 -S | FileCheck --check-prefix=NOCOST %s
|
||||||
|
|
||||||
|
|
||||||
|
; Function Attrs: nounwind
|
||||||
|
define i32 @bar_hot_outline_region(i32 %arg) local_unnamed_addr #0 {
|
||||||
|
bb:
|
||||||
|
%tmp = icmp slt i32 %arg, 0
|
||||||
|
br i1 %tmp, label %bb1, label %bb16, !prof !1
|
||||||
|
|
||||||
|
bb1: ; preds = %bb
|
||||||
|
%tmp2 = tail call i32 (...) @foo() #0
|
||||||
|
%tmp3 = tail call i32 (...) @foo() #0
|
||||||
|
%tmp4 = tail call i32 (...) @foo() #0
|
||||||
|
%tmp5 = tail call i32 (...) @foo() #0
|
||||||
|
%tmp6 = tail call i32 (...) @foo() #0
|
||||||
|
%tmp7 = tail call i32 (...) @foo() #0
|
||||||
|
%tmp8 = add nsw i32 %arg, 1
|
||||||
|
%tmp9 = tail call i32 @goo(i32 %tmp8) #0
|
||||||
|
%tmp10 = tail call i32 (...) @foo() #0
|
||||||
|
%tmp11 = icmp eq i32 %tmp10, 0
|
||||||
|
br i1 %tmp11, label %bb12, label %bb16
|
||||||
|
|
||||||
|
bb12: ; preds = %bb1
|
||||||
|
%tmp13 = tail call i32 (...) @foo() #0
|
||||||
|
%tmp14 = icmp eq i32 %tmp13, 0
|
||||||
|
%tmp15 = select i1 %tmp14, i32 0, i32 3
|
||||||
|
br label %bb16
|
||||||
|
|
||||||
|
bb16: ; preds = %bb12, %bb1, %bb
|
||||||
|
%tmp17 = phi i32 [ 2, %bb1 ], [ %tmp15, %bb12 ], [ 0, %bb ]
|
||||||
|
ret i32 %tmp17
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @bar_cold_outline_region(i32 %arg) local_unnamed_addr #0 {
|
||||||
|
bb:
|
||||||
|
%tmp = icmp slt i32 %arg, 0
|
||||||
|
br i1 %tmp, label %bb1, label %bb16, !prof !2
|
||||||
|
|
||||||
|
bb1: ; preds = %bb
|
||||||
|
%tmp2 = tail call i32 (...) @foo() #0
|
||||||
|
%tmp3 = tail call i32 (...) @foo() #0
|
||||||
|
%tmp4 = tail call i32 (...) @foo() #0
|
||||||
|
%tmp5 = tail call i32 (...) @foo() #0
|
||||||
|
%tmp6 = tail call i32 (...) @foo() #0
|
||||||
|
%tmp7 = tail call i32 (...) @foo() #0
|
||||||
|
%tmp8 = add nsw i32 %arg, 1
|
||||||
|
%tmp9 = tail call i32 @goo(i32 %tmp8) #0
|
||||||
|
%tmp10 = tail call i32 (...) @foo() #0
|
||||||
|
%tmp11 = icmp eq i32 %tmp10, 0
|
||||||
|
br i1 %tmp11, label %bb12, label %bb16
|
||||||
|
|
||||||
|
bb12: ; preds = %bb1
|
||||||
|
%tmp13 = tail call i32 (...) @foo() #0
|
||||||
|
%tmp14 = icmp eq i32 %tmp13, 0
|
||||||
|
%tmp15 = select i1 %tmp14, i32 0, i32 3
|
||||||
|
br label %bb16
|
||||||
|
|
||||||
|
bb16: ; preds = %bb12, %bb1, %bb
|
||||||
|
%tmp17 = phi i32 [ 2, %bb1 ], [ %tmp15, %bb12 ], [ 0, %bb ]
|
||||||
|
ret i32 %tmp17
|
||||||
|
}
|
||||||
|
|
||||||
|
; Function Attrs: nounwind
|
||||||
|
declare i32 @foo(...) local_unnamed_addr #0
|
||||||
|
|
||||||
|
; Function Attrs: nounwind
|
||||||
|
declare i32 @goo(i32) local_unnamed_addr #0
|
||||||
|
|
||||||
|
; Function Attrs: nounwind
|
||||||
|
define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 {
|
||||||
|
bb:
|
||||||
|
; CHECK-LABEL: @dummy_caller
|
||||||
|
; CHECK-NOT: br i1
|
||||||
|
; CHECK-NOT: call{{.*}}bar_hot_outline_region.
|
||||||
|
; NOCOST-LABEL: @dummy_caller
|
||||||
|
; NOCOST: br i1
|
||||||
|
; NOCOST: call{{.*}}bar_hot_outline_region.
|
||||||
|
|
||||||
|
%tmp = tail call i32 @bar_hot_outline_region(i32 %arg)
|
||||||
|
ret i32 %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @dummy_caller2(i32 %arg) local_unnamed_addr #0 {
|
||||||
|
bb:
|
||||||
|
; CHECK-LABEL: @dummy_caller2
|
||||||
|
; CHECK: br i1
|
||||||
|
; CHECK: call{{.*}}bar_cold_outline_region.
|
||||||
|
; NOCOST-LABEL: @dummy_caller2
|
||||||
|
; NOCOST: br i1
|
||||||
|
; NOCOST: call{{.*}}bar_cold_outline_region.
|
||||||
|
|
||||||
|
%tmp = tail call i32 @bar_cold_outline_region(i32 %arg)
|
||||||
|
ret i32 %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind }
|
||||||
|
|
||||||
|
!llvm.ident = !{!0}
|
||||||
|
|
||||||
|
!0 = !{!"clang version 5.0.0 (trunk 301898)"}
|
||||||
|
!1 = !{!"branch_weights", i32 2000, i32 1}
|
||||||
|
!2 = !{!"branch_weights", i32 1, i32 100}
|
|
@ -1,5 +1,5 @@
|
||||||
; RUN: opt < %s -partial-inliner -S | FileCheck %s
|
; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
|
||||||
; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s
|
; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
|
||||||
; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
|
; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
|
||||||
; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
|
; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
; RUN: opt < %s -partial-inliner -S | FileCheck %s
|
; RUN: opt < %s -partial-inliner -S | FileCheck %s
|
||||||
; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s
|
; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s
|
||||||
; RUN: opt < %s -partial-inliner -max-num-inline-blocks=3 -S | FileCheck --check-prefix=LIMIT3 %s
|
; RUN: opt < %s -partial-inliner -max-num-inline-blocks=3 -skip-partial-inlining-cost-analysis -S | FileCheck --check-prefix=LIMIT3 %s
|
||||||
; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=3 -S | FileCheck --check-prefix=LIMIT3 %s
|
; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=3 -skip-partial-inlining-cost-analysis -S | FileCheck --check-prefix=LIMIT3 %s
|
||||||
; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT2 %s
|
; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT2 %s
|
||||||
; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT2 %s
|
; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT2 %s
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
; RUN: opt < %s -partial-inliner -S | FileCheck %s
|
; RUN: opt < %s -skip-partial-inlining-cost-analysis -partial-inliner -S | FileCheck %s
|
||||||
; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s
|
; RUN: opt < %s -skip-partial-inlining-cost-analysis -passes=partial-inliner -S | FileCheck %s
|
||||||
|
|
||||||
define internal i32 @inlinedFunc(i1 %cond, i32* align 4 %align.val) {
|
define internal i32 @inlinedFunc(i1 %cond, i32* align 4 %align.val) {
|
||||||
entry:
|
entry:
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
; RUN: opt < %s -partial-inliner | llc -filetype=null
|
; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis | llc -filetype=null
|
||||||
; RUN: opt < %s -partial-inliner -S | FileCheck %s
|
; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
|
||||||
; This testcase checks to see if CodeExtractor properly inherits
|
; This testcase checks to see if CodeExtractor properly inherits
|
||||||
; target specific attributes for the extracted function. This can
|
; target specific attributes for the extracted function. This can
|
||||||
; cause certain instructions that depend on the attributes to not
|
; cause certain instructions that depend on the attributes to not
|
||||||
|
|
Loading…
Reference in New Issue