forked from OSchip/llvm-project
[InlineCost] Add cl::opt to allow full inline cost to be computed for debugging purposes.
Currently, the inline cost model bails out once the inline cost exceeds the inline threshold in order to avoid spending unnecessary compile time. However, when debugging it is useful to compute the full cost, so this commit adds a command-line option (-inline-cost-full) to override the default behavior. I took over this work from Chad Rosier (mcrosier@codeaurora.org). Differential Revision: https://reviews.llvm.org/D35850 llvm-svn: 311371
This commit is contained in:
parent
4eb18742ca
commit
0812c5bea3
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/CallGraphSCCPass.h"
|
||||
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
|
||||
#include <cassert>
|
||||
#include <climits>
|
||||
|
||||
|
@ -185,12 +186,11 @@ int getCallsiteCost(CallSite CS, const DataLayout &DL);
|
|||
///
|
||||
/// Also note that calling this function *dynamically* computes the cost of
|
||||
/// inlining the callsite. It is an expensive, heavyweight call.
|
||||
InlineCost
|
||||
getInlineCost(CallSite CS, const InlineParams &Params,
|
||||
TargetTransformInfo &CalleeTTI,
|
||||
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
|
||||
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
|
||||
ProfileSummaryInfo *PSI);
|
||||
InlineCost getInlineCost(
|
||||
CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
|
||||
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
|
||||
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
|
||||
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE = nullptr);
|
||||
|
||||
/// \brief Get an InlineCost with the callee explicitly specified.
|
||||
/// This allows you to calculate the cost of inlining a function via a
|
||||
|
@ -202,7 +202,7 @@ getInlineCost(CallSite CS, Function *Callee, const InlineParams &Params,
|
|||
TargetTransformInfo &CalleeTTI,
|
||||
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
|
||||
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
|
||||
ProfileSummaryInfo *PSI);
|
||||
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE);
|
||||
|
||||
/// \brief Minimal filter to detect invalid constructs for inlining.
|
||||
bool isInlineViable(Function &Callee);
|
||||
|
|
|
@ -82,6 +82,11 @@ static cl::opt<int> HotCallSiteRelFreq(
|
|||
"entry frequency, for a callsite to be hot in the absence of "
|
||||
"profile information."));
|
||||
|
||||
static cl::opt<bool> ComputeFullInlineCost(
|
||||
"inline-cost-full", cl::Hidden, cl::init(false),
|
||||
cl::desc("Compute the full inline cost of a call site even when the cost "
|
||||
"exceeds the threshold."));
|
||||
|
||||
namespace {
|
||||
|
||||
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
|
||||
|
@ -106,6 +111,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
|
|||
// Cache the DataLayout since we use it a lot.
|
||||
const DataLayout &DL;
|
||||
|
||||
/// The OptimizationRemarkEmitter available for this compilation.
|
||||
OptimizationRemarkEmitter *ORE;
|
||||
|
||||
/// The candidate callsite being analyzed. Please do not use this to do
|
||||
/// analysis in the caller function; we want the inline cost query to be
|
||||
/// easily cacheable. Instead, use the cover function paramHasAttr.
|
||||
|
@ -243,10 +251,10 @@ public:
|
|||
CallAnalyzer(const TargetTransformInfo &TTI,
|
||||
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
|
||||
Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
|
||||
ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg,
|
||||
const InlineParams &Params)
|
||||
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
|
||||
Function &Callee, CallSite CSArg, const InlineParams &Params)
|
||||
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
|
||||
PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()),
|
||||
PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
|
||||
CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
|
||||
Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
|
||||
ExposesReturnsTwice(false), HasDynamicAlloca(false),
|
||||
|
@ -1138,7 +1146,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
|
|||
// out. Pretend to inline the function, with a custom threshold.
|
||||
auto IndirectCallParams = Params;
|
||||
IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
|
||||
CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, *F, CS,
|
||||
CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS,
|
||||
IndirectCallParams);
|
||||
if (CA.analyzeCall(CS)) {
|
||||
// We were able to inline the indirect call! Subtract the cost from the
|
||||
|
@ -1198,7 +1206,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
|
|||
std::min((int64_t)CostUpperBound,
|
||||
(int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
|
||||
|
||||
if (CostLowerBound > Threshold) {
|
||||
if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
|
||||
Cost = CostLowerBound;
|
||||
return false;
|
||||
}
|
||||
|
@ -1347,21 +1355,36 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
|
|||
else
|
||||
Cost += InlineConstants::InstrCost;
|
||||
|
||||
using namespace ore;
|
||||
// If visiting this instruction detected an uninlinable pattern, abort.
|
||||
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
|
||||
HasIndirectBr || HasFrameEscape)
|
||||
HasIndirectBr || HasFrameEscape) {
|
||||
if (ORE)
|
||||
ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
|
||||
CandidateCS.getInstruction())
|
||||
<< NV("Callee", &F)
|
||||
<< " has uninlinable pattern and cost is not fully computed");
|
||||
return false;
|
||||
}
|
||||
|
||||
// If the caller is a recursive function then we don't want to inline
|
||||
// functions which allocate a lot of stack space because it would increase
|
||||
// the caller stack usage dramatically.
|
||||
if (IsCallerRecursive &&
|
||||
AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
|
||||
AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
|
||||
if (ORE)
|
||||
ORE->emit(
|
||||
OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
|
||||
CandidateCS.getInstruction())
|
||||
<< NV("Callee", &F)
|
||||
<< " is recursive and allocates too much stack space. Cost is "
|
||||
"not fully computed");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if we've passed the maximum possible threshold so we don't spin in
|
||||
// huge basic blocks that will never inline.
|
||||
if (Cost > Threshold)
|
||||
if (Cost > Threshold && !ComputeFullInlineCost)
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1447,7 +1470,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
|
|||
Cost += InlineConstants::ColdccPenalty;
|
||||
|
||||
// Check if we're done. This can happen due to bonuses and penalties.
|
||||
if (Cost > Threshold)
|
||||
if (Cost > Threshold && !ComputeFullInlineCost)
|
||||
return false;
|
||||
|
||||
if (F.empty())
|
||||
|
@ -1513,7 +1536,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
|
|||
for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
|
||||
// Bail out the moment we cross the threshold. This means we'll under-count
|
||||
// the cost, but only when undercounting doesn't matter.
|
||||
if (Cost > Threshold)
|
||||
if (Cost > Threshold && !ComputeFullInlineCost)
|
||||
break;
|
||||
|
||||
BasicBlock *BB = BBWorklist[Idx];
|
||||
|
@ -1657,9 +1680,9 @@ InlineCost llvm::getInlineCost(
|
|||
CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
|
||||
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
|
||||
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
|
||||
ProfileSummaryInfo *PSI) {
|
||||
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
|
||||
return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
|
||||
GetAssumptionCache, GetBFI, PSI);
|
||||
GetAssumptionCache, GetBFI, PSI, ORE);
|
||||
}
|
||||
|
||||
InlineCost llvm::getInlineCost(
|
||||
|
@ -1667,7 +1690,7 @@ InlineCost llvm::getInlineCost(
|
|||
TargetTransformInfo &CalleeTTI,
|
||||
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
|
||||
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
|
||||
ProfileSummaryInfo *PSI) {
|
||||
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
|
||||
|
||||
// Cannot inline indirect calls.
|
||||
if (!Callee)
|
||||
|
@ -1699,10 +1722,13 @@ InlineCost llvm::getInlineCost(
|
|||
CS.isNoInline())
|
||||
return llvm::InlineCost::getNever();
|
||||
|
||||
if (ORE)
|
||||
ComputeFullInlineCost = true;
|
||||
|
||||
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
|
||||
<< "... (caller:" << Caller->getName() << ")\n");
|
||||
|
||||
CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, *Callee, CS,
|
||||
CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
|
||||
Params);
|
||||
bool ShouldInline = CA.analyzeCall(CS);
|
||||
|
||||
|
|
|
@ -57,12 +57,13 @@ public:
|
|||
InlineCost getInlineCost(CallSite CS) override {
|
||||
Function *Callee = CS.getCalledFunction();
|
||||
TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
|
||||
OptimizationRemarkEmitter ORE(CS.getCaller());
|
||||
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
|
||||
[&](Function &F) -> AssumptionCache & {
|
||||
return ACT->getAssumptionCache(F);
|
||||
};
|
||||
return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache,
|
||||
/*GetBFI=*/None, PSI);
|
||||
/*GetBFI=*/None, PSI, &ORE);
|
||||
}
|
||||
|
||||
bool runOnSCC(CallGraphSCC &SCC) override;
|
||||
|
|
|
@ -845,6 +845,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
|
|||
FunctionAnalysisManager &FAM =
|
||||
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG)
|
||||
.getManager();
|
||||
|
||||
// Get the remarks emission analysis for the caller.
|
||||
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
|
||||
|
||||
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
|
||||
[&](Function &F) -> AssumptionCache & {
|
||||
return FAM.getResult<AssumptionAnalysis>(F);
|
||||
|
@ -857,12 +861,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
|
|||
Function &Callee = *CS.getCalledFunction();
|
||||
auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
|
||||
return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
|
||||
PSI);
|
||||
PSI, &ORE);
|
||||
};
|
||||
|
||||
// Get the remarks emission analysis for the caller.
|
||||
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
|
||||
|
||||
// Now process as many calls as we have within this caller in the sequence.
|
||||
// We bail out as soon as the caller has to change so we can update the
|
||||
// call graph and prepare the context of that new caller.
|
||||
|
|
|
@ -462,7 +462,7 @@ bool PartialInlinerImpl::shouldPartialInline(
|
|||
Function *Caller = CS.getCaller();
|
||||
auto &CalleeTTI = (*GetTTI)(*Callee);
|
||||
InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
|
||||
*GetAssumptionCache, GetBFI, PSI);
|
||||
*GetAssumptionCache, GetBFI, PSI, &ORE);
|
||||
|
||||
if (IC.isAlways()) {
|
||||
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
|
||||
|
|
Loading…
Reference in New Issue