[InlineCost] Add cl::opt to allow full inline cost to be computed for debugging purposes.

Currently, the inline cost model bails out as soon as the inline cost exceeds
the inline threshold, in order to avoid spending unnecessary compile time.
However, when debugging it is useful to compute the full cost, so this patch
adds a hidden command line option, -inline-cost-full, to override the default
behavior.

I took over this work from Chad Rosier (mcrosier@codeaurora.org).

Differential Revision: https://reviews.llvm.org/D35850

llvm-svn: 311371
This commit is contained in:
Haicheng Wu 2017-08-21 20:00:09 +00:00
parent 4eb18742ca
commit 0812c5bea3
5 changed files with 55 additions and 27 deletions

View File

@ -16,6 +16,7 @@
#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include <cassert> #include <cassert>
#include <climits> #include <climits>
@ -185,12 +186,11 @@ int getCallsiteCost(CallSite CS, const DataLayout &DL);
/// ///
/// Also note that calling this function *dynamically* computes the cost of /// Also note that calling this function *dynamically* computes the cost of
/// inlining the callsite. It is an expensive, heavyweight call. /// inlining the callsite. It is an expensive, heavyweight call.
InlineCost InlineCost getInlineCost(
getInlineCost(CallSite CS, const InlineParams &Params, CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
TargetTransformInfo &CalleeTTI, std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache, Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE = nullptr);
ProfileSummaryInfo *PSI);
/// \brief Get an InlineCost with the callee explicitly specified. /// \brief Get an InlineCost with the callee explicitly specified.
/// This allows you to calculate the cost of inlining a function via a /// This allows you to calculate the cost of inlining a function via a
@ -202,7 +202,7 @@ getInlineCost(CallSite CS, Function *Callee, const InlineParams &Params,
TargetTransformInfo &CalleeTTI, TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache, std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI); ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE);
/// \brief Minimal filter to detect invalid constructs for inlining. /// \brief Minimal filter to detect invalid constructs for inlining.
bool isInlineViable(Function &Callee); bool isInlineViable(Function &Callee);

View File

@ -82,6 +82,11 @@ static cl::opt<int> HotCallSiteRelFreq(
"entry frequency, for a callsite to be hot in the absence of " "entry frequency, for a callsite to be hot in the absence of "
"profile information.")); "profile information."));
// Hidden debugging flag (-inline-cost-full), off by default. When it is set,
// the "cost exceeds threshold" early exits in CallAnalyzer (visitSwitchInst,
// analyzeBlock and analyzeCall) are skipped, so the analysis keeps
// accumulating the cost of the call site instead of bailing out.
// NOTE(review): getInlineCost also assigns true to this flag whenever it is
// given a non-null OptimizationRemarkEmitter, and nothing visible here resets
// it afterwards -- confirm that leaving it set for later queries is intended.
static cl::opt<bool> ComputeFullInlineCost(
"inline-cost-full", cl::Hidden, cl::init(false),
cl::desc("Compute the full inline cost of a call site even when the cost "
"exceeds the threshold."));
namespace { namespace {
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
@ -106,6 +111,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
// Cache the DataLayout since we use it a lot. // Cache the DataLayout since we use it a lot.
const DataLayout &DL; const DataLayout &DL;
/// The OptimizationRemarkEmitter available for this compilation.
OptimizationRemarkEmitter *ORE;
/// The candidate callsite being analyzed. Please do not use this to do /// The candidate callsite being analyzed. Please do not use this to do
/// analysis in the caller function; we want the inline cost query to be /// analysis in the caller function; we want the inline cost query to be
/// easily cacheable. Instead, use the cover function paramHasAttr. /// easily cacheable. Instead, use the cover function paramHasAttr.
@ -243,10 +251,10 @@ public:
CallAnalyzer(const TargetTransformInfo &TTI, CallAnalyzer(const TargetTransformInfo &TTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache, std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI, Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
const InlineParams &Params) Function &Callee, CallSite CSArg, const InlineParams &Params)
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold), CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false), ExposesReturnsTwice(false), HasDynamicAlloca(false),
@ -1138,7 +1146,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// out. Pretend to inline the function, with a custom threshold. // out. Pretend to inline the function, with a custom threshold.
auto IndirectCallParams = Params; auto IndirectCallParams = Params;
IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold; IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, *F, CS, CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS,
IndirectCallParams); IndirectCallParams);
if (CA.analyzeCall(CS)) { if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the // We were able to inline the indirect call! Subtract the cost from the
@ -1198,7 +1206,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
std::min((int64_t)CostUpperBound, std::min((int64_t)CostUpperBound,
(int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost); (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
if (CostLowerBound > Threshold) { if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
Cost = CostLowerBound; Cost = CostLowerBound;
return false; return false;
} }
@ -1347,21 +1355,36 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
else else
Cost += InlineConstants::InstrCost; Cost += InlineConstants::InstrCost;
using namespace ore;
// If visiting this instruction detected an uninlinable pattern, abort. // If visiting this instruction detected an uninlinable pattern, abort.
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
HasIndirectBr || HasFrameEscape) HasIndirectBr || HasFrameEscape) {
if (ORE)
ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
CandidateCS.getInstruction())
<< NV("Callee", &F)
<< " has uninlinable pattern and cost is not fully computed");
return false; return false;
}
// If the caller is a recursive function then we don't want to inline // If the caller is a recursive function then we don't want to inline
// functions which allocate a lot of stack space because it would increase // functions which allocate a lot of stack space because it would increase
// the caller stack usage dramatically. // the caller stack usage dramatically.
if (IsCallerRecursive && if (IsCallerRecursive &&
AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
if (ORE)
ORE->emit(
OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
CandidateCS.getInstruction())
<< NV("Callee", &F)
<< " is recursive and allocates too much stack space. Cost is "
"not fully computed");
return false; return false;
}
// Check if we've passed the maximum possible threshold so we don't spin in // Check if we've passed the maximum possible threshold so we don't spin in
// huge basic blocks that will never inline. // huge basic blocks that will never inline.
if (Cost > Threshold) if (Cost > Threshold && !ComputeFullInlineCost)
return false; return false;
} }
@ -1447,7 +1470,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
Cost += InlineConstants::ColdccPenalty; Cost += InlineConstants::ColdccPenalty;
// Check if we're done. This can happen due to bonuses and penalties. // Check if we're done. This can happen due to bonuses and penalties.
if (Cost > Threshold) if (Cost > Threshold && !ComputeFullInlineCost)
return false; return false;
if (F.empty()) if (F.empty())
@ -1513,7 +1536,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
// Bail out the moment we cross the threshold. This means we'll under-count // Bail out the moment we cross the threshold. This means we'll under-count
// the cost, but only when undercounting doesn't matter. // the cost, but only when undercounting doesn't matter.
if (Cost > Threshold) if (Cost > Threshold && !ComputeFullInlineCost)
break; break;
BasicBlock *BB = BBWorklist[Idx]; BasicBlock *BB = BBWorklist[Idx];
@ -1657,9 +1680,9 @@ InlineCost llvm::getInlineCost(
CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache, std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI) { ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI, return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
GetAssumptionCache, GetBFI, PSI); GetAssumptionCache, GetBFI, PSI, ORE);
} }
InlineCost llvm::getInlineCost( InlineCost llvm::getInlineCost(
@ -1667,7 +1690,7 @@ InlineCost llvm::getInlineCost(
TargetTransformInfo &CalleeTTI, TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache, std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI) { ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
// Cannot inline indirect calls. // Cannot inline indirect calls.
if (!Callee) if (!Callee)
@ -1699,10 +1722,13 @@ InlineCost llvm::getInlineCost(
CS.isNoInline()) CS.isNoInline())
return llvm::InlineCost::getNever(); return llvm::InlineCost::getNever();
if (ORE)
ComputeFullInlineCost = true;
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "... (caller:" << Caller->getName() << ")\n"); << "... (caller:" << Caller->getName() << ")\n");
CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, *Callee, CS, CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
Params); Params);
bool ShouldInline = CA.analyzeCall(CS); bool ShouldInline = CA.analyzeCall(CS);

View File

@ -57,12 +57,13 @@ public:
/// Compute the inline cost for the call site \p CS.
///
/// Looks up the TargetTransformInfo for the called function and forwards to
/// llvm::getInlineCost, supplying an assumption-cache getter backed by ACT and
/// no block frequency information (GetBFI is None). The updated version also
/// builds an OptimizationRemarkEmitter for the caller of \p CS and passes its
/// address along.
/// NOTE(review): Callee is dereferenced without a null check, so this
/// presumably expects a direct call -- confirm against the callers.
InlineCost getInlineCost(CallSite CS) override { InlineCost getInlineCost(CallSite CS) override {
Function *Callee = CS.getCalledFunction(); Function *Callee = CS.getCalledFunction();
TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
OptimizationRemarkEmitter ORE(CS.getCaller());
std::function<AssumptionCache &(Function &)> GetAssumptionCache = std::function<AssumptionCache &(Function &)> GetAssumptionCache =
[&](Function &F) -> AssumptionCache & { [&](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F); return ACT->getAssumptionCache(F);
}; };
return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache, return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache,
/*GetBFI=*/None, PSI); /*GetBFI=*/None, PSI, &ORE);
} }
bool runOnSCC(CallGraphSCC &SCC) override; bool runOnSCC(CallGraphSCC &SCC) override;

View File

@ -845,6 +845,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
FunctionAnalysisManager &FAM = FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG) AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG)
.getManager(); .getManager();
// Get the remarks emission analysis for the caller.
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
std::function<AssumptionCache &(Function &)> GetAssumptionCache = std::function<AssumptionCache &(Function &)> GetAssumptionCache =
[&](Function &F) -> AssumptionCache & { [&](Function &F) -> AssumptionCache & {
return FAM.getResult<AssumptionAnalysis>(F); return FAM.getResult<AssumptionAnalysis>(F);
@ -857,12 +861,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
Function &Callee = *CS.getCalledFunction(); Function &Callee = *CS.getCalledFunction();
auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee); auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI}, return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
PSI); PSI, &ORE);
}; };
// Get the remarks emission analysis for the caller.
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
// Now process as many calls as we have within this caller in the sequence. // Now process as many calls as we have within this caller in the sequence.
// We bail out as soon as the caller has to change so we can update the // We bail out as soon as the caller has to change so we can update the
// call graph and prepare the context of that new caller. // call graph and prepare the context of that new caller.

View File

@ -462,7 +462,7 @@ bool PartialInlinerImpl::shouldPartialInline(
Function *Caller = CS.getCaller(); Function *Caller = CS.getCaller();
auto &CalleeTTI = (*GetTTI)(*Callee); auto &CalleeTTI = (*GetTTI)(*Callee);
InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI, InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
*GetAssumptionCache, GetBFI, PSI); *GetAssumptionCache, GetBFI, PSI, &ORE);
if (IC.isAlways()) { if (IC.isAlways()) {
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)