[InlineCost] Add cl::opt to allow full inline cost to be computed for debugging purposes.

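Currently, the inline cost model bails out as soon as the accumulated inline
cost exceeds the inline threshold, in order to avoid spending unnecessary
compile time. However, when debugging it is useful to compute the full cost,
so this patch adds a hidden command-line option (-inline-cost-full) that
overrides the default behavior. The full cost is also computed whenever an
OptimizationRemarkEmitter is passed to getInlineCost.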

I took over this work from Chad Rosier (mcrosier@codeaurora.org).

Differential Revision: https://reviews.llvm.org/D35850

llvm-svn: 311371
This commit is contained in:
Haicheng Wu 2017-08-21 20:00:09 +00:00
parent 4eb18742ca
commit 0812c5bea3
5 changed files with 55 additions and 27 deletions

View File

@ -16,6 +16,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include <cassert>
#include <climits>
@ -185,12 +186,11 @@ int getCallsiteCost(CallSite CS, const DataLayout &DL);
///
/// Also note that calling this function *dynamically* computes the cost of
/// inlining the callsite. It is an expensive, heavyweight call.
InlineCost
getInlineCost(CallSite CS, const InlineParams &Params,
TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI);
InlineCost getInlineCost(
CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE = nullptr);
/// \brief Get an InlineCost with the callee explicitly specified.
/// This allows you to calculate the cost of inlining a function via a
@ -202,7 +202,7 @@ getInlineCost(CallSite CS, Function *Callee, const InlineParams &Params,
TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI);
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE);
/// \brief Minimal filter to detect invalid constructs for inlining.
bool isInlineViable(Function &Callee);

View File

@ -82,6 +82,11 @@ static cl::opt<int> HotCallSiteRelFreq(
"entry frequency, for a callsite to be hot in the absence of "
"profile information."));
// Hidden debugging flag, off by default. When it is set, the analysis does not
// take its early exits on "Cost > Threshold" and keeps accumulating cost for
// the whole callee. getInlineCost also sets this flag to true when it is handed
// a non-null OptimizationRemarkEmitter.
static cl::opt<bool> ComputeFullInlineCost(
"inline-cost-full", cl::Hidden, cl::init(false),
cl::desc("Compute the full inline cost of a call site even when the cost "
"exceeds the threshold."));
namespace {
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
@ -106,6 +111,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
// Cache the DataLayout since we use it a lot.
const DataLayout &DL;
/// The OptimizationRemarkEmitter available for this compilation.
OptimizationRemarkEmitter *ORE;
/// The candidate callsite being analyzed. Please do not use this to do
/// analysis in the caller function; we want the inline cost query to be
/// easily cacheable. Instead, use the cover function paramHasAttr.
@ -243,10 +251,10 @@ public:
CallAnalyzer(const TargetTransformInfo &TTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg,
const InlineParams &Params)
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
Function &Callee, CallSite CSArg, const InlineParams &Params)
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()),
PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
@ -1138,7 +1146,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// out. Pretend to inline the function, with a custom threshold.
auto IndirectCallParams = Params;
IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, *F, CS,
CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS,
IndirectCallParams);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
@ -1198,7 +1206,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
std::min((int64_t)CostUpperBound,
(int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
if (CostLowerBound > Threshold) {
if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
Cost = CostLowerBound;
return false;
}
@ -1347,21 +1355,36 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
else
Cost += InlineConstants::InstrCost;
using namespace ore;
// If visiting this instruction detected an uninlinable pattern, abort.
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
HasIndirectBr || HasFrameEscape)
HasIndirectBr || HasFrameEscape) {
if (ORE)
ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
CandidateCS.getInstruction())
<< NV("Callee", &F)
<< " has uninlinable pattern and cost is not fully computed");
return false;
}
// If the caller is a recursive function then we don't want to inline
// functions which allocate a lot of stack space because it would increase
// the caller stack usage dramatically.
if (IsCallerRecursive &&
AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
if (ORE)
ORE->emit(
OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
CandidateCS.getInstruction())
<< NV("Callee", &F)
<< " is recursive and allocates too much stack space. Cost is "
"not fully computed");
return false;
}
// Check if we've passed the maximum possible threshold so we don't spin in
// huge basic blocks that will never inline.
if (Cost > Threshold)
if (Cost > Threshold && !ComputeFullInlineCost)
return false;
}
@ -1447,7 +1470,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
Cost += InlineConstants::ColdccPenalty;
// Check if we're done. This can happen due to bonuses and penalties.
if (Cost > Threshold)
if (Cost > Threshold && !ComputeFullInlineCost)
return false;
if (F.empty())
@ -1513,7 +1536,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
// Bail out the moment we cross the threshold. This means we'll under-count
// the cost, but only when undercounting doesn't matter.
if (Cost > Threshold)
if (Cost > Threshold && !ComputeFullInlineCost)
break;
BasicBlock *BB = BBWorklist[Idx];
@ -1657,9 +1680,9 @@ InlineCost llvm::getInlineCost(
CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI) {
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
GetAssumptionCache, GetBFI, PSI);
GetAssumptionCache, GetBFI, PSI, ORE);
}
InlineCost llvm::getInlineCost(
@ -1667,7 +1690,7 @@ InlineCost llvm::getInlineCost(
TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI) {
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
// Cannot inline indirect calls.
if (!Callee)
@ -1699,10 +1722,13 @@ InlineCost llvm::getInlineCost(
CS.isNoInline())
return llvm::InlineCost::getNever();
if (ORE)
ComputeFullInlineCost = true;
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "... (caller:" << Caller->getName() << ")\n");
CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, *Callee, CS,
CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
Params);
bool ShouldInline = CA.analyzeCall(CS);

View File

@ -57,12 +57,13 @@ public:
InlineCost getInlineCost(CallSite CS) override {
Function *Callee = CS.getCalledFunction();
TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
OptimizationRemarkEmitter ORE(CS.getCaller());
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
[&](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
};
return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache,
/*GetBFI=*/None, PSI);
/*GetBFI=*/None, PSI, &ORE);
}
bool runOnSCC(CallGraphSCC &SCC) override;

View File

@ -845,6 +845,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG)
.getManager();
// Get the remarks emission analysis for the caller.
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
[&](Function &F) -> AssumptionCache & {
return FAM.getResult<AssumptionAnalysis>(F);
@ -857,12 +861,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
Function &Callee = *CS.getCalledFunction();
auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
PSI);
PSI, &ORE);
};
// Get the remarks emission analysis for the caller.
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
// Now process as many calls as we have within this caller in the sequence.
// We bail out as soon as the caller has to change so we can update the
// call graph and prepare the context of that new caller.

View File

@ -462,7 +462,7 @@ bool PartialInlinerImpl::shouldPartialInline(
Function *Caller = CS.getCaller();
auto &CalleeTTI = (*GetTTI)(*Callee);
InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
*GetAssumptionCache, GetBFI, PSI);
*GetAssumptionCache, GetBFI, PSI, &ORE);
if (IC.isAlways()) {
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)