forked from OSchip/llvm-project
[NFC][InlineCost] cleanup - comments, overflow handling.
Reviewed By: apilipenko; Tags: #llvm; Differential Revision: https://reviews.llvm.org/D60751; llvm-svn: 359609
This commit is contained in:
parent
07ab4e7db8
commit
eeae45dc77
|
@ -125,26 +125,38 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
|
||||||
/// Tunable parameters that control the analysis.
|
/// Tunable parameters that control the analysis.
|
||||||
const InlineParams &Params;
|
const InlineParams &Params;
|
||||||
|
|
||||||
|
/// Upper bound for the inlining cost. Bonuses are being applied to account
|
||||||
|
/// for speculative "expected profit" of the inlining decision.
|
||||||
int Threshold;
|
int Threshold;
|
||||||
int Cost;
|
|
||||||
|
/// Inlining cost measured in abstract units, accounts for all the
|
||||||
|
/// instructions expected to be executed for a given function invocation.
|
||||||
|
/// Instructions that are statically proven to be dead based on call-site
|
||||||
|
/// arguments are not counted here.
|
||||||
|
int Cost = 0;
|
||||||
|
|
||||||
bool ComputeFullInlineCost;
|
bool ComputeFullInlineCost;
|
||||||
|
|
||||||
bool IsCallerRecursive;
|
bool IsCallerRecursive = false;
|
||||||
bool IsRecursiveCall;
|
bool IsRecursiveCall = false;
|
||||||
bool ExposesReturnsTwice;
|
bool ExposesReturnsTwice = false;
|
||||||
bool HasDynamicAlloca;
|
bool HasDynamicAlloca = false;
|
||||||
bool ContainsNoDuplicateCall;
|
bool ContainsNoDuplicateCall = false;
|
||||||
bool HasReturn;
|
bool HasReturn = false;
|
||||||
bool HasIndirectBr;
|
bool HasIndirectBr = false;
|
||||||
bool HasUninlineableIntrinsic;
|
bool HasUninlineableIntrinsic = false;
|
||||||
bool InitsVargArgs;
|
bool InitsVargArgs = false;
|
||||||
|
|
||||||
/// Number of bytes allocated statically by the callee.
|
/// Number of bytes allocated statically by the callee.
|
||||||
uint64_t AllocatedSize;
|
uint64_t AllocatedSize = 0;
|
||||||
unsigned NumInstructions, NumVectorInstructions;
|
unsigned NumInstructions = 0;
|
||||||
int VectorBonus, TenPercentVectorBonus;
|
unsigned NumVectorInstructions = 0;
|
||||||
// Bonus to be applied when the callee has only one reachable basic block.
|
|
||||||
int SingleBBBonus;
|
/// Bonus to be applied when percentage of vector instructions in callee is
|
||||||
|
/// high (see more details in updateThreshold).
|
||||||
|
int VectorBonus = 0;
|
||||||
|
/// Bonus to be applied when the callee has only one reachable basic block.
|
||||||
|
int SingleBBBonus = 0;
|
||||||
|
|
||||||
/// While we walk the potentially-inlined instructions, we build up and
|
/// While we walk the potentially-inlined instructions, we build up and
|
||||||
/// maintain a mapping of simplified values specific to this callsite. The
|
/// maintain a mapping of simplified values specific to this callsite. The
|
||||||
|
@ -179,7 +191,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
|
||||||
/// loads.
|
/// loads.
|
||||||
bool EnableLoadElimination;
|
bool EnableLoadElimination;
|
||||||
SmallPtrSet<Value *, 16> LoadAddrSet;
|
SmallPtrSet<Value *, 16> LoadAddrSet;
|
||||||
int LoadEliminationCost;
|
int LoadEliminationCost = 0;
|
||||||
|
|
||||||
// Custom simplification helper routines.
|
// Custom simplification helper routines.
|
||||||
bool isAllocaDerivedArg(Value *V);
|
bool isAllocaDerivedArg(Value *V);
|
||||||
|
@ -230,6 +242,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
|
||||||
InlineResult analyzeBlock(BasicBlock *BB,
|
InlineResult analyzeBlock(BasicBlock *BB,
|
||||||
SmallPtrSetImpl<const Value *> &EphValues);
|
SmallPtrSetImpl<const Value *> &EphValues);
|
||||||
|
|
||||||
|
/// Handle a capped 'int' increment for Cost.
|
||||||
|
void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) {
|
||||||
|
assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound");
|
||||||
|
Cost = (int)std::min(UpperBound, Cost + Inc);
|
||||||
|
}
|
||||||
|
|
||||||
// Disable several entry points to the visitor so we don't accidentally use
|
// Disable several entry points to the visitor so we don't accidentally use
|
||||||
// them by declaring but not defining them here.
|
// them by declaring but not defining them here.
|
||||||
void visit(Module *);
|
void visit(Module *);
|
||||||
|
@ -278,18 +296,9 @@ public:
|
||||||
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
|
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
|
||||||
PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
|
PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
|
||||||
CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold),
|
CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold),
|
||||||
Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost ||
|
ComputeFullInlineCost(OptComputeFullInlineCost ||
|
||||||
Params.ComputeFullInlineCost || ORE),
|
Params.ComputeFullInlineCost || ORE),
|
||||||
IsCallerRecursive(false), IsRecursiveCall(false),
|
EnableLoadElimination(true) {}
|
||||||
ExposesReturnsTwice(false), HasDynamicAlloca(false),
|
|
||||||
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
|
|
||||||
HasUninlineableIntrinsic(false), InitsVargArgs(false), AllocatedSize(0),
|
|
||||||
NumInstructions(0), NumVectorInstructions(0), VectorBonus(0),
|
|
||||||
SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0),
|
|
||||||
NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
|
|
||||||
NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
|
|
||||||
NumInstructionsSimplified(0), SROACostSavings(0),
|
|
||||||
SROACostSavingsLost(0) {}
|
|
||||||
|
|
||||||
InlineResult analyzeCall(CallBase &Call);
|
InlineResult analyzeCall(CallBase &Call);
|
||||||
|
|
||||||
|
@ -298,14 +307,14 @@ public:
|
||||||
|
|
||||||
// Keep a bunch of stats about the cost savings found so we can print them
|
// Keep a bunch of stats about the cost savings found so we can print them
|
||||||
// out when debugging.
|
// out when debugging.
|
||||||
unsigned NumConstantArgs;
|
unsigned NumConstantArgs = 0;
|
||||||
unsigned NumConstantOffsetPtrArgs;
|
unsigned NumConstantOffsetPtrArgs = 0;
|
||||||
unsigned NumAllocaArgs;
|
unsigned NumAllocaArgs = 0;
|
||||||
unsigned NumConstantPtrCmps;
|
unsigned NumConstantPtrCmps = 0;
|
||||||
unsigned NumConstantPtrDiffs;
|
unsigned NumConstantPtrDiffs = 0;
|
||||||
unsigned NumInstructionsSimplified;
|
unsigned NumInstructionsSimplified = 0;
|
||||||
unsigned SROACostSavings;
|
unsigned SROACostSavings = 0;
|
||||||
unsigned SROACostSavingsLost;
|
unsigned SROACostSavingsLost = 0;
|
||||||
|
|
||||||
void dump();
|
void dump();
|
||||||
};
|
};
|
||||||
|
@ -340,7 +349,7 @@ bool CallAnalyzer::lookupSROAArgAndCost(
|
||||||
void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
|
void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
|
||||||
// If we're no longer able to perform SROA we need to undo its cost savings
|
// If we're no longer able to perform SROA we need to undo its cost savings
|
||||||
// and prevent subsequent analysis.
|
// and prevent subsequent analysis.
|
||||||
Cost += CostIt->second;
|
addCost(CostIt->second);
|
||||||
SROACostSavings -= CostIt->second;
|
SROACostSavings -= CostIt->second;
|
||||||
SROACostSavingsLost += CostIt->second;
|
SROACostSavingsLost += CostIt->second;
|
||||||
SROAArgCosts.erase(CostIt);
|
SROAArgCosts.erase(CostIt);
|
||||||
|
@ -364,7 +373,7 @@ void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
|
||||||
|
|
||||||
void CallAnalyzer::disableLoadElimination() {
|
void CallAnalyzer::disableLoadElimination() {
|
||||||
if (EnableLoadElimination) {
|
if (EnableLoadElimination) {
|
||||||
Cost += LoadEliminationCost;
|
addCost(LoadEliminationCost);
|
||||||
LoadEliminationCost = 0;
|
LoadEliminationCost = 0;
|
||||||
EnableLoadElimination = false;
|
EnableLoadElimination = false;
|
||||||
}
|
}
|
||||||
|
@ -719,7 +728,7 @@ bool CallAnalyzer::visitCastInst(CastInst &I) {
|
||||||
case Instruction::FPToUI:
|
case Instruction::FPToUI:
|
||||||
case Instruction::FPToSI:
|
case Instruction::FPToSI:
|
||||||
if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
|
if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
|
||||||
Cost += InlineConstants::CallPenalty;
|
addCost(InlineConstants::CallPenalty);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
@ -1089,7 +1098,7 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
|
||||||
// as such.
|
// as such.
|
||||||
if (I.getType()->isFloatingPointTy() &&
|
if (I.getType()->isFloatingPointTy() &&
|
||||||
TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
|
TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
|
||||||
Cost += InlineConstants::CallPenalty;
|
addCost(InlineConstants::CallPenalty);
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -1226,7 +1235,7 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
|
||||||
|
|
||||||
case Intrinsic::load_relative:
|
case Intrinsic::load_relative:
|
||||||
// This is normally lowered to 4 LLVM instructions.
|
// This is normally lowered to 4 LLVM instructions.
|
||||||
Cost += 3 * InlineConstants::InstrCost;
|
addCost(3 * InlineConstants::InstrCost);
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
case Intrinsic::memset:
|
case Intrinsic::memset:
|
||||||
|
@ -1255,12 +1264,12 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
|
||||||
if (TTI.isLoweredToCall(F)) {
|
if (TTI.isLoweredToCall(F)) {
|
||||||
// We account for the average 1 instruction per call argument setup
|
// We account for the average 1 instruction per call argument setup
|
||||||
// here.
|
// here.
|
||||||
Cost += Call.arg_size() * InlineConstants::InstrCost;
|
addCost(Call.arg_size() * InlineConstants::InstrCost);
|
||||||
|
|
||||||
// Everything other than inline ASM will also have a significant cost
|
// Everything other than inline ASM will also have a significant cost
|
||||||
// merely from making the call.
|
// merely from making the call.
|
||||||
if (!isa<InlineAsm>(Call.getCalledValue()))
|
if (!isa<InlineAsm>(Call.getCalledValue()))
|
||||||
Cost += InlineConstants::CallPenalty;
|
addCost(InlineConstants::CallPenalty);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!Call.onlyReadsMemory())
|
if (!Call.onlyReadsMemory())
|
||||||
|
@ -1274,7 +1283,7 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
|
||||||
|
|
||||||
// First, pay the price of the argument setup. We account for the average
|
// First, pay the price of the argument setup. We account for the average
|
||||||
// 1 instruction per call argument setup here.
|
// 1 instruction per call argument setup here.
|
||||||
Cost += Call.arg_size() * InlineConstants::InstrCost;
|
addCost(Call.arg_size() * InlineConstants::InstrCost);
|
||||||
|
|
||||||
// Next, check if this happens to be an indirect function call to a known
|
// Next, check if this happens to be an indirect function call to a known
|
||||||
// function in this inline context. If not, we've done all we can.
|
// function in this inline context. If not, we've done all we can.
|
||||||
|
@ -1436,7 +1445,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
|
||||||
(int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
|
(int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
|
||||||
|
|
||||||
if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
|
if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
|
||||||
Cost = CostLowerBound;
|
addCost((int64_t)SI.getNumCases() * InlineConstants::InstrCost);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1450,7 +1459,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
|
||||||
int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
|
int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
|
||||||
4 * InlineConstants::InstrCost;
|
4 * InlineConstants::InstrCost;
|
||||||
|
|
||||||
Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
|
addCost(JTCost, (int64_t)CostUpperBound);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1471,7 +1480,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
|
||||||
// n + n / 2 - 1 = n * 3 / 2 - 1
|
// n + n / 2 - 1 = n * 3 / 2 - 1
|
||||||
if (NumCaseCluster <= 3) {
|
if (NumCaseCluster <= 3) {
|
||||||
// Suppose a comparison includes one compare and one conditional branch.
|
// Suppose a comparison includes one compare and one conditional branch.
|
||||||
Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
|
addCost(NumCaseCluster * 2 * InlineConstants::InstrCost);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1479,7 +1488,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
|
||||||
int64_t SwitchCost =
|
int64_t SwitchCost =
|
||||||
ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
|
ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
|
||||||
|
|
||||||
Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
|
addCost(SwitchCost, (int64_t)CostUpperBound);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1572,7 +1581,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
|
||||||
if (Base::visit(&*I))
|
if (Base::visit(&*I))
|
||||||
++NumInstructionsSimplified;
|
++NumInstructionsSimplified;
|
||||||
else
|
else
|
||||||
Cost += InlineConstants::InstrCost;
|
addCost(InlineConstants::InstrCost);
|
||||||
|
|
||||||
using namespace ore;
|
using namespace ore;
|
||||||
// If visiting this instruction detected an uninlinable pattern, abort.
|
// If visiting this instruction detected an uninlinable pattern, abort.
|
||||||
|
@ -1617,7 +1626,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
|
||||||
return IR;
|
return IR;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if we've past the maximum possible threshold so we don't spin in
|
// Check if we've passed the maximum possible threshold so we don't spin in
|
||||||
// huge basic blocks that will never inline.
|
// huge basic blocks that will never inline.
|
||||||
if (Cost >= Threshold && !ComputeFullInlineCost)
|
if (Cost >= Threshold && !ComputeFullInlineCost)
|
||||||
return false;
|
return false;
|
||||||
|
@ -1743,7 +1752,7 @@ InlineResult CallAnalyzer::analyzeCall(CallBase &Call) {
|
||||||
|
|
||||||
// Give out bonuses for the callsite, as the instructions setting them up
|
// Give out bonuses for the callsite, as the instructions setting them up
|
||||||
// will be gone after inlining.
|
// will be gone after inlining.
|
||||||
Cost -= getCallsiteCost(Call, DL);
|
addCost(-getCallsiteCost(Call, DL));
|
||||||
|
|
||||||
// If this function uses the coldcc calling convention, prefer not to inline
|
// If this function uses the coldcc calling convention, prefer not to inline
|
||||||
// it.
|
// it.
|
||||||
|
@ -1904,7 +1913,7 @@ InlineResult CallAnalyzer::analyzeCall(CallBase &Call) {
|
||||||
continue;
|
continue;
|
||||||
NumLoops++;
|
NumLoops++;
|
||||||
}
|
}
|
||||||
Cost += NumLoops * InlineConstants::CallPenalty;
|
addCost(NumLoops * InlineConstants::CallPenalty);
|
||||||
}
|
}
|
||||||
|
|
||||||
// We applied the maximum possible vector bonus at the beginning. Now,
|
// We applied the maximum possible vector bonus at the beginning. Now,
|
||||||
|
|
Loading…
Reference in New Issue