forked from OSchip/llvm-project
[SLPVectorizer]Migrate getEntryCost to return InstructionCost
This patch also changes: the return type of getGatherCost and the signature of the debug function dumpTreeCosts to use InstructionCost. This patch is part of a series of patches to use InstructionCost instead of unsigned/int for the cost model functions. See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2020-November/146408.html See this patch for the introduction of the type: https://reviews.llvm.org/D91174 Depends on D93049 Differential Revision: https://reviews.llvm.org/D93127
This commit is contained in:
parent
c5771a2f2d
commit
be9184bc55
|
@ -79,6 +79,7 @@
|
|||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/GraphWriter.h"
|
||||
#include "llvm/Support/InstructionCost.h"
|
||||
#include "llvm/Support/KnownBits.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
@ -1493,7 +1494,7 @@ private:
|
|||
bool areAllUsersVectorized(Instruction *I) const;
|
||||
|
||||
/// \returns the cost of the vectorizable entry.
|
||||
int getEntryCost(TreeEntry *E);
|
||||
InstructionCost getEntryCost(TreeEntry *E);
|
||||
|
||||
/// This is the recursive part of buildTree.
|
||||
void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth,
|
||||
|
@ -1515,13 +1516,14 @@ private:
|
|||
|
||||
/// \returns the scalarization cost for this type. Scalarization in this
|
||||
/// context means the creation of vectors from a group of scalars.
|
||||
int getGatherCost(FixedVectorType *Ty,
|
||||
const DenseSet<unsigned> &ShuffledIndices) const;
|
||||
InstructionCost
|
||||
getGatherCost(FixedVectorType *Ty,
|
||||
const DenseSet<unsigned> &ShuffledIndices) const;
|
||||
|
||||
/// \returns the scalarization cost for this list of values. Assuming that
|
||||
/// this subtree gets vectorized, we may need to extract the values from the
|
||||
/// roots. This method calculates the cost of extracting the values.
|
||||
int getGatherCost(ArrayRef<Value *> VL) const;
|
||||
InstructionCost getGatherCost(ArrayRef<Value *> VL) const;
|
||||
|
||||
/// Set the Builder insert point to one after the last instruction in
|
||||
/// the bundle
|
||||
|
@ -1755,8 +1757,9 @@ private:
|
|||
};
|
||||
|
||||
#ifndef NDEBUG
|
||||
void dumpTreeCosts(TreeEntry *E, int ReuseShuffleCost, int VecCost,
|
||||
int ScalarCost) const {
|
||||
void dumpTreeCosts(TreeEntry *E, InstructionCost ReuseShuffleCost,
|
||||
InstructionCost VecCost,
|
||||
InstructionCost ScalarCost) const {
|
||||
dbgs() << "SLP: Calculated costs for Tree:\n"; E->dump();
|
||||
dbgs() << "SLP: Costs:\n";
|
||||
dbgs() << "SLP: ReuseShuffleCost = " << ReuseShuffleCost << "\n";
|
||||
|
@ -3423,7 +3426,7 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
|
|||
return {IntrinsicCost, LibCost};
|
||||
}
|
||||
|
||||
int BoUpSLP::getEntryCost(TreeEntry *E) {
|
||||
InstructionCost BoUpSLP::getEntryCost(TreeEntry *E) {
|
||||
ArrayRef<Value*> VL = E->Scalars;
|
||||
|
||||
Type *ScalarTy = VL[0]->getType();
|
||||
|
@ -3442,7 +3445,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
|
||||
unsigned ReuseShuffleNumbers = E->ReuseShuffleIndices.size();
|
||||
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
|
||||
int ReuseShuffleCost = 0;
|
||||
InstructionCost ReuseShuffleCost = 0;
|
||||
if (NeedToShuffleReuses) {
|
||||
ReuseShuffleCost =
|
||||
TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
|
||||
|
@ -3458,7 +3461,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
allSameType(VL) && allSameBlock(VL)) {
|
||||
Optional<TargetTransformInfo::ShuffleKind> ShuffleKind = isShuffle(VL);
|
||||
if (ShuffleKind.hasValue()) {
|
||||
int Cost = TTI->getShuffleCost(ShuffleKind.getValue(), VecTy);
|
||||
InstructionCost Cost =
|
||||
TTI->getShuffleCost(ShuffleKind.getValue(), VecTy);
|
||||
for (auto *V : VL) {
|
||||
// If all users of instruction are going to be vectorized and this
|
||||
// instruction itself is not going to be vectorized, consider this
|
||||
|
@ -3490,7 +3494,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
|
||||
case Instruction::ExtractValue:
|
||||
case Instruction::ExtractElement: {
|
||||
int DeadCost = 0;
|
||||
InstructionCost DeadCost = 0;
|
||||
if (NeedToShuffleReuses) {
|
||||
unsigned Idx = 0;
|
||||
for (unsigned I : E->ReuseShuffleIndices) {
|
||||
|
@ -3565,7 +3569,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
case Instruction::FPTrunc:
|
||||
case Instruction::BitCast: {
|
||||
Type *SrcTy = VL0->getOperand(0)->getType();
|
||||
int ScalarEltCost =
|
||||
InstructionCost ScalarEltCost =
|
||||
TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy,
|
||||
TTI::getCastContextHint(VL0), CostKind, VL0);
|
||||
if (NeedToShuffleReuses) {
|
||||
|
@ -3573,10 +3577,10 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
}
|
||||
|
||||
// Calculate the cost of this instruction.
|
||||
int ScalarCost = VL.size() * ScalarEltCost;
|
||||
InstructionCost ScalarCost = VL.size() * ScalarEltCost;
|
||||
|
||||
auto *SrcVecTy = FixedVectorType::get(SrcTy, VL.size());
|
||||
int VecCost = 0;
|
||||
InstructionCost VecCost = 0;
|
||||
// Check if the values are candidates to demote.
|
||||
if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
|
||||
VecCost =
|
||||
|
@ -3591,14 +3595,14 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
case Instruction::ICmp:
|
||||
case Instruction::Select: {
|
||||
// Calculate the cost of this instruction.
|
||||
int ScalarEltCost =
|
||||
InstructionCost ScalarEltCost =
|
||||
TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0);
|
||||
if (NeedToShuffleReuses) {
|
||||
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
|
||||
}
|
||||
auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
|
||||
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
|
||||
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
|
||||
|
||||
// Check if all entries in VL are either compares or selects with compares
|
||||
// as condition that have the same predicates.
|
||||
|
@ -3617,8 +3621,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
VecPred = CurrentPred;
|
||||
}
|
||||
|
||||
int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
|
||||
VecPred, CostKind, VL0);
|
||||
InstructionCost VecCost = TTI->getCmpSelInstrCost(
|
||||
E->getOpcode(), VecTy, MaskTy, VecPred, CostKind, VL0);
|
||||
// Check if it is possible and profitable to use min/max for selects in
|
||||
// VL.
|
||||
//
|
||||
|
@ -3626,7 +3630,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
if (IntrinsicAndUse.first != Intrinsic::not_intrinsic) {
|
||||
IntrinsicCostAttributes CostAttrs(IntrinsicAndUse.first, VecTy,
|
||||
{VecTy, VecTy});
|
||||
int IntrinsicCost = TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
|
||||
InstructionCost IntrinsicCost =
|
||||
TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
|
||||
// If the selects are the only uses of the compares, they will be dead
|
||||
// and we can adjust the cost by removing their cost.
|
||||
if (IntrinsicAndUse.second)
|
||||
|
@ -3695,16 +3700,16 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
}
|
||||
|
||||
SmallVector<const Value *, 4> Operands(VL0->operand_values());
|
||||
int ScalarEltCost = TTI->getArithmeticInstrCost(
|
||||
E->getOpcode(), ScalarTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP,
|
||||
Operands, VL0);
|
||||
InstructionCost ScalarEltCost =
|
||||
TTI->getArithmeticInstrCost(E->getOpcode(), ScalarTy, CostKind, Op1VK,
|
||||
Op2VK, Op1VP, Op2VP, Operands, VL0);
|
||||
if (NeedToShuffleReuses) {
|
||||
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
|
||||
}
|
||||
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
|
||||
int VecCost = TTI->getArithmeticInstrCost(
|
||||
E->getOpcode(), VecTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP,
|
||||
Operands, VL0);
|
||||
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
|
||||
InstructionCost VecCost =
|
||||
TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind, Op1VK,
|
||||
Op2VK, Op1VP, Op2VP, Operands, VL0);
|
||||
LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost));
|
||||
return ReuseShuffleCost + VecCost - ScalarCost;
|
||||
}
|
||||
|
@ -3714,30 +3719,27 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
TargetTransformInfo::OperandValueKind Op2VK =
|
||||
TargetTransformInfo::OK_UniformConstantValue;
|
||||
|
||||
int ScalarEltCost =
|
||||
TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, CostKind,
|
||||
Op1VK, Op2VK);
|
||||
InstructionCost ScalarEltCost = TTI->getArithmeticInstrCost(
|
||||
Instruction::Add, ScalarTy, CostKind, Op1VK, Op2VK);
|
||||
if (NeedToShuffleReuses) {
|
||||
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
|
||||
}
|
||||
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
|
||||
int VecCost =
|
||||
TTI->getArithmeticInstrCost(Instruction::Add, VecTy, CostKind,
|
||||
Op1VK, Op2VK);
|
||||
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
|
||||
InstructionCost VecCost = TTI->getArithmeticInstrCost(
|
||||
Instruction::Add, VecTy, CostKind, Op1VK, Op2VK);
|
||||
LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost));
|
||||
return ReuseShuffleCost + VecCost - ScalarCost;
|
||||
}
|
||||
case Instruction::Load: {
|
||||
// Cost of wide load - cost of scalar loads.
|
||||
Align alignment = cast<LoadInst>(VL0)->getAlign();
|
||||
int ScalarEltCost =
|
||||
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0,
|
||||
CostKind, VL0);
|
||||
InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
|
||||
Instruction::Load, ScalarTy, alignment, 0, CostKind, VL0);
|
||||
if (NeedToShuffleReuses) {
|
||||
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
|
||||
}
|
||||
int ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
|
||||
int VecLdCost;
|
||||
InstructionCost ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
|
||||
InstructionCost VecLdCost;
|
||||
if (E->State == TreeEntry::Vectorize) {
|
||||
VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, alignment, 0,
|
||||
CostKind, VL0);
|
||||
|
@ -3759,12 +3761,11 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
auto *SI =
|
||||
cast<StoreInst>(IsReorder ? VL[E->ReorderIndices.front()] : VL0);
|
||||
Align Alignment = SI->getAlign();
|
||||
int ScalarEltCost =
|
||||
TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0,
|
||||
CostKind, VL0);
|
||||
int ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
|
||||
int VecStCost = TTI->getMemoryOpCost(Instruction::Store,
|
||||
VecTy, Alignment, 0, CostKind, VL0);
|
||||
InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
|
||||
Instruction::Store, ScalarTy, Alignment, 0, CostKind, VL0);
|
||||
InstructionCost ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
|
||||
InstructionCost VecStCost = TTI->getMemoryOpCost(
|
||||
Instruction::Store, VecTy, Alignment, 0, CostKind, VL0);
|
||||
if (IsReorder)
|
||||
VecStCost += TTI->getShuffleCost(
|
||||
TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
|
||||
|
@ -3777,14 +3778,16 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
|
||||
// Calculate the cost of the scalar and vector calls.
|
||||
IntrinsicCostAttributes CostAttrs(ID, *CI, ElementCount::getFixed(1), 1);
|
||||
int ScalarEltCost = TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
|
||||
InstructionCost ScalarEltCost =
|
||||
TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
|
||||
if (NeedToShuffleReuses) {
|
||||
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
|
||||
}
|
||||
int ScalarCallCost = VecTy->getNumElements() * ScalarEltCost;
|
||||
InstructionCost ScalarCallCost = VecTy->getNumElements() * ScalarEltCost;
|
||||
|
||||
auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
|
||||
int VecCallCost = std::min(VecCallCosts.first, VecCallCosts.second);
|
||||
InstructionCost VecCallCost =
|
||||
std::min(VecCallCosts.first, VecCallCosts.second);
|
||||
|
||||
LLVM_DEBUG(dbgs() << "SLP: Call cost " << VecCallCost - ScalarCallCost
|
||||
<< " (" << VecCallCost << "-" << ScalarCallCost << ")"
|
||||
|
@ -3799,7 +3802,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
(Instruction::isCast(E->getOpcode()) &&
|
||||
Instruction::isCast(E->getAltOpcode()))) &&
|
||||
"Invalid Shuffle Vector Operand");
|
||||
int ScalarCost = 0;
|
||||
InstructionCost ScalarCost = 0;
|
||||
if (NeedToShuffleReuses) {
|
||||
for (unsigned Idx : E->ReuseShuffleIndices) {
|
||||
Instruction *I = cast<Instruction>(VL[Idx]);
|
||||
|
@ -3823,7 +3826,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
}
|
||||
// VecCost is equal to sum of the cost of creating 2 vectors
|
||||
// and the cost of creating shuffle.
|
||||
int VecCost = 0;
|
||||
InstructionCost VecCost = 0;
|
||||
if (Instruction::isBinaryOp(E->getOpcode())) {
|
||||
VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
|
||||
VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
|
||||
|
@ -4120,21 +4123,23 @@ InstructionCost BoUpSLP::getTreeCost() {
|
|||
return Cost;
|
||||
}
|
||||
|
||||
int BoUpSLP::getGatherCost(FixedVectorType *Ty,
|
||||
const DenseSet<unsigned> &ShuffledIndices) const {
|
||||
InstructionCost
|
||||
BoUpSLP::getGatherCost(FixedVectorType *Ty,
|
||||
const DenseSet<unsigned> &ShuffledIndices) const {
|
||||
unsigned NumElts = Ty->getNumElements();
|
||||
APInt DemandedElts = APInt::getNullValue(NumElts);
|
||||
for (unsigned I = 0; I < NumElts; ++I)
|
||||
if (!ShuffledIndices.count(I))
|
||||
DemandedElts.setBit(I);
|
||||
int Cost = TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true,
|
||||
/*Extract*/ false);
|
||||
InstructionCost Cost =
|
||||
TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true,
|
||||
/*Extract*/ false);
|
||||
if (!ShuffledIndices.empty())
|
||||
Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
|
||||
return Cost;
|
||||
}
|
||||
|
||||
int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
|
||||
InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
|
||||
// Find the type of the operands in VL.
|
||||
Type *ScalarTy = VL[0]->getType();
|
||||
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
|
||||
|
|
Loading…
Reference in New Issue