[SLPVectorizer]Migrate getEntryCost to return InstructionCost

This patch also changes:
  the return type of getGatherCost and
  the signature of the debug function dumpTreeCosts
to use InstructionCost.

This patch is part of a series of patches to use InstructionCost instead of
unsigned/int for the cost model functions.

See this thread for context:
http://lists.llvm.org/pipermail/llvm-dev/2020-November/146408.html

See this patch for the introduction of the type:
https://reviews.llvm.org/D91174

Depends on D93049

Differential Revision: https://reviews.llvm.org/D93127
This commit is contained in:
Caroline Concatto 2020-12-10 17:23:46 +00:00
parent c5771a2f2d
commit be9184bc55
1 changed files with 58 additions and 53 deletions

View File

@ -79,6 +79,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@ -1493,7 +1494,7 @@ private:
bool areAllUsersVectorized(Instruction *I) const;
/// \returns the cost of the vectorizable entry.
int getEntryCost(TreeEntry *E);
InstructionCost getEntryCost(TreeEntry *E);
/// This is the recursive part of buildTree.
void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth,
@ -1515,13 +1516,14 @@ private:
/// \returns the scalarization cost for this type. Scalarization in this
/// context means the creation of vectors from a group of scalars.
int getGatherCost(FixedVectorType *Ty,
const DenseSet<unsigned> &ShuffledIndices) const;
InstructionCost
getGatherCost(FixedVectorType *Ty,
const DenseSet<unsigned> &ShuffledIndices) const;
/// \returns the scalarization cost for this list of values. Assuming that
/// this subtree gets vectorized, we may need to extract the values from the
/// roots. This method calculates the cost of extracting the values.
int getGatherCost(ArrayRef<Value *> VL) const;
InstructionCost getGatherCost(ArrayRef<Value *> VL) const;
/// Set the Builder insert point to one after the last instruction in
/// the bundle
@ -1755,8 +1757,9 @@ private:
};
#ifndef NDEBUG
void dumpTreeCosts(TreeEntry *E, int ReuseShuffleCost, int VecCost,
int ScalarCost) const {
void dumpTreeCosts(TreeEntry *E, InstructionCost ReuseShuffleCost,
InstructionCost VecCost,
InstructionCost ScalarCost) const {
dbgs() << "SLP: Calculated costs for Tree:\n"; E->dump();
dbgs() << "SLP: Costs:\n";
dbgs() << "SLP: ReuseShuffleCost = " << ReuseShuffleCost << "\n";
@ -3423,7 +3426,7 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
return {IntrinsicCost, LibCost};
}
int BoUpSLP::getEntryCost(TreeEntry *E) {
InstructionCost BoUpSLP::getEntryCost(TreeEntry *E) {
ArrayRef<Value*> VL = E->Scalars;
Type *ScalarTy = VL[0]->getType();
@ -3442,7 +3445,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
unsigned ReuseShuffleNumbers = E->ReuseShuffleIndices.size();
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
int ReuseShuffleCost = 0;
InstructionCost ReuseShuffleCost = 0;
if (NeedToShuffleReuses) {
ReuseShuffleCost =
TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
@ -3458,7 +3461,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
allSameType(VL) && allSameBlock(VL)) {
Optional<TargetTransformInfo::ShuffleKind> ShuffleKind = isShuffle(VL);
if (ShuffleKind.hasValue()) {
int Cost = TTI->getShuffleCost(ShuffleKind.getValue(), VecTy);
InstructionCost Cost =
TTI->getShuffleCost(ShuffleKind.getValue(), VecTy);
for (auto *V : VL) {
// If all users of instruction are going to be vectorized and this
// instruction itself is not going to be vectorized, consider this
@ -3490,7 +3494,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
case Instruction::ExtractValue:
case Instruction::ExtractElement: {
int DeadCost = 0;
InstructionCost DeadCost = 0;
if (NeedToShuffleReuses) {
unsigned Idx = 0;
for (unsigned I : E->ReuseShuffleIndices) {
@ -3565,7 +3569,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
case Instruction::FPTrunc:
case Instruction::BitCast: {
Type *SrcTy = VL0->getOperand(0)->getType();
int ScalarEltCost =
InstructionCost ScalarEltCost =
TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy,
TTI::getCastContextHint(VL0), CostKind, VL0);
if (NeedToShuffleReuses) {
@ -3573,10 +3577,10 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
// Calculate the cost of this instruction.
int ScalarCost = VL.size() * ScalarEltCost;
InstructionCost ScalarCost = VL.size() * ScalarEltCost;
auto *SrcVecTy = FixedVectorType::get(SrcTy, VL.size());
int VecCost = 0;
InstructionCost VecCost = 0;
// Check if the values are candidates to demote.
if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
VecCost =
@ -3591,14 +3595,14 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
case Instruction::ICmp:
case Instruction::Select: {
// Calculate the cost of this instruction.
int ScalarEltCost =
InstructionCost ScalarEltCost =
TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
// Check if all entries in VL are either compares or selects with compares
// as condition that have the same predicates.
@ -3617,8 +3621,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
VecPred = CurrentPred;
}
int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
VecPred, CostKind, VL0);
InstructionCost VecCost = TTI->getCmpSelInstrCost(
E->getOpcode(), VecTy, MaskTy, VecPred, CostKind, VL0);
// Check if it is possible and profitable to use min/max for selects in
// VL.
//
@ -3626,7 +3630,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
if (IntrinsicAndUse.first != Intrinsic::not_intrinsic) {
IntrinsicCostAttributes CostAttrs(IntrinsicAndUse.first, VecTy,
{VecTy, VecTy});
int IntrinsicCost = TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
InstructionCost IntrinsicCost =
TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
// If the selects are the only uses of the compares, they will be dead
// and we can adjust the cost by removing their cost.
if (IntrinsicAndUse.second)
@ -3695,16 +3700,16 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
SmallVector<const Value *, 4> Operands(VL0->operand_values());
int ScalarEltCost = TTI->getArithmeticInstrCost(
E->getOpcode(), ScalarTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP,
Operands, VL0);
InstructionCost ScalarEltCost =
TTI->getArithmeticInstrCost(E->getOpcode(), ScalarTy, CostKind, Op1VK,
Op2VK, Op1VP, Op2VP, Operands, VL0);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
int VecCost = TTI->getArithmeticInstrCost(
E->getOpcode(), VecTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP,
Operands, VL0);
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecCost =
TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind, Op1VK,
Op2VK, Op1VP, Op2VP, Operands, VL0);
LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost));
return ReuseShuffleCost + VecCost - ScalarCost;
}
@ -3714,30 +3719,27 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
TargetTransformInfo::OperandValueKind Op2VK =
TargetTransformInfo::OK_UniformConstantValue;
int ScalarEltCost =
TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, CostKind,
Op1VK, Op2VK);
InstructionCost ScalarEltCost = TTI->getArithmeticInstrCost(
Instruction::Add, ScalarTy, CostKind, Op1VK, Op2VK);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
int VecCost =
TTI->getArithmeticInstrCost(Instruction::Add, VecTy, CostKind,
Op1VK, Op2VK);
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecCost = TTI->getArithmeticInstrCost(
Instruction::Add, VecTy, CostKind, Op1VK, Op2VK);
LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost));
return ReuseShuffleCost + VecCost - ScalarCost;
}
case Instruction::Load: {
// Cost of wide load - cost of scalar loads.
Align alignment = cast<LoadInst>(VL0)->getAlign();
int ScalarEltCost =
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0,
CostKind, VL0);
InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
Instruction::Load, ScalarTy, alignment, 0, CostKind, VL0);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
int ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
int VecLdCost;
InstructionCost ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecLdCost;
if (E->State == TreeEntry::Vectorize) {
VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, alignment, 0,
CostKind, VL0);
@ -3759,12 +3761,11 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
auto *SI =
cast<StoreInst>(IsReorder ? VL[E->ReorderIndices.front()] : VL0);
Align Alignment = SI->getAlign();
int ScalarEltCost =
TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0,
CostKind, VL0);
int ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
int VecStCost = TTI->getMemoryOpCost(Instruction::Store,
VecTy, Alignment, 0, CostKind, VL0);
InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
Instruction::Store, ScalarTy, Alignment, 0, CostKind, VL0);
InstructionCost ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecStCost = TTI->getMemoryOpCost(
Instruction::Store, VecTy, Alignment, 0, CostKind, VL0);
if (IsReorder)
VecStCost += TTI->getShuffleCost(
TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
@ -3777,14 +3778,16 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
// Calculate the cost of the scalar and vector calls.
IntrinsicCostAttributes CostAttrs(ID, *CI, ElementCount::getFixed(1), 1);
int ScalarEltCost = TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
InstructionCost ScalarEltCost =
TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
int ScalarCallCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost ScalarCallCost = VecTy->getNumElements() * ScalarEltCost;
auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
int VecCallCost = std::min(VecCallCosts.first, VecCallCosts.second);
InstructionCost VecCallCost =
std::min(VecCallCosts.first, VecCallCosts.second);
LLVM_DEBUG(dbgs() << "SLP: Call cost " << VecCallCost - ScalarCallCost
<< " (" << VecCallCost << "-" << ScalarCallCost << ")"
@ -3799,7 +3802,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
(Instruction::isCast(E->getOpcode()) &&
Instruction::isCast(E->getAltOpcode()))) &&
"Invalid Shuffle Vector Operand");
int ScalarCost = 0;
InstructionCost ScalarCost = 0;
if (NeedToShuffleReuses) {
for (unsigned Idx : E->ReuseShuffleIndices) {
Instruction *I = cast<Instruction>(VL[Idx]);
@ -3823,7 +3826,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
// VecCost is equal to sum of the cost of creating 2 vectors
// and the cost of creating shuffle.
int VecCost = 0;
InstructionCost VecCost = 0;
if (Instruction::isBinaryOp(E->getOpcode())) {
VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
@ -4120,21 +4123,23 @@ InstructionCost BoUpSLP::getTreeCost() {
return Cost;
}
int BoUpSLP::getGatherCost(FixedVectorType *Ty,
const DenseSet<unsigned> &ShuffledIndices) const {
InstructionCost
BoUpSLP::getGatherCost(FixedVectorType *Ty,
const DenseSet<unsigned> &ShuffledIndices) const {
unsigned NumElts = Ty->getNumElements();
APInt DemandedElts = APInt::getNullValue(NumElts);
for (unsigned I = 0; I < NumElts; ++I)
if (!ShuffledIndices.count(I))
DemandedElts.setBit(I);
int Cost = TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true,
/*Extract*/ false);
InstructionCost Cost =
TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true,
/*Extract*/ false);
if (!ShuffledIndices.empty())
Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
return Cost;
}
int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
// Find the type of the operands in VL.
Type *ScalarTy = VL[0]->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))