Clean up usages of asserting vector getters in Type

Summary:
Remove usages of asserting vector getters in Type in preparation for the
VectorType refactor. The existence of these functions complicates the
refactor while adding little value.

Reviewers: craig.topper, sdesmalen, efriedma, RKSimon

Reviewed By: efriedma

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77264
commit dd24fb388b (parent 5f0903e9be)
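The change is mechanical at every call site. As a minimal sketch of the
pattern (illustrative only, not part of the diff; the helper names
elementCount and elementCountOrZero are hypothetical): the removed
Type::getVectorNumElements() and Type::getVectorElementType() asserted
internally that the type was a vector, so each caller now states that
assumption explicitly with cast<VectorType>, or switches to
dyn_cast<VectorType> where the type may legitimately not be a vector.

// Hypothetical helpers showing the call-site pattern this commit applies.
#include "llvm/IR/DerivedTypes.h" // llvm::Type, llvm::VectorType
#include "llvm/Support/Casting.h" // llvm::cast, llvm::dyn_cast

using namespace llvm;

// The caller already knows Ty is a vector: an asserting cast mirrors what
// the old Ty->getVectorNumElements() did internally.
static unsigned elementCount(Type *Ty) {
  return cast<VectorType>(Ty)->getNumElements();
}

// The caller is not sure: dyn_cast replaces the old isVectorTy() check
// followed by an asserting getter.
static unsigned elementCountOrZero(Type *Ty) {
  if (auto *VTy = dyn_cast<VectorType>(Ty))
    return VTy->getNumElements();
  return 0;
}

Call sites that previously guarded with isVectorTy() before using an
asserting getter (extractConstantMask and isLegalMaskedGather in the hunks
below) collapse naturally into the dyn_cast form.
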
@@ -127,7 +127,7 @@ public:
 bool X86InterleavedAccessGroup::isSupported() const {
   VectorType *ShuffleVecTy = Shuffles[0]->getType();
-  Type *ShuffleEltTy = ShuffleVecTy->getVectorElementType();
+  Type *ShuffleEltTy = ShuffleVecTy->getElementType();
   unsigned ShuffleElemSize = DL.getTypeSizeInBits(ShuffleEltTy);
   unsigned WideInstSize;
@@ -186,7 +186,7 @@ void X86InterleavedAccessGroup::decompose(
     DecomposedVectors.push_back(
         cast<ShuffleVectorInst>(Builder.CreateShuffleVector(
             Op0, Op1,
-            createSequentialMask(Indices[i], SubVecTy->getVectorNumElements(),
+            createSequentialMask(Indices[i], SubVecTy->getNumElements(),
                                  0))));
     return;
   }
@@ -727,8 +727,8 @@ bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() {
     // Try to generate target-sized register(/instruction).
     decompose(Inst, Factor, ShuffleTy, DecomposedVectors);

-    Type *ShuffleEltTy = Inst->getType();
-    unsigned NumSubVecElems = ShuffleEltTy->getVectorNumElements() / Factor;
+    auto *ShuffleEltTy = cast<VectorType>(Inst->getType());
+    unsigned NumSubVecElems = ShuffleEltTy->getNumElements() / Factor;
     // Perform matrix-transposition in order to compute interleaved
     // results by generating some sort of (optimized) target-specific
     // instructions.
@@ -756,8 +756,8 @@ bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() {
     return true;
   }

-  Type *ShuffleEltTy = ShuffleTy->getVectorElementType();
-  unsigned NumSubVecElems = ShuffleTy->getVectorNumElements() / Factor;
+  Type *ShuffleEltTy = ShuffleTy->getElementType();
+  unsigned NumSubVecElems = ShuffleTy->getNumElements() / Factor;

   // Lower the interleaved stores:
   //   1. Decompose the interleaved wide shuffle into individual shuffle
@@ -825,7 +825,7 @@ bool X86TargetLowering::lowerInterleavedStore(StoreInst *SI,
   assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
          "Invalid interleave factor");

-  assert(SVI->getType()->getVectorNumElements() % Factor == 0 &&
+  assert(SVI->getType()->getNumElements() % Factor == 0 &&
          "Invalid interleaved store");

   // Holds the indices of SVI that correspond to the starting index of each

@@ -89,7 +89,7 @@ static bool isVectorReductionOp(const BinaryOperator &BO) {
     return false;
   }

-  unsigned ElemNum = BO.getType()->getVectorNumElements();
+  unsigned ElemNum = cast<VectorType>(BO.getType())->getNumElements();
   // Ensure the reduction size is a power of 2.
   if (!isPowerOf2_32(ElemNum))
     return false;
@@ -141,7 +141,7 @@ static bool isVectorReductionOp(const BinaryOperator &BO) {
       // ElemNumToReduce / 2 elements, and store the result in
       // ElemNumToReduce / 2 elements in another vector.

-      unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
+      unsigned ResultElements = ShufInst->getType()->getNumElements();
       if (ResultElements < ElemNum)
         return false;

@@ -236,8 +236,8 @@ bool X86PartialReduction::tryMAddReplacement(Value *Op, BinaryOperator *Add) {

   IRBuilder<> Builder(Add);

-  Type *MulTy = Op->getType();
-  unsigned NumElts = MulTy->getVectorNumElements();
+  auto *MulTy = cast<VectorType>(Op->getType());
+  unsigned NumElts = MulTy->getNumElements();

   // Extract even elements and odd elements and add them together. This will
   // be pattern matched by SelectionDAG to pmaddwd. This instruction will be
@@ -272,11 +272,11 @@ bool X86PartialReduction::tryMAddPattern(BinaryOperator *BO) {
     return false;

   // Need at least 8 elements.
-  if (BO->getType()->getVectorNumElements() < 8)
+  if (cast<VectorType>(BO->getType())->getNumElements() < 8)
     return false;

   // Element type should be i32.
-  if (!BO->getType()->getVectorElementType()->isIntegerTy(32))
+  if (!cast<VectorType>(BO->getType())->getElementType()->isIntegerTy(32))
     return false;

   bool Changed = false;
@@ -305,7 +305,9 @@ bool X86PartialReduction::trySADReplacement(Value *Op, BinaryOperator *Add) {
   // Look for zero extend from i8.
   auto getZeroExtendedVal = [](Value *Op) -> Value * {
     if (auto *ZExt = dyn_cast<ZExtInst>(Op))
-      if (ZExt->getOperand(0)->getType()->getVectorElementType()->isIntegerTy(8))
+      if (cast<VectorType>(ZExt->getOperand(0)->getType())
+              ->getElementType()
+              ->isIntegerTy(8))
         return ZExt->getOperand(0);

     return nullptr;
@@ -319,8 +321,8 @@ bool X86PartialReduction::trySADReplacement(Value *Op, BinaryOperator *Add) {

   IRBuilder<> Builder(Add);

-  Type *OpTy = Op->getType();
-  unsigned NumElts = OpTy->getVectorNumElements();
+  auto *OpTy = cast<VectorType>(Op->getType());
+  unsigned NumElts = OpTy->getNumElements();

   unsigned IntrinsicNumElts;
   Intrinsic::ID IID;
@@ -371,7 +373,8 @@ bool X86PartialReduction::trySADReplacement(Value *Op, BinaryOperator *Add) {
   assert(isPowerOf2_32(NumSplits) && "Expected power of 2 splits");
   unsigned Stages = Log2_32(NumSplits);
   for (unsigned s = Stages; s > 0; --s) {
-    unsigned NumConcatElts = Ops[0]->getType()->getVectorNumElements() * 2;
+    unsigned NumConcatElts =
+        cast<VectorType>(Ops[0]->getType())->getNumElements() * 2;
     for (unsigned i = 0; i != 1U << (s - 1); ++i) {
       SmallVector<int, 64> ConcatMask(NumConcatElts);
       std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
@@ -381,13 +384,13 @@ bool X86PartialReduction::trySADReplacement(Value *Op, BinaryOperator *Add) {

   // At this point the final value should be in Ops[0]. Now we need to adjust
   // it to the final original type.
-  NumElts = OpTy->getVectorNumElements();
+  NumElts = cast<VectorType>(OpTy)->getNumElements();
   if (NumElts == 2) {
     // Extract down to 2 elements.
     Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef<int>{0, 1});
   } else if (NumElts >= 8) {
     SmallVector<int, 32> ConcatMask(NumElts);
-    unsigned SubElts = Ops[0]->getType()->getVectorNumElements();
+    unsigned SubElts = cast<VectorType>(Ops[0]->getType())->getNumElements();
     for (unsigned i = 0; i != SubElts; ++i)
       ConcatMask[i] = i;
     for (unsigned i = SubElts; i != NumElts; ++i)
@@ -411,7 +414,7 @@ bool X86PartialReduction::trySADPattern(BinaryOperator *BO) {

   // TODO: There's nothing special about i32, any integer type above i16 should
   // work just as well.
-  if (!BO->getType()->getVectorElementType()->isIntegerTy(32))
+  if (!cast<VectorType>(BO->getType())->getElementType()->isIntegerTy(32))
     return false;

   bool Changed = false;

@@ -36,17 +36,17 @@ static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
   //
   //   <4 x i32> <i32 -2147483648, i32 -2147483648,
   //              i32 -2147483648, i32 -2147483648>
-  Type *CstTy = C->getType();
-  if (!CstTy->isVectorTy())
+  auto *CstTy = dyn_cast<VectorType>(C->getType());
+  if (!CstTy)
     return false;

-  Type *CstEltTy = CstTy->getVectorElementType();
+  Type *CstEltTy = CstTy->getElementType();
   if (!CstEltTy->isIntegerTy())
     return false;

   unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
   unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
-  unsigned NumCstElts = CstTy->getVectorNumElements();
+  unsigned NumCstElts = CstTy->getNumElements();

   assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
          "Unaligned shuffle mask size");

@@ -925,8 +925,9 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
   return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
 }

-int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *BaseTp, int Index,
                                Type *SubTp) {
+  auto *Tp = cast<VectorType>(BaseTp);
   // 64-bit packed float vectors (v2f32) are widened to type v4f32.
   // 64-bit packed integer vectors (v2i32) are widened to type v4i32.
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
@@ -958,18 +959,18 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
     // FIXME: Remove some of the alignment restrictions.
     // FIXME: We can use permq for 64-bit or larger extracts from 256-bit
     // vectors.
-    int OrigSubElts = SubTp->getVectorNumElements();
-    if (NumSubElts > OrigSubElts &&
-        (Index % OrigSubElts) == 0 && (NumSubElts % OrigSubElts) == 0 &&
+    int OrigSubElts = cast<VectorType>(SubTp)->getNumElements();
+    if (NumSubElts > OrigSubElts && (Index % OrigSubElts) == 0 &&
+        (NumSubElts % OrigSubElts) == 0 &&
         LT.second.getVectorElementType() ==
-        SubLT.second.getVectorElementType() &&
+            SubLT.second.getVectorElementType() &&
         LT.second.getVectorElementType().getSizeInBits() ==
-        Tp->getVectorElementType()->getPrimitiveSizeInBits()) {
+            Tp->getElementType()->getPrimitiveSizeInBits()) {
       assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
              "Unexpected number of elements!");
-      Type *VecTy = VectorType::get(Tp->getVectorElementType(),
+      Type *VecTy = VectorType::get(Tp->getElementType(),
                                     LT.second.getVectorNumElements());
-      Type *SubTy = VectorType::get(Tp->getVectorElementType(),
+      Type *SubTy = VectorType::get(Tp->getElementType(),
                                     SubLT.second.getVectorNumElements());
       int ExtractIndex = alignDown((Index % NumElts), NumSubElts);
       int ExtractCost = getShuffleCost(TTI::SK_ExtractSubvector, VecTy,
@@ -1031,8 +1032,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
   MVT LegalVT = LT.second;
   if (LegalVT.isVector() &&
       LegalVT.getVectorElementType().getSizeInBits() ==
-          Tp->getVectorElementType()->getPrimitiveSizeInBits() &&
-      LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) {
+          Tp->getElementType()->getPrimitiveSizeInBits() &&
+      LegalVT.getVectorNumElements() < Tp->getNumElements()) {

     unsigned VecTySize = DL.getTypeStoreSize(Tp);
     unsigned LegalVTSize = LegalVT.getStoreSize();
@@ -1041,8 +1042,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
     // Number of destination vectors after legalization:
     unsigned NumOfDests = LT.first;

-    Type *SingleOpTy = VectorType::get(Tp->getVectorElementType(),
-                                       LegalVT.getVectorNumElements());
+    Type *SingleOpTy =
+        VectorType::get(Tp->getElementType(), LegalVT.getVectorNumElements());

     unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
     return NumOfShuffles *
@@ -2675,7 +2676,7 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                 const Instruction *I) {
   // Handle non-power-of-two vectors such as <3 x float>
   if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
-    unsigned NumElem = VTy->getVectorNumElements();
+    unsigned NumElem = VTy->getNumElements();

     // Handle a few common cases:
     // <3 x float>
@@ -2725,7 +2726,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
     // To calculate scalar take the regular cost, without mask
     return getMemoryOpCost(Opcode, SrcTy, MaybeAlign(Alignment), AddressSpace);

-  unsigned NumElem = SrcVTy->getVectorNumElements();
+  unsigned NumElem = SrcVTy->getNumElements();
   VectorType *MaskTy =
       VectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem);
   if ((IsLoad && !isLegalMaskedLoad(SrcVTy, MaybeAlign(Alignment))) ||
@@ -2756,7 +2757,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
         getShuffleCost(TTI::SK_PermuteTwoSrc, MaskTy, 0, nullptr);

   else if (LT.second.getVectorNumElements() > NumElem) {
-    VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(),
+    VectorType *NewMaskTy = VectorType::get(MaskTy->getElementType(),
                                             LT.second.getVectorNumElements());
     // Expanding requires fill mask with zeroes
     Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, 0, MaskTy);
@@ -2861,12 +2862,14 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,

   MVT MTy = LT.second;

+  auto *ValVTy = cast<VectorType>(ValTy);
+
   unsigned ArithmeticCost = 0;
   if (LT.first != 1 && MTy.isVector() &&
-      MTy.getVectorNumElements() < ValTy->getVectorNumElements()) {
+      MTy.getVectorNumElements() < ValVTy->getNumElements()) {
     // Type needs to be split. We need LT.first - 1 arithmetic ops.
-    Type *SingleOpTy = VectorType::get(ValTy->getVectorElementType(),
-                                       MTy.getVectorNumElements());
+    Type *SingleOpTy =
+        VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
    ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy);
    ArithmeticCost *= LT.first - 1;
   }
@@ -2930,13 +2933,13 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
   };

   // Handle bool allof/anyof patterns.
-  if (ValTy->getVectorElementType()->isIntegerTy(1)) {
+  if (ValVTy->getElementType()->isIntegerTy(1)) {
     unsigned ArithmeticCost = 0;
     if (LT.first != 1 && MTy.isVector() &&
-        MTy.getVectorNumElements() < ValTy->getVectorNumElements()) {
+        MTy.getVectorNumElements() < ValVTy->getNumElements()) {
       // Type needs to be split. We need LT.first - 1 arithmetic ops.
-      Type *SingleOpTy = VectorType::get(ValTy->getVectorElementType(),
-                                         MTy.getVectorNumElements());
+      Type *SingleOpTy =
+          VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
       ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy);
       ArithmeticCost *= LT.first - 1;
     }
@@ -2954,25 +2957,24 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
     if (const auto *Entry = CostTableLookup(SSE2BoolReduction, ISD, MTy))
       return ArithmeticCost + Entry->Cost;

-    return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise);
+    return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise);
   }

-  unsigned NumVecElts = ValTy->getVectorNumElements();
-  unsigned ScalarSize = ValTy->getScalarSizeInBits();
+  unsigned NumVecElts = ValVTy->getNumElements();
+  unsigned ScalarSize = ValVTy->getScalarSizeInBits();

   // Special case power of 2 reductions where the scalar type isn't changed
   // by type legalization.
   if (!isPowerOf2_32(NumVecElts) || ScalarSize != MTy.getScalarSizeInBits())
-    return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise);
+    return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise);

   unsigned ReductionCost = 0;

-  Type *Ty = ValTy;
+  auto *Ty = ValVTy;
   if (LT.first != 1 && MTy.isVector() &&
-      MTy.getVectorNumElements() < ValTy->getVectorNumElements()) {
+      MTy.getVectorNumElements() < ValVTy->getNumElements()) {
     // Type needs to be split. We need LT.first - 1 arithmetic ops.
-    Ty = VectorType::get(ValTy->getVectorElementType(),
-                         MTy.getVectorNumElements());
+    Ty = VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
     ReductionCost = getArithmeticInstrCost(Opcode, Ty);
     ReductionCost *= LT.first - 1;
     NumVecElts = MTy.getVectorNumElements();
@@ -2986,32 +2988,32 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
     NumVecElts /= 2;
     // If we're reducing from 256/512 bits, use an extract_subvector.
     if (Size > 128) {
-      Type *SubTy = VectorType::get(ValTy->getVectorElementType(), NumVecElts);
+      auto *SubTy = VectorType::get(ValVTy->getElementType(), NumVecElts);
       ReductionCost +=
           getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts, SubTy);
       Ty = SubTy;
     } else if (Size == 128) {
       // Reducing from 128 bits is a permute of v2f64/v2i64.
-      Type *ShufTy;
-      if (ValTy->isFloatingPointTy())
-        ShufTy = VectorType::get(Type::getDoubleTy(ValTy->getContext()), 2);
+      VectorType *ShufTy;
+      if (ValVTy->isFloatingPointTy())
+        ShufTy = VectorType::get(Type::getDoubleTy(ValVTy->getContext()), 2);
       else
-        ShufTy = VectorType::get(Type::getInt64Ty(ValTy->getContext()), 2);
+        ShufTy = VectorType::get(Type::getInt64Ty(ValVTy->getContext()), 2);
       ReductionCost +=
           getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
     } else if (Size == 64) {
       // Reducing from 64 bits is a shuffle of v4f32/v4i32.
-      Type *ShufTy;
-      if (ValTy->isFloatingPointTy())
-        ShufTy = VectorType::get(Type::getFloatTy(ValTy->getContext()), 4);
+      VectorType *ShufTy;
+      if (ValVTy->isFloatingPointTy())
+        ShufTy = VectorType::get(Type::getFloatTy(ValVTy->getContext()), 4);
       else
-        ShufTy = VectorType::get(Type::getInt32Ty(ValTy->getContext()), 4);
+        ShufTy = VectorType::get(Type::getInt32Ty(ValVTy->getContext()), 4);
       ReductionCost +=
           getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
     } else {
       // Reducing from smaller size is a shift by immediate.
-      Type *ShiftTy = VectorType::get(
-          Type::getIntNTy(ValTy->getContext(), Size), 128 / Size);
+      auto *ShiftTy = VectorType::get(
+          Type::getIntNTy(ValVTy->getContext(), Size), 128 / Size);
       ReductionCost += getArithmeticInstrCost(
           Instruction::LShr, ShiftTy, TargetTransformInfo::OK_AnyValue,
           TargetTransformInfo::OK_UniformConstantValue,
@@ -3230,17 +3232,17 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
       return Entry->Cost;
   }

-  unsigned NumVecElts = ValTy->getVectorNumElements();
+  auto *ValVTy = cast<VectorType>(ValTy);
+  unsigned NumVecElts = ValVTy->getNumElements();

-  Type *Ty = ValTy;
+  auto *Ty = ValVTy;
   unsigned MinMaxCost = 0;
   if (LT.first != 1 && MTy.isVector() &&
-      MTy.getVectorNumElements() < ValTy->getVectorNumElements()) {
+      MTy.getVectorNumElements() < ValVTy->getNumElements()) {
     // Type needs to be split. We need LT.first - 1 operations ops.
-    Ty = VectorType::get(ValTy->getVectorElementType(),
-                         MTy.getVectorNumElements());
-    Type *SubCondTy = VectorType::get(CondTy->getVectorElementType(),
-                                      MTy.getVectorNumElements());
+    Ty = VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
+    Type *SubCondTy = VectorType::get(
+        cast<VectorType>(CondTy)->getElementType(), MTy.getVectorNumElements());
     MinMaxCost = getMinMaxCost(Ty, SubCondTy, IsUnsigned);
     MinMaxCost *= LT.first - 1;
     NumVecElts = MTy.getVectorNumElements();
@@ -3266,7 +3268,7 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,

   // Special case power of 2 reductions where the scalar type isn't changed
   // by type legalization.
-  if (!isPowerOf2_32(ValTy->getVectorNumElements()) ||
+  if (!isPowerOf2_32(ValVTy->getNumElements()) ||
       ScalarSize != MTy.getScalarSizeInBits())
     return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned);

@@ -3278,7 +3280,7 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
     NumVecElts /= 2;
     // If we're reducing from 256/512 bits, use an extract_subvector.
     if (Size > 128) {
-      Type *SubTy = VectorType::get(ValTy->getVectorElementType(), NumVecElts);
+      auto *SubTy = VectorType::get(ValVTy->getElementType(), NumVecElts);
       MinMaxCost +=
           getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts, SubTy);
       Ty = SubTy;
@@ -3311,8 +3313,8 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
     }

     // Add the arithmetic op for this level.
-    Type *SubCondTy = VectorType::get(CondTy->getVectorElementType(),
-                                      Ty->getVectorNumElements());
+    auto *SubCondTy = VectorType::get(
+        cast<VectorType>(CondTy)->getElementType(), Ty->getNumElements());
     MinMaxCost += getMinMaxCost(Ty, SubCondTy, IsUnsigned);
   }
@@ -3519,7 +3521,7 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
                                 unsigned Alignment, unsigned AddressSpace) {

   assert(isa<VectorType>(SrcVTy) && "Unexpected type in getGSVectorCost");
-  unsigned VF = SrcVTy->getVectorNumElements();
+  unsigned VF = cast<VectorType>(SrcVTy)->getNumElements();

   // Try to reduce index size from 64 bit (default for GEP)
   // to 32. It is essential for VF 16. If the index can't be reduced to 32, the
@@ -3540,8 +3542,8 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
       if (isa<Constant>(GEP->getOperand(i)))
         continue;
       Type *IndxTy = GEP->getOperand(i)->getType();
-      if (IndxTy->isVectorTy())
-        IndxTy = IndxTy->getVectorElementType();
+      if (auto *IndexVTy = dyn_cast<VectorType>(IndxTy))
+        IndxTy = IndexVTy->getElementType();
       if ((IndxTy->getPrimitiveSizeInBits() == 64 &&
            !isa<SExtInst>(GEP->getOperand(i))) ||
           ++NumOfVarIndices > 1)
@@ -3589,7 +3591,7 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
 int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
                                 bool VariableMask, unsigned Alignment,
                                 unsigned AddressSpace) {
-  unsigned VF = SrcVTy->getVectorNumElements();
+  unsigned VF = cast<VectorType>(SrcVTy)->getNumElements();

   int MaskUnpackCost = 0;
   if (VariableMask) {
@@ -3628,10 +3630,11 @@ int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy,
                                        unsigned Alignment,
                                        const Instruction *I = nullptr) {
   assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter");
-  unsigned VF = SrcVTy->getVectorNumElements();
+  unsigned VF = cast<VectorType>(SrcVTy)->getNumElements();
   PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
   if (!PtrTy && Ptr->getType()->isVectorTy())
-    PtrTy = dyn_cast<PointerType>(Ptr->getType()->getVectorElementType());
+    PtrTy = dyn_cast<PointerType>(
+        cast<VectorType>(Ptr->getType())->getElementType());
   assert(PtrTy && "Unexpected type for Ptr argument");
   unsigned AddressSpace = PtrTy->getAddressSpace();
@@ -3677,7 +3680,8 @@ bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment) {
     return false;

   // The backend can't handle a single element vector.
-  if (isa<VectorType>(DataTy) && DataTy->getVectorNumElements() == 1)
+  if (isa<VectorType>(DataTy) &&
+      cast<VectorType>(DataTy)->getNumElements() == 1)
     return false;

   Type *ScalarTy = DataTy->getScalarType();
@@ -3742,10 +3746,10 @@ bool X86TTIImpl::isLegalMaskedExpandLoad(Type *DataTy) {
     return false;

   // The backend can't handle a single element vector.
-  if (DataTy->getVectorNumElements() == 1)
+  if (cast<VectorType>(DataTy)->getNumElements() == 1)
     return false;

-  Type *ScalarTy = DataTy->getVectorElementType();
+  Type *ScalarTy = cast<VectorType>(DataTy)->getElementType();

   if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
     return true;
@@ -3781,8 +3785,8 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, MaybeAlign Alignment) {
   // In this case we can reject non-power-of-2 vectors.
   // We also reject single element vectors as the type legalizer can't
   // scalarize it.
-  if (isa<VectorType>(DataTy)) {
-    unsigned NumElts = DataTy->getVectorNumElements();
+  if (auto *DataVTy = dyn_cast<VectorType>(DataTy)) {
+    unsigned NumElts = DataVTy->getNumElements();
     if (NumElts == 1 || !isPowerOf2_32(NumElts))
       return false;
   }
@@ -3921,8 +3925,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                              Alignment, AddressSpace);

-  unsigned VF = VecTy->getVectorNumElements() / Factor;
-  Type *ScalarTy = VecTy->getVectorElementType();
+  unsigned VF = cast<VectorType>(VecTy)->getNumElements() / Factor;
+  Type *ScalarTy = cast<VectorType>(VecTy)->getElementType();

   // Calculate the number of memory operations (NumOfMemOps), required
   // for load/store the VecTy.
@@ -3931,8 +3935,9 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
   unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;

   // Get the cost of one memory operation.
-  Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
-                                        LegalVT.getVectorNumElements());
+  Type *SingleMemOpTy =
+      VectorType::get(cast<VectorType>(VecTy)->getElementType(),
+                      LegalVT.getVectorNumElements());
   unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
                                        MaybeAlign(Alignment), AddressSpace);
@@ -4031,12 +4036,13 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
   unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;

   // Get the cost of one memory operation.
-  Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
-                                        LegalVT.getVectorNumElements());
+  Type *SingleMemOpTy =
+      VectorType::get(cast<VectorType>(VecTy)->getElementType(),
+                      LegalVT.getVectorNumElements());
   unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
                                        MaybeAlign(Alignment), AddressSpace);

-  unsigned VF = VecTy->getVectorNumElements() / Factor;
+  unsigned VF = cast<VectorType>(VecTy)->getNumElements() / Factor;
   MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);

   if (Opcode == Instruction::Load) {
@@ -4068,8 +4074,9 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,

     unsigned NumOfLoadsInInterleaveGrp =
         Indices.size() ? Indices.size() : Factor;
-    Type *ResultTy = VectorType::get(VecTy->getVectorElementType(),
-                                     VecTy->getVectorNumElements() / Factor);
+    Type *ResultTy =
+        VectorType::get(cast<VectorType>(VecTy)->getElementType(),
+                        cast<VectorType>(VecTy)->getNumElements() / Factor);
     unsigned NumOfResults =
         getTLI()->getTypeLegalizationCost(DL, ResultTy).first *
         NumOfLoadsInInterleaveGrp;
@@ -4139,7 +4146,7 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                            bool UseMaskForCond,
                                            bool UseMaskForGaps) {
   auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) {
-    Type *EltTy = VecTy->getVectorElementType();
+    Type *EltTy = cast<VectorType>(VecTy)->getElementType();
     if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) ||
         EltTy->isIntegerTy(32) || EltTy->isPointerTy())
       return true;