[SVE] Eliminate calls to default-false VectorType::get() from X86

Reviewers: efriedma, sdesmalen, c-rhodes, craig.topper

Reviewed By: craig.topper

Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D80331
This commit is contained in:
Christopher Tetreault 2020-05-29 15:52:33 -07:00
parent 4034d0ce20
commit 5a99ec10f5
5 changed files with 32 additions and 25 deletions

View File

@@ -28632,7 +28632,7 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));
Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy)
: (Type *)VectorType::get(ArgTy, 4);
: (Type *)FixedVectorType::get(ArgTy, 4);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)

View File

@@ -5999,14 +5999,18 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
else if (Opc == X86::FsFLD0F128 || Opc == X86::AVX512_FsFLD0F128)
Ty = Type::getFP128Ty(MF.getFunction().getContext());
else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),16);
Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
16);
else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 ||
Opc == X86::AVX512_256_SET0 || Opc == X86::AVX1_SETALLONES)
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 8);
Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
8);
else if (Opc == X86::MMX_SET0)
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 2);
Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
2);
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 4);
Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
4);
bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES ||
Opc == X86::AVX512_512_SETALLONES ||

View File

@@ -201,7 +201,7 @@ void X86InterleavedAccessGroup::decompose(
// [0,1...,VF/2-1,VF/2+VF,VF/2+VF+1,...,2VF-1]
unsigned VecLength = DL.getTypeSizeInBits(VecWidth);
if (VecLength == 768 || VecLength == 1536) {
VecBaseTy = VectorType::get(Type::getInt8Ty(LI->getContext()), 16);
VecBaseTy = FixedVectorType::get(Type::getInt8Ty(LI->getContext()), 16);
VecBasePtrTy = VecBaseTy->getPointerTo(LI->getPointerAddressSpace());
VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
NumLoads = NumSubVectors * (VecLength / 384);
@@ -768,7 +768,8 @@ bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() {
// Lower the interleaved stores:
// 1. Decompose the interleaved wide shuffle into individual shuffle
// vectors.
decompose(Shuffles[0], Factor, VectorType::get(ShuffleEltTy, NumSubVecElems),
decompose(Shuffles[0], Factor,
FixedVectorType::get(ShuffleEltTy, NumSubVecElems),
DecomposedVectors);
// 2. Transpose the interleaved-vectors into vectors of contiguous

View File

@@ -372,7 +372,8 @@ bool X86PartialReduction::trySADReplacement(Value *Op, BinaryOperator *Add) {
}
// Intrinsics produce vXi64 and need to be casted to vXi32.
Type *I32Ty = VectorType::get(Builder.getInt32Ty(), IntrinsicNumElts / 4);
auto *I32Ty =
FixedVectorType::get(Builder.getInt32Ty(), IntrinsicNumElts / 4);
assert(NumElts % IntrinsicNumElts == 0 && "Unexpected number of elements!");
unsigned NumSplits = NumElts / IntrinsicNumElts;

View File

@@ -3164,8 +3164,8 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
if (LT.first != 1 && MTy.isVector() &&
MTy.getVectorNumElements() < ValVTy->getNumElements()) {
// Type needs to be split. We need LT.first - 1 arithmetic ops.
VectorType *SingleOpTy =
VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
auto *SingleOpTy = FixedVectorType::get(ValVTy->getElementType(),
MTy.getVectorNumElements());
ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind);
ArithmeticCost *= LT.first - 1;
}
@@ -3234,8 +3234,8 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
if (LT.first != 1 && MTy.isVector() &&
MTy.getVectorNumElements() < ValVTy->getNumElements()) {
// Type needs to be split. We need LT.first - 1 arithmetic ops.
Type *SingleOpTy =
VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
auto *SingleOpTy = FixedVectorType::get(ValVTy->getElementType(),
MTy.getVectorNumElements());
ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind);
ArithmeticCost *= LT.first - 1;
}
@@ -3310,7 +3310,7 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
} else {
// Reducing from smaller size is a shift by immediate.
auto *ShiftTy = VectorType::get(
auto *ShiftTy = FixedVectorType::get(
Type::getIntNTy(ValVTy->getContext(), Size), 128 / Size);
ReductionCost += getArithmeticInstrCost(
Instruction::LShr, ShiftTy, CostKind,
@@ -3617,8 +3617,8 @@ int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
}
// Add the arithmetic op for this level.
auto *SubCondTy = VectorType::get(CondTy->getElementType(),
Ty->getNumElements());
auto *SubCondTy =
FixedVectorType::get(CondTy->getElementType(), Ty->getNumElements());
MinMaxCost += getMinMaxCost(Ty, SubCondTy, IsUnsigned);
}
@@ -3866,14 +3866,15 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
? getIndexSizeInBits(Ptr, DL)
: DL.getPointerSizeInBits();
Type *IndexVTy = VectorType::get(IntegerType::get(SrcVTy->getContext(),
IndexSize), VF);
auto *IndexVTy = FixedVectorType::get(
IntegerType::get(SrcVTy->getContext(), IndexSize), VF);
std::pair<int, MVT> IdxsLT = TLI->getTypeLegalizationCost(DL, IndexVTy);
std::pair<int, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, SrcVTy);
int SplitFactor = std::max(IdxsLT.first, SrcLT.first);
if (SplitFactor > 1) {
// Handle splitting of vector of pointers
Type *SplitSrcTy = VectorType::get(SrcVTy->getScalarType(), VF / SplitFactor);
auto *SplitSrcTy =
FixedVectorType::get(SrcVTy->getScalarType(), VF / SplitFactor);
return SplitFactor * getGSVectorCost(Opcode, SplitSrcTy, Ptr, Alignment,
AddressSpace);
}
@@ -4265,14 +4266,14 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
// Get the cost of one memory operation.
Type *SingleMemOpTy =
VectorType::get(cast<VectorType>(VecTy)->getElementType(),
LegalVT.getVectorNumElements());
auto *SingleMemOpTy =
FixedVectorType::get(cast<VectorType>(VecTy)->getElementType(),
LegalVT.getVectorNumElements());
unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
MaybeAlign(Alignment), AddressSpace,
CostKind);
VectorType *VT = VectorType::get(ScalarTy, VF);
auto *VT = FixedVectorType::get(ScalarTy, VF);
EVT ETy = TLI->getValueType(DL, VT);
if (!ETy.isSimple())
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
@@ -4408,9 +4409,9 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
unsigned NumOfLoadsInInterleaveGrp =
Indices.size() ? Indices.size() : Factor;
Type *ResultTy =
VectorType::get(cast<VectorType>(VecTy)->getElementType(),
cast<VectorType>(VecTy)->getNumElements() / Factor);
auto *ResultTy = FixedVectorType::get(
cast<VectorType>(VecTy)->getElementType(),
cast<VectorType>(VecTy)->getNumElements() / Factor);
unsigned NumOfResults =
getTLI()->getTypeLegalizationCost(DL, ResultTy).first *
NumOfLoadsInInterleaveGrp;