forked from OSchip/llvm-project
[SVE] Eliminate calls to default-false VectorType::get() from X86
Reviewers: efriedma, sdesmalen, c-rhodes, craig.topper
Reviewed By: craig.topper
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80331
This commit is contained in:
parent
4034d0ce20
commit
5a99ec10f5
|
@ -28632,7 +28632,7 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
|
|||
DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));
|
||||
|
||||
Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy)
|
||||
: (Type *)VectorType::get(ArgTy, 4);
|
||||
: (Type *)FixedVectorType::get(ArgTy, 4);
|
||||
|
||||
TargetLowering::CallLoweringInfo CLI(DAG);
|
||||
CLI.setDebugLoc(dl)
|
||||
|
|
|
@ -5999,14 +5999,18 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
|
|||
else if (Opc == X86::FsFLD0F128 || Opc == X86::AVX512_FsFLD0F128)
|
||||
Ty = Type::getFP128Ty(MF.getFunction().getContext());
|
||||
else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
|
||||
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),16);
|
||||
Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
|
||||
16);
|
||||
else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 ||
|
||||
Opc == X86::AVX512_256_SET0 || Opc == X86::AVX1_SETALLONES)
|
||||
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 8);
|
||||
Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
|
||||
8);
|
||||
else if (Opc == X86::MMX_SET0)
|
||||
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 2);
|
||||
Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
|
||||
2);
|
||||
else
|
||||
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 4);
|
||||
Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
|
||||
4);
|
||||
|
||||
bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES ||
|
||||
Opc == X86::AVX512_512_SETALLONES ||
|
||||
|
|
|
@ -201,7 +201,7 @@ void X86InterleavedAccessGroup::decompose(
|
|||
// [0,1...,VF/2-1,VF/2+VF,VF/2+VF+1,...,2VF-1]
|
||||
unsigned VecLength = DL.getTypeSizeInBits(VecWidth);
|
||||
if (VecLength == 768 || VecLength == 1536) {
|
||||
VecBaseTy = VectorType::get(Type::getInt8Ty(LI->getContext()), 16);
|
||||
VecBaseTy = FixedVectorType::get(Type::getInt8Ty(LI->getContext()), 16);
|
||||
VecBasePtrTy = VecBaseTy->getPointerTo(LI->getPointerAddressSpace());
|
||||
VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
|
||||
NumLoads = NumSubVectors * (VecLength / 384);
|
||||
|
@ -768,7 +768,8 @@ bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() {
|
|||
// Lower the interleaved stores:
|
||||
// 1. Decompose the interleaved wide shuffle into individual shuffle
|
||||
// vectors.
|
||||
decompose(Shuffles[0], Factor, VectorType::get(ShuffleEltTy, NumSubVecElems),
|
||||
decompose(Shuffles[0], Factor,
|
||||
FixedVectorType::get(ShuffleEltTy, NumSubVecElems),
|
||||
DecomposedVectors);
|
||||
|
||||
// 2. Transpose the interleaved-vectors into vectors of contiguous
|
||||
|
|
|
@ -372,7 +372,8 @@ bool X86PartialReduction::trySADReplacement(Value *Op, BinaryOperator *Add) {
|
|||
}
|
||||
|
||||
// Intrinsics produce vXi64 and need to be cast to vXi32.
|
||||
Type *I32Ty = VectorType::get(Builder.getInt32Ty(), IntrinsicNumElts / 4);
|
||||
auto *I32Ty =
|
||||
FixedVectorType::get(Builder.getInt32Ty(), IntrinsicNumElts / 4);
|
||||
|
||||
assert(NumElts % IntrinsicNumElts == 0 && "Unexpected number of elements!");
|
||||
unsigned NumSplits = NumElts / IntrinsicNumElts;
|
||||
|
|
|
@ -3164,8 +3164,8 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
|
|||
if (LT.first != 1 && MTy.isVector() &&
|
||||
MTy.getVectorNumElements() < ValVTy->getNumElements()) {
|
||||
// Type needs to be split. We need LT.first - 1 arithmetic ops.
|
||||
VectorType *SingleOpTy =
|
||||
VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
|
||||
auto *SingleOpTy = FixedVectorType::get(ValVTy->getElementType(),
|
||||
MTy.getVectorNumElements());
|
||||
ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind);
|
||||
ArithmeticCost *= LT.first - 1;
|
||||
}
|
||||
|
@ -3234,8 +3234,8 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
|
|||
if (LT.first != 1 && MTy.isVector() &&
|
||||
MTy.getVectorNumElements() < ValVTy->getNumElements()) {
|
||||
// Type needs to be split. We need LT.first - 1 arithmetic ops.
|
||||
Type *SingleOpTy =
|
||||
VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
|
||||
auto *SingleOpTy = FixedVectorType::get(ValVTy->getElementType(),
|
||||
MTy.getVectorNumElements());
|
||||
ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind);
|
||||
ArithmeticCost *= LT.first - 1;
|
||||
}
|
||||
|
@ -3310,7 +3310,7 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
|
|||
getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
|
||||
} else {
|
||||
// Reducing from smaller size is a shift by immediate.
|
||||
auto *ShiftTy = VectorType::get(
|
||||
auto *ShiftTy = FixedVectorType::get(
|
||||
Type::getIntNTy(ValVTy->getContext(), Size), 128 / Size);
|
||||
ReductionCost += getArithmeticInstrCost(
|
||||
Instruction::LShr, ShiftTy, CostKind,
|
||||
|
@ -3617,8 +3617,8 @@ int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
|
|||
}
|
||||
|
||||
// Add the arithmetic op for this level.
|
||||
auto *SubCondTy = VectorType::get(CondTy->getElementType(),
|
||||
Ty->getNumElements());
|
||||
auto *SubCondTy =
|
||||
FixedVectorType::get(CondTy->getElementType(), Ty->getNumElements());
|
||||
MinMaxCost += getMinMaxCost(Ty, SubCondTy, IsUnsigned);
|
||||
}
|
||||
|
||||
|
@ -3866,14 +3866,15 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
|
|||
? getIndexSizeInBits(Ptr, DL)
|
||||
: DL.getPointerSizeInBits();
|
||||
|
||||
Type *IndexVTy = VectorType::get(IntegerType::get(SrcVTy->getContext(),
|
||||
IndexSize), VF);
|
||||
auto *IndexVTy = FixedVectorType::get(
|
||||
IntegerType::get(SrcVTy->getContext(), IndexSize), VF);
|
||||
std::pair<int, MVT> IdxsLT = TLI->getTypeLegalizationCost(DL, IndexVTy);
|
||||
std::pair<int, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, SrcVTy);
|
||||
int SplitFactor = std::max(IdxsLT.first, SrcLT.first);
|
||||
if (SplitFactor > 1) {
|
||||
// Handle splitting of vector of pointers
|
||||
Type *SplitSrcTy = VectorType::get(SrcVTy->getScalarType(), VF / SplitFactor);
|
||||
auto *SplitSrcTy =
|
||||
FixedVectorType::get(SrcVTy->getScalarType(), VF / SplitFactor);
|
||||
return SplitFactor * getGSVectorCost(Opcode, SplitSrcTy, Ptr, Alignment,
|
||||
AddressSpace);
|
||||
}
|
||||
|
@ -4265,14 +4266,14 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
|
|||
unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
|
||||
|
||||
// Get the cost of one memory operation.
|
||||
Type *SingleMemOpTy =
|
||||
VectorType::get(cast<VectorType>(VecTy)->getElementType(),
|
||||
LegalVT.getVectorNumElements());
|
||||
auto *SingleMemOpTy =
|
||||
FixedVectorType::get(cast<VectorType>(VecTy)->getElementType(),
|
||||
LegalVT.getVectorNumElements());
|
||||
unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
|
||||
MaybeAlign(Alignment), AddressSpace,
|
||||
CostKind);
|
||||
|
||||
VectorType *VT = VectorType::get(ScalarTy, VF);
|
||||
auto *VT = FixedVectorType::get(ScalarTy, VF);
|
||||
EVT ETy = TLI->getValueType(DL, VT);
|
||||
if (!ETy.isSimple())
|
||||
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
|
||||
|
@ -4408,9 +4409,9 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
|
|||
|
||||
unsigned NumOfLoadsInInterleaveGrp =
|
||||
Indices.size() ? Indices.size() : Factor;
|
||||
Type *ResultTy =
|
||||
VectorType::get(cast<VectorType>(VecTy)->getElementType(),
|
||||
cast<VectorType>(VecTy)->getNumElements() / Factor);
|
||||
auto *ResultTy = FixedVectorType::get(
|
||||
cast<VectorType>(VecTy)->getElementType(),
|
||||
cast<VectorType>(VecTy)->getNumElements() / Factor);
|
||||
unsigned NumOfResults =
|
||||
getTLI()->getTypeLegalizationCost(DL, ResultTy).first *
|
||||
NumOfLoadsInInterleaveGrp;
|
||||
|
|
Loading…
Reference in New Issue