diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index e93a1e2b7aaf..170d6b8f35ff 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1127,7 +1127,7 @@ public: /// <0,0,0,1,1,1,2,2,2,3,3,3> InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, - const APInt &DemandedReplicatedElts, + const APInt &DemandedDstElts, TTI::TargetCostKind CostKind); /// \return The cost of Load and Store instructions. @@ -1659,7 +1659,7 @@ public: virtual InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, - const APInt &DemandedReplicatedElts, + const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) = 0; virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, @@ -2173,10 +2173,10 @@ public: } InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, - const APInt &DemandedReplicatedElts, + const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) override { return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF, - DemandedReplicatedElts, CostKind); + DemandedDstElts, CostKind); } InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index a0bae8ed29a1..bbcfd9a45eb3 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -552,7 +552,7 @@ public: } unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, - const APInt &DemandedReplicatedElts, + const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) { return 1; } @@ -1114,15 +1114,15 @@ public: int ReplicationFactor, VF; if (Shuffle->isReplicationMask(ReplicationFactor, VF)) { - APInt DemandedReplicatedElts = + APInt DemandedDstElts = APInt::getNullValue(Shuffle->getShuffleMask().size()); for (auto I : enumerate(Shuffle->getShuffleMask())) { if (I.value() != UndefMaskElem) - DemandedReplicatedElts.setBit(I.index()); + DemandedDstElts.setBit(I.index()); } return TargetTTI->getReplicationShuffleCost( VecSrcTy->getElementType(), ReplicationFactor, VF, - DemandedReplicatedElts, CostKind); + DemandedDstElts, CostKind); } return CostKind == TTI::TCK_RecipThroughput ? -1 : 1; diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index aeea6e459eac..c265a22f0e6e 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1121,11 +1121,10 @@ public: InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, - const APInt &DemandedReplicatedElts, + const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) { - assert(DemandedReplicatedElts.getBitWidth() == - (unsigned)VF * ReplicationFactor && - "Unexpected size of DemandedReplicatedElts."); + assert(DemandedDstElts.getBitWidth() == (unsigned)VF * ReplicationFactor && + "Unexpected size of DemandedDstElts."); InstructionCost Cost; @@ -1142,12 +1141,12 @@ public: // The cost is estimated as extract all mask elements from the <8xi1> mask // vector and insert them factor times into the <24xi1> shuffled mask // vector. - APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedReplicatedElts, VF); + APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedDstElts, VF); Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts, /*Insert*/ false, /*Extract*/ true); Cost += - thisT()->getScalarizationOverhead(ReplicatedVT, DemandedReplicatedElts, + thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts, /*Insert*/ true, /*Extract*/ false); return Cost; diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index dcd015b36ee8..5067f493f02d 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -834,10 +834,10 @@ InstructionCost TargetTransformInfo::getVectorInstrCost(unsigned Opcode, } InstructionCost TargetTransformInfo::getReplicationShuffleCost( - Type *EltTy, int ReplicationFactor, int VF, - const APInt &DemandedReplicatedElts, TTI::TargetCostKind CostKind) { + Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, + TTI::TargetCostKind CostKind) { InstructionCost Cost = TTIImpl->getReplicationShuffleCost( - EltTy, ReplicationFactor, VF, DemandedReplicatedElts, CostKind); + EltTy, ReplicationFactor, VF, DemandedDstElts, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 4606a22022de..589c0352bcbc 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3624,14 +3624,15 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty, return Cost; } -InstructionCost X86TTIImpl::getReplicationShuffleCost( - Type *EltTy, int ReplicationFactor, int VF, - const APInt &DemandedReplicatedElts, TTI::TargetCostKind CostKind) { +InstructionCost +X86TTIImpl::getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, + int VF, const APInt &DemandedDstElts, + TTI::TargetCostKind CostKind) { const unsigned EltTyBits = DL.getTypeSizeInBits(EltTy); auto bailout = [&]() { return BaseT::getReplicationShuffleCost(EltTy, ReplicationFactor, VF, - DemandedReplicatedElts, CostKind); + DemandedDstElts, CostKind); }; // For now, only deal with AVX512 cases. @@ -3655,43 +3656,37 @@ InstructionCost X86TTIImpl::getReplicationShuffleCost( } auto *SrcVecTy = FixedVectorType::get(EltTy, VF); - int NumReplicatedElements = VF * ReplicationFactor; - auto *ReplicatedVecTy = FixedVectorType::get(EltTy, NumReplicatedElements); + int NumDstElements = VF * ReplicationFactor; + auto *DstVecTy = FixedVectorType::get(EltTy, NumDstElements); // Legalize the types. MVT LegalSrcVecTy = TLI->getTypeLegalizationCost(DL, SrcVecTy).second; - MVT LegalReplicatedVecTy = - TLI->getTypeLegalizationCost(DL, ReplicatedVecTy).second; + MVT LegalDstVecTy = TLI->getTypeLegalizationCost(DL, DstVecTy).second; // They both should have legalized into vector types. - if (!LegalSrcVecTy.isVector() || !LegalReplicatedVecTy.isVector()) + if (!LegalSrcVecTy.isVector() || !LegalDstVecTy.isVector()) return bailout(); assert(LegalSrcVecTy.getScalarSizeInBits() == EltTyBits && - LegalSrcVecTy.getScalarType() == - LegalReplicatedVecTy.getScalarType() && + LegalSrcVecTy.getScalarType() == LegalDstVecTy.getScalarType() && "We expect that the legalization doesn't affect the element width, " "doesn't coalesce/split elements."); - unsigned NumEltsPerReplicatedVec = - LegalReplicatedVecTy.getVectorNumElements(); - unsigned NumReplicatedVectors = - divideCeil(ReplicatedVecTy->getNumElements(), NumEltsPerReplicatedVec); + unsigned NumEltsPerDstVec = LegalDstVecTy.getVectorNumElements(); + unsigned NumDstVectors = + divideCeil(DstVecTy->getNumElements(), NumEltsPerDstVec); - auto *SingleReplicatedVecTy = - FixedVectorType::get(EltTy, NumEltsPerReplicatedVec); + auto *SingleDstVecTy = FixedVectorType::get(EltTy, NumEltsPerDstVec); - APInt DemandedReplicatedVectors = APIntOps::ScaleBitMask( - DemandedReplicatedElts.zextOrSelf(NumReplicatedVectors * - NumEltsPerReplicatedVec), - NumReplicatedVectors); - unsigned NumReplicatedVectorsDemanded = - DemandedReplicatedVectors.countPopulation(); + APInt DemandedDstVectors = APIntOps::ScaleBitMask( + DemandedDstElts.zextOrSelf(NumDstVectors * NumEltsPerDstVec), + NumDstVectors); + unsigned NumDstVectorsDemanded = DemandedDstVectors.countPopulation(); InstructionCost SingleShuffleCost = - getShuffleCost(TTI::SK_PermuteSingleSrc, SingleReplicatedVecTy, + getShuffleCost(TTI::SK_PermuteSingleSrc, SingleDstVecTy, /*Mask=*/None, /*Index=*/0, /*SubTp=*/nullptr); - return NumReplicatedVectorsDemanded * SingleShuffleCost; + return NumDstVectorsDemanded * SingleShuffleCost; } InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index ca8e42bd2c70..c53424ec0026 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -147,7 +147,7 @@ public: bool Insert, bool Extract); InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, - const APInt &DemandedReplicatedElts, + const APInt &DemandedDstElts, TTI::TargetCostKind CostKind); InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace,