[NFC][TTI] `getReplicationShuffleCost()`: s/Replicated/Dst/

'Replicated' is a mouthful and somewhat ambiguous,
while 'destination' is pretty self-explanatory.
This commit is contained in:
Roman Lebedev 2021-11-14 19:56:10 +03:00
parent b69dc2d180
commit e876698a5d
No known key found for this signature in database
GPG Key ID: 083C3EBB4A1689E0
6 changed files with 37 additions and 43 deletions

View File

@ -1127,7 +1127,7 @@ public:
/// <0,0,0,1,1,1,2,2,2,3,3,3>
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
int VF,
const APInt &DemandedReplicatedElts,
const APInt &DemandedDstElts,
TTI::TargetCostKind CostKind);
/// \return The cost of Load and Store instructions.
@ -1659,7 +1659,7 @@ public:
virtual InstructionCost
getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
const APInt &DemandedReplicatedElts,
const APInt &DemandedDstElts,
TTI::TargetCostKind CostKind) = 0;
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
@ -2173,10 +2173,10 @@ public:
}
InstructionCost
getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
const APInt &DemandedReplicatedElts,
const APInt &DemandedDstElts,
TTI::TargetCostKind CostKind) override {
return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
DemandedReplicatedElts, CostKind);
DemandedDstElts, CostKind);
}
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,

View File

@ -552,7 +552,7 @@ public:
}
unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
const APInt &DemandedReplicatedElts,
const APInt &DemandedDstElts,
TTI::TargetCostKind CostKind) {
return 1;
}
@ -1114,15 +1114,15 @@ public:
int ReplicationFactor, VF;
if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
APInt DemandedReplicatedElts =
APInt DemandedDstElts =
APInt::getNullValue(Shuffle->getShuffleMask().size());
for (auto I : enumerate(Shuffle->getShuffleMask())) {
if (I.value() != UndefMaskElem)
DemandedReplicatedElts.setBit(I.index());
DemandedDstElts.setBit(I.index());
}
return TargetTTI->getReplicationShuffleCost(
VecSrcTy->getElementType(), ReplicationFactor, VF,
DemandedReplicatedElts, CostKind);
DemandedDstElts, CostKind);
}
return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;

View File

@ -1121,11 +1121,10 @@ public:
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
int VF,
const APInt &DemandedReplicatedElts,
const APInt &DemandedDstElts,
TTI::TargetCostKind CostKind) {
assert(DemandedReplicatedElts.getBitWidth() ==
(unsigned)VF * ReplicationFactor &&
"Unexpected size of DemandedReplicatedElts.");
assert(DemandedDstElts.getBitWidth() == (unsigned)VF * ReplicationFactor &&
"Unexpected size of DemandedDstElts.");
InstructionCost Cost;
@ -1142,12 +1141,12 @@ public:
// The cost is estimated as extract all mask elements from the <8xi1> mask
// vector and insert them factor times into the <24xi1> shuffled mask
// vector.
APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedReplicatedElts, VF);
APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedDstElts, VF);
Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts,
/*Insert*/ false,
/*Extract*/ true);
Cost +=
thisT()->getScalarizationOverhead(ReplicatedVT, DemandedReplicatedElts,
thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts,
/*Insert*/ true, /*Extract*/ false);
return Cost;

View File

@ -834,10 +834,10 @@ InstructionCost TargetTransformInfo::getVectorInstrCost(unsigned Opcode,
}
InstructionCost TargetTransformInfo::getReplicationShuffleCost(
Type *EltTy, int ReplicationFactor, int VF,
const APInt &DemandedReplicatedElts, TTI::TargetCostKind CostKind) {
Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts,
TTI::TargetCostKind CostKind) {
InstructionCost Cost = TTIImpl->getReplicationShuffleCost(
EltTy, ReplicationFactor, VF, DemandedReplicatedElts, CostKind);
EltTy, ReplicationFactor, VF, DemandedDstElts, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}

View File

@ -3624,14 +3624,15 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
return Cost;
}
InstructionCost X86TTIImpl::getReplicationShuffleCost(
Type *EltTy, int ReplicationFactor, int VF,
const APInt &DemandedReplicatedElts, TTI::TargetCostKind CostKind) {
InstructionCost
X86TTIImpl::getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
int VF, const APInt &DemandedDstElts,
TTI::TargetCostKind CostKind) {
const unsigned EltTyBits = DL.getTypeSizeInBits(EltTy);
auto bailout = [&]() {
return BaseT::getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
DemandedReplicatedElts, CostKind);
DemandedDstElts, CostKind);
};
// For now, only deal with AVX512 cases.
@ -3655,43 +3656,37 @@ InstructionCost X86TTIImpl::getReplicationShuffleCost(
}
auto *SrcVecTy = FixedVectorType::get(EltTy, VF);
int NumReplicatedElements = VF * ReplicationFactor;
auto *ReplicatedVecTy = FixedVectorType::get(EltTy, NumReplicatedElements);
int NumDstElements = VF * ReplicationFactor;
auto *DstVecTy = FixedVectorType::get(EltTy, NumDstElements);
// Legalize the types.
MVT LegalSrcVecTy = TLI->getTypeLegalizationCost(DL, SrcVecTy).second;
MVT LegalReplicatedVecTy =
TLI->getTypeLegalizationCost(DL, ReplicatedVecTy).second;
MVT LegalDstVecTy = TLI->getTypeLegalizationCost(DL, DstVecTy).second;
// They both should have legalized into vector types.
if (!LegalSrcVecTy.isVector() || !LegalReplicatedVecTy.isVector())
if (!LegalSrcVecTy.isVector() || !LegalDstVecTy.isVector())
return bailout();
assert(LegalSrcVecTy.getScalarSizeInBits() == EltTyBits &&
LegalSrcVecTy.getScalarType() ==
LegalReplicatedVecTy.getScalarType() &&
LegalSrcVecTy.getScalarType() == LegalDstVecTy.getScalarType() &&
"We expect that the legalization doesn't affect the element width, "
"doesn't coalesce/split elements.");
unsigned NumEltsPerReplicatedVec =
LegalReplicatedVecTy.getVectorNumElements();
unsigned NumReplicatedVectors =
divideCeil(ReplicatedVecTy->getNumElements(), NumEltsPerReplicatedVec);
unsigned NumEltsPerDstVec = LegalDstVecTy.getVectorNumElements();
unsigned NumDstVectors =
divideCeil(DstVecTy->getNumElements(), NumEltsPerDstVec);
auto *SingleReplicatedVecTy =
FixedVectorType::get(EltTy, NumEltsPerReplicatedVec);
auto *SingleDstVecTy = FixedVectorType::get(EltTy, NumEltsPerDstVec);
APInt DemandedReplicatedVectors = APIntOps::ScaleBitMask(
DemandedReplicatedElts.zextOrSelf(NumReplicatedVectors *
NumEltsPerReplicatedVec),
NumReplicatedVectors);
unsigned NumReplicatedVectorsDemanded =
DemandedReplicatedVectors.countPopulation();
APInt DemandedDstVectors = APIntOps::ScaleBitMask(
DemandedDstElts.zextOrSelf(NumDstVectors * NumEltsPerDstVec),
NumDstVectors);
unsigned NumDstVectorsDemanded = DemandedDstVectors.countPopulation();
InstructionCost SingleShuffleCost =
getShuffleCost(TTI::SK_PermuteSingleSrc, SingleReplicatedVecTy,
getShuffleCost(TTI::SK_PermuteSingleSrc, SingleDstVecTy,
/*Mask=*/None, /*Index=*/0, /*SubTp=*/nullptr);
return NumReplicatedVectorsDemanded * SingleShuffleCost;
return NumDstVectorsDemanded * SingleShuffleCost;
}
InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,

View File

@ -147,7 +147,7 @@ public:
bool Insert, bool Extract);
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
int VF,
const APInt &DemandedReplicatedElts,
const APInt &DemandedDstElts,
TTI::TargetCostKind CostKind);
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,