forked from OSchip/llvm-project
[NFC][TTI] `getReplicationShuffleCost()`: s/Replicated/Dst/
'Replicated' is a mouthful and somewhat ambiguous, while 'destination' is pretty self-explanatory.
This commit is contained in:
parent
b69dc2d180
commit
e876698a5d
|
@ -1127,7 +1127,7 @@ public:
|
|||
/// <0,0,0,1,1,1,2,2,2,3,3,3>
|
||||
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
|
||||
int VF,
|
||||
const APInt &DemandedReplicatedElts,
|
||||
const APInt &DemandedDstElts,
|
||||
TTI::TargetCostKind CostKind);
|
||||
|
||||
/// \return The cost of Load and Store instructions.
|
||||
|
@ -1659,7 +1659,7 @@ public:
|
|||
|
||||
virtual InstructionCost
|
||||
getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
|
||||
const APInt &DemandedReplicatedElts,
|
||||
const APInt &DemandedDstElts,
|
||||
TTI::TargetCostKind CostKind) = 0;
|
||||
|
||||
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||
|
@ -2173,10 +2173,10 @@ public:
|
|||
}
|
||||
InstructionCost
|
||||
getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
|
||||
const APInt &DemandedReplicatedElts,
|
||||
const APInt &DemandedDstElts,
|
||||
TTI::TargetCostKind CostKind) override {
|
||||
return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
|
||||
DemandedReplicatedElts, CostKind);
|
||||
DemandedDstElts, CostKind);
|
||||
}
|
||||
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
|
||||
unsigned AddressSpace,
|
||||
|
|
|
@ -552,7 +552,7 @@ public:
|
|||
}
|
||||
|
||||
unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
|
||||
const APInt &DemandedReplicatedElts,
|
||||
const APInt &DemandedDstElts,
|
||||
TTI::TargetCostKind CostKind) {
|
||||
return 1;
|
||||
}
|
||||
|
@ -1114,15 +1114,15 @@ public:
|
|||
|
||||
int ReplicationFactor, VF;
|
||||
if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
|
||||
APInt DemandedReplicatedElts =
|
||||
APInt DemandedDstElts =
|
||||
APInt::getNullValue(Shuffle->getShuffleMask().size());
|
||||
for (auto I : enumerate(Shuffle->getShuffleMask())) {
|
||||
if (I.value() != UndefMaskElem)
|
||||
DemandedReplicatedElts.setBit(I.index());
|
||||
DemandedDstElts.setBit(I.index());
|
||||
}
|
||||
return TargetTTI->getReplicationShuffleCost(
|
||||
VecSrcTy->getElementType(), ReplicationFactor, VF,
|
||||
DemandedReplicatedElts, CostKind);
|
||||
DemandedDstElts, CostKind);
|
||||
}
|
||||
|
||||
return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
|
||||
|
|
|
@ -1121,11 +1121,10 @@ public:
|
|||
|
||||
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
|
||||
int VF,
|
||||
const APInt &DemandedReplicatedElts,
|
||||
const APInt &DemandedDstElts,
|
||||
TTI::TargetCostKind CostKind) {
|
||||
assert(DemandedReplicatedElts.getBitWidth() ==
|
||||
(unsigned)VF * ReplicationFactor &&
|
||||
"Unexpected size of DemandedReplicatedElts.");
|
||||
assert(DemandedDstElts.getBitWidth() == (unsigned)VF * ReplicationFactor &&
|
||||
"Unexpected size of DemandedDstElts.");
|
||||
|
||||
InstructionCost Cost;
|
||||
|
||||
|
@ -1142,12 +1141,12 @@ public:
|
|||
// The cost is estimated as extract all mask elements from the <8xi1> mask
|
||||
// vector and insert them factor times into the <24xi1> shuffled mask
|
||||
// vector.
|
||||
APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedReplicatedElts, VF);
|
||||
APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedDstElts, VF);
|
||||
Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts,
|
||||
/*Insert*/ false,
|
||||
/*Extract*/ true);
|
||||
Cost +=
|
||||
thisT()->getScalarizationOverhead(ReplicatedVT, DemandedReplicatedElts,
|
||||
thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts,
|
||||
/*Insert*/ true, /*Extract*/ false);
|
||||
|
||||
return Cost;
|
||||
|
|
|
@ -834,10 +834,10 @@ InstructionCost TargetTransformInfo::getVectorInstrCost(unsigned Opcode,
|
|||
}
|
||||
|
||||
InstructionCost TargetTransformInfo::getReplicationShuffleCost(
|
||||
Type *EltTy, int ReplicationFactor, int VF,
|
||||
const APInt &DemandedReplicatedElts, TTI::TargetCostKind CostKind) {
|
||||
Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts,
|
||||
TTI::TargetCostKind CostKind) {
|
||||
InstructionCost Cost = TTIImpl->getReplicationShuffleCost(
|
||||
EltTy, ReplicationFactor, VF, DemandedReplicatedElts, CostKind);
|
||||
EltTy, ReplicationFactor, VF, DemandedDstElts, CostKind);
|
||||
assert(Cost >= 0 && "TTI should not produce negative costs!");
|
||||
return Cost;
|
||||
}
|
||||
|
|
|
@ -3624,14 +3624,15 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
|
|||
return Cost;
|
||||
}
|
||||
|
||||
InstructionCost X86TTIImpl::getReplicationShuffleCost(
|
||||
Type *EltTy, int ReplicationFactor, int VF,
|
||||
const APInt &DemandedReplicatedElts, TTI::TargetCostKind CostKind) {
|
||||
InstructionCost
|
||||
X86TTIImpl::getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
|
||||
int VF, const APInt &DemandedDstElts,
|
||||
TTI::TargetCostKind CostKind) {
|
||||
const unsigned EltTyBits = DL.getTypeSizeInBits(EltTy);
|
||||
|
||||
auto bailout = [&]() {
|
||||
return BaseT::getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
|
||||
DemandedReplicatedElts, CostKind);
|
||||
DemandedDstElts, CostKind);
|
||||
};
|
||||
|
||||
// For now, only deal with AVX512 cases.
|
||||
|
@ -3655,43 +3656,37 @@ InstructionCost X86TTIImpl::getReplicationShuffleCost(
|
|||
}
|
||||
|
||||
auto *SrcVecTy = FixedVectorType::get(EltTy, VF);
|
||||
int NumReplicatedElements = VF * ReplicationFactor;
|
||||
auto *ReplicatedVecTy = FixedVectorType::get(EltTy, NumReplicatedElements);
|
||||
int NumDstElements = VF * ReplicationFactor;
|
||||
auto *DstVecTy = FixedVectorType::get(EltTy, NumDstElements);
|
||||
|
||||
// Legalize the types.
|
||||
MVT LegalSrcVecTy = TLI->getTypeLegalizationCost(DL, SrcVecTy).second;
|
||||
MVT LegalReplicatedVecTy =
|
||||
TLI->getTypeLegalizationCost(DL, ReplicatedVecTy).second;
|
||||
MVT LegalDstVecTy = TLI->getTypeLegalizationCost(DL, DstVecTy).second;
|
||||
|
||||
// They both should have legalized into vector types.
|
||||
if (!LegalSrcVecTy.isVector() || !LegalReplicatedVecTy.isVector())
|
||||
if (!LegalSrcVecTy.isVector() || !LegalDstVecTy.isVector())
|
||||
return bailout();
|
||||
|
||||
assert(LegalSrcVecTy.getScalarSizeInBits() == EltTyBits &&
|
||||
LegalSrcVecTy.getScalarType() ==
|
||||
LegalReplicatedVecTy.getScalarType() &&
|
||||
LegalSrcVecTy.getScalarType() == LegalDstVecTy.getScalarType() &&
|
||||
"We expect that the legalization doesn't affect the element width, "
|
||||
"doesn't coalesce/split elements.");
|
||||
|
||||
unsigned NumEltsPerReplicatedVec =
|
||||
LegalReplicatedVecTy.getVectorNumElements();
|
||||
unsigned NumReplicatedVectors =
|
||||
divideCeil(ReplicatedVecTy->getNumElements(), NumEltsPerReplicatedVec);
|
||||
unsigned NumEltsPerDstVec = LegalDstVecTy.getVectorNumElements();
|
||||
unsigned NumDstVectors =
|
||||
divideCeil(DstVecTy->getNumElements(), NumEltsPerDstVec);
|
||||
|
||||
auto *SingleReplicatedVecTy =
|
||||
FixedVectorType::get(EltTy, NumEltsPerReplicatedVec);
|
||||
auto *SingleDstVecTy = FixedVectorType::get(EltTy, NumEltsPerDstVec);
|
||||
|
||||
APInt DemandedReplicatedVectors = APIntOps::ScaleBitMask(
|
||||
DemandedReplicatedElts.zextOrSelf(NumReplicatedVectors *
|
||||
NumEltsPerReplicatedVec),
|
||||
NumReplicatedVectors);
|
||||
unsigned NumReplicatedVectorsDemanded =
|
||||
DemandedReplicatedVectors.countPopulation();
|
||||
APInt DemandedDstVectors = APIntOps::ScaleBitMask(
|
||||
DemandedDstElts.zextOrSelf(NumDstVectors * NumEltsPerDstVec),
|
||||
NumDstVectors);
|
||||
unsigned NumDstVectorsDemanded = DemandedDstVectors.countPopulation();
|
||||
|
||||
InstructionCost SingleShuffleCost =
|
||||
getShuffleCost(TTI::SK_PermuteSingleSrc, SingleReplicatedVecTy,
|
||||
getShuffleCost(TTI::SK_PermuteSingleSrc, SingleDstVecTy,
|
||||
/*Mask=*/None, /*Index=*/0, /*SubTp=*/nullptr);
|
||||
return NumReplicatedVectorsDemanded * SingleShuffleCost;
|
||||
return NumDstVectorsDemanded * SingleShuffleCost;
|
||||
}
|
||||
|
||||
InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||
|
|
|
@ -147,7 +147,7 @@ public:
|
|||
bool Insert, bool Extract);
|
||||
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
|
||||
int VF,
|
||||
const APInt &DemandedReplicatedElts,
|
||||
const APInt &DemandedDstElts,
|
||||
TTI::TargetCostKind CostKind);
|
||||
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||
MaybeAlign Alignment, unsigned AddressSpace,
|
||||
|
|
Loading…
Reference in New Issue