[TTI][CostModel] `getUserCost()`: recognize replication shuffles and query their cost

This finally creates proper test coverage for replication shuffles,
which are used by LV for conditional loads, and will allow adding
a proper cost model, at least for AVX512.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D113324
This commit is contained in:
Roman Lebedev 2021-11-06 16:45:15 +03:00
parent f8efc5c0ac
commit a30ec4778a
No known key found for this signature in database
GPG Key ID: 083C3EBB4A1689E0
5 changed files with 412 additions and 343 deletions

View File

@ -1123,6 +1123,9 @@ public:
const APInt &DemandedSrcElts,
const APInt &DemandedReplicatedElts,
TTI::TargetCostKind CostKind);
/// Overload of getReplicationShuffleCost() that takes the shuffle \p Mask
/// directly instead of the pair of demanded-element bitsets.
/// \return The cost of the replication shuffle described by \p EltTy,
/// \p ReplicationFactor, \p VF and \p Mask for the given \p CostKind.
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
int VF, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind);
/// \return The cost of Load and Store instructions.
InstructionCost
@ -1651,6 +1654,10 @@ public:
virtual InstructionCost getReplicationShuffleCost(
Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedSrcElts,
const APInt &DemandedReplicatedElts, TTI::TargetCostKind CostKind) = 0;
/// Mask-based overload of the replication shuffle cost hook. Pure virtual:
/// every concrete implementation has to provide it.
virtual InstructionCost
getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind) = 0;
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
Align Alignment,
@ -2162,6 +2169,13 @@ public:
DemandedSrcElts,
DemandedReplicatedElts, CostKind);
}
/// Mask-based replication shuffle cost query: forwards every argument
/// unchanged to the wrapped Impl and returns whatever it reports.
InstructionCost
getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                          ArrayRef<int> Mask,
                          TTI::TargetCostKind CostKind) override {
  InstructionCost Cost = Impl.getReplicationShuffleCost(
      EltTy, ReplicationFactor, VF, Mask, CostKind);
  return Cost;
}
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,

View File

@ -550,6 +550,11 @@ public:
TTI::TargetCostKind CostKind) {
return 1;
}
/// Mask-based replication shuffle cost query. Returns a constant cost of 1;
/// none of the arguments are inspected.
unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind) {
return 1;
}
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
@ -1106,6 +1111,12 @@ public:
SubIndex,
FixedVectorType::get(VecTy->getScalarType(), NumSubElts));
int ReplicationFactor, VF;
if (Shuffle->isReplicationMask(ReplicationFactor, VF))
return TargetTTI->getReplicationShuffleCost(
VecSrcTy->getElementType(), ReplicationFactor, VF,
Shuffle->getShuffleMask(), CostKind);
return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
}

View File

@ -1143,6 +1143,41 @@ public:
return Cost;
}
/// Mask-based overload: derive the demanded-element bitsets from \p Mask and
/// defer to the APInt-based getReplicationShuffleCost() overload.
///
/// \p Mask is expected to hold exactly VF * ReplicationFactor entries, laid
/// out as VF consecutive groups of ReplicationFactor entries, where every
/// entry of group i is either UndefMaskElem or i. Both properties are only
/// checked via assertions.
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
                                          int VF, ArrayRef<int> Mask,
                                          TTI::TargetCostKind CostKind) {
  assert(Mask.size() == (unsigned)VF * ReplicationFactor && "Bad mask size.");

  // Walk the mask one replication group at a time. A source element is
  // demanded as soon as at least one lane of its group is not undef.
  APInt DemandedSrcElts = APInt::getNullValue(VF);
  ArrayRef<int> Rest = Mask;
  for (int SrcIdx = 0; SrcIdx < VF; ++SrcIdx) {
    ArrayRef<int> Group = Rest.take_front(ReplicationFactor);
    Rest = Rest.drop_front(Group.size());

    assert(all_of(Group,
                  [SrcIdx](int Elt) {
                    return Elt == UndefMaskElem || Elt == SrcIdx;
                  }) &&
           "Not a replication mask.");

    const bool GroupIsAllUndef =
        all_of(Group, [](int Elt) { return Elt == UndefMaskElem; });
    if (!GroupIsAllUndef)
      DemandedSrcElts.setBit(SrcIdx);
  }
  assert(Rest.empty() && "Did not consume the entire mask?");

  // A replicated (destination) element is demanded iff its mask entry is
  // not undef.
  APInt DemandedReplicatedElts = APInt::getNullValue(Mask.size());
  for (size_t DstIdx = 0, NumDst = Mask.size(); DstIdx < NumDst; ++DstIdx) {
    if (Mask[DstIdx] != UndefMaskElem)
      DemandedReplicatedElts.setBit(DstIdx);
  }

  return thisT()->getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
                                            DemandedSrcElts,
                                            DemandedReplicatedElts, CostKind);
}
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,

View File

@ -833,6 +833,15 @@ InstructionCost TargetTransformInfo::getReplicationShuffleCost(
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
/// Mask-based replication shuffle cost query. Delegates to TTIImpl and
/// asserts that the reported cost is not negative before returning it.
InstructionCost TargetTransformInfo::getReplicationShuffleCost(
    Type *EltTy, int ReplicationFactor, int VF, ArrayRef<int> Mask,
    TTI::TargetCostKind CostKind) {
  InstructionCost ShuffleCost = TTIImpl->getReplicationShuffleCost(
      EltTy, ReplicationFactor, VF, Mask, CostKind);
  assert(ShuffleCost >= 0 && "TTI should not produce negative costs!");
  return ShuffleCost;
}
InstructionCost TargetTransformInfo::getMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind, const Instruction *I) const {

File diff suppressed because one or more lines are too long