forked from OSchip/llvm-project
[TTI][CostModel] `getUserCost()`: recognize replication shuffles and query their cost
This finally creates proper test coverage for replication shuffles, which are used by LV for conditional loads, and will allow adding a proper cost model, at least for AVX512. Reviewed By: RKSimon. Differential Revision: https://reviews.llvm.org/D113324
This commit is contained in:
parent
f8efc5c0ac
commit
a30ec4778a
|
@ -1123,6 +1123,9 @@ public:
|
|||
const APInt &DemandedSrcElts,
|
||||
const APInt &DemandedReplicatedElts,
|
||||
TTI::TargetCostKind CostKind);
|
||||
/// \return The cost of a replication shuffle that is described by an explicit
/// shuffle mask \p Mask, for element type \p EltTy, replication factor
/// \p ReplicationFactor and \p VF source elements.
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
|
||||
int VF, ArrayRef<int> Mask,
|
||||
TTI::TargetCostKind CostKind);
|
||||
|
||||
/// \return The cost of Load and Store instructions.
|
||||
InstructionCost
|
||||
|
@ -1651,6 +1654,10 @@ public:
|
|||
/// Pure-virtual replication-shuffle cost query that receives the demanded
/// source elements and the demanded replicated elements as APInt values.
virtual InstructionCost getReplicationShuffleCost(
|
||||
Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedSrcElts,
|
||||
const APInt &DemandedReplicatedElts, TTI::TargetCostKind CostKind) = 0;
|
||||
/// Pure-virtual replication-shuffle cost query that receives the shuffle mask
/// \p Mask directly instead of demanded-element APInt values.
virtual InstructionCost
|
||||
getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
|
||||
ArrayRef<int> Mask,
|
||||
TTI::TargetCostKind CostKind) = 0;
|
||||
|
||||
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||
Align Alignment,
|
||||
|
@ -2162,6 +2169,13 @@ public:
|
|||
DemandedSrcElts,
|
||||
DemandedReplicatedElts, CostKind);
|
||||
}
|
||||
/// Mask-based replication-shuffle cost query; hands every argument unchanged
/// to the wrapped implementation object \c Impl and returns its answer.
InstructionCost
getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                          ArrayRef<int> Mask,
                          TTI::TargetCostKind CostKind) override {
  InstructionCost ForwardedCost = Impl.getReplicationShuffleCost(
      EltTy, ReplicationFactor, VF, Mask, CostKind);
  return ForwardedCost;
}
|
||||
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
|
||||
unsigned AddressSpace,
|
||||
TTI::TargetCostKind CostKind,
|
||||
|
|
|
@ -550,6 +550,11 @@ public:
|
|||
TTI::TargetCostKind CostKind) {
|
||||
return 1;
|
||||
}
|
||||
/// Default answer for the mask-based replication-shuffle cost query: every
/// argument is ignored and a fixed cost of 1 is reported.
unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                                   ArrayRef<int> Mask,
                                   TTI::TargetCostKind CostKind) {
  const unsigned DefaultCost = 1;
  return DefaultCost;
}
|
||||
|
||||
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
|
||||
unsigned AddressSpace,
|
||||
|
@ -1106,6 +1111,12 @@ public:
|
|||
SubIndex,
|
||||
FixedVectorType::get(VecTy->getScalarType(), NumSubElts));
|
||||
|
||||
int ReplicationFactor, VF;
|
||||
if (Shuffle->isReplicationMask(ReplicationFactor, VF))
|
||||
return TargetTTI->getReplicationShuffleCost(
|
||||
VecSrcTy->getElementType(), ReplicationFactor, VF,
|
||||
Shuffle->getShuffleMask(), CostKind);
|
||||
|
||||
return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -1143,6 +1143,41 @@ public:
|
|||
return Cost;
|
||||
}
|
||||
|
||||
/// Cost of a replication shuffle that is described by an explicit mask.
///
/// \p Mask is expected to hold exactly \p VF * \p ReplicationFactor entries,
/// organised as \p VF consecutive groups of \p ReplicationFactor entries, where
/// each entry of group number N is either UndefMaskElem or N. The mask is
/// translated into two bit sets (demanded source elements and demanded
/// replicated elements), and the query is then forwarded through thisT() to
/// the overload that takes those bit sets.
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
                                          int VF, ArrayRef<int> Mask,
                                          TTI::TargetCostKind CostKind) {
  assert(Mask.size() == (unsigned)VF * ReplicationFactor && "Bad mask size.");

  // A source element is demanded as soon as at least one entry of its group
  // is not undef.
  APInt DemandedSrcElts = APInt::getNullValue(VF);

  ArrayRef<int> Unvisited = Mask;
  for (int SrcElt = 0; SrcElt < VF; ++SrcElt) {
    // Peel the next group of entries off the front of the unvisited tail.
    ArrayRef<int> Group = Unvisited.take_front(ReplicationFactor);
    Unvisited = Unvisited.drop_front(Group.size());

    assert(all_of(Group,
                  [SrcElt](int MaskElt) {
                    return MaskElt == UndefMaskElem || MaskElt == SrcElt;
                  }) &&
           "Not a replication mask.");

    bool GroupIsEntirelyUndef =
        all_of(Group, [](int MaskElt) { return MaskElt == UndefMaskElem; });
    if (!GroupIsEntirelyUndef)
      DemandedSrcElts.setBit(SrcElt);
  }
  assert(Unvisited.empty() && "Did not consume the entire mask?");

  // A replicated (result) element is demanded exactly when its own mask entry
  // is not undef.
  APInt DemandedReplicatedElts = APInt::getNullValue(Mask.size());
  for (auto Entry : enumerate(Mask)) {
    if (Entry.value() == UndefMaskElem)
      continue;
    DemandedReplicatedElts.setBit(Entry.index());
  }

  return thisT()->getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
                                            DemandedSrcElts,
                                            DemandedReplicatedElts, CostKind);
}
|
||||
|
||||
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||
MaybeAlign Alignment, unsigned AddressSpace,
|
||||
TTI::TargetCostKind CostKind,
|
||||
|
|
|
@ -833,6 +833,15 @@ InstructionCost TargetTransformInfo::getReplicationShuffleCost(
|
|||
assert(Cost >= 0 && "TTI should not produce negative costs!");
|
||||
return Cost;
|
||||
}
|
||||
// Mask-based replication-shuffle cost query: delegates to TTIImpl and checks
// (in assert-enabled builds) that the reported cost is not negative.
InstructionCost TargetTransformInfo::getReplicationShuffleCost(
    Type *EltTy, int ReplicationFactor, int VF, ArrayRef<int> Mask,
    TTI::TargetCostKind CostKind) {
  InstructionCost ShuffleCost = TTIImpl->getReplicationShuffleCost(
      EltTy, ReplicationFactor, VF, Mask, CostKind);

  assert(ShuffleCost >= 0 && "TTI should not produce negative costs!");
  return ShuffleCost;
}
|
||||
|
||||
InstructionCost TargetTransformInfo::getMemoryOpCost(
|
||||
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
|
||||
TTI::TargetCostKind CostKind, const Instruction *I) const {
|
||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue