[TTI] Add generic SK_Broadcast shuffle costs

I noticed while fixing PR39368 that we don't have generic shuffle costs for broadcast style shuffles.

This patch adds SK_Broadcast handling to the generic cost model. Doing so exposed that the ARM and AArch64 targets had no handling for this shuffle kind, so this patch adds a fix for both at the same time.

Differential Revision: https://reviews.llvm.org/D53570

llvm-svn: 345253
This commit is contained in:
Simon Pilgrim 2018-10-25 10:52:36 +00:00
parent 2a9c728088
commit 071e82218f
3 changed files with 54 additions and 5 deletions

View File

@ -80,6 +80,23 @@ private:
using BaseT = TargetTransformInfoImplCRTPBase<T>;
using TTI = TargetTransformInfo;
/// Model a broadcast shuffle as a single element extract followed by one
/// element insert per lane of the result, and return the summed cost.
unsigned getBroadcastShuffleOverhead(Type *Ty) {
  assert(Ty->isVectorTy() && "Can only shuffle vectors");
  T *Impl = static_cast<T *>(this);
  // Reading the zero'th element (the value being replicated) is paid once.
  unsigned Total = 0;
  Total += Impl->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
  // Writing that value is paid once for each lane of the result vector.
  const int NumLanes = Ty->getVectorNumElements();
  for (int Lane = 0; Lane < NumLanes; ++Lane)
    Total += Impl->getVectorInstrCost(Instruction::InsertElement, Ty, Lane);
  return Total;
}
/// Estimate a cost of shuffle as a sequence of extract and insert
/// operations.
unsigned getPermuteShuffleOverhead(Type *Ty) {
@ -554,6 +571,8 @@ public:
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
switch (Kind) {
case TTI::SK_Broadcast:
return getBroadcastShuffleOverhead(Tp);
case TTI::SK_Select:
case TTI::SK_Reverse:
case TTI::SK_Transpose:

View File

@ -946,9 +946,20 @@ int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
if (Kind == TTI::SK_Transpose || Kind == TTI::SK_Select ||
Kind == TTI::SK_PermuteSingleSrc) {
if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
static const CostTblEntry ShuffleTbl[] = {
// Broadcast shuffle kinds can be performed with 'dup'.
{ TTI::SK_Broadcast, MVT::v8i8, 1 },
{ TTI::SK_Broadcast, MVT::v16i8, 1 },
{ TTI::SK_Broadcast, MVT::v4i16, 1 },
{ TTI::SK_Broadcast, MVT::v8i16, 1 },
{ TTI::SK_Broadcast, MVT::v2i32, 1 },
{ TTI::SK_Broadcast, MVT::v4i32, 1 },
{ TTI::SK_Broadcast, MVT::v2i64, 1 },
{ TTI::SK_Broadcast, MVT::v2f32, 1 },
{ TTI::SK_Broadcast, MVT::v4f32, 1 },
{ TTI::SK_Broadcast, MVT::v2f64, 1 },
// Transpose shuffle kinds can be performed with 'trn1/trn2' and
// 'zip1/zip2' instructions.
{ TTI::SK_Transpose, MVT::v8i8, 1 },

View File

@ -400,10 +400,29 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
// We only handle costs of reverse and select shuffles for now.
if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Select)
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
// Broadcast shuffles: look the legalized vector type up in a table of types
// that have a unit cost; any type not in the table defers to the generic
// BaseT implementation.
if (Kind == TTI::SK_Broadcast) {
// Every entry is keyed on ISD::VECTOR_SHUFFLE plus a vector MVT and carries
// a cost of 1.
static const CostTblEntry NEONDupTbl[] = {
// VDUP handles these cases.
{ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
{ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};
// LT.second is the MVT used as the table key; LT.first multiplies the table
// cost below.
// NOTE(review): LT.first is presumably the number of legal-type pieces Tp is
// split into -- confirm against getTypeLegalizationCost.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (const auto *Entry = CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE,
LT.second))
return LT.first * Entry->Cost;
// No table entry for the legalized type: use the generic shuffle cost.
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
if (Kind == TTI::SK_Reverse) {
static const CostTblEntry NEONShuffleTbl[] = {
// Reverse shuffle cost one instruction if we are shuffling within a