[CostModel][X86] Merged SK_PermuteSingleSrc/SK_PermuteTwoSrc into common shuffle cost LUTs. NFCI.

llvm-svn: 291146
This commit is contained in:
Simon Pilgrim 2017-01-05 17:56:19 +00:00
parent 58a0dcee80
commit f74700aa8c
1 changed file with 233 additions and 278 deletions

View File

@ -605,17 +605,57 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
// 64-bit packed integer vectors (v2i32) are promoted to type v2i64. // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (Kind == TTI::SK_Reverse || Kind == TTI::SK_Alternate ||
Kind == TTI::SK_Broadcast) {
// For Broadcasts we are splatting the first element from the first input // For Broadcasts we are splatting the first element from the first input
// register, so only need to reference that input and all the output // register, so only need to reference that input and all the output
// registers are the same. // registers are the same.
if (Kind == TTI::SK_Broadcast) if (Kind == TTI::SK_Broadcast)
LT.first = 1; LT.first = 1;
// We are going to permute multiple sources and the result will be in multiple
// destinations. Providing an accurate cost only for splits where the element
// type remains the same.
if (Kind == TTI::SK_PermuteSingleSrc && LT.first != 1) {
MVT LegalVT = LT.second;
if (LegalVT.getVectorElementType().getSizeInBits() ==
Tp->getVectorElementType()->getPrimitiveSizeInBits() &&
LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) {
unsigned VecTySize = DL.getTypeStoreSize(Tp);
unsigned LegalVTSize = LegalVT.getStoreSize();
// Number of source vectors after legalization:
unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
// Number of destination vectors after legalization:
unsigned NumOfDests = LT.first;
Type *SingleOpTy = VectorType::get(Tp->getVectorElementType(),
LegalVT.getVectorNumElements());
unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
return NumOfShuffles *
getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, 0, nullptr);
}
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
// For 2-input shuffles, we must account for splitting the 2 inputs into many.
if (Kind == TTI::SK_PermuteTwoSrc && LT.first != 1) {
// We assume that source and destination have the same vector type.
int NumOfDests = LT.first;
int NumOfShufflesPerDest = LT.first * 2 - 1;
LT.first = NumOfDests * NumOfShufflesPerDest;
}
static const CostTblEntry AVX512VBMIShuffleTbl[] = { static const CostTblEntry AVX512VBMIShuffleTbl[] = {
{ TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb
{ TTI::SK_Reverse, MVT::v32i8, 1 } // vpermb { TTI::SK_Reverse, MVT::v32i8, 1 }, // vpermb
{ TTI::SK_PermuteSingleSrc, MVT::v64i8, 1 }, // vpermb
{ TTI::SK_PermuteSingleSrc, MVT::v32i8, 1 }, // vpermb
{ TTI::SK_PermuteTwoSrc, MVT::v64i8, 1 }, // vpermt2b
{ TTI::SK_PermuteTwoSrc, MVT::v32i8, 1 }, // vpermt2b
{ TTI::SK_PermuteTwoSrc, MVT::v16i8, 1 } // vpermt2b
}; };
if (ST->hasVBMI()) if (ST->hasVBMI())
@ -629,8 +669,21 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
{ TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
{ TTI::SK_Reverse, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128 { TTI::SK_Reverse, MVT::v64i8, 6 }, // vextracti64x4 + 2*vperm2i128
// + 2*pshufb + vinserti64x4 // + 2*pshufb + vinserti64x4
{ TTI::SK_PermuteSingleSrc, MVT::v32i16, 1 }, // vpermw
{ TTI::SK_PermuteSingleSrc, MVT::v16i16, 1 }, // vpermw
{ TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // vpermw
{ TTI::SK_PermuteSingleSrc, MVT::v64i8, 8 }, // extend to v32i16
{ TTI::SK_PermuteSingleSrc, MVT::v32i8, 3 }, // vpermw + zext/trunc
{ TTI::SK_PermuteTwoSrc, MVT::v32i16, 1 }, // vpermt2w
{ TTI::SK_PermuteTwoSrc, MVT::v16i16, 1 }, // vpermt2w
{ TTI::SK_PermuteTwoSrc, MVT::v8i16, 1 }, // vpermt2w
{ TTI::SK_PermuteTwoSrc, MVT::v32i8, 3 }, // zext + vpermt2w + trunc
{ TTI::SK_PermuteTwoSrc, MVT::v64i8, 19 }, // 6 * v32i8 + 1
{ TTI::SK_PermuteTwoSrc, MVT::v16i8, 3 } // zext + vpermt2w + trunc
}; };
if (ST->hasBWI()) if (ST->hasBWI())
@ -647,12 +700,38 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd { TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd
{ TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps
{ TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq { TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq
{ TTI::SK_Reverse, MVT::v16i32, 1 } // vpermd { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd
{ TTI::SK_PermuteSingleSrc, MVT::v8f64, 1 }, // vpermpd
{ TTI::SK_PermuteSingleSrc, MVT::v4f64, 1 }, // vpermpd
{ TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // vpermpd
{ TTI::SK_PermuteSingleSrc, MVT::v16f32, 1 }, // vpermps
{ TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps
{ TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // vpermps
{ TTI::SK_PermuteSingleSrc, MVT::v8i64, 1 }, // vpermq
{ TTI::SK_PermuteSingleSrc, MVT::v4i64, 1 }, // vpermq
{ TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // vpermq
{ TTI::SK_PermuteSingleSrc, MVT::v16i32, 1 }, // vpermd
{ TTI::SK_PermuteSingleSrc, MVT::v8i32, 1 }, // vpermd
{ TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 }, // vpermd
{ TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb
{ TTI::SK_PermuteTwoSrc, MVT::v8f64, 1 }, // vpermt2pd
{ TTI::SK_PermuteTwoSrc, MVT::v16f32, 1 }, // vpermt2ps
{ TTI::SK_PermuteTwoSrc, MVT::v8i64, 1 }, // vpermt2q
{ TTI::SK_PermuteTwoSrc, MVT::v16i32, 1 }, // vpermt2d
{ TTI::SK_PermuteTwoSrc, MVT::v4f64, 1 }, // vpermt2pd
{ TTI::SK_PermuteTwoSrc, MVT::v8f32, 1 }, // vpermt2ps
{ TTI::SK_PermuteTwoSrc, MVT::v4i64, 1 }, // vpermt2q
{ TTI::SK_PermuteTwoSrc, MVT::v8i32, 1 }, // vpermt2d
{ TTI::SK_PermuteTwoSrc, MVT::v2f64, 1 }, // vpermt2pd
{ TTI::SK_PermuteTwoSrc, MVT::v4f32, 1 }, // vpermt2ps
{ TTI::SK_PermuteTwoSrc, MVT::v2i64, 1 }, // vpermt2q
{ TTI::SK_PermuteTwoSrc, MVT::v4i32, 1 } // vpermt2d
}; };
if (ST->hasAVX512()) if (ST->hasAVX512())
if (const auto *Entry = if (const auto *Entry = CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost; return LT.first * Entry->Cost;
static const CostTblEntry AVX2ShuffleTbl[] = { static const CostTblEntry AVX2ShuffleTbl[] = {
@ -770,130 +849,6 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second)) if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost; return LT.first * Entry->Cost;
} else if (Kind == TTI::SK_PermuteTwoSrc) {
// We assume that source and destination have the same vector type.
int NumOfDests = LT.first;
int NumOfShufflesPerDest = LT.first * 2 - 1;
int NumOfShuffles = NumOfDests * NumOfShufflesPerDest;
static const CostTblEntry AVX512VBMIShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v64i8, 1}, // vpermt2b
{ISD::VECTOR_SHUFFLE, MVT::v32i8, 1}, // vpermt2b
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 1} // vpermt2b
};
if (ST->hasVBMI())
if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl,
ISD::VECTOR_SHUFFLE, LT.second))
return NumOfShuffles * Entry->Cost;
static const CostTblEntry AVX512BWShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v32i16, 1}, // vpermt2w
{ISD::VECTOR_SHUFFLE, MVT::v16i16, 1}, // vpermt2w
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, // vpermt2w
{ISD::VECTOR_SHUFFLE, MVT::v32i8, 3}, // zext + vpermt2w + trunc
{ISD::VECTOR_SHUFFLE, MVT::v64i8, 19}, // 6 * v32i8 + 1
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // zext + vpermt2w + trunc
};
if (ST->hasBWI())
if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl,
ISD::VECTOR_SHUFFLE, LT.second))
return NumOfShuffles * Entry->Cost;
static const CostTblEntry AVX512ShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v8f64, 1}, // vpermt2pd
{ISD::VECTOR_SHUFFLE, MVT::v16f32, 1}, // vpermt2ps
{ISD::VECTOR_SHUFFLE, MVT::v8i64, 1}, // vpermt2q
{ISD::VECTOR_SHUFFLE, MVT::v16i32, 1}, // vpermt2d
{ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vpermt2pd
{ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vpermt2ps
{ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vpermt2q
{ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vpermt2d
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // vpermt2pd
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, // vpermt2ps
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // vpermt2q
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 1} // vpermt2d
};
if (ST->hasAVX512())
if (const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
return NumOfShuffles * Entry->Cost;
} else if (Kind == TTI::SK_PermuteSingleSrc) {
if (LT.first == 1) {
static const CostTblEntry AVX512VBMIShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v64i8, 1}, // vpermb
{ISD::VECTOR_SHUFFLE, MVT::v32i8, 1} // vpermb
};
if (ST->hasVBMI())
if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl,
ISD::VECTOR_SHUFFLE, LT.second))
return Entry->Cost;
static const CostTblEntry AVX512BWShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v32i16, 1}, // vpermw
{ISD::VECTOR_SHUFFLE, MVT::v16i16, 1}, // vpermw
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, // vpermw
{ISD::VECTOR_SHUFFLE, MVT::v64i8, 8}, // extend to v32i16
{ISD::VECTOR_SHUFFLE, MVT::v32i8, 3} // vpermw + zext/trunc
};
if (ST->hasBWI())
if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl,
ISD::VECTOR_SHUFFLE, LT.second))
return Entry->Cost;
static const CostTblEntry AVX512ShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v8f64, 1}, // vpermpd
{ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vpermpd
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // vpermpd
{ISD::VECTOR_SHUFFLE, MVT::v16f32, 1}, // vpermps
{ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vpermps
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, // vpermps
{ISD::VECTOR_SHUFFLE, MVT::v8i64, 1}, // vpermq
{ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vpermq
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // vpermq
{ISD::VECTOR_SHUFFLE, MVT::v16i32, 1}, // vpermd
{ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vpermd
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, // vpermd
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 1} // pshufb
};
if (ST->hasAVX512())
if (const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
return Entry->Cost;
} else {
// We are going to permute multiple sources and the result will be in
// multiple destinations. Providing an accurate cost only for splits where
// the element type remains the same.
MVT LegalVT = LT.second;
if (LegalVT.getVectorElementType().getSizeInBits() ==
Tp->getVectorElementType()->getPrimitiveSizeInBits() &&
LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) {
unsigned VecTySize = DL.getTypeStoreSize(Tp);
unsigned LegalVTSize = LegalVT.getStoreSize();
// Number of source vectors after legalization:
unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
// Number of destination vectors after legalization:
unsigned NumOfDests = LT.first;
Type *SingleOpTy = VectorType::get(Tp->getVectorElementType(),
LegalVT.getVectorNumElements());
unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
return NumOfShuffles *
getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, 0, nullptr);
}
}
}
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
} }