forked from OSchip/llvm-project
[X86] Sort cast cost tables. NFC.
Cast cost tables are now sorted, for each cast type, lexicographically on [source base type, source vector width, dest base type, dest vector width]. llvm-svn: 274653
This commit is contained in:
parent
5c574341f5
commit
1b62e0e91f
|
@ -532,19 +532,19 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
|
|||
// potential massive combinations (elem_num x src_type x dst_type).
|
||||
|
||||
static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = {
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 },
|
||||
|
||||
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f64, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f64, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f32, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f32, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f64, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f64, 1 },
|
||||
};
|
||||
|
||||
static const TypeConversionCostTblEntry AVX512FConversionTbl[] = {
|
||||
|
@ -560,43 +560,42 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
|
|||
// v16i1 -> v16i32 - load + broadcast
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
|
||||
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
|
||||
|
||||
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
|
||||
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 },
|
||||
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
|
||||
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
|
||||
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
|
||||
|
||||
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
|
||||
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 5 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 12 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 26 },
|
||||
|
@ -608,20 +607,20 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
|
|||
};
|
||||
|
||||
static const TypeConversionCostTblEntry AVX2ConversionTbl[] = {
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 3 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 3 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
|
||||
|
||||
|
@ -639,56 +638,56 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
|
|||
};
|
||||
|
||||
static const TypeConversionCostTblEntry AVXConversionTbl[] = {
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 7 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 7 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 4 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 4 },
|
||||
|
||||
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 4 },
|
||||
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },
|
||||
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 },
|
||||
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 4 },
|
||||
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 4 },
|
||||
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 4 },
|
||||
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },
|
||||
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 },
|
||||
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 4 },
|
||||
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 9 },
|
||||
|
||||
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i8, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
|
||||
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 },
|
||||
// The generic code to compute the scalar overhead is currently broken.
|
||||
// Workaround this limitation by estimating the scalarization overhead
|
||||
// here. We have roughly 10 instructions per scalar element.
|
||||
|
@ -697,8 +696,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
|
|||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 4*10 },
|
||||
|
||||
{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 7 },
|
||||
{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
|
||||
{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 7 },
|
||||
// This node is expanded into scalarized operations but BasicTTI is overly
|
||||
// optimistic estimating its cost. It computes 3 per element (one
|
||||
// vector-extract, one scalar conversion and one vector-insert). The
|
||||
|
@ -709,98 +708,98 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
|
|||
};
|
||||
|
||||
static const TypeConversionCostTblEntry SSE41ConversionTbl[] = {
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 2 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 2 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 2 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
|
||||
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 4 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 4 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 2 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 2 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 4 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 4 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
|
||||
|
||||
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
|
||||
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 },
|
||||
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 },
|
||||
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1 },
|
||||
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1 },
|
||||
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
|
||||
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
|
||||
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1 },
|
||||
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1 },
|
||||
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 },
|
||||
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 },
|
||||
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
|
||||
|
||||
};
|
||||
|
||||
static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
|
||||
// These are somewhat magic numbers justified by looking at the output of
|
||||
// Intel's IACA, running some kernels and making sure when we take
|
||||
// legalization into account the throughput will be overestimated.
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
|
||||
// There are faster sequences for float conversions.
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
|
||||
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 5 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 10 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
|
||||
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 6 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 8 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 9 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 12 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 6 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 6 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 3 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 3 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 6 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 6 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 9 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 12 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 10 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 6 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 8 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 5 },
|
||||
|
||||
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 10 },
|
||||
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 },
|
||||
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 3 },
|
||||
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7 },
|
||||
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },
|
||||
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 3 },
|
||||
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 },
|
||||
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 },
|
||||
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 4 },
|
||||
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 },
|
||||
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 },
|
||||
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 3 },
|
||||
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 3 },
|
||||
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },
|
||||
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7 },
|
||||
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 },
|
||||
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 10 },
|
||||
};
|
||||
|
||||
std::pair<int, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
|
||||
|
|
Loading…
Reference in New Issue