From 1b62e0e91f6768647d05e60bed1969db5cdcd8c3 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Wed, 6 Jul 2016 18:26:48 +0000 Subject: [PATCH] [X86] Sort cast cost tables. NFC. Cast cost tables are now sorted, for each cast type, lexicographically on [source base type, source vector width, dest base type, base vector width]. llvm-svn: 274653 --- .../lib/Target/X86/X86TargetTransformInfo.cpp | 263 +++++++++--------- 1 file changed, 131 insertions(+), 132 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 338472adf000..cd8bde93ae2d 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -532,19 +532,19 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { // potential massive combinations (elem_num x src_type x dst_type). static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = { - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, - { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 }, - { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 }, { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 }, - { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 }, - { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f64, 1 }, - { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f64, 1 }, { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 1 }, { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f32, 1 }, { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f32, 1 }, + { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 }, + { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f64, 1 }, + { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f64, 1 }, }; static const TypeConversionCostTblEntry AVX512FConversionTbl[] = { @@ -560,43 +560,42 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { // v16i1 -> v16i32 - load + broadcast { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 }, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 }, - { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 1 }, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 }, { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, - { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, - { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, + { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 }, { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 }, + { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 }, { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 }, + { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 }, { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 }, { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 }, - { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 }, - { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 }, - { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 }, { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 }, - { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 }, - { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 }, - { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 }, - { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 }, - { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 }, - { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 }, - { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 2 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 }, - { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 }, - { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 }, - { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 2 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 2 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 5 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 }, { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 2 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 12 }, { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 26 }, @@ -608,20 +607,20 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { }; static const TypeConversionCostTblEntry AVX2ConversionTbl[] = { - { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, - { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 3 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 3 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, @@ -639,56 +638,56 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { }; static const TypeConversionCostTblEntry AVXConversionTbl[] = { - { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, - { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 7 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 7 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 }, + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 4 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 4 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 4 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, { ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 4 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 4 }, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 4 }, - { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 }, - { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, - { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 4 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 9 }, - { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 }, - { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 }, - { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, - { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i8, 3 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 }, // The generic code to compute the scalar overhead is currently broken. // Workaround this limitation by estimating the scalarization overhead // here. We have roughly 10 instructions per scalar element. @@ -697,8 +696,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 4*10 }, - { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 7 }, { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, + { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 7 }, // This node is expanded into scalarized operations but BasicTTI is overly // optimistic estimating its cost. It computes 3 per element (one // vector-extract, one scalar conversion and one vector-insert). The @@ -709,98 +708,98 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { }; static const TypeConversionCostTblEntry SSE41ConversionTbl[] = { - { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 }, - { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 2 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 2 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 2 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 2 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 }, - { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 }, - { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, - { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, - { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, - { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 4 }, - { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 4 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 2 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 2 }, - { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 1 }, - { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 1 }, - { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, - { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, - { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, - { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 }, { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 4 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 4 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 }, - { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 }, - { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 }, + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1 }, + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 }, - { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1 }, - { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1 }, - { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 }, + { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 }, + }; static const TypeConversionCostTblEntry SSE2ConversionTbl[] = { // These are somewhat magic numbers justified by looking at the output of // Intel's IACA, running some kernels and making sure when we take // legalization into account the throughput will be overestimated. - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, - // There are faster sequences for float conversions. - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, - { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 5 }, - { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 10 }, - { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, - { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 6 }, - { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 8 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, - { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, - { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 2 }, - { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 9 }, - { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 12 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 6 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 6 }, - { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 }, - { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 3 }, - { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3 }, - { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, - { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, - { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 2 }, { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 }, { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 3 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 }, + { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 6 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 6 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 9 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 12 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 10 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 6 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 8 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 5 }, - { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 10 }, - { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, - { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 3 }, - { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7 }, - { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 }, - { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 3 }, - { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, - { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 4 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 3 }, + { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 3 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, + { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 10 }, }; std::pair LTSrc = TLI->getTypeLegalizationCost(DL, Src);