From a7da0296a663094e661c54a5ba2c4ce0239c312b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 6 Jul 2021 19:49:01 +0100 Subject: [PATCH] [CostModel][X86] Adjust sitofp/uitofp SSE/AVX legalized costs based on llvm-mca reports. Update (mainly) vXi8/vXi16 -> vXf32/vXf64 sitofp/uitofp costs based on the worst case costs from the script in D103695. Move to using legalized types wherever possible, which allows us to prune the cost tables. --- .../lib/Target/X86/X86TargetTransformInfo.cpp | 182 ++++++++++-------- llvm/test/Analysis/CostModel/X86/cast.ll | 124 +++++++----- llvm/test/Analysis/CostModel/X86/sitofp.ll | 98 +++++----- llvm/test/Analysis/CostModel/X86/uitofp.ll | 104 +++++----- .../SLPVectorizer/X86/sitofp-inseltpoison.ll | 38 +--- .../Transforms/SLPVectorizer/X86/sitofp.ll | 38 +--- .../Transforms/SLPVectorizer/X86/uitofp.ll | 59 +----- 7 files changed, 302 insertions(+), 341 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 87b96b699928..561442d34b02 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1673,24 +1673,24 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, - { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i8, 3 }, // FIXME: May not be right - { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 3 }, // FIXME: May not be right + { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i8, 3 }, // FIXME: May not be right + { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 3 }, // FIXME: May not be right { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 }, { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 }, - { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 }, - { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 }, + { ISD::SINT_TO_FP, MVT::v8f64, MVT::v16i8, 2 }, + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 1 }, { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 }, - { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 }, - { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 }, + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 1 }, { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 }, + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 }, { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 }, { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 }, - { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 }, - { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v16i8, 2 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 1 }, { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 }, - { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 1 }, { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 }, { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 }, @@ -1820,16 +1820,18 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 1 }, // vpternlogq { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 2 }, // vpternlogq+psrlq + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 1 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v16i8, 1 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 1 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 1 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 1 }, { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 2 }, - { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 2 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 5 }, - { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 1 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v16i8, 1 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 1 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 1 }, { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, @@ -1891,10 +1893,18 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 4 }, { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 15 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 2 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v16i8, 2 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 2 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 3 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 2 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v16i8, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 2 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 }, { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 2 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 2 }, @@ -1923,77 +1933,73 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 3 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3 }, - { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 4 }, - { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 5 }, - { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 4 }, - { ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 9 }, - { ISD::TRUNCATE, MVT::v16i1, MVT::v16i64, 11 }, + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 4 }, + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 5 }, + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 4 }, + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 9 }, + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i64, 11 }, - { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, // and+extract+packuswb - { ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 2 }, // and+packusdw+packuswb - { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 }, - { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, - { ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 4 }, - { ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 3 }, // and+extract+2*packusdw - { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 }, - { ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 11 }, - { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 9 }, - { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, - { ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 11 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, // and+extract+packuswb + { ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 2 }, // and+packusdw+packuswb + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 4 }, + { ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 3 }, // and+extract+2*packusdw + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 11 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 9 }, + { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 11 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, - { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 }, - { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, - { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i8, 3 }, - { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 }, - { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 }, - { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, - { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 }, - { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, - { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 4 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 5 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 8 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, + { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v16i8, 4 }, + { ISD::SINT_TO_FP, MVT::v4f64, MVT::v16i8, 2 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + { ISD::SINT_TO_FP, MVT::v4f64, MVT::v8i16, 2 }, + { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, + { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 4 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 5 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 8 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 }, - { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 }, - { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, - { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 4 }, - { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 10 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 }, - { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 18 }, - { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 10 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 }, - { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 10 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 }, - { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 6 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v16i8, 4 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v16i8, 2 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v8i16, 2 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 4 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 4 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 10 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 10 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 18 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 10 }, - { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 4 }, - { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f64, 3 }, - { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f64, 2 }, - { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f64, 2 }, - { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 3 }, - { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f32, 2 }, - { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, 5 }, + { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 4 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f64, 3 }, + { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f64, 2 }, + { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f64, 2 }, + { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 3 }, + { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f32, 2 }, + { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, 5 }, - { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 5 }, - { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 9 }, - { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 5 }, - { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f64, 3 }, - { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f64, 2 }, - { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 9 }, - { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 4 }, - { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 3 }, - { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 9 }, - { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 19 }, + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 5 }, + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 9 }, + { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 5 }, + { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f64, 3 }, + { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f64, 2 }, + { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 9 }, + { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 4 }, + { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 3 }, + { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 9 }, + { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 19 }, { ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 1 }, { ISD::FP_ROUND, MVT::v4f32, MVT::v4f64, 1 }, @@ -2057,6 +2063,10 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 1 }, { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 1 }, { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 1 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 1 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 1 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 1 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 }, @@ -2065,6 +2075,13 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 1 }, { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 4 }, { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 1 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 1 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 1 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 3 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 3 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 2 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 12 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 22 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 4 }, @@ -2102,6 +2119,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 4 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 4 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 4 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 7 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 7 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 15 }, diff --git a/llvm/test/Analysis/CostModel/X86/cast.ll b/llvm/test/Analysis/CostModel/X86/cast.ll index 7d0a3fd8fba1..444398e25329 100644 --- a/llvm/test/Analysis/CostModel/X86/cast.ll +++ b/llvm/test/Analysis/CostModel/X86/cast.ll @@ -386,10 +386,10 @@ define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { ; SSE41-LABEL: 'sitofp4' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> -; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> -; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -397,10 +397,10 @@ define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { ; AVX1-LABEL: 'sitofp4' ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -408,10 +408,10 @@ define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { ; AVX2-LABEL: 'sitofp4' ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -419,10 +419,10 @@ define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { ; AVX512-LABEL: 'sitofp4' ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -447,30 +447,30 @@ define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE41-LABEL: 'sitofp8' -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'sitofp8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'sitofp8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'sitofp8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -482,23 +482,34 @@ define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { } define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { -; SSE-LABEL: 'uitofp4' -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSE2-LABEL: 'uitofp4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'uitofp4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'uitofp4' ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> @@ -507,9 +518,9 @@ define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { ; AVX2-LABEL: 'uitofp4' ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> @@ -518,9 +529,9 @@ define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { ; AVX512-LABEL: 'uitofp4' ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> @@ -538,31 +549,38 @@ define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { } define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { -; SSE-LABEL: 'uitofp8' -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSE2-LABEL: 'uitofp8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'uitofp8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'uitofp8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float> -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'uitofp8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'uitofp8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/X86/sitofp.ll b/llvm/test/Analysis/CostModel/X86/sitofp.ll index 69af3a273a33..b6d17253128c 100644 --- a/llvm/test/Analysis/CostModel/X86/sitofp.ll +++ b/llvm/test/Analysis/CostModel/X86/sitofp.ll @@ -20,29 +20,29 @@ define i32 @sitofp_i8_double() { ; ; SSE42-LABEL: 'sitofp_i8_double' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f64 = sitofp i8 undef to double -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double> -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sitofp_i8_double' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f64 = sitofp i8 undef to double -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double> -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double> -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cvt_v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sitofp_i8_double' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f64 = sitofp i8 undef to double -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'sitofp_i8_double' ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f64 = sitofp i8 undef to double -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -63,29 +63,29 @@ define i32 @sitofp_i16_double() { ; ; SSE42-LABEL: 'sitofp_i16_double' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f64 = sitofp i16 undef to double -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double> -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sitofp_i16_double' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f64 = sitofp i16 undef to double -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double> -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double> -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cvt_v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sitofp_i16_double' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f64 = sitofp i16 undef to double -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'sitofp_i16_double' ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f64 = sitofp i16 undef to double -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -193,34 +193,34 @@ define i32 @sitofp_i8_float() { ; ; SSE42-LABEL: 'sitofp_i8_float' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f32 = sitofp i8 undef to float -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sitofp_i8_float' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f32 = sitofp i8 undef to float -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float> -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sitofp_i8_float' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f32 = sitofp i8 undef to float -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'sitofp_i8_float' ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f32 = sitofp i8 undef to float -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %cvt_i8_f32 = sitofp i8 undef to float @@ -242,34 +242,34 @@ define i32 @sitofp_i16_float() { ; ; SSE42-LABEL: 'sitofp_i16_float' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f32 = sitofp i16 undef to float -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sitofp_i16_float' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f32 = sitofp i16 undef to float -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float> -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float> -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sitofp_i16_float' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f32 = sitofp i16 undef to float -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'sitofp_i16_float' ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f32 = sitofp i16 undef to float -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float> -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %cvt_i16_f32 = sitofp i16 undef to float diff --git a/llvm/test/Analysis/CostModel/X86/uitofp.ll b/llvm/test/Analysis/CostModel/X86/uitofp.ll index e7dedf74ff0f..5f83033a1eea 100644 --- a/llvm/test/Analysis/CostModel/X86/uitofp.ll +++ b/llvm/test/Analysis/CostModel/X86/uitofp.ll @@ -20,28 +20,28 @@ define i32 @uitofp_i8_double() { ; ; SSE42-LABEL: 'uitofp_i8_double' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f64 = uitofp i8 undef to double -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double> -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'uitofp_i8_double' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f64 = uitofp i8 undef to double -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double> -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cvt_v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'uitofp_i8_double' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f64 = uitofp i8 undef to double -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'uitofp_i8_double' ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f64 = uitofp i8 undef to double -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -63,28 +63,28 @@ define i32 @uitofp_i16_double() { ; ; SSE42-LABEL: 'uitofp_i16_double' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f64 = uitofp i16 undef to double -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double> -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'uitofp_i16_double' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f64 = uitofp i16 undef to double -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double> -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cvt_v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'uitofp_i16_double' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f64 = uitofp i16 undef to double -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'uitofp_i16_double' ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f64 = uitofp i16 undef to double -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -106,9 +106,9 @@ define i32 @uitofp_i32_double() { ; ; SSE42-LABEL: 'uitofp_i32_double' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = uitofp i32 undef to double -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cvt_v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double> -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cvt_v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double> -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cvt_v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'uitofp_i32_double' @@ -193,34 +193,34 @@ define i32 @uitofp_i8_float() { ; ; SSE42-LABEL: 'uitofp_i8_float' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f32 = uitofp i8 undef to float -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'uitofp_i8_float' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f32 = uitofp i8 undef to float -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float> -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'uitofp_i8_float' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f32 = uitofp i8 undef to float -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'uitofp_i8_float' ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f32 = uitofp i8 undef to float -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %cvt_i8_f32 = uitofp i8 undef to float @@ -242,34 +242,34 @@ define i32 @uitofp_i16_float() { ; ; SSE42-LABEL: 'uitofp_i16_float' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f32 = uitofp i16 undef to float -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'uitofp_i16_float' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f32 = uitofp i16 undef to float -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float> -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'uitofp_i16_float' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f32 = uitofp i16 undef to float -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'uitofp_i16_float' ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f32 = uitofp i16 undef to float -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %cvt_i16_f32 = uitofp i16 undef to float @@ -283,7 +283,7 @@ define i32 @uitofp_i16_float() { define i32 @uitofp_i32_float() { ; SSE2-LABEL: 'uitofp_i32_float' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_i32_f32 = uitofp i32 undef to float -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float> ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float> ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float> ; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float> @@ -291,15 +291,15 @@ define i32 @uitofp_i32_float() { ; ; SSE42-LABEL: 'uitofp_i32_float' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = uitofp i32 undef to float -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'uitofp_i32_float' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = uitofp i32 undef to float -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll index c0b1bae204ad..3418b6e01696 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll @@ -321,20 +321,11 @@ define void @sitofp_8i32_8f64() #0 { } define void @sitofp_2i16_2f64() #0 { -; SSE-LABEL: @sitofp_2i16_2f64( -; SSE-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64 -; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double> -; SSE-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 -; SSE-NEXT: ret void -; -; AVX-LABEL: @sitofp_2i16_2f64( -; AVX-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 -; AVX-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 -; AVX-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to double -; AVX-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to double -; AVX-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 -; AVX-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 -; AVX-NEXT: ret void +; CHECK-LABEL: @sitofp_2i16_2f64( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64 +; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double> +; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 +; CHECK-NEXT: ret void ; %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 @@ -435,20 +426,11 @@ define void @sitofp_8i16_8f64() #0 { } define void @sitofp_2i8_2f64() #0 { -; SSE-LABEL: @sitofp_2i8_2f64( -; SSE-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64 -; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double> -; SSE-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 -; SSE-NEXT: ret void -; -; AVX-LABEL: @sitofp_2i8_2f64( -; AVX-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64 -; AVX-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1 -; AVX-NEXT: [[CVT0:%.*]] = sitofp i8 [[LD0]] to double -; AVX-NEXT: [[CVT1:%.*]] = sitofp i8 [[LD1]] to double -; AVX-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 -; AVX-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 -; AVX-NEXT: ret void +; CHECK-LABEL: @sitofp_2i8_2f64( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64 +; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double> +; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 +; CHECK-NEXT: ret void ; %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64 %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll index f0cb3e542875..5cec75d85fea 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll @@ -321,20 +321,11 @@ define void @sitofp_8i32_8f64() #0 { } define void @sitofp_2i16_2f64() #0 { -; SSE-LABEL: @sitofp_2i16_2f64( -; SSE-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64 -; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double> -; SSE-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 -; SSE-NEXT: ret void -; -; AVX-LABEL: @sitofp_2i16_2f64( -; AVX-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 -; AVX-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 -; AVX-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to double -; AVX-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to double -; AVX-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 -; AVX-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 -; AVX-NEXT: ret void +; CHECK-LABEL: @sitofp_2i16_2f64( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64 +; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double> +; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 +; CHECK-NEXT: ret void ; %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 @@ -435,20 +426,11 @@ define void @sitofp_8i16_8f64() #0 { } define void @sitofp_2i8_2f64() #0 { -; SSE-LABEL: @sitofp_2i8_2f64( -; SSE-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64 -; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double> -; SSE-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 -; SSE-NEXT: ret void -; -; AVX-LABEL: @sitofp_2i8_2f64( -; AVX-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64 -; AVX-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1 -; AVX-NEXT: [[CVT0:%.*]] = sitofp i8 [[LD0]] to double -; AVX-NEXT: [[CVT1:%.*]] = sitofp i8 [[LD1]] to double -; AVX-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 -; AVX-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 -; AVX-NEXT: ret void +; CHECK-LABEL: @sitofp_2i8_2f64( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64 +; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double> +; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 +; CHECK-NEXT: ret void ; %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64 %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll b/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll index 67feab3d4875..b38bd8d6f88e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll @@ -258,20 +258,11 @@ define void @uitofp_8i32_8f64() #0 { } define void @uitofp_2i16_2f64() #0 { -; SSE-LABEL: @uitofp_2i16_2f64( -; SSE-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64 -; SSE-NEXT: [[TMP2:%.*]] = uitofp <2 x i16> [[TMP1]] to <2 x double> -; SSE-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 -; SSE-NEXT: ret void -; -; AVX-LABEL: @uitofp_2i16_2f64( -; AVX-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 -; AVX-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 -; AVX-NEXT: [[CVT0:%.*]] = uitofp i16 [[LD0]] to double -; AVX-NEXT: [[CVT1:%.*]] = uitofp i16 [[LD1]] to double -; AVX-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 -; AVX-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 -; AVX-NEXT: ret void +; CHECK-LABEL: @uitofp_2i16_2f64( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64 +; CHECK-NEXT: [[TMP2:%.*]] = uitofp <2 x i16> [[TMP1]] to <2 x double> +; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 +; CHECK-NEXT: ret void ; %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 @@ -372,41 +363,11 @@ define void @uitofp_8i16_8f64() #0 { } define void @uitofp_2i8_2f64() #0 { -; SSE-LABEL: @uitofp_2i8_2f64( -; SSE-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64 -; SSE-NEXT: [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double> -; SSE-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 -; SSE-NEXT: ret void -; -; AVX1-LABEL: @uitofp_2i8_2f64( -; AVX1-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64 -; AVX1-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1 -; AVX1-NEXT: [[CVT0:%.*]] = uitofp i8 [[LD0]] to double -; AVX1-NEXT: [[CVT1:%.*]] = uitofp i8 [[LD1]] to double -; AVX1-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 -; AVX1-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 -; AVX1-NEXT: ret void -; -; AVX2-LABEL: @uitofp_2i8_2f64( -; AVX2-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64 -; AVX2-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1 -; AVX2-NEXT: [[CVT0:%.*]] = uitofp i8 [[LD0]] to double -; AVX2-NEXT: [[CVT1:%.*]] = uitofp i8 [[LD1]] to double -; AVX2-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 -; AVX2-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 -; AVX2-NEXT: ret void -; -; AVX512-LABEL: @uitofp_2i8_2f64( -; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64 -; AVX512-NEXT: [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double> -; AVX512-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 -; AVX512-NEXT: ret void -; -; AVX256DQ-LABEL: @uitofp_2i8_2f64( -; AVX256DQ-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64 -; AVX256DQ-NEXT: [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double> -; AVX256DQ-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 -; AVX256DQ-NEXT: ret void +; CHECK-LABEL: @uitofp_2i8_2f64( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64 +; CHECK-NEXT: [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double> +; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 +; CHECK-NEXT: ret void ; %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64 %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1