From 5eff75d86a4fc72e70ee29599e32778a0e534ca5 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 27 Apr 2020 10:11:02 -0700 Subject: [PATCH] [X86][CostModel] Improve costs for fp_to_uint/fp_to_sint for vXi8/vXi16/v2i32 results. Differential Revision: https://reviews.llvm.org/D78893 --- .../lib/Target/X86/X86TargetTransformInfo.cpp | 47 +++++- llvm/test/Analysis/CostModel/X86/fptosi.ll | 130 +++++++-------- llvm/test/Analysis/CostModel/X86/fptoui.ll | 148 ++++++++---------- .../X86/fp_to_sint8-cost-model.ll | 2 +- .../Transforms/SLPVectorizer/X86/fptoui.ll | 74 ++------- 5 files changed, 175 insertions(+), 226 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index ca2bf9a05fa0..e3cbf1237d2e 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1500,12 +1500,17 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 }, { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 }, + { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f64, 3 }, + { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f64, 3 }, + { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f32, 3 }, + { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 3 }, + { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 }, - { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 2 }, - { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 2 }, + { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 3 }, + { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 3 }, { ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f32, 1 }, - { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 2 }, - { ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 2 }, + { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 3 }, + { ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 3 }, }; static const TypeConversionCostTblEntry AVX512BWVLConversionTbl[] { @@ -1605,6 +1610,10 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 1 }, { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 }, + + { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 3 }, + { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 3 }, + { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 1 }, { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 1 }, @@ -1717,8 +1726,15 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 }, - { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, - { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 7 }, + { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 4 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f64, 3 }, + { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f64, 2 }, + { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 3 }, + + { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f64, 3 }, + { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f64, 2 }, + { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 4 }, + { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 3 }, // This node is expanded into scalarized operations but BasicTTI is overly // optimistic estimating its cost. It computes 3 per element (one // vector-extract, one scalar conversion and one vector-insert). The @@ -1769,6 +1785,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 4 }, { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 }, + + { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 3 }, + { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 3 }, + + { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 3 }, + { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 3 }, + { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 }, }; static const TypeConversionCostTblEntry SSE2ConversionTbl[] = { @@ -1794,16 +1817,26 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 6 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, + { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 4 }, + { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 2 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 }, { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 }, + { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 4 }, - { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 3 }, + { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 }, { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 6 }, { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 6 }, { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 }, { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 4 }, + { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 4 }, + { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 4 }, + { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 }, + { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 2 }, + { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 }, + { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 }, { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 }, { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 }, diff --git a/llvm/test/Analysis/CostModel/X86/fptosi.ll b/llvm/test/Analysis/CostModel/X86/fptosi.ll index c30041af018c..adc294f1cbec 100644 --- a/llvm/test/Analysis/CostModel/X86/fptosi.ll +++ b/llvm/test/Analysis/CostModel/X86/fptosi.ll @@ -63,21 +63,21 @@ define i32 @fptosi_double_i64(i32 %arg) { define i32 @fptosi_double_i32(i32 %arg) { ; SSE-LABEL: 'fptosi_double_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptosi_double_i32' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptosi_double_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -100,15 +100,15 @@ define i32 @fptosi_double_i16(i32 %arg) { ; AVX-LABEL: 'fptosi_double_i16' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptosi_double_i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I16 = fptosi double undef to i16 @@ -121,37 +121,37 @@ define i32 @fptosi_double_i16(i32 %arg) { define i32 @fptosi_double_i8(i32 %arg) { ; SSE2-LABEL: 'fptosi_double_i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fptosi_double_i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptosi_double_i8' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> -; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptosi_double_i8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'fptosi_double_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> -; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> -; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I8 = fptosi double undef to i8 @@ -252,45 +252,29 @@ define i32 @fptosi_float_i32(i32 %arg) { } define i32 @fptosi_float_i16(i32 %arg) { -; SSE2-LABEL: 'fptosi_float_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; SSE42-LABEL: 'fptosi_float_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE-LABEL: 'fptosi_float_i16' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptosi_float_i16' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptosi_float_i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; SLM-LABEL: 'fptosi_float_i16' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I16 = fptosi float undef to i16 %V2I16 = fptosi <2 x float> undef to <2 x i16> @@ -303,42 +287,42 @@ define i32 @fptosi_float_i16(i32 %arg) { define i32 @fptosi_float_i8(i32 %arg) { ; SSE2-LABEL: 'fptosi_float_i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fptosi_float_i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptosi_float_i8' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> -; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptosi_float_i8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'fptosi_float_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> -; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> -; SLM-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I8 = fptosi float undef to i8 diff --git a/llvm/test/Analysis/CostModel/X86/fptoui.ll b/llvm/test/Analysis/CostModel/X86/fptoui.ll index 357b5fd864d8..9cfd280fa36f 100644 --- a/llvm/test/Analysis/CostModel/X86/fptoui.ll +++ b/llvm/test/Analysis/CostModel/X86/fptoui.ll @@ -104,40 +104,26 @@ define i32 @fptoui_double_i32(i32 %arg) { } define i32 @fptoui_double_i16(i32 %arg) { -; SSE2-LABEL: 'fptoui_double_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; SSE42-LABEL: 'fptoui_double_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE-LABEL: 'fptoui_double_i16' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptoui_double_i16' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptoui_double_i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; SLM-LABEL: 'fptoui_double_i16' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> -; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> -; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I16 = fptoui double undef to i16 %V2I16 = fptoui <2 x double> undef to <2 x i16> @@ -149,37 +135,37 @@ define i32 @fptoui_double_i16(i32 %arg) { define i32 @fptoui_double_i8(i32 %arg) { ; SSE2-LABEL: 'fptoui_double_i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fptoui_double_i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptoui_double_i8' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> -; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptoui_double_i8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'fptoui_double_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> -; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> -; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I8 = fptoui double undef to i8 @@ -298,42 +284,42 @@ define i32 @fptoui_float_i32(i32 %arg) { define i32 @fptoui_float_i16(i32 %arg) { ; SSE2-LABEL: 'fptoui_float_i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fptoui_float_i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptoui_float_i16' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptoui_float_i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'fptoui_float_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16 -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> -; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> -; SLM-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I16 = fptoui float undef to i16 @@ -347,42 +333,42 @@ define i32 @fptoui_float_i16(i32 %arg) { define i32 @fptoui_float_i8(i32 %arg) { ; SSE2-LABEL: 'fptoui_float_i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fptoui_float_i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptoui_float_i8' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8> -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> -; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptoui_float_i8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'fptoui_float_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8> -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> -; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> -; SLM-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I8 = fptoui float undef to i8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll index b3a0710545d7..b5fed033e327 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll @@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-apple-macosx10.8.0" -; CHECK: cost of 7 for VF 8 For instruction: %conv = fptosi float %tmp to i8 +; CHECK: cost of 4 for VF 8 For instruction: %conv = fptosi float %tmp to i8 define void @float_to_sint8_cost(i8* noalias nocapture %a, float* noalias nocapture readonly %b) nounwind { entry: br label %for.body diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll b/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll index dafab8ee72c2..216cd7a68693 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll @@ -211,38 +211,11 @@ define void @fptoui_8f64_8i32() #0 { } define void @fptoui_8f64_8i16() #0 { -; SSE-LABEL: @fptoui_8f64_8i16( -; SSE-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8 -; SSE-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8 -; SSE-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8 -; SSE-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8 -; SSE-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8 -; SSE-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8 -; SSE-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8 -; SSE-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8 -; SSE-NEXT: [[CVT0:%.*]] = fptoui double [[A0]] to i16 -; SSE-NEXT: [[CVT1:%.*]] = fptoui double [[A1]] to i16 -; SSE-NEXT: [[CVT2:%.*]] = fptoui double [[A2]] to i16 -; SSE-NEXT: [[CVT3:%.*]] = fptoui double [[A3]] to i16 -; SSE-NEXT: [[CVT4:%.*]] = fptoui double [[A4]] to i16 -; SSE-NEXT: [[CVT5:%.*]] = fptoui double [[A5]] to i16 -; SSE-NEXT: [[CVT6:%.*]] = fptoui double [[A6]] to i16 -; SSE-NEXT: [[CVT7:%.*]] = fptoui double [[A7]] to i16 -; SSE-NEXT: store i16 [[CVT0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2 -; SSE-NEXT: store i16 [[CVT1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2 -; SSE-NEXT: store i16 [[CVT2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2 -; SSE-NEXT: store i16 [[CVT3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2 -; SSE-NEXT: store i16 [[CVT4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2 -; SSE-NEXT: store i16 [[CVT5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2 -; SSE-NEXT: store i16 [[CVT6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2 -; SSE-NEXT: store i16 [[CVT7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2 -; SSE-NEXT: ret void -; -; AVX-LABEL: @fptoui_8f64_8i16( -; AVX-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8 -; AVX-NEXT: [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i16> -; AVX-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2 -; AVX-NEXT: ret void +; CHECK-LABEL: @fptoui_8f64_8i16( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i16> +; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2 +; CHECK-NEXT: ret void ; %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8 %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8 @@ -522,38 +495,11 @@ define void @fptoui_8f32_8i32() #0 { } define void @fptoui_8f32_8i16() #0 { -; SSE-LABEL: @fptoui_8f32_8i16( -; SSE-NEXT: [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4 -; SSE-NEXT: [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4 -; SSE-NEXT: [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4 -; SSE-NEXT: [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4 -; SSE-NEXT: [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4 -; SSE-NEXT: [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4 -; SSE-NEXT: [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4 -; SSE-NEXT: [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4 -; SSE-NEXT: [[CVT0:%.*]] = fptoui float [[A0]] to i16 -; SSE-NEXT: [[CVT1:%.*]] = fptoui float [[A1]] to i16 -; SSE-NEXT: [[CVT2:%.*]] = fptoui float [[A2]] to i16 -; SSE-NEXT: [[CVT3:%.*]] = fptoui float [[A3]] to i16 -; SSE-NEXT: [[CVT4:%.*]] = fptoui float [[A4]] to i16 -; SSE-NEXT: [[CVT5:%.*]] = fptoui float [[A5]] to i16 -; SSE-NEXT: [[CVT6:%.*]] = fptoui float [[A6]] to i16 -; SSE-NEXT: [[CVT7:%.*]] = fptoui float [[A7]] to i16 -; SSE-NEXT: store i16 [[CVT0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2 -; SSE-NEXT: store i16 [[CVT1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2 -; SSE-NEXT: store i16 [[CVT2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2 -; SSE-NEXT: store i16 [[CVT3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2 -; SSE-NEXT: store i16 [[CVT4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2 -; SSE-NEXT: store i16 [[CVT5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2 -; SSE-NEXT: store i16 [[CVT6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2 -; SSE-NEXT: store i16 [[CVT7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2 -; SSE-NEXT: ret void -; -; AVX-LABEL: @fptoui_8f32_8i16( -; AVX-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4 -; AVX-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i16> -; AVX-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2 -; AVX-NEXT: ret void +; CHECK-LABEL: @fptoui_8f32_8i16( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4 +; CHECK-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i16> +; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2 +; CHECK-NEXT: ret void ; %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4 %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4