forked from OSchip/llvm-project
[X86][CostModel] Improve costs for fp_to_uint/fp_to_sint for vXi8/vXi16/v2i32 results.
Differential Revision: https://reviews.llvm.org/D78893
This commit is contained in:
parent
9d1fc92b14
commit
5eff75d86a
|
@@ -1500,12 +1500,17 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
|||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 },
|
||||
|
||||
{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f64, 3 },
|
||||
{ ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f64, 3 },
|
||||
{ ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f32, 3 },
|
||||
{ ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 3 },
|
||||
|
||||
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 3 },
|
||||
{ ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 3 },
|
||||
{ ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f32, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 3 },
|
||||
{ ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 3 },
|
||||
};
|
||||
|
||||
static const TypeConversionCostTblEntry AVX512BWVLConversionTbl[] {
|
||||
|
@@ -1605,6 +1610,10 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
|||
|
||||
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 },
|
||||
|
||||
{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 3 },
|
||||
{ ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 3 },
|
||||
|
||||
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 1 },
|
||||
|
||||
|
@@ -1717,8 +1726,15 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
|||
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
|
||||
|
||||
{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
|
||||
{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 7 },
|
||||
{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 4 },
|
||||
{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f64, 3 },
|
||||
{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f64, 2 },
|
||||
{ ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 3 },
|
||||
|
||||
{ ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f64, 3 },
|
||||
{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f64, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 4 },
|
||||
{ ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 3 },
|
||||
// This node is expanded into scalarized operations but BasicTTI is overly
|
||||
// optimistic estimating its cost. It computes 3 per element (one
|
||||
// vector-extract, one scalar conversion and one vector-insert). The
|
||||
|
@@ -1769,6 +1785,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
|||
|
||||
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 4 },
|
||||
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 },
|
||||
|
||||
{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 3 },
|
||||
{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 3 },
|
||||
|
||||
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 3 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 3 },
|
||||
{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
|
||||
};
|
||||
|
||||
static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
|
||||
|
@@ -1794,16 +1817,26 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
|||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 6 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
|
||||
|
||||
{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 4 },
|
||||
{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 2 },
|
||||
{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
|
||||
{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
|
||||
{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
|
||||
{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 4 },
|
||||
|
||||
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 3 },
|
||||
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
|
||||
|
||||
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 6 },
|
||||
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 6 },
|
||||
|
||||
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 },
|
||||
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 4 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 4 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 4 },
|
||||
{ ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 },
|
||||
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },
|
||||
|
|
|
@@ -63,21 +63,21 @@ define i32 @fptosi_double_i64(i32 %arg) {
|
|||
define i32 @fptosi_double_i32(i32 %arg) {
|
||||
; SSE-LABEL: 'fptosi_double_i32'
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX-LABEL: 'fptosi_double_i32'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'fptosi_double_i32'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
|
@@ -100,15 +100,15 @@ define i32 @fptosi_double_i16(i32 %arg) {
|
|||
; AVX-LABEL: 'fptosi_double_i16'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'fptosi_double_i16'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%I16 = fptosi double undef to i16
|
||||
|
@@ -121,37 +121,37 @@ define i32 @fptosi_double_i16(i32 %arg) {
|
|||
define i32 @fptosi_double_i8(i32 %arg) {
|
||||
; SSE2-LABEL: 'fptosi_double_i8'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE42-LABEL: 'fptosi_double_i8'
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX-LABEL: 'fptosi_double_i8'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'fptosi_double_i8'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SLM-LABEL: 'fptosi_double_i8'
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%I8 = fptosi double undef to i8
|
||||
|
@@ -252,45 +252,29 @@ define i32 @fptosi_float_i32(i32 %arg) {
|
|||
}
|
||||
|
||||
define i32 @fptosi_float_i16(i32 %arg) {
|
||||
; SSE2-LABEL: 'fptosi_float_i16'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE42-LABEL: 'fptosi_float_i16'
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
; SSE-LABEL: 'fptosi_float_i16'
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX-LABEL: 'fptosi_float_i16'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'fptosi_float_i16'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SLM-LABEL: 'fptosi_float_i16'
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%I16 = fptosi float undef to i16
|
||||
%V2I16 = fptosi <2 x float> undef to <2 x i16>
|
||||
|
@@ -303,42 +287,42 @@ define i32 @fptosi_float_i16(i32 %arg) {
|
|||
define i32 @fptosi_float_i8(i32 %arg) {
|
||||
; SSE2-LABEL: 'fptosi_float_i8'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE42-LABEL: 'fptosi_float_i8'
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX-LABEL: 'fptosi_float_i8'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'fptosi_float_i8'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SLM-LABEL: 'fptosi_float_i8'
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%I8 = fptosi float undef to i8
|
||||
|
|
|
@@ -104,40 +104,26 @@ define i32 @fptoui_double_i32(i32 %arg) {
|
|||
}
|
||||
|
||||
define i32 @fptoui_double_i16(i32 %arg) {
|
||||
; SSE2-LABEL: 'fptoui_double_i16'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE42-LABEL: 'fptoui_double_i16'
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
; SSE-LABEL: 'fptoui_double_i16'
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX-LABEL: 'fptoui_double_i16'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'fptoui_double_i16'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SLM-LABEL: 'fptoui_double_i16'
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%I16 = fptoui double undef to i16
|
||||
%V2I16 = fptoui <2 x double> undef to <2 x i16>
|
||||
|
@ -149,37 +135,37 @@ define i32 @fptoui_double_i16(i32 %arg) {
|
|||
define i32 @fptoui_double_i8(i32 %arg) {
|
||||
; SSE2-LABEL: 'fptoui_double_i8'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE42-LABEL: 'fptoui_double_i8'
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX-LABEL: 'fptoui_double_i8'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'fptoui_double_i8'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SLM-LABEL: 'fptoui_double_i8'
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%I8 = fptoui double undef to i8
|
||||
|
@ -298,42 +284,42 @@ define i32 @fptoui_float_i32(i32 %arg) {
|
|||
define i32 @fptoui_float_i16(i32 %arg) {
|
||||
; SSE2-LABEL: 'fptoui_float_i16'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE42-LABEL: 'fptoui_float_i16'
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX-LABEL: 'fptoui_float_i16'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'fptoui_float_i16'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SLM-LABEL: 'fptoui_float_i16'
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%I16 = fptoui float undef to i16
|
||||
|
@ -347,42 +333,42 @@ define i32 @fptoui_float_i16(i32 %arg) {
|
|||
define i32 @fptoui_float_i8(i32 %arg) {
|
||||
; SSE2-LABEL: 'fptoui_float_i8'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE42-LABEL: 'fptoui_float_i8'
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX-LABEL: 'fptoui_float_i8'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'fptoui_float_i8'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SLM-LABEL: 'fptoui_float_i8'
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
|
||||
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%I8 = fptoui float undef to i8
|
||||
|
|
|
@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
|
|||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
|
||||
; CHECK: cost of 7 for VF 8 For instruction: %conv = fptosi float %tmp to i8
|
||||
; CHECK: cost of 4 for VF 8 For instruction: %conv = fptosi float %tmp to i8
|
||||
define void @float_to_sint8_cost(i8* noalias nocapture %a, float* noalias nocapture readonly %b) nounwind {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
|
|
@ -211,38 +211,11 @@ define void @fptoui_8f64_8i32() #0 {
|
|||
}
|
||||
|
||||
define void @fptoui_8f64_8i16() #0 {
|
||||
; SSE-LABEL: @fptoui_8f64_8i16(
|
||||
; SSE-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
|
||||
; SSE-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
|
||||
; SSE-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
|
||||
; SSE-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
|
||||
; SSE-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
|
||||
; SSE-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
|
||||
; SSE-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
|
||||
; SSE-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
|
||||
; SSE-NEXT: [[CVT0:%.*]] = fptoui double [[A0]] to i16
|
||||
; SSE-NEXT: [[CVT1:%.*]] = fptoui double [[A1]] to i16
|
||||
; SSE-NEXT: [[CVT2:%.*]] = fptoui double [[A2]] to i16
|
||||
; SSE-NEXT: [[CVT3:%.*]] = fptoui double [[A3]] to i16
|
||||
; SSE-NEXT: [[CVT4:%.*]] = fptoui double [[A4]] to i16
|
||||
; SSE-NEXT: [[CVT5:%.*]] = fptoui double [[A5]] to i16
|
||||
; SSE-NEXT: [[CVT6:%.*]] = fptoui double [[A6]] to i16
|
||||
; SSE-NEXT: [[CVT7:%.*]] = fptoui double [[A7]] to i16
|
||||
; SSE-NEXT: store i16 [[CVT0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2
|
||||
; SSE-NEXT: store i16 [[CVT1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2
|
||||
; SSE-NEXT: store i16 [[CVT2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2
|
||||
; SSE-NEXT: store i16 [[CVT3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2
|
||||
; SSE-NEXT: store i16 [[CVT4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2
|
||||
; SSE-NEXT: store i16 [[CVT5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2
|
||||
; SSE-NEXT: store i16 [[CVT6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2
|
||||
; SSE-NEXT: store i16 [[CVT7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @fptoui_8f64_8i16(
|
||||
; AVX-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
|
||||
; AVX-NEXT: [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i16>
|
||||
; AVX-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2
|
||||
; AVX-NEXT: ret void
|
||||
; CHECK-LABEL: @fptoui_8f64_8i16(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i16>
|
||||
; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
|
||||
%a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
|
||||
|
@ -522,38 +495,11 @@ define void @fptoui_8f32_8i32() #0 {
|
|||
}
|
||||
|
||||
define void @fptoui_8f32_8i16() #0 {
|
||||
; SSE-LABEL: @fptoui_8f32_8i16(
|
||||
; SSE-NEXT: [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
|
||||
; SSE-NEXT: [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
|
||||
; SSE-NEXT: [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
|
||||
; SSE-NEXT: [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
|
||||
; SSE-NEXT: [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
|
||||
; SSE-NEXT: [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
|
||||
; SSE-NEXT: [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
|
||||
; SSE-NEXT: [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
|
||||
; SSE-NEXT: [[CVT0:%.*]] = fptoui float [[A0]] to i16
|
||||
; SSE-NEXT: [[CVT1:%.*]] = fptoui float [[A1]] to i16
|
||||
; SSE-NEXT: [[CVT2:%.*]] = fptoui float [[A2]] to i16
|
||||
; SSE-NEXT: [[CVT3:%.*]] = fptoui float [[A3]] to i16
|
||||
; SSE-NEXT: [[CVT4:%.*]] = fptoui float [[A4]] to i16
|
||||
; SSE-NEXT: [[CVT5:%.*]] = fptoui float [[A5]] to i16
|
||||
; SSE-NEXT: [[CVT6:%.*]] = fptoui float [[A6]] to i16
|
||||
; SSE-NEXT: [[CVT7:%.*]] = fptoui float [[A7]] to i16
|
||||
; SSE-NEXT: store i16 [[CVT0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2
|
||||
; SSE-NEXT: store i16 [[CVT1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2
|
||||
; SSE-NEXT: store i16 [[CVT2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2
|
||||
; SSE-NEXT: store i16 [[CVT3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2
|
||||
; SSE-NEXT: store i16 [[CVT4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2
|
||||
; SSE-NEXT: store i16 [[CVT5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2
|
||||
; SSE-NEXT: store i16 [[CVT6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2
|
||||
; SSE-NEXT: store i16 [[CVT7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @fptoui_8f32_8i16(
|
||||
; AVX-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
|
||||
; AVX-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i16>
|
||||
; AVX-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2
|
||||
; AVX-NEXT: ret void
|
||||
; CHECK-LABEL: @fptoui_8f32_8i16(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i16>
|
||||
; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
|
||||
%a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
|
||||
|
|
Loading…
Reference in New Issue