diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 62577162f1d4..e7fe40379b41 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -937,14 +937,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); - // By marking FP_TO_SINT v8i16 as Custom, will trick type legalization into - // promoting v8i8 FP_TO_UINT into FP_TO_SINT. When the v8i16 FP_TO_SINT is - // split again based on the input type, this will cause an AssertSExt i16 to - // be emitted instead of an AssertZExt. This will allow packssdw followed by - // packuswb to be used to truncate to v8i8. This is necessary since packusdw - // isn't available until sse4.1. - setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index b4db1cc9bc17..0b3a5319baac 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1576,6 +1576,9 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 6 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, + { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, + { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 }, + { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 3 }, { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 6 }, diff --git a/llvm/test/Analysis/CostModel/X86/fptosi.ll b/llvm/test/Analysis/CostModel/X86/fptosi.ll index 2f8ea1352aff..7583d6e60c80 100644 --- a/llvm/test/Analysis/CostModel/X86/fptosi.ll +++ b/llvm/test/Analysis/CostModel/X86/fptosi.ll @@ -92,28 +92,28 @@ define i32 @fptosi_double_i32(i32 %arg) { define i32 @fptosi_double_i16(i32 %arg) { ; SSE-LABEL: 'fptosi_double_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptosi_double_i16' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptosi_double_i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'fptosi_double_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -235,28 +235,28 @@ define i32 @fptosi_float_i32(i32 %arg) { define i32 @fptosi_float_i16(i32 %arg) { ; SSE-LABEL: 'fptosi_float_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptosi_float_i16' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptosi_float_i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'fptosi_float_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef