diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 21b835ec5ba5..38d3a30cb19e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -882,6 +882,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom); + // Custom legalize these to avoid over promotion. + setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); @@ -26025,6 +26030,24 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, SDValue Src = N->getOperand(0); EVT SrcVT = Src.getValueType(); + // Promote these manually to avoid over promotion to v2i64. Type + // legalization will revisit the v2i32 operation for more cleanup. + if ((VT == MVT::v2i8 || VT == MVT::v2i16) && + getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) { + // AVX512DQ provides instructions that produce a v2i64 result. + if (Subtarget.hasDQI()) + return; + + SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v2i32, Src); + Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext + : ISD::AssertSext, + dl, MVT::v2i32, Res, + DAG.getValueType(VT.getVectorElementType())); + Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); + Results.push_back(Res); + return; + } + if (VT == MVT::v2i32) { assert((IsSigned || Subtarget.hasAVX512()) && "Can only handle signed conversion without AVX512"); @@ -26051,7 +26074,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } if (SrcVT == MVT::v2f32 && - getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector) { + getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) { SDValue Idx = DAG.getIntPtrConstant(0, dl); SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, DAG.getUNDEF(MVT::v2f32)); diff --git a/llvm/test/CodeGen/X86/vec_cast3.ll b/llvm/test/CodeGen/X86/vec_cast3.ll index e0cc4f3e3960..e8662b8cc34d 100644 --- a/llvm/test/CodeGen/X86/vec_cast3.ll +++ b/llvm/test/CodeGen/X86/vec_cast3.ll @@ -111,19 +111,8 @@ define <2 x float> @cvt_v2u32_v2f32(<2 x i32> %src) { define <2 x i8> @cvt_v2f32_v2i8(<2 x float> %src) { ; CHECK-LABEL: cvt_v2f32_v2i8: ; CHECK: ## %bb.0: -; CHECK-NEXT: subl $68, %esp -; CHECK-NEXT: .cfi_def_cfa_offset 72 -; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) -; CHECK-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) -; CHECK-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-NEXT: fisttpll {{[0-9]+}}(%esp) -; CHECK-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-NEXT: fisttpll (%esp) -; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; CHECK-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 -; CHECK-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; CHECK-NEXT: addl $68, %esp +; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 +; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0 ; CHECK-NEXT: retl ; ; CHECK-WIDE-LABEL: cvt_v2f32_v2i8: @@ -141,19 +130,8 @@ define <2 x i8> @cvt_v2f32_v2i8(<2 x float> %src) { define <2 x i16> @cvt_v2f32_v2i16(<2 x float> %src) { ; CHECK-LABEL: cvt_v2f32_v2i16: ; CHECK: ## %bb.0: -; CHECK-NEXT: subl $68, %esp -; CHECK-NEXT: .cfi_def_cfa_offset 72 -; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) -; CHECK-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) -; CHECK-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-NEXT: fisttpll {{[0-9]+}}(%esp) -; CHECK-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-NEXT: fisttpll (%esp) -; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; CHECK-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 -; CHECK-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; CHECK-NEXT: addl $68, %esp +; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 +; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0 ; CHECK-NEXT: retl ; ; CHECK-WIDE-LABEL: cvt_v2f32_v2i16: @@ -186,37 +164,8 @@ define <2 x i32> @cvt_v2f32_v2i32(<2 x float> %src) { define <2 x i8> @cvt_v2f32_v2u8(<2 x float> %src) { ; CHECK-LABEL: cvt_v2f32_v2u8: ; CHECK: ## %bb.0: -; CHECK-NEXT: subl $68, %esp -; CHECK-NEXT: .cfi_def_cfa_offset 72 -; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vcmpltss %xmm2, %xmm1, %xmm3 -; CHECK-NEXT: vsubss %xmm2, %xmm1, %xmm4 -; CHECK-NEXT: vblendvps %xmm3, %xmm1, %xmm4, %xmm3 -; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp) -; CHECK-NEXT: vcmpltss %xmm2, %xmm0, %xmm3 -; CHECK-NEXT: vsubss %xmm2, %xmm0, %xmm4 -; CHECK-NEXT: vblendvps %xmm3, %xmm0, %xmm4, %xmm3 -; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp) -; CHECK-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-NEXT: fisttpll (%esp) -; CHECK-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-NEXT: fisttpll {{[0-9]+}}(%esp) -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: vucomiss %xmm2, %xmm1 -; CHECK-NEXT: setae %al -; CHECK-NEXT: shll $31, %eax -; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: vucomiss %xmm2, %xmm0 -; CHECK-NEXT: setae %cl -; CHECK-NEXT: shll $31, %ecx -; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 -; CHECK-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 -; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 -; CHECK-NEXT: addl $68, %esp +; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 +; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; CHECK-NEXT: retl ; ; CHECK-WIDE-LABEL: cvt_v2f32_v2u8: @@ -234,37 +183,8 @@ define <2 x i8> @cvt_v2f32_v2u8(<2 x float> %src) { define <2 x i16> @cvt_v2f32_v2u16(<2 x float> %src) { ; CHECK-LABEL: cvt_v2f32_v2u16: ; CHECK: ## %bb.0: -; CHECK-NEXT: subl $68, %esp -; CHECK-NEXT: .cfi_def_cfa_offset 72 -; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vcmpltss %xmm2, %xmm1, %xmm3 -; CHECK-NEXT: vsubss %xmm2, %xmm1, %xmm4 -; CHECK-NEXT: vblendvps %xmm3, %xmm1, %xmm4, %xmm3 -; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp) -; CHECK-NEXT: vcmpltss %xmm2, %xmm0, %xmm3 -; CHECK-NEXT: vsubss %xmm2, %xmm0, %xmm4 -; CHECK-NEXT: vblendvps %xmm3, %xmm0, %xmm4, %xmm3 -; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp) -; CHECK-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-NEXT: fisttpll (%esp) -; CHECK-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-NEXT: fisttpll {{[0-9]+}}(%esp) -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: vucomiss %xmm2, %xmm1 -; CHECK-NEXT: setae %al -; CHECK-NEXT: shll $31, %eax -; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: vucomiss %xmm2, %xmm0 -; CHECK-NEXT: setae %cl -; CHECK-NEXT: shll $31, %ecx -; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 -; CHECK-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 -; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 -; CHECK-NEXT: addl $68, %esp +; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 +; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; CHECK-NEXT: retl ; ; CHECK-WIDE-LABEL: cvt_v2f32_v2u16: diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int.ll b/llvm/test/CodeGen/X86/vec_fp_to_int.ll index e80abc91cd1d..651c0e65aa05 100644 --- a/llvm/test/CodeGen/X86/vec_fp_to_int.ll +++ b/llvm/test/CodeGen/X86/vec_fp_to_int.ll @@ -2866,3 +2866,445 @@ define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind { %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> ret <4 x i32> %ext } + +define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) { +; SSE-LABEL: fptosi_2f32_to_2i8: +; SSE: # %bb.0: +; SSE-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrad $31, %xmm1 +; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE-NEXT: retq +; +; VEX-LABEL: fptosi_2f32_to_2i8: +; VEX: # %bb.0: +; VEX-NEXT: vcvttps2dq %xmm0, %xmm0 +; VEX-NEXT: vpmovsxdq %xmm0, %xmm0 +; VEX-NEXT: retq +; +; AVX512F-LABEL: fptosi_2f32_to_2i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: fptosi_2f32_to_2i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX512VL-NEXT: vpmovsxdq %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: fptosi_2f32_to_2i8: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8: +; AVX512VLDQ: # %bb.0: +; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq +; +; WIDEN-LABEL: fptosi_2f32_to_2i8: +; WIDEN: # %bb.0: +; WIDEN-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; WIDEN-NEXT: vcvttps2dq %zmm0, %zmm0 +; WIDEN-NEXT: vpmovdb %zmm0, %xmm0 +; WIDEN-NEXT: vzeroupper +; WIDEN-NEXT: retq + %cvt = fptosi <2 x float> %a to <2 x i8> + ret <2 x i8> %cvt +} + +define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) { +; SSE-LABEL: fptosi_2f32_to_2i16: +; SSE: # %bb.0: +; SSE-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrad $31, %xmm1 +; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE-NEXT: retq +; +; VEX-LABEL: fptosi_2f32_to_2i16: +; VEX: # %bb.0: +; VEX-NEXT: vcvttps2dq %xmm0, %xmm0 +; VEX-NEXT: vpmovsxdq %xmm0, %xmm0 +; VEX-NEXT: retq +; +; AVX512F-LABEL: fptosi_2f32_to_2i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: fptosi_2f32_to_2i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX512VL-NEXT: vpmovsxdq %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: fptosi_2f32_to_2i16: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: fptosi_2f32_to_2i16: +; AVX512VLDQ: # %bb.0: +; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq +; +; WIDEN_SKX-LABEL: fptosi_2f32_to_2i16: +; WIDEN_SKX: # %bb.0: +; WIDEN_SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; WIDEN_SKX-NEXT: vcvttps2dq %ymm0, %ymm0 +; WIDEN_SKX-NEXT: vpmovdw %ymm0, %xmm0 +; WIDEN_SKX-NEXT: vzeroupper +; WIDEN_SKX-NEXT: retq +; +; WIDEN_KNL-LABEL: fptosi_2f32_to_2i16: +; WIDEN_KNL: # %bb.0: +; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; WIDEN_KNL-NEXT: vcvttps2dq %ymm0, %ymm0 +; WIDEN_KNL-NEXT: vpmovdw %zmm0, %ymm0 +; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; WIDEN_KNL-NEXT: vzeroupper +; WIDEN_KNL-NEXT: retq + %cvt = fptosi <2 x float> %a to <2 x i16> + ret <2 x i16> %cvt +} + +define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) { +; SSE-LABEL: fptoui_2f32_to_2i8: +; SSE: # %bb.0: +; SSE-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-NEXT: xorps %xmm1, %xmm1 +; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE-NEXT: retq +; +; VEX-LABEL: fptoui_2f32_to_2i8: +; VEX: # %bb.0: +; VEX-NEXT: vcvttps2dq %xmm0, %xmm0 +; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; VEX-NEXT: retq +; +; AVX512F-LABEL: fptoui_2f32_to_2i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: fptoui_2f32_to_2i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: fptoui_2f32_to_2i8: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8: +; AVX512VLDQ: # %bb.0: +; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq +; +; WIDEN-LABEL: fptoui_2f32_to_2i8: +; WIDEN: # %bb.0: +; WIDEN-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; WIDEN-NEXT: vcvttps2dq %zmm0, %zmm0 +; WIDEN-NEXT: vpmovdb %zmm0, %xmm0 +; WIDEN-NEXT: vzeroupper +; WIDEN-NEXT: retq + %cvt = fptoui <2 x float> %a to <2 x i8> + ret <2 x i8> %cvt +} + +define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) { +; SSE-LABEL: fptoui_2f32_to_2i16: +; SSE: # %bb.0: +; SSE-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-NEXT: xorps %xmm1, %xmm1 +; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE-NEXT: retq +; +; VEX-LABEL: fptoui_2f32_to_2i16: +; VEX: # %bb.0: +; VEX-NEXT: vcvttps2dq %xmm0, %xmm0 +; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; VEX-NEXT: retq +; +; AVX512F-LABEL: fptoui_2f32_to_2i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: fptoui_2f32_to_2i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: fptoui_2f32_to_2i16: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: fptoui_2f32_to_2i16: +; AVX512VLDQ: # %bb.0: +; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq +; +; WIDEN_SKX-LABEL: fptoui_2f32_to_2i16: +; WIDEN_SKX: # %bb.0: +; WIDEN_SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; WIDEN_SKX-NEXT: vcvttps2dq %ymm0, %ymm0 +; WIDEN_SKX-NEXT: vpmovdw %ymm0, %xmm0 +; WIDEN_SKX-NEXT: vzeroupper +; WIDEN_SKX-NEXT: retq +; +; WIDEN_KNL-LABEL: fptoui_2f32_to_2i16: +; WIDEN_KNL: # %bb.0: +; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; WIDEN_KNL-NEXT: vcvttps2dq %ymm0, %ymm0 +; WIDEN_KNL-NEXT: vpmovdw %zmm0, %ymm0 +; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; WIDEN_KNL-NEXT: vzeroupper +; WIDEN_KNL-NEXT: retq + %cvt = fptoui <2 x float> %a to <2 x i16> + ret <2 x i16> %cvt +} + +define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) { +; SSE-LABEL: fptosi_2f64_to_2i8: +; SSE: # %bb.0: +; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: psrad $31, %xmm1 +; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE-NEXT: retq +; +; VEX-LABEL: fptosi_2f64_to_2i8: +; VEX: # %bb.0: +; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 +; VEX-NEXT: vpmovsxdq %xmm0, %xmm0 +; VEX-NEXT: retq +; +; AVX512F-LABEL: fptosi_2f64_to_2i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: fptosi_2f64_to_2i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX512VL-NEXT: vpmovsxdq %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: fptosi_2f64_to_2i8: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8: +; AVX512VLDQ: # %bb.0: +; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq +; +; WIDEN-LABEL: fptosi_2f64_to_2i8: +; WIDEN: # %bb.0: +; WIDEN-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; WIDEN-NEXT: vcvttsd2si %xmm1, %eax +; WIDEN-NEXT: vcvttsd2si %xmm0, %ecx +; WIDEN-NEXT: vmovd %ecx, %xmm0 +; WIDEN-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 +; WIDEN-NEXT: retq + %cvt = fptosi <2 x double> %a to <2 x i8> + ret <2 x i8> %cvt +} + +define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) { +; SSE-LABEL: fptosi_2f64_to_2i16: +; SSE: # %bb.0: +; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: psrad $31, %xmm1 +; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE-NEXT: retq +; +; VEX-LABEL: fptosi_2f64_to_2i16: +; VEX: # %bb.0: +; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 +; VEX-NEXT: vpmovsxdq %xmm0, %xmm0 +; VEX-NEXT: retq +; +; AVX512F-LABEL: fptosi_2f64_to_2i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: fptosi_2f64_to_2i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX512VL-NEXT: vpmovsxdq %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: fptosi_2f64_to_2i16: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: fptosi_2f64_to_2i16: +; AVX512VLDQ: # %bb.0: +; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq +; +; WIDEN_SKX-LABEL: fptosi_2f64_to_2i16: +; WIDEN_SKX: # %bb.0: +; WIDEN_SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; WIDEN_SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 +; WIDEN_SKX-NEXT: vpmovdw %ymm0, %xmm0 +; WIDEN_SKX-NEXT: vzeroupper +; WIDEN_SKX-NEXT: retq +; +; WIDEN_KNL-LABEL: fptosi_2f64_to_2i16: +; WIDEN_KNL: # %bb.0: +; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; WIDEN_KNL-NEXT: vcvttpd2dq %zmm0, %ymm0 +; WIDEN_KNL-NEXT: vpmovdw %zmm0, %ymm0 +; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; WIDEN_KNL-NEXT: vzeroupper +; WIDEN_KNL-NEXT: retq + %cvt = fptosi <2 x double> %a to <2 x i16> + ret <2 x i16> %cvt +} + +define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) { +; SSE-LABEL: fptoui_2f64_to_2i8: +; SSE: # %bb.0: +; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 +; SSE-NEXT: xorpd %xmm1, %xmm1 +; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE-NEXT: retq +; +; VEX-LABEL: fptoui_2f64_to_2i8: +; VEX: # %bb.0: +; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 +; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; VEX-NEXT: retq +; +; AVX512F-LABEL: fptoui_2f64_to_2i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: fptoui_2f64_to_2i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: fptoui_2f64_to_2i8: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8: +; AVX512VLDQ: # %bb.0: +; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq +; +; WIDEN-LABEL: fptoui_2f64_to_2i8: +; WIDEN: # %bb.0: +; WIDEN-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; WIDEN-NEXT: vcvttsd2si %xmm1, %eax +; WIDEN-NEXT: vcvttsd2si %xmm0, %ecx +; WIDEN-NEXT: vmovd %ecx, %xmm0 +; WIDEN-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 +; WIDEN-NEXT: retq + %cvt = fptoui <2 x double> %a to <2 x i8> + ret <2 x i8> %cvt +} + +define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) { +; SSE-LABEL: fptoui_2f64_to_2i16: +; SSE: # %bb.0: +; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 +; SSE-NEXT: xorpd %xmm1, %xmm1 +; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE-NEXT: retq +; +; VEX-LABEL: fptoui_2f64_to_2i16: +; VEX: # %bb.0: +; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 +; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; VEX-NEXT: retq +; +; AVX512F-LABEL: fptoui_2f64_to_2i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: fptoui_2f64_to_2i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: fptoui_2f64_to_2i16: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: fptoui_2f64_to_2i16: +; AVX512VLDQ: # %bb.0: +; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq +; +; WIDEN_SKX-LABEL: fptoui_2f64_to_2i16: +; WIDEN_SKX: # %bb.0: +; WIDEN_SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; WIDEN_SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 +; WIDEN_SKX-NEXT: vpmovdw %ymm0, %xmm0 +; WIDEN_SKX-NEXT: vzeroupper +; WIDEN_SKX-NEXT: retq +; +; WIDEN_KNL-LABEL: fptoui_2f64_to_2i16: +; WIDEN_KNL: # %bb.0: +; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; WIDEN_KNL-NEXT: vcvttpd2dq %zmm0, %ymm0 +; WIDEN_KNL-NEXT: vpmovdw %zmm0, %ymm0 +; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; WIDEN_KNL-NEXT: vzeroupper +; WIDEN_KNL-NEXT: retq + %cvt = fptoui <2 x double> %a to <2 x i16> + ret <2 x i16> %cvt +}