[X86] Add custom promotion of v2i8/v2i16 fp_to_sint/fp_to_uint to avoid over-promotion to v2i64, which would force scalarization.

llvm-svn: 346259
This commit is contained in:
Craig Topper 2018-11-06 19:24:21 +00:00
parent bcee83da3e
commit 6428a2cd9a
3 changed files with 474 additions and 89 deletions

View File

@ -882,6 +882,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
// Custom legalize these to avoid over promotion.
setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
@ -26025,6 +26030,24 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Src = N->getOperand(0);
EVT SrcVT = Src.getValueType();
// Promote these manually to avoid over promotion to v2i64. Type
// legalization will revisit the v2i32 operation for more cleanup.
if ((VT == MVT::v2i8 || VT == MVT::v2i16) &&
getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
// AVX512DQ provides instructions that produce a v2i64 result.
if (Subtarget.hasDQI())
return;
SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v2i32, Src);
Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
: ISD::AssertSext,
dl, MVT::v2i32, Res,
DAG.getValueType(VT.getVectorElementType()));
Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
Results.push_back(Res);
return;
}
if (VT == MVT::v2i32) {
assert((IsSigned || Subtarget.hasAVX512()) &&
"Can only handle signed conversion without AVX512");
@ -26051,7 +26074,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
if (SrcVT == MVT::v2f32 &&
getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector) {
getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
SDValue Idx = DAG.getIntPtrConstant(0, dl);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32));

View File

@ -111,19 +111,8 @@ define <2 x float> @cvt_v2u32_v2f32(<2 x i32> %src) {
define <2 x i8> @cvt_v2f32_v2i8(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2i8:
; CHECK: ## %bb.0:
; CHECK-NEXT: subl $68, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 72
; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
; CHECK-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fisttpll {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fisttpll (%esp)
; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; CHECK-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
; CHECK-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; CHECK-NEXT: addl $68, %esp
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
; CHECK-NEXT: retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2i8:
@ -141,19 +130,8 @@ define <2 x i8> @cvt_v2f32_v2i8(<2 x float> %src) {
define <2 x i16> @cvt_v2f32_v2i16(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2i16:
; CHECK: ## %bb.0:
; CHECK-NEXT: subl $68, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 72
; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
; CHECK-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fisttpll {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fisttpll (%esp)
; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; CHECK-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
; CHECK-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; CHECK-NEXT: addl $68, %esp
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
; CHECK-NEXT: retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2i16:
@ -186,37 +164,8 @@ define <2 x i32> @cvt_v2f32_v2i32(<2 x float> %src) {
define <2 x i8> @cvt_v2f32_v2u8(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2u8:
; CHECK: ## %bb.0:
; CHECK-NEXT: subl $68, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 72
; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: vcmpltss %xmm2, %xmm1, %xmm3
; CHECK-NEXT: vsubss %xmm2, %xmm1, %xmm4
; CHECK-NEXT: vblendvps %xmm3, %xmm1, %xmm4, %xmm3
; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
; CHECK-NEXT: vcmpltss %xmm2, %xmm0, %xmm3
; CHECK-NEXT: vsubss %xmm2, %xmm0, %xmm4
; CHECK-NEXT: vblendvps %xmm3, %xmm0, %xmm4, %xmm3
; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fisttpll (%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fisttpll {{[0-9]+}}(%esp)
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vucomiss %xmm2, %xmm1
; CHECK-NEXT: setae %al
; CHECK-NEXT: shll $31, %eax
; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: vucomiss %xmm2, %xmm0
; CHECK-NEXT: setae %cl
; CHECK-NEXT: shll $31, %ecx
; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; CHECK-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; CHECK-NEXT: addl $68, %esp
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; CHECK-NEXT: retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2u8:
@ -234,37 +183,8 @@ define <2 x i8> @cvt_v2f32_v2u8(<2 x float> %src) {
define <2 x i16> @cvt_v2f32_v2u16(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2u16:
; CHECK: ## %bb.0:
; CHECK-NEXT: subl $68, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 72
; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: vcmpltss %xmm2, %xmm1, %xmm3
; CHECK-NEXT: vsubss %xmm2, %xmm1, %xmm4
; CHECK-NEXT: vblendvps %xmm3, %xmm1, %xmm4, %xmm3
; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
; CHECK-NEXT: vcmpltss %xmm2, %xmm0, %xmm3
; CHECK-NEXT: vsubss %xmm2, %xmm0, %xmm4
; CHECK-NEXT: vblendvps %xmm3, %xmm0, %xmm4, %xmm3
; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fisttpll (%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fisttpll {{[0-9]+}}(%esp)
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vucomiss %xmm2, %xmm1
; CHECK-NEXT: setae %al
; CHECK-NEXT: shll $31, %eax
; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: vucomiss %xmm2, %xmm0
; CHECK-NEXT: setae %cl
; CHECK-NEXT: shll $31, %ecx
; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; CHECK-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; CHECK-NEXT: addl $68, %esp
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; CHECK-NEXT: retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2u16:

View File

@ -2866,3 +2866,445 @@ define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind {
%ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %ext
}
; Signed conversion of <2 x float> to <2 x i8>.
; The expectations below show a single packed cvttps2dq on every run without
; AVX512DQ, with the 32-bit results then sign-extended to 64-bit lanes
; (psrad/punpckldq under the SSE prefix, vpmovsxdq under the VEX and non-DQ
; AVX512 prefixes). The DQ prefixes expect a direct vcvttps2qq instead, and the
; WIDEN prefix expects the 32-bit results to be narrowed with vpmovdb.
define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
; SSE-LABEL: fptosi_2f32_to_2i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; VEX-LABEL: fptosi_2f32_to_2i8:
; VEX: # %bb.0:
; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
; VEX-NEXT: vpmovsxdq %xmm0, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptosi_2f32_to_2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptosi_2f32_to_2i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptosi_2f32_to_2i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
;
; WIDEN-LABEL: fptosi_2f32_to_2i8:
; WIDEN: # %bb.0:
; WIDEN-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; WIDEN-NEXT: vcvttps2dq %zmm0, %zmm0
; WIDEN-NEXT: vpmovdb %zmm0, %xmm0
; WIDEN-NEXT: vzeroupper
; WIDEN-NEXT: retq
%cvt = fptosi <2 x float> %a to <2 x i8>
ret <2 x i8> %cvt
}
; Signed conversion of <2 x float> to <2 x i16>.
; Runs without AVX512DQ are expected to emit one packed cvttps2dq and then
; sign-extend the 32-bit results to 64-bit lanes; the DQ prefixes expect a
; direct vcvttps2qq. The two WIDEN_* prefixes expect vcvttps2dq followed by a
; vpmovdw truncation, differing only in the register width used for vpmovdw.
define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) {
; SSE-LABEL: fptosi_2f32_to_2i16:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; VEX-LABEL: fptosi_2f32_to_2i16:
; VEX: # %bb.0:
; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
; VEX-NEXT: vpmovsxdq %xmm0, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptosi_2f32_to_2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptosi_2f32_to_2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptosi_2f32_to_2i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptosi_2f32_to_2i16:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
;
; WIDEN_SKX-LABEL: fptosi_2f32_to_2i16:
; WIDEN_SKX: # %bb.0:
; WIDEN_SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; WIDEN_SKX-NEXT: vcvttps2dq %ymm0, %ymm0
; WIDEN_SKX-NEXT: vpmovdw %ymm0, %xmm0
; WIDEN_SKX-NEXT: vzeroupper
; WIDEN_SKX-NEXT: retq
;
; WIDEN_KNL-LABEL: fptosi_2f32_to_2i16:
; WIDEN_KNL: # %bb.0:
; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; WIDEN_KNL-NEXT: vcvttps2dq %ymm0, %ymm0
; WIDEN_KNL-NEXT: vpmovdw %zmm0, %ymm0
; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; WIDEN_KNL-NEXT: vzeroupper
; WIDEN_KNL-NEXT: retq
%cvt = fptosi <2 x float> %a to <2 x i16>
ret <2 x i16> %cvt
}
; Unsigned conversion of <2 x float> to <2 x i8>.
; Note that the non-DQ expectations still use the signed packed cvttps2dq; the
; 32-bit results are then zero-extended to 64-bit lanes (xorps/unpcklps under
; the SSE prefix, vpmovzxdq under the VEX and non-DQ AVX512 prefixes). The DQ
; prefixes expect vcvttps2uqq, and the WIDEN prefix expects vcvttps2dq followed
; by a vpmovdb truncation.
define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
; SSE-LABEL: fptoui_2f32_to_2i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; VEX-LABEL: fptoui_2f32_to_2i8:
; VEX: # %bb.0:
; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptoui_2f32_to_2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_2f32_to_2i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_2f32_to_2i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
;
; WIDEN-LABEL: fptoui_2f32_to_2i8:
; WIDEN: # %bb.0:
; WIDEN-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; WIDEN-NEXT: vcvttps2dq %zmm0, %zmm0
; WIDEN-NEXT: vpmovdb %zmm0, %xmm0
; WIDEN-NEXT: vzeroupper
; WIDEN-NEXT: retq
%cvt = fptoui <2 x float> %a to <2 x i8>
ret <2 x i8> %cvt
}
; Unsigned conversion of <2 x float> to <2 x i16>.
; As with the i8 variant, the non-DQ expectations use the signed packed
; cvttps2dq and then zero-extend the 32-bit results to 64-bit lanes. The DQ
; prefixes expect vcvttps2uqq, and the two WIDEN_* prefixes expect vcvttps2dq
; followed by a vpmovdw truncation.
define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) {
; SSE-LABEL: fptoui_2f32_to_2i16:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; VEX-LABEL: fptoui_2f32_to_2i16:
; VEX: # %bb.0:
; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptoui_2f32_to_2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_2f32_to_2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_2f32_to_2i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_2f32_to_2i16:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
;
; WIDEN_SKX-LABEL: fptoui_2f32_to_2i16:
; WIDEN_SKX: # %bb.0:
; WIDEN_SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; WIDEN_SKX-NEXT: vcvttps2dq %ymm0, %ymm0
; WIDEN_SKX-NEXT: vpmovdw %ymm0, %xmm0
; WIDEN_SKX-NEXT: vzeroupper
; WIDEN_SKX-NEXT: retq
;
; WIDEN_KNL-LABEL: fptoui_2f32_to_2i16:
; WIDEN_KNL: # %bb.0:
; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; WIDEN_KNL-NEXT: vcvttps2dq %ymm0, %ymm0
; WIDEN_KNL-NEXT: vpmovdw %zmm0, %ymm0
; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; WIDEN_KNL-NEXT: vzeroupper
; WIDEN_KNL-NEXT: retq
%cvt = fptoui <2 x float> %a to <2 x i16>
ret <2 x i16> %cvt
}
; Signed conversion of <2 x double> to <2 x i8>.
; Runs without AVX512DQ are expected to emit one packed cvttpd2dq and then
; sign-extend the 32-bit results to 64-bit lanes (psrad/unpcklps under the SSE
; prefix, vpmovsxdq elsewhere). The DQ prefixes expect a direct vcvttpd2qq.
; Under the WIDEN prefix the expected code is element-by-element: two
; vcvttsd2si conversions reassembled with vmovd/vpinsrb.
define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; VEX-LABEL: fptosi_2f64_to_2i8:
; VEX: # %bb.0:
; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
; VEX-NEXT: vpmovsxdq %xmm0, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptosi_2f64_to_2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptosi_2f64_to_2i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512VL-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptosi_2f64_to_2i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
;
; WIDEN-LABEL: fptosi_2f64_to_2i8:
; WIDEN: # %bb.0:
; WIDEN-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; WIDEN-NEXT: vcvttsd2si %xmm1, %eax
; WIDEN-NEXT: vcvttsd2si %xmm0, %ecx
; WIDEN-NEXT: vmovd %ecx, %xmm0
; WIDEN-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; WIDEN-NEXT: retq
%cvt = fptosi <2 x double> %a to <2 x i8>
ret <2 x i8> %cvt
}
; Signed conversion of <2 x double> to <2 x i16>.
; Runs without AVX512DQ are expected to emit one packed cvttpd2dq and then
; sign-extend the 32-bit results to 64-bit lanes; the DQ prefixes expect a
; direct vcvttpd2qq. The two WIDEN_* prefixes expect a 512-bit vcvttpd2dq
; followed by a vpmovdw truncation.
define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i16:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; VEX-LABEL: fptosi_2f64_to_2i16:
; VEX: # %bb.0:
; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
; VEX-NEXT: vpmovsxdq %xmm0, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptosi_2f64_to_2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptosi_2f64_to_2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512VL-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptosi_2f64_to_2i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptosi_2f64_to_2i16:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
;
; WIDEN_SKX-LABEL: fptosi_2f64_to_2i16:
; WIDEN_SKX: # %bb.0:
; WIDEN_SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; WIDEN_SKX-NEXT: vcvttpd2dq %zmm0, %ymm0
; WIDEN_SKX-NEXT: vpmovdw %ymm0, %xmm0
; WIDEN_SKX-NEXT: vzeroupper
; WIDEN_SKX-NEXT: retq
;
; WIDEN_KNL-LABEL: fptosi_2f64_to_2i16:
; WIDEN_KNL: # %bb.0:
; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; WIDEN_KNL-NEXT: vcvttpd2dq %zmm0, %ymm0
; WIDEN_KNL-NEXT: vpmovdw %zmm0, %ymm0
; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; WIDEN_KNL-NEXT: vzeroupper
; WIDEN_KNL-NEXT: retq
%cvt = fptosi <2 x double> %a to <2 x i16>
ret <2 x i16> %cvt
}
; Unsigned conversion of <2 x double> to <2 x i8>.
; The non-DQ expectations use the signed packed cvttpd2dq and then zero-extend
; the 32-bit results to 64-bit lanes (xorpd/unpcklps under the SSE prefix,
; vpmovzxdq elsewhere). The DQ prefixes expect vcvttpd2uqq. Under the WIDEN
; prefix the expected code is element-by-element: two vcvttsd2si conversions
; reassembled with vmovd/vpinsrb.
define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; VEX-LABEL: fptoui_2f64_to_2i8:
; VEX: # %bb.0:
; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptoui_2f64_to_2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_2f64_to_2i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_2f64_to_2i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
;
; WIDEN-LABEL: fptoui_2f64_to_2i8:
; WIDEN: # %bb.0:
; WIDEN-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; WIDEN-NEXT: vcvttsd2si %xmm1, %eax
; WIDEN-NEXT: vcvttsd2si %xmm0, %ecx
; WIDEN-NEXT: vmovd %ecx, %xmm0
; WIDEN-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; WIDEN-NEXT: retq
%cvt = fptoui <2 x double> %a to <2 x i8>
ret <2 x i8> %cvt
}
; Unsigned conversion of <2 x double> to <2 x i16>.
; The non-DQ expectations use the signed packed cvttpd2dq and then zero-extend
; the 32-bit results to 64-bit lanes; the DQ prefixes expect vcvttpd2uqq. The
; two WIDEN_* prefixes expect a 512-bit vcvttpd2dq followed by a vpmovdw
; truncation.
define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i16:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; VEX-LABEL: fptoui_2f64_to_2i16:
; VEX: # %bb.0:
; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptoui_2f64_to_2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_2f64_to_2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_2f64_to_2i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_2f64_to_2i16:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
;
; WIDEN_SKX-LABEL: fptoui_2f64_to_2i16:
; WIDEN_SKX: # %bb.0:
; WIDEN_SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; WIDEN_SKX-NEXT: vcvttpd2dq %zmm0, %ymm0
; WIDEN_SKX-NEXT: vpmovdw %ymm0, %xmm0
; WIDEN_SKX-NEXT: vzeroupper
; WIDEN_SKX-NEXT: retq
;
; WIDEN_KNL-LABEL: fptoui_2f64_to_2i16:
; WIDEN_KNL: # %bb.0:
; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; WIDEN_KNL-NEXT: vcvttpd2dq %zmm0, %ymm0
; WIDEN_KNL-NEXT: vpmovdw %zmm0, %ymm0
; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; WIDEN_KNL-NEXT: vzeroupper
; WIDEN_KNL-NEXT: retq
%cvt = fptoui <2 x double> %a to <2 x i16>
ret <2 x i16> %cvt
}