forked from OSchip/llvm-project
[X86] Add custom promotion of v2i8/v2i16 fp_to_sint/fp_to_uint to avoid over-promotion to v2i64, which would force scalarization.
llvm-svn: 346259
This commit is contained in:
parent
bcee83da3e
commit
6428a2cd9a
|
@ -882,6 +882,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
|
||||
// Custom legalize the v2i8/v2i16 conversions to avoid over-promotion to v2i64.
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom);
|
||||
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
|
||||
|
@ -26025,6 +26030,24 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
|
|||
SDValue Src = N->getOperand(0);
|
||||
EVT SrcVT = Src.getValueType();
|
||||
|
||||
// Promote these manually to avoid over promotion to v2i64. Type
|
||||
// legalization will revisit the v2i32 operation for more cleanup.
|
||||
if ((VT == MVT::v2i8 || VT == MVT::v2i16) &&
|
||||
getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
|
||||
// AVX512DQ can produce the v2i64 result directly; no manual promotion.
|
||||
if (Subtarget.hasDQI())
|
||||
return;
|
||||
|
||||
SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v2i32, Src);
|
||||
Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
|
||||
: ISD::AssertSext,
|
||||
dl, MVT::v2i32, Res,
|
||||
DAG.getValueType(VT.getVectorElementType()));
|
||||
Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
|
||||
Results.push_back(Res);
|
||||
return;
|
||||
}
|
||||
|
||||
if (VT == MVT::v2i32) {
|
||||
assert((IsSigned || Subtarget.hasAVX512()) &&
|
||||
"Can only handle signed conversion without AVX512");
|
||||
|
@ -26051,7 +26074,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
|
|||
return;
|
||||
}
|
||||
if (SrcVT == MVT::v2f32 &&
|
||||
getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector) {
|
||||
getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
|
||||
SDValue Idx = DAG.getIntPtrConstant(0, dl);
|
||||
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
|
||||
DAG.getUNDEF(MVT::v2f32));
|
||||
|
|
|
@ -111,19 +111,8 @@ define <2 x float> @cvt_v2u32_v2f32(<2 x i32> %src) {
|
|||
define <2 x i8> @cvt_v2f32_v2i8(<2 x float> %src) {
|
||||
; CHECK-LABEL: cvt_v2f32_v2i8:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: subl $68, %esp
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 72
|
||||
; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: fisttpll (%esp)
|
||||
; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; CHECK-NEXT: addl $68, %esp
|
||||
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; CHECK-NEXT: retl
|
||||
;
|
||||
; CHECK-WIDE-LABEL: cvt_v2f32_v2i8:
|
||||
|
@ -141,19 +130,8 @@ define <2 x i8> @cvt_v2f32_v2i8(<2 x float> %src) {
|
|||
define <2 x i16> @cvt_v2f32_v2i16(<2 x float> %src) {
|
||||
; CHECK-LABEL: cvt_v2f32_v2i16:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: subl $68, %esp
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 72
|
||||
; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: fisttpll (%esp)
|
||||
; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; CHECK-NEXT: addl $68, %esp
|
||||
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; CHECK-NEXT: retl
|
||||
;
|
||||
; CHECK-WIDE-LABEL: cvt_v2f32_v2i16:
|
||||
|
@ -186,37 +164,8 @@ define <2 x i32> @cvt_v2f32_v2i32(<2 x float> %src) {
|
|||
define <2 x i8> @cvt_v2f32_v2u8(<2 x float> %src) {
|
||||
; CHECK-LABEL: cvt_v2f32_v2u8:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: subl $68, %esp
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 72
|
||||
; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vcmpltss %xmm2, %xmm1, %xmm3
|
||||
; CHECK-NEXT: vsubss %xmm2, %xmm1, %xmm4
|
||||
; CHECK-NEXT: vblendvps %xmm3, %xmm1, %xmm4, %xmm3
|
||||
; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: vcmpltss %xmm2, %xmm0, %xmm3
|
||||
; CHECK-NEXT: vsubss %xmm2, %xmm0, %xmm4
|
||||
; CHECK-NEXT: vblendvps %xmm3, %xmm0, %xmm4, %xmm3
|
||||
; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: fisttpll (%esp)
|
||||
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: vucomiss %xmm2, %xmm1
|
||||
; CHECK-NEXT: setae %al
|
||||
; CHECK-NEXT: shll $31, %eax
|
||||
; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: xorl %ecx, %ecx
|
||||
; CHECK-NEXT: vucomiss %xmm2, %xmm0
|
||||
; CHECK-NEXT: setae %cl
|
||||
; CHECK-NEXT: shll $31, %ecx
|
||||
; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
|
||||
; CHECK-NEXT: addl $68, %esp
|
||||
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; CHECK-NEXT: retl
|
||||
;
|
||||
; CHECK-WIDE-LABEL: cvt_v2f32_v2u8:
|
||||
|
@ -234,37 +183,8 @@ define <2 x i8> @cvt_v2f32_v2u8(<2 x float> %src) {
|
|||
define <2 x i16> @cvt_v2f32_v2u16(<2 x float> %src) {
|
||||
; CHECK-LABEL: cvt_v2f32_v2u16:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: subl $68, %esp
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 72
|
||||
; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vcmpltss %xmm2, %xmm1, %xmm3
|
||||
; CHECK-NEXT: vsubss %xmm2, %xmm1, %xmm4
|
||||
; CHECK-NEXT: vblendvps %xmm3, %xmm1, %xmm4, %xmm3
|
||||
; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: vcmpltss %xmm2, %xmm0, %xmm3
|
||||
; CHECK-NEXT: vsubss %xmm2, %xmm0, %xmm4
|
||||
; CHECK-NEXT: vblendvps %xmm3, %xmm0, %xmm4, %xmm3
|
||||
; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: fisttpll (%esp)
|
||||
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: vucomiss %xmm2, %xmm1
|
||||
; CHECK-NEXT: setae %al
|
||||
; CHECK-NEXT: shll $31, %eax
|
||||
; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: xorl %ecx, %ecx
|
||||
; CHECK-NEXT: vucomiss %xmm2, %xmm0
|
||||
; CHECK-NEXT: setae %cl
|
||||
; CHECK-NEXT: shll $31, %ecx
|
||||
; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
|
||||
; CHECK-NEXT: addl $68, %esp
|
||||
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; CHECK-NEXT: retl
|
||||
;
|
||||
; CHECK-WIDE-LABEL: cvt_v2f32_v2u16:
|
||||
|
|
|
@ -2866,3 +2866,445 @@ define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind {
|
|||
%ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
ret <4 x i32> %ext
|
||||
}
|
||||
|
||||
define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
|
||||
; SSE-LABEL: fptosi_2f32_to_2i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
|
||||
; SSE-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE-NEXT: psrad $31, %xmm1
|
||||
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; VEX-LABEL: fptosi_2f32_to_2i8:
|
||||
; VEX: # %bb.0:
|
||||
; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; VEX-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; VEX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: fptosi_2f32_to_2i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: fptosi_2f32_to_2i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: fptosi_2f32_to_2i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8:
|
||||
; AVX512VLDQ: # %bb.0:
|
||||
; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; WIDEN-LABEL: fptosi_2f32_to_2i8:
|
||||
; WIDEN: # %bb.0:
|
||||
; WIDEN-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; WIDEN-NEXT: vcvttps2dq %zmm0, %zmm0
|
||||
; WIDEN-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; WIDEN-NEXT: vzeroupper
|
||||
; WIDEN-NEXT: retq
|
||||
%cvt = fptosi <2 x float> %a to <2 x i8>
|
||||
ret <2 x i8> %cvt
|
||||
}
|
||||
|
||||
define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) {
|
||||
; SSE-LABEL: fptosi_2f32_to_2i16:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
|
||||
; SSE-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE-NEXT: psrad $31, %xmm1
|
||||
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; VEX-LABEL: fptosi_2f32_to_2i16:
|
||||
; VEX: # %bb.0:
|
||||
; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; VEX-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; VEX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: fptosi_2f32_to_2i16:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: fptosi_2f32_to_2i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: fptosi_2f32_to_2i16:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: fptosi_2f32_to_2i16:
|
||||
; AVX512VLDQ: # %bb.0:
|
||||
; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; WIDEN_SKX-LABEL: fptosi_2f32_to_2i16:
|
||||
; WIDEN_SKX: # %bb.0:
|
||||
; WIDEN_SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; WIDEN_SKX-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; WIDEN_SKX-NEXT: vpmovdw %ymm0, %xmm0
|
||||
; WIDEN_SKX-NEXT: vzeroupper
|
||||
; WIDEN_SKX-NEXT: retq
|
||||
;
|
||||
; WIDEN_KNL-LABEL: fptosi_2f32_to_2i16:
|
||||
; WIDEN_KNL: # %bb.0:
|
||||
; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; WIDEN_KNL-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; WIDEN_KNL-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
||||
; WIDEN_KNL-NEXT: vzeroupper
|
||||
; WIDEN_KNL-NEXT: retq
|
||||
%cvt = fptosi <2 x float> %a to <2 x i16>
|
||||
ret <2 x i16> %cvt
|
||||
}
|
||||
|
||||
define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
|
||||
; SSE-LABEL: fptoui_2f32_to_2i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
|
||||
; SSE-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; VEX-LABEL: fptoui_2f32_to_2i8:
|
||||
; VEX: # %bb.0:
|
||||
; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; VEX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: fptoui_2f32_to_2i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: fptoui_2f32_to_2i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: fptoui_2f32_to_2i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8:
|
||||
; AVX512VLDQ: # %bb.0:
|
||||
; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; WIDEN-LABEL: fptoui_2f32_to_2i8:
|
||||
; WIDEN: # %bb.0:
|
||||
; WIDEN-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; WIDEN-NEXT: vcvttps2dq %zmm0, %zmm0
|
||||
; WIDEN-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; WIDEN-NEXT: vzeroupper
|
||||
; WIDEN-NEXT: retq
|
||||
%cvt = fptoui <2 x float> %a to <2 x i8>
|
||||
ret <2 x i8> %cvt
|
||||
}
|
||||
|
||||
define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) {
|
||||
; SSE-LABEL: fptoui_2f32_to_2i16:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
|
||||
; SSE-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; VEX-LABEL: fptoui_2f32_to_2i16:
|
||||
; VEX: # %bb.0:
|
||||
; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; VEX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: fptoui_2f32_to_2i16:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: fptoui_2f32_to_2i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: fptoui_2f32_to_2i16:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: fptoui_2f32_to_2i16:
|
||||
; AVX512VLDQ: # %bb.0:
|
||||
; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; WIDEN_SKX-LABEL: fptoui_2f32_to_2i16:
|
||||
; WIDEN_SKX: # %bb.0:
|
||||
; WIDEN_SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; WIDEN_SKX-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; WIDEN_SKX-NEXT: vpmovdw %ymm0, %xmm0
|
||||
; WIDEN_SKX-NEXT: vzeroupper
|
||||
; WIDEN_SKX-NEXT: retq
|
||||
;
|
||||
; WIDEN_KNL-LABEL: fptoui_2f32_to_2i16:
|
||||
; WIDEN_KNL: # %bb.0:
|
||||
; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; WIDEN_KNL-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; WIDEN_KNL-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
||||
; WIDEN_KNL-NEXT: vzeroupper
|
||||
; WIDEN_KNL-NEXT: retq
|
||||
%cvt = fptoui <2 x float> %a to <2 x i16>
|
||||
ret <2 x i16> %cvt
|
||||
}
|
||||
|
||||
define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
|
||||
; SSE-LABEL: fptosi_2f64_to_2i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||
; SSE-NEXT: movapd %xmm0, %xmm1
|
||||
; SSE-NEXT: psrad $31, %xmm1
|
||||
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; VEX-LABEL: fptosi_2f64_to_2i8:
|
||||
; VEX: # %bb.0:
|
||||
; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||
; VEX-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; VEX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: fptosi_2f64_to_2i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: fptosi_2f64_to_2i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: fptosi_2f64_to_2i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8:
|
||||
; AVX512VLDQ: # %bb.0:
|
||||
; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; WIDEN-LABEL: fptosi_2f64_to_2i8:
|
||||
; WIDEN: # %bb.0:
|
||||
; WIDEN-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; WIDEN-NEXT: vcvttsd2si %xmm1, %eax
|
||||
; WIDEN-NEXT: vcvttsd2si %xmm0, %ecx
|
||||
; WIDEN-NEXT: vmovd %ecx, %xmm0
|
||||
; WIDEN-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
||||
; WIDEN-NEXT: retq
|
||||
%cvt = fptosi <2 x double> %a to <2 x i8>
|
||||
ret <2 x i8> %cvt
|
||||
}
|
||||
|
||||
define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) {
|
||||
; SSE-LABEL: fptosi_2f64_to_2i16:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||
; SSE-NEXT: movapd %xmm0, %xmm1
|
||||
; SSE-NEXT: psrad $31, %xmm1
|
||||
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; VEX-LABEL: fptosi_2f64_to_2i16:
|
||||
; VEX: # %bb.0:
|
||||
; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||
; VEX-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; VEX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: fptosi_2f64_to_2i16:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: fptosi_2f64_to_2i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: fptosi_2f64_to_2i16:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: fptosi_2f64_to_2i16:
|
||||
; AVX512VLDQ: # %bb.0:
|
||||
; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; WIDEN_SKX-LABEL: fptosi_2f64_to_2i16:
|
||||
; WIDEN_SKX: # %bb.0:
|
||||
; WIDEN_SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; WIDEN_SKX-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||
; WIDEN_SKX-NEXT: vpmovdw %ymm0, %xmm0
|
||||
; WIDEN_SKX-NEXT: vzeroupper
|
||||
; WIDEN_SKX-NEXT: retq
|
||||
;
|
||||
; WIDEN_KNL-LABEL: fptosi_2f64_to_2i16:
|
||||
; WIDEN_KNL: # %bb.0:
|
||||
; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; WIDEN_KNL-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||
; WIDEN_KNL-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
||||
; WIDEN_KNL-NEXT: vzeroupper
|
||||
; WIDEN_KNL-NEXT: retq
|
||||
%cvt = fptosi <2 x double> %a to <2 x i16>
|
||||
ret <2 x i16> %cvt
|
||||
}
|
||||
|
||||
define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
|
||||
; SSE-LABEL: fptoui_2f64_to_2i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||
; SSE-NEXT: xorpd %xmm1, %xmm1
|
||||
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; VEX-LABEL: fptoui_2f64_to_2i8:
|
||||
; VEX: # %bb.0:
|
||||
; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||
; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; VEX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: fptoui_2f64_to_2i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: fptoui_2f64_to_2i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: fptoui_2f64_to_2i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8:
|
||||
; AVX512VLDQ: # %bb.0:
|
||||
; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; WIDEN-LABEL: fptoui_2f64_to_2i8:
|
||||
; WIDEN: # %bb.0:
|
||||
; WIDEN-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; WIDEN-NEXT: vcvttsd2si %xmm1, %eax
|
||||
; WIDEN-NEXT: vcvttsd2si %xmm0, %ecx
|
||||
; WIDEN-NEXT: vmovd %ecx, %xmm0
|
||||
; WIDEN-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
||||
; WIDEN-NEXT: retq
|
||||
%cvt = fptoui <2 x double> %a to <2 x i8>
|
||||
ret <2 x i8> %cvt
|
||||
}
|
||||
|
||||
define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
|
||||
; SSE-LABEL: fptoui_2f64_to_2i16:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||
; SSE-NEXT: xorpd %xmm1, %xmm1
|
||||
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; VEX-LABEL: fptoui_2f64_to_2i16:
|
||||
; VEX: # %bb.0:
|
||||
; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||
; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; VEX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: fptoui_2f64_to_2i16:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: fptoui_2f64_to_2i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: fptoui_2f64_to_2i16:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: fptoui_2f64_to_2i16:
|
||||
; AVX512VLDQ: # %bb.0:
|
||||
; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; WIDEN_SKX-LABEL: fptoui_2f64_to_2i16:
|
||||
; WIDEN_SKX: # %bb.0:
|
||||
; WIDEN_SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; WIDEN_SKX-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||
; WIDEN_SKX-NEXT: vpmovdw %ymm0, %xmm0
|
||||
; WIDEN_SKX-NEXT: vzeroupper
|
||||
; WIDEN_SKX-NEXT: retq
|
||||
;
|
||||
; WIDEN_KNL-LABEL: fptoui_2f64_to_2i16:
|
||||
; WIDEN_KNL: # %bb.0:
|
||||
; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; WIDEN_KNL-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||
; WIDEN_KNL-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
||||
; WIDEN_KNL-NEXT: vzeroupper
|
||||
; WIDEN_KNL-NEXT: retq
|
||||
%cvt = fptoui <2 x double> %a to <2 x i16>
|
||||
ret <2 x i16> %cvt
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue