forked from OSchip/llvm-project
[X86] Add custom promotion of narrow fp_to_uint/fp_to_sint operations under -x86-experimental-vector-widening-legalization.
This tries to force the result type to vXi32 followed by a truncate. This can help avoid scalarization that would otherwise occur. There are some annoying examples of an AVX512 truncate instruction followed by a packus where we should really be able to use just one truncate. But overall this is still a net improvement. llvm-svn: 347105
This commit is contained in:
parent
ac35cd330a
commit
ee0333b4a9
|
@ -899,10 +899,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
|
||||
// Custom legalize these to avoid over promotion.
|
||||
|
||||
// Custom legalize these to avoid over promotion or custom promotion.
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
|
||||
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
|
||||
|
@ -26287,7 +26295,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
|
|||
// Promote these manually to avoid over promotion to v2i64. Type
|
||||
// legalization will revisit the v2i32 operation for more cleanup.
|
||||
if ((VT == MVT::v2i8 || VT == MVT::v2i16) &&
|
||||
getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
|
||||
getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger) {
|
||||
// AVX512DQ provides instructions that produce a v2i64 result.
|
||||
if (Subtarget.hasDQI())
|
||||
return;
|
||||
|
@ -26302,6 +26310,43 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
|
|||
return;
|
||||
}
|
||||
|
||||
if (VT.isVector() && VT.getScalarSizeInBits() < 32) {
|
||||
if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
|
||||
return;
|
||||
|
||||
// Try to create a 128 bit vector, but don't exceed a 32 bit element.
|
||||
unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U);
|
||||
MVT PromoteVT = MVT::getVectorVT(MVT::getIntegerVT(NewEltWidth),
|
||||
VT.getVectorNumElements());
|
||||
unsigned Opc = N->getOpcode();
|
||||
if (PromoteVT == MVT::v2i32 || PromoteVT == MVT::v4i32)
|
||||
Opc = ISD::FP_TO_SINT;
|
||||
|
||||
SDValue Res = DAG.getNode(Opc, dl, PromoteVT, Src);
|
||||
|
||||
// Preserve what we know about the size of the original result. Except
|
||||
// when the result is v2i32 since we can't widen the assert.
|
||||
if (PromoteVT != MVT::v2i32)
|
||||
Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
|
||||
: ISD::AssertSext,
|
||||
dl, PromoteVT, Res,
|
||||
DAG.getValueType(VT.getVectorElementType()));
|
||||
|
||||
// Truncate back to the original width.
|
||||
Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
|
||||
|
||||
// Now widen to 128 bits.
|
||||
unsigned NumConcats = 128 / VT.getSizeInBits();
|
||||
MVT ConcatVT = MVT::getVectorVT(VT.getSimpleVT().getVectorElementType(),
|
||||
VT.getVectorNumElements() * NumConcats);
|
||||
SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));
|
||||
ConcatOps[0] = Res;
|
||||
Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, ConcatOps);
|
||||
Results.push_back(Res);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
if (VT == MVT::v2i32) {
|
||||
assert((IsSigned || Subtarget.hasAVX512()) &&
|
||||
"Can only handle signed conversion without AVX512");
|
||||
|
|
|
@ -502,33 +502,21 @@ define <8 x i16> @f64to8us(<8 x double> %f) {
|
|||
}
|
||||
|
||||
define <8 x i8> @f64to8uc(<8 x double> %f) {
|
||||
; ALL-LABEL: f64to8uc:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; ALL-NEXT: vcvttsd2si %xmm1, %eax
|
||||
; ALL-NEXT: vcvttsd2si %xmm0, %ecx
|
||||
; ALL-NEXT: vmovd %ecx, %xmm1
|
||||
; ALL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
|
||||
; ALL-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; ALL-NEXT: vcvttsd2si %xmm2, %eax
|
||||
; ALL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
|
||||
; ALL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
|
||||
; ALL-NEXT: vcvttsd2si %xmm2, %eax
|
||||
; ALL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
|
||||
; ALL-NEXT: vextractf32x4 $2, %zmm0, %xmm2
|
||||
; ALL-NEXT: vcvttsd2si %xmm2, %eax
|
||||
; ALL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
||||
; ALL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
|
||||
; ALL-NEXT: vcvttsd2si %xmm2, %eax
|
||||
; ALL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
|
||||
; ALL-NEXT: vextractf32x4 $3, %zmm0, %xmm0
|
||||
; ALL-NEXT: vcvttsd2si %xmm0, %eax
|
||||
; ALL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
|
||||
; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
||||
; ALL-NEXT: vcvttsd2si %xmm0, %eax
|
||||
; ALL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
|
||||
; ALL-NEXT: vzeroupper
|
||||
; ALL-NEXT: retq
|
||||
; NOVL-LABEL: f64to8uc:
|
||||
; NOVL: # %bb.0:
|
||||
; NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||
; NOVL-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; NOVL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
|
||||
; NOVL-NEXT: vzeroupper
|
||||
; NOVL-NEXT: retq
|
||||
;
|
||||
; VL-LABEL: f64to8uc:
|
||||
; VL: # %bb.0:
|
||||
; VL-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||
; VL-NEXT: vpmovdw %ymm0, %xmm0
|
||||
; VL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
|
||||
; VL-NEXT: vzeroupper
|
||||
; VL-NEXT: retq
|
||||
%res = fptoui <8 x double> %f to <8 x i8>
|
||||
ret <8 x i8> %res
|
||||
}
|
||||
|
|
|
@ -172,29 +172,10 @@ define <8 x i8> @cvt_v8f32_v8i8(<8 x float> %src) {
|
|||
;
|
||||
; CHECK-WIDE-LABEL: cvt_v8f32_v8i8:
|
||||
; CHECK-WIDE: ## %bb.0:
|
||||
; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
|
||||
; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
|
||||
; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
|
||||
; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
|
||||
; CHECK-WIDE-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
|
||||
; CHECK-WIDE-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
|
||||
; CHECK-WIDE-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
|
||||
; CHECK-WIDE-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
|
||||
; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vzeroupper
|
||||
; CHECK-WIDE-NEXT: retl
|
||||
%res = fptosi <8 x float> %src to <8 x i8>
|
||||
|
@ -229,17 +210,8 @@ define <4 x i8> @cvt_v4f32_v4i8(<4 x float> %src) {
|
|||
;
|
||||
; CHECK-WIDE-LABEL: cvt_v4f32_v4i8:
|
||||
; CHECK-WIDE: ## %bb.0:
|
||||
; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
|
||||
; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
|
||||
; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
|
||||
; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm0
|
||||
; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; CHECK-WIDE-NEXT: retl
|
||||
%res = fptosi <4 x float> %src to <4 x i8>
|
||||
ret <4 x i8> %res
|
||||
|
@ -253,11 +225,8 @@ define <4 x i16> @cvt_v4f32_v4i16(<4 x float> %src) {
|
|||
;
|
||||
; CHECK-WIDE-LABEL: cvt_v4f32_v4i16:
|
||||
; CHECK-WIDE: ## %bb.0:
|
||||
; CHECK-WIDE-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vzeroupper
|
||||
; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: retl
|
||||
%res = fptosi <4 x float> %src to <4 x i16>
|
||||
ret <4 x i16> %res
|
||||
|
@ -274,29 +243,10 @@ define <8 x i8> @cvt_v8f32_v8u8(<8 x float> %src) {
|
|||
;
|
||||
; CHECK-WIDE-LABEL: cvt_v8f32_v8u8:
|
||||
; CHECK-WIDE: ## %bb.0:
|
||||
; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
|
||||
; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
|
||||
; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
|
||||
; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
|
||||
; CHECK-WIDE-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
|
||||
; CHECK-WIDE-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
|
||||
; CHECK-WIDE-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
|
||||
; CHECK-WIDE-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
|
||||
; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vzeroupper
|
||||
; CHECK-WIDE-NEXT: retl
|
||||
%res = fptoui <8 x float> %src to <8 x i8>
|
||||
|
@ -331,17 +281,8 @@ define <4 x i8> @cvt_v4f32_v4u8(<4 x float> %src) {
|
|||
;
|
||||
; CHECK-WIDE-LABEL: cvt_v4f32_v4u8:
|
||||
; CHECK-WIDE: ## %bb.0:
|
||||
; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
|
||||
; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
|
||||
; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
|
||||
; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm0
|
||||
; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; CHECK-WIDE-NEXT: retl
|
||||
%res = fptoui <4 x float> %src to <4 x i8>
|
||||
ret <4 x i8> %res
|
||||
|
@ -355,11 +296,8 @@ define <4 x i16> @cvt_v4f32_v4u16(<4 x float> %src) {
|
|||
;
|
||||
; CHECK-WIDE-LABEL: cvt_v4f32_v4u16:
|
||||
; CHECK-WIDE: ## %bb.0:
|
||||
; CHECK-WIDE-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vzeroupper
|
||||
; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: retl
|
||||
%res = fptoui <4 x float> %src to <4 x i16>
|
||||
ret <4 x i16> %res
|
||||
|
|
|
@ -117,11 +117,8 @@ define <2 x i8> @cvt_v2f32_v2i8(<2 x float> %src) {
|
|||
;
|
||||
; CHECK-WIDE-LABEL: cvt_v2f32_v2i8:
|
||||
; CHECK-WIDE: ## %bb.0:
|
||||
; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
|
||||
; CHECK-WIDE-NEXT: vmovd %ecx, %xmm0
|
||||
; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; CHECK-WIDE-NEXT: retl
|
||||
%res = fptosi <2 x float> %src to <2 x i8>
|
||||
ret <2 x i8> %res
|
||||
|
@ -136,11 +133,8 @@ define <2 x i16> @cvt_v2f32_v2i16(<2 x float> %src) {
|
|||
;
|
||||
; CHECK-WIDE-LABEL: cvt_v2f32_v2i16:
|
||||
; CHECK-WIDE: ## %bb.0:
|
||||
; CHECK-WIDE-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vzeroupper
|
||||
; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; CHECK-WIDE-NEXT: retl
|
||||
%res = fptosi <2 x float> %src to <2 x i16>
|
||||
ret <2 x i16> %res
|
||||
|
@ -170,11 +164,8 @@ define <2 x i8> @cvt_v2f32_v2u8(<2 x float> %src) {
|
|||
;
|
||||
; CHECK-WIDE-LABEL: cvt_v2f32_v2u8:
|
||||
; CHECK-WIDE: ## %bb.0:
|
||||
; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
|
||||
; CHECK-WIDE-NEXT: vmovd %ecx, %xmm0
|
||||
; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; CHECK-WIDE-NEXT: retl
|
||||
%res = fptoui <2 x float> %src to <2 x i8>
|
||||
ret <2 x i8> %res
|
||||
|
@ -189,11 +180,8 @@ define <2 x i16> @cvt_v2f32_v2u16(<2 x float> %src) {
|
|||
;
|
||||
; CHECK-WIDE-LABEL: cvt_v2f32_v2u16:
|
||||
; CHECK-WIDE: ## %bb.0:
|
||||
; CHECK-WIDE-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vzeroupper
|
||||
; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; CHECK-WIDE-NEXT: retl
|
||||
%res = fptoui <2 x float> %src to <2 x i16>
|
||||
ret <2 x i16> %res
|
||||
|
|
|
@ -2310,31 +2310,17 @@ define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind {
|
|||
define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
|
||||
; SSE-LABEL: fptosi_2f32_to_2i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cvttss2si %xmm0, %eax
|
||||
; SSE-NEXT: movzbl %al, %eax
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSE-NEXT: cvttss2si %xmm0, %ecx
|
||||
; SSE-NEXT: shll $8, %ecx
|
||||
; SSE-NEXT: orl %eax, %ecx
|
||||
; SSE-NEXT: movd %ecx, %xmm0
|
||||
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: packuswb %xmm0, %xmm0
|
||||
; SSE-NEXT: packuswb %xmm0, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; VEX-LABEL: fptosi_2f32_to_2i8:
|
||||
; VEX: # %bb.0:
|
||||
; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; VEX-NEXT: vcvttss2si %xmm1, %eax
|
||||
; VEX-NEXT: vcvttss2si %xmm0, %ecx
|
||||
; VEX-NEXT: vmovd %ecx, %xmm0
|
||||
; VEX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
||||
; VEX-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: fptosi_2f32_to_2i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0
|
||||
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: fptosi_2f32_to_2i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX-NEXT: retq
|
||||
%cvt = fptosi <2 x float> %a to <2 x i8>
|
||||
ret <2 x i8> %cvt
|
||||
}
|
||||
|
@ -2342,64 +2328,15 @@ define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
|
|||
define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) {
|
||||
; SSE-LABEL: fptosi_2f32_to_2i16:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cvttss2si %xmm0, %eax
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSE-NEXT: cvttss2si %xmm0, %ecx
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: pinsrw $1, %ecx, %xmm0
|
||||
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
|
||||
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: fptosi_2f32_to_2i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: fptosi_2f32_to_2i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: fptosi_2f32_to_2i16:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: fptosi_2f32_to_2i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
|
||||
; AVX512VL-NEXT: vzeroupper
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: fptosi_2f32_to_2i16:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512DQ-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: fptosi_2f32_to_2i16:
|
||||
; AVX512VLDQ: # %bb.0:
|
||||
; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512VLDQ-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
; AVX-LABEL: fptosi_2f32_to_2i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; AVX-NEXT: retq
|
||||
%cvt = fptosi <2 x float> %a to <2 x i16>
|
||||
ret <2 x i16> %cvt
|
||||
}
|
||||
|
@ -2407,31 +2344,17 @@ define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) {
|
|||
define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
|
||||
; SSE-LABEL: fptoui_2f32_to_2i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cvttss2si %xmm0, %eax
|
||||
; SSE-NEXT: movzbl %al, %eax
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSE-NEXT: cvttss2si %xmm0, %ecx
|
||||
; SSE-NEXT: shll $8, %ecx
|
||||
; SSE-NEXT: orl %eax, %ecx
|
||||
; SSE-NEXT: movd %ecx, %xmm0
|
||||
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: packuswb %xmm0, %xmm0
|
||||
; SSE-NEXT: packuswb %xmm0, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; VEX-LABEL: fptoui_2f32_to_2i8:
|
||||
; VEX: # %bb.0:
|
||||
; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; VEX-NEXT: vcvttss2si %xmm1, %eax
|
||||
; VEX-NEXT: vcvttss2si %xmm0, %ecx
|
||||
; VEX-NEXT: vmovd %ecx, %xmm0
|
||||
; VEX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
||||
; VEX-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: fptoui_2f32_to_2i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0
|
||||
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
; AVX-LABEL: fptoui_2f32_to_2i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX-NEXT: retq
|
||||
%cvt = fptoui <2 x float> %a to <2 x i8>
|
||||
ret <2 x i8> %cvt
|
||||
}
|
||||
|
@ -2439,64 +2362,15 @@ define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
|
|||
define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) {
|
||||
; SSE-LABEL: fptoui_2f32_to_2i16:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cvttss2si %xmm0, %eax
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSE-NEXT: cvttss2si %xmm0, %ecx
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: pinsrw $1, %ecx, %xmm0
|
||||
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
|
||||
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: fptoui_2f32_to_2i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: fptoui_2f32_to_2i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: fptoui_2f32_to_2i16:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: fptoui_2f32_to_2i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
|
||||
; AVX512VL-NEXT: vzeroupper
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: fptoui_2f32_to_2i16:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512DQ-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: fptoui_2f32_to_2i16:
|
||||
; AVX512VLDQ: # %bb.0:
|
||||
; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512VLDQ-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
; AVX-LABEL: fptoui_2f32_to_2i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; AVX-NEXT: retq
|
||||
%cvt = fptoui <2 x float> %a to <2 x i16>
|
||||
ret <2 x i16> %cvt
|
||||
}
|
||||
|
@ -2504,22 +2378,16 @@ define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) {
|
|||
define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
|
||||
; SSE-LABEL: fptosi_2f64_to_2i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cvttsd2si %xmm0, %eax
|
||||
; SSE-NEXT: movzbl %al, %eax
|
||||
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
|
||||
; SSE-NEXT: cvttsd2si %xmm0, %ecx
|
||||
; SSE-NEXT: shll $8, %ecx
|
||||
; SSE-NEXT: orl %eax, %ecx
|
||||
; SSE-NEXT: movd %ecx, %xmm0
|
||||
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||
; SSE-NEXT: andpd {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: packuswb %xmm0, %xmm0
|
||||
; SSE-NEXT: packuswb %xmm0, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: fptosi_2f64_to_2i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; AVX-NEXT: vcvttsd2si %xmm1, %eax
|
||||
; AVX-NEXT: vcvttsd2si %xmm0, %ecx
|
||||
; AVX-NEXT: vmovd %ecx, %xmm0
|
||||
; AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
||||
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX-NEXT: retq
|
||||
%cvt = fptosi <2 x double> %a to <2 x i8>
|
||||
ret <2 x i8> %cvt
|
||||
|
@ -2528,55 +2396,15 @@ define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
|
|||
define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) {
|
||||
; SSE-LABEL: fptosi_2f64_to_2i16:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cvttsd2si %xmm0, %eax
|
||||
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
|
||||
; SSE-NEXT: cvttsd2si %xmm0, %ecx
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: pinsrw $1, %ecx, %xmm0
|
||||
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; VEX-LABEL: fptosi_2f64_to_2i16:
|
||||
; VEX: # %bb.0:
|
||||
; VEX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; VEX-NEXT: vcvttsd2si %xmm1, %eax
|
||||
; VEX-NEXT: vcvttsd2si %xmm0, %ecx
|
||||
; VEX-NEXT: vmovd %ecx, %xmm0
|
||||
; VEX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
|
||||
; VEX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: fptosi_2f64_to_2i16:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: fptosi_2f64_to_2i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
|
||||
; AVX512VL-NEXT: vzeroupper
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: fptosi_2f64_to_2i16:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: fptosi_2f64_to_2i16:
|
||||
; AVX512VLDQ: # %bb.0:
|
||||
; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||
; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
; AVX-LABEL: fptosi_2f64_to_2i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; AVX-NEXT: retq
|
||||
%cvt = fptosi <2 x double> %a to <2 x i16>
|
||||
ret <2 x i16> %cvt
|
||||
}
|
||||
|
@ -2584,22 +2412,16 @@ define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) {
|
|||
define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
|
||||
; SSE-LABEL: fptoui_2f64_to_2i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cvttsd2si %xmm0, %eax
|
||||
; SSE-NEXT: movzbl %al, %eax
|
||||
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
|
||||
; SSE-NEXT: cvttsd2si %xmm0, %ecx
|
||||
; SSE-NEXT: shll $8, %ecx
|
||||
; SSE-NEXT: orl %eax, %ecx
|
||||
; SSE-NEXT: movd %ecx, %xmm0
|
||||
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||
; SSE-NEXT: andpd {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: packuswb %xmm0, %xmm0
|
||||
; SSE-NEXT: packuswb %xmm0, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: fptoui_2f64_to_2i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; AVX-NEXT: vcvttsd2si %xmm1, %eax
|
||||
; AVX-NEXT: vcvttsd2si %xmm0, %ecx
|
||||
; AVX-NEXT: vmovd %ecx, %xmm0
|
||||
; AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
||||
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX-NEXT: retq
|
||||
%cvt = fptoui <2 x double> %a to <2 x i8>
|
||||
ret <2 x i8> %cvt
|
||||
|
@ -2608,55 +2430,15 @@ define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
|
|||
define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
|
||||
; SSE-LABEL: fptoui_2f64_to_2i16:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cvttsd2si %xmm0, %eax
|
||||
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
|
||||
; SSE-NEXT: cvttsd2si %xmm0, %ecx
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: pinsrw $1, %ecx, %xmm0
|
||||
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; VEX-LABEL: fptoui_2f64_to_2i16:
|
||||
; VEX: # %bb.0:
|
||||
; VEX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; VEX-NEXT: vcvttsd2si %xmm1, %eax
|
||||
; VEX-NEXT: vcvttsd2si %xmm0, %ecx
|
||||
; VEX-NEXT: vmovd %ecx, %xmm0
|
||||
; VEX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
|
||||
; VEX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: fptoui_2f64_to_2i16:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: fptoui_2f64_to_2i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
|
||||
; AVX512VL-NEXT: vzeroupper
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: fptoui_2f64_to_2i16:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: fptoui_2f64_to_2i16:
|
||||
; AVX512VLDQ: # %bb.0:
|
||||
; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||
; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
; AVX-LABEL: fptoui_2f64_to_2i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; AVX-NEXT: retq
|
||||
%cvt = fptoui <2 x double> %a to <2 x i16>
|
||||
ret <2 x i16> %cvt
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue