forked from OSchip/llvm-project
[X86] Improve v4i32->v4f64 uint_to_fp for AVX1/AVX2 targets.
Use zext+or+fsub to do the conversion. Similar to D71971. Differential Revision: https://reviews.llvm.org/D71971
This commit is contained in:
parent
317cbdad4d
commit
6a0564adcf
|
@@ -19101,6 +19101,21 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
|
|||
return Res;
|
||||
}
|
||||
|
||||
if (Subtarget.hasAVX() && VecIntVT == MVT::v4i32 &&
|
||||
Op->getSimpleValueType(0) == MVT::v4f64) {
|
||||
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i64, V);
|
||||
SDValue VBias =
|
||||
DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), DL, MVT::v4f64);
|
||||
SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v4i64, ZExtIn,
|
||||
DAG.getBitcast(MVT::v4i64, VBias));
|
||||
Or = DAG.getBitcast(MVT::v4f64, Or);
|
||||
|
||||
if (IsStrict)
|
||||
return DAG.getNode(ISD::STRICT_FSUB, DL, {MVT::v4f64, MVT::Other},
|
||||
{Op.getOperand(0), Or, VBias});
|
||||
return DAG.getNode(ISD::FSUB, DL, MVT::v4f64, Or, VBias);
|
||||
}
|
||||
|
||||
// The algorithm is the following:
|
||||
// #ifdef __SSE4_1__
|
||||
// uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa);
|
||||
|
|
|
@@ -582,38 +582,23 @@ define <4 x double> @sitofp_v4i32_v4f64(<4 x i32> %x) #0 {
|
|||
}
|
||||
|
||||
define <4 x double> @uitofp_v4i32_v4f64(<4 x i32> %x) #0 {
|
||||
; AVX1-32-LABEL: uitofp_v4i32_v4f64:
|
||||
; AVX1-32: # %bb.0:
|
||||
; AVX1-32-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
|
||||
; AVX1-32-NEXT: vpsrld $16, %xmm0, %xmm0
|
||||
; AVX1-32-NEXT: vcvtdq2pd %xmm0, %ymm0
|
||||
; AVX1-32-NEXT: vmulpd {{\.LCPI.*}}, %ymm0, %ymm0
|
||||
; AVX1-32-NEXT: vcvtdq2pd %xmm1, %ymm1
|
||||
; AVX1-32-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
||||
; AVX1-32-NEXT: retl
|
||||
;
|
||||
; AVX1-64-LABEL: uitofp_v4i32_v4f64:
|
||||
; AVX1-64: # %bb.0:
|
||||
; AVX1-64-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
|
||||
; AVX1-64-NEXT: vpsrld $16, %xmm0, %xmm0
|
||||
; AVX1-64-NEXT: vcvtdq2pd %xmm0, %ymm0
|
||||
; AVX1-64-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-64-NEXT: vcvtdq2pd %xmm1, %ymm1
|
||||
; AVX1-64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
||||
; AVX1-64-NEXT: retq
|
||||
; AVX1-LABEL: uitofp_v4i32_v4f64:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
|
||||
; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX2-LABEL: uitofp_v4i32_v4f64:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
|
||||
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4]
|
||||
; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
|
||||
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
|
||||
; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
|
||||
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX512F-LABEL: uitofp_v4i32_v4f64:
|
||||
|
|
|
@@ -724,13 +724,9 @@ define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) {
|
|||
;
|
||||
; VEX-LABEL: uitofp_4i32_to_2f64:
|
||||
; VEX: # %bb.0:
|
||||
; VEX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; VEX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
|
||||
; VEX-NEXT: vpsrld $16, %xmm0, %xmm0
|
||||
; VEX-NEXT: vcvtdq2pd %xmm1, %xmm1
|
||||
; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
|
||||
; VEX-NEXT: vmulpd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; VEX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
|
||||
; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; VEX-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
|
||||
; VEX-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; VEX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: uitofp_4i32_to_2f64:
|
||||
|
@@ -1017,24 +1013,20 @@ define <4 x double> @uitofp_4i32_to_4f64(<4 x i32> %a) {
|
|||
; AVX1-LABEL: uitofp_4i32_to_4f64:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
|
||||
; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1
|
||||
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
|
||||
; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
|
||||
; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: uitofp_4i32_to_4f64:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
|
||||
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4]
|
||||
; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
|
||||
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
|
||||
; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
|
||||
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: uitofp_4i32_to_4f64:
|
||||
|
@@ -3525,17 +3517,20 @@ define <2 x double> @uitofp_load_4i32_to_2f64_2(<4 x i32>* %x) {
|
|||
; SSE41-NEXT: subpd %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; VEX-LABEL: uitofp_load_4i32_to_2f64_2:
|
||||
; VEX: # %bb.0:
|
||||
; VEX-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; VEX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; VEX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
|
||||
; VEX-NEXT: vpsrld $16, %xmm0, %xmm0
|
||||
; VEX-NEXT: vcvtdq2pd %xmm1, %xmm1
|
||||
; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
|
||||
; VEX-NEXT: vmulpd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; VEX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
|
||||
; VEX-NEXT: retq
|
||||
; AVX1-LABEL: uitofp_load_4i32_to_2f64_2:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
|
||||
; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: uitofp_load_4i32_to_2f64_2:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
|
||||
; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: uitofp_load_4i32_to_2f64_2:
|
||||
; AVX512F: # %bb.0:
|
||||
|
@@ -3587,17 +3582,20 @@ define <2 x double> @uitofp_volatile_load_4i32_to_2f64_2(<4 x i32>* %x) {
|
|||
; SSE41-NEXT: subpd %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; VEX-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
|
||||
; VEX: # %bb.0:
|
||||
; VEX-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; VEX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; VEX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
|
||||
; VEX-NEXT: vpsrld $16, %xmm0, %xmm0
|
||||
; VEX-NEXT: vcvtdq2pd %xmm1, %xmm1
|
||||
; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
|
||||
; VEX-NEXT: vmulpd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; VEX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
|
||||
; VEX-NEXT: retq
|
||||
; AVX1-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
|
||||
; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
|
||||
; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
|
||||
; AVX512F: # %bb.0:
|
||||
|
@@ -3841,25 +3839,20 @@ define <4 x double> @uitofp_load_4i32_to_4f64(<4 x i32> *%a) {
|
|||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
|
||||
; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1
|
||||
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
|
||||
; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
|
||||
; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: uitofp_load_4i32_to_4f64:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
|
||||
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4]
|
||||
; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
|
||||
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
|
||||
; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
|
||||
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
|
||||
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: uitofp_load_4i32_to_4f64:
|
||||
|
|
|
@@ -159,8 +159,8 @@ define <4 x double> @test3(<4 x i32> %arg) {
|
|||
; CHECK-LABEL: test3:
|
||||
; This test used to crash because we were custom lowering it as if it was
|
||||
; a conversion between <4 x i32> and <4 x float>.
|
||||
; AVX: vcvtdq2pd
|
||||
; AVX2: vcvtdq2pd
|
||||
; AVX: vsubpd
|
||||
; AVX2: vsubpd
|
||||
; CHECK: retq
|
||||
%tmp = uitofp <4 x i32> %arg to <4 x double>
|
||||
ret <4 x double> %tmp
|
||||
|
|
|
@@ -7269,12 +7269,12 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i32(<4 x i32> %x) #0 {
|
|||
; AVX1-LABEL: constrained_vector_uitofp_v4f64_v4i32:
|
||||
; AVX1: # %bb.0: # %entry
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
|
||||
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
|
||||
; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1
|
||||
; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
|
||||
; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constrained_vector_uitofp_v4f64_v4i32:
|
||||
|
|
Loading…
Reference in New Issue