forked from OSchip/llvm-project
[X86] Fix (v2f64 (s/uint_to_fp (v2i1))) to avoid scalarization without AVX512DQ.
Previously we extended v2i1 to v2i64 and then tried to use cvtuqq2pd/cvtqq2pd, but those instructions are only available with AVX512DQ, so without it we ended up scalarizing the conversion. Now we widen to v4i1 first and extend to v4i32, which lets us use cvtdq2pd/cvtudq2pd instead. llvm-svn: 321420
This commit is contained in:
parent
64edcdc3fb
commit
2d1d9a11c1
|
@ -15543,7 +15543,6 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
|
|||
MVT VT = Op.getSimpleValueType();
|
||||
SDLoc dl(Op);
|
||||
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
if (SrcVT.isVector()) {
|
||||
if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
|
||||
return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
|
||||
|
@ -15551,9 +15550,15 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
|
|||
DAG.getUNDEF(SrcVT)));
|
||||
}
|
||||
if (SrcVT.getVectorElementType() == MVT::i1) {
|
||||
if (SrcVT == MVT::v2i1 && TLI.isTypeLegal(SrcVT))
|
||||
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
|
||||
DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v2i64, Src));
|
||||
if (SrcVT == MVT::v2i1) {
|
||||
// For v2i1, we need to widen to v4i1 first.
|
||||
assert(VT == MVT::v2f64 && "Unexpected type");
|
||||
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Src,
|
||||
DAG.getUNDEF(MVT::v2i1));
|
||||
return DAG.getNode(X86ISD::CVTSI2P, dl, Op.getValueType(),
|
||||
DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Src));
|
||||
}
|
||||
|
||||
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
|
||||
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
|
||||
DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src));
|
||||
|
@ -15903,9 +15908,15 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
|
|||
SDLoc dl(Op);
|
||||
|
||||
if (SrcVT.getVectorElementType() == MVT::i1) {
|
||||
if (SrcVT == MVT::v2i1)
|
||||
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
|
||||
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0));
|
||||
if (SrcVT == MVT::v2i1) {
|
||||
// For v2i1, we need to widen to v4i1 first.
|
||||
assert(Op.getValueType() == MVT::v2f64 && "Unexpected type");
|
||||
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, N0,
|
||||
DAG.getUNDEF(MVT::v2i1));
|
||||
return DAG.getNode(X86ISD::CVTUI2P, dl, MVT::v2f64,
|
||||
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0));
|
||||
}
|
||||
|
||||
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
|
||||
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
|
||||
DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0));
|
||||
|
|
|
@ -1691,8 +1691,8 @@ define <2 x double> @sbto2f64(<2 x double> %a) {
|
|||
; VLDQ: # %bb.0:
|
||||
; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
||||
; VLDQ-NEXT: vcmpltpd %xmm0, %xmm1, %k0
|
||||
; VLDQ-NEXT: vpmovm2q %k0, %xmm0
|
||||
; VLDQ-NEXT: vcvtqq2pd %xmm0, %xmm0
|
||||
; VLDQ-NEXT: vpmovm2d %k0, %xmm0
|
||||
; VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
|
||||
; VLDQ-NEXT: retq
|
||||
;
|
||||
; VLNODQ-LABEL: sbto2f64:
|
||||
|
@ -1700,12 +1700,8 @@ define <2 x double> @sbto2f64(<2 x double> %a) {
|
|||
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; VLNODQ-NEXT: vcmpltpd %xmm0, %xmm1, %k1
|
||||
; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; VLNODQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; VLNODQ-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm1
|
||||
; VLNODQ-NEXT: vmovq %xmm0, %rax
|
||||
; VLNODQ-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm0
|
||||
; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; VLNODQ-NEXT: vcvtdq2pd %xmm0, %xmm0
|
||||
; VLNODQ-NEXT: retq
|
||||
%cmpres = fcmp ogt <2 x double> %a, zeroinitializer
|
||||
%1 = sitofp <2 x i1> %cmpres to <2 x double>
|
||||
|
@ -2002,30 +1998,22 @@ define <2 x double> @ubto2f64(<2 x i32> %a) {
|
|||
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
|
||||
; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
||||
; NOVL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; NOVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; NOVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
|
||||
; NOVL-NEXT: vpand %xmm1, %xmm0, %xmm0
|
||||
; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0
|
||||
; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
|
||||
; NOVL-NEXT: vzeroupper
|
||||
; NOVL-NEXT: retq
|
||||
;
|
||||
; VLDQ-LABEL: ubto2f64:
|
||||
; VLDQ: # %bb.0:
|
||||
; VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; VLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
|
||||
; VLDQ-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
|
||||
; VLDQ-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
|
||||
; VLDQ-NEXT: vcvtqq2pd %xmm0, %xmm0
|
||||
; VLDQ-NEXT: retq
|
||||
;
|
||||
; VLNODQ-LABEL: ubto2f64:
|
||||
; VLNODQ: # %bb.0:
|
||||
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; VLNODQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
|
||||
; VLNODQ-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
|
||||
; VLNODQ-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
|
||||
; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; VLNODQ-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm1
|
||||
; VLNODQ-NEXT: vmovq %xmm0, %rax
|
||||
; VLNODQ-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm0
|
||||
; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; VLNODQ-NEXT: retq
|
||||
; VL-LABEL: ubto2f64:
|
||||
; VL: # %bb.0:
|
||||
; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
|
||||
; VL-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
|
||||
; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
|
||||
; VL-NEXT: vcvtudq2pd %xmm0, %xmm0
|
||||
; VL-NEXT: retq
|
||||
%mask = icmp ult <2 x i32> %a, zeroinitializer
|
||||
%1 = uitofp <2 x i1> %mask to <2 x double>
|
||||
ret <2 x double> %1
|
||||
|
|
|
@ -2602,16 +2602,16 @@ define <2 x double> @sbto2f64(<2 x double> %a) {
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00]
|
||||
; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sbto2f64:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
|
||||
; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00]
|
||||
; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
|
||||
; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.33]
|
||||
; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
|
||||
; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
|
||||
; SKX-NEXT: retq # sched: [7:1.00]
|
||||
%cmpres = fcmp ogt <2 x double> %a, zeroinitializer
|
||||
%1 = sitofp <2 x i1> %cmpres to <2 x double>
|
||||
|
@ -2989,8 +2989,8 @@ define <2 x double> @ubto2f64(<2 x i32> %a) {
|
|||
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
|
||||
; GENERIC-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [4:0.50]
|
||||
; GENERIC-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00]
|
||||
; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
|
||||
; GENERIC-NEXT: vcvtudq2pd %xmm0, %xmm0 # sched: [4:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: ubto2f64:
|
||||
|
@ -2998,8 +2998,8 @@ define <2 x double> @ubto2f64(<2 x i32> %a) {
|
|||
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
|
||||
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
|
||||
; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 # sched: [3:1.00]
|
||||
; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
|
||||
; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.33]
|
||||
; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
|
||||
; SKX-NEXT: vcvtudq2pd %xmm0, %xmm0 # sched: [5:1.00]
|
||||
; SKX-NEXT: retq # sched: [7:1.00]
|
||||
%mask = icmp ult <2 x i32> %a, zeroinitializer
|
||||
%1 = uitofp <2 x i1> %mask to <2 x double>
|
||||
|
|
Loading…
Reference in New Issue