forked from OSchip/llvm-project
[AVX-512] If avx512dq is available, use vpmovm2d/vpmovm2q instead of a vselect of zeroes/ones when handling sign extends of i1 without VLX.
llvm-svn: 291402
This commit is contained in:
parent
0930a523cc
commit
f51ba1e3da
|
@ -17206,22 +17206,26 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
|
|||
if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI())
|
||||
return SDValue();
|
||||
|
||||
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) {
|
||||
if (VT.is512BitVector() && InVTElt != MVT::i1) {
|
||||
if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT)
|
||||
return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0));
|
||||
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
|
||||
}
|
||||
|
||||
assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
|
||||
assert (InVTElt == MVT::i1 && "Unexpected vector type");
|
||||
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
|
||||
SDValue NegOne = DAG.getConstant(
|
||||
APInt::getAllOnesValue(ExtVT.getScalarSizeInBits()), dl, ExtVT);
|
||||
SDValue Zero = DAG.getConstant(
|
||||
APInt::getNullValue(ExtVT.getScalarSizeInBits()), dl, ExtVT);
|
||||
SDValue V;
|
||||
if (Subtarget.hasDQI()) {
|
||||
V = DAG.getNode(X86ISD::VSEXT, dl, ExtVT, In);
|
||||
assert(!VT.is512BitVector() && "Unexpected vector type");
|
||||
} else {
|
||||
SDValue NegOne = getOnesVector(ExtVT, Subtarget, DAG, dl);
|
||||
SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, dl);
|
||||
V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);
|
||||
if (VT.is512BitVector())
|
||||
return V;
|
||||
}
|
||||
|
||||
SDValue V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);
|
||||
if (VT.is512BitVector())
|
||||
return V;
|
||||
return DAG.getNode(X86ISD::VTRUNC, dl, VT, V);
|
||||
}
|
||||
|
||||
|
|
|
@ -605,13 +605,28 @@ define <8 x i1> @test_cmp_v8f64(<8 x double> %a0, <8 x double> %a1) nounwind {
|
|||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_cmp_v8f64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vcmpltpd %zmm0, %zmm1, %k1
|
||||
; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: test_cmp_v8f64:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: test_cmp_v8f64:
|
||||
; AVX512DQ: # BB#0:
|
||||
; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0
|
||||
; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovqw %zmm0, %xmm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_cmp_v8f64:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1
|
||||
; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%1 = fcmp ogt <8 x double> %a0, %a1
|
||||
ret <8 x i1> %1
|
||||
}
|
||||
|
@ -652,13 +667,28 @@ define <16 x i1> @test_cmp_v16f32(<16 x float> %a0, <16 x float> %a1) nounwind {
|
|||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_cmp_v16f32:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vcmpltps %zmm0, %zmm1, %k1
|
||||
; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: test_cmp_v16f32:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: test_cmp_v16f32:
|
||||
; AVX512DQ: # BB#0:
|
||||
; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm1, %k0
|
||||
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_cmp_v16f32:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: vcmpltps %zmm0, %zmm1, %k1
|
||||
; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%1 = fcmp ogt <16 x float> %a0, %a1
|
||||
ret <16 x i1> %1
|
||||
}
|
||||
|
@ -750,13 +780,28 @@ define <8 x i1> @test_cmp_v8i64(<8 x i64> %a0, <8 x i64> %a1) nounwind {
|
|||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_cmp_v8i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
|
||||
; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: test_cmp_v8i64:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: test_cmp_v8i64:
|
||||
; AVX512DQ: # BB#0:
|
||||
; AVX512DQ-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
|
||||
; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovqw %zmm0, %xmm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_cmp_v8i64:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
|
||||
; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%1 = icmp sgt <8 x i64> %a0, %a1
|
||||
ret <8 x i1> %1
|
||||
}
|
||||
|
@ -800,13 +845,28 @@ define <16 x i1> @test_cmp_v16i32(<16 x i32> %a0, <16 x i32> %a1) nounwind {
|
|||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_cmp_v16i32:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
|
||||
; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: test_cmp_v16i32:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: test_cmp_v16i32:
|
||||
; AVX512DQ: # BB#0:
|
||||
; AVX512DQ-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
|
||||
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_cmp_v16i32:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
|
||||
; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%1 = icmp sgt <16 x i32> %a0, %a1
|
||||
ret <16 x i1> %1
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue