[X86][AVX] computeKnownBitsForTargetNode - add VTRUNC/VTRUNCS/VTRUNCUS known zero upper elements handling.
Like many of the AVX512 conversion ops, the VTRUNC ops guarantee the upper destination elements are zero.
commit 80a0dc59b7
parent 08ba4f112d
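The reasoning behind the change: the X86 VTRUNC/VTRUNCS/VTRUNCUS nodes (lowered to VPMOVQB/VPMOVQW/VPMOVDB and friends in the tests below) write the truncated elements into the low lanes of the destination register and zero every lane above them. A minimal scalar model of that behaviour, for illustration only (the helper name and the 4 x i64 -> 16 x i8 shape are chosen to match the tests below, not taken from the patch):

#include <array>
#include <cstdint>
#include <iostream>

// Scalar model of an AVX-512 narrowing truncate such as VPMOVQB: one truncated
// byte per source element goes into the low lanes of the 16-byte destination,
// and every remaining lane is zeroed.
static std::array<uint8_t, 16> vpmovqb_model(const std::array<uint64_t, 4> &Src) {
  std::array<uint8_t, 16> Dst{};           // all 16 lanes start out zero
  for (size_t I = 0; I != Src.size(); ++I)
    Dst[I] = static_cast<uint8_t>(Src[I]); // truncate each 64-bit element
  return Dst;                              // lanes 4..15 are still zero
}

int main() {
  auto R = vpmovqb_model({0x1ff, 2, 3, 4});
  // Blending lanes 4..15 with an all-zero vector would leave R unchanged,
  // which is why the vpxor/vpblendw (and vmovq) steps in the tests below
  // become dead once the compiler knows those lanes are already zero.
  for (unsigned V : R)
    std::cout << V << ' ';
  std::cout << '\n';                       // prints: 255 2 3 4 0 0 ... 0
}

Reporting this guarantee through computeKnownBitsForTargetNode lets the generic combines prove the extra zeroing code is redundant.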
@@ -33856,6 +33856,9 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     }
     break;
   }
+  case X86ISD::VTRUNC:
+  case X86ISD::VTRUNCS:
+  case X86ISD::VTRUNCUS:
   case X86ISD::CVTSI2P:
   case X86ISD::CVTUI2P:
   case X86ISD::CVTP2SI:
@@ -33872,7 +33875,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
   case X86ISD::VMFPROUND:
   case X86ISD::CVTPS2PH:
   case X86ISD::MCVTPS2PH: {
-    // Conversions - upper elements are known zero.
+    // Truncations/Conversions - upper elements are known zero.
     EVT SrcVT = Op.getOperand(0).getValueType();
     if (SrcVT.isVector()) {
       unsigned NumSrcElts = SrcVT.getVectorNumElements();
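The second hunk is cut off before the end of the shared case body. For readers following along, a sketch of how that body plausibly continues, modelled on the neighbouring conversion cases (the DemandedElts check and the Known.setAllZero() call are assumptions here, not lines quoted from this commit):

      // ...continuing inside "if (SrcVT.isVector())": if every demanded element
      // lies above the NumSrcElts results the truncation/conversion produces,
      // those elements are guaranteed zero.
      if (VT.getVectorNumElements() > NumSrcElts &&
          DemandedElts.countTrailingZeros() >= NumSrcElts)
        Known.setAllZero();
    }
    break;
  }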

@@ -3418,8 +3418,6 @@ define <2 x i64> @test_mm256_cvtepi64_epi8(<4 x i64> %__A) {
 ; CHECK-LABEL: test_mm256_cvtepi64_epi8:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vpmovqb %ymm0, %xmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: ret{{[l|q]}}
 entry:
@@ -3433,7 +3431,6 @@ define <2 x i64> @test_mm256_cvtepi64_epi16(<4 x i64> %__A) {
 ; CHECK-LABEL: test_mm256_cvtepi64_epi16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vpmovqw %ymm0, %xmm0
-; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: ret{{[l|q]}}
 entry:
@@ -3447,7 +3444,6 @@ define <2 x i64> @test_mm256_cvtepi32_epi8(<4 x i64> %__A) {
 ; CHECK-LABEL: test_mm256_cvtepi32_epi8:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vpmovdb %ymm0, %xmm0
-; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: ret{{[l|q]}}
 entry:

@@ -505,7 +505,6 @@ define <2 x i64> @trunc_v8i32_to_v8i8_return_v2i64(<8 x i32> %vec) nounwind {
 ; AVX512VL-LABEL: trunc_v8i32_to_v8i8_return_v2i64:
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vpmovdb %ymm0, %xmm0
-; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
 ;
@@ -520,14 +519,12 @@ define <2 x i64> @trunc_v8i32_to_v8i8_return_v2i64(<8 x i32> %vec) nounwind {
 ; AVX512BWVL-LABEL: trunc_v8i32_to_v8i8_return_v2i64:
 ; AVX512BWVL: # %bb.0:
 ; AVX512BWVL-NEXT: vpmovdb %ymm0, %xmm0
-; AVX512BWVL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512BWVL-NEXT: vzeroupper
 ; AVX512BWVL-NEXT: retq
 ;
 ; AVX512VBMIVL-LABEL: trunc_v8i32_to_v8i8_return_v2i64:
 ; AVX512VBMIVL: # %bb.0:
 ; AVX512VBMIVL-NEXT: vpmovdb %ymm0, %xmm0
-; AVX512VBMIVL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VBMIVL-NEXT: vzeroupper
 ; AVX512VBMIVL-NEXT: retq
 %truncated.vec = trunc <8 x i32> %vec to <8 x i8>
@@ -685,7 +682,6 @@ define <16 x i8> @trunc_v8i32_to_v8i8_return_v16i8(<8 x i32> %vec) nounwind {
 ; AVX512VL-LABEL: trunc_v8i32_to_v8i8_return_v16i8:
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vpmovdb %ymm0, %xmm0
-; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
 ;
@@ -700,14 +696,12 @@ define <16 x i8> @trunc_v8i32_to_v8i8_return_v16i8(<8 x i32> %vec) nounwind {
 ; AVX512BWVL-LABEL: trunc_v8i32_to_v8i8_return_v16i8:
 ; AVX512BWVL: # %bb.0:
 ; AVX512BWVL-NEXT: vpmovdb %ymm0, %xmm0
-; AVX512BWVL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512BWVL-NEXT: vzeroupper
 ; AVX512BWVL-NEXT: retq
 ;
 ; AVX512VBMIVL-LABEL: trunc_v8i32_to_v8i8_return_v16i8:
 ; AVX512VBMIVL: # %bb.0:
 ; AVX512VBMIVL-NEXT: vpmovdb %ymm0, %xmm0
-; AVX512VBMIVL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VBMIVL-NEXT: vzeroupper
 ; AVX512VBMIVL-NEXT: retq
 %truncated = trunc <8 x i32> %vec to <8 x i8>
@@ -751,7 +745,6 @@ define <2 x i64> @trunc_v4i64_to_v4i16_return_v2i64(<4 x i64> %vec) nounwind {
 ; AVX512VL-LABEL: trunc_v4i64_to_v4i16_return_v2i64:
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vpmovqw %ymm0, %xmm0
-; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
 ;
@@ -766,14 +759,12 @@ define <2 x i64> @trunc_v4i64_to_v4i16_return_v2i64(<4 x i64> %vec) nounwind {
 ; AVX512BWVL-LABEL: trunc_v4i64_to_v4i16_return_v2i64:
 ; AVX512BWVL: # %bb.0:
 ; AVX512BWVL-NEXT: vpmovqw %ymm0, %xmm0
-; AVX512BWVL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512BWVL-NEXT: vzeroupper
 ; AVX512BWVL-NEXT: retq
 ;
 ; AVX512VBMIVL-LABEL: trunc_v4i64_to_v4i16_return_v2i64:
 ; AVX512VBMIVL: # %bb.0:
 ; AVX512VBMIVL-NEXT: vpmovqw %ymm0, %xmm0
-; AVX512VBMIVL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VBMIVL-NEXT: vzeroupper
 ; AVX512VBMIVL-NEXT: retq
 %truncated = trunc <4 x i64> %vec to <4 x i16>
@@ -941,7 +932,6 @@ define <8 x i16> @trunc_v4i64_to_v4i16_return_v8i16(<4 x i64> %vec) nounwind {
 ; AVX512VL-LABEL: trunc_v4i64_to_v4i16_return_v8i16:
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vpmovqw %ymm0, %xmm0
-; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
 ;
@@ -956,14 +946,12 @@ define <8 x i16> @trunc_v4i64_to_v4i16_return_v8i16(<4 x i64> %vec) nounwind {
 ; AVX512BWVL-LABEL: trunc_v4i64_to_v4i16_return_v8i16:
 ; AVX512BWVL: # %bb.0:
 ; AVX512BWVL-NEXT: vpmovqw %ymm0, %xmm0
-; AVX512BWVL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512BWVL-NEXT: vzeroupper
 ; AVX512BWVL-NEXT: retq
 ;
 ; AVX512VBMIVL-LABEL: trunc_v4i64_to_v4i16_return_v8i16:
 ; AVX512VBMIVL: # %bb.0:
 ; AVX512VBMIVL-NEXT: vpmovqw %ymm0, %xmm0
-; AVX512VBMIVL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VBMIVL-NEXT: vzeroupper
 ; AVX512VBMIVL-NEXT: retq
 %truncated = trunc <4 x i64> %vec to <4 x i16>
@@ -1008,8 +996,6 @@ define <16 x i8> @trunc_v4i64_to_v4i8_return_v16i8(<4 x i64> %vec) nounwind {
 ; AVX512VL-LABEL: trunc_v4i64_to_v4i8_return_v16i8:
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vpmovqb %ymm0, %xmm0
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
 ;
@@ -1025,16 +1011,12 @@ define <16 x i8> @trunc_v4i64_to_v4i8_return_v16i8(<4 x i64> %vec) nounwind {
 ; AVX512BWVL-LABEL: trunc_v4i64_to_v4i8_return_v16i8:
 ; AVX512BWVL: # %bb.0:
 ; AVX512BWVL-NEXT: vpmovqb %ymm0, %xmm0
-; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
 ; AVX512BWVL-NEXT: vzeroupper
 ; AVX512BWVL-NEXT: retq
 ;
 ; AVX512VBMIVL-LABEL: trunc_v4i64_to_v4i8_return_v16i8:
 ; AVX512VBMIVL: # %bb.0:
 ; AVX512VBMIVL-NEXT: vpmovqb %ymm0, %xmm0
-; AVX512VBMIVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VBMIVL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
 ; AVX512VBMIVL-NEXT: vzeroupper
 ; AVX512VBMIVL-NEXT: retq
 %truncated = trunc <4 x i64> %vec to <4 x i8>

@@ -499,7 +499,6 @@ define <16 x i8> @trunc_v8i64_to_v8i8_return_v16i8(<8 x i64> %vec) nounwind {
 ; AVX512-LABEL: trunc_v8i64_to_v8i8_return_v16i8:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpmovqb %zmm0, %xmm0
-; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
 %truncated = trunc <8 x i64> %vec to <8 x i8>