forked from OSchip/llvm-project
[X86][AVX] Fold extract_subvector(splat, c) -> extract_subvector(splat, 0)
We already do this for VBROADCAST nodes; extend the fold to any splat that SelectionDAG::isSplatValue recognises as well.
This commit is contained in:
parent
853c52c988
commit
598ceb25d4
|
@ -49852,7 +49852,8 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
|
|||
// extract the lowest subvector instead which should allow
|
||||
// SimplifyDemandedVectorElts to do more simplifications.
|
||||
if (IdxVal != 0 && (InVec.getOpcode() == X86ISD::VBROADCAST ||
|
||||
InVec.getOpcode() == X86ISD::VBROADCAST_LOAD))
|
||||
InVec.getOpcode() == X86ISD::VBROADCAST_LOAD ||
|
||||
DAG.isSplatValue(InVec, /*AllowUndefs*/ false)))
|
||||
return extractSubVector(InVec, 0, DAG, SDLoc(N), SizeInBits);
|
||||
|
||||
// If we're extracting a broadcasted subvector, just use the lowest subvector.
|
||||
|
|
|
@ -1521,39 +1521,35 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
|
|||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; XOPAVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2
|
||||
; XOPAVX1-NEXT: vmovaps {{.*#+}} ymm4 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; XOPAVX1-NEXT: vandps %ymm4, %ymm2, %ymm5
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm5, %xmm6
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
|
||||
; XOPAVX1-NEXT: vpshlb %xmm6, %xmm7, %xmm6
|
||||
; XOPAVX1-NEXT: vpshlb %xmm5, %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0
|
||||
; XOPAVX1-NEXT: vandnps %ymm4, %ymm2, %ymm2
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
|
||||
; XOPAVX1-NEXT: vpsubb %xmm4, %xmm3, %xmm4
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
|
||||
; XOPAVX1-NEXT: vpcmpeqd %xmm6, %xmm6, %xmm6
|
||||
; XOPAVX1-NEXT: vpshlb %xmm6, %xmm5, %xmm5
|
||||
; XOPAVX1-NEXT: vpshlb %xmm4, %xmm5, %xmm4
|
||||
; XOPAVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm2
|
||||
; XOPAVX1-NEXT: vpshlb %xmm6, %xmm1, %xmm1
|
||||
; XOPAVX1-NEXT: vpshlb %xmm2, %xmm1, %xmm1
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm4
|
||||
; XOPAVX1-NEXT: vandnps {{.*}}(%rip), %ymm4, %ymm4
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm4, %xmm5
|
||||
; XOPAVX1-NEXT: vpsubb %xmm5, %xmm3, %xmm5
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
|
||||
; XOPAVX1-NEXT: vpcmpeqd %xmm7, %xmm7, %xmm7
|
||||
; XOPAVX1-NEXT: vpshlb %xmm7, %xmm6, %xmm6
|
||||
; XOPAVX1-NEXT: vpshlb %xmm5, %xmm6, %xmm5
|
||||
; XOPAVX1-NEXT: vpsubb %xmm4, %xmm3, %xmm3
|
||||
; XOPAVX1-NEXT: vpshlb %xmm7, %xmm1, %xmm1
|
||||
; XOPAVX1-NEXT: vpshlb %xmm3, %xmm1, %xmm1
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1
|
||||
; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||
; XOPAVX1-NEXT: vpshlb %xmm2, %xmm3, %xmm3
|
||||
; XOPAVX1-NEXT: vpshlb %xmm2, %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||
; XOPAVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: splatvar_funnnel_v32i8:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpbroadcastb %xmm2, %ymm2
|
||||
; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm4
|
||||
; XOPAVX2-NEXT: vextracti128 $1, %ymm4, %xmm5
|
||||
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm6
|
||||
; XOPAVX2-NEXT: vpshlb %xmm5, %xmm6, %xmm5
|
||||
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
|
||||
; XOPAVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm4
|
||||
; XOPAVX2-NEXT: vpshlb %xmm4, %xmm3, %xmm3
|
||||
; XOPAVX2-NEXT: vpshlb %xmm4, %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2
|
||||
; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpandn {{.*}}(%rip), %ymm2, %ymm2
|
||||
; XOPAVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
|
||||
; XOPAVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; XOPAVX2-NEXT: vpsubb %xmm3, %xmm4, %xmm3
|
||||
|
|
|
@ -1465,45 +1465,39 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
|
|||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; XOPAVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2
|
||||
; XOPAVX1-NEXT: vmovaps {{.*#+}} ymm4 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; XOPAVX1-NEXT: vandnps %ymm4, %ymm2, %ymm5
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm5, %xmm6
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
|
||||
; XOPAVX1-NEXT: vpaddb %xmm7, %xmm7, %xmm7
|
||||
; XOPAVX1-NEXT: vpshlb %xmm6, %xmm7, %xmm6
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm4
|
||||
; XOPAVX1-NEXT: vandnps {{.*}}(%rip), %ymm4, %ymm4
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm4, %xmm5
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
|
||||
; XOPAVX1-NEXT: vpaddb %xmm6, %xmm6, %xmm6
|
||||
; XOPAVX1-NEXT: vpshlb %xmm5, %xmm6, %xmm5
|
||||
; XOPAVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshlb %xmm5, %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0
|
||||
; XOPAVX1-NEXT: vandps %ymm4, %ymm2, %ymm2
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
|
||||
; XOPAVX1-NEXT: vpsubb %xmm4, %xmm3, %xmm4
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
|
||||
; XOPAVX1-NEXT: vpshlb %xmm4, %xmm5, %xmm4
|
||||
; XOPAVX1-NEXT: vpshlb %xmm4, %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0
|
||||
; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; XOPAVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm2
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
|
||||
; XOPAVX1-NEXT: vpshlb %xmm2, %xmm3, %xmm3
|
||||
; XOPAVX1-NEXT: vpshlb %xmm2, %xmm1, %xmm1
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
|
||||
; XOPAVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: splatvar_funnnel_v32i8:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpbroadcastb %xmm2, %ymm2
|
||||
; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm4
|
||||
; XOPAVX2-NEXT: vextracti128 $1, %ymm4, %xmm5
|
||||
; XOPAVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm6
|
||||
; XOPAVX2-NEXT: vpshlb %xmm5, %xmm6, %xmm5
|
||||
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
|
||||
; XOPAVX2-NEXT: vpandn {{.*}}(%rip), %ymm2, %ymm4
|
||||
; XOPAVX2-NEXT: vextracti128 $1, %ymm4, %xmm5
|
||||
; XOPAVX2-NEXT: vpshlb %xmm5, %xmm3, %xmm3
|
||||
; XOPAVX2-NEXT: vpshlb %xmm4, %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
|
||||
; XOPAVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
|
||||
; XOPAVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; XOPAVX2-NEXT: vpsubb %xmm3, %xmm4, %xmm3
|
||||
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm5
|
||||
; XOPAVX2-NEXT: vpshlb %xmm3, %xmm5, %xmm3
|
||||
; XOPAVX2-NEXT: vpsubb %xmm2, %xmm4, %xmm2
|
||||
; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; XOPAVX2-NEXT: vpsubb %xmm2, %xmm3, %xmm2
|
||||
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
|
||||
; XOPAVX2-NEXT: vpshlb %xmm2, %xmm3, %xmm3
|
||||
; XOPAVX2-NEXT: vpshlb %xmm2, %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
|
||||
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
|
||||
|
|
|
@ -720,9 +720,9 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
|
|||
;
|
||||
; XOPAVX2-LABEL: splatvar_funnnel_v4i64:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; XOPAVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm1
|
||||
; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; XOPAVX2-NEXT: vprotq %xmm1, %xmm2, %xmm2
|
||||
; XOPAVX2-NEXT: vprotq %xmm1, %xmm0, %xmm0
|
||||
|
@ -825,9 +825,9 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind
|
|||
;
|
||||
; XOPAVX2-LABEL: splatvar_funnnel_v8i32:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; XOPAVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1
|
||||
; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; XOPAVX2-NEXT: vprotd %xmm1, %xmm2, %xmm2
|
||||
; XOPAVX2-NEXT: vprotd %xmm1, %xmm0, %xmm0
|
||||
|
@ -963,9 +963,9 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
|
|||
;
|
||||
; XOPAVX2-LABEL: splatvar_funnnel_v16i16:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpbroadcastw %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; XOPAVX2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
|
||||
; XOPAVX2-NEXT: vpbroadcastw %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; XOPAVX2-NEXT: vprotw %xmm1, %xmm2, %xmm2
|
||||
; XOPAVX2-NEXT: vprotw %xmm1, %xmm0, %xmm0
|
||||
|
@ -1155,9 +1155,9 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
|
|||
;
|
||||
; XOPAVX2-LABEL: splatvar_funnnel_v32i8:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; XOPAVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1
|
||||
; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; XOPAVX2-NEXT: vprotb %xmm1, %xmm2, %xmm2
|
||||
; XOPAVX2-NEXT: vprotb %xmm1, %xmm0, %xmm0
|
||||
|
|
|
@ -430,7 +430,6 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
|
|||
; AVX512F-LABEL: splatvar_rotate_v32i16:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; AVX512F-NEXT: vpbroadcastw %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
|
||||
; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
|
||||
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
|
@ -447,7 +446,6 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
|
|||
; AVX512VL-LABEL: splatvar_rotate_v32i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
|
||||
; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
|
||||
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
|
@ -506,7 +504,6 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
|
|||
; AVX512F-LABEL: splatvar_rotate_v64i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vpbroadcastb %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
|
||||
; AVX512F-NEXT: vpsubb %xmm1, %xmm3, %xmm1
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
|
@ -532,7 +529,6 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
|
|||
; AVX512VL-LABEL: splatvar_rotate_v64i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpbroadcastb %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
|
||||
; AVX512VL-NEXT: vpsubb %xmm1, %xmm3, %xmm1
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
|
|
Loading…
Reference in New Issue