[DAG] Fold shuffle(bop(shuffle(x,y),shuffle(z,w)),undef) -> bop(shuffle'(x,y),shuffle'(z,w))
Follow-up to D96345: handle unary shuffles of binops (in addition to binary shuffles) when the outer shuffle can be merged with the inner operand shuffles.

Differential Revision: https://reviews.llvm.org/D98646
This commit is contained in:
parent 2ec9239a7b
commit ffb2887103
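To illustrate the newly handled unary case, here is a minimal IR sketch (the function name and masks are hypothetical, not taken from the affected tests). The outer reverse shuffle has an undef second operand, so the previous guard, which required both outer operands to be same-opcode binops, rejected it:

define <4 x i32> @unary_shuffle_of_binop(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) {
  %s0 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %s1 = shufflevector <4 x i32> %z, <4 x i32> %w, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %b = add <4 x i32> %s0, %s1
  ; unary outer shuffle: the second shufflevector operand is undef
  %r = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %r
}

With this change the outer reverse can be merged into both inner shuffles (mask <5,1,4,0> here) and the add performed last, leaving two shuffles instead of three.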
@@ -21255,14 +21255,17 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   // Merge shuffles through binops if we are able to merge it with at least
   // one other shuffles.
+  // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
   // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
   unsigned SrcOpcode = N0.getOpcode();
-  if (SrcOpcode == N1.getOpcode() && TLI.isBinOp(SrcOpcode) &&
-      N->isOnlyUserOf(N0.getNode()) && N->isOnlyUserOf(N1.getNode())) {
+  if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
+      (N1.isUndef() ||
+       (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
+    // Get binop source ops, or just pass on the undef.
     SDValue Op00 = N0.getOperand(0);
-    SDValue Op10 = N1.getOperand(0);
     SDValue Op01 = N0.getOperand(1);
-    SDValue Op11 = N1.getOperand(1);
+    SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
+    SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
     // TODO: We might be able to relax the VT check but we don't currently
     // have any isBinOp() that has different result/ops VTs so play safe until
     // we have test coverage.
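For comparison, a sketch of the binary form that the old guard already accepted, where both operands of the outer shuffle are same-opcode binops of shuffles (function name and masks are hypothetical):

define <4 x i32> @binary_shuffle_of_binops(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
  %s0 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %s1 = shufflevector <4 x i32> %z, <4 x i32> %w, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %s2 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %s3 = shufflevector <4 x i32> %c, <4 x i32> %d, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %bop0 = add <4 x i32> %s0, %s1
  %bop1 = add <4 x i32> %s2, %s3
  ; binary outer shuffle: N1 is a binop with the same opcode as N0, not undef
  %r = shufflevector <4 x i32> %bop0, <4 x i32> %bop1, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  ret <4 x i32> %r
}

In either form the fold is only applied when the outer mask can be merged with at least one of the inner operand shuffles, as the comment above notes.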
@@ -123,26 +123,25 @@ define <8 x float> @hadd_reverse2_v8f32(<8 x float> %a0, <8 x float> %a1) {
 define <8 x float> @hadd_reverse3_v8f32(<8 x float> %a0, <8 x float> %a1) {
 ; SSE-LABEL: hadd_reverse3_v8f32:
 ; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm4
-; SSE-NEXT: haddps %xmm2, %xmm4
-; SSE-NEXT: haddps %xmm3, %xmm1
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,2,1,0]
-; SSE-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,2,1,0]
-; SSE-NEXT: movaps %xmm1, %xmm0
-; SSE-NEXT: movaps %xmm4, %xmm1
+; SSE-NEXT: haddps %xmm1, %xmm3
+; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0,3,2]
+; SSE-NEXT: haddps %xmm0, %xmm2
+; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0,3,2]
+; SSE-NEXT: movaps %xmm3, %xmm0
+; SSE-NEXT: movaps %xmm2, %xmm1
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: hadd_reverse3_v8f32:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vhaddps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vhaddps %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: hadd_reverse3_v8f32:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vhaddps %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX2-NEXT: vhaddps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
 ; AVX2-NEXT: retq
 %shuf0 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
@@ -525,7 +525,6 @@ define <8 x i32> @hadd_v8i32b(<8 x i32> %a) {
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: hadd_v8i32b:
@@ -615,7 +614,6 @@ define <8 x i32> @hsub_v8i32b(<8 x i32> %a) {
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vphsubd %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: hsub_v8i32b:
@@ -705,7 +703,6 @@ define <16 x i16> @hadd_v16i16b(<16 x i16> %a) {
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vphaddw %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: hadd_v16i16b:
@@ -795,7 +792,6 @@ define <16 x i16> @hsub_v16i16b(<16 x i16> %a) {
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vphsubw %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: hsub_v16i16b:
@@ -513,9 +513,8 @@ define <4 x i32> @signbits_mask_ashr_smax(<4 x i32> %a0, <4 x i32> %a1) {
 ; X86: # %bb.0:
 ; X86-NEXT: vpsrad $25, %xmm0, %xmm0
 ; X86-NEXT: vpsrad $25, %xmm1, %xmm1
-; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X86-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X86-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-NEXT: retl
 ;
@@ -523,9 +522,8 @@ define <4 x i32> @signbits_mask_ashr_smax(<4 x i32> %a0, <4 x i32> %a1) {
 ; X64-AVX1: # %bb.0:
 ; X64-AVX1-NEXT: vpsrad $25, %xmm0, %xmm0
 ; X64-AVX1-NEXT: vpsrad $25, %xmm1, %xmm1
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X64-AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX1-NEXT: retq
 ;
@@ -553,9 +551,8 @@ define <4 x i32> @signbits_mask_ashr_smin(<4 x i32> %a0, <4 x i32> %a1) {
 ; X86: # %bb.0:
 ; X86-NEXT: vpsrad $25, %xmm0, %xmm0
 ; X86-NEXT: vpsrad $25, %xmm1, %xmm1
-; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X86-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X86-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-NEXT: retl
 ;
@@ -563,9 +560,8 @@ define <4 x i32> @signbits_mask_ashr_smin(<4 x i32> %a0, <4 x i32> %a1) {
 ; X64-AVX1: # %bb.0:
 ; X64-AVX1-NEXT: vpsrad $25, %xmm0, %xmm0
 ; X64-AVX1-NEXT: vpsrad $25, %xmm1, %xmm1
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X64-AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX1-NEXT: retq
 ;
@@ -593,9 +589,8 @@ define <4 x i32> @signbits_mask_ashr_umax(<4 x i32> %a0, <4 x i32> %a1) {
 ; X86: # %bb.0:
 ; X86-NEXT: vpsrad $25, %xmm0, %xmm0
 ; X86-NEXT: vpsrad $25, %xmm1, %xmm1
-; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X86-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X86-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-NEXT: retl
 ;
@@ -603,9 +598,8 @@ define <4 x i32> @signbits_mask_ashr_umax(<4 x i32> %a0, <4 x i32> %a1) {
 ; X64-AVX1: # %bb.0:
 ; X64-AVX1-NEXT: vpsrad $25, %xmm0, %xmm0
 ; X64-AVX1-NEXT: vpsrad $25, %xmm1, %xmm1
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X64-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X64-AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX1-NEXT: retq
 ;
@@ -633,9 +627,8 @@ define <4 x i32> @signbits_mask_ashr_umin(<4 x i32> %a0, <4 x i32> %a1) {
 ; X86: # %bb.0:
 ; X86-NEXT: vpsrad $25, %xmm0, %xmm0
 ; X86-NEXT: vpsrad $25, %xmm1, %xmm1
-; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X86-NEXT: vpminud %xmm1, %xmm0, %xmm0
+; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X86-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-NEXT: retl
 ;
@@ -643,9 +636,8 @@ define <4 x i32> @signbits_mask_ashr_umin(<4 x i32> %a0, <4 x i32> %a1) {
 ; X64-AVX1: # %bb.0:
 ; X64-AVX1-NEXT: vpsrad $25, %xmm0, %xmm0
 ; X64-AVX1-NEXT: vpsrad $25, %xmm1, %xmm1
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X64-AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX1-NEXT: retq
 ;