forked from OSchip/llvm-project
[X86][SSE] canonicalizeShuffleWithBinOps - add MOVSD/MOVSS handling.
This commit is contained in:
parent
518e6f341d
commit
53283cc2f1
|
@@ -37100,6 +37100,8 @@ static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG,
|
|||
break;
|
||||
LLVM_FALLTHROUGH;
|
||||
}
|
||||
case X86ISD::MOVSD:
|
||||
case X86ISD::MOVSS:
|
||||
case X86ISD::BLENDI:
|
||||
case X86ISD::SHUFP:
|
||||
case X86ISD::UNPCKH:
|
||||
|
|
|
@@ -20,18 +20,9 @@ define <4 x float> @pair_sum_v4f32_v4f32(<4 x float> %0, <4 x float> %1, <4 x fl
|
|||
; SSSE3-SLOW-LABEL: pair_sum_v4f32_v4f32:
|
||||
; SSSE3-SLOW: # %bb.0:
|
||||
; SSSE3-SLOW-NEXT: haddps %xmm1, %xmm0
|
||||
; SSSE3-SLOW-NEXT: movaps %xmm0, %xmm1
|
||||
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
|
||||
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3,1,3]
|
||||
; SSSE3-SLOW-NEXT: addps %xmm1, %xmm0
|
||||
; SSSE3-SLOW-NEXT: haddps %xmm2, %xmm2
|
||||
; SSSE3-SLOW-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
|
||||
; SSSE3-SLOW-NEXT: addps %xmm2, %xmm1
|
||||
; SSSE3-SLOW-NEXT: haddps %xmm3, %xmm3
|
||||
; SSSE3-SLOW-NEXT: movshdup {{.*#+}} xmm2 = xmm3[1,1,3,3]
|
||||
; SSSE3-SLOW-NEXT: addps %xmm3, %xmm2
|
||||
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3]
|
||||
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
|
||||
; SSSE3-SLOW-NEXT: haddps %xmm2, %xmm3
|
||||
; SSSE3-SLOW-NEXT: haddps %xmm3, %xmm0
|
||||
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,3,2]
|
||||
; SSSE3-SLOW-NEXT: retq
|
||||
;
|
||||
; SSSE3-FAST-LABEL: pair_sum_v4f32_v4f32:
|
||||
|
@@ -106,13 +97,11 @@ define <4 x i32> @pair_sum_v4i32_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2,
|
|||
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,1,3]
|
||||
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,1,3]
|
||||
; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm0
|
||||
; SSSE3-SLOW-NEXT: phaddd %xmm2, %xmm2
|
||||
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,1,1]
|
||||
; SSSE3-SLOW-NEXT: paddd %xmm2, %xmm1
|
||||
; SSSE3-SLOW-NEXT: phaddd %xmm3, %xmm3
|
||||
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,1,1]
|
||||
; SSSE3-SLOW-NEXT: phaddd %xmm2, %xmm3
|
||||
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
|
||||
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,1,0,1]
|
||||
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[1,1]
|
||||
; SSSE3-SLOW-NEXT: paddd %xmm3, %xmm2
|
||||
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3]
|
||||
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
|
||||
; SSSE3-SLOW-NEXT: retq
|
||||
;
|
||||
|
@@ -699,23 +688,20 @@ define <4 x i32> @sequential_sum_v4i32_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i3
|
|||
; SSSE3-SLOW-NEXT: phaddd %xmm1, %xmm4
|
||||
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
|
||||
; SSSE3-SLOW-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
||||
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSSE3-SLOW-NEXT: paddd %xmm4, %xmm1
|
||||
; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm0
|
||||
; SSSE3-SLOW-NEXT: paddd %xmm0, %xmm4
|
||||
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,1,1]
|
||||
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm4 = xmm2[3,3,3,3]
|
||||
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm5 = xmm2[0,1,0,1]
|
||||
; SSSE3-SLOW-NEXT: paddd %xmm2, %xmm5
|
||||
; SSSE3-SLOW-NEXT: paddd %xmm4, %xmm5
|
||||
; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm5
|
||||
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
|
||||
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
|
||||
; SSSE3-SLOW-NEXT: paddd %xmm3, %xmm2
|
||||
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
|
||||
; SSSE3-SLOW-NEXT: paddd %xmm2, %xmm3
|
||||
; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm3
|
||||
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm5[2,3]
|
||||
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,0]
|
||||
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm6 = xmm3[2,3,2,3]
|
||||
; SSSE3-SLOW-NEXT: paddd %xmm3, %xmm6
|
||||
; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm6
|
||||
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,1],xmm5[2,3]
|
||||
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,1],xmm6[2,0]
|
||||
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm2[3,3]
|
||||
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm3[2,0]
|
||||
; SSSE3-SLOW-NEXT: paddd %xmm4, %xmm0
|
||||
; SSSE3-SLOW-NEXT: retq
|
||||
;
|
||||
; SSSE3-FAST-LABEL: sequential_sum_v4i32_v4i32:
|
||||
|
@@ -724,21 +710,19 @@ define <4 x i32> @sequential_sum_v4i32_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i3
|
|||
; SSSE3-FAST-NEXT: phaddd %xmm1, %xmm4
|
||||
; SSSE3-FAST-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
|
||||
; SSSE3-FAST-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
||||
; SSSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSSE3-FAST-NEXT: paddd %xmm4, %xmm1
|
||||
; SSSE3-FAST-NEXT: paddd %xmm1, %xmm0
|
||||
; SSSE3-FAST-NEXT: paddd %xmm0, %xmm4
|
||||
; SSSE3-FAST-NEXT: movdqa %xmm2, %xmm1
|
||||
; SSSE3-FAST-NEXT: phaddd %xmm2, %xmm1
|
||||
; SSSE3-FAST-NEXT: pshufd {{.*#+}} xmm4 = xmm2[3,3,3,3]
|
||||
; SSSE3-FAST-NEXT: paddd %xmm1, %xmm4
|
||||
; SSSE3-FAST-NEXT: paddd %xmm2, %xmm4
|
||||
; SSSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
|
||||
; SSSE3-FAST-NEXT: pshufd {{.*#+}} xmm2 = xmm3[3,3,3,3]
|
||||
; SSSE3-FAST-NEXT: phaddd %xmm3, %xmm3
|
||||
; SSSE3-FAST-NEXT: paddd %xmm3, %xmm2
|
||||
; SSSE3-FAST-NEXT: paddd %xmm1, %xmm2
|
||||
; SSSE3-FAST-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3]
|
||||
; SSSE3-FAST-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
|
||||
; SSSE3-FAST-NEXT: paddd %xmm2, %xmm1
|
||||
; SSSE3-FAST-NEXT: movdqa %xmm3, %xmm5
|
||||
; SSSE3-FAST-NEXT: phaddd %xmm3, %xmm5
|
||||
; SSSE3-FAST-NEXT: pshufd {{.*#+}} xmm6 = xmm3[2,3,2,3]
|
||||
; SSSE3-FAST-NEXT: paddd %xmm5, %xmm6
|
||||
; SSSE3-FAST-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,1],xmm1[2,3]
|
||||
; SSSE3-FAST-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,1],xmm6[2,0]
|
||||
; SSSE3-FAST-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm2[3,3]
|
||||
; SSSE3-FAST-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm3[2,0]
|
||||
; SSSE3-FAST-NEXT: paddd %xmm4, %xmm0
|
||||
; SSSE3-FAST-NEXT: retq
|
||||
;
|
||||
; AVX1-SLOW-LABEL: sequential_sum_v4i32_v4i32:
|
||||
|
|
Loading…
Reference in New Issue