forked from OSchip/llvm-project
[SelectionDAG] allow FP binops in SimplifyDemandedVectorElts
This is intended to bring the backend on par with the functionality that was added to the IR version of SimplifyDemandedVectorElts in rL343727. The original motivation is that we need to improve demanded-vector-elements handling in several ways to avoid problems that would be exposed by D51553.
Differential Revision: https://reviews.llvm.org/D52912
llvm-svn: 344541
This commit is contained in:
parent
4c9eb0fac4
commit
4cf1da0e02
|
@ -1712,7 +1712,12 @@ bool TargetLowering::SimplifyDemandedVectorElts(
|
|||
break;
|
||||
}
|
||||
case ISD::ADD:
|
||||
case ISD::SUB: {
|
||||
case ISD::SUB:
|
||||
case ISD::FADD:
|
||||
case ISD::FSUB:
|
||||
case ISD::FMUL:
|
||||
case ISD::FDIV:
|
||||
case ISD::FREM: {
|
||||
APInt SrcUndef, SrcZero;
|
||||
if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
|
||||
SrcZero, TLO, Depth + 1))
|
||||
|
|
|
@ -178,16 +178,16 @@ define <8 x double> @fhadd_16_4(<8 x double> %x225, <8 x double> %x227) {
|
|||
define <4 x double> @fadd_noundef_low(<8 x double> %x225, <8 x double> %x227) {
|
||||
; KNL-LABEL: fadd_noundef_low:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
|
||||
; KNL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
|
||||
; KNL-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
|
||||
; KNL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
|
||||
; KNL-NEXT: vaddpd %zmm0, %zmm2, %zmm0
|
||||
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: fadd_noundef_low:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
|
||||
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
|
||||
; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
|
||||
; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
|
||||
; SKX-NEXT: vaddpd %zmm0, %zmm2, %zmm0
|
||||
; SKX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; SKX-NEXT: retq
|
||||
|
@ -252,17 +252,15 @@ define <8 x i32> @hadd_16_3_sv(<16 x i32> %x225, <16 x i32> %x227) {
|
|||
define double @fadd_noundef_eel(<8 x double> %x225, <8 x double> %x227) {
|
||||
; KNL-LABEL: fadd_noundef_eel:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
|
||||
; KNL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
|
||||
; KNL-NEXT: vaddpd %zmm0, %zmm2, %zmm0
|
||||
; KNL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; KNL-NEXT: vaddpd %zmm1, %zmm0, %zmm0
|
||||
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: fadd_noundef_eel:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
|
||||
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
|
||||
; SKX-NEXT: vaddpd %zmm0, %zmm2, %zmm0
|
||||
; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; SKX-NEXT: vaddpd %zmm1, %zmm0, %zmm0
|
||||
; SKX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
|
@ -278,18 +276,18 @@ define double @fadd_noundef_eel(<8 x double> %x225, <8 x double> %x227) {
|
|||
define double @fsub_noundef_ee (<8 x double> %x225, <8 x double> %x227) {
|
||||
; KNL-LABEL: fsub_noundef_ee:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
|
||||
; KNL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
|
||||
; KNL-NEXT: vsubpd %zmm0, %zmm2, %zmm0
|
||||
; KNL-NEXT: vextractf32x4 $2, %zmm1, %xmm0
|
||||
; KNL-NEXT: vbroadcastsd %xmm0, %zmm0
|
||||
; KNL-NEXT: vsubpd %zmm1, %zmm0, %zmm0
|
||||
; KNL-NEXT: vextractf32x4 $2, %zmm0, %xmm0
|
||||
; KNL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: fsub_noundef_ee:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
|
||||
; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
|
||||
; SKX-NEXT: vsubpd %zmm0, %zmm2, %zmm0
|
||||
; SKX-NEXT: vextractf32x4 $2, %zmm1, %xmm0
|
||||
; SKX-NEXT: vbroadcastsd %xmm0, %zmm0
|
||||
; SKX-NEXT: vsubpd %zmm1, %zmm0, %zmm0
|
||||
; SKX-NEXT: vextractf32x4 $2, %zmm0, %xmm0
|
||||
; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
||||
; SKX-NEXT: vzeroupper
|
||||
|
|
|
@ -7304,7 +7304,7 @@ define float @test_mm512_reduce_add_ps(<16 x float> %__W) {
|
|||
; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
||||
; X86-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vmovss %xmm0, (%esp)
|
||||
; X86-NEXT: flds (%esp)
|
||||
|
@ -7321,7 +7321,7 @@ define float @test_mm512_reduce_add_ps(<16 x float> %__W) {
|
|||
; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
||||
; X64-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
|
@ -7354,7 +7354,7 @@ define float @test_mm512_reduce_mul_ps(<16 x float> %__W) {
|
|||
; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
||||
; X86-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vmovss %xmm0, (%esp)
|
||||
; X86-NEXT: flds (%esp)
|
||||
|
@ -7371,7 +7371,7 @@ define float @test_mm512_reduce_mul_ps(<16 x float> %__W) {
|
|||
; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
||||
; X64-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
|
@ -7516,7 +7516,7 @@ define float @test_mm512_mask_reduce_add_ps(i16 zeroext %__M, <16 x float> %__W)
|
|||
; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
||||
; X86-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vmovss %xmm0, (%esp)
|
||||
; X86-NEXT: flds (%esp)
|
||||
|
@ -7535,7 +7535,7 @@ define float @test_mm512_mask_reduce_add_ps(i16 zeroext %__M, <16 x float> %__W)
|
|||
; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
||||
; X64-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
|
@ -7573,7 +7573,7 @@ define float @test_mm512_mask_reduce_mul_ps(i16 zeroext %__M, <16 x float> %__W)
|
|||
; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
||||
; X86-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vmovss %xmm0, (%esp)
|
||||
; X86-NEXT: flds (%esp)
|
||||
|
@ -7593,7 +7593,7 @@ define float @test_mm512_mask_reduce_mul_ps(i16 zeroext %__M, <16 x float> %__W)
|
|||
; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
||||
; X64-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
|
|
|
@ -2703,7 +2703,7 @@ define <4 x float> @PR22377(<4 x float> %a, <4 x float> %b) {
|
|||
; SSE-LABEL: PR22377:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: movaps %xmm0, %xmm1
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm0[1,3]
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm0[2,3]
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,0,2]
|
||||
; SSE-NEXT: addps %xmm0, %xmm1
|
||||
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
|
@ -2711,7 +2711,7 @@ define <4 x float> @PR22377(<4 x float> %a, <4 x float> %b) {
|
|||
;
|
||||
; AVX-LABEL: PR22377:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,3,1,3]
|
||||
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,3,2,3]
|
||||
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2]
|
||||
; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm1
|
||||
; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
|
|
Loading…
Reference in New Issue