From de995e6e37d038efcd134dc40c1168a7d023eafc Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 14 Mar 2018 13:22:56 +0000 Subject: [PATCH] [X86][SSE] Use WriteFShuffleLd for MOVDDUP/MOVSHDUP/MOVSLDUP reg-mem instructions They shouldn't be treated as pure loads. Found while investigating D44428 llvm-svn: 327505 --- llvm/lib/Target/X86/X86InstrSSE.td | 9 +++++---- llvm/test/CodeGen/X86/avx-schedule.ll | 6 +++--- llvm/test/CodeGen/X86/sse3-schedule.ll | 27 ++++++++++++++------------ 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 51fbdf835cac..17cc80680787 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -4708,6 +4708,7 @@ let AddedComplexity = 20 in { //===---------------------------------------------------------------------===// // SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP //===---------------------------------------------------------------------===// + multiclass sse3_replicate_sfp op, SDNode OpNode, string OpcodeStr, ValueType vt, RegisterClass RC, PatFrag mem_frag, X86MemOperand x86memop> { @@ -4718,7 +4719,7 @@ def rr : S3SI, Sched<[WriteLoad]>; + IIC_SSE_MOV_LH>, Sched<[WriteFShuffleLd]>; } let Predicates = [HasAVX, NoVLX] in { @@ -4786,10 +4787,10 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), [(set VR128:$dst, (v2f64 (X86Movddup (scalar_to_vector (loadf64 addr:$src)))))], - IIC_SSE_MOV_LH>, Sched<[WriteLoad]>; + IIC_SSE_MOV_LH>, Sched<[WriteFShuffleLd]>; } -// FIXME: Merge with above classe when there're patterns for the ymm version +// FIXME: Merge with above classes when there are patterns for the ymm version multiclass sse3_replicate_dfp_y { def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), @@ -4799,7 +4800,7 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v4f64 (X86Movddup (loadv4f64 addr:$src))))]>, - Sched<[WriteLoad]>; + Sched<[WriteFShuffleLd]>; } let Predicates = [HasAVX, NoVLX] in { diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll index 164f97fef214..5d2aacdf4403 100644 --- a/llvm/test/CodeGen/X86/avx-schedule.ll +++ b/llvm/test/CodeGen/X86/avx-schedule.ll @@ -2690,7 +2690,7 @@ define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) { ; ; BTVER2-LABEL: test_movddup: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [5:1.00] +; BTVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [6:1.00] ; BTVER2-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:0.50] ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -3030,7 +3030,7 @@ define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) { ; ; BTVER2-LABEL: test_movshdup: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [5:1.00] +; BTVER2-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [6:1.00] ; BTVER2-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:0.50] ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -3093,7 +3093,7 @@ define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) { ; ; BTVER2-LABEL: test_movsldup: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [5:1.00] +; BTVER2-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [6:1.00] ; BTVER2-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:0.50] ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] diff --git a/llvm/test/CodeGen/X86/sse3-schedule.ll b/llvm/test/CodeGen/X86/sse3-schedule.ll index 5de26ab19d21..bb7694c1e4a3 100644 --- a/llvm/test/CodeGen/X86/sse3-schedule.ll +++ b/llvm/test/CodeGen/X86/sse3-schedule.ll @@ -563,9 +563,10 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) { ; ; SLM-LABEL: test_movddup: ; SLM: # %bb.0: -; SLM-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] -; SLM-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [3:1.00] -; SLM-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] +; SLM-NEXT: movddup {{.*#+}} xmm1 = mem[0,0] sched: [4:1.00] +; SLM-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] +; SLM-NEXT: subpd %xmm0, %xmm1 # sched: [3:1.00] +; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_movddup: @@ -605,7 +606,7 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) { ; ; BTVER2-LABEL: test_movddup: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:1.00] +; BTVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:1.00] ; BTVER2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50] ; BTVER2-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -641,9 +642,10 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { ; ; SLM-LABEL: test_movshdup: ; SLM: # %bb.0: -; SLM-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] -; SLM-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [3:1.00] -; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SLM-NEXT: movshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:1.00] +; SLM-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] +; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] +; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_movshdup: @@ -683,7 +685,7 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { ; ; BTVER2-LABEL: test_movshdup: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [5:1.00] +; BTVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:1.00] ; BTVER2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50] ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -719,9 +721,10 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) { ; ; SLM-LABEL: test_movsldup: ; SLM: # %bb.0: -; SLM-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] -; SLM-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [3:1.00] -; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SLM-NEXT: movsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:1.00] +; SLM-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] +; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] +; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_movsldup: @@ -761,7 +764,7 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) { ; ; BTVER2-LABEL: test_movsldup: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [5:1.00] +; BTVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:1.00] ; BTVER2-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50] ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00]