[X86][BtVer2] Partial vector stores (including MMX) have a 2-cycle latency

llvm-svn: 332722
This commit is contained in:
Simon Pilgrim 2018-05-18 14:22:22 +00:00
parent 804f4d4650
commit 3ecb0b80f6
8 changed files with 38 additions and 38 deletions

View File

@@ -277,7 +277,7 @@ defm : X86WriteRes<WriteFLoadY, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 1, 2], 1>;
defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNT, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
@@ -405,7 +405,7 @@ defm : X86WriteRes<WriteVecLoadNTY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1],
defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 1, 2], 1>;
defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;

View File

@@ -953,7 +953,7 @@ define void @test_movq(i64 *%a0) {
; BTVER2: # %bb.0:
; BTVER2-NEXT: movq (%rdi), %mm0 # sched: [5:1.00]
; BTVER2-NEXT: paddd %mm0, %mm0 # sched: [1:0.50]
; BTVER2-NEXT: movq %mm0, (%rdi) # sched: [1:1.00]
; BTVER2-NEXT: movq %mm0, (%rdi) # sched: [2:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movq:

View File

@@ -2812,7 +2812,7 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; BTVER2-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
; BTVER2-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50]
; BTVER2-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
; BTVER2-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [2:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_movhps:
@@ -3051,14 +3051,14 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; BTVER2-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
; BTVER2-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
; BTVER2-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [2:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_movlps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
; BTVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [2:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_movlps:
@@ -3367,14 +3367,14 @@ define void @test_movss_mem(float* %a0, float* %a1) {
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
; BTVER2-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
; BTVER2-SSE-NEXT: movss %xmm0, (%rsi) # sched: [2:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_movss_mem:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
; BTVER2-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [2:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_movss_mem:

View File

@@ -4610,7 +4610,7 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
; BTVER2-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
; BTVER2-SSE-NEXT: movd %edi, %xmm1 # sched: [1:0.50]
; BTVER2-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
; BTVER2-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
; BTVER2-SSE-NEXT: movd %xmm1, (%rsi) # sched: [2:1.00]
; BTVER2-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
; BTVER2-SSE-NEXT: movd %xmm2, %eax # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
@@ -4620,7 +4620,7 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
; BTVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
; BTVER2-NEXT: vmovd %edi, %xmm1 # sched: [1:0.50]
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
; BTVER2-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: vmovd %xmm1, (%rsi) # sched: [2:1.00]
; BTVER2-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vmovd %xmm0, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
@@ -4791,7 +4791,7 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) {
; BTVER2-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
; BTVER2-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:0.50]
; BTVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
; BTVER2-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
; BTVER2-SSE-NEXT: movq %xmm1, (%rsi) # sched: [2:1.00]
; BTVER2-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
; BTVER2-SSE-NEXT: movq %xmm2, %rax # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
@@ -4801,7 +4801,7 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) {
; BTVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
; BTVER2-NEXT: vmovq %rdi, %xmm1 # sched: [1:0.50]
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
; BTVER2-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: vmovq %xmm1, (%rsi) # sched: [2:1.00]
; BTVER2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vmovq %xmm0, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
@@ -4932,14 +4932,14 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
; BTVER2-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
; BTVER2-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [2:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_movhpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; BTVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [2:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_movhpd:
@@ -5060,14 +5060,14 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
; BTVER2-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
; BTVER2-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [2:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_movlpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
; BTVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [2:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_movlpd:
@@ -5496,14 +5496,14 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) {
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
; BTVER2-SSE-NEXT: movq %xmm0, (%rdi) # sched: [2:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_movq_mem:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
; BTVER2-NEXT: vmovq %xmm0, (%rdi) # sched: [2:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_movq_mem:
@@ -5732,14 +5732,14 @@ define void @test_movsd_mem(double* %a0, double* %a1) {
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
; BTVER2-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
; BTVER2-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [2:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_movsd_mem:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
; BTVER2-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [2:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_movsd_mem:

View File

@@ -1255,7 +1255,7 @@ vzeroupper
# CHECK-NEXT: 1 1 0.50 vmovd %eax, %xmm2
# CHECK-NEXT: 1 5 1.00 * vmovd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vmovd %xmm0, %ecx
# CHECK-NEXT: 1 1 1.00 * vmovd %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * vmovd %xmm0, (%rax)
# CHECK-NEXT: 1 1 0.50 vmovddup %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vmovddup (%rax), %xmm2
# CHECK-NEXT: 2 1 1.00 vmovddup %ymm0, %ymm2
@@ -1274,13 +1274,13 @@ vzeroupper
# CHECK-NEXT: 1 5 1.00 * vmovdqu (%rax), %ymm2
# CHECK-NEXT: 1 1 0.50 vmovhlps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vmovlhps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovhpd %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * vmovhpd %xmm0, (%rax)
# CHECK-NEXT: 1 6 1.00 * vmovhpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovhps %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * vmovhps %xmm0, (%rax)
# CHECK-NEXT: 1 6 1.00 * vmovhps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovlpd %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * vmovlpd %xmm0, (%rax)
# CHECK-NEXT: 1 6 1.00 * vmovlpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovlps %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * vmovlps %xmm0, (%rax)
# CHECK-NEXT: 1 6 1.00 * vmovlps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vmovmskpd %xmm0, %ecx
# CHECK-NEXT: 1 3 1.00 vmovmskpd %ymm0, %ecx
@@ -1298,9 +1298,9 @@ vzeroupper
# CHECK-NEXT: 1 1 0.50 vmovq %rax, %xmm2
# CHECK-NEXT: 1 5 1.00 * vmovq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vmovq %xmm0, %rcx
# CHECK-NEXT: 1 1 1.00 * vmovq %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * vmovq %xmm0, (%rax)
# CHECK-NEXT: 1 1 0.50 vmovsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovsd %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * vmovsd %xmm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * vmovsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vmovshdup %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vmovshdup (%rax), %xmm2
@@ -1311,7 +1311,7 @@ vzeroupper
# CHECK-NEXT: 2 1 1.00 vmovsldup %ymm0, %ymm2
# CHECK-NEXT: 2 6 2.00 * vmovsldup (%rax), %ymm2
# CHECK-NEXT: 1 1 0.50 vmovss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovss %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * vmovss %xmm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * vmovss (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vmovupd %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovupd %xmm0, (%rax)

View File

@@ -168,11 +168,11 @@ pxor (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 movd %eax, %mm2
# CHECK-NEXT: 1 5 1.00 * movd (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 movd %mm0, %ecx
# CHECK-NEXT: 1 1 1.00 * * movd %mm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * * movd %mm0, (%rax)
# CHECK-NEXT: 1 1 0.50 movq %rax, %mm2
# CHECK-NEXT: 1 5 1.00 * movq (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 movq %mm0, %rcx
# CHECK-NEXT: 1 1 1.00 * movq %mm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * movq %mm0, (%rax)
# CHECK-NEXT: 1 1 0.50 packsswb %mm0, %mm2
# CHECK-NEXT: 1 6 1.00 * packsswb (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 packssdw %mm0, %mm2

View File

@@ -245,15 +245,15 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 * movaps (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 movhlps %xmm0, %xmm2
# CHECK-NEXT: 1 1 0.50 movlhps %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movhps %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * movhps %xmm0, (%rax)
# CHECK-NEXT: 1 6 1.00 * movhps (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * movlps %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * movlps %xmm0, (%rax)
# CHECK-NEXT: 1 6 1.00 * movlps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 movmskps %xmm0, %ecx
# CHECK-NEXT: 1 3 1.00 * movntps %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * * * movntq %mm0, (%rax)
# CHECK-NEXT: 1 1 0.50 movss %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movss %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * movss %xmm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * movss (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 movups %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movups %xmm0, (%rax)

View File

@@ -475,7 +475,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 movd %eax, %xmm2
# CHECK-NEXT: 1 5 1.00 * movd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 movd %xmm0, %ecx
# CHECK-NEXT: 1 1 1.00 * movd %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * movd %xmm0, (%rax)
# CHECK-NEXT: 1 1 0.50 movdqa %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movdqa %xmm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * movdqa (%rax), %xmm2
@@ -483,9 +483,9 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * movdqu %xmm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * movdqu (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 movdq2q %xmm0, %mm2
# CHECK-NEXT: 1 1 1.00 * movhpd %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * movhpd %xmm0, (%rax)
# CHECK-NEXT: 1 6 1.00 * movhpd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * movlpd %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * movlpd %xmm0, (%rax)
# CHECK-NEXT: 1 6 1.00 * movlpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 movmskpd %xmm0, %ecx
# CHECK-NEXT: 1 1 1.00 * movntil %eax, (%rax)
@@ -496,10 +496,10 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 movq %rax, %xmm2
# CHECK-NEXT: 1 5 1.00 * movq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 movq %xmm0, %rcx
# CHECK-NEXT: 1 1 1.00 * movq %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * movq %xmm0, (%rax)
# CHECK-NEXT: 1 1 0.50 movq2dq %mm0, %xmm2
# CHECK-NEXT: 1 1 0.50 movsd %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movsd %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * movsd %xmm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * movsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 movupd %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movupd %xmm0, (%rax)