From c5f0f5309e3d849a76d733ae35f58565d1c4eb65 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> Date: Wed, 16 Jan 2019 18:18:01 +0000 Subject: [PATCH] [X86][BtVer2] Update latency of horizontal operations. On Jaguar, horizontal adds/subs have local forwarding disabled. That means we pay a compulsory extra cycle of the write-back stage, and the value is not available until the end of that stage. This patch changes the latency of horizontal operations by adding an extra cycle. With this patch, latency numbers now match what is reported by perf. I plan to send another patch to also 'fix' the latency of shuffle operations (on Jaguar, local forwarding is disabled for vector shuffles too). Differential Revision: https://reviews.llvm.org/D56777 llvm-svn: 351366 --- llvm/lib/Target/X86/X86ScheduleBtVer2.td | 10 ++-- llvm/test/CodeGen/X86/avx-schedule.ll | 16 +++--- llvm/test/CodeGen/X86/mmx-schedule.ll | 24 ++++---- llvm/test/CodeGen/X86/sse3-schedule.ll | 32 +++++------ llvm/test/CodeGen/X86/ssse3-schedule.ll | 48 ++++++++-------- .../tools/llvm-mca/X86/BtVer2/dot-product.s | 32 +++++------ .../X86/BtVer2/hadd-read-after-ld-1.s | 14 ++--- .../X86/BtVer2/hadd-read-after-ld-2.s | 14 ++--- .../X86/BtVer2/instruction-info-view.s | 10 ++-- .../llvm-mca/X86/BtVer2/resources-avx1.s | 56 +++++++++---------- .../llvm-mca/X86/BtVer2/resources-sse3.s | 16 +++--- .../llvm-mca/X86/BtVer2/resources-ssse3.s | 48 ++++++++-------- 12 files changed, 161 insertions(+), 159 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 33a6b01546d7..adb69cc44083 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -174,6 +174,8 @@ multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW, } } +// Instructions that have local forwarding disabled have an extra +1cy latency. 
+ // A folded store needs a cycle on the SAGU for the store data, // most RMW instructions don't need an extra uop. defm : X86WriteRes<WriteRMW, [JSAGU], 1, [1], 0>; @@ -575,10 +577,10 @@ defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU01, JVALU, JFPU0, JVIMUL], 3, [1, // Horizontal add/sub instructions. //////////////////////////////////////////////////////////////////////////////// -defm : JWriteResFpuPair<WriteFHAdd, [JFPU0, JFPA], 3>; -defm : JWriteResYMMPair<WriteFHAddY, [JFPU0, JFPA], 3, [2,2], 2>; -defm : JWriteResFpuPair<WritePHAdd, [JFPU01, JVALU], 1>; -defm : JWriteResFpuPair<WritePHAddX, [JFPU01, JVALU], 1>; +defm : JWriteResFpuPair<WriteFHAdd, [JFPU0, JFPA], 4>; // +1cy latency. +defm : JWriteResYMMPair<WriteFHAddY, [JFPU0, JFPA], 4, [2,2], 2>; // +1cy latency. +defm : JWriteResFpuPair<WritePHAdd, [JFPU01, JVALU], 2>; // +1cy latency. +defm : JWriteResFpuPair<WritePHAddX, [JFPU01, JVALU], 2>; // +1cy latency. defm : X86WriteResPairUnsupported<WritePHAddY>; //////////////////////////////////////////////////////////////////////////////// diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll index e1db9bab294c..6818ea06732e 100644 --- a/llvm/test/CodeGen/X86/avx-schedule.ll +++ b/llvm/test/CodeGen/X86/avx-schedule.ll @@ -1951,8 +1951,8 @@ define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; ; BTVER2-LABEL: test_haddpd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] +; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [4:2.00] +; BTVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_haddpd: @@ -2012,8 +2012,8 @@ define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% ; ; BTVER2-LABEL: test_haddps: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vhaddps (%rdi), 
%ymm0, %ymm0 # sched: [8:2.00] +; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [4:2.00] +; BTVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [9:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_haddps: @@ -2073,8 +2073,8 @@ define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; ; BTVER2-LABEL: test_hsubpd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] +; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [4:2.00] +; BTVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_hsubpd: @@ -2134,8 +2134,8 @@ define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% ; ; BTVER2-LABEL: test_hsubps: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] +; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [4:2.00] +; BTVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [9:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_hsubps: diff --git a/llvm/test/CodeGen/X86/mmx-schedule.ll b/llvm/test/CodeGen/X86/mmx-schedule.ll index 51dc5e102ff1..513332f61f11 100644 --- a/llvm/test/CodeGen/X86/mmx-schedule.ll +++ b/llvm/test/CodeGen/X86/mmx-schedule.ll @@ -3368,8 +3368,8 @@ define i64 @test_phaddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; ; BTVER2-LABEL: test_phaddd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: phaddd %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: phaddd (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: phaddd %mm1, %mm0 # sched: [2:0.50] +; BTVER2-NEXT: phaddd (%rdi), %mm0 # sched: [7:1.00] ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -3453,8 +3453,8 @@ define i64 @test_phaddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; ; BTVER2-LABEL: test_phaddsw: ; BTVER2: # %bb.0: -; BTVER2-NEXT: 
phaddsw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: phaddsw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: phaddsw %mm1, %mm0 # sched: [2:0.50] +; BTVER2-NEXT: phaddsw (%rdi), %mm0 # sched: [7:1.00] ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -3538,8 +3538,8 @@ define i64 @test_phaddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; ; BTVER2-LABEL: test_phaddw: ; BTVER2: # %bb.0: -; BTVER2-NEXT: phaddw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: phaddw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: phaddw %mm1, %mm0 # sched: [2:0.50] +; BTVER2-NEXT: phaddw (%rdi), %mm0 # sched: [7:1.00] ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -3623,8 +3623,8 @@ define i64 @test_phsubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; ; BTVER2-LABEL: test_phsubd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: phsubd %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: phsubd (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: phsubd %mm1, %mm0 # sched: [2:0.50] +; BTVER2-NEXT: phsubd (%rdi), %mm0 # sched: [7:1.00] ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -3708,8 +3708,8 @@ define i64 @test_phsubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; ; BTVER2-LABEL: test_phsubsw: ; BTVER2: # %bb.0: -; BTVER2-NEXT: phsubsw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: phsubsw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: phsubsw %mm1, %mm0 # sched: [2:0.50] +; BTVER2-NEXT: phsubsw (%rdi), %mm0 # sched: [7:1.00] ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -3793,8 +3793,8 @@ define i64 @test_phsubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; ; BTVER2-LABEL: test_phsubw: ; BTVER2: # %bb.0: -; BTVER2-NEXT: phsubw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: phsubw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: phsubw %mm1, %mm0 # sched: [2:0.50] +; BTVER2-NEXT: phsubw (%rdi), %mm0 # sched: [7:1.00] ; 
BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; diff --git a/llvm/test/CodeGen/X86/sse3-schedule.ll b/llvm/test/CodeGen/X86/sse3-schedule.ll index 1c3419a35ff3..c80d0e446599 100644 --- a/llvm/test/CodeGen/X86/sse3-schedule.ll +++ b/llvm/test/CodeGen/X86/sse3-schedule.ll @@ -356,14 +356,14 @@ define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; ; BTVER2-SSE-LABEL: test_haddpd: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [4:1.00] +; BTVER2-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [9:1.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_haddpd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BTVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; BTVER2-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_haddpd: @@ -477,14 +477,14 @@ define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; ; BTVER2-SSE-LABEL: test_haddps: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [4:1.00] +; BTVER2-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [9:1.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_haddps: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BTVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; BTVER2-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_haddps: @@ -598,14 +598,14 @@ define <2 x double> @test_hsubpd(<2 x double> 
%a0, <2 x double> %a1, <2 x double ; ; BTVER2-SSE-LABEL: test_hsubpd: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [4:1.00] +; BTVER2-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [9:1.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_hsubpd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BTVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; BTVER2-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_hsubpd: @@ -719,14 +719,14 @@ define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; ; BTVER2-SSE-LABEL: test_hsubps: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [4:1.00] +; BTVER2-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [9:1.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_hsubps: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BTVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; BTVER2-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_hsubps: diff --git a/llvm/test/CodeGen/X86/ssse3-schedule.ll b/llvm/test/CodeGen/X86/ssse3-schedule.ll index 5c8bd2dc843e..5a871e273609 100644 --- a/llvm/test/CodeGen/X86/ssse3-schedule.ll +++ b/llvm/test/CodeGen/X86/ssse3-schedule.ll @@ -676,14 +676,14 @@ define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; BTVER2-SSE-LABEL: test_phaddd: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [1:0.50] -; 
BTVER2-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [2:0.50] +; BTVER2-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [7:1.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_phaddd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BTVER2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BTVER2-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_phaddd: @@ -797,14 +797,14 @@ define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; BTVER2-SSE-LABEL: test_phaddsw: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [2:0.50] +; BTVER2-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [7:1.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_phaddsw: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BTVER2-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BTVER2-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_phaddsw: @@ -918,14 +918,14 @@ define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; BTVER2-SSE-LABEL: test_phaddw: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [2:0.50] +; BTVER2-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [7:1.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_phaddw: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # 
sched: [6:1.00] +; BTVER2-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BTVER2-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_phaddw: @@ -1039,14 +1039,14 @@ define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; BTVER2-SSE-LABEL: test_phsubd: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [2:0.50] +; BTVER2-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [7:1.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_phsubd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BTVER2-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BTVER2-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_phsubd: @@ -1160,14 +1160,14 @@ define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; BTVER2-SSE-LABEL: test_phsubsw: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [2:0.50] +; BTVER2-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [7:1.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_phsubsw: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BTVER2-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BTVER2-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_phsubsw: @@ -1281,14 +1281,14 @@ define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; BTVER2-SSE-LABEL: test_phsubw: ; BTVER2-SSE: # 
%bb.0: -; BTVER2-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [2:0.50] +; BTVER2-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [7:1.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_phsubw: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BTVER2-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BTVER2-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_phsubw: diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s index 643e456450cd..a43b8285a518 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s @@ -7,12 +7,12 @@ vhaddps %xmm3, %xmm3, %xmm4 # CHECK: Iterations: 300 # CHECK-NEXT: Instructions: 900 -# CHECK-NEXT: Total Cycles: 610 +# CHECK-NEXT: Total Cycles: 611 # CHECK-NEXT: Total uOps: 900 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 1.48 -# CHECK-NEXT: IPC: 1.48 +# CHECK-NEXT: uOps Per Cycle: 1.47 +# CHECK-NEXT: IPC: 1.47 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: @@ -25,8 +25,8 @@ vhaddps %xmm3, %xmm3, %xmm4 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 2 1.00 vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 3 1.00 vhaddps %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: 1 3 1.00 vhaddps %xmm3, %xmm3, %xmm4 +# CHECK-NEXT: 1 4 1.00 vhaddps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: 1 4 1.00 vhaddps %xmm3, %xmm3, %xmm4 # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 @@ -59,14 +59,14 @@ vhaddps %xmm3, %xmm3, %xmm4 # CHECK-NEXT: Index 0123456789 # CHECK: [0,0] DeeER. . . vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [0,1] D==eeeER . . vhaddps %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [0,2] .D====eeeER . 
vhaddps %xmm3, %xmm3, %xmm4 -# CHECK-NEXT: [1,0] .DeeE-----R . vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [1,1] . D=eeeE---R . vhaddps %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [1,2] . D====eeeER . vhaddps %xmm3, %xmm3, %xmm4 -# CHECK-NEXT: [2,0] . DeeE-----R . vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [2,1] . D====eeeER . vhaddps %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [2,2] . D======eeeER vhaddps %xmm3, %xmm3, %xmm4 +# CHECK-NEXT: [0,1] D==eeeeER . . vhaddps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [0,2] .D=====eeeeER . vhaddps %xmm3, %xmm3, %xmm4 +# CHECK-NEXT: [1,0] .DeeE-------R . vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [1,1] . D=eeeeE----R . vhaddps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [1,2] . D=====eeeeER . vhaddps %xmm3, %xmm3, %xmm4 +# CHECK-NEXT: [2,0] . DeeE-------R. vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [2,1] . D==eeeeE---R. vhaddps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [2,2] . D=====eeeeER vhaddps %xmm3, %xmm3, %xmm4 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -75,6 +75,6 @@ vhaddps %xmm3, %xmm3, %xmm4 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 3 1.0 1.0 3.3 vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1. 3 3.3 0.7 1.0 vhaddps %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: 2. 3 5.7 0.0 0.0 vhaddps %xmm3, %xmm3, %xmm4 +# CHECK-NEXT: 0. 3 1.0 1.0 4.7 vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1. 3 2.7 0.0 2.3 vhaddps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: 2. 
3 6.0 0.0 0.0 vhaddps %xmm3, %xmm3, %xmm4 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s index 87862a6e5a33..197c1dce5815 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s @@ -6,12 +6,12 @@ vhaddps (%rdi), %xmm1, %xmm2 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 11 +# CHECK-NEXT: Total Cycles: 12 # CHECK-NEXT: Total uOps: 2 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.18 -# CHECK-NEXT: IPC: 0.18 +# CHECK-NEXT: uOps Per Cycle: 0.17 +# CHECK-NEXT: IPC: 0.17 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: @@ -24,14 +24,14 @@ vhaddps (%rdi), %xmm1, %xmm2 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 0.50 vshufps $0, %xmm0, %xmm1, %xmm1 -# CHECK-NEXT: 1 8 1.00 * vhaddps (%rdi), %xmm1, %xmm2 +# CHECK-NEXT: 1 9 1.00 * vhaddps (%rdi), %xmm1, %xmm2 # CHECK: Timeline view: -# CHECK-NEXT: 0 +# CHECK-NEXT: 01 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeER . . vshufps $0, %xmm0, %xmm1, %xmm1 -# CHECK-NEXT: [0,1] DeeeeeeeeER vhaddps (%rdi), %xmm1, %xmm2 +# CHECK: [0,0] DeER . .. 
vshufps $0, %xmm0, %xmm1, %xmm1 +# CHECK-NEXT: [0,1] DeeeeeeeeeER vhaddps (%rdi), %xmm1, %xmm2 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s index 80d5109d07ee..e64ee28103f3 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s @@ -6,12 +6,12 @@ vhaddps (%rdi), %ymm1, %ymm2 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 12 +# CHECK-NEXT: Total Cycles: 13 # CHECK-NEXT: Total uOps: 3 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.25 -# CHECK-NEXT: IPC: 0.17 +# CHECK-NEXT: uOps Per Cycle: 0.23 +# CHECK-NEXT: IPC: 0.15 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: @@ -24,14 +24,14 @@ vhaddps (%rdi), %ymm1, %ymm2 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 0.50 vshufps $0, %xmm0, %xmm1, %xmm1 -# CHECK-NEXT: 2 8 2.00 * vhaddps (%rdi), %ymm1, %ymm2 +# CHECK-NEXT: 2 9 2.00 * vhaddps (%rdi), %ymm1, %ymm2 # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 012 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeER . .. vshufps $0, %xmm0, %xmm1, %xmm1 -# CHECK-NEXT: [0,1] .DeeeeeeeeER vhaddps (%rdi), %ymm1, %ymm2 +# CHECK: [0,0] DeER . . . 
vshufps $0, %xmm0, %xmm1, %xmm1 +# CHECK-NEXT: [0,1] .DeeeeeeeeeER vhaddps (%rdi), %ymm1, %ymm2 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s b/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s index fa19b55c1d18..8f17c44e0ef4 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s @@ -13,13 +13,13 @@ vhaddps %xmm3, %xmm3, %xmm4 # ENABLED: Iterations: 100 # ENABLED-NEXT: Instructions: 300 -# ENABLED-NEXT: Total Cycles: 209 +# ENABLED-NEXT: Total Cycles: 211 # ENABLED-NEXT: Total uOps: 300 # ENABLED: Dispatch Width: 2 -# ENABLED-NEXT: uOps Per Cycle: 1.44 -# ENABLED-NEXT: IPC: 1.44 +# ENABLED-NEXT: uOps Per Cycle: 1.42 +# ENABLED-NEXT: IPC: 1.42 # ENABLED-NEXT: Block RThroughput: 2.0 # ENABLED: Instruction Info: @@ -32,5 +32,5 @@ vhaddps %xmm3, %xmm3, %xmm4 # ENABLED: [1] [2] [3] [4] [5] [6] Instructions: # ENABLED-NEXT: 1 2 1.00 vmulps %xmm0, %xmm1, %xmm2 -# ENABLED-NEXT: 1 3 1.00 vhaddps %xmm2, %xmm2, %xmm3 -# ENABLED-NEXT: 1 3 1.00 vhaddps %xmm3, %xmm3, %xmm4 +# ENABLED-NEXT: 1 4 1.00 vhaddps %xmm2, %xmm2, %xmm3 +# ENABLED-NEXT: 1 4 1.00 vhaddps %xmm3, %xmm3, %xmm4 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s index 3db3470e0c4f..0df1d17983e6 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s @@ -1196,22 +1196,22 @@ vzeroupper # CHECK-NEXT: 1 1 1.00 * vextractf128 $1, %ymm0, (%rax) # CHECK-NEXT: 1 3 1.00 vextractps $1, %xmm0, %ecx # CHECK-NEXT: 1 3 1.00 * vextractps $1, %xmm0, (%rax) -# CHECK-NEXT: 1 3 1.00 vhaddpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 1.00 * vhaddpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 2 3 2.00 vhaddpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 2 8 2.00 * vhaddpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 3 
1.00 vhaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 1.00 * vhaddps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 2 3 2.00 vhaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 2 8 2.00 * vhaddps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 3 1.00 vhsubpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 1.00 * vhsubpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 2 3 2.00 vhsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 2 8 2.00 * vhsubpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 3 1.00 vhsubps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 1.00 * vhsubps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 2 3 2.00 vhsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 2 8 2.00 * vhsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vhaddpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 9 1.00 * vhaddpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 4 2.00 vhaddpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 9 2.00 * vhaddpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vhaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 9 1.00 * vhaddps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 4 2.00 vhaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 9 2.00 * vhaddps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vhsubpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 9 1.00 * vhsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 4 2.00 vhsubpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 9 2.00 * vhsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vhsubps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 9 1.00 * vhsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 4 2.00 vhsubps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 9 2.00 * vhsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 2 1 1.00 vinsertf128 $1, %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 2 6 1.00 * vinsertf128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.50 vinsertps $1, %xmm0, %xmm1, %xmm2 @@ -1455,20 +1455,20 @@ vzeroupper # CHECK-NEXT: 1 3 1.00 * vpextrq $1, %xmm0, (%rax) # CHECK-NEXT: 1 3 1.00 vpextrw $1, %xmm0, %ecx # CHECK-NEXT: 1 3 1.00 * vpextrw $1, %xmm0, (%rax) -# CHECK-NEXT: 1 1 0.50 vphaddd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 6 1.00 * vphaddd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vphaddsw %xmm0, %xmm1, 
%xmm2 -# CHECK-NEXT: 1 6 1.00 * vphaddsw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vphaddw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 6 1.00 * vphaddw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vphaddd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 7 1.00 * vphaddd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vphaddsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 7 1.00 * vphaddsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vphaddw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 7 1.00 * vphaddw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 2 0.50 vphminposuw %xmm0, %xmm2 # CHECK-NEXT: 1 7 1.00 * vphminposuw (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 vphsubd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 6 1.00 * vphsubd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vphsubsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 6 1.00 * vphsubsw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vphsubw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 6 1.00 * vphsubw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vphsubd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 7 1.00 * vphsubd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vphsubsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 7 1.00 * vphsubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vphsubw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 7 1.00 * vphsubw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 2 7 0.50 vpinsrb $1, %eax, %xmm1, %xmm2 # CHECK-NEXT: 1 4 1.00 * vpinsrb $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 2 7 0.50 vpinsrd $1, %eax, %xmm1, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse3.s index 3fd2f406a98b..add8bd843bfa 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse3.s @@ -43,14 +43,14 @@ movsldup (%rax), %xmm2 # CHECK-NEXT: 1 8 1.00 * addsubpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 addsubps %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * addsubps (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 haddpd %xmm0, %xmm2 -# CHECK-NEXT: 1 8 1.00 * haddpd (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 haddps %xmm0, 
%xmm2 -# CHECK-NEXT: 1 8 1.00 * haddps (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 hsubpd %xmm0, %xmm2 -# CHECK-NEXT: 1 8 1.00 * hsubpd (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 hsubps %xmm0, %xmm2 -# CHECK-NEXT: 1 8 1.00 * hsubps (%rax), %xmm2 +# CHECK-NEXT: 1 4 1.00 haddpd %xmm0, %xmm2 +# CHECK-NEXT: 1 9 1.00 * haddpd (%rax), %xmm2 +# CHECK-NEXT: 1 4 1.00 haddps %xmm0, %xmm2 +# CHECK-NEXT: 1 9 1.00 * haddps (%rax), %xmm2 +# CHECK-NEXT: 1 4 1.00 hsubpd %xmm0, %xmm2 +# CHECK-NEXT: 1 9 1.00 * hsubpd (%rax), %xmm2 +# CHECK-NEXT: 1 4 1.00 hsubps %xmm0, %xmm2 +# CHECK-NEXT: 1 9 1.00 * hsubps (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 * lddqu (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 movddup %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * movddup (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-ssse3.s index ff7ff3f93bac..0b64d1da48b6 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-ssse3.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-ssse3.s @@ -122,30 +122,30 @@ psignw (%rax), %xmm2 # CHECK-NEXT: 1 6 1.00 * palignr $1, (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 palignr $1, %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * palignr $1, (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 phaddd %mm0, %mm2 -# CHECK-NEXT: 1 6 1.00 * phaddd (%rax), %mm2 -# CHECK-NEXT: 1 1 0.50 phaddd %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * phaddd (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 phaddsw %mm0, %mm2 -# CHECK-NEXT: 1 6 1.00 * phaddsw (%rax), %mm2 -# CHECK-NEXT: 1 1 0.50 phaddsw %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * phaddsw (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 phaddw %mm0, %mm2 -# CHECK-NEXT: 1 6 1.00 * phaddw (%rax), %mm2 -# CHECK-NEXT: 1 1 0.50 phaddw %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * phaddw (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 phsubd %mm0, %mm2 -# CHECK-NEXT: 1 6 1.00 * phsubd (%rax), %mm2 -# CHECK-NEXT: 1 1 0.50 phsubd %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * phsubd (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 phsubsw %mm0, %mm2 -# CHECK-NEXT: 1 6 1.00 * 
phsubsw (%rax), %mm2 -# CHECK-NEXT: 1 1 0.50 phsubsw %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * phsubsw (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 phsubw %mm0, %mm2 -# CHECK-NEXT: 1 6 1.00 * phsubw (%rax), %mm2 -# CHECK-NEXT: 1 1 0.50 phsubw %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * phsubw (%rax), %xmm2 +# CHECK-NEXT: 1 2 0.50 phaddd %mm0, %mm2 +# CHECK-NEXT: 1 7 1.00 * phaddd (%rax), %mm2 +# CHECK-NEXT: 1 2 0.50 phaddd %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * phaddd (%rax), %xmm2 +# CHECK-NEXT: 1 2 0.50 phaddsw %mm0, %mm2 +# CHECK-NEXT: 1 7 1.00 * phaddsw (%rax), %mm2 +# CHECK-NEXT: 1 2 0.50 phaddsw %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * phaddsw (%rax), %xmm2 +# CHECK-NEXT: 1 2 0.50 phaddw %mm0, %mm2 +# CHECK-NEXT: 1 7 1.00 * phaddw (%rax), %mm2 +# CHECK-NEXT: 1 2 0.50 phaddw %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * phaddw (%rax), %xmm2 +# CHECK-NEXT: 1 2 0.50 phsubd %mm0, %mm2 +# CHECK-NEXT: 1 7 1.00 * phsubd (%rax), %mm2 +# CHECK-NEXT: 1 2 0.50 phsubd %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * phsubd (%rax), %xmm2 +# CHECK-NEXT: 1 2 0.50 phsubsw %mm0, %mm2 +# CHECK-NEXT: 1 7 1.00 * phsubsw (%rax), %mm2 +# CHECK-NEXT: 1 2 0.50 phsubsw %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * phsubsw (%rax), %xmm2 +# CHECK-NEXT: 1 2 0.50 phsubw %mm0, %mm2 +# CHECK-NEXT: 1 7 1.00 * phsubw (%rax), %mm2 +# CHECK-NEXT: 1 2 0.50 phsubw %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * phsubw (%rax), %xmm2 # CHECK-NEXT: 1 2 1.00 pmaddubsw %mm0, %mm2 # CHECK-NEXT: 1 7 1.00 * pmaddubsw (%rax), %mm2 # CHECK-NEXT: 1 2 1.00 pmaddubsw %xmm0, %xmm2