From 484944ac3b10530343df8461554b12190bbde9e9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 7 Sep 2021 22:30:21 +0100 Subject: [PATCH] [X86][SLM] Fix HADD/HSUB uops, latency and throughput Noticed while trying to improve generic reduction costs via the D103695 helper script. Confirmed with Intel AoM / Agner / InstLatX64. --- llvm/lib/Target/X86/X86ScheduleSLM.td | 10 +- .../tools/llvm-mca/X86/SLM/resources-sse3.s | 34 +++---- .../tools/llvm-mca/X86/SLM/resources-ssse3.s | 98 +++++++++---------- 3 files changed, 71 insertions(+), 71 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index e5bc42a773de..bd7b56a57360 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -420,12 +420,12 @@ def : WriteRes { // Horizontal add/sub instructions. //////////////////////////////////////////////////////////////////////////////// -defm : SLMWriteResPair; -defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; -defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; // String instructions. diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s index 23949737b3ca..bb34d31a93ae 100644 --- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s @@ -47,14 +47,14 @@ mwait # CHECK-NEXT: 1 7 2.00 * addsubpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 addsubps %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * addsubps (%rax), %xmm2 -# CHECK-NEXT: 4 6 3.00 haddpd %xmm0, %xmm2 -# CHECK-NEXT: 4 9 3.00 * haddpd (%rax), %xmm2 -# CHECK-NEXT: 4 6 3.00 haddps %xmm0, %xmm2 -# CHECK-NEXT: 4 9 3.00 * haddps (%rax), %xmm2 -# CHECK-NEXT: 4 6 3.00 hsubpd %xmm0, %xmm2 -# CHECK-NEXT: 4 9 3.00 * hsubpd (%rax), %xmm2 -# CHECK-NEXT: 4 6 3.00 hsubps %xmm0, %xmm2 -# CHECK-NEXT: 4 9 3.00 * hsubps (%rax), %xmm2 +# CHECK-NEXT: 4 6 6.00 haddpd %xmm0, %xmm2 +# CHECK-NEXT: 5 9 6.00 * haddpd (%rax), %xmm2 +# CHECK-NEXT: 4 6 6.00 haddps %xmm0, %xmm2 +# CHECK-NEXT: 5 9 6.00 * haddps (%rax), %xmm2 +# CHECK-NEXT: 4 6 6.00 hsubpd %xmm0, %xmm2 +# CHECK-NEXT: 5 9 6.00 * hsubpd (%rax), %xmm2 +# CHECK-NEXT: 4 6 6.00 hsubps %xmm0, %xmm2 +# CHECK-NEXT: 5 9 6.00 * hsubps (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 * lddqu (%rax), %xmm2 # CHECK-NEXT: 1 100 1.00 U monitor # CHECK-NEXT: 1 1 1.00 movddup %xmm0, %xmm2 @@ -77,7 +77,7 @@ mwait # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: - - - 32.00 30.00 - - 10.00 +# CHECK-NEXT: - - - 8.00 54.00 - - 10.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: @@ -85,14 +85,14 @@ mwait # CHECK-NEXT: - - - - 2.00 - - 1.00 addsubpd (%rax), %xmm2 # CHECK-NEXT: - - - - 1.00 - - - addsubps %xmm0, %xmm2 # CHECK-NEXT: - - - - 1.00 - - 1.00 addsubps (%rax), %xmm2 -# CHECK-NEXT: - - - 3.00 3.00 - - - haddpd %xmm0, %xmm2 -# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 haddpd (%rax), %xmm2 -# CHECK-NEXT: - - - 3.00 3.00 - - - haddps %xmm0, %xmm2 -# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 haddps (%rax), %xmm2 -# CHECK-NEXT: - - - 3.00 3.00 - - - hsubpd %xmm0, %xmm2 -# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 hsubpd (%rax), %xmm2 -# CHECK-NEXT: - - - 3.00 3.00 - - - hsubps %xmm0, %xmm2 -# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 hsubps (%rax), %xmm2 +# CHECK-NEXT: - - - - 6.00 - - - haddpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - 6.00 - - 1.00 haddpd (%rax), %xmm2 +# CHECK-NEXT: - - - - 6.00 - - - haddps %xmm0, %xmm2 +# CHECK-NEXT: - - - - 6.00 - - 1.00 haddps (%rax), %xmm2 +# CHECK-NEXT: - - - - 6.00 - - - hsubpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - 6.00 - - 1.00 hsubpd (%rax), %xmm2 +# CHECK-NEXT: - - - - 6.00 - - - hsubps %xmm0, %xmm2 +# CHECK-NEXT: - - - - 6.00 - - 1.00 hsubps (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 lddqu (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - monitor # CHECK-NEXT: - - - 1.00 - - - - movddup %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s index 3fb48787d929..e74a73f5bb3d 100644 --- a/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s +++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s @@ -122,30 +122,30 @@ psignw (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 * palignr $1, (%rax), %mm2 # CHECK-NEXT: 1 1 1.00 palignr $1, %xmm0, %xmm2 # CHECK-NEXT: 1 4 1.00 * palignr $1, (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 phaddd %mm0, %mm2 -# CHECK-NEXT: 1 4 1.00 * phaddd (%rax), %mm2 -# CHECK-NEXT: 1 1 0.50 phaddd %xmm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * phaddd (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 phaddsw %mm0, %mm2 -# CHECK-NEXT: 1 4 1.00 * phaddsw (%rax), %mm2 -# CHECK-NEXT: 1 1 0.50 phaddsw %xmm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * phaddsw (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 phaddw %mm0, %mm2 -# CHECK-NEXT: 1 4 1.00 * phaddw (%rax), %mm2 -# CHECK-NEXT: 1 1 0.50 phaddw %xmm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * phaddw (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 phsubd %mm0, %mm2 -# CHECK-NEXT: 1 4 1.00 * phsubd (%rax), %mm2 -# CHECK-NEXT: 1 1 0.50 phsubd %xmm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * phsubd (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 phsubsw %mm0, %mm2 -# CHECK-NEXT: 1 4 1.00 * phsubsw (%rax), %mm2 -# CHECK-NEXT: 1 1 0.50 phsubsw %xmm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * phsubsw (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 phsubw %mm0, %mm2 -# CHECK-NEXT: 1 4 1.00 * phsubw (%rax), %mm2 -# CHECK-NEXT: 1 1 0.50 phsubw %xmm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * phsubw (%rax), %xmm2 +# CHECK-NEXT: 3 6 3.00 phaddd %mm0, %mm2 +# CHECK-NEXT: 4 9 3.00 * phaddd (%rax), %mm2 +# CHECK-NEXT: 3 6 3.00 phaddd %xmm0, %xmm2 +# CHECK-NEXT: 4 9 3.00 * phaddd (%rax), %xmm2 +# CHECK-NEXT: 3 6 3.00 phaddsw %mm0, %mm2 +# CHECK-NEXT: 4 9 3.00 * phaddsw (%rax), %mm2 +# CHECK-NEXT: 3 6 3.00 phaddsw %xmm0, %xmm2 +# CHECK-NEXT: 4 9 3.00 * phaddsw (%rax), %xmm2 +# CHECK-NEXT: 3 6 3.00 phaddw %mm0, %mm2 +# CHECK-NEXT: 4 9 3.00 * phaddw (%rax), %mm2 +# CHECK-NEXT: 3 6 3.00 phaddw %xmm0, %xmm2 +# CHECK-NEXT: 4 9 3.00 * phaddw (%rax), %xmm2 +# CHECK-NEXT: 3 6 3.00 phsubd %mm0, %mm2 +# CHECK-NEXT: 4 9 3.00 * phsubd (%rax), %mm2 +# CHECK-NEXT: 3 6 3.00 phsubd %xmm0, %xmm2 +# CHECK-NEXT: 4 9 3.00 * phsubd (%rax), %xmm2 +# CHECK-NEXT: 3 6 3.00 phsubsw %mm0, %mm2 +# CHECK-NEXT: 4 9 3.00 * phsubsw (%rax), %mm2 +# CHECK-NEXT: 3 6 3.00 phsubsw %xmm0, %xmm2 +# CHECK-NEXT: 4 9 3.00 * phsubsw (%rax), %xmm2 +# CHECK-NEXT: 3 6 3.00 phsubw %mm0, %mm2 +# CHECK-NEXT: 4 9 3.00 * phsubw (%rax), %mm2 +# CHECK-NEXT: 3 6 3.00 phsubw %xmm0, %xmm2 +# CHECK-NEXT: 4 9 3.00 * phsubw (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 pmaddubsw %mm0, %mm2 # CHECK-NEXT: 1 7 1.00 * pmaddubsw (%rax), %mm2 # CHECK-NEXT: 1 5 2.00 pmaddubsw %xmm0, %xmm2 @@ -183,7 +183,7 @@ psignw (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: - - - 52.00 24.00 - - 32.00 +# CHECK-NEXT: - - - 112.00 84.00 - - 32.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: @@ -203,30 +203,30 @@ psignw (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 palignr $1, (%rax), %mm2 # CHECK-NEXT: - - - 1.00 - - - - palignr $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 palignr $1, (%rax), %xmm2 -# CHECK-NEXT: - - - 0.50 0.50 - - - phaddd %mm0, %mm2 -# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phaddd (%rax), %mm2 -# CHECK-NEXT: - - - 0.50 0.50 - - - phaddd %xmm0, %xmm2 -# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phaddd (%rax), %xmm2 -# CHECK-NEXT: - - - 0.50 0.50 - - - phaddsw %mm0, %mm2 -# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phaddsw (%rax), %mm2 -# CHECK-NEXT: - - - 0.50 0.50 - - - phaddsw %xmm0, %xmm2 -# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phaddsw (%rax), %xmm2 -# CHECK-NEXT: - - - 0.50 0.50 - - - phaddw %mm0, %mm2 -# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phaddw (%rax), %mm2 -# CHECK-NEXT: - - - 0.50 0.50 - - - phaddw %xmm0, %xmm2 -# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phaddw (%rax), %xmm2 -# CHECK-NEXT: - - - 0.50 0.50 - - - phsubd %mm0, %mm2 -# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubd (%rax), %mm2 -# CHECK-NEXT: - - - 0.50 0.50 - - - phsubd %xmm0, %xmm2 -# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubd (%rax), %xmm2 -# CHECK-NEXT: - - - 0.50 0.50 - - - phsubsw %mm0, %mm2 -# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubsw (%rax), %mm2 -# CHECK-NEXT: - - - 0.50 0.50 - - - phsubsw %xmm0, %xmm2 -# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubsw (%rax), %xmm2 -# CHECK-NEXT: - - - 0.50 0.50 - - - phsubw %mm0, %mm2 -# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubw (%rax), %mm2 -# CHECK-NEXT: - - - 0.50 0.50 - - - phsubw %xmm0, %xmm2 -# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubw (%rax), %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - phaddd %mm0, %mm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phaddd (%rax), %mm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - phaddd %xmm0, %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phaddd (%rax), %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - phaddsw %mm0, %mm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phaddsw (%rax), %mm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - phaddsw %xmm0, %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phaddsw (%rax), %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - phaddw %mm0, %mm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phaddw (%rax), %mm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - phaddw %xmm0, %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phaddw (%rax), %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - phsubd %mm0, %mm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phsubd (%rax), %mm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - phsubd %xmm0, %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phsubd (%rax), %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - phsubsw %mm0, %mm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phsubsw (%rax), %mm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - phsubsw %xmm0, %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phsubsw (%rax), %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - phsubw %mm0, %mm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phsubw (%rax), %mm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - phsubw %xmm0, %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 phsubw (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - pmaddubsw %mm0, %mm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 pmaddubsw (%rax), %mm2 # CHECK-NEXT: - - - 2.00 - - - - pmaddubsw %xmm0, %xmm2