From 6ba0b9f68ac9b3a058a2e98e5c4d1e039eff662f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 2 Sep 2021 18:07:40 +0100 Subject: [PATCH] [X86][SLM] Fix PBLENDVB uops and throughput SLM PBLENDVB is just as bad as BLENDVPD/PS - so model it as such, fixing the rr vs rm uops diff as well. The Intel AoM appears to have a copy+paste typo with PBLENDW, it doesn't match Agner or InstLatX64. Noticed while investigating some of the weird discrepancies reported by the D103695 helper script (SLM had much better vector shift throughputs than it should). --- llvm/lib/Target/X86/X86ScheduleSLM.td | 8 ++++---- llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index da48a9b88888..2e58b1cb4249 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -62,7 +62,7 @@ def : ReadAdvance; multiclass SLMWriteResPair ExePorts, int Lat, list Res = [1], int UOps = 1, - int LoadLat = 3> { + int LoadLat = 3, int LoadUOps = 0> { // Register variant is using a single cycle on ExePort. def : WriteRes { let Latency = Lat; @@ -75,7 +75,7 @@ multiclass SLMWriteResPair { let Latency = !add(Lat, LoadLat); let ResourceCycles = !listconcat([1], Res); - let NumMicroOps = UOps; + let NumMicroOps = !add(UOps, LoadUOps); } } @@ -280,7 +280,7 @@ defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; -defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; @@ -391,7 +391,7 @@ defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; -defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s index 7fbf75b627de..2bcebead6181 100644 --- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s @@ -159,9 +159,9 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 * blendpd $11, (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 blendps $11, %xmm0, %xmm2 # CHECK-NEXT: 1 4 1.00 * blendps $11, (%rax), %xmm2 -# CHECK-NEXT: 3 4 4.00 blendvpd %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: 2 4 4.00 blendvpd %xmm0, %xmm0, %xmm2 # CHECK-NEXT: 3 7 4.00 * blendvpd %xmm0, (%rax), %xmm2 -# CHECK-NEXT: 3 4 4.00 blendvps %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: 2 4 4.00 blendvps %xmm0, %xmm0, %xmm2 # CHECK-NEXT: 3 7 4.00 * blendvps %xmm0, (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 dppd $22, %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * dppd $22, (%rax), %xmm2 @@ -176,8 +176,8 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 1 10 1.00 * mpsadbw $1, (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 packusdw %xmm0, %xmm2 # CHECK-NEXT: 1 4 1.00 * packusdw (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 pblendvb %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * pblendvb %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 2 4 4.00 pblendvb %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: 3 7 4.00 * pblendvb %xmm0, (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pblendw $11, %xmm0, %xmm2 # CHECK-NEXT: 1 4 1.00 * pblendw $11, (%rax), %xmm2 # CHECK-NEXT: 2 4 2.00 pcmpeqq %xmm0, %xmm2 @@ -264,7 +264,7 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: - - - 90.00 25.00 - - 54.00 +# CHECK-NEXT: - - - 96.00 25.00 - - 54.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: @@ -289,8 +289,8 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 mpsadbw $1, (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - packusdw %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 packusdw (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 - - - - pblendvb %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 - - - 1.00 pblendvb %xmm0, (%rax), %xmm2 +# CHECK-NEXT: - - - 4.00 - - - - pblendvb %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: - - - 4.00 - - - 1.00 pblendvb %xmm0, (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - pblendw $11, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 pblendw $11, (%rax), %xmm2 # CHECK-NEXT: - - - 2.00 2.00 - - - pcmpeqq %xmm0, %xmm2