[X86][SLM] Fix DIVPD/DIVPS/RCPPS/RSQRTPS/SQRTPD/SQRTPS/DPPD/DPPS uops, latency and throughput

The packed variants of these instructions had been modelled the same as the scalar variants.

The discrepancy was reported during a run of llvm-exegesis on a cheap SLM box, and the new values also match what Agner / InstLatX64 report.
This commit is contained in:
Simon Pilgrim 2021-09-11 20:29:25 +01:00
parent 6d970e83fa
commit 65ad09da0e
4 changed files with 49 additions and 49 deletions

View File

@ -233,33 +233,33 @@ defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39], 6, 1>;
defm : X86WriteResPairUnsupported<WriteFDivY>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : SLMWriteResPair<WriteFDiv64, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,32]>;
defm : SLMWriteResPair<WriteFDiv64X, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
defm : SLMWriteResPair<WriteFDiv64Y, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
defm : SLMWriteResPair<WriteFDiv64X, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69], 6, 1>;
defm : X86WriteResPairUnsupported<WriteFDiv64Y>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRcpY, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 9, [8], 5, 1>;
defm : X86WriteResPairUnsupported<WriteFRcpY>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRsqrtY, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 9, [8], 5, 1>;
defm : X86WriteResPairUnsupported<WriteFRsqrtY>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0,SLMFPDivider], 20, [1,20]>;
defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40]>;
defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40]>;
defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0, SLMFPDivider], 20, [1,20]>;
defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0, SLMFPDivider], 41, [1,40], 5, 1>;
defm : X86WriteResPairUnsupported<WriteFSqrtY>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : SLMWriteResPair<WriteFSqrt64, [SLM_FPC_RSV0,SLMFPDivider], 35, [1,35]>;
defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70]>;
defm : SLMWriteResPair<WriteFSqrt64Y, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70]>;
defm : SLMWriteResPair<WriteFSqrt64, [SLM_FPC_RSV0, SLMFPDivider], 35, [1,35]>;
defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0, SLMFPDivider], 71, [1,70], 5, 1>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Y>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : SLMWriteResPair<WriteFSqrt80, [SLM_FPC_RSV0,SLMFPDivider], 40, [1,40]>;
defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 12, [8], 5, 1>;
defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 15, [12], 9, 1>;
defm : X86WriteResPairUnsupported<WriteDPPSY>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>;

View File

@ -226,8 +226,8 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 cvttss2si %xmm0, %rcx
# CHECK-NEXT: 1 8 1.00 * cvttss2si (%rax), %ecx
# CHECK-NEXT: 1 8 1.00 * cvttss2si (%rax), %rcx
# CHECK-NEXT: 1 39 39.00 divps %xmm0, %xmm2
# CHECK-NEXT: 1 42 39.00 * divps (%rax), %xmm2
# CHECK-NEXT: 6 39 39.00 divps %xmm0, %xmm2
# CHECK-NEXT: 7 42 39.00 * divps (%rax), %xmm2
# CHECK-NEXT: 1 19 17.00 divss %xmm0, %xmm2
# CHECK-NEXT: 1 22 17.00 * divss (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 * * U ldmxcsr (%rax)
@ -290,19 +290,19 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 7 1.00 * psadbw (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 pshufw $1, %mm0, %mm2
# CHECK-NEXT: 1 4 1.00 * pshufw $1, (%rax), %mm2
# CHECK-NEXT: 1 5 1.00 rcpps %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * rcpps (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 rcpss %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * rcpss (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 rsqrtps %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * rsqrtps (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 rsqrtss %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * rsqrtss (%rax), %xmm2
# CHECK-NEXT: 5 9 8.00 rcpps %xmm0, %xmm2
# CHECK-NEXT: 6 12 8.00 * rcpps (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 rcpss %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * rcpss (%rax), %xmm2
# CHECK-NEXT: 5 9 8.00 rsqrtps %xmm0, %xmm2
# CHECK-NEXT: 6 12 8.00 * rsqrtps (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 rsqrtss %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * rsqrtss (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * * U sfence
# CHECK-NEXT: 1 1 1.00 shufps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * shufps $1, (%rax), %xmm2
# CHECK-NEXT: 1 41 40.00 sqrtps %xmm0, %xmm2
# CHECK-NEXT: 1 44 40.00 * sqrtps (%rax), %xmm2
# CHECK-NEXT: 5 41 40.00 sqrtps %xmm0, %xmm2
# CHECK-NEXT: 6 44 40.00 * sqrtps (%rax), %xmm2
# CHECK-NEXT: 1 20 20.00 sqrtss %xmm0, %xmm2
# CHECK-NEXT: 1 23 20.00 * sqrtss (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * U stmxcsr (%rax)
@ -331,7 +331,7 @@ xorps (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
# CHECK-NEXT: - 232.00 8.00 80.00 37.00 0.50 0.50 67.00
# CHECK-NEXT: - 232.00 8.00 108.00 37.00 0.50 0.50 67.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
@ -431,12 +431,12 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 psadbw (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pshufw $1, %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 pshufw $1, (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - rcpps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 rcpps (%rax), %xmm2
# CHECK-NEXT: - - - 8.00 - - - - rcpps %xmm0, %xmm2
# CHECK-NEXT: - - - 8.00 - - - 1.00 rcpps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - rcpss %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 rcpss (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - rsqrtps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 rsqrtps (%rax), %xmm2
# CHECK-NEXT: - - - 8.00 - - - - rsqrtps %xmm0, %xmm2
# CHECK-NEXT: - - - 8.00 - - - 1.00 rsqrtps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - rsqrtss %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 rsqrtss (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 sfence

View File

@ -460,8 +460,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 cvttsd2si %xmm0, %rcx
# CHECK-NEXT: 1 8 1.00 * cvttsd2si (%rax), %ecx
# CHECK-NEXT: 1 8 1.00 * cvttsd2si (%rax), %rcx
# CHECK-NEXT: 1 69 69.00 divpd %xmm0, %xmm2
# CHECK-NEXT: 1 72 69.00 * divpd (%rax), %xmm2
# CHECK-NEXT: 6 69 69.00 divpd %xmm0, %xmm2
# CHECK-NEXT: 7 72 69.00 * divpd (%rax), %xmm2
# CHECK-NEXT: 1 34 32.00 divsd %xmm0, %xmm2
# CHECK-NEXT: 1 37 32.00 * divsd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * * U lfence
@ -658,8 +658,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 * pxor (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 shufpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * shufpd $1, (%rax), %xmm2
# CHECK-NEXT: 1 71 70.00 sqrtpd %xmm0, %xmm2
# CHECK-NEXT: 1 74 70.00 * sqrtpd (%rax), %xmm2
# CHECK-NEXT: 5 71 70.00 sqrtpd %xmm0, %xmm2
# CHECK-NEXT: 6 74 70.00 * sqrtpd (%rax), %xmm2
# CHECK-NEXT: 1 35 35.00 sqrtsd %xmm0, %xmm2
# CHECK-NEXT: 1 38 35.00 * sqrtsd (%rax), %xmm2
# CHECK-NEXT: 1 4 2.00 subpd %xmm0, %xmm2

View File

@ -163,10 +163,10 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 3 7 4.00 * blendvpd %xmm0, (%rax), %xmm2
# CHECK-NEXT: 2 4 4.00 blendvps %xmm0, %xmm0, %xmm2
# CHECK-NEXT: 3 7 4.00 * blendvps %xmm0, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 dppd $22, %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * dppd $22, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 dpps $22, %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * dpps $22, (%rax), %xmm2
# CHECK-NEXT: 5 12 8.00 dppd $22, %xmm0, %xmm2
# CHECK-NEXT: 6 15 8.00 * dppd $22, (%rax), %xmm2
# CHECK-NEXT: 9 15 12.00 dpps $22, %xmm0, %xmm2
# CHECK-NEXT: 10 18 12.00 * dpps $22, (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 extractps $1, %xmm0, %ecx
# CHECK-NEXT: 2 4 2.00 * extractps $1, %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2
@ -264,7 +264,7 @@ roundss $1, (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
# CHECK-NEXT: - - - 104.00 25.00 - - 54.00
# CHECK-NEXT: - - - 104.00 61.00 - - 54.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
@ -276,10 +276,10 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - - 4.00 - - - 1.00 blendvpd %xmm0, (%rax), %xmm2
# CHECK-NEXT: - - - 4.00 - - - - blendvps %xmm0, %xmm0, %xmm2
# CHECK-NEXT: - - - 4.00 - - - 1.00 blendvps %xmm0, (%rax), %xmm2
# CHECK-NEXT: - - - - 1.00 - - - dppd $22, %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - - 1.00 dppd $22, (%rax), %xmm2
# CHECK-NEXT: - - - - 1.00 - - - dpps $22, %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - - 1.00 dpps $22, (%rax), %xmm2
# CHECK-NEXT: - - - - 8.00 - - - dppd $22, %xmm0, %xmm2
# CHECK-NEXT: - - - - 8.00 - - 1.00 dppd $22, (%rax), %xmm2
# CHECK-NEXT: - - - - 12.00 - - - dpps $22, %xmm0, %xmm2
# CHECK-NEXT: - - - - 12.00 - - 1.00 dpps $22, (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - extractps $1, %xmm0, %ecx
# CHECK-NEXT: - - - 1.00 - - - 2.00 extractps $1, %xmm0, (%rax)
# CHECK-NEXT: - - - 1.00 - - - - insertps $1, %xmm0, %xmm2