forked from OSchip/llvm-project
[X86][SLM] Fix DIVPD/DIVPS/RCPPS/RSQRTPS/SQRTPD/SQRTPS/DPPD/DPPS uops, latency and throughput
The packed variants of these instructions had been modelled identically to the scalar variants. The discrepancy was found during a run of llvm-exegesis on a cheap SLM box, and the corrected values also match what Agner / InstLatX64 report.
This commit is contained in:
parent
6d970e83fa
commit
65ad09da0e
|
@ -233,33 +233,33 @@ defm : X86WriteResPairUnsupported<WriteFMAX>;
|
|||
defm : X86WriteResPairUnsupported<WriteFMAY>;
|
||||
defm : X86WriteResPairUnsupported<WriteFMAZ>;
|
||||
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
|
||||
defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
|
||||
defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
|
||||
defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39], 6, 1>;
|
||||
defm : X86WriteResPairUnsupported<WriteFDivY>;
|
||||
defm : X86WriteResPairUnsupported<WriteFDivZ>;
|
||||
defm : SLMWriteResPair<WriteFDiv64, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,32]>;
|
||||
defm : SLMWriteResPair<WriteFDiv64X, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
|
||||
defm : SLMWriteResPair<WriteFDiv64Y, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
|
||||
defm : SLMWriteResPair<WriteFDiv64X, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69], 6, 1>;
|
||||
defm : X86WriteResPairUnsupported<WriteFDiv64Y>;
|
||||
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
|
||||
defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;
|
||||
defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 5>;
|
||||
defm : SLMWriteResPair<WriteFRcpY, [SLM_FPC_RSV0], 5>;
|
||||
defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 4>;
|
||||
defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 9, [8], 5, 1>;
|
||||
defm : X86WriteResPairUnsupported<WriteFRcpY>;
|
||||
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
|
||||
defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 5>;
|
||||
defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 5>;
|
||||
defm : SLMWriteResPair<WriteFRsqrtY, [SLM_FPC_RSV0], 5>;
|
||||
defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 4>;
|
||||
defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 9, [8], 5, 1>;
|
||||
defm : X86WriteResPairUnsupported<WriteFRsqrtY>;
|
||||
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
|
||||
defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0,SLMFPDivider], 20, [1,20]>;
|
||||
defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40]>;
|
||||
defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40]>;
|
||||
defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0, SLMFPDivider], 20, [1,20]>;
|
||||
defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0, SLMFPDivider], 41, [1,40], 5, 1>;
|
||||
defm : X86WriteResPairUnsupported<WriteFSqrtY>;
|
||||
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
|
||||
defm : SLMWriteResPair<WriteFSqrt64, [SLM_FPC_RSV0,SLMFPDivider], 35, [1,35]>;
|
||||
defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70]>;
|
||||
defm : SLMWriteResPair<WriteFSqrt64Y, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70]>;
|
||||
defm : SLMWriteResPair<WriteFSqrt64, [SLM_FPC_RSV0, SLMFPDivider], 35, [1,35]>;
|
||||
defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0, SLMFPDivider], 71, [1,70], 5, 1>;
|
||||
defm : X86WriteResPairUnsupported<WriteFSqrt64Y>;
|
||||
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
|
||||
defm : SLMWriteResPair<WriteFSqrt80, [SLM_FPC_RSV0,SLMFPDivider], 40, [1,40]>;
|
||||
defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>;
|
||||
defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>;
|
||||
defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>;
|
||||
defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 12, [8], 5, 1>;
|
||||
defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 15, [12], 9, 1>;
|
||||
defm : X86WriteResPairUnsupported<WriteDPPSY>;
|
||||
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
|
||||
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
|
||||
defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>;
|
||||
|
|
|
@ -226,8 +226,8 @@ xorps (%rax), %xmm2
|
|||
# CHECK-NEXT: 1 5 1.00 cvttss2si %xmm0, %rcx
|
||||
# CHECK-NEXT: 1 8 1.00 * cvttss2si (%rax), %ecx
|
||||
# CHECK-NEXT: 1 8 1.00 * cvttss2si (%rax), %rcx
|
||||
# CHECK-NEXT: 1 39 39.00 divps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 42 39.00 * divps (%rax), %xmm2
|
||||
# CHECK-NEXT: 6 39 39.00 divps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 7 42 39.00 * divps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 19 17.00 divss %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 22 17.00 * divss (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 * * U ldmxcsr (%rax)
|
||||
|
@ -290,19 +290,19 @@ xorps (%rax), %xmm2
|
|||
# CHECK-NEXT: 1 7 1.00 * psadbw (%rax), %mm2
|
||||
# CHECK-NEXT: 1 1 1.00 pshufw $1, %mm0, %mm2
|
||||
# CHECK-NEXT: 1 4 1.00 * pshufw $1, (%rax), %mm2
|
||||
# CHECK-NEXT: 1 5 1.00 rcpps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * rcpps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 5 1.00 rcpss %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * rcpss (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 5 1.00 rsqrtps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * rsqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 5 1.00 rsqrtss %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * rsqrtss (%rax), %xmm2
|
||||
# CHECK-NEXT: 5 9 8.00 rcpps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 6 12 8.00 * rcpps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 4 1.00 rcpss %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * rcpss (%rax), %xmm2
|
||||
# CHECK-NEXT: 5 9 8.00 rsqrtps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 6 12 8.00 * rsqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 4 1.00 rsqrtss %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * rsqrtss (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 1.00 * * U sfence
|
||||
# CHECK-NEXT: 1 1 1.00 shufps $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 4 1.00 * shufps $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 41 40.00 sqrtps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 44 40.00 * sqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: 5 41 40.00 sqrtps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 6 44 40.00 * sqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 20 20.00 sqrtss %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 23 20.00 * sqrtss (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 1.00 * U stmxcsr (%rax)
|
||||
|
@ -331,7 +331,7 @@ xorps (%rax), %xmm2
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
|
||||
# CHECK-NEXT: - 232.00 8.00 80.00 37.00 0.50 0.50 67.00
|
||||
# CHECK-NEXT: - 232.00 8.00 108.00 37.00 0.50 0.50 67.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
|
||||
|
@ -431,12 +431,12 @@ xorps (%rax), %xmm2
|
|||
# CHECK-NEXT: - - - 1.00 - - - 1.00 psadbw (%rax), %mm2
|
||||
# CHECK-NEXT: - - - 1.00 - - - - pshufw $1, %mm0, %mm2
|
||||
# CHECK-NEXT: - - - 1.00 - - - 1.00 pshufw $1, (%rax), %mm2
|
||||
# CHECK-NEXT: - - - 1.00 - - - - rcpps %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - - - 1.00 rcpps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 8.00 - - - - rcpps %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 8.00 - - - 1.00 rcpps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - - - - rcpss %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - - - 1.00 rcpss (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - - - - rsqrtps %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - - - 1.00 rsqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 8.00 - - - - rsqrtps %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 8.00 - - - 1.00 rsqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - - - - rsqrtss %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - - - 1.00 rsqrtss (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - - 1.00 sfence
|
||||
|
|
|
@ -460,8 +460,8 @@ xorpd (%rax), %xmm2
|
|||
# CHECK-NEXT: 1 5 1.00 cvttsd2si %xmm0, %rcx
|
||||
# CHECK-NEXT: 1 8 1.00 * cvttsd2si (%rax), %ecx
|
||||
# CHECK-NEXT: 1 8 1.00 * cvttsd2si (%rax), %rcx
|
||||
# CHECK-NEXT: 1 69 69.00 divpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 72 69.00 * divpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 6 69 69.00 divpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 7 72 69.00 * divpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 34 32.00 divsd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 37 32.00 * divsd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 1.00 * * U lfence
|
||||
|
@ -658,8 +658,8 @@ xorpd (%rax), %xmm2
|
|||
# CHECK-NEXT: 1 4 1.00 * pxor (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 1.00 shufpd $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 4 1.00 * shufpd $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 71 70.00 sqrtpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 74 70.00 * sqrtpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 5 71 70.00 sqrtpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 6 74 70.00 * sqrtpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 35 35.00 sqrtsd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 38 35.00 * sqrtsd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 4 2.00 subpd %xmm0, %xmm2
|
||||
|
|
|
@ -163,10 +163,10 @@ roundss $1, (%rax), %xmm2
|
|||
# CHECK-NEXT: 3 7 4.00 * blendvpd %xmm0, (%rax), %xmm2
|
||||
# CHECK-NEXT: 2 4 4.00 blendvps %xmm0, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 3 7 4.00 * blendvps %xmm0, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 dppd $22, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * dppd $22, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 dpps $22, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * dpps $22, (%rax), %xmm2
|
||||
# CHECK-NEXT: 5 12 8.00 dppd $22, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 6 15 8.00 * dppd $22, (%rax), %xmm2
|
||||
# CHECK-NEXT: 9 15 12.00 dpps $22, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 10 18 12.00 * dpps $22, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 1.00 extractps $1, %xmm0, %ecx
|
||||
# CHECK-NEXT: 2 4 2.00 * extractps $1, %xmm0, (%rax)
|
||||
# CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2
|
||||
|
@ -264,7 +264,7 @@ roundss $1, (%rax), %xmm2
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
|
||||
# CHECK-NEXT: - - - 104.00 25.00 - - 54.00
|
||||
# CHECK-NEXT: - - - 104.00 61.00 - - 54.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
|
||||
|
@ -276,10 +276,10 @@ roundss $1, (%rax), %xmm2
|
|||
# CHECK-NEXT: - - - 4.00 - - - 1.00 blendvpd %xmm0, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 4.00 - - - - blendvps %xmm0, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 4.00 - - - 1.00 blendvps %xmm0, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - 1.00 - - - dppd $22, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - 1.00 - - 1.00 dppd $22, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - 1.00 - - - dpps $22, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - 1.00 - - 1.00 dpps $22, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - 8.00 - - - dppd $22, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - 8.00 - - 1.00 dppd $22, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - 12.00 - - - dpps $22, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - 12.00 - - 1.00 dpps $22, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - - - - extractps $1, %xmm0, %ecx
|
||||
# CHECK-NEXT: - - - 1.00 - - - 2.00 extractps $1, %xmm0, (%rax)
|
||||
# CHECK-NEXT: - - - 1.00 - - - - insertps $1, %xmm0, %xmm2
|
||||
|
|
Loading…
Reference in New Issue