[X86] Fix reciprocal instruction throughput/uops counts

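Matches the numbers from the AMD Software Optimization Guide (SoG) and Agner Fog's instruction tables: the reciprocal instructions should always be on FPU pipes 0+1, folded (memory-operand) instructions need no additional uops, and znver1 double-pumps 256-bit vectors.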

Noticed while adding CostKinds support to the x86 cost models.
This commit is contained in:
Simon Pilgrim 2022-09-01 20:25:52 +01:00
parent 14757d5b84
commit f8d4da7630
6 changed files with 38 additions and 102 deletions

View File

@ -357,11 +357,11 @@ defm : ZnWriteResFpuPair<WriteFMAY, [ZnFPU03], 5>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : ZnWriteResFpuPair<WriteFRcp, [ZnFPU01], 5>;
defm : ZnWriteResFpuPair<WriteFRcpX, [ZnFPU01], 5>;
defm : ZnWriteResFpuPair<WriteFRcpY, [ZnFPU01], 5, [1], 1, 7, 2>;
defm : ZnWriteResFpuPair<WriteFRcpY, [ZnFPU01], 5, [2], 2>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
//defm : ZnWriteResFpuPair<WriteFRsqrt, [ZnFPU02], 5>;
defm : ZnWriteResFpuPair<WriteFRsqrtX, [ZnFPU01], 5, [1], 1, 7, 1>;
//defm : ZnWriteResFpuPair<WriteFRsqrtY, [ZnFPU01], 5, [2], 2>;
defm : ZnWriteResFpuPair<WriteFRsqrt, [ZnFPU01], 5>;
defm : ZnWriteResFpuPair<WriteFRsqrtX, [ZnFPU01], 5>;
defm : ZnWriteResFpuPair<WriteFRsqrtY, [ZnFPU01], 5, [2], 2>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : ZnWriteResFpuPair<WriteFSqrt, [ZnFPU3], 20, [20]>;
defm : ZnWriteResFpuPair<WriteFSqrtX, [ZnFPU3], 20, [20]>;
@ -1480,39 +1480,6 @@ def : SchedAlias<WriteDPPD, ZnWriteMicrocoded>;
// x,m,i.
def : SchedAlias<WriteDPPDLd, ZnWriteMicrocoded>;
// RSQRTSS
// TODO - convert to ZnWriteResFpuPair
// x,x.
def ZnWriteRSQRTSSr : SchedWriteRes<[ZnFPU02]> {
let Latency = 5;
}
def : SchedAlias<WriteFRsqrt, ZnWriteRSQRTSSr>;
// x,m128.
def ZnWriteRSQRTSSLd: SchedWriteRes<[ZnAGU, ZnFPU02]> {
let Latency = 12;
let NumMicroOps = 2;
let ResourceCycles = [1,2]; // FIXME: Is this right?
}
def : SchedAlias<WriteFRsqrtLd, ZnWriteRSQRTSSLd>;
// RSQRTPS
// TODO - convert to ZnWriteResFpuPair
// y,y.
def ZnWriteRSQRTPSYr : SchedWriteRes<[ZnFPU01]> {
let Latency = 5;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
def : SchedAlias<WriteFRsqrtY, ZnWriteRSQRTPSYr>;
// y,m256.
def ZnWriteRSQRTPSYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
let Latency = 12;
let NumMicroOps = 2;
}
def : SchedAlias<WriteFRsqrtYLd, ZnWriteRSQRTPSYLd>;
//-- Other instructions --//
// VZEROUPPER.

View File

@ -354,9 +354,11 @@ defm : Zn2WriteResFpuPair<WriteFMAY, [Zn2FPU03], 5>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : Zn2WriteResFpuPair<WriteFRcp, [Zn2FPU01], 5>;
defm : Zn2WriteResFpuPair<WriteFRcpX, [Zn2FPU01], 5>;
defm : Zn2WriteResFpuPair<WriteFRcpY, [Zn2FPU01], 5, [1], 1, 7, 2>;
defm : Zn2WriteResFpuPair<WriteFRcpY, [Zn2FPU01], 5>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : Zn2WriteResFpuPair<WriteFRsqrtX, [Zn2FPU01], 5, [1], 1, 7, 1>;
defm : Zn2WriteResFpuPair<WriteFRsqrt, [Zn2FPU01], 5>;
defm : Zn2WriteResFpuPair<WriteFRsqrtX, [Zn2FPU01], 5>;
defm : Zn2WriteResFpuPair<WriteFRsqrtY, [Zn2FPU01], 5>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : Zn2WriteResFpuPair<WriteFSqrt, [Zn2FPU3], 20, [20]>;
defm : Zn2WriteResFpuPair<WriteFSqrtX, [Zn2FPU3], 20, [20]>;
@ -1491,39 +1493,6 @@ def : SchedAlias<WriteDPPD, Zn2WriteMicrocoded>;
// x,m,i.
def : SchedAlias<WriteDPPDLd, Zn2WriteMicrocoded>;
// RSQRTSS
// TODO - convert to Zn2WriteResFpuPair
// x,x.
def Zn2WriteRSQRTSSr : SchedWriteRes<[Zn2FPU02]> {
let Latency = 5;
}
def : SchedAlias<WriteFRsqrt, Zn2WriteRSQRTSSr>;
// x,m128.
def Zn2WriteRSQRTSSLd: SchedWriteRes<[Zn2AGU, Zn2FPU02]> {
let Latency = 12;
let NumMicroOps = 2;
let ResourceCycles = [1,2];
}
def : SchedAlias<WriteFRsqrtLd, Zn2WriteRSQRTSSLd>;
// RSQRTPS
// TODO - convert to Zn2WriteResFpuPair
// y,y.
def Zn2WriteRSQRTPSYr : SchedWriteRes<[Zn2FPU01]> {
let Latency = 5;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
def : SchedAlias<WriteFRsqrtY, Zn2WriteRSQRTPSYr>;
// y,m256.
def Zn2WriteRSQRTPSYLd : SchedWriteRes<[Zn2AGU, Zn2FPU01]> {
let Latency = 12;
let NumMicroOps = 2;
}
def : SchedAlias<WriteFRsqrtYLd, Zn2WriteRSQRTPSYLd>;
//-- Other instructions --//
// VZEROUPPER.

View File

@ -1628,8 +1628,8 @@ vzeroupper
# CHECK-NEXT: 1 8 0.50 * vpxor (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vrcpps %xmm0, %xmm2
# CHECK-NEXT: 1 12 0.50 * vrcpps (%rax), %xmm2
# CHECK-NEXT: 1 5 0.50 vrcpps %ymm0, %ymm2
# CHECK-NEXT: 3 12 0.50 * vrcpps (%rax), %ymm2
# CHECK-NEXT: 2 5 1.00 vrcpps %ymm0, %ymm2
# CHECK-NEXT: 2 12 1.00 * vrcpps (%rax), %ymm2
# CHECK-NEXT: 1 5 0.50 vrcpss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 12 0.50 * vrcpss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 1.00 vroundpd $1, %xmm0, %xmm2
@ -1645,11 +1645,11 @@ vzeroupper
# CHECK-NEXT: 1 4 1.00 vroundss $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 11 1.00 * vroundss $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vrsqrtps %xmm0, %xmm2
# CHECK-NEXT: 2 12 0.50 * vrsqrtps (%rax), %xmm2
# CHECK-NEXT: 1 12 0.50 * vrsqrtps (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 vrsqrtps %ymm0, %ymm2
# CHECK-NEXT: 2 12 0.50 * vrsqrtps (%rax), %ymm2
# CHECK-NEXT: 2 12 1.00 * vrsqrtps (%rax), %ymm2
# CHECK-NEXT: 1 5 0.50 vrsqrtss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 12 1.00 * vrsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 12 0.50 * vrsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vshufpd $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vshufpd $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vshufpd $1, %ymm0, %ymm1, %ymm2
@ -1738,7 +1738,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
# CHECK-NEXT: 175.00 175.00 - - - - - 148.58 187.08 220.25 527.08 -
# CHECK-NEXT: 175.00 175.00 - - - - - 149.58 189.58 218.75 527.08 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@ -2340,8 +2340,8 @@ vzeroupper
# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpxor (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - vrcpps %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - vrcpps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - vrcpps %ymm0, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - vrcpps (%rax), %ymm2
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - vrcpps %ymm0, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 1.00 - - - vrcpps (%rax), %ymm2
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - vrcpss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - vrcpss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - 1.00 - vroundpd $1, %xmm0, %xmm2
@ -2359,9 +2359,9 @@ vzeroupper
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - vrsqrtps %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - vrsqrtps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - vrsqrtps %ymm0, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - vrsqrtps (%rax), %ymm2
# CHECK-NEXT: - - - - - - - 0.50 - 0.50 - - vrsqrtss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - vrsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 1.00 - - - vrsqrtps (%rax), %ymm2
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - vrsqrtss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - vrsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vshufpd $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vshufpd $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vshufpd $1, %ymm0, %ymm1, %ymm2

View File

@ -295,9 +295,9 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 5 0.50 rcpss %xmm0, %xmm2
# CHECK-NEXT: 1 12 0.50 * rcpss (%rax), %xmm2
# CHECK-NEXT: 1 5 0.50 rsqrtps %xmm0, %xmm2
# CHECK-NEXT: 2 12 0.50 * rsqrtps (%rax), %xmm2
# CHECK-NEXT: 1 12 0.50 * rsqrtps (%rax), %xmm2
# CHECK-NEXT: 1 5 0.50 rsqrtss %xmm0, %xmm2
# CHECK-NEXT: 2 12 1.00 * rsqrtss (%rax), %xmm2
# CHECK-NEXT: 1 12 0.50 * rsqrtss (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 * * U sfence
# CHECK-NEXT: 1 1 0.50 shufps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * shufps $1, (%rax), %xmm2
@ -335,7 +335,7 @@ xorps (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
# CHECK-NEXT: 32.50 32.50 - - - - - 25.00 29.50 30.00 112.50 -
# CHECK-NEXT: 32.50 32.50 - - - - - 24.50 30.50 28.50 112.50 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@ -441,8 +441,8 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - rcpss (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - rsqrtps %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - rsqrtps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 0.50 - 0.50 - - rsqrtss %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - rsqrtss (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - rsqrtss %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - rsqrtss (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - sfence
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - shufps $1, %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - shufps $1, (%rax), %xmm2

View File

@ -1629,7 +1629,7 @@ vzeroupper
# CHECK-NEXT: 1 5 0.50 vrcpps %xmm0, %xmm2
# CHECK-NEXT: 1 12 0.50 * vrcpps (%rax), %xmm2
# CHECK-NEXT: 1 5 0.50 vrcpps %ymm0, %ymm2
# CHECK-NEXT: 3 12 0.50 * vrcpps (%rax), %ymm2
# CHECK-NEXT: 1 12 0.50 * vrcpps (%rax), %ymm2
# CHECK-NEXT: 1 5 0.50 vrcpss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 12 0.50 * vrcpss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vroundpd $1, %xmm0, %xmm2
@ -1645,11 +1645,11 @@ vzeroupper
# CHECK-NEXT: 1 3 1.00 vroundss $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 1.00 * vroundss $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vrsqrtps %xmm0, %xmm2
# CHECK-NEXT: 2 12 0.50 * vrsqrtps (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 vrsqrtps %ymm0, %ymm2
# CHECK-NEXT: 2 12 0.50 * vrsqrtps (%rax), %ymm2
# CHECK-NEXT: 1 12 0.50 * vrsqrtps (%rax), %xmm2
# CHECK-NEXT: 1 5 0.50 vrsqrtps %ymm0, %ymm2
# CHECK-NEXT: 1 12 0.50 * vrsqrtps (%rax), %ymm2
# CHECK-NEXT: 1 5 0.50 vrsqrtss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 12 1.00 * vrsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 12 0.50 * vrsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vshufpd $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vshufpd $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vshufpd $1, %ymm0, %ymm1, %ymm2
@ -1739,7 +1739,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
# CHECK-NEXT: 117.00 117.00 117.00 0.25 0.25 0.25 0.25 - 133.92 169.42 206.25 467.42 -
# CHECK-NEXT: 117.00 117.00 117.00 0.25 0.25 0.25 0.25 - 132.92 169.92 204.75 467.42 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
@ -2359,10 +2359,10 @@ vzeroupper
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vroundss $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - vrsqrtps %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 - - - vrsqrtps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - vrsqrtps %ymm0, %ymm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - vrsqrtps %ymm0, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 - - - vrsqrtps (%rax), %ymm2
# CHECK-NEXT: - - - - - - - - 0.50 - 0.50 - - vrsqrtss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 1.00 - 1.00 - - vrsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - vrsqrtss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 - - - vrsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vshufpd $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - vshufpd $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vshufpd $1, %ymm0, %ymm1, %ymm2

View File

@ -295,9 +295,9 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 5 0.50 rcpss %xmm0, %xmm2
# CHECK-NEXT: 1 12 0.50 * rcpss (%rax), %xmm2
# CHECK-NEXT: 1 5 0.50 rsqrtps %xmm0, %xmm2
# CHECK-NEXT: 2 12 0.50 * rsqrtps (%rax), %xmm2
# CHECK-NEXT: 1 12 0.50 * rsqrtps (%rax), %xmm2
# CHECK-NEXT: 1 5 0.50 rsqrtss %xmm0, %xmm2
# CHECK-NEXT: 2 12 1.00 * rsqrtss (%rax), %xmm2
# CHECK-NEXT: 1 12 0.50 * rsqrtss (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 * * U sfence
# CHECK-NEXT: 1 1 0.50 shufps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * shufps $1, (%rax), %xmm2
@ -336,7 +336,7 @@ xorps (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
# CHECK-NEXT: 21.67 21.67 21.67 - - - - - 25.00 29.50 30.00 112.50 -
# CHECK-NEXT: 21.67 21.67 21.67 - - - - - 24.50 30.50 28.50 112.50 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
@ -442,8 +442,8 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 - - - rcpss (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - rsqrtps %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 - - - rsqrtps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 - 0.50 - - rsqrtss %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 1.00 - 1.00 - - rsqrtss (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - rsqrtss %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 - - - rsqrtss (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - sfence
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - shufps $1, %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - shufps $1, (%rax), %xmm2