forked from OSchip/llvm-project
[X86] Fix reciprocal instruction throughput/uops counts
Matches numbers from AMD SoG + Agner: these should always be on FPU pipes 0+1, with no additional uops for folded instructions, and znver1 double-pumps 256-bit vectors. Noticed while adding CostKinds support to the x86 cost models.
This commit is contained in:
parent
14757d5b84
commit
f8d4da7630
|
@ -357,11 +357,11 @@ defm : ZnWriteResFpuPair<WriteFMAY, [ZnFPU03], 5>;
|
|||
defm : X86WriteResPairUnsupported<WriteFMAZ>;
|
||||
defm : ZnWriteResFpuPair<WriteFRcp, [ZnFPU01], 5>;
|
||||
defm : ZnWriteResFpuPair<WriteFRcpX, [ZnFPU01], 5>;
|
||||
defm : ZnWriteResFpuPair<WriteFRcpY, [ZnFPU01], 5, [1], 1, 7, 2>;
|
||||
defm : ZnWriteResFpuPair<WriteFRcpY, [ZnFPU01], 5, [2], 2>;
|
||||
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
|
||||
//defm : ZnWriteResFpuPair<WriteFRsqrt, [ZnFPU02], 5>;
|
||||
defm : ZnWriteResFpuPair<WriteFRsqrtX, [ZnFPU01], 5, [1], 1, 7, 1>;
|
||||
//defm : ZnWriteResFpuPair<WriteFRsqrtY, [ZnFPU01], 5, [2], 2>;
|
||||
defm : ZnWriteResFpuPair<WriteFRsqrt, [ZnFPU01], 5>;
|
||||
defm : ZnWriteResFpuPair<WriteFRsqrtX, [ZnFPU01], 5>;
|
||||
defm : ZnWriteResFpuPair<WriteFRsqrtY, [ZnFPU01], 5, [2], 2>;
|
||||
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
|
||||
defm : ZnWriteResFpuPair<WriteFSqrt, [ZnFPU3], 20, [20]>;
|
||||
defm : ZnWriteResFpuPair<WriteFSqrtX, [ZnFPU3], 20, [20]>;
|
||||
|
@ -1480,39 +1480,6 @@ def : SchedAlias<WriteDPPD, ZnWriteMicrocoded>;
|
|||
// x,m,i.
|
||||
def : SchedAlias<WriteDPPDLd, ZnWriteMicrocoded>;
|
||||
|
||||
// RSQRTSS
|
||||
// TODO - convert to ZnWriteResFpuPair
|
||||
// x,x.
|
||||
def ZnWriteRSQRTSSr : SchedWriteRes<[ZnFPU02]> {
|
||||
let Latency = 5;
|
||||
}
|
||||
def : SchedAlias<WriteFRsqrt, ZnWriteRSQRTSSr>;
|
||||
|
||||
// x,m128.
|
||||
def ZnWriteRSQRTSSLd: SchedWriteRes<[ZnAGU, ZnFPU02]> {
|
||||
let Latency = 12;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,2]; // FIXME: Is this right?
|
||||
}
|
||||
def : SchedAlias<WriteFRsqrtLd, ZnWriteRSQRTSSLd>;
|
||||
|
||||
// RSQRTPS
|
||||
// TODO - convert to ZnWriteResFpuPair
|
||||
// y,y.
|
||||
def ZnWriteRSQRTPSYr : SchedWriteRes<[ZnFPU01]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def : SchedAlias<WriteFRsqrtY, ZnWriteRSQRTPSYr>;
|
||||
|
||||
// y,m256.
|
||||
def ZnWriteRSQRTPSYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
|
||||
let Latency = 12;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def : SchedAlias<WriteFRsqrtYLd, ZnWriteRSQRTPSYLd>;
|
||||
|
||||
//-- Other instructions --//
|
||||
|
||||
// VZEROUPPER.
|
||||
|
|
|
@ -354,9 +354,11 @@ defm : Zn2WriteResFpuPair<WriteFMAY, [Zn2FPU03], 5>;
|
|||
defm : X86WriteResPairUnsupported<WriteFMAZ>;
|
||||
defm : Zn2WriteResFpuPair<WriteFRcp, [Zn2FPU01], 5>;
|
||||
defm : Zn2WriteResFpuPair<WriteFRcpX, [Zn2FPU01], 5>;
|
||||
defm : Zn2WriteResFpuPair<WriteFRcpY, [Zn2FPU01], 5, [1], 1, 7, 2>;
|
||||
defm : Zn2WriteResFpuPair<WriteFRcpY, [Zn2FPU01], 5>;
|
||||
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
|
||||
defm : Zn2WriteResFpuPair<WriteFRsqrtX, [Zn2FPU01], 5, [1], 1, 7, 1>;
|
||||
defm : Zn2WriteResFpuPair<WriteFRsqrt, [Zn2FPU01], 5>;
|
||||
defm : Zn2WriteResFpuPair<WriteFRsqrtX, [Zn2FPU01], 5>;
|
||||
defm : Zn2WriteResFpuPair<WriteFRsqrtY, [Zn2FPU01], 5>;
|
||||
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
|
||||
defm : Zn2WriteResFpuPair<WriteFSqrt, [Zn2FPU3], 20, [20]>;
|
||||
defm : Zn2WriteResFpuPair<WriteFSqrtX, [Zn2FPU3], 20, [20]>;
|
||||
|
@ -1491,39 +1493,6 @@ def : SchedAlias<WriteDPPD, Zn2WriteMicrocoded>;
|
|||
// x,m,i.
|
||||
def : SchedAlias<WriteDPPDLd, Zn2WriteMicrocoded>;
|
||||
|
||||
// RSQRTSS
|
||||
// TODO - convert to Zn2WriteResFpuPair
|
||||
// x,x.
|
||||
def Zn2WriteRSQRTSSr : SchedWriteRes<[Zn2FPU02]> {
|
||||
let Latency = 5;
|
||||
}
|
||||
def : SchedAlias<WriteFRsqrt, Zn2WriteRSQRTSSr>;
|
||||
|
||||
// x,m128.
|
||||
def Zn2WriteRSQRTSSLd: SchedWriteRes<[Zn2AGU, Zn2FPU02]> {
|
||||
let Latency = 12;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,2];
|
||||
}
|
||||
def : SchedAlias<WriteFRsqrtLd, Zn2WriteRSQRTSSLd>;
|
||||
|
||||
// RSQRTPS
|
||||
// TODO - convert to Zn2WriteResFpuPair
|
||||
// y,y.
|
||||
def Zn2WriteRSQRTPSYr : SchedWriteRes<[Zn2FPU01]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def : SchedAlias<WriteFRsqrtY, Zn2WriteRSQRTPSYr>;
|
||||
|
||||
// y,m256.
|
||||
def Zn2WriteRSQRTPSYLd : SchedWriteRes<[Zn2AGU, Zn2FPU01]> {
|
||||
let Latency = 12;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def : SchedAlias<WriteFRsqrtYLd, Zn2WriteRSQRTPSYLd>;
|
||||
|
||||
//-- Other instructions --//
|
||||
|
||||
// VZEROUPPER.
|
||||
|
|
|
@ -1628,8 +1628,8 @@ vzeroupper
|
|||
# CHECK-NEXT: 1 8 0.50 * vpxor (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1 5 0.50 vrcpps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 12 0.50 * vrcpps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 5 0.50 vrcpps %ymm0, %ymm2
|
||||
# CHECK-NEXT: 3 12 0.50 * vrcpps (%rax), %ymm2
|
||||
# CHECK-NEXT: 2 5 1.00 vrcpps %ymm0, %ymm2
|
||||
# CHECK-NEXT: 2 12 1.00 * vrcpps (%rax), %ymm2
|
||||
# CHECK-NEXT: 1 5 0.50 vrcpss %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1 12 0.50 * vrcpss (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1 4 1.00 vroundpd $1, %xmm0, %xmm2
|
||||
|
@ -1645,11 +1645,11 @@ vzeroupper
|
|||
# CHECK-NEXT: 1 4 1.00 vroundss $1, %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 2 11 1.00 * vroundss $1, (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1 5 0.50 vrsqrtps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 2 12 0.50 * vrsqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 12 0.50 * vrsqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: 2 5 1.00 vrsqrtps %ymm0, %ymm2
|
||||
# CHECK-NEXT: 2 12 0.50 * vrsqrtps (%rax), %ymm2
|
||||
# CHECK-NEXT: 2 12 1.00 * vrsqrtps (%rax), %ymm2
|
||||
# CHECK-NEXT: 1 5 0.50 vrsqrtss %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 2 12 1.00 * vrsqrtss (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1 12 0.50 * vrsqrtss (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 vshufpd $1, %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1 8 0.50 * vshufpd $1, (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 vshufpd $1, %ymm0, %ymm1, %ymm2
|
||||
|
@ -1738,7 +1738,7 @@ vzeroupper
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
|
||||
# CHECK-NEXT: 175.00 175.00 - - - - - 148.58 187.08 220.25 527.08 -
|
||||
# CHECK-NEXT: 175.00 175.00 - - - - - 149.58 189.58 218.75 527.08 -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
|
||||
|
@ -2340,8 +2340,8 @@ vzeroupper
|
|||
# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpxor (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - vrcpps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - vrcpps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - vrcpps %ymm0, %ymm2
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - vrcpps (%rax), %ymm2
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - vrcpps %ymm0, %ymm2
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 1.00 - - - vrcpps (%rax), %ymm2
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - vrcpss %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - vrcpss (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - - - - - - - - 1.00 - vroundpd $1, %xmm0, %xmm2
|
||||
|
@ -2359,9 +2359,9 @@ vzeroupper
|
|||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - vrsqrtps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - vrsqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - vrsqrtps %ymm0, %ymm2
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - vrsqrtps (%rax), %ymm2
|
||||
# CHECK-NEXT: - - - - - - - 0.50 - 0.50 - - vrsqrtss %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - vrsqrtss (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 1.00 - - - vrsqrtps (%rax), %ymm2
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - vrsqrtss %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - vrsqrtss (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vshufpd $1, %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vshufpd $1, (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vshufpd $1, %ymm0, %ymm1, %ymm2
|
||||
|
|
|
@ -295,9 +295,9 @@ xorps (%rax), %xmm2
|
|||
# CHECK-NEXT: 1 5 0.50 rcpss %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 12 0.50 * rcpss (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 5 0.50 rsqrtps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 2 12 0.50 * rsqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 12 0.50 * rsqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 5 0.50 rsqrtss %xmm0, %xmm2
|
||||
# CHECK-NEXT: 2 12 1.00 * rsqrtss (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 12 0.50 * rsqrtss (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 * * U sfence
|
||||
# CHECK-NEXT: 1 1 0.50 shufps $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 0.50 * shufps $1, (%rax), %xmm2
|
||||
|
@ -335,7 +335,7 @@ xorps (%rax), %xmm2
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
|
||||
# CHECK-NEXT: 32.50 32.50 - - - - - 25.00 29.50 30.00 112.50 -
|
||||
# CHECK-NEXT: 32.50 32.50 - - - - - 24.50 30.50 28.50 112.50 -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
|
||||
|
@ -441,8 +441,8 @@ xorps (%rax), %xmm2
|
|||
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - rcpss (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - rsqrtps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - rsqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - - 0.50 - 0.50 - - rsqrtss %xmm0, %xmm2
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - rsqrtss (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - rsqrtss %xmm0, %xmm2
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - rsqrtss (%rax), %xmm2
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - sfence
|
||||
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - shufps $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - shufps $1, (%rax), %xmm2
|
||||
|
|
|
@ -1629,7 +1629,7 @@ vzeroupper
|
|||
# CHECK-NEXT: 1 5 0.50 vrcpps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 12 0.50 * vrcpps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 5 0.50 vrcpps %ymm0, %ymm2
|
||||
# CHECK-NEXT: 3 12 0.50 * vrcpps (%rax), %ymm2
|
||||
# CHECK-NEXT: 1 12 0.50 * vrcpps (%rax), %ymm2
|
||||
# CHECK-NEXT: 1 5 0.50 vrcpss %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1 12 0.50 * vrcpss (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 vroundpd $1, %xmm0, %xmm2
|
||||
|
@ -1645,11 +1645,11 @@ vzeroupper
|
|||
# CHECK-NEXT: 1 3 1.00 vroundss $1, %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1 10 1.00 * vroundss $1, (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1 5 0.50 vrsqrtps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 2 12 0.50 * vrsqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: 2 5 1.00 vrsqrtps %ymm0, %ymm2
|
||||
# CHECK-NEXT: 2 12 0.50 * vrsqrtps (%rax), %ymm2
|
||||
# CHECK-NEXT: 1 12 0.50 * vrsqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 5 0.50 vrsqrtps %ymm0, %ymm2
|
||||
# CHECK-NEXT: 1 12 0.50 * vrsqrtps (%rax), %ymm2
|
||||
# CHECK-NEXT: 1 5 0.50 vrsqrtss %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 2 12 1.00 * vrsqrtss (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1 12 0.50 * vrsqrtss (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 vshufpd $1, %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1 8 0.50 * vshufpd $1, (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 vshufpd $1, %ymm0, %ymm1, %ymm2
|
||||
|
@ -1739,7 +1739,7 @@ vzeroupper
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
|
||||
# CHECK-NEXT: 117.00 117.00 117.00 0.25 0.25 0.25 0.25 - 133.92 169.42 206.25 467.42 -
|
||||
# CHECK-NEXT: 117.00 117.00 117.00 0.25 0.25 0.25 0.25 - 132.92 169.92 204.75 467.42 -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
|
||||
|
@ -2359,10 +2359,10 @@ vzeroupper
|
|||
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vroundss $1, (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - vrsqrtps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 - - - vrsqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - vrsqrtps %ymm0, %ymm2
|
||||
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - vrsqrtps %ymm0, %ymm2
|
||||
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 - - - vrsqrtps (%rax), %ymm2
|
||||
# CHECK-NEXT: - - - - - - - - 0.50 - 0.50 - - vrsqrtss %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 1.00 - 1.00 - - vrsqrtss (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - vrsqrtss %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 - - - vrsqrtss (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vshufpd $1, %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - vshufpd $1, (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vshufpd $1, %ymm0, %ymm1, %ymm2
|
||||
|
|
|
@ -295,9 +295,9 @@ xorps (%rax), %xmm2
|
|||
# CHECK-NEXT: 1 5 0.50 rcpss %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 12 0.50 * rcpss (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 5 0.50 rsqrtps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 2 12 0.50 * rsqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 12 0.50 * rsqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 5 0.50 rsqrtss %xmm0, %xmm2
|
||||
# CHECK-NEXT: 2 12 1.00 * rsqrtss (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 12 0.50 * rsqrtss (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.33 * * U sfence
|
||||
# CHECK-NEXT: 1 1 0.50 shufps $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 0.50 * shufps $1, (%rax), %xmm2
|
||||
|
@ -336,7 +336,7 @@ xorps (%rax), %xmm2
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
|
||||
# CHECK-NEXT: 21.67 21.67 21.67 - - - - - 25.00 29.50 30.00 112.50 -
|
||||
# CHECK-NEXT: 21.67 21.67 21.67 - - - - - 24.50 30.50 28.50 112.50 -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
|
||||
|
@ -442,8 +442,8 @@ xorps (%rax), %xmm2
|
|||
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 - - - rcpss (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - rsqrtps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 - - - rsqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - - - 0.50 - 0.50 - - rsqrtss %xmm0, %xmm2
|
||||
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 1.00 - 1.00 - - rsqrtss (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - rsqrtss %xmm0, %xmm2
|
||||
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 - - - rsqrtss (%rax), %xmm2
|
||||
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - sfence
|
||||
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - shufps $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - shufps $1, (%rax), %xmm2
|
||||
|
|
Loading…
Reference in New Issue