forked from OSchip/llvm-project
[X86] AMD Zen 3: MULX w/ mem operand has the same throughput as with reg op
llvm-exegesis is faulty: when measuring inverse throughput (throughput^-1), it sometimes produces snippets that have loop-carried dependencies, which must be what caused me to measure it incorrectly originally. After looking much more carefully, the inverse throughput of MULX with a memory operand should match that of MULX with a register operand, as per llvm-exegesis measurements.
This commit is contained in:
parent
0f04936a2d
commit
d4d459e747
|
@ -618,30 +618,10 @@ defm : Zn3WriteResIntPair<WriteIMul16Imm, [Zn3Multiplier], 4, [4], 2>; // Intege
|
||||||
defm : Zn3WriteResIntPair<WriteIMul16Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 16-bit multiplication by register.
|
defm : Zn3WriteResIntPair<WriteIMul16Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 16-bit multiplication by register.
|
||||||
defm : Zn3WriteResIntPair<WriteIMul32, [Zn3Multiplier], 3, [3], 2>; // Integer 32-bit multiplication.
|
defm : Zn3WriteResIntPair<WriteIMul32, [Zn3Multiplier], 3, [3], 2>; // Integer 32-bit multiplication.
|
||||||
defm : Zn3WriteResIntPair<WriteMULX32, [Zn3Multiplier], 3, [1], 2>; // Integer 32-bit Unsigned Multiply Without Affecting Flags.
|
defm : Zn3WriteResIntPair<WriteMULX32, [Zn3Multiplier], 3, [1], 2>; // Integer 32-bit Unsigned Multiply Without Affecting Flags.
|
||||||
|
|
||||||
def Zn3MULX32rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> {
|
|
||||||
let Latency = !add(Znver3Model.LoadLatency, 3);
|
|
||||||
let ResourceCycles = [1, 1, 2];
|
|
||||||
let NumMicroOps = 2;
|
|
||||||
}
|
|
||||||
def : InstRW<[Zn3MULX32rm, WriteIMulHLd,
|
|
||||||
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
|
|
||||||
ReadAfterLd], (instrs MULX32rm)>;
|
|
||||||
|
|
||||||
defm : Zn3WriteResIntPair<WriteIMul32Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by immediate.
|
defm : Zn3WriteResIntPair<WriteIMul32Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by immediate.
|
||||||
defm : Zn3WriteResIntPair<WriteIMul32Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by register.
|
defm : Zn3WriteResIntPair<WriteIMul32Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by register.
|
||||||
defm : Zn3WriteResIntPair<WriteIMul64, [Zn3Multiplier], 3, [3], 2>; // Integer 64-bit multiplication.
|
defm : Zn3WriteResIntPair<WriteIMul64, [Zn3Multiplier], 3, [3], 2>; // Integer 64-bit multiplication.
|
||||||
defm : Zn3WriteResIntPair<WriteMULX64, [Zn3Multiplier], 3, [1], 2>; // Integer 64-bit Unsigned Multiply Without Affecting Flags.
|
defm : Zn3WriteResIntPair<WriteMULX64, [Zn3Multiplier], 3, [1], 2>; // Integer 64-bit Unsigned Multiply Without Affecting Flags.
|
||||||
|
|
||||||
def Zn3MULX64rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> {
|
|
||||||
let Latency = !add(Znver3Model.LoadLatency, 3);
|
|
||||||
let ResourceCycles = [1, 1, 2];
|
|
||||||
let NumMicroOps = 2;
|
|
||||||
}
|
|
||||||
def : InstRW<[Zn3MULX64rm, WriteIMulHLd,
|
|
||||||
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
|
|
||||||
ReadAfterLd], (instrs MULX64rm)>;
|
|
||||||
|
|
||||||
defm : Zn3WriteResIntPair<WriteIMul64Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by immediate.
|
defm : Zn3WriteResIntPair<WriteIMul64Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by immediate.
|
||||||
defm : Zn3WriteResIntPair<WriteIMul64Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by register.
|
defm : Zn3WriteResIntPair<WriteIMul64Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by register.
|
||||||
defm : Zn3WriteResInt<WriteIMulHLd, [], !add(4, Znver3Model.LoadLatency), [], 0>; // Integer multiplication, high part.
|
defm : Zn3WriteResInt<WriteIMulHLd, [], !add(4, Znver3Model.LoadLatency), [], 0>; // Integer multiplication, high part.
|
||||||
|
|
|
@ -17,13 +17,13 @@ add %rax, %rax
|
||||||
|
|
||||||
# CHECK: Iterations: 1
|
# CHECK: Iterations: 1
|
||||||
# CHECK-NEXT: Instructions: 2
|
# CHECK-NEXT: Instructions: 2
|
||||||
# CHECK-NEXT: Total Cycles: 12
|
# CHECK-NEXT: Total Cycles: 11
|
||||||
# CHECK-NEXT: Total uOps: 3
|
# CHECK-NEXT: Total uOps: 3
|
||||||
|
|
||||||
# CHECK: Dispatch Width: 6
|
# CHECK: Dispatch Width: 6
|
||||||
# CHECK-NEXT: uOps Per Cycle: 0.25
|
# CHECK-NEXT: uOps Per Cycle: 0.27
|
||||||
# CHECK-NEXT: IPC: 0.17
|
# CHECK-NEXT: IPC: 0.18
|
||||||
# CHECK-NEXT: Block RThroughput: 2.0
|
# CHECK-NEXT: Block RThroughput: 1.0
|
||||||
|
|
||||||
# CHECK: Instruction Info:
|
# CHECK: Instruction Info:
|
||||||
# CHECK-NEXT: [1]: #uOps
|
# CHECK-NEXT: [1]: #uOps
|
||||||
|
@ -34,7 +34,7 @@ add %rax, %rax
|
||||||
# CHECK-NEXT: [6]: HasSideEffects (U)
|
# CHECK-NEXT: [6]: HasSideEffects (U)
|
||||||
|
|
||||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||||
# CHECK-NEXT: 2 8 2.00 * mulxl (%rdi), %eax, %ecx
|
# CHECK-NEXT: 2 8 1.00 * mulxl (%rdi), %eax, %ecx
|
||||||
# CHECK-NEXT: 1 1 0.25 addl %eax, %eax
|
# CHECK-NEXT: 1 1 0.25 addl %eax, %eax
|
||||||
|
|
||||||
# CHECK: Resources:
|
# CHECK: Resources:
|
||||||
|
@ -64,19 +64,19 @@ add %rax, %rax
|
||||||
|
|
||||||
# CHECK: Resource pressure per iteration:
|
# CHECK: Resource pressure per iteration:
|
||||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
|
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
|
||||||
# CHECK-NEXT: - - 1.00 - 2.00 - 1.00 - - - - - - - - - - 1.00 - - 1.00 - -
|
# CHECK-NEXT: - - 1.00 - 1.00 - 1.00 - - - - - - - - - - 1.00 - - 1.00 - -
|
||||||
|
|
||||||
# CHECK: Resource pressure by instruction:
|
# CHECK: Resource pressure by instruction:
|
||||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
|
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
|
||||||
# CHECK-NEXT: - - 1.00 - 2.00 - - - - - - - - - - - - 1.00 - - 1.00 - - mulxl (%rdi), %eax, %ecx
|
# CHECK-NEXT: - - 1.00 - 1.00 - - - - - - - - - - - - 1.00 - - 1.00 - - mulxl (%rdi), %eax, %ecx
|
||||||
# CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - - - - - addl %eax, %eax
|
# CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - - - - - addl %eax, %eax
|
||||||
|
|
||||||
# CHECK: Timeline view:
|
# CHECK: Timeline view:
|
||||||
# CHECK-NEXT: 01
|
# CHECK-NEXT: 0
|
||||||
# CHECK-NEXT: Index 0123456789
|
# CHECK-NEXT: Index 0123456789
|
||||||
|
|
||||||
# CHECK: [0,0] DeeeeeeeeER. mulxl (%rdi), %eax, %ecx
|
# CHECK: [0,0] DeeeeeeeeER mulxl (%rdi), %eax, %ecx
|
||||||
# CHECK-NEXT: [0,1] D========eER addl %eax, %eax
|
# CHECK-NEXT: [0,1] D=======eER addl %eax, %eax
|
||||||
|
|
||||||
# CHECK: Average Wait times (based on the timeline view):
|
# CHECK: Average Wait times (based on the timeline view):
|
||||||
# CHECK-NEXT: [0]: Executions
|
# CHECK-NEXT: [0]: Executions
|
||||||
|
@ -86,20 +86,20 @@ add %rax, %rax
|
||||||
|
|
||||||
# CHECK: [0] [1] [2] [3]
|
# CHECK: [0] [1] [2] [3]
|
||||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 mulxl (%rdi), %eax, %ecx
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 mulxl (%rdi), %eax, %ecx
|
||||||
# CHECK-NEXT: 1. 1 9.0 0.0 0.0 addl %eax, %eax
|
# CHECK-NEXT: 1. 1 8.0 0.0 0.0 addl %eax, %eax
|
||||||
# CHECK-NEXT: 1 5.0 0.5 0.0 <total>
|
# CHECK-NEXT: 1 4.5 0.5 0.0 <total>
|
||||||
|
|
||||||
# CHECK: [1] Code Region
|
# CHECK: [1] Code Region
|
||||||
|
|
||||||
# CHECK: Iterations: 1
|
# CHECK: Iterations: 1
|
||||||
# CHECK-NEXT: Instructions: 2
|
# CHECK-NEXT: Instructions: 2
|
||||||
# CHECK-NEXT: Total Cycles: 12
|
# CHECK-NEXT: Total Cycles: 11
|
||||||
# CHECK-NEXT: Total uOps: 3
|
# CHECK-NEXT: Total uOps: 3
|
||||||
|
|
||||||
# CHECK: Dispatch Width: 6
|
# CHECK: Dispatch Width: 6
|
||||||
# CHECK-NEXT: uOps Per Cycle: 0.25
|
# CHECK-NEXT: uOps Per Cycle: 0.27
|
||||||
# CHECK-NEXT: IPC: 0.17
|
# CHECK-NEXT: IPC: 0.18
|
||||||
# CHECK-NEXT: Block RThroughput: 2.0
|
# CHECK-NEXT: Block RThroughput: 1.0
|
||||||
|
|
||||||
# CHECK: Instruction Info:
|
# CHECK: Instruction Info:
|
||||||
# CHECK-NEXT: [1]: #uOps
|
# CHECK-NEXT: [1]: #uOps
|
||||||
|
@ -110,7 +110,7 @@ add %rax, %rax
|
||||||
# CHECK-NEXT: [6]: HasSideEffects (U)
|
# CHECK-NEXT: [6]: HasSideEffects (U)
|
||||||
|
|
||||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||||
# CHECK-NEXT: 2 8 2.00 * mulxq (%rdi), %rax, %rcx
|
# CHECK-NEXT: 2 8 1.00 * mulxq (%rdi), %rax, %rcx
|
||||||
# CHECK-NEXT: 1 1 0.25 addq %rax, %rax
|
# CHECK-NEXT: 1 1 0.25 addq %rax, %rax
|
||||||
|
|
||||||
# CHECK: Resources:
|
# CHECK: Resources:
|
||||||
|
@ -140,19 +140,19 @@ add %rax, %rax
|
||||||
|
|
||||||
# CHECK: Resource pressure per iteration:
|
# CHECK: Resource pressure per iteration:
|
||||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
|
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
|
||||||
# CHECK-NEXT: - - 1.00 - 2.00 - 1.00 - - - - - - - - - - 1.00 - - 1.00 - -
|
# CHECK-NEXT: - - 1.00 - 1.00 - 1.00 - - - - - - - - - - 1.00 - - 1.00 - -
|
||||||
|
|
||||||
# CHECK: Resource pressure by instruction:
|
# CHECK: Resource pressure by instruction:
|
||||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
|
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
|
||||||
# CHECK-NEXT: - - 1.00 - 2.00 - - - - - - - - - - - - 1.00 - - 1.00 - - mulxq (%rdi), %rax, %rcx
|
# CHECK-NEXT: - - 1.00 - 1.00 - - - - - - - - - - - - 1.00 - - 1.00 - - mulxq (%rdi), %rax, %rcx
|
||||||
# CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - - - - - addq %rax, %rax
|
# CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - - - - - addq %rax, %rax
|
||||||
|
|
||||||
# CHECK: Timeline view:
|
# CHECK: Timeline view:
|
||||||
# CHECK-NEXT: 01
|
# CHECK-NEXT: 0
|
||||||
# CHECK-NEXT: Index 0123456789
|
# CHECK-NEXT: Index 0123456789
|
||||||
|
|
||||||
# CHECK: [0,0] DeeeeeeeeER. mulxq (%rdi), %rax, %rcx
|
# CHECK: [0,0] DeeeeeeeeER mulxq (%rdi), %rax, %rcx
|
||||||
# CHECK-NEXT: [0,1] D========eER addq %rax, %rax
|
# CHECK-NEXT: [0,1] D=======eER addq %rax, %rax
|
||||||
|
|
||||||
# CHECK: Average Wait times (based on the timeline view):
|
# CHECK: Average Wait times (based on the timeline view):
|
||||||
# CHECK-NEXT: [0]: Executions
|
# CHECK-NEXT: [0]: Executions
|
||||||
|
@ -162,5 +162,5 @@ add %rax, %rax
|
||||||
|
|
||||||
# CHECK: [0] [1] [2] [3]
|
# CHECK: [0] [1] [2] [3]
|
||||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 mulxq (%rdi), %rax, %rcx
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 mulxq (%rdi), %rax, %rcx
|
||||||
# CHECK-NEXT: 1. 1 9.0 0.0 0.0 addq %rax, %rax
|
# CHECK-NEXT: 1. 1 8.0 0.0 0.0 addq %rax, %rax
|
||||||
# CHECK-NEXT: 1 5.0 0.5 0.0 <total>
|
# CHECK-NEXT: 1 4.5 0.5 0.0 <total>
|
||||||
|
|
|
@ -15,13 +15,13 @@ mulxq (%rdi), %rax, %rdx
|
||||||
|
|
||||||
# CHECK: Iterations: 2
|
# CHECK: Iterations: 2
|
||||||
# CHECK-NEXT: Instructions: 2
|
# CHECK-NEXT: Instructions: 2
|
||||||
# CHECK-NEXT: Total Cycles: 14
|
# CHECK-NEXT: Total Cycles: 15
|
||||||
# CHECK-NEXT: Total uOps: 4
|
# CHECK-NEXT: Total uOps: 4
|
||||||
|
|
||||||
# CHECK: Dispatch Width: 6
|
# CHECK: Dispatch Width: 6
|
||||||
# CHECK-NEXT: uOps Per Cycle: 0.29
|
# CHECK-NEXT: uOps Per Cycle: 0.27
|
||||||
# CHECK-NEXT: IPC: 0.14
|
# CHECK-NEXT: IPC: 0.13
|
||||||
# CHECK-NEXT: Block RThroughput: 2.0
|
# CHECK-NEXT: Block RThroughput: 1.0
|
||||||
|
|
||||||
# CHECK: Instruction Info:
|
# CHECK: Instruction Info:
|
||||||
# CHECK-NEXT: [1]: #uOps
|
# CHECK-NEXT: [1]: #uOps
|
||||||
|
@ -32,7 +32,7 @@ mulxq (%rdi), %rax, %rdx
|
||||||
# CHECK-NEXT: [6]: HasSideEffects (U)
|
# CHECK-NEXT: [6]: HasSideEffects (U)
|
||||||
|
|
||||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||||
# CHECK-NEXT: 2 8 2.00 * mulxl (%rdi), %eax, %edx
|
# CHECK-NEXT: 2 8 1.00 * mulxl (%rdi), %eax, %edx
|
||||||
|
|
||||||
# CHECK: Resources:
|
# CHECK: Resources:
|
||||||
# CHECK-NEXT: [0] - Zn3AGU0
|
# CHECK-NEXT: [0] - Zn3AGU0
|
||||||
|
@ -61,18 +61,18 @@ mulxq (%rdi), %rax, %rdx
|
||||||
|
|
||||||
# CHECK: Resource pressure per iteration:
|
# CHECK: Resource pressure per iteration:
|
||||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
|
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
|
||||||
# CHECK-NEXT: - 0.50 0.50 - 2.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - -
|
# CHECK-NEXT: - 0.50 0.50 - 1.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - -
|
||||||
|
|
||||||
# CHECK: Resource pressure by instruction:
|
# CHECK: Resource pressure by instruction:
|
||||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
|
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
|
||||||
# CHECK-NEXT: - 0.50 0.50 - 2.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - mulxl (%rdi), %eax, %edx
|
# CHECK-NEXT: - 0.50 0.50 - 1.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - mulxl (%rdi), %eax, %edx
|
||||||
|
|
||||||
# CHECK: Timeline view:
|
# CHECK: Timeline view:
|
||||||
# CHECK-NEXT: 0123
|
# CHECK-NEXT: 01234
|
||||||
# CHECK-NEXT: Index 0123456789
|
# CHECK-NEXT: Index 0123456789
|
||||||
|
|
||||||
# CHECK: [0,0] DeeeeeeeeER . mulxl (%rdi), %eax, %edx
|
# CHECK: [0,0] DeeeeeeeeER . mulxl (%rdi), %eax, %edx
|
||||||
# CHECK-NEXT: [1,0] D===eeeeeeeeER mulxl (%rdi), %eax, %edx
|
# CHECK-NEXT: [1,0] D====eeeeeeeeER mulxl (%rdi), %eax, %edx
|
||||||
|
|
||||||
# CHECK: Average Wait times (based on the timeline view):
|
# CHECK: Average Wait times (based on the timeline view):
|
||||||
# CHECK-NEXT: [0]: Executions
|
# CHECK-NEXT: [0]: Executions
|
||||||
|
@ -81,19 +81,19 @@ mulxq (%rdi), %rax, %rdx
|
||||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||||
|
|
||||||
# CHECK: [0] [1] [2] [3]
|
# CHECK: [0] [1] [2] [3]
|
||||||
# CHECK-NEXT: 0. 2 2.5 0.5 0.0 mulxl (%rdi), %eax, %edx
|
# CHECK-NEXT: 0. 2 3.0 0.5 0.0 mulxl (%rdi), %eax, %edx
|
||||||
|
|
||||||
# CHECK: [1] Code Region
|
# CHECK: [1] Code Region
|
||||||
|
|
||||||
# CHECK: Iterations: 2
|
# CHECK: Iterations: 2
|
||||||
# CHECK-NEXT: Instructions: 2
|
# CHECK-NEXT: Instructions: 2
|
||||||
# CHECK-NEXT: Total Cycles: 14
|
# CHECK-NEXT: Total Cycles: 15
|
||||||
# CHECK-NEXT: Total uOps: 4
|
# CHECK-NEXT: Total uOps: 4
|
||||||
|
|
||||||
# CHECK: Dispatch Width: 6
|
# CHECK: Dispatch Width: 6
|
||||||
# CHECK-NEXT: uOps Per Cycle: 0.29
|
# CHECK-NEXT: uOps Per Cycle: 0.27
|
||||||
# CHECK-NEXT: IPC: 0.14
|
# CHECK-NEXT: IPC: 0.13
|
||||||
# CHECK-NEXT: Block RThroughput: 2.0
|
# CHECK-NEXT: Block RThroughput: 1.0
|
||||||
|
|
||||||
# CHECK: Instruction Info:
|
# CHECK: Instruction Info:
|
||||||
# CHECK-NEXT: [1]: #uOps
|
# CHECK-NEXT: [1]: #uOps
|
||||||
|
@ -104,7 +104,7 @@ mulxq (%rdi), %rax, %rdx
|
||||||
# CHECK-NEXT: [6]: HasSideEffects (U)
|
# CHECK-NEXT: [6]: HasSideEffects (U)
|
||||||
|
|
||||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||||
# CHECK-NEXT: 2 8 2.00 * mulxq (%rdi), %rax, %rdx
|
# CHECK-NEXT: 2 8 1.00 * mulxq (%rdi), %rax, %rdx
|
||||||
|
|
||||||
# CHECK: Resources:
|
# CHECK: Resources:
|
||||||
# CHECK-NEXT: [0] - Zn3AGU0
|
# CHECK-NEXT: [0] - Zn3AGU0
|
||||||
|
@ -133,18 +133,18 @@ mulxq (%rdi), %rax, %rdx
|
||||||
|
|
||||||
# CHECK: Resource pressure per iteration:
|
# CHECK: Resource pressure per iteration:
|
||||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
|
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
|
||||||
# CHECK-NEXT: - 0.50 0.50 - 2.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - -
|
# CHECK-NEXT: - 0.50 0.50 - 1.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - -
|
||||||
|
|
||||||
# CHECK: Resource pressure by instruction:
|
# CHECK: Resource pressure by instruction:
|
||||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
|
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
|
||||||
# CHECK-NEXT: - 0.50 0.50 - 2.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - mulxq (%rdi), %rax, %rdx
|
# CHECK-NEXT: - 0.50 0.50 - 1.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - mulxq (%rdi), %rax, %rdx
|
||||||
|
|
||||||
# CHECK: Timeline view:
|
# CHECK: Timeline view:
|
||||||
# CHECK-NEXT: 0123
|
# CHECK-NEXT: 01234
|
||||||
# CHECK-NEXT: Index 0123456789
|
# CHECK-NEXT: Index 0123456789
|
||||||
|
|
||||||
# CHECK: [0,0] DeeeeeeeeER . mulxq (%rdi), %rax, %rdx
|
# CHECK: [0,0] DeeeeeeeeER . mulxq (%rdi), %rax, %rdx
|
||||||
# CHECK-NEXT: [1,0] D===eeeeeeeeER mulxq (%rdi), %rax, %rdx
|
# CHECK-NEXT: [1,0] D====eeeeeeeeER mulxq (%rdi), %rax, %rdx
|
||||||
|
|
||||||
# CHECK: Average Wait times (based on the timeline view):
|
# CHECK: Average Wait times (based on the timeline view):
|
||||||
# CHECK-NEXT: [0]: Executions
|
# CHECK-NEXT: [0]: Executions
|
||||||
|
@ -153,4 +153,4 @@ mulxq (%rdi), %rax, %rdx
|
||||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||||
|
|
||||||
# CHECK: [0] [1] [2] [3]
|
# CHECK: [0] [1] [2] [3]
|
||||||
# CHECK-NEXT: 0. 2 2.5 0.5 0.0 mulxq (%rdi), %rax, %rdx
|
# CHECK-NEXT: 0. 2 3.0 0.5 0.0 mulxq (%rdi), %rax, %rdx
|
||||||
|
|
|
@ -63,9 +63,9 @@ shrx %rax, (%rbx), %rcx
|
||||||
# CHECK-NEXT: 1 1 0.50 bzhiq %rax, %rbx, %rcx
|
# CHECK-NEXT: 1 1 0.50 bzhiq %rax, %rbx, %rcx
|
||||||
# CHECK-NEXT: 2 5 0.50 * bzhiq %rax, (%rbx), %rcx
|
# CHECK-NEXT: 2 5 0.50 * bzhiq %rax, (%rbx), %rcx
|
||||||
# CHECK-NEXT: 2 4 1.00 mulxl %eax, %ebx, %ecx
|
# CHECK-NEXT: 2 4 1.00 mulxl %eax, %ebx, %ecx
|
||||||
# CHECK-NEXT: 2 8 2.00 * mulxl (%rax), %ebx, %ecx
|
# CHECK-NEXT: 2 8 1.00 * mulxl (%rax), %ebx, %ecx
|
||||||
# CHECK-NEXT: 2 4 1.00 mulxq %rax, %rbx, %rcx
|
# CHECK-NEXT: 2 4 1.00 mulxq %rax, %rbx, %rcx
|
||||||
# CHECK-NEXT: 2 8 2.00 * mulxq (%rax), %rbx, %rcx
|
# CHECK-NEXT: 2 8 1.00 * mulxq (%rax), %rbx, %rcx
|
||||||
# CHECK-NEXT: 1 3 1.00 pdepl %eax, %ebx, %ecx
|
# CHECK-NEXT: 1 3 1.00 pdepl %eax, %ebx, %ecx
|
||||||
# CHECK-NEXT: 1 5 0.33 * pdepl (%rax), %ebx, %ecx
|
# CHECK-NEXT: 1 5 0.33 * pdepl (%rax), %ebx, %ecx
|
||||||
# CHECK-NEXT: 1 3 1.00 pdepq %rax, %rbx, %rcx
|
# CHECK-NEXT: 1 3 1.00 pdepq %rax, %rbx, %rcx
|
||||||
|
@ -118,7 +118,7 @@ shrx %rax, (%rbx), %rcx
|
||||||
|
|
||||||
# CHECK: Resource pressure per iteration:
|
# CHECK: Resource pressure per iteration:
|
||||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
|
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
|
||||||
# CHECK-NEXT: 5.33 5.33 5.33 1.00 21.00 11.00 1.00 - - - - - - - - 5.33 5.33 5.33 5.33 5.33 5.33 - -
|
# CHECK-NEXT: 5.33 5.33 5.33 1.00 19.00 11.00 1.00 - - - - - - - - 5.33 5.33 5.33 5.33 5.33 5.33 - -
|
||||||
|
|
||||||
# CHECK: Resource pressure by instruction:
|
# CHECK: Resource pressure by instruction:
|
||||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
|
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
|
||||||
|
@ -127,9 +127,9 @@ shrx %rax, (%rbx), %rcx
|
||||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - bzhiq %rax, %rbx, %rcx
|
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - bzhiq %rax, %rbx, %rcx
|
||||||
# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - bzhiq %rax, (%rbx), %rcx
|
# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - bzhiq %rax, (%rbx), %rcx
|
||||||
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - mulxl %eax, %ebx, %ecx
|
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - mulxl %eax, %ebx, %ecx
|
||||||
# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulxl (%rax), %ebx, %ecx
|
# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulxl (%rax), %ebx, %ecx
|
||||||
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - mulxq %rax, %rbx, %rcx
|
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - mulxq %rax, %rbx, %rcx
|
||||||
# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulxq (%rax), %rbx, %rcx
|
# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulxq (%rax), %rbx, %rcx
|
||||||
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - pdepl %eax, %ebx, %ecx
|
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - pdepl %eax, %ebx, %ecx
|
||||||
# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - pdepl (%rax), %ebx, %ecx
|
# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - pdepl (%rax), %ebx, %ecx
|
||||||
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - pdepq %rax, %rbx, %rcx
|
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - pdepq %rax, %rbx, %rcx
|
||||||
|
|
Loading…
Reference in New Issue