forked from OSchip/llvm-project
[X86][SchedModel] Fix latency of the Hi register write of MULX (PR51495).
Before this patch, WriteIMulH reported a latency value which is correct for the RR variant of MULX, but not for the RM variant. This patch fixes the issue by introducing a new WriteIMulHLd, which is meant to be used only by the RM variant of MULX. Differential Revision: https://reviews.llvm.org/D108701
This commit is contained in:
parent
2e192ab1f4
commit
5f848b311f
|
@ -1503,7 +1503,7 @@ let hasSideEffects = 0 in {
|
|||
def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
|
||||
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
|
||||
[]>, T8XD, VEX_4V,
|
||||
Sched<[sched.Folded, WriteIMulH,
|
||||
Sched<[sched.Folded, WriteIMulHLd,
|
||||
// Memory operand.
|
||||
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
|
||||
// Implicit read of EDX/RDX
|
||||
|
|
|
@ -149,7 +149,10 @@ defm : BWWriteResPair<WriteIMul64, [BWPort1,BWPort5], 4, [1,1], 2>;
|
|||
defm : BWWriteResPair<WriteMULX64, [BWPort1,BWPort5], 4, [1,1], 2>;
|
||||
defm : BWWriteResPair<WriteIMul64Imm, [BWPort1], 3>;
|
||||
defm : BWWriteResPair<WriteIMul64Reg, [BWPort1], 3>;
|
||||
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
|
||||
def BWWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 3; }
|
||||
def : WriteRes<WriteIMulHLd, []> {
|
||||
let Latency = !add(BWWriteIMulH.Latency, BroadwellModel.LoadLatency);
|
||||
}
|
||||
|
||||
defm : X86WriteRes<WriteBSWAP32, [BWPort15], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteBSWAP64, [BWPort06, BWPort15], 2, [1, 1], 2>;
|
||||
|
|
|
@ -151,7 +151,10 @@ defm : HWWriteResPair<WriteIMul64, [HWPort1,HWPort6], 4, [1,1], 2>;
|
|||
defm : HWWriteResPair<WriteMULX64, [HWPort1,HWPort6], 4, [1,1], 2>;
|
||||
defm : HWWriteResPair<WriteIMul64Imm, [HWPort1], 3>;
|
||||
defm : HWWriteResPair<WriteIMul64Reg, [HWPort1], 3>;
|
||||
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
|
||||
def HWWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 3; }
|
||||
def : WriteRes<WriteIMulHLd, []> {
|
||||
let Latency = !add(HWWriteIMulH.Latency, HaswellModel.LoadLatency);
|
||||
}
|
||||
|
||||
defm : X86WriteRes<WriteBSWAP32, [HWPort15], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteBSWAP64, [HWPort06, HWPort15], 2, [1,1], 2>;
|
||||
|
|
|
@ -131,7 +131,10 @@ defm : SBWriteResPair<WriteIMul64, [SBPort1,SBPort0], 4, [1,1], 2>;
|
|||
defm : SBWriteResPair<WriteMULX64, [SBPort1,SBPort0], 4, [1,1], 2>;
|
||||
defm : SBWriteResPair<WriteIMul64Imm, [SBPort1], 3>;
|
||||
defm : SBWriteResPair<WriteIMul64Reg, [SBPort1], 3>;
|
||||
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
|
||||
def SBWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 3; }
|
||||
def : WriteRes<WriteIMulHLd, []> {
|
||||
let Latency = !add(SBWriteIMulH.Latency, SandyBridgeModel.LoadLatency);
|
||||
}
|
||||
|
||||
defm : X86WriteRes<WriteXCHG, [SBPort015], 2, [3], 3>;
|
||||
defm : X86WriteRes<WriteBSWAP32, [SBPort1], 1, [1], 1>;
|
||||
|
|
|
@ -129,7 +129,10 @@ defm : SKLWriteResPair<WriteIMul64, [SKLPort1,SKLPort5], 4, [1,1], 2>;
|
|||
defm : SKLWriteResPair<WriteMULX64, [SKLPort1,SKLPort5], 4, [1,1], 2>;
|
||||
defm : SKLWriteResPair<WriteIMul64Imm, [SKLPort1], 3>;
|
||||
defm : SKLWriteResPair<WriteIMul64Reg, [SKLPort1], 3>;
|
||||
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
|
||||
def SKLWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 3; }
|
||||
def : WriteRes<WriteIMulHLd, []> {
|
||||
let Latency = !add(SKLWriteIMulH.Latency, SkylakeClientModel.LoadLatency);
|
||||
}
|
||||
|
||||
defm : X86WriteRes<WriteBSWAP32, [SKLPort15], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteBSWAP64, [SKLPort06, SKLPort15], 2, [1,1], 2>;
|
||||
|
|
|
@ -130,7 +130,10 @@ defm : SKXWriteResPair<WriteIMul64, [SKXPort1,SKXPort5], 4, [1,1], 2>;
|
|||
defm : SKXWriteResPair<WriteMULX64, [SKXPort1,SKXPort5], 4, [1,1], 2>;
|
||||
defm : SKXWriteResPair<WriteIMul64Imm, [SKXPort1], 3>;
|
||||
defm : SKXWriteResPair<WriteIMul64Reg, [SKXPort1], 3>;
|
||||
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
|
||||
def SKXWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 3; }
|
||||
def : WriteRes<WriteIMulHLd, []> {
|
||||
let Latency = !add(SKXWriteIMulH.Latency, SkylakeServerModel.LoadLatency);
|
||||
}
|
||||
|
||||
defm : X86WriteRes<WriteBSWAP32, [SKXPort15], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteBSWAP64, [SKXPort06, SKXPort15], 2, [1,1], 2>;
|
||||
|
|
|
@ -150,7 +150,8 @@ defm WriteIMul64Imm : X86SchedWritePair; // Integer 64-bit multiplication by imm
|
|||
defm WriteIMul64Reg : X86SchedWritePair; // Integer 64-bit multiplication by register.
|
||||
defm WriteMULX32 : X86SchedWritePair; // Integer 32-bit Multiplication without affecting flags.
|
||||
defm WriteMULX64 : X86SchedWritePair; // Integer 64-bit Multiplication without affecting flags.
|
||||
def WriteIMulH : SchedWrite; // Integer multiplication, high part (only used by MULX).
|
||||
def WriteIMulH : SchedWrite; // Integer multiplication, high part (only used by the RR variant of MULX).
|
||||
def WriteIMulHLd : SchedWrite; // Integer multiplication, high part (only used by the RM variant of MULX).
|
||||
|
||||
def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
|
||||
def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.
|
||||
|
|
|
@ -91,6 +91,7 @@ defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12]
|
|||
defm : AtomWriteResPair<WriteIMul64Imm, [AtomPort01], [AtomPort01], 14, 14, [14], [14]>;
|
||||
defm : AtomWriteResPair<WriteIMul64Reg, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
|
||||
defm : X86WriteResUnsupported<WriteIMulH>;
|
||||
defm : X86WriteResUnsupported<WriteIMulHLd>;
|
||||
defm : X86WriteResPairUnsupported<WriteMULX32>;
|
||||
defm : X86WriteResPairUnsupported<WriteMULX64>;
|
||||
|
||||
|
|
|
@ -438,6 +438,7 @@ defm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul], 6, [1, 4]>;
|
|||
|
||||
// BMI2 MULX
|
||||
defm : X86WriteResUnsupported<WriteIMulH>;
|
||||
defm : X86WriteResUnsupported<WriteIMulHLd>;
|
||||
defm : X86WriteResPairUnsupported<WriteMULX32>;
|
||||
defm : X86WriteResPairUnsupported<WriteMULX64>;
|
||||
|
||||
|
|
|
@ -210,6 +210,7 @@ defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>;
|
|||
defm : JWriteResIntPair<WriteIMul64Imm, [JALU1, JMul], 6, [1, 4], 1>;
|
||||
defm : JWriteResIntPair<WriteIMul64Reg, [JALU1, JMul], 6, [1, 4], 1>;
|
||||
defm : X86WriteResUnsupported<WriteIMulH>;
|
||||
defm : X86WriteResUnsupported<WriteIMulHLd>;
|
||||
defm : X86WriteResPairUnsupported<WriteMULX32>;
|
||||
defm : X86WriteResPairUnsupported<WriteMULX64>;
|
||||
|
||||
|
|
|
@ -112,6 +112,7 @@ defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
|
|||
defm : SLMWriteResPair<WriteIMul64Imm, [SLM_IEC_RSV1], 3>;
|
||||
defm : SLMWriteResPair<WriteIMul64Reg, [SLM_IEC_RSV1], 3>;
|
||||
defm : X86WriteResUnsupported<WriteIMulH>;
|
||||
defm : X86WriteResUnsupported<WriteIMulHLd>;
|
||||
defm : X86WriteResPairUnsupported<WriteMULX32>;
|
||||
defm : X86WriteResPairUnsupported<WriteMULX64>;
|
||||
|
||||
|
|
|
@ -256,10 +256,14 @@ defm : ZnWriteResPair<WriteIDiv32, [ZnALU2, ZnDivider], 25, [1,25], 2>;
|
|||
defm : ZnWriteResPair<WriteIDiv64, [ZnALU2, ZnDivider], 41, [1,41], 2>;
|
||||
|
||||
// IMULH
|
||||
def : WriteRes<WriteIMulH, [ZnMultiplier]>{
|
||||
def ZnWriteIMulH : WriteRes<WriteIMulH, [ZnMultiplier]>{
|
||||
let Latency = 3;
|
||||
let NumMicroOps = 0;
|
||||
}
|
||||
def : WriteRes<WriteIMulHLd, [ZnMultiplier]> {
|
||||
let Latency = !add(ZnWriteIMulH.Latency, Znver1Model.LoadLatency);
|
||||
let NumMicroOps = ZnWriteIMulH.NumMicroOps;
|
||||
}
|
||||
|
||||
// Floating point operations
|
||||
defm : X86WriteRes<WriteFLoad, [ZnAGU], 8, [1], 1>;
|
||||
|
|
|
@ -243,11 +243,17 @@ defm : Zn2WriteResPair<WriteIDiv32, [Zn2ALU2, Zn2Divider], 25, [1,25], 2>;
|
|||
defm : Zn2WriteResPair<WriteIDiv64, [Zn2ALU2, Zn2Divider], 41, [1,41], 2>;
|
||||
|
||||
// IMULH
|
||||
def : WriteRes<WriteIMulH, [Zn2Multiplier]>{
|
||||
def Zn2WriteIMulH : WriteRes<WriteIMulH, [Zn2Multiplier]>{
|
||||
let Latency = 3;
|
||||
let NumMicroOps = 0;
|
||||
}
|
||||
|
||||
def : WriteRes<WriteIMulHLd, [Zn2Multiplier]>{
|
||||
let Latency = !add(Zn2WriteIMulH.Latency, Znver2Model.LoadLatency);
|
||||
let NumMicroOps = Zn2WriteIMulH.NumMicroOps;
|
||||
}
|
||||
|
||||
|
||||
// Floating point operations
|
||||
defm : X86WriteRes<WriteFLoad, [Zn2AGU], 8, [1], 1>;
|
||||
defm : X86WriteRes<WriteFLoadX, [Zn2AGU], 8, [1], 1>;
|
||||
|
|
|
@ -631,7 +631,7 @@ def Zn3MULX32rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> {
|
|||
let ResourceCycles = [1, 1, 2];
|
||||
let NumMicroOps = Zn3MULX32rr.NumMicroOps;
|
||||
}
|
||||
def : InstRW<[Zn3MULX32rm, WriteIMulH,
|
||||
def : InstRW<[Zn3MULX32rm, WriteIMulHLd,
|
||||
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
|
||||
ReadAfterLd], (instrs MULX32rm)>;
|
||||
|
||||
|
@ -652,13 +652,14 @@ def Zn3MULX64rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> {
|
|||
let ResourceCycles = [1, 1, 2];
|
||||
let NumMicroOps = Zn3MULX64rr.NumMicroOps;
|
||||
}
|
||||
def : InstRW<[Zn3MULX64rm, WriteIMulH,
|
||||
def : InstRW<[Zn3MULX64rm, WriteIMulHLd,
|
||||
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
|
||||
ReadAfterLd], (instrs MULX64rm)>;
|
||||
|
||||
defm : Zn3WriteResIntPair<WriteIMul64Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by immediate.
|
||||
defm : Zn3WriteResIntPair<WriteIMul64Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by register.
|
||||
defm : Zn3WriteResInt<WriteIMulH, [], 4, [], 0>; // Integer multiplication, high part.
|
||||
defm : Zn3WriteResInt<WriteIMulHLd, [], !add(4, Znver3Model.LoadLatency), [], 0>; // Integer multiplication, high part.
|
||||
defm : Zn3WriteResInt<WriteIMulH, [], 4, [], 0>; // Integer multiplication, high part.
|
||||
|
||||
defm : Zn3WriteResInt<WriteBSWAP32, [Zn3ALU0123], 1, [1], 1>; // Byte Order (Endianness) 32-bit Swap.
|
||||
defm : Zn3WriteResInt<WriteBSWAP64, [Zn3ALU0123], 1, [1], 1>; // Byte Order (Endianness) 64-bit Swap.
|
||||
|
|
|
@ -63,7 +63,7 @@ add %rax, %rax
|
|||
# CHECK-NEXT: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeeeeeeeER mulxl (%rdi), %eax, %ecx
|
||||
# CHECK-NEXT: [0,1] .D==eE-----R addl %eax, %eax
|
||||
# CHECK-NEXT: [0,1] .D=======eER addl %eax, %eax
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -73,8 +73,8 @@ add %rax, %rax
|
|||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 mulxl (%rdi), %eax, %ecx
|
||||
# CHECK-NEXT: 1. 1 3.0 0.0 5.0 addl %eax, %eax
|
||||
# CHECK-NEXT: 1 2.0 0.5 2.5 <total>
|
||||
# CHECK-NEXT: 1. 1 8.0 0.0 0.0 addl %eax, %eax
|
||||
# CHECK-NEXT: 1 4.5 0.5 0.0 <total>
|
||||
|
||||
# CHECK: [1] Code Region
|
||||
|
||||
|
@ -126,7 +126,7 @@ add %rax, %rax
|
|||
# CHECK-NEXT: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeeeeeeeER mulxq (%rdi), %rax, %rcx
|
||||
# CHECK-NEXT: [0,1] .D==eE-----R addq %rax, %rax
|
||||
# CHECK-NEXT: [0,1] .D=======eER addq %rax, %rax
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -136,5 +136,5 @@ add %rax, %rax
|
|||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 mulxq (%rdi), %rax, %rcx
|
||||
# CHECK-NEXT: 1. 1 3.0 0.0 5.0 addq %rax, %rax
|
||||
# CHECK-NEXT: 1 2.0 0.5 2.5 <total>
|
||||
# CHECK-NEXT: 1. 1 8.0 0.0 0.0 addq %rax, %rax
|
||||
# CHECK-NEXT: 1 4.5 0.5 0.0 <total>
|
||||
|
|
|
@ -63,7 +63,7 @@ add %rax, %rax
|
|||
# CHECK-NEXT: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeeeeeeeER mulxl (%rdi), %eax, %ecx
|
||||
# CHECK-NEXT: [0,1] D===eE-----R addl %eax, %eax
|
||||
# CHECK-NEXT: [0,1] D========eER addl %eax, %eax
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -73,8 +73,8 @@ add %rax, %rax
|
|||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 mulxl (%rdi), %eax, %ecx
|
||||
# CHECK-NEXT: 1. 1 4.0 0.0 5.0 addl %eax, %eax
|
||||
# CHECK-NEXT: 1 2.5 0.5 2.5 <total>
|
||||
# CHECK-NEXT: 1. 1 9.0 0.0 0.0 addl %eax, %eax
|
||||
# CHECK-NEXT: 1 5.0 0.5 0.0 <total>
|
||||
|
||||
# CHECK: [1] Code Region
|
||||
|
||||
|
@ -126,7 +126,7 @@ add %rax, %rax
|
|||
# CHECK-NEXT: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeeeeeeeER mulxq (%rdi), %rax, %rcx
|
||||
# CHECK-NEXT: [0,1] D===eE-----R addq %rax, %rax
|
||||
# CHECK-NEXT: [0,1] D========eER addq %rax, %rax
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -136,5 +136,5 @@ add %rax, %rax
|
|||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 mulxq (%rdi), %rax, %rcx
|
||||
# CHECK-NEXT: 1. 1 4.0 0.0 5.0 addq %rax, %rax
|
||||
# CHECK-NEXT: 1 2.5 0.5 2.5 <total>
|
||||
# CHECK-NEXT: 1. 1 9.0 0.0 0.0 addq %rax, %rax
|
||||
# CHECK-NEXT: 1 5.0 0.5 0.0 <total>
|
||||
|
|
|
@ -17,12 +17,12 @@ add %rax, %rax
|
|||
|
||||
# CHECK: Iterations: 1
|
||||
# CHECK-NEXT: Instructions: 2
|
||||
# CHECK-NEXT: Total Cycles: 10
|
||||
# CHECK-NEXT: Total Cycles: 11
|
||||
# CHECK-NEXT: Total uOps: 2
|
||||
|
||||
# CHECK: Dispatch Width: 4
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.20
|
||||
# CHECK-NEXT: IPC: 0.20
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.18
|
||||
# CHECK-NEXT: IPC: 0.18
|
||||
# CHECK-NEXT: Block RThroughput: 2.0
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
|
@ -62,10 +62,11 @@ add %rax, %rax
|
|||
# CHECK-NEXT: - - - - - - 1.00 - - - - - - addl %eax, %eax
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0
|
||||
# CHECK-NEXT: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeeeeeER mulxl (%rdi), %eax, %ecx
|
||||
# CHECK-NEXT: [0,1] D===eE---R addl %eax, %eax
|
||||
# CHECK: [0,0] DeeeeeeeER. mulxl (%rdi), %eax, %ecx
|
||||
# CHECK-NEXT: [0,1] D=======eER addl %eax, %eax
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -75,19 +76,19 @@ add %rax, %rax
|
|||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 mulxl (%rdi), %eax, %ecx
|
||||
# CHECK-NEXT: 1. 1 4.0 0.0 3.0 addl %eax, %eax
|
||||
# CHECK-NEXT: 1 2.5 0.5 1.5 <total>
|
||||
# CHECK-NEXT: 1. 1 8.0 0.0 0.0 addl %eax, %eax
|
||||
# CHECK-NEXT: 1 4.5 0.5 0.0 <total>
|
||||
|
||||
# CHECK: [1] Code Region
|
||||
|
||||
# CHECK: Iterations: 1
|
||||
# CHECK-NEXT: Instructions: 2
|
||||
# CHECK-NEXT: Total Cycles: 10
|
||||
# CHECK-NEXT: Total Cycles: 11
|
||||
# CHECK-NEXT: Total uOps: 2
|
||||
|
||||
# CHECK: Dispatch Width: 4
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.20
|
||||
# CHECK-NEXT: IPC: 0.20
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.18
|
||||
# CHECK-NEXT: IPC: 0.18
|
||||
# CHECK-NEXT: Block RThroughput: 2.0
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
|
@ -127,10 +128,11 @@ add %rax, %rax
|
|||
# CHECK-NEXT: - - - - - - 1.00 - - - - - - addq %rax, %rax
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0
|
||||
# CHECK-NEXT: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeeeeeER mulxq (%rdi), %rax, %rcx
|
||||
# CHECK-NEXT: [0,1] D===eE---R addq %rax, %rax
|
||||
# CHECK: [0,0] DeeeeeeeER. mulxq (%rdi), %rax, %rcx
|
||||
# CHECK-NEXT: [0,1] D=======eER addq %rax, %rax
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -140,5 +142,5 @@ add %rax, %rax
|
|||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 mulxq (%rdi), %rax, %rcx
|
||||
# CHECK-NEXT: 1. 1 4.0 0.0 3.0 addq %rax, %rax
|
||||
# CHECK-NEXT: 1 2.5 0.5 1.5 <total>
|
||||
# CHECK-NEXT: 1. 1 8.0 0.0 0.0 addq %rax, %rax
|
||||
# CHECK-NEXT: 1 4.5 0.5 0.0 <total>
|
||||
|
|
|
@ -17,12 +17,12 @@ add %rax, %rax
|
|||
|
||||
# CHECK: Iterations: 1
|
||||
# CHECK-NEXT: Instructions: 2
|
||||
# CHECK-NEXT: Total Cycles: 11
|
||||
# CHECK-NEXT: Total Cycles: 12
|
||||
# CHECK-NEXT: Total uOps: 3
|
||||
|
||||
# CHECK: Dispatch Width: 6
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.27
|
||||
# CHECK-NEXT: IPC: 0.18
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.25
|
||||
# CHECK-NEXT: IPC: 0.17
|
||||
# CHECK-NEXT: Block RThroughput: 2.0
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
|
@ -72,11 +72,11 @@ add %rax, %rax
|
|||
# CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - - - - - addl %eax, %eax
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0
|
||||
# CHECK-NEXT: 01
|
||||
# CHECK-NEXT: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeeeeeeER mulxl (%rdi), %eax, %ecx
|
||||
# CHECK-NEXT: [0,1] D====eE---R addl %eax, %eax
|
||||
# CHECK: [0,0] DeeeeeeeeER. mulxl (%rdi), %eax, %ecx
|
||||
# CHECK-NEXT: [0,1] D========eER addl %eax, %eax
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -86,19 +86,19 @@ add %rax, %rax
|
|||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 mulxl (%rdi), %eax, %ecx
|
||||
# CHECK-NEXT: 1. 1 5.0 0.0 3.0 addl %eax, %eax
|
||||
# CHECK-NEXT: 1 3.0 0.5 1.5 <total>
|
||||
# CHECK-NEXT: 1. 1 9.0 0.0 0.0 addl %eax, %eax
|
||||
# CHECK-NEXT: 1 5.0 0.5 0.0 <total>
|
||||
|
||||
# CHECK: [1] Code Region
|
||||
|
||||
# CHECK: Iterations: 1
|
||||
# CHECK-NEXT: Instructions: 2
|
||||
# CHECK-NEXT: Total Cycles: 11
|
||||
# CHECK-NEXT: Total Cycles: 12
|
||||
# CHECK-NEXT: Total uOps: 3
|
||||
|
||||
# CHECK: Dispatch Width: 6
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.27
|
||||
# CHECK-NEXT: IPC: 0.18
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.25
|
||||
# CHECK-NEXT: IPC: 0.17
|
||||
# CHECK-NEXT: Block RThroughput: 2.0
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
|
@ -148,11 +148,11 @@ add %rax, %rax
|
|||
# CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - - - - - addq %rax, %rax
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0
|
||||
# CHECK-NEXT: 01
|
||||
# CHECK-NEXT: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeeeeeeER mulxq (%rdi), %rax, %rcx
|
||||
# CHECK-NEXT: [0,1] D====eE---R addq %rax, %rax
|
||||
# CHECK: [0,0] DeeeeeeeeER. mulxq (%rdi), %rax, %rcx
|
||||
# CHECK-NEXT: [0,1] D========eER addq %rax, %rax
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -162,5 +162,5 @@ add %rax, %rax
|
|||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 mulxq (%rdi), %rax, %rcx
|
||||
# CHECK-NEXT: 1. 1 5.0 0.0 3.0 addq %rax, %rax
|
||||
# CHECK-NEXT: 1 3.0 0.5 1.5 <total>
|
||||
# CHECK-NEXT: 1. 1 9.0 0.0 0.0 addq %rax, %rax
|
||||
# CHECK-NEXT: 1 5.0 0.5 0.0 <total>
|
||||
|
|
Loading…
Reference in New Issue