forked from OSchip/llvm-project
[llvm-mca] Increase the default number of iterations to 100.
llvm-svn: 329694
This commit is contained in:
parent
23b8bd1220
commit
074cef3dfb
|
@ -95,7 +95,7 @@ option specifies "``-``", then the output will also be sent to standard output.
|
|||
.. option:: -iterations=<number of iterations>
|
||||
|
||||
Specify the number of iterations to run. If this flag is set to 0, then the
|
||||
tool sets the number of iterations to a default value (i.e. 70).
|
||||
tool sets the number of iterations to a default value (i.e. 100).
|
||||
|
||||
.. option:: -noalias=<bool>
|
||||
|
||||
|
|
|
@ -16,14 +16,12 @@ vsqrtps %xmm0, %xmm2
|
|||
vaddps %ymm0, %ymm1, %ymm2
|
||||
vsqrtps %ymm0, %ymm2
|
||||
|
||||
|
||||
# CHECK: Iterations: 70
|
||||
# CHECK-NEXT: Instructions: 560
|
||||
# CHECK-NEXT: Total Cycles: 4416
|
||||
# CHECK: Iterations: 100
|
||||
# CHECK-NEXT: Instructions: 800
|
||||
# CHECK-NEXT: Total Cycles: 6306
|
||||
# CHECK-NEXT: Dispatch Width: 2
|
||||
# CHECK-NEXT: IPC: 0.13
|
||||
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
|
@ -42,7 +40,6 @@ vsqrtps %ymm0, %ymm2
|
|||
# CHECK-NEXT: 2 3 2.00 vaddps %ymm0, %ymm1, %ymm2
|
||||
# CHECK-NEXT: 2 42 42.00 vsqrtps %ymm0, %ymm2
|
||||
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
|
@ -59,14 +56,14 @@ vsqrtps %ymm0, %ymm2
|
|||
# CHECK-NEXT: [12] - JVALU1
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - - - 3.00 63.00 6.01 5.99 - - - 1.00 1.00 1.00 3.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: - - - - - 2.00 1.00 - - - - 0.03 0.97 2.00 vpmulld %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - - - 0.01 0.99 - - - - 0.97 0.03 - vpand %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - - - 2.00 1.00 - - - - 0.02 0.98 2.00 vpmulld %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - - - 0.01 0.99 - - - - 0.98 0.02 - vpand %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvttps2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - - 1.00 - - - - - - - 1.00 vpclmulqdq $0, %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddps %xmm0, %xmm1, %xmm2
|
||||
|
@ -74,11 +71,10 @@ vsqrtps %ymm0, %ymm2
|
|||
# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddps %ymm0, %ymm1, %ymm2
|
||||
# CHECK-NEXT: - - - - 42.00 - 2.00 - - - - - - - vsqrtps %ymm0, %ymm2
|
||||
|
||||
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0123456789 0123456789 0123456789 0
|
||||
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeeER . . . . . . . . . . . . . vpmulld %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: [0,1] .DeE--R . . . . . . . . . . . . . vpand %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: [0,2] . DeeeER . . . . . . . . . . . . . vcvttps2dq %xmm0, %xmm2
|
||||
|
@ -93,19 +89,19 @@ vsqrtps %ymm0, %ymm2
|
|||
# CHECK-NEXT: [1,3] . . DeeE----------------------------------------------------------R vpclmulqdq $0, %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: [1,4] . . DeeeE--------------------------------------------------------R vaddps %xmm0, %xmm1, %xmm2
|
||||
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
||||
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 2 1.0 1.0 28.0 vpmulld %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1. 2 1.0 1.0 30.5 vpand %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 2. 2 1.0 1.0 28.0 vcvttps2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 3. 2 1.0 1.0 29.5 vpclmulqdq $0, %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 4. 2 1.0 1.0 28.0 vaddps %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 5. 1 1.0 1.0 0.0 vsqrtps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 6. 1 1.0 1.0 17.0 vaddps %ymm0, %ymm1, %ymm2
|
||||
# CHECK-NEXT: 7. 1 20.0 20.0 0.0 vsqrtps %ymm0, %ymm2
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 2 1.0 1.0 28.0 vpmulld %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1. 2 1.0 1.0 30.5 vpand %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 2. 2 1.0 1.0 28.0 vcvttps2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 3. 2 1.0 1.0 29.5 vpclmulqdq $0, %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 4. 2 1.0 1.0 28.0 vaddps %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 5. 1 1.0 1.0 0.0 vsqrtps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 6. 1 1.0 1.0 17.0 vaddps %ymm0, %ymm1, %ymm2
|
||||
# CHECK-NEXT: 7. 1 20.0 20.0 0.0 vsqrtps %ymm0, %ymm2
|
||||
|
||||
|
|
|
@ -12,6 +12,30 @@ vcvtps2ph $0, %xmm0, (%rax)
|
|||
vcvtps2ph $0, %ymm0, %xmm2
|
||||
vcvtps2ph $0, %ymm0, (%rax)
|
||||
|
||||
# CHECK: Iterations: 100
|
||||
# CHECK-NEXT: Instructions: 800
|
||||
# CHECK-NEXT: Total Cycles: 1503
|
||||
# CHECK-NEXT: Dispatch Width: 2
|
||||
# CHECK-NEXT: IPC: 0.53
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
# CHECK-NEXT: [4]: MayLoad
|
||||
# CHECK-NEXT: [5]: MayStore
|
||||
# CHECK-NEXT: [6]: HasSideEffects
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * vcvtph2ps (%rax), %xmm2
|
||||
# CHECK-NEXT: 2 3 2.00 vcvtph2ps %xmm0, %ymm2
|
||||
# CHECK-NEXT: 2 8 2.00 * vcvtph2ps (%rax), %ymm2
|
||||
# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 * vcvtps2ph $0, %xmm0, (%rax)
|
||||
# CHECK-NEXT: 3 6 2.00 vcvtps2ph $0, %ymm0, %xmm2
|
||||
# CHECK-NEXT: 3 11 2.00 * vcvtps2ph $0, %ymm0, (%rax)
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
|
@ -28,6 +52,10 @@ vcvtps2ph $0, %ymm0, (%rax)
|
|||
# CHECK-NEXT: [12] - JVALU1
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - - - 2.00 2.00 - 12.00 2.00 - 2.00 12.00 - - -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtph2ps %xmm0, %xmm2
|
||||
|
@ -36,5 +64,6 @@ vcvtps2ph $0, %ymm0, (%rax)
|
|||
# CHECK-NEXT: - - - - - - 2.00 1.00 - - 2.00 - - - vcvtph2ps (%rax), %ymm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtps2ph $0, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vcvtps2ph $0, %xmm0, (%rax)
|
||||
# CHECK-NEXT: - - - 1.80 0.20 - 2.00 - - - 2.00 - - - vcvtps2ph $0, %ymm0, %xmm2
|
||||
# CHECK-NEXT: - - - 0.20 1.80 - 2.00 - - 1.00 2.00 - - - vcvtps2ph $0, %ymm0, (%rax)
|
||||
# CHECK-NEXT: - - - 1.86 0.14 - 2.00 - - - 2.00 - - - vcvtps2ph $0, %ymm0, %xmm2
|
||||
# CHECK-NEXT: - - - 0.14 1.86 - 2.00 - - 1.00 2.00 - - - vcvtps2ph $0, %ymm0, (%rax)
|
||||
|
||||
|
|
|
@ -1,27 +1,73 @@
|
|||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 < %s | FileCheck --check-prefix=ALL --check-prefix=BTVER2 %s
|
||||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver1 < %s | FileCheck --check-prefix=ALL --check-prefix=ZNVER1 %s
|
||||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge < %s | FileCheck --check-prefix=ALL --check-prefix=SANDYBRIDGE %s
|
||||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge < %s | FileCheck --check-prefix=ALL --check-prefix=IVYBRIDGE %s
|
||||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell < %s | FileCheck --check-prefix=ALL --check-prefix=HASWELL %s
|
||||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell < %s | FileCheck --check-prefix=ALL --check-prefix=BROADWELL %s
|
||||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=knl < %s | FileCheck --check-prefix=ALL --check-prefix=KNL %s
|
||||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake < %s | FileCheck --check-prefix=ALL --check-prefix=SKX %s
|
||||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 < %s | FileCheck --check-prefix=ALL --check-prefix=SKX-AVX512 %s
|
||||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=slm < %s | FileCheck --check-prefix=ALL --check-prefix=SLM %s
|
||||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=BTVER2 %s
|
||||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver1 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=ZNVER1 %s
|
||||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=SANDYBRIDGE %s
|
||||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=IVYBRIDGE %s
|
||||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=HASWELL %s
|
||||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=BROADWELL %s
|
||||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=knl -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=KNL %s
|
||||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=SKX %s
|
||||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=SKX-AVX512 %s
|
||||
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=slm -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=SLM %s
|
||||
|
||||
add %edi, %eax
|
||||
|
||||
# ALL: Iterations: 70
|
||||
# ALL-NEXT: Instructions: 70
|
||||
# BTVER2: Iterations: 100
|
||||
# BTVER2-NEXT: Instructions: 100
|
||||
# BTVER2-NEXT: Total Cycles: 103
|
||||
# BTVER2-NEXT: Dispatch Width: 2
|
||||
# BTVER2-NEXT: IPC: 0.97
|
||||
|
||||
# BTVER2: Dispatch Width: 2
|
||||
# ZNVER1: Dispatch Width: 4
|
||||
# SANDYBRIDGE: Dispatch Width: 4
|
||||
# IVYBRIDGE: Dispatch Width: 4
|
||||
# HASWELL: Dispatch Width: 4
|
||||
# BROADWELL: Dispatch Width: 4
|
||||
# KNL: Dispatch Width: 4
|
||||
# SKX: Dispatch Width: 6
|
||||
# SKX-AVX512: Dispatch Width: 6
|
||||
# SLM: Dispatch Width: 2
|
||||
# SLM: Iterations: 100
|
||||
# SLM-NEXT: Instructions: 100
|
||||
# SLM-NEXT: Total Cycles: 103
|
||||
# SLM-NEXT: Dispatch Width: 2
|
||||
# SLM-NEXT: IPC: 0.97
|
||||
|
||||
# BROADWELL: Iterations: 100
|
||||
# BROADWELL-NEXT: Instructions: 100
|
||||
# BROADWELL-NEXT: Total Cycles: 103
|
||||
# BROADWELL-NEXT: Dispatch Width: 4
|
||||
# BROADWELL-NEXT: IPC: 0.97
|
||||
|
||||
# HASWELL: Iterations: 100
|
||||
# HASWELL-NEXT: Instructions: 100
|
||||
# HASWELL-NEXT: Total Cycles: 103
|
||||
# HASWELL-NEXT: Dispatch Width: 4
|
||||
# HASWELL-NEXT: IPC: 0.97
|
||||
|
||||
# IVYBRIDGE: Iterations: 100
|
||||
# IVYBRIDGE-NEXT: Instructions: 100
|
||||
# IVYBRIDGE-NEXT: Total Cycles: 103
|
||||
# IVYBRIDGE-NEXT: Dispatch Width: 4
|
||||
# IVYBRIDGE-NEXT: IPC: 0.97
|
||||
|
||||
# KNL: Iterations: 100
|
||||
# KNL-NEXT: Instructions: 100
|
||||
# KNL-NEXT: Total Cycles: 103
|
||||
# KNL-NEXT: Dispatch Width: 4
|
||||
# KNL-NEXT: IPC: 0.97
|
||||
|
||||
# SANDYBRIDGE: Iterations: 100
|
||||
# SANDYBRIDGE-NEXT: Instructions: 100
|
||||
# SANDYBRIDGE-NEXT: Total Cycles: 103
|
||||
# SANDYBRIDGE-NEXT: Dispatch Width: 4
|
||||
# SANDYBRIDGE-NEXT: IPC: 0.97
|
||||
|
||||
# ZNVER1: Iterations: 100
|
||||
# ZNVER1-NEXT: Instructions: 100
|
||||
# ZNVER1-NEXT: Total Cycles: 103
|
||||
# ZNVER1-NEXT: Dispatch Width: 4
|
||||
# ZNVER1-NEXT: IPC: 0.97
|
||||
|
||||
# SKX: Iterations: 100
|
||||
# SKX-NEXT: Instructions: 100
|
||||
# SKX-NEXT: Total Cycles: 103
|
||||
# SKX-NEXT: Dispatch Width: 6
|
||||
# SKX-NEXT: IPC: 0.97
|
||||
|
||||
# SKX-AVX512: Iterations: 100
|
||||
# SKX-AVX512-NEXT: Instructions: 100
|
||||
# SKX-AVX512-NEXT: Total Cycles: 103
|
||||
# SKX-AVX512-NEXT: Dispatch Width: 6
|
||||
# SKX-AVX512-NEXT: IPC: 0.97
|
||||
|
||||
|
|
|
@ -1,11 +1,29 @@
|
|||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 < %s 2>&1 | FileCheck --check-prefix=DEFAULT %s
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=0 < %s 2>&1 | FileCheck --check-prefix=DEFAULT %s
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 < %s 2>&1 | FileCheck --check-prefix=CUSTOM %s
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false < %s | FileCheck --check-prefix=ALL --check-prefix=DEFAULT %s
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=0 -resource-pressure=false < %s | FileCheck --check-prefix=ALL --check-prefix=DEFAULT %s
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -resource-pressure=false < %s | FileCheck --check-prefix=ALL --check-prefix=CUSTOM %s
|
||||
|
||||
add %eax, %eax
|
||||
|
||||
# DEFAULT: Iterations: 70
|
||||
# DEFAULT-NEXT: Instructions: 70
|
||||
# CUSTOM: Iterations: 1
|
||||
# CUSTOM-NEXT: Instructions: 1
|
||||
# CUSTOM-NEXT: Total Cycles: 4
|
||||
# CUSTOM-NEXT: Dispatch Width: 2
|
||||
# CUSTOM-NEXT: IPC: 0.25
|
||||
|
||||
# DEFAULT: Iterations: 100
|
||||
# DEFAULT-NEXT: Instructions: 100
|
||||
# DEFAULT-NEXT: Total Cycles: 103
|
||||
# DEFAULT-NEXT: Dispatch Width: 2
|
||||
# DEFAULT-NEXT: IPC: 0.97
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# ALL-NEXT: 1 1 0.50 addl %eax, %eax
|
||||
|
||||
# CUSTOM: Iterations: 1
|
||||
# CUSTOM-NEXT: Instructions: 1
|
||||
|
|
|
@ -59,7 +59,7 @@ sequence of MCInst is then analyzed by a 'Backend' module to generate a
|
|||
performance report.
|
||||
|
||||
The Backend module internally emulates the execution of the machine code
|
||||
sequence in a loop of iterations (which by default is 70). At the end of this
|
||||
sequence in a loop of iterations (which by default is 100). At the end of this
|
||||
process, the backend collects a number of statistics which are then printed out
|
||||
in the form of a report.
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ class SourceMgr {
|
|||
const InstVec &Sequence;
|
||||
unsigned Current;
|
||||
unsigned Iterations;
|
||||
static const unsigned DefaultIterations = 70;
|
||||
static const unsigned DefaultIterations = 100;
|
||||
|
||||
public:
|
||||
SourceMgr(const InstVec &MCInstSequence, unsigned NumIterations)
|
||||
|
|
Loading…
Reference in New Issue