[MCA] Fixed a bug where loads and stores were sometimes incorrectly marked as dependent. Fixes PR45793.

This fixes a regression introduced by a very old commit 280ac1fd1d (was
llvm-svn 361950).

Commit 280ac1fd1d redesigned the logic in the LSUnit with the goal of
speeding up isReady() queries, and stabilising the LSUnit API (while also making
the load store unit more customisable).

The concept of MemoryGroup (effectively an alias set) was added by that commit
to better describe and track dependencies between memory operations.  However,
that concept was not just used for alias dependencies, but it was also used for
describing memory "order" dependencies (enforced by the memory consistency
model).

Instructions of the same memory group were considered "equivalent" as in:
independent operations that can potentially execute in parallel.  The problem
was that the cost of a dependency (in terms of number of cycles) should have
been different for "order" dependencies. Instructions in an order dependency
simply have to wait until their predecessors are "issued" to an
underlying pipeline (rather than having to wait until predecessors have been
fully executed). For simple "order" dependencies, this was effectively
introducing an artificial delay on the "issue" of independent loads and stores.

This patch fixes the issue and adds a new test named 'independent-load-stores.s'
to a bunch of x86 targets. That test contains the reproducer posted by Fabian
Ritter on PR45793.

I had to rerun the update-mca-tests script on several files. To avoid expected
regressions on some Exynos tests, I have added a -noalias=false flag (to match
the old strict behavior on latencies).

Some tests for processor Barcelona are improved/fixed by this change and they
now show better results.  In a few tests we were incorrectly counting the time
spent by instructions in a scheduler queue.  In one case in particular we now
correctly see a store executed out of order.  That test was affected by the same
underlying issue reported as PR45793.

Reviewers: mattd

Differential Revision: https://reviews.llvm.org/D79351
This commit is contained in:
Andrea Di Biagio 2020-05-04 18:23:04 +01:00
parent 08032e7192
commit 5578ec32f9
18 changed files with 973 additions and 338 deletions

View File

@ -40,7 +40,10 @@ class MemoryGroup {
unsigned NumInstructions;
unsigned NumExecuting;
unsigned NumExecuted;
SmallVector<MemoryGroup *, 4> Succ;
// Successors that are in a order dependency with this group.
SmallVector<MemoryGroup *, 4> OrderSucc;
// Successors that are in a data dependency with this group.
SmallVector<MemoryGroup *, 4> DataSucc;
CriticalDependency CriticalPredecessor;
InstRef CriticalMemoryInstruction;
@ -55,8 +58,9 @@ public:
NumExecuted(0), CriticalPredecessor(), CriticalMemoryInstruction() {}
MemoryGroup(MemoryGroup &&) = default;
ArrayRef<MemoryGroup *> getSuccessors() const { return Succ; }
unsigned getNumSuccessors() const { return Succ.size(); }
size_t getNumSuccessors() const {
return OrderSucc.size() + DataSucc.size();
}
unsigned getNumPredecessors() const { return NumPredecessors; }
unsigned getNumExecutingPredecessors() const {
return NumExecutingPredecessors;
@ -75,12 +79,22 @@ public:
return CriticalPredecessor;
}
void addSuccessor(MemoryGroup *Group) {
void addSuccessor(MemoryGroup *Group, bool IsDataDependent) {
// Do not need to add a dependency if there is no data
// dependency and all instructions from this group have been
// issued already.
if (!IsDataDependent && isExecuting())
return;
Group->NumPredecessors++;
assert(!isExecuted() && "Should have been removed!");
if (isExecuting())
Group->onGroupIssued(CriticalMemoryInstruction);
Succ.emplace_back(Group);
Group->onGroupIssued(CriticalMemoryInstruction, IsDataDependent);
if (IsDataDependent)
DataSucc.emplace_back(Group);
else
OrderSucc.emplace_back(Group);
}
bool isWaiting() const {
@ -98,10 +112,13 @@ public:
}
bool isExecuted() const { return NumInstructions == NumExecuted; }
void onGroupIssued(const InstRef &IR) {
void onGroupIssued(const InstRef &IR, bool ShouldUpdateCriticalDep) {
assert(!isReady() && "Unexpected group-start event!");
NumExecutingPredecessors++;
if (!ShouldUpdateCriticalDep)
return;
unsigned Cycles = IR.getInstruction()->getCyclesLeft();
if (CriticalPredecessor.Cycles < Cycles) {
CriticalPredecessor.IID = IR.getSourceIndex();
@ -133,8 +150,14 @@ public:
return;
// Notify successors that this group started execution.
for (MemoryGroup *MG : Succ)
MG->onGroupIssued(CriticalMemoryInstruction);
for (MemoryGroup *MG : OrderSucc) {
MG->onGroupIssued(CriticalMemoryInstruction, false);
// Release the order dependency with this group.
MG->onGroupExecuted();
}
for (MemoryGroup *MG : DataSucc)
MG->onGroupIssued(CriticalMemoryInstruction, true);
}
void onInstructionExecuted() {
@ -145,8 +168,8 @@ public:
if (!isExecuted())
return;
// Notify successors that this group has finished execution.
for (MemoryGroup *MG : Succ)
// Notify data dependent successors that this group has finished execution.
for (MemoryGroup *MG : DataSucc)
MG->onGroupExecuted();
}
@ -412,6 +435,7 @@ class LSUnit : public LSUnitBase {
unsigned CurrentLoadGroupID;
unsigned CurrentLoadBarrierGroupID;
unsigned CurrentStoreGroupID;
unsigned CurrentStoreBarrierGroupID;
public:
LSUnit(const MCSchedModel &SM)
@ -420,7 +444,8 @@ public:
: LSUnit(SM, LQ, SQ, /* NoAlias */ false) {}
LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias)
: LSUnitBase(SM, LQ, SQ, AssumeNoAlias), CurrentLoadGroupID(0),
CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0) {}
CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0),
CurrentStoreBarrierGroupID(0) {}
/// Returns LSU_AVAILABLE if there are enough load/store queue entries to
/// accomodate instruction IR.

View File

@ -77,9 +77,6 @@ unsigned LSUnit::dispatch(const InstRef &IR) {
acquireSQSlot();
if (Desc.MayStore) {
// Always create a new group for store operations.
// A store may not pass a previous store or store barrier.
unsigned NewGID = createMemoryGroup();
MemoryGroup &NewGroup = getGroup(NewGID);
NewGroup.addInstruction();
@ -91,16 +88,32 @@ unsigned LSUnit::dispatch(const InstRef &IR) {
MemoryGroup &IDom = getGroup(ImmediateLoadDominator);
LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << ImmediateLoadDominator
<< ") --> (" << NewGID << ")\n");
IDom.addSuccessor(&NewGroup);
IDom.addSuccessor(&NewGroup, !assumeNoAlias());
}
if (CurrentStoreGroupID) {
// A store may not pass a previous store barrier.
if (CurrentStoreBarrierGroupID) {
MemoryGroup &StoreGroup = getGroup(CurrentStoreBarrierGroupID);
LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: ("
<< CurrentStoreBarrierGroupID
<< ") --> (" << NewGID << ")\n");
StoreGroup.addSuccessor(&NewGroup, true);
}
// A store may not pass a previous store.
if (CurrentStoreGroupID &&
(CurrentStoreGroupID != CurrentStoreBarrierGroupID)) {
MemoryGroup &StoreGroup = getGroup(CurrentStoreGroupID);
LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentStoreGroupID
<< ") --> (" << NewGID << ")\n");
StoreGroup.addSuccessor(&NewGroup);
StoreGroup.addSuccessor(&NewGroup, !assumeNoAlias());
}
CurrentStoreGroupID = NewGID;
if (IsMemBarrier)
CurrentStoreBarrierGroupID = NewGID;
if (Desc.MayLoad) {
CurrentLoadGroupID = NewGID;
if (IsMemBarrier)
@ -112,31 +125,59 @@ unsigned LSUnit::dispatch(const InstRef &IR) {
assert(Desc.MayLoad && "Expected a load!");
// Always create a new memory group if this is the first load of the sequence.
unsigned ImmediateLoadDominator =
std::max(CurrentLoadGroupID, CurrentLoadBarrierGroupID);
// A new load group is created if we are in one of the following situations:
// 1) This is a load barrier (by construction, a load barrier is always
// assigned to a different memory group).
// 2) There is no load in flight (by construction we always keep loads and
// stores into separate memory groups).
// 3) There is a load barrier in flight. This load depends on it.
// 4) There is an intervening store between the last load dispatched to the
// LSU and this load. We always create a new group even if this load
// does not alias the last dispatched store.
// 5) There is no intervening store and there is an active load group.
// However that group has already started execution, so we cannot add
// this load to it.
bool ShouldCreateANewGroup =
IsMemBarrier || !ImmediateLoadDominator ||
CurrentLoadBarrierGroupID == ImmediateLoadDominator ||
ImmediateLoadDominator <= CurrentStoreGroupID ||
getGroup(ImmediateLoadDominator).isExecuting();
// A load may not pass a previous store unless flag 'NoAlias' is set.
// A load may pass a previous load.
// A younger load cannot pass a older load barrier.
// A load barrier cannot pass a older load.
bool ShouldCreateANewGroup = !CurrentLoadGroupID || IsMemBarrier ||
CurrentLoadGroupID <= CurrentStoreGroupID ||
CurrentLoadGroupID <= CurrentLoadBarrierGroupID;
if (ShouldCreateANewGroup) {
unsigned NewGID = createMemoryGroup();
MemoryGroup &NewGroup = getGroup(NewGID);
NewGroup.addInstruction();
// A load may not pass a previous store or store barrier
// unless flag 'NoAlias' is set.
if (!assumeNoAlias() && CurrentStoreGroupID) {
MemoryGroup &StGroup = getGroup(CurrentStoreGroupID);
MemoryGroup &StoreGroup = getGroup(CurrentStoreGroupID);
LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentStoreGroupID
<< ") --> (" << NewGID << ")\n");
StGroup.addSuccessor(&NewGroup);
StoreGroup.addSuccessor(&NewGroup, true);
}
if (CurrentLoadBarrierGroupID) {
MemoryGroup &LdGroup = getGroup(CurrentLoadBarrierGroupID);
LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentLoadBarrierGroupID
<< ") --> (" << NewGID << ")\n");
LdGroup.addSuccessor(&NewGroup);
// A load barrier may not pass a previous load or load barrier.
if (IsMemBarrier) {
if (ImmediateLoadDominator) {
MemoryGroup &LoadGroup = getGroup(ImmediateLoadDominator);
LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: ("
<< ImmediateLoadDominator
<< ") --> (" << NewGID << ")\n");
LoadGroup.addSuccessor(&NewGroup, true);
}
} else {
// A younger load cannot pass a older load barrier.
if (CurrentLoadBarrierGroupID) {
MemoryGroup &LoadGroup = getGroup(CurrentLoadBarrierGroupID);
LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: ("
<< CurrentLoadBarrierGroupID
<< ") --> (" << NewGID << ")\n");
LoadGroup.addSuccessor(&NewGroup, true);
}
}
CurrentLoadGroupID = NewGID;
@ -145,6 +186,7 @@ unsigned LSUnit::dispatch(const InstRef &IR) {
return NewGID;
}
// A load may pass a previous load.
MemoryGroup &Group = getGroup(CurrentLoadGroupID);
Group.addInstruction();
return CurrentLoadGroupID;

View File

@ -1,7 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M5
st1 {v0.s}[0], [sp]
st1 {v0.2s}, [sp]

View File

@ -1,7 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M5
st2 {v0.s, v1.s}[0], [sp]
st2 {v0.2s, v1.2s}, [sp]

View File

@ -1,7 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M5
st3 {v0.s, v1.s, v2.s}[0], [sp]
st3 {v0.2s, v1.2s, v2.2s}, [sp]

View File

@ -1,7 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M5
st4 {v0.s, v1.s, v2.s, v3.s}[0], [sp]
st4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp]

View File

@ -1,7 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M5
stur d0, [sp, #2]
stur q0, [sp, #16]

View File

@ -1,7 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M5
stur x0, [sp, #8]
strb w0, [sp], #1

View File

@ -47,12 +47,12 @@ movaps %xmm3, (%rbx)
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 208
# CHECK-NEXT: Total Cycles: 207
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.92
# CHECK-NEXT: IPC: 1.92
# CHECK-NEXT: uOps Per Cycle: 1.93
# CHECK-NEXT: IPC: 1.93
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
@ -72,22 +72,21 @@ movaps %xmm3, (%rbx)
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.7%)
# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (71.0%)
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 34 (16.3%)
# CHECK-NEXT: 2, 148 (71.2%)
# CHECK-NEXT: 4, 26 (12.5%)
# CHECK-NEXT: 0, 33 (15.9%)
# CHECK-NEXT: 2, 148 (71.5%)
# CHECK-NEXT: 4, 26 (12.6%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: 0, 3 (1.4%)
# CHECK-NEXT: 1, 10 (4.8%)
# CHECK-NEXT: 2, 195 (93.8%)
# CHECK-NEXT: 0, 7 (3.4%)
# CHECK-NEXT: 2, 200 (96.6%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: [1] Resource name.
@ -116,16 +115,16 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - - 1.00 - - 1.00 movb %spl, (%rax)
# CHECK-NEXT: - - - - - - 1.00 - movb (%rcx), %bpl
# CHECK-NEXT: - - - - - - 0.95 0.05 movb (%rdx), %sil
# CHECK-NEXT: - - - - 1.00 - 0.05 0.95 movb %dil, (%rbx)
# CHECK-NEXT: - - - - - - - 1.00 movb (%rdx), %sil
# CHECK-NEXT: - - - - 1.00 - 1.00 - movb %dil, (%rbx)
# CHECK: Timeline view:
# CHECK-NEXT: Index 0123456789
# CHECK-NEXT: Index 012345678
# CHECK: [0,0] DeER . . movb %spl, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER . movb (%rcx), %bpl
# CHECK-NEXT: [0,2] D=eeeeeER. movb (%rdx), %sil
# CHECK-NEXT: [0,3] D======eER movb %dil, (%rbx)
# CHECK: [0,0] DeER . . movb %spl, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER. movb (%rcx), %bpl
# CHECK-NEXT: [0,2] D=eeeeeER movb (%rdx), %sil
# CHECK-NEXT: [0,3] D=eE----R movb %dil, (%rbx)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -137,19 +136,19 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movb %spl, (%rax)
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movb (%rcx), %bpl
# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movb (%rdx), %sil
# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movb %dil, (%rbx)
# CHECK-NEXT: 1 2.8 1.0 0.0 <total>
# CHECK-NEXT: 3. 1 2.0 0.0 4.0 movb %dil, (%rbx)
# CHECK-NEXT: 1 1.5 1.0 1.0 <total>
# CHECK: [1] Code Region
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 208
# CHECK-NEXT: Total Cycles: 207
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.92
# CHECK-NEXT: IPC: 1.92
# CHECK-NEXT: uOps Per Cycle: 1.93
# CHECK-NEXT: IPC: 1.93
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
@ -169,22 +168,21 @@ movaps %xmm3, (%rbx)
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.7%)
# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (71.0%)
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 34 (16.3%)
# CHECK-NEXT: 2, 148 (71.2%)
# CHECK-NEXT: 4, 26 (12.5%)
# CHECK-NEXT: 0, 33 (15.9%)
# CHECK-NEXT: 2, 148 (71.5%)
# CHECK-NEXT: 4, 26 (12.6%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: 0, 3 (1.4%)
# CHECK-NEXT: 1, 10 (4.8%)
# CHECK-NEXT: 2, 195 (93.8%)
# CHECK-NEXT: 0, 7 (3.4%)
# CHECK-NEXT: 2, 200 (96.6%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: [1] Resource name.
@ -213,16 +211,16 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - - 1.00 - - 1.00 movw %sp, (%rax)
# CHECK-NEXT: - - - - - - 1.00 - movw (%rcx), %bp
# CHECK-NEXT: - - - - - - 0.95 0.05 movw (%rdx), %si
# CHECK-NEXT: - - - - 1.00 - 0.05 0.95 movw %di, (%rbx)
# CHECK-NEXT: - - - - - - - 1.00 movw (%rdx), %si
# CHECK-NEXT: - - - - 1.00 - 1.00 - movw %di, (%rbx)
# CHECK: Timeline view:
# CHECK-NEXT: Index 0123456789
# CHECK-NEXT: Index 012345678
# CHECK: [0,0] DeER . . movw %sp, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER . movw (%rcx), %bp
# CHECK-NEXT: [0,2] D=eeeeeER. movw (%rdx), %si
# CHECK-NEXT: [0,3] D======eER movw %di, (%rbx)
# CHECK: [0,0] DeER . . movw %sp, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER. movw (%rcx), %bp
# CHECK-NEXT: [0,2] D=eeeeeER movw (%rdx), %si
# CHECK-NEXT: [0,3] D=eE----R movw %di, (%rbx)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -234,19 +232,19 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movw %sp, (%rax)
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movw (%rcx), %bp
# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movw (%rdx), %si
# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movw %di, (%rbx)
# CHECK-NEXT: 1 2.8 1.0 0.0 <total>
# CHECK-NEXT: 3. 1 2.0 0.0 4.0 movw %di, (%rbx)
# CHECK-NEXT: 1 1.5 1.0 1.0 <total>
# CHECK: [2] Code Region
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 208
# CHECK-NEXT: Total Cycles: 207
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.92
# CHECK-NEXT: IPC: 1.92
# CHECK-NEXT: uOps Per Cycle: 1.93
# CHECK-NEXT: IPC: 1.93
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
@ -266,22 +264,21 @@ movaps %xmm3, (%rbx)
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.7%)
# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (71.0%)
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 34 (16.3%)
# CHECK-NEXT: 2, 148 (71.2%)
# CHECK-NEXT: 4, 26 (12.5%)
# CHECK-NEXT: 0, 33 (15.9%)
# CHECK-NEXT: 2, 148 (71.5%)
# CHECK-NEXT: 4, 26 (12.6%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: 0, 3 (1.4%)
# CHECK-NEXT: 1, 10 (4.8%)
# CHECK-NEXT: 2, 195 (93.8%)
# CHECK-NEXT: 0, 7 (3.4%)
# CHECK-NEXT: 2, 200 (96.6%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: [1] Resource name.
@ -310,16 +307,16 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - - 1.00 - - 1.00 movl %esp, (%rax)
# CHECK-NEXT: - - - - - - 1.00 - movl (%rcx), %ebp
# CHECK-NEXT: - - - - - - 0.95 0.05 movl (%rdx), %esi
# CHECK-NEXT: - - - - 1.00 - 0.05 0.95 movl %edi, (%rbx)
# CHECK-NEXT: - - - - - - - 1.00 movl (%rdx), %esi
# CHECK-NEXT: - - - - 1.00 - 1.00 - movl %edi, (%rbx)
# CHECK: Timeline view:
# CHECK-NEXT: Index 0123456789
# CHECK-NEXT: Index 012345678
# CHECK: [0,0] DeER . . movl %esp, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER . movl (%rcx), %ebp
# CHECK-NEXT: [0,2] D=eeeeeER. movl (%rdx), %esi
# CHECK-NEXT: [0,3] D======eER movl %edi, (%rbx)
# CHECK: [0,0] DeER . . movl %esp, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER. movl (%rcx), %ebp
# CHECK-NEXT: [0,2] D=eeeeeER movl (%rdx), %esi
# CHECK-NEXT: [0,3] D=eE----R movl %edi, (%rbx)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -331,19 +328,19 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movl %esp, (%rax)
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movl (%rcx), %ebp
# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movl (%rdx), %esi
# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movl %edi, (%rbx)
# CHECK-NEXT: 1 2.8 1.0 0.0 <total>
# CHECK-NEXT: 3. 1 2.0 0.0 4.0 movl %edi, (%rbx)
# CHECK-NEXT: 1 1.5 1.0 1.0 <total>
# CHECK: [3] Code Region
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 208
# CHECK-NEXT: Total Cycles: 207
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.92
# CHECK-NEXT: IPC: 1.92
# CHECK-NEXT: uOps Per Cycle: 1.93
# CHECK-NEXT: IPC: 1.93
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
@ -363,22 +360,21 @@ movaps %xmm3, (%rbx)
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.7%)
# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (71.0%)
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 34 (16.3%)
# CHECK-NEXT: 2, 148 (71.2%)
# CHECK-NEXT: 4, 26 (12.5%)
# CHECK-NEXT: 0, 33 (15.9%)
# CHECK-NEXT: 2, 148 (71.5%)
# CHECK-NEXT: 4, 26 (12.6%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: 0, 3 (1.4%)
# CHECK-NEXT: 1, 10 (4.8%)
# CHECK-NEXT: 2, 195 (93.8%)
# CHECK-NEXT: 0, 7 (3.4%)
# CHECK-NEXT: 2, 200 (96.6%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: [1] Resource name.
@ -407,16 +403,16 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - - 1.00 - - 1.00 movq %rsp, (%rax)
# CHECK-NEXT: - - - - - - 1.00 - movq (%rcx), %rbp
# CHECK-NEXT: - - - - - - 0.95 0.05 movq (%rdx), %rsi
# CHECK-NEXT: - - - - 1.00 - 0.05 0.95 movq %rdi, (%rbx)
# CHECK-NEXT: - - - - - - - 1.00 movq (%rdx), %rsi
# CHECK-NEXT: - - - - 1.00 - 1.00 - movq %rdi, (%rbx)
# CHECK: Timeline view:
# CHECK-NEXT: Index 0123456789
# CHECK-NEXT: Index 012345678
# CHECK: [0,0] DeER . . movq %rsp, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER . movq (%rcx), %rbp
# CHECK-NEXT: [0,2] D=eeeeeER. movq (%rdx), %rsi
# CHECK-NEXT: [0,3] D======eER movq %rdi, (%rbx)
# CHECK: [0,0] DeER . . movq %rsp, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER. movq (%rcx), %rbp
# CHECK-NEXT: [0,2] D=eeeeeER movq (%rdx), %rsi
# CHECK-NEXT: [0,3] D=eE----R movq %rdi, (%rbx)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -428,19 +424,19 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movq %rsp, (%rax)
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movq (%rcx), %rbp
# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movq (%rdx), %rsi
# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movq %rdi, (%rbx)
# CHECK-NEXT: 1 2.8 1.0 0.0 <total>
# CHECK-NEXT: 3. 1 2.0 0.0 4.0 movq %rdi, (%rbx)
# CHECK-NEXT: 1 1.5 1.0 1.0 <total>
# CHECK: [4] Code Region
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 208
# CHECK-NEXT: Total Cycles: 207
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.92
# CHECK-NEXT: IPC: 1.92
# CHECK-NEXT: uOps Per Cycle: 1.93
# CHECK-NEXT: IPC: 1.93
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
@ -460,22 +456,21 @@ movaps %xmm3, (%rbx)
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.7%)
# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (71.0%)
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 34 (16.3%)
# CHECK-NEXT: 2, 148 (71.2%)
# CHECK-NEXT: 4, 26 (12.5%)
# CHECK-NEXT: 0, 33 (15.9%)
# CHECK-NEXT: 2, 148 (71.5%)
# CHECK-NEXT: 4, 26 (12.6%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: 0, 3 (1.4%)
# CHECK-NEXT: 1, 10 (4.8%)
# CHECK-NEXT: 2, 195 (93.8%)
# CHECK-NEXT: 0, 7 (3.4%)
# CHECK-NEXT: 2, 200 (96.6%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: [1] Resource name.
@ -504,16 +499,16 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - - 1.00 - - 1.00 movd %mm0, (%rax)
# CHECK-NEXT: - - - - - - 1.00 - movd (%rcx), %mm1
# CHECK-NEXT: - - - - - - 0.95 0.05 movd (%rdx), %mm2
# CHECK-NEXT: - - - - 1.00 - 0.05 0.95 movd %mm3, (%rbx)
# CHECK-NEXT: - - - - - - - 1.00 movd (%rdx), %mm2
# CHECK-NEXT: - - - - 1.00 - 1.00 - movd %mm3, (%rbx)
# CHECK: Timeline view:
# CHECK-NEXT: Index 0123456789
# CHECK-NEXT: Index 012345678
# CHECK: [0,0] DeER . . movd %mm0, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER . movd (%rcx), %mm1
# CHECK-NEXT: [0,2] D=eeeeeER. movd (%rdx), %mm2
# CHECK-NEXT: [0,3] D======eER movd %mm3, (%rbx)
# CHECK: [0,0] DeER . . movd %mm0, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER. movd (%rcx), %mm1
# CHECK-NEXT: [0,2] D=eeeeeER movd (%rdx), %mm2
# CHECK-NEXT: [0,3] D=eE----R movd %mm3, (%rbx)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -525,19 +520,19 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movd %mm0, (%rax)
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movd (%rcx), %mm1
# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movd (%rdx), %mm2
# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movd %mm3, (%rbx)
# CHECK-NEXT: 1 2.8 1.0 0.0 <total>
# CHECK-NEXT: 3. 1 2.0 0.0 4.0 movd %mm3, (%rbx)
# CHECK-NEXT: 1 1.5 1.0 1.0 <total>
# CHECK: [5] Code Region
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 209
# CHECK-NEXT: Total Cycles: 208
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.91
# CHECK-NEXT: IPC: 1.91
# CHECK-NEXT: uOps Per Cycle: 1.92
# CHECK-NEXT: IPC: 1.92
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
@ -557,22 +552,21 @@ movaps %xmm3, (%rbx)
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.3%)
# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.7%)
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 35 (16.7%)
# CHECK-NEXT: 2, 148 (70.8%)
# CHECK-NEXT: 4, 26 (12.4%)
# CHECK-NEXT: 0, 34 (16.3%)
# CHECK-NEXT: 2, 148 (71.2%)
# CHECK-NEXT: 4, 26 (12.5%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: 0, 3 (1.4%)
# CHECK-NEXT: 1, 12 (5.7%)
# CHECK-NEXT: 2, 194 (92.8%)
# CHECK-NEXT: 0, 8 (3.8%)
# CHECK-NEXT: 2, 200 (96.2%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: [1] Resource name.
@ -601,17 +595,16 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - - 1.00 - - 1.00 movaps %xmm0, (%rax)
# CHECK-NEXT: - - - - - - 1.00 - movaps (%rcx), %xmm1
# CHECK-NEXT: - - - - - - 0.94 0.06 movaps (%rdx), %xmm2
# CHECK-NEXT: - - - - 1.00 - 0.06 0.94 movaps %xmm3, (%rbx)
# CHECK-NEXT: - - - - - - - 1.00 movaps (%rdx), %xmm2
# CHECK-NEXT: - - - - 1.00 - 1.00 - movaps %xmm3, (%rbx)
# CHECK: Timeline view:
# CHECK-NEXT: 0
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeER . . movaps %xmm0, (%rax)
# CHECK-NEXT: [0,1] DeeeeeeER . movaps (%rcx), %xmm1
# CHECK-NEXT: [0,2] D=eeeeeeER. movaps (%rdx), %xmm2
# CHECK-NEXT: [0,3] D=======eER movaps %xmm3, (%rbx)
# CHECK: [0,0] DeER . . movaps %xmm0, (%rax)
# CHECK-NEXT: [0,1] DeeeeeeER. movaps (%rcx), %xmm1
# CHECK-NEXT: [0,2] D=eeeeeeER movaps (%rdx), %xmm2
# CHECK-NEXT: [0,3] D=eE-----R movaps %xmm3, (%rbx)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -623,5 +616,5 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movaps %xmm0, (%rax)
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movaps (%rcx), %xmm1
# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movaps (%rdx), %xmm2
# CHECK-NEXT: 3. 1 8.0 0.0 0.0 movaps %xmm3, (%rbx)
# CHECK-NEXT: 1 3.0 1.0 0.0 <total>
# CHECK-NEXT: 3. 1 2.0 0.0 5.0 movaps %xmm3, (%rbx)
# CHECK-NEXT: 1 1.5 1.0 1.3 <total>

View File

@ -135,10 +135,10 @@ movaps %xmm3, (%rbx)
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movb %spl, (%rax)
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movb %bpl, (%rcx)
# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movb %sil, (%rdx)
# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movb %dil, (%rbx)
# CHECK-NEXT: 1 2.5 0.3 0.0 <total>
# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movb %bpl, (%rcx)
# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movb %sil, (%rdx)
# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movb %dil, (%rbx)
# CHECK-NEXT: 1 2.5 1.0 0.0 <total>
# CHECK: [1] Code Region
@ -232,10 +232,10 @@ movaps %xmm3, (%rbx)
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movw %sp, (%rax)
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movw %bp, (%rcx)
# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movw %si, (%rdx)
# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movw %di, (%rbx)
# CHECK-NEXT: 1 2.5 0.3 0.0 <total>
# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movw %bp, (%rcx)
# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movw %si, (%rdx)
# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movw %di, (%rbx)
# CHECK-NEXT: 1 2.5 1.0 0.0 <total>
# CHECK: [2] Code Region
@ -329,10 +329,10 @@ movaps %xmm3, (%rbx)
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movl %esp, (%rax)
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movl %ebp, (%rcx)
# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movl %esi, (%rdx)
# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movl %edi, (%rbx)
# CHECK-NEXT: 1 2.5 0.3 0.0 <total>
# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movl %ebp, (%rcx)
# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movl %esi, (%rdx)
# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movl %edi, (%rbx)
# CHECK-NEXT: 1 2.5 1.0 0.0 <total>
# CHECK: [3] Code Region
@ -426,10 +426,10 @@ movaps %xmm3, (%rbx)
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movq %rsp, (%rax)
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movq %rbp, (%rcx)
# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movq %rsi, (%rdx)
# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movq %rdi, (%rbx)
# CHECK-NEXT: 1 2.5 0.3 0.0 <total>
# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movq %rbp, (%rcx)
# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movq %rsi, (%rdx)
# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movq %rdi, (%rbx)
# CHECK-NEXT: 1 2.5 1.0 0.0 <total>
# CHECK: [4] Code Region
@ -620,7 +620,7 @@ movaps %xmm3, (%rbx)
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movaps %xmm0, (%rax)
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movaps %xmm1, (%rcx)
# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movaps %xmm2, (%rdx)
# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movaps %xmm3, (%rbx)
# CHECK-NEXT: 1 2.5 0.3 0.0 <total>
# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movaps %xmm1, (%rcx)
# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movaps %xmm2, (%rdx)
# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movaps %xmm3, (%rbx)
# CHECK-NEXT: 1 2.5 1.0 0.0 <total>

View File

@ -72,23 +72,24 @@ movaps %xmm3, (%rbx)
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (84.0%)
# CHECK-NEXT: SCHEDQ - Scheduler full: 256 (83.7%)
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 34 (11.1%)
# CHECK-NEXT: 1, 172 (56.2%)
# CHECK-NEXT: 2, 86 (28.1%)
# CHECK-NEXT: 0, 35 (11.4%)
# CHECK-NEXT: 1, 171 (55.9%)
# CHECK-NEXT: 2, 85 (27.8%)
# CHECK-NEXT: 3, 1 (0.3%)
# CHECK-NEXT: 4, 14 (4.6%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: 0, 5 (1.6%)
# CHECK-NEXT: 1, 202 (66.0%)
# CHECK-NEXT: 2, 99 (32.4%)
# CHECK-NEXT: 0, 6 (2.0%)
# CHECK-NEXT: 1, 200 (65.4%)
# CHECK-NEXT: 2, 100 (32.7%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: [1] Resource name.
@ -99,8 +100,8 @@ movaps %xmm3, (%rbx)
# CHECK: [1] [2] [3] [4]
# CHECK-NEXT: PdEX 36 40 40
# CHECK-NEXT: PdFPU 0 0 64
# CHECK-NEXT: PdLoad 19 22 40
# CHECK-NEXT: PdStore 20 23 24
# CHECK-NEXT: PdLoad 21 24 40
# CHECK-NEXT: PdStore 18 21 24
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@ -133,18 +134,18 @@ movaps %xmm3, (%rbx)
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
# CHECK-NEXT: 0.96 0.04 - - - - - - - - - - - - - - - - - - - - 1.00 movb %spl, (%rax)
# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - - - 1.00 movb %spl, (%rax)
# CHECK-NEXT: 2.00 - - - - - - - - - - - - - - - - - - - 2.00 - - movb (%rcx), %bpl
# CHECK-NEXT: - 2.00 - - - - - - - - - - - - - - - - - 2.00 - - - movb (%rdx), %sil
# CHECK-NEXT: 0.04 0.96 - - - - - - - - - - - - - - - - - - - - 1.00 movb %dil, (%rbx)
# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - - - 1.00 movb %dil, (%rbx)
# CHECK: Timeline view:
# CHECK-NEXT: Index 0123456789
# CHECK-NEXT: Index 012345678
# CHECK: [0,0] DeER . . movb %spl, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER . movb (%rcx), %bpl
# CHECK-NEXT: [0,2] D=eeeeeER. movb (%rdx), %sil
# CHECK-NEXT: [0,3] D======eER movb %dil, (%rbx)
# CHECK: [0,0] DeER . . movb %spl, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER. movb (%rcx), %bpl
# CHECK-NEXT: [0,2] D=eeeeeER movb (%rdx), %sil
# CHECK-NEXT: [0,3] D==eE---R movb %dil, (%rbx)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -156,8 +157,8 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movb %spl, (%rax)
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movb (%rcx), %bpl
# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movb (%rdx), %sil
# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movb %dil, (%rbx)
# CHECK-NEXT: 1 2.8 1.0 0.0 <total>
# CHECK-NEXT: 3. 1 3.0 1.0 3.0 movb %dil, (%rbx)
# CHECK-NEXT: 1 1.8 1.3 0.8 <total>
# CHECK: [1] Code Region
@ -188,23 +189,24 @@ movaps %xmm3, (%rbx)
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (84.0%)
# CHECK-NEXT: SCHEDQ - Scheduler full: 256 (83.7%)
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 34 (11.1%)
# CHECK-NEXT: 1, 172 (56.2%)
# CHECK-NEXT: 2, 86 (28.1%)
# CHECK-NEXT: 0, 35 (11.4%)
# CHECK-NEXT: 1, 171 (55.9%)
# CHECK-NEXT: 2, 85 (27.8%)
# CHECK-NEXT: 3, 1 (0.3%)
# CHECK-NEXT: 4, 14 (4.6%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: 0, 5 (1.6%)
# CHECK-NEXT: 1, 202 (66.0%)
# CHECK-NEXT: 2, 99 (32.4%)
# CHECK-NEXT: 0, 6 (2.0%)
# CHECK-NEXT: 1, 200 (65.4%)
# CHECK-NEXT: 2, 100 (32.7%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: [1] Resource name.
@ -215,8 +217,8 @@ movaps %xmm3, (%rbx)
# CHECK: [1] [2] [3] [4]
# CHECK-NEXT: PdEX 36 40 40
# CHECK-NEXT: PdFPU 0 0 64
# CHECK-NEXT: PdLoad 19 22 40
# CHECK-NEXT: PdStore 20 23 24
# CHECK-NEXT: PdLoad 21 24 40
# CHECK-NEXT: PdStore 18 21 24
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@ -249,18 +251,18 @@ movaps %xmm3, (%rbx)
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
# CHECK-NEXT: 0.96 0.04 - - - - - - - - - - - - - - - - - - - - 1.00 movw %sp, (%rax)
# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - - - 1.00 movw %sp, (%rax)
# CHECK-NEXT: 2.00 - - - - - - - - - - - - - - - - - - - 2.00 - - movw (%rcx), %bp
# CHECK-NEXT: - 2.00 - - - - - - - - - - - - - - - - - 2.00 - - - movw (%rdx), %si
# CHECK-NEXT: 0.04 0.96 - - - - - - - - - - - - - - - - - - - - 1.00 movw %di, (%rbx)
# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - - - 1.00 movw %di, (%rbx)
# CHECK: Timeline view:
# CHECK-NEXT: Index 0123456789
# CHECK-NEXT: Index 012345678
# CHECK: [0,0] DeER . . movw %sp, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER . movw (%rcx), %bp
# CHECK-NEXT: [0,2] D=eeeeeER. movw (%rdx), %si
# CHECK-NEXT: [0,3] D======eER movw %di, (%rbx)
# CHECK: [0,0] DeER . . movw %sp, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER. movw (%rcx), %bp
# CHECK-NEXT: [0,2] D=eeeeeER movw (%rdx), %si
# CHECK-NEXT: [0,3] D==eE---R movw %di, (%rbx)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -272,8 +274,8 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movw %sp, (%rax)
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movw (%rcx), %bp
# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movw (%rdx), %si
# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movw %di, (%rbx)
# CHECK-NEXT: 1 2.8 1.0 0.0 <total>
# CHECK-NEXT: 3. 1 3.0 1.0 3.0 movw %di, (%rbx)
# CHECK-NEXT: 1 1.8 1.3 0.8 <total>
# CHECK: [2] Code Region
@ -304,23 +306,24 @@ movaps %xmm3, (%rbx)
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (84.0%)
# CHECK-NEXT: SCHEDQ - Scheduler full: 256 (83.7%)
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 34 (11.1%)
# CHECK-NEXT: 1, 172 (56.2%)
# CHECK-NEXT: 2, 86 (28.1%)
# CHECK-NEXT: 0, 35 (11.4%)
# CHECK-NEXT: 1, 171 (55.9%)
# CHECK-NEXT: 2, 85 (27.8%)
# CHECK-NEXT: 3, 1 (0.3%)
# CHECK-NEXT: 4, 14 (4.6%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: 0, 5 (1.6%)
# CHECK-NEXT: 1, 202 (66.0%)
# CHECK-NEXT: 2, 99 (32.4%)
# CHECK-NEXT: 0, 6 (2.0%)
# CHECK-NEXT: 1, 200 (65.4%)
# CHECK-NEXT: 2, 100 (32.7%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: [1] Resource name.
@ -331,8 +334,8 @@ movaps %xmm3, (%rbx)
# CHECK: [1] [2] [3] [4]
# CHECK-NEXT: PdEX 36 40 40
# CHECK-NEXT: PdFPU 0 0 64
# CHECK-NEXT: PdLoad 19 22 40
# CHECK-NEXT: PdStore 20 23 24
# CHECK-NEXT: PdLoad 21 24 40
# CHECK-NEXT: PdStore 18 21 24
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@ -365,18 +368,18 @@ movaps %xmm3, (%rbx)
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
# CHECK-NEXT: 0.96 0.04 - - - - - - - - - - - - - - - - - - - - 1.00 movl %esp, (%rax)
# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - - - 1.00 movl %esp, (%rax)
# CHECK-NEXT: 2.00 - - - - - - - - - - - - - - - - - - - 2.00 - - movl (%rcx), %ebp
# CHECK-NEXT: - 2.00 - - - - - - - - - - - - - - - - - 2.00 - - - movl (%rdx), %esi
# CHECK-NEXT: 0.04 0.96 - - - - - - - - - - - - - - - - - - - - 1.00 movl %edi, (%rbx)
# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - - - 1.00 movl %edi, (%rbx)
# CHECK: Timeline view:
# CHECK-NEXT: Index 0123456789
# CHECK-NEXT: Index 012345678
# CHECK: [0,0] DeER . . movl %esp, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER . movl (%rcx), %ebp
# CHECK-NEXT: [0,2] D=eeeeeER. movl (%rdx), %esi
# CHECK-NEXT: [0,3] D======eER movl %edi, (%rbx)
# CHECK: [0,0] DeER . . movl %esp, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER. movl (%rcx), %ebp
# CHECK-NEXT: [0,2] D=eeeeeER movl (%rdx), %esi
# CHECK-NEXT: [0,3] D==eE---R movl %edi, (%rbx)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -388,8 +391,8 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movl %esp, (%rax)
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movl (%rcx), %ebp
# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movl (%rdx), %esi
# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movl %edi, (%rbx)
# CHECK-NEXT: 1 2.8 1.0 0.0 <total>
# CHECK-NEXT: 3. 1 3.0 1.0 3.0 movl %edi, (%rbx)
# CHECK-NEXT: 1 1.8 1.3 0.8 <total>
# CHECK: [3] Code Region
@ -420,23 +423,24 @@ movaps %xmm3, (%rbx)
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (84.0%)
# CHECK-NEXT: SCHEDQ - Scheduler full: 256 (83.7%)
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 34 (11.1%)
# CHECK-NEXT: 1, 172 (56.2%)
# CHECK-NEXT: 2, 86 (28.1%)
# CHECK-NEXT: 0, 35 (11.4%)
# CHECK-NEXT: 1, 171 (55.9%)
# CHECK-NEXT: 2, 85 (27.8%)
# CHECK-NEXT: 3, 1 (0.3%)
# CHECK-NEXT: 4, 14 (4.6%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: 0, 5 (1.6%)
# CHECK-NEXT: 1, 202 (66.0%)
# CHECK-NEXT: 2, 99 (32.4%)
# CHECK-NEXT: 0, 6 (2.0%)
# CHECK-NEXT: 1, 200 (65.4%)
# CHECK-NEXT: 2, 100 (32.7%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: [1] Resource name.
@ -447,8 +451,8 @@ movaps %xmm3, (%rbx)
# CHECK: [1] [2] [3] [4]
# CHECK-NEXT: PdEX 36 40 40
# CHECK-NEXT: PdFPU 0 0 64
# CHECK-NEXT: PdLoad 19 22 40
# CHECK-NEXT: PdStore 20 23 24
# CHECK-NEXT: PdLoad 21 24 40
# CHECK-NEXT: PdStore 18 21 24
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@ -481,18 +485,18 @@ movaps %xmm3, (%rbx)
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
# CHECK-NEXT: 0.96 0.04 - - - - - - - - - - - - - - - - - - - - 1.00 movq %rsp, (%rax)
# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - - - 1.00 movq %rsp, (%rax)
# CHECK-NEXT: 2.00 - - - - - - - - - - - - - - - - - - - 2.00 - - movq (%rcx), %rbp
# CHECK-NEXT: - 2.00 - - - - - - - - - - - - - - - - - 2.00 - - - movq (%rdx), %rsi
# CHECK-NEXT: 0.04 0.96 - - - - - - - - - - - - - - - - - - - - 1.00 movq %rdi, (%rbx)
# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - - - 1.00 movq %rdi, (%rbx)
# CHECK: Timeline view:
# CHECK-NEXT: Index 0123456789
# CHECK-NEXT: Index 012345678
# CHECK: [0,0] DeER . . movq %rsp, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER . movq (%rcx), %rbp
# CHECK-NEXT: [0,2] D=eeeeeER. movq (%rdx), %rsi
# CHECK-NEXT: [0,3] D======eER movq %rdi, (%rbx)
# CHECK: [0,0] DeER . . movq %rsp, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER. movq (%rcx), %rbp
# CHECK-NEXT: [0,2] D=eeeeeER movq (%rdx), %rsi
# CHECK-NEXT: [0,3] D==eE---R movq %rdi, (%rbx)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -504,14 +508,14 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movq %rsp, (%rax)
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movq (%rcx), %rbp
# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movq (%rdx), %rsi
# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movq %rdi, (%rbx)
# CHECK-NEXT: 1 2.8 1.0 0.0 <total>
# CHECK-NEXT: 3. 1 3.0 1.0 3.0 movq %rdi, (%rbx)
# CHECK-NEXT: 1 1.8 1.3 0.8 <total>
# CHECK: [4] Code Region
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 554
# CHECK-NEXT: Total Cycles: 553
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 4
@ -536,24 +540,24 @@ movaps %xmm3, (%rbx)
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 55 (9.9%)
# CHECK-NEXT: SCHEDQ - Scheduler full: 57 (10.3%)
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 437 (78.9%)
# CHECK-NEXT: SQ - Store queue full: 432 (78.1%)
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 365 (65.9%)
# CHECK-NEXT: 0, 364 (65.8%)
# CHECK-NEXT: 1, 88 (15.9%)
# CHECK-NEXT: 2, 3 (0.5%)
# CHECK-NEXT: 3, 86 (15.5%)
# CHECK-NEXT: 4, 12 (2.2%)
# CHECK-NEXT: 2, 4 (0.7%)
# CHECK-NEXT: 3, 84 (15.2%)
# CHECK-NEXT: 4, 13 (2.4%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: 0, 253 (45.7%)
# CHECK-NEXT: 1, 202 (36.5%)
# CHECK-NEXT: 2, 99 (17.9%)
# CHECK-NEXT: 0, 253 (45.8%)
# CHECK-NEXT: 1, 200 (36.2%)
# CHECK-NEXT: 2, 100 (18.1%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: [1] Resource name.
@ -599,18 +603,17 @@ movaps %xmm3, (%rbx)
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - - - 3.00 - - - - 1.00 movd %mm0, (%rax)
# CHECK-NEXT: 1.53 1.47 - - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - movd (%rcx), %mm1
# CHECK-NEXT: 1.47 1.53 - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - - movd (%rdx), %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - movd (%rcx), %mm1
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - - movd (%rdx), %mm2
# CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - - 1.00 movd %mm3, (%rbx)
# CHECK: Timeline view:
# CHECK-NEXT: 0
# CHECK-NEXT: Index 0123456789
# CHECK-NEXT: Index 012345678
# CHECK: [0,0] DeeER. . movd %mm0, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER . movd (%rcx), %mm1
# CHECK-NEXT: [0,2] D=eeeeeER . movd (%rdx), %mm2
# CHECK-NEXT: [0,3] D======eeER movd %mm3, (%rbx)
# CHECK: [0,0] DeeER. . movd %mm0, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER. movd (%rcx), %mm1
# CHECK-NEXT: [0,2] D=eeeeeER movd (%rdx), %mm2
# CHECK-NEXT: [0,3] D===eeE-R movd %mm3, (%rbx)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -622,8 +625,8 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movd %mm0, (%rax)
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movd (%rcx), %mm1
# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movd (%rdx), %mm2
# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movd %mm3, (%rbx)
# CHECK-NEXT: 1 2.8 1.0 0.0 <total>
# CHECK-NEXT: 3. 1 4.0 1.0 1.0 movd %mm3, (%rbx)
# CHECK-NEXT: 1 2.0 1.3 0.3 <total>
# CHECK: [5] Code Region
@ -668,9 +671,9 @@ movaps %xmm3, (%rbx)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: 0, 104 (25.7%)
# CHECK-NEXT: 1, 202 (49.9%)
# CHECK-NEXT: 2, 99 (24.4%)
# CHECK-NEXT: 0, 105 (25.9%)
# CHECK-NEXT: 1, 200 (49.4%)
# CHECK-NEXT: 2, 100 (24.7%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: [1] Resource name.
@ -679,10 +682,10 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: [4] Total number of buffer entries.
# CHECK: [1] [2] [3] [4]
# CHECK-NEXT: PdEX 37 40 40
# CHECK-NEXT: PdFPU 37 40 64
# CHECK-NEXT: PdLoad 19 22 40
# CHECK-NEXT: PdStore 20 22 24
# CHECK-NEXT: PdEX 36 40 40
# CHECK-NEXT: PdFPU 36 40 64
# CHECK-NEXT: PdLoad 20 23 40
# CHECK-NEXT: PdStore 19 21 24
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@ -721,12 +724,12 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - - 1.00 movaps %xmm3, (%rbx)
# CHECK: Timeline view:
# CHECK-NEXT: Index 0123456789
# CHECK-NEXT: Index 012345678
# CHECK: [0,0] DeER . . movaps %xmm0, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER . movaps (%rcx), %xmm1
# CHECK-NEXT: [0,2] D=eeeeeER. movaps (%rdx), %xmm2
# CHECK-NEXT: [0,3] D======eER movaps %xmm3, (%rbx)
# CHECK: [0,0] DeER . . movaps %xmm0, (%rax)
# CHECK-NEXT: [0,1] DeeeeeER. movaps (%rcx), %xmm1
# CHECK-NEXT: [0,2] D=eeeeeER movaps (%rdx), %xmm2
# CHECK-NEXT: [0,3] D===eE--R movaps %xmm3, (%rbx)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -738,5 +741,5 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movaps %xmm0, (%rax)
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movaps (%rcx), %xmm1
# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movaps (%rdx), %xmm2
# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movaps %xmm3, (%rbx)
# CHECK-NEXT: 1 2.8 1.0 0.0 <total>
# CHECK-NEXT: 3. 1 4.0 2.0 2.0 movaps %xmm3, (%rbx)
# CHECK-NEXT: 1 2.0 1.5 0.5 <total>

View File

@ -101,9 +101,9 @@ vmovaps %xmm0, 48(%rdi)
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rsi), %xmm0
# CHECK-NEXT: 1. 1 7.0 1.0 0.0 vmovaps %xmm0, (%rdi)
# CHECK-NEXT: 2. 1 1.0 1.0 2.0 vmovaps 16(%rsi), %xmm0
# CHECK-NEXT: 3. 1 8.0 0.0 0.0 vmovaps %xmm0, 16(%rdi)
# CHECK-NEXT: 3. 1 8.0 1.0 0.0 vmovaps %xmm0, 16(%rdi)
# CHECK-NEXT: 4. 1 3.0 3.0 0.0 vmovaps 32(%rsi), %xmm0
# CHECK-NEXT: 5. 1 9.0 1.0 0.0 vmovaps %xmm0, 32(%rdi)
# CHECK-NEXT: 6. 1 3.0 3.0 2.0 vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: 7. 1 10.0 0.0 0.0 vmovaps %xmm0, 48(%rdi)
# CHECK-NEXT: 1 5.3 1.3 0.5 <total>
# CHECK-NEXT: 7. 1 10.0 1.0 0.0 vmovaps %xmm0, 48(%rdi)
# CHECK-NEXT: 1 5.3 1.5 0.5 <total>

View File

@ -159,10 +159,10 @@ vmovaps %ymm3, (%rbx)
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movb %spl, (%rax)
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movb %bpl, (%rcx)
# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movb %sil, (%rdx)
# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movb %dil, (%rbx)
# CHECK-NEXT: 1 2.5 0.3 0.0 <total>
# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movb %bpl, (%rcx)
# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movb %sil, (%rdx)
# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movb %dil, (%rbx)
# CHECK-NEXT: 1 2.5 1.0 0.0 <total>
# CHECK: [1] Code Region
@ -273,10 +273,10 @@ vmovaps %ymm3, (%rbx)
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movw %sp, (%rax)
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movw %bp, (%rcx)
# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movw %si, (%rdx)
# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movw %di, (%rbx)
# CHECK-NEXT: 1 2.5 0.3 0.0 <total>
# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movw %bp, (%rcx)
# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movw %si, (%rdx)
# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movw %di, (%rbx)
# CHECK-NEXT: 1 2.5 1.0 0.0 <total>
# CHECK: [2] Code Region
@ -387,10 +387,10 @@ vmovaps %ymm3, (%rbx)
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movl %esp, (%rax)
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movl %ebp, (%rcx)
# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movl %esi, (%rdx)
# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movl %edi, (%rbx)
# CHECK-NEXT: 1 2.5 0.3 0.0 <total>
# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movl %ebp, (%rcx)
# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movl %esi, (%rdx)
# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movl %edi, (%rbx)
# CHECK-NEXT: 1 2.5 1.0 0.0 <total>
# CHECK: [3] Code Region
@ -501,10 +501,10 @@ vmovaps %ymm3, (%rbx)
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movq %rsp, (%rax)
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movq %rbp, (%rcx)
# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movq %rsi, (%rdx)
# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movq %rdi, (%rbx)
# CHECK-NEXT: 1 2.5 0.3 0.0 <total>
# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movq %rbp, (%rcx)
# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movq %rsi, (%rdx)
# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movq %rdi, (%rbx)
# CHECK-NEXT: 1 2.5 1.0 0.0 <total>
# CHECK: [4] Code Region
@ -732,10 +732,10 @@ vmovaps %ymm3, (%rbx)
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movaps %xmm0, (%rax)
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movaps %xmm1, (%rcx)
# CHECK-NEXT: 2. 1 4.0 1.0 0.0 movaps %xmm2, (%rdx)
# CHECK-NEXT: 3. 1 5.0 0.0 0.0 movaps %xmm3, (%rbx)
# CHECK-NEXT: 1 3.0 0.5 0.0 <total>
# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movaps %xmm1, (%rcx)
# CHECK-NEXT: 2. 1 4.0 2.0 0.0 movaps %xmm2, (%rdx)
# CHECK-NEXT: 3. 1 5.0 1.0 0.0 movaps %xmm3, (%rbx)
# CHECK-NEXT: 1 3.0 1.3 0.0 <total>
# CHECK: [6] Code Region
@ -846,7 +846,7 @@ vmovaps %ymm3, (%rbx)
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps %ymm0, (%rax)
# CHECK-NEXT: 1. 1 2.0 1.0 0.0 vmovaps %ymm1, (%rcx)
# CHECK-NEXT: 2. 1 35.0 33.0 0.0 vmovaps %ymm2, (%rdx)
# CHECK-NEXT: 3. 1 36.0 1.0 0.0 vmovaps %ymm3, (%rbx)
# CHECK-NEXT: 1 18.5 9.0 0.0 <total>
# CHECK-NEXT: 1. 1 2.0 2.0 0.0 vmovaps %ymm1, (%rcx)
# CHECK-NEXT: 2. 1 35.0 34.0 0.0 vmovaps %ymm2, (%rdx)
# CHECK-NEXT: 3. 1 36.0 2.0 0.0 vmovaps %ymm3, (%rbx)
# CHECK-NEXT: 1 18.5 9.8 0.0 <total>

View File

@ -0,0 +1,146 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=1 < %s | FileCheck %s -check-prefixes=ALL,NOALIAS
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s -check-prefixes=ALL,YESALIAS
addq $44, 64(%r14)
addq $44, 128(%r14)
addq $44, 192(%r14)
addq $44, 256(%r14)
addq $44, 320(%r14)
addq $44, 384(%r14)
addq $44, 448(%r14)
addq $44, 512(%r14)
addq $44, 576(%r14)
addq $44, 640(%r14)
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1000
# NOALIAS-NEXT: Total Cycles: 1008
# YESALIAS-NEXT: Total Cycles: 6003
# ALL-NEXT: Total uOps: 1000
# ALL: Dispatch Width: 2
# NOALIAS-NEXT: uOps Per Cycle: 0.99
# NOALIAS-NEXT: IPC: 0.99
# YESALIAS-NEXT: uOps Per Cycle: 0.17
# YESALIAS-NEXT: IPC: 0.17
# ALL-NEXT: Block RThroughput: 10.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# ALL-NEXT: 1 6 1.00 * * addq $44, 64(%r14)
# ALL-NEXT: 1 6 1.00 * * addq $44, 128(%r14)
# ALL-NEXT: 1 6 1.00 * * addq $44, 192(%r14)
# ALL-NEXT: 1 6 1.00 * * addq $44, 256(%r14)
# ALL-NEXT: 1 6 1.00 * * addq $44, 320(%r14)
# ALL-NEXT: 1 6 1.00 * * addq $44, 384(%r14)
# ALL-NEXT: 1 6 1.00 * * addq $44, 448(%r14)
# ALL-NEXT: 1 6 1.00 * * addq $44, 512(%r14)
# ALL-NEXT: 1 6 1.00 * * addq $44, 576(%r14)
# ALL-NEXT: 1 6 1.00 * * addq $44, 640(%r14)
# ALL: Resources:
# ALL-NEXT: [0] - JALU0
# ALL-NEXT: [1] - JALU1
# ALL-NEXT: [2] - JDiv
# ALL-NEXT: [3] - JFPA
# ALL-NEXT: [4] - JFPM
# ALL-NEXT: [5] - JFPU0
# ALL-NEXT: [6] - JFPU1
# ALL-NEXT: [7] - JLAGU
# ALL-NEXT: [8] - JMul
# ALL-NEXT: [9] - JSAGU
# ALL-NEXT: [10] - JSTC
# ALL-NEXT: [11] - JVALU0
# ALL-NEXT: [12] - JVALU1
# ALL-NEXT: [13] - JVIMUL
# ALL: Resource pressure per iteration:
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# ALL-NEXT: 5.00 5.00 - - - - - 10.00 - 10.00 - - - -
# ALL: Resource pressure by instruction:
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# ALL-NEXT: - 1.00 - - - - - 1.00 - 1.00 - - - - addq $44, 64(%r14)
# ALL-NEXT: 1.00 - - - - - - 1.00 - 1.00 - - - - addq $44, 128(%r14)
# ALL-NEXT: - 1.00 - - - - - 1.00 - 1.00 - - - - addq $44, 192(%r14)
# ALL-NEXT: 1.00 - - - - - - 1.00 - 1.00 - - - - addq $44, 256(%r14)
# ALL-NEXT: - 1.00 - - - - - 1.00 - 1.00 - - - - addq $44, 320(%r14)
# ALL-NEXT: 1.00 - - - - - - 1.00 - 1.00 - - - - addq $44, 384(%r14)
# ALL-NEXT: - 1.00 - - - - - 1.00 - 1.00 - - - - addq $44, 448(%r14)
# ALL-NEXT: 1.00 - - - - - - 1.00 - 1.00 - - - - addq $44, 512(%r14)
# ALL-NEXT: - 1.00 - - - - - 1.00 - 1.00 - - - - addq $44, 576(%r14)
# ALL-NEXT: 1.00 - - - - - - 1.00 - 1.00 - - - - addq $44, 640(%r14)
# ALL: Timeline view:
# NOALIAS-NEXT: 01234567
# NOALIAS-NEXT: Index 0123456789
# YESALIAS-NEXT: 0123456789 0123456789 0123456789
# YESALIAS-NEXT: Index 0123456789 0123456789 0123456789 012
# NOALIAS: [0,0] DeeeeeeER . . . addq $44, 64(%r14)
# NOALIAS-NEXT: [0,1] D=eeeeeeER. . . addq $44, 128(%r14)
# NOALIAS-NEXT: [0,2] .D=eeeeeeER . . addq $44, 192(%r14)
# NOALIAS-NEXT: [0,3] .D==eeeeeeER . . addq $44, 256(%r14)
# NOALIAS-NEXT: [0,4] . D==eeeeeeER . . addq $44, 320(%r14)
# NOALIAS-NEXT: [0,5] . D===eeeeeeER . . addq $44, 384(%r14)
# NOALIAS-NEXT: [0,6] . D===eeeeeeER. . addq $44, 448(%r14)
# NOALIAS-NEXT: [0,7] . D====eeeeeeER . addq $44, 512(%r14)
# NOALIAS-NEXT: [0,8] . D====eeeeeeER. addq $44, 576(%r14)
# NOALIAS-NEXT: [0,9] . D=====eeeeeeER addq $44, 640(%r14)
# YESALIAS: [0,0] DeeeeeeER . . . . . . . . . . . . addq $44, 64(%r14)
# YESALIAS-NEXT: [0,1] D======eeeeeeER. . . . . . . . . . . addq $44, 128(%r14)
# YESALIAS-NEXT: [0,2] .D===========eeeeeeER . . . . . . . . . addq $44, 192(%r14)
# YESALIAS-NEXT: [0,3] .D=================eeeeeeER . . . . . . . . addq $44, 256(%r14)
# YESALIAS-NEXT: [0,4] . D======================eeeeeeER . . . . . . . addq $44, 320(%r14)
# YESALIAS-NEXT: [0,5] . D============================eeeeeeER . . . . . . addq $44, 384(%r14)
# YESALIAS-NEXT: [0,6] . D=================================eeeeeeER. . . . . addq $44, 448(%r14)
# YESALIAS-NEXT: [0,7] . D=======================================eeeeeeER . . . addq $44, 512(%r14)
# YESALIAS-NEXT: [0,8] . D============================================eeeeeeER . . addq $44, 576(%r14)
# YESALIAS-NEXT: [0,9] . D==================================================eeeeeeER addq $44, 640(%r14)
# ALL: Average Wait times (based on the timeline view):
# ALL-NEXT: [0]: Executions
# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue
# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# ALL-NEXT: [3]: Average time elapsed from WB until retire stage
# ALL: [0] [1] [2] [3]
# ALL-NEXT: 0. 1 1.0 1.0 0.0 addq $44, 64(%r14)
# NOALIAS-NEXT: 1. 1 2.0 1.0 0.0 addq $44, 128(%r14)
# NOALIAS-NEXT: 2. 1 2.0 1.0 0.0 addq $44, 192(%r14)
# NOALIAS-NEXT: 3. 1 3.0 1.0 0.0 addq $44, 256(%r14)
# NOALIAS-NEXT: 4. 1 3.0 1.0 0.0 addq $44, 320(%r14)
# NOALIAS-NEXT: 5. 1 4.0 1.0 0.0 addq $44, 384(%r14)
# NOALIAS-NEXT: 6. 1 4.0 1.0 0.0 addq $44, 448(%r14)
# NOALIAS-NEXT: 7. 1 5.0 1.0 0.0 addq $44, 512(%r14)
# NOALIAS-NEXT: 8. 1 5.0 1.0 0.0 addq $44, 576(%r14)
# NOALIAS-NEXT: 9. 1 6.0 1.0 0.0 addq $44, 640(%r14)
# NOALIAS-NEXT: 1 3.5 1.0 0.0 <total>
# YESALIAS-NEXT: 1. 1 7.0 0.0 0.0 addq $44, 128(%r14)
# YESALIAS-NEXT: 2. 1 12.0 0.0 0.0 addq $44, 192(%r14)
# YESALIAS-NEXT: 3. 1 18.0 0.0 0.0 addq $44, 256(%r14)
# YESALIAS-NEXT: 4. 1 23.0 0.0 0.0 addq $44, 320(%r14)
# YESALIAS-NEXT: 5. 1 29.0 0.0 0.0 addq $44, 384(%r14)
# YESALIAS-NEXT: 6. 1 34.0 0.0 0.0 addq $44, 448(%r14)
# YESALIAS-NEXT: 7. 1 40.0 0.0 0.0 addq $44, 512(%r14)
# YESALIAS-NEXT: 8. 1 45.0 0.0 0.0 addq $44, 576(%r14)
# YESALIAS-NEXT: 9. 1 51.0 0.0 0.0 addq $44, 640(%r14)
# YESALIAS-NEXT: 1 26.0 0.1 0.0 <total>

View File

@ -21,12 +21,12 @@ imul %ecx, %ecx
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 10
# CHECK-NEXT: Total Cycles: 27
# CHECK-NEXT: Total Cycles: 24
# CHECK-NEXT: Total uOps: 16
# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 0.59
# CHECK-NEXT: IPC: 0.37
# CHECK-NEXT: uOps Per Cycle: 0.67
# CHECK-NEXT: IPC: 0.42
# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Instruction Info:
@ -74,18 +74,18 @@ imul %ecx, %ecx
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 0123456
# CHECK-NEXT: Index 0123456789 0123
# CHECK: [0,0] DeeeeeeeeeeeER . . .. xaddl %ecx, (%rsp)
# CHECK-NEXT: [0,1] . D=eE-------R . . .. addl %ecx, %ecx
# CHECK-NEXT: [0,2] . D==eE-------R. . .. addl %ecx, %ecx
# CHECK-NEXT: [0,3] . D==eeeE----R. . .. imull %ecx, %ecx
# CHECK-NEXT: [0,4] . D=====eeeE--R . .. imull %ecx, %ecx
# CHECK-NEXT: [1,0] . D=======eeeeeeeeeeeER.. xaddl %ecx, (%rsp)
# CHECK-NEXT: [1,1] . .D========eE-------R.. addl %ecx, %ecx
# CHECK-NEXT: [1,2] . .D=========eE-------R. addl %ecx, %ecx
# CHECK-NEXT: [1,3] . . D=========eeeE----R. imull %ecx, %ecx
# CHECK-NEXT: [1,4] . . D============eeeE--R imull %ecx, %ecx
# CHECK: [0,0] DeeeeeeeeeeeER . . . xaddl %ecx, (%rsp)
# CHECK-NEXT: [0,1] . D=eE-------R . . . addl %ecx, %ecx
# CHECK-NEXT: [0,2] . D==eE-------R. . . addl %ecx, %ecx
# CHECK-NEXT: [0,3] . D==eeeE----R. . . imull %ecx, %ecx
# CHECK-NEXT: [0,4] . D=====eeeE--R . . imull %ecx, %ecx
# CHECK-NEXT: [1,0] . D====eeeeeeeeeeeER . xaddl %ecx, (%rsp)
# CHECK-NEXT: [1,1] . .D=====eE-------R . addl %ecx, %ecx
# CHECK-NEXT: [1,2] . .D======eE-------R. addl %ecx, %ecx
# CHECK-NEXT: [1,3] . . D======eeeE----R. imull %ecx, %ecx
# CHECK-NEXT: [1,4] . . D=========eeeE--R imull %ecx, %ecx
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -94,12 +94,12 @@ imul %ecx, %ecx
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 4.5 0.5 0.0 xaddl %ecx, (%rsp)
# CHECK-NEXT: 1. 2 5.5 0.0 7.0 addl %ecx, %ecx
# CHECK-NEXT: 2. 2 6.5 0.0 7.0 addl %ecx, %ecx
# CHECK-NEXT: 3. 2 6.5 0.0 4.0 imull %ecx, %ecx
# CHECK-NEXT: 4. 2 9.5 0.0 2.0 imull %ecx, %ecx
# CHECK-NEXT: 2 6.5 0.1 4.0 <total>
# CHECK-NEXT: 0. 2 3.0 0.5 0.0 xaddl %ecx, (%rsp)
# CHECK-NEXT: 1. 2 4.0 0.0 7.0 addl %ecx, %ecx
# CHECK-NEXT: 2. 2 5.0 0.0 7.0 addl %ecx, %ecx
# CHECK-NEXT: 3. 2 5.0 0.0 4.0 imull %ecx, %ecx
# CHECK-NEXT: 4. 2 8.0 0.0 2.0 imull %ecx, %ecx
# CHECK-NEXT: 2 5.0 0.1 4.0 <total>
# CHECK: [1] Code Region

View File

@ -0,0 +1,142 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mcpu=haswell -timeline -timeline-max-iterations=1 < %s | FileCheck %s -check-prefixes=ALL,NOALIAS
# RUN: llvm-mca -mcpu=haswell -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s -check-prefixes=ALL,YESALIAS
addq $44, 64(%r14)
addq $44, 128(%r14)
addq $44, 192(%r14)
addq $44, 256(%r14)
addq $44, 320(%r14)
addq $44, 384(%r14)
addq $44, 448(%r14)
addq $44, 512(%r14)
addq $44, 576(%r14)
addq $44, 640(%r14)
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1000
# NOALIAS-NEXT: Total Cycles: 1009
# YESALIAS-NEXT: Total Cycles: 7003
# ALL-NEXT: Total uOps: 3000
# ALL: Dispatch Width: 4
# NOALIAS-NEXT: uOps Per Cycle: 2.97
# NOALIAS-NEXT: IPC: 0.99
# YESALIAS-NEXT: uOps Per Cycle: 0.43
# YESALIAS-NEXT: IPC: 0.14
# ALL-NEXT: Block RThroughput: 10.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# ALL-NEXT: 3 7 1.00 * * addq $44, 64(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 128(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 192(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 256(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 320(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 384(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 448(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 512(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 576(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 640(%r14)
# ALL: Resources:
# ALL-NEXT: [0] - HWDivider
# ALL-NEXT: [1] - HWFPDivider
# ALL-NEXT: [2] - HWPort0
# ALL-NEXT: [3] - HWPort1
# ALL-NEXT: [4] - HWPort2
# ALL-NEXT: [5] - HWPort3
# ALL-NEXT: [6] - HWPort4
# ALL-NEXT: [7] - HWPort5
# ALL-NEXT: [8] - HWPort6
# ALL-NEXT: [9] - HWPort7
# ALL: Resource pressure per iteration:
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# ALL-NEXT: - - 2.50 2.50 6.66 6.67 10.00 2.50 2.50 6.67
# ALL: Resource pressure by instruction:
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# ALL-NEXT: - - - 0.50 0.66 0.67 1.00 - 0.50 0.67 addq $44, 64(%r14)
# ALL-NEXT: - - 0.50 - 0.67 0.66 1.00 0.50 - 0.67 addq $44, 128(%r14)
# ALL-NEXT: - - - 0.50 0.67 0.67 1.00 - 0.50 0.66 addq $44, 192(%r14)
# ALL-NEXT: - - 0.50 - 0.66 0.67 1.00 0.50 - 0.67 addq $44, 256(%r14)
# ALL-NEXT: - - - 0.50 0.67 0.66 1.00 - 0.50 0.67 addq $44, 320(%r14)
# ALL-NEXT: - - 0.50 - 0.67 0.67 1.00 0.50 - 0.66 addq $44, 384(%r14)
# ALL-NEXT: - - - 0.50 0.66 0.67 1.00 - 0.50 0.67 addq $44, 448(%r14)
# ALL-NEXT: - - 0.50 - 0.67 0.66 1.00 0.50 - 0.67 addq $44, 512(%r14)
# ALL-NEXT: - - - 0.50 0.67 0.67 1.00 - 0.50 0.66 addq $44, 576(%r14)
# ALL-NEXT: - - 0.50 - 0.66 0.67 1.00 0.50 - 0.67 addq $44, 640(%r14)
# ALL: Timeline view:
# NOALIAS-NEXT: 012345678
# NOALIAS-NEXT: Index 0123456789
# YESALIAS-NEXT: 0123456789 0123456789 0123456789 012
# YESALIAS-NEXT: Index 0123456789 0123456789 0123456789 0123456789
# NOALIAS: [0,0] DeeeeeeeER. . . addq $44, 64(%r14)
# NOALIAS-NEXT: [0,1] .DeeeeeeeER . . addq $44, 128(%r14)
# NOALIAS-NEXT: [0,2] . DeeeeeeeER . . addq $44, 192(%r14)
# NOALIAS-NEXT: [0,3] . DeeeeeeeER . . addq $44, 256(%r14)
# NOALIAS-NEXT: [0,4] . DeeeeeeeER . . addq $44, 320(%r14)
# NOALIAS-NEXT: [0,5] . DeeeeeeeER. . addq $44, 384(%r14)
# NOALIAS-NEXT: [0,6] . .DeeeeeeeER . addq $44, 448(%r14)
# NOALIAS-NEXT: [0,7] . . DeeeeeeeER . addq $44, 512(%r14)
# NOALIAS-NEXT: [0,8] . . DeeeeeeeER. addq $44, 576(%r14)
# NOALIAS-NEXT: [0,9] . . DeeeeeeeER addq $44, 640(%r14)
# YESALIAS: [0,0] DeeeeeeeER. . . . . . . . . . . . . . addq $44, 64(%r14)
# YESALIAS-NEXT: [0,1] .D======eeeeeeeER . . . . . . . . . . . . addq $44, 128(%r14)
# YESALIAS-NEXT: [0,2] . D============eeeeeeeER . . . . . . . . . . . addq $44, 192(%r14)
# YESALIAS-NEXT: [0,3] . D==================eeeeeeeER . . . . . . . . . addq $44, 256(%r14)
# YESALIAS-NEXT: [0,4] . D========================eeeeeeeER . . . . . . . . addq $44, 320(%r14)
# YESALIAS-NEXT: [0,5] . D==============================eeeeeeeER. . . . . . . addq $44, 384(%r14)
# YESALIAS-NEXT: [0,6] . .D====================================eeeeeeeER . . . . . addq $44, 448(%r14)
# YESALIAS-NEXT: [0,7] . . D==========================================eeeeeeeER . . . . addq $44, 512(%r14)
# YESALIAS-NEXT: [0,8] . . D================================================eeeeeeeER . . addq $44, 576(%r14)
# YESALIAS-NEXT: [0,9] . . D======================================================eeeeeeeER addq $44, 640(%r14)
# ALL: Average Wait times (based on the timeline view):
# ALL-NEXT: [0]: Executions
# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue
# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# ALL-NEXT: [3]: Average time elapsed from WB until retire stage
# ALL: [0] [1] [2] [3]
# ALL-NEXT: 0. 1 1.0 1.0 0.0 addq $44, 64(%r14)
# NOALIAS-NEXT: 1. 1 1.0 1.0 0.0 addq $44, 128(%r14)
# NOALIAS-NEXT: 2. 1 1.0 1.0 0.0 addq $44, 192(%r14)
# NOALIAS-NEXT: 3. 1 1.0 1.0 0.0 addq $44, 256(%r14)
# NOALIAS-NEXT: 4. 1 1.0 1.0 0.0 addq $44, 320(%r14)
# NOALIAS-NEXT: 5. 1 1.0 1.0 0.0 addq $44, 384(%r14)
# NOALIAS-NEXT: 6. 1 1.0 1.0 0.0 addq $44, 448(%r14)
# NOALIAS-NEXT: 7. 1 1.0 1.0 0.0 addq $44, 512(%r14)
# NOALIAS-NEXT: 8. 1 1.0 1.0 0.0 addq $44, 576(%r14)
# NOALIAS-NEXT: 9. 1 1.0 1.0 0.0 addq $44, 640(%r14)
# NOALIAS-NEXT: 1 1.0 1.0 0.0 <total>
# YESALIAS-NEXT: 1. 1 7.0 0.0 0.0 addq $44, 128(%r14)
# YESALIAS-NEXT: 2. 1 13.0 0.0 0.0 addq $44, 192(%r14)
# YESALIAS-NEXT: 3. 1 19.0 0.0 0.0 addq $44, 256(%r14)
# YESALIAS-NEXT: 4. 1 25.0 0.0 0.0 addq $44, 320(%r14)
# YESALIAS-NEXT: 5. 1 31.0 0.0 0.0 addq $44, 384(%r14)
# YESALIAS-NEXT: 6. 1 37.0 0.0 0.0 addq $44, 448(%r14)
# YESALIAS-NEXT: 7. 1 43.0 0.0 0.0 addq $44, 512(%r14)
# YESALIAS-NEXT: 8. 1 49.0 0.0 0.0 addq $44, 576(%r14)
# YESALIAS-NEXT: 9. 1 55.0 0.0 0.0 addq $44, 640(%r14)
# YESALIAS-NEXT: 1 28.0 0.1 0.0 <total>

View File

@ -0,0 +1,142 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -timeline -timeline-max-iterations=1 < %s | FileCheck %s -check-prefixes=ALL,NOALIAS
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s -check-prefixes=ALL,YESALIAS
addq $44, 64(%r14)
addq $44, 128(%r14)
addq $44, 192(%r14)
addq $44, 256(%r14)
addq $44, 320(%r14)
addq $44, 384(%r14)
addq $44, 448(%r14)
addq $44, 512(%r14)
addq $44, 576(%r14)
addq $44, 640(%r14)
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1000
# NOALIAS-NEXT: Total Cycles: 1009
# YESALIAS-NEXT: Total Cycles: 7003
# ALL-NEXT: Total uOps: 3000
# ALL: Dispatch Width: 6
# NOALIAS-NEXT: uOps Per Cycle: 2.97
# NOALIAS-NEXT: IPC: 0.99
# YESALIAS-NEXT: uOps Per Cycle: 0.43
# YESALIAS-NEXT: IPC: 0.14
# ALL-NEXT: Block RThroughput: 10.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# ALL-NEXT: 3 7 1.00 * * addq $44, 64(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 128(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 192(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 256(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 320(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 384(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 448(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 512(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 576(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 640(%r14)
# ALL: Resources:
# ALL-NEXT: [0] - SKLDivider
# ALL-NEXT: [1] - SKLFPDivider
# ALL-NEXT: [2] - SKLPort0
# ALL-NEXT: [3] - SKLPort1
# ALL-NEXT: [4] - SKLPort2
# ALL-NEXT: [5] - SKLPort3
# ALL-NEXT: [6] - SKLPort4
# ALL-NEXT: [7] - SKLPort5
# ALL-NEXT: [8] - SKLPort6
# ALL-NEXT: [9] - SKLPort7
# ALL: Resource pressure per iteration:
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# ALL-NEXT: - - 2.50 2.50 6.66 6.67 10.00 2.50 2.50 6.67
# ALL: Resource pressure by instruction:
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# ALL-NEXT: - - - 0.50 0.66 0.67 1.00 - 0.50 0.67 addq $44, 64(%r14)
# ALL-NEXT: - - 0.50 - 0.67 0.66 1.00 0.50 - 0.67 addq $44, 128(%r14)
# ALL-NEXT: - - - 0.50 0.67 0.67 1.00 - 0.50 0.66 addq $44, 192(%r14)
# ALL-NEXT: - - 0.50 - 0.66 0.67 1.00 0.50 - 0.67 addq $44, 256(%r14)
# ALL-NEXT: - - - 0.50 0.67 0.66 1.00 - 0.50 0.67 addq $44, 320(%r14)
# ALL-NEXT: - - 0.50 - 0.67 0.67 1.00 0.50 - 0.66 addq $44, 384(%r14)
# ALL-NEXT: - - - 0.50 0.66 0.67 1.00 - 0.50 0.67 addq $44, 448(%r14)
# ALL-NEXT: - - 0.50 - 0.67 0.66 1.00 0.50 - 0.67 addq $44, 512(%r14)
# ALL-NEXT: - - - 0.50 0.67 0.67 1.00 - 0.50 0.66 addq $44, 576(%r14)
# ALL-NEXT: - - 0.50 - 0.66 0.67 1.00 0.50 - 0.67 addq $44, 640(%r14)
# ALL: Timeline view:
# NOALIAS-NEXT: 012345678
# NOALIAS-NEXT: Index 0123456789
# YESALIAS-NEXT: 0123456789 0123456789 0123456789 012
# YESALIAS-NEXT: Index 0123456789 0123456789 0123456789 0123456789
# NOALIAS: [0,0] DeeeeeeeER. . . addq $44, 64(%r14)
# NOALIAS-NEXT: [0,1] D=eeeeeeeER . . addq $44, 128(%r14)
# NOALIAS-NEXT: [0,2] .D=eeeeeeeER . . addq $44, 192(%r14)
# NOALIAS-NEXT: [0,3] .D==eeeeeeeER . . addq $44, 256(%r14)
# NOALIAS-NEXT: [0,4] . D==eeeeeeeER . . addq $44, 320(%r14)
# NOALIAS-NEXT: [0,5] . D===eeeeeeeER. . addq $44, 384(%r14)
# NOALIAS-NEXT: [0,6] . D===eeeeeeeER . addq $44, 448(%r14)
# NOALIAS-NEXT: [0,7] . D====eeeeeeeER . addq $44, 512(%r14)
# NOALIAS-NEXT: [0,8] . D====eeeeeeeER. addq $44, 576(%r14)
# NOALIAS-NEXT: [0,9] . D=====eeeeeeeER addq $44, 640(%r14)
# YESALIAS: [0,0] DeeeeeeeER. . . . . . . . . . . . . . addq $44, 64(%r14)
# YESALIAS-NEXT: [0,1] D=======eeeeeeeER . . . . . . . . . . . . addq $44, 128(%r14)
# YESALIAS-NEXT: [0,2] .D=============eeeeeeeER . . . . . . . . . . . addq $44, 192(%r14)
# YESALIAS-NEXT: [0,3] .D====================eeeeeeeER . . . . . . . . . addq $44, 256(%r14)
# YESALIAS-NEXT: [0,4] . D==========================eeeeeeeER . . . . . . . . addq $44, 320(%r14)
# YESALIAS-NEXT: [0,5] . D=================================eeeeeeeER. . . . . . . addq $44, 384(%r14)
# YESALIAS-NEXT: [0,6] . D=======================================eeeeeeeER . . . . . addq $44, 448(%r14)
# YESALIAS-NEXT: [0,7] . D==============================================eeeeeeeER . . . . addq $44, 512(%r14)
# YESALIAS-NEXT: [0,8] . D====================================================eeeeeeeER . . addq $44, 576(%r14)
# YESALIAS-NEXT: [0,9] . D===========================================================eeeeeeeER addq $44, 640(%r14)
# ALL: Average Wait times (based on the timeline view):
# ALL-NEXT: [0]: Executions
# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue
# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# ALL-NEXT: [3]: Average time elapsed from WB until retire stage
# ALL: [0] [1] [2] [3]
# ALL-NEXT: 0. 1 1.0 1.0 0.0 addq $44, 64(%r14)
# NOALIAS-NEXT: 1. 1 2.0 1.0 0.0 addq $44, 128(%r14)
# NOALIAS-NEXT: 2. 1 2.0 1.0 0.0 addq $44, 192(%r14)
# NOALIAS-NEXT: 3. 1 3.0 1.0 0.0 addq $44, 256(%r14)
# NOALIAS-NEXT: 4. 1 3.0 1.0 0.0 addq $44, 320(%r14)
# NOALIAS-NEXT: 5. 1 4.0 1.0 0.0 addq $44, 384(%r14)
# NOALIAS-NEXT: 6. 1 4.0 1.0 0.0 addq $44, 448(%r14)
# NOALIAS-NEXT: 7. 1 5.0 1.0 0.0 addq $44, 512(%r14)
# NOALIAS-NEXT: 8. 1 5.0 1.0 0.0 addq $44, 576(%r14)
# NOALIAS-NEXT: 9. 1 6.0 1.0 0.0 addq $44, 640(%r14)
# NOALIAS-NEXT: 1 3.5 1.0 0.0 <total>
# YESALIAS-NEXT: 1. 1 8.0 0.0 0.0 addq $44, 128(%r14)
# YESALIAS-NEXT: 2. 1 14.0 0.0 0.0 addq $44, 192(%r14)
# YESALIAS-NEXT: 3. 1 21.0 0.0 0.0 addq $44, 256(%r14)
# YESALIAS-NEXT: 4. 1 27.0 0.0 0.0 addq $44, 320(%r14)
# YESALIAS-NEXT: 5. 1 34.0 0.0 0.0 addq $44, 384(%r14)
# YESALIAS-NEXT: 6. 1 40.0 0.0 0.0 addq $44, 448(%r14)
# YESALIAS-NEXT: 7. 1 47.0 0.0 0.0 addq $44, 512(%r14)
# YESALIAS-NEXT: 8. 1 53.0 0.0 0.0 addq $44, 576(%r14)
# YESALIAS-NEXT: 9. 1 60.0 0.0 0.0 addq $44, 640(%r14)
# YESALIAS-NEXT: 1 30.5 0.1 0.0 <total>

View File

@ -0,0 +1,142 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -timeline -timeline-max-iterations=1 < %s | FileCheck %s -check-prefixes=ALL,NOALIAS
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s -check-prefixes=ALL,YESALIAS
addq $44, 64(%r14)
addq $44, 128(%r14)
addq $44, 192(%r14)
addq $44, 256(%r14)
addq $44, 320(%r14)
addq $44, 384(%r14)
addq $44, 448(%r14)
addq $44, 512(%r14)
addq $44, 576(%r14)
addq $44, 640(%r14)
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1000
# NOALIAS-NEXT: Total Cycles: 1009
# YESALIAS-NEXT: Total Cycles: 7003
# ALL-NEXT: Total uOps: 3000
# ALL: Dispatch Width: 6
# NOALIAS-NEXT: uOps Per Cycle: 2.97
# NOALIAS-NEXT: IPC: 0.99
# YESALIAS-NEXT: uOps Per Cycle: 0.43
# YESALIAS-NEXT: IPC: 0.14
# ALL-NEXT: Block RThroughput: 10.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# ALL-NEXT: 3 7 1.00 * * addq $44, 64(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 128(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 192(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 256(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 320(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 384(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 448(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 512(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 576(%r14)
# ALL-NEXT: 3 7 1.00 * * addq $44, 640(%r14)
# ALL: Resources:
# ALL-NEXT: [0] - SKXDivider
# ALL-NEXT: [1] - SKXFPDivider
# ALL-NEXT: [2] - SKXPort0
# ALL-NEXT: [3] - SKXPort1
# ALL-NEXT: [4] - SKXPort2
# ALL-NEXT: [5] - SKXPort3
# ALL-NEXT: [6] - SKXPort4
# ALL-NEXT: [7] - SKXPort5
# ALL-NEXT: [8] - SKXPort6
# ALL-NEXT: [9] - SKXPort7
# ALL: Resource pressure per iteration:
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# ALL-NEXT: - - 2.50 2.50 6.66 6.67 10.00 2.50 2.50 6.67
# ALL: Resource pressure by instruction:
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# ALL-NEXT: - - - 0.50 0.66 0.67 1.00 - 0.50 0.67 addq $44, 64(%r14)
# ALL-NEXT: - - 0.50 - 0.67 0.66 1.00 0.50 - 0.67 addq $44, 128(%r14)
# ALL-NEXT: - - - 0.50 0.67 0.67 1.00 - 0.50 0.66 addq $44, 192(%r14)
# ALL-NEXT: - - 0.50 - 0.66 0.67 1.00 0.50 - 0.67 addq $44, 256(%r14)
# ALL-NEXT: - - - 0.50 0.67 0.66 1.00 - 0.50 0.67 addq $44, 320(%r14)
# ALL-NEXT: - - 0.50 - 0.67 0.67 1.00 0.50 - 0.66 addq $44, 384(%r14)
# ALL-NEXT: - - - 0.50 0.66 0.67 1.00 - 0.50 0.67 addq $44, 448(%r14)
# ALL-NEXT: - - 0.50 - 0.67 0.66 1.00 0.50 - 0.67 addq $44, 512(%r14)
# ALL-NEXT: - - - 0.50 0.67 0.67 1.00 - 0.50 0.66 addq $44, 576(%r14)
# ALL-NEXT: - - 0.50 - 0.66 0.67 1.00 0.50 - 0.67 addq $44, 640(%r14)
# ALL: Timeline view:
# NOALIAS-NEXT: 012345678
# NOALIAS-NEXT: Index 0123456789
# YESALIAS-NEXT: 0123456789 0123456789 0123456789 012
# YESALIAS-NEXT: Index 0123456789 0123456789 0123456789 0123456789
# NOALIAS: [0,0] DeeeeeeeER. . . addq $44, 64(%r14)
# NOALIAS-NEXT: [0,1] D=eeeeeeeER . . addq $44, 128(%r14)
# NOALIAS-NEXT: [0,2] .D=eeeeeeeER . . addq $44, 192(%r14)
# NOALIAS-NEXT: [0,3] .D==eeeeeeeER . . addq $44, 256(%r14)
# NOALIAS-NEXT: [0,4] . D==eeeeeeeER . . addq $44, 320(%r14)
# NOALIAS-NEXT: [0,5] . D===eeeeeeeER. . addq $44, 384(%r14)
# NOALIAS-NEXT: [0,6] . D===eeeeeeeER . addq $44, 448(%r14)
# NOALIAS-NEXT: [0,7] . D====eeeeeeeER . addq $44, 512(%r14)
# NOALIAS-NEXT: [0,8] . D====eeeeeeeER. addq $44, 576(%r14)
# NOALIAS-NEXT: [0,9] . D=====eeeeeeeER addq $44, 640(%r14)
# YESALIAS: [0,0] DeeeeeeeER. . . . . . . . . . . . . . addq $44, 64(%r14)
# YESALIAS-NEXT: [0,1] D=======eeeeeeeER . . . . . . . . . . . . addq $44, 128(%r14)
# YESALIAS-NEXT: [0,2] .D=============eeeeeeeER . . . . . . . . . . . addq $44, 192(%r14)
# YESALIAS-NEXT: [0,3] .D====================eeeeeeeER . . . . . . . . . addq $44, 256(%r14)
# YESALIAS-NEXT: [0,4] . D==========================eeeeeeeER . . . . . . . . addq $44, 320(%r14)
# YESALIAS-NEXT: [0,5] . D=================================eeeeeeeER. . . . . . . addq $44, 384(%r14)
# YESALIAS-NEXT: [0,6] . D=======================================eeeeeeeER . . . . . addq $44, 448(%r14)
# YESALIAS-NEXT: [0,7] . D==============================================eeeeeeeER . . . . addq $44, 512(%r14)
# YESALIAS-NEXT: [0,8] . D====================================================eeeeeeeER . . addq $44, 576(%r14)
# YESALIAS-NEXT: [0,9] . D===========================================================eeeeeeeER addq $44, 640(%r14)
# ALL: Average Wait times (based on the timeline view):
# ALL-NEXT: [0]: Executions
# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue
# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# ALL-NEXT: [3]: Average time elapsed from WB until retire stage
# ALL: [0] [1] [2] [3]
# ALL-NEXT: 0. 1 1.0 1.0 0.0 addq $44, 64(%r14)
# NOALIAS-NEXT: 1. 1 2.0 1.0 0.0 addq $44, 128(%r14)
# NOALIAS-NEXT: 2. 1 2.0 1.0 0.0 addq $44, 192(%r14)
# NOALIAS-NEXT: 3. 1 3.0 1.0 0.0 addq $44, 256(%r14)
# NOALIAS-NEXT: 4. 1 3.0 1.0 0.0 addq $44, 320(%r14)
# NOALIAS-NEXT: 5. 1 4.0 1.0 0.0 addq $44, 384(%r14)
# NOALIAS-NEXT: 6. 1 4.0 1.0 0.0 addq $44, 448(%r14)
# NOALIAS-NEXT: 7. 1 5.0 1.0 0.0 addq $44, 512(%r14)
# NOALIAS-NEXT: 8. 1 5.0 1.0 0.0 addq $44, 576(%r14)
# NOALIAS-NEXT: 9. 1 6.0 1.0 0.0 addq $44, 640(%r14)
# NOALIAS-NEXT: 1 3.5 1.0 0.0 <total>
# YESALIAS-NEXT: 1. 1 8.0 0.0 0.0 addq $44, 128(%r14)
# YESALIAS-NEXT: 2. 1 14.0 0.0 0.0 addq $44, 192(%r14)
# YESALIAS-NEXT: 3. 1 21.0 0.0 0.0 addq $44, 256(%r14)
# YESALIAS-NEXT: 4. 1 27.0 0.0 0.0 addq $44, 320(%r14)
# YESALIAS-NEXT: 5. 1 34.0 0.0 0.0 addq $44, 384(%r14)
# YESALIAS-NEXT: 6. 1 40.0 0.0 0.0 addq $44, 448(%r14)
# YESALIAS-NEXT: 7. 1 47.0 0.0 0.0 addq $44, 512(%r14)
# YESALIAS-NEXT: 8. 1 53.0 0.0 0.0 addq $44, 576(%r14)
# YESALIAS-NEXT: 9. 1 60.0 0.0 0.0 addq $44, 640(%r14)
# YESALIAS-NEXT: 1 30.5 0.1 0.0 <total>