[MCA] Switching from conservatively guessing which instructions are

memory-barrier instructions to providing targets and developers a convenient
way to explicitly declare which instructions are memory-barriers.

Differential Revision: https://reviews.llvm.org/D116779
This commit is contained in:
Patrick Holland 2022-01-02 16:37:14 -08:00
parent e7cb716ef9
commit 85e6e748d4
22 changed files with 430 additions and 220 deletions

View File

@ -182,6 +182,11 @@ option specifies "``-``", then the output will also be sent to standard output.
Enable the printing of instruction encodings within the instruction info view.
.. option:: -show-barriers
Enable the printing of LoadBarrier and StoreBarrier flags within the
instruction info view.
.. option:: -all-stats
Print all hardware statistics. This enables extra statistics related to the
@ -949,15 +954,16 @@ cache. It only knows if an instruction "MayLoad" and/or "MayStore." For
loads, the scheduling model provides an "optimistic" load-to-use latency (which
usually matches the load-to-use latency for when there is a hit in the L1D).
:program:`llvm-mca` does not know about serializing operations or memory-barrier
like instructions. The LSUnit conservatively assumes that an instruction which
has both "MayLoad" and unmodeled side effects behaves like a "soft"
load-barrier. That means, it serializes loads without forcing a flush of the
load queue. Similarly, instructions that "MayStore" and have unmodeled side
effects are treated like store barriers. A full memory barrier is a "MayLoad"
and "MayStore" instruction with unmodeled side effects. This is inaccurate, but
it is the best that we can do at the moment with the current information
available in LLVM.
:program:`llvm-mca` does not (on its own) know about serializing operations or
memory-barrier-like instructions. The LSUnit used to conservatively use an
instruction's "MayLoad", "MayStore", and unmodeled side effects flags to
determine whether an instruction should be treated as a memory-barrier. This was
inaccurate in general and was changed so that now each instruction has an
IsAStoreBarrier and IsALoadBarrier flag. These flags are mca-specific and
default to false for every instruction. If any instruction should have either of
these flags set, it should be done within the target's InstrPostProcess class.
For an example, look at the ``X86InstrPostProcess::postProcessInstruction``
method within ``llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp``.
A load/store barrier consumes one entry of the load/store queue. A load/store
barrier enforces ordering of loads/stores. A younger load cannot pass a load

View File

@ -43,6 +43,10 @@ public:
virtual ~InstrPostProcess() {}
/// This method can be overridden by targets to modify the mca::Instruction
/// object after it has been lowered from the MCInst.
/// This is generally a less disruptive alternative to modifying the
/// scheduling model.
///
/// The default implementation is intentionally empty, so an instruction is
/// left unchanged unless a target provides its own override.
///
/// \param Inst the lowered mca::Instruction that an override may modify.
/// \param MCI  the MCInst that \p Inst was lowered from.
virtual void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
const MCInst &MCI) {}
};

View File

@ -517,9 +517,14 @@ class InstructionBase {
// Instruction opcode which can be used by mca::CustomBehaviour
unsigned Opcode;
// Flags used by the LSUnit.
bool IsALoadBarrier;
bool IsAStoreBarrier;
public:
InstructionBase(const InstrDesc &D, const unsigned Opcode)
: Desc(D), IsOptimizableMove(false), Operands(0), Opcode(Opcode) {}
: Desc(D), IsOptimizableMove(false), Operands(0), Opcode(Opcode),
IsALoadBarrier(false), IsAStoreBarrier(false) {}
SmallVectorImpl<WriteState> &getDefs() { return Defs; }
ArrayRef<WriteState> getDefs() const { return Defs; }
@ -530,6 +535,10 @@ public:
unsigned getLatency() const { return Desc.MaxLatency; }
unsigned getNumMicroOps() const { return Desc.NumMicroOps; }
unsigned getOpcode() const { return Opcode; }
/// Returns the current value of the load-barrier flag of this instruction.
bool isALoadBarrier() const { return IsALoadBarrier; }
/// Returns the current value of the store-barrier flag of this instruction.
bool isAStoreBarrier() const { return IsAStoreBarrier; }
/// Sets (or clears, when \p IsBarrier is false) the load-barrier flag.
void setLoadBarrier(bool IsBarrier) { IsALoadBarrier = IsBarrier; }
/// Sets (or clears, when \p IsBarrier is false) the store-barrier flag.
void setStoreBarrier(bool IsBarrier) { IsAStoreBarrier = IsBarrier; }
/// Return the MCAOperand which corresponds to index Idx within the original
/// MCInst.

View File

@ -68,7 +68,8 @@ void LSUnitBase::dump() const {
unsigned LSUnit::dispatch(const InstRef &IR) {
const InstrDesc &Desc = IR.getInstruction()->getDesc();
unsigned IsMemBarrier = Desc.HasSideEffects;
bool IsStoreBarrier = IR.getInstruction()->isAStoreBarrier();
bool IsLoadBarrier = IR.getInstruction()->isALoadBarrier();
assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!");
if (Desc.MayLoad)
@ -111,12 +112,12 @@ unsigned LSUnit::dispatch(const InstRef &IR) {
CurrentStoreGroupID = NewGID;
if (IsMemBarrier)
if (IsStoreBarrier)
CurrentStoreBarrierGroupID = NewGID;
if (Desc.MayLoad) {
CurrentLoadGroupID = NewGID;
if (IsMemBarrier)
if (IsLoadBarrier)
CurrentLoadBarrierGroupID = NewGID;
}
@ -141,7 +142,7 @@ unsigned LSUnit::dispatch(const InstRef &IR) {
// However that group has already started execution, so we cannot add
// this load to it.
bool ShouldCreateANewGroup =
IsMemBarrier || !ImmediateLoadDominator ||
IsLoadBarrier || !ImmediateLoadDominator ||
CurrentLoadBarrierGroupID == ImmediateLoadDominator ||
ImmediateLoadDominator <= CurrentStoreGroupID ||
getGroup(ImmediateLoadDominator).isExecuting();
@ -161,7 +162,7 @@ unsigned LSUnit::dispatch(const InstRef &IR) {
}
// A load barrier may not pass a previous load or load barrier.
if (IsMemBarrier) {
if (IsLoadBarrier) {
if (ImmediateLoadDominator) {
MemoryGroup &LoadGroup = getGroup(ImmediateLoadDominator);
LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: ("
@ -181,7 +182,7 @@ unsigned LSUnit::dispatch(const InstRef &IR) {
}
CurrentLoadGroupID = NewGID;
if (IsMemBarrier)
if (IsLoadBarrier)
CurrentLoadBarrierGroupID = NewGID;
return NewGID;
}

View File

@ -109,5 +109,6 @@ add_llvm_target(X86CodeGen ${sources}
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(MCA)
add_subdirectory(MCTargetDesc)
add_subdirectory(TargetInfo)

View File

@ -0,0 +1,14 @@
# Builds the LLVMX86TargetMCA component library from X86CustomBehaviour.cpp,
# links it against the listed LLVM components, and adds it to the X86
# component group.
add_llvm_component_library(LLVMX86TargetMCA
X86CustomBehaviour.cpp
LINK_COMPONENTS
MC
MCParser
X86Desc
X86Info
Support
MCA
ADD_TO_COMPONENT
X86
)

View File

@ -0,0 +1,64 @@
//===------------------- X86CustomBehaviour.cpp -----------------*-C++ -* -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements methods from the X86InstrPostProcess class and
/// registers that class with the X86 targets.
///
//===----------------------------------------------------------------------===//
#include "X86CustomBehaviour.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86InstrInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/WithColor.h"
namespace llvm {
namespace mca {
/// Flags the X86 fence instructions as memory barriers.
///
/// MFENCE is marked as both a load and a store barrier, LFENCE as a load
/// barrier only, and SFENCE as a store barrier only. Any other opcode leaves
/// \p Inst untouched.
///
/// \param Inst the lowered instruction whose barrier flags may be set.
/// \param MCI  the original MCInst; only its opcode is inspected.
void X86InstrPostProcess::setMemBarriers(std::unique_ptr<Instruction> &Inst,
                                         const MCInst &MCI) {
  const unsigned Opc = MCI.getOpcode();
  const bool IsFullFence = (Opc == X86::MFENCE);

  // The load-barrier flag is set first so MFENCE sees the same call order as
  // before (load, then store).
  if (IsFullFence || Opc == X86::LFENCE)
    Inst->setLoadBarrier(true);
  if (IsFullFence || Opc == X86::SFENCE)
    Inst->setStoreBarrier(true);
}
/// X86 override of the post-processing hook: forwards \p Inst and \p MCI to
/// setMemBarriers, which is the only adjustment performed here.
void X86InstrPostProcess::postProcessInstruction(
std::unique_ptr<Instruction> &Inst, const MCInst &MCI) {
// Currently, we only modify certain instructions' IsALoadBarrier and
// IsAStoreBarrier flags.
setMemBarriers(Inst, MCI);
}
} // namespace mca
} // namespace llvm
using namespace llvm;
using namespace mca;
/// Factory that builds an X86InstrPostProcess from the given subtarget and
/// instruction info. It is the callback registered for both X86 targets in
/// LLVMInitializeX86TargetMCA.
/// NOTE(review): the object is allocated with `new` and returned as a raw
/// pointer; presumably the caller takes ownership -- confirm against the
/// TargetRegistry API.
static InstrPostProcess *createX86InstrPostProcess(const MCSubtargetInfo &STI,
const MCInstrInfo &MCII) {
return new X86InstrPostProcess(STI, MCII);
}
/// C-linkage entry point that registers the X86 InstrPostProcess factory with
/// the TargetRegistry for both the 32-bit and the 64-bit X86 targets.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86TargetMCA() {
  // Both targets share one factory; the 32-bit target is still registered
  // first, exactly as in the unrolled form.
  for (Target *X86Target : {&getTheX86_32Target(), &getTheX86_64Target()})
    TargetRegistry::RegisterInstrPostProcess(*X86Target,
                                             createX86InstrPostProcess);
}

View File

@ -0,0 +1,47 @@
//===-------------------- X86CustomBehaviour.h ------------------*-C++ -* -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file defines the X86InstrPostProcess class which inherits from
/// InstrPostProcess. This class is used by the tool llvm-mca to apply
/// target specific adjustments that are not expressed well enough in the
/// scheduling model for mca to apply them automatically.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_X86_MCA_X86CUSTOMBEHAVIOUR_H
#define LLVM_LIB_TARGET_X86_MCA_X86CUSTOMBEHAVIOUR_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/MCA/CustomBehaviour.h"
#include "llvm/Support/TargetParser.h"
namespace llvm {
namespace mca {
class X86InstrPostProcess : public InstrPostProcess {
void processWaitCnt(std::unique_ptr<Instruction> &Inst, const MCInst &MCI);
/// Called within X86InstrPostProcess to specify certain instructions
/// as load and store barriers.
void setMemBarriers(std::unique_ptr<Instruction> &Inst, const MCInst &MCI);
public:
X86InstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)
: InstrPostProcess(STI, MCII) {}
~X86InstrPostProcess() {}
void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
const MCInst &MCI) override;
};
} // namespace mca
} // namespace llvm
#endif

View File

@ -10,12 +10,12 @@ ldr x3, [x10]
# CHECK: Iterations: 3
# CHECK-NEXT: Instructions: 18
# CHECK-NEXT: Total Cycles: 19
# CHECK-NEXT: Total Cycles: 16
# CHECK-NEXT: Total uOps: 18
# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 0.95
# CHECK-NEXT: IPC: 0.95
# CHECK-NEXT: uOps Per Cycle: 1.13
# CHECK-NEXT: IPC: 1.13
# CHECK-NEXT: Block RThroughput: 3.0
# CHECK: Instruction Info:
@ -62,27 +62,27 @@ ldr x3, [x10]
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldr x3, [x10]
# CHECK: Timeline view:
# CHECK-NEXT: 012345678
# CHECK-NEXT: 012345
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DE . . . . str x1, [x10]
# CHECK-NEXT: [0,1] .DE . . . . str x1, [x10]
# CHECK-NEXT: [0,2] .DeeE. . . . ldr x2, [x10]
# CHECK-NEXT: [0,3] . DE. . . . nop
# CHECK-NEXT: [0,4] . DeeE . . . ldr x2, [x10]
# CHECK-NEXT: [0,5] . DeeE . . . ldr x3, [x10]
# CHECK-NEXT: [1,0] . DE . . . str x1, [x10]
# CHECK-NEXT: [1,1] . .DE . . . str x1, [x10]
# CHECK-NEXT: [1,2] . .DeeE. . . ldr x2, [x10]
# CHECK-NEXT: [1,3] . . DE. . . nop
# CHECK-NEXT: [1,4] . . DeeE . . ldr x2, [x10]
# CHECK-NEXT: [1,5] . . DeeE . . ldr x3, [x10]
# CHECK-NEXT: [2,0] . . DE . . str x1, [x10]
# CHECK-NEXT: [2,1] . . .DE . . str x1, [x10]
# CHECK-NEXT: [2,2] . . .DeeE. . ldr x2, [x10]
# CHECK-NEXT: [2,3] . . . DE. . nop
# CHECK-NEXT: [2,4] . . . DeeE. ldr x2, [x10]
# CHECK-NEXT: [2,5] . . . DeeE ldr x3, [x10]
# CHECK: [0,0] DE . . . str x1, [x10]
# CHECK-NEXT: [0,1] .DE . . . str x1, [x10]
# CHECK-NEXT: [0,2] .DeeE. . . ldr x2, [x10]
# CHECK-NEXT: [0,3] . DE. . . nop
# CHECK-NEXT: [0,4] . DeeE . . ldr x2, [x10]
# CHECK-NEXT: [0,5] . DeeE . . ldr x3, [x10]
# CHECK-NEXT: [1,0] . DE . . str x1, [x10]
# CHECK-NEXT: [1,1] . DE . . str x1, [x10]
# CHECK-NEXT: [1,2] . DeeE . . ldr x2, [x10]
# CHECK-NEXT: [1,3] . . DE . . nop
# CHECK-NEXT: [1,4] . . DeeE . ldr x2, [x10]
# CHECK-NEXT: [1,5] . . DeeE . ldr x3, [x10]
# CHECK-NEXT: [2,0] . . DE. . str x1, [x10]
# CHECK-NEXT: [2,1] . . DE . str x1, [x10]
# CHECK-NEXT: [2,2] . . DeeE . ldr x2, [x10]
# CHECK-NEXT: [2,3] . . .DE . nop
# CHECK-NEXT: [2,4] . . .DeeE. ldr x2, [x10]
# CHECK-NEXT: [2,5] . . . DeeE ldr x3, [x10]
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions

View File

@ -40,12 +40,12 @@ s_waitcnt vmcnt(0) lgkmcnt(0)
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 36
# CHECK-NEXT: Total Cycles: 331
# CHECK-NEXT: Total Cycles: 94
# CHECK-NEXT: Total uOps: 36
# CHECK: Dispatch Width: 1
# CHECK-NEXT: uOps Per Cycle: 0.11
# CHECK-NEXT: IPC: 0.11
# CHECK-NEXT: uOps Per Cycle: 0.38
# CHECK-NEXT: IPC: 0.38
# CHECK-NEXT: Block RThroughput: 36.0
# CHECK: Instruction Info:
@ -147,45 +147,45 @@ s_waitcnt vmcnt(0) lgkmcnt(0)
# CHECK-NEXT: - - - 1.00 - - - s_waitcnt vmcnt(0) lgkmcnt(0)
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789
# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789
# CHECK: [0,0] DeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . s_load_dwordx2 s[2:3], s[0:1], 0x24
# CHECK-NEXT: [0,1] .DeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . s_load_dwordx2 s[0:1], s[0:1], 0x2c
# CHECK-NEXT: [0,2] . .DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . s_waitcnt lgkmcnt(0)
# CHECK-NEXT: [0,3] . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_mov_b32_e32 v0, s2
# CHECK-NEXT: [0,4] . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_mov_b32_e32 v1, s3
# CHECK-NEXT: [0,5] . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . flat_load_dword v2, v[0:1]
# CHECK-NEXT: [0,6] . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . flat_load_dword v3, v[0:1] offset:8
# CHECK-NEXT: [0,7] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . . . . . . . . flat_load_dword v4, v[0:1] offset:16
# CHECK-NEXT: [0,8] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. flat_load_dword v5, v[0:1] offset:24
# CHECK-NEXT: [0,9] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . v_mov_b32_e32 v0, s0
# CHECK-NEXT: [0,10] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . . v_mov_b32_e32 v1, s1
# CHECK-NEXT: [0,11] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . v_mov_b32_e32 v6, s6
# CHECK-NEXT: [0,12] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . v_mov_b32_e32 v7, s7
# CHECK-NEXT: [0,13] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . v_mov_b32_e32 v8, s8
# CHECK-NEXT: [0,14] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . v_mov_b32_e32 v9, s9
# CHECK-NEXT: [0,15] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . v_mov_b32_e32 v10, s10
# CHECK-NEXT: [0,16] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . v_mov_b32_e32 v11, s11
# CHECK-NEXT: [0,17] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . v_mov_b32_e32 v12, s12
# CHECK-NEXT: [0,18] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . v_mov_b32_e32 v13, s13
# CHECK-NEXT: [0,19] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . v_mov_b32_e32 v14, s14
# CHECK-NEXT: [0,20] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . v_mov_b32_e32 v15, s15
# CHECK-NEXT: [0,21] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . v_mov_b32_e32 v16, s16
# CHECK-NEXT: [0,22] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . v_mov_b32_e32 v17, s17
# CHECK-NEXT: [0,23] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . v_mov_b32_e32 v18, s18
# CHECK-NEXT: [0,24] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . v_mov_b32_e32 v19, s19
# CHECK-NEXT: [0,25] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . v_mov_b32_e32 v20, s20
# CHECK-NEXT: [0,26] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . v_mov_b32_e32 v21, s21
# CHECK-NEXT: [0,27] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . v_mov_b32_e32 v22, s22
# CHECK-NEXT: [0,28] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . v_mov_b32_e32 v23, s23
# CHECK-NEXT: [0,29] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . v_mov_b32_e32 v24, s24
# CHECK-NEXT: [0,30] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . v_mov_b32_e32 v25, s25
# CHECK-NEXT: [0,31] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . v_mov_b32_e32 v26, s26
# CHECK-NEXT: [0,32] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . v_mov_b32_e32 v27, s27
# CHECK-NEXT: [0,33] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . v_mov_b32_e32 v28, s28
# CHECK-NEXT: [0,34] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . v_mov_b32_e32 v29, s29
# CHECK-NEXT: [0,35] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE s_waitcnt vmcnt(0) lgkmcnt(0)
# CHECK: [0,0] DeeeeE . . . . . . . . . . . . . . . . . . s_load_dwordx2 s[2:3], s[0:1], 0x24
# CHECK-NEXT: [0,1] .DeeeeE . . . . . . . . . . . . . . . . . . s_load_dwordx2 s[0:1], s[0:1], 0x2c
# CHECK-NEXT: [0,2] . .DE . . . . . . . . . . . . . . . . . . s_waitcnt lgkmcnt(0)
# CHECK-NEXT: [0,3] . . DE . . . . . . . . . . . . . . . . . . v_mov_b32_e32 v0, s2
# CHECK-NEXT: [0,4] . . DE. . . . . . . . . . . . . . . . . . v_mov_b32_e32 v1, s3
# CHECK-NEXT: [0,5] . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. . flat_load_dword v2, v[0:1]
# CHECK-NEXT: [0,6] . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . flat_load_dword v3, v[0:1] offset:8
# CHECK-NEXT: [0,7] . . .DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . flat_load_dword v4, v[0:1] offset:16
# CHECK-NEXT: [0,8] . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. flat_load_dword v5, v[0:1] offset:24
# CHECK-NEXT: [0,9] . . . DE. . . . . . . . . . . . . . . . . v_mov_b32_e32 v0, s0
# CHECK-NEXT: [0,10] . . . DE . . . . . . . . . . . . . . . . v_mov_b32_e32 v1, s1
# CHECK-NEXT: [0,11] . . . DE . . . . . . . . . . . . . . . . v_mov_b32_e32 v6, s6
# CHECK-NEXT: [0,12] . . . .DE . . . . . . . . . . . . . . . . v_mov_b32_e32 v7, s7
# CHECK-NEXT: [0,13] . . . . DE . . . . . . . . . . . . . . . . v_mov_b32_e32 v8, s8
# CHECK-NEXT: [0,14] . . . . DE. . . . . . . . . . . . . . . . v_mov_b32_e32 v9, s9
# CHECK-NEXT: [0,15] . . . . DE . . . . . . . . . . . . . . . v_mov_b32_e32 v10, s10
# CHECK-NEXT: [0,16] . . . . DE . . . . . . . . . . . . . . . v_mov_b32_e32 v11, s11
# CHECK-NEXT: [0,17] . . . . .DE . . . . . . . . . . . . . . . v_mov_b32_e32 v12, s12
# CHECK-NEXT: [0,18] . . . . . DE . . . . . . . . . . . . . . . v_mov_b32_e32 v13, s13
# CHECK-NEXT: [0,19] . . . . . DE. . . . . . . . . . . . . . . v_mov_b32_e32 v14, s14
# CHECK-NEXT: [0,20] . . . . . DE . . . . . . . . . . . . . . v_mov_b32_e32 v15, s15
# CHECK-NEXT: [0,21] . . . . . DE . . . . . . . . . . . . . . v_mov_b32_e32 v16, s16
# CHECK-NEXT: [0,22] . . . . . .DE . . . . . . . . . . . . . . v_mov_b32_e32 v17, s17
# CHECK-NEXT: [0,23] . . . . . . DE . . . . . . . . . . . . . . v_mov_b32_e32 v18, s18
# CHECK-NEXT: [0,24] . . . . . . DE. . . . . . . . . . . . . . v_mov_b32_e32 v19, s19
# CHECK-NEXT: [0,25] . . . . . . DE . . . . . . . . . . . . . v_mov_b32_e32 v20, s20
# CHECK-NEXT: [0,26] . . . . . . DE . . . . . . . . . . . . . v_mov_b32_e32 v21, s21
# CHECK-NEXT: [0,27] . . . . . . .DE . . . . . . . . . . . . . v_mov_b32_e32 v22, s22
# CHECK-NEXT: [0,28] . . . . . . . DE . . . . . . . . . . . . . v_mov_b32_e32 v23, s23
# CHECK-NEXT: [0,29] . . . . . . . DE. . . . . . . . . . . . . v_mov_b32_e32 v24, s24
# CHECK-NEXT: [0,30] . . . . . . . DE . . . . . . . . . . . . v_mov_b32_e32 v25, s25
# CHECK-NEXT: [0,31] . . . . . . . DE . . . . . . . . . . . . v_mov_b32_e32 v26, s26
# CHECK-NEXT: [0,32] . . . . . . . .DE . . . . . . . . . . . . v_mov_b32_e32 v27, s27
# CHECK-NEXT: [0,33] . . . . . . . . DE . . . . . . . . . . . . v_mov_b32_e32 v28, s28
# CHECK-NEXT: [0,34] . . . . . . . . DE. . . . . . . . . . . . v_mov_b32_e32 v29, s29
# CHECK-NEXT: [0,35] . . . . . . . . . . . . . . . . . . . DE s_waitcnt vmcnt(0) lgkmcnt(0)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions

View File

@ -528,10 +528,10 @@ movaps %xmm3, (%rbx)
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movd %mm0, (%rax)
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movd %mm1, (%rcx)
# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movd %mm2, (%rdx)
# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movd %mm3, (%rbx)
# CHECK-NEXT: 1 2.5 0.3 0.0 <total>
# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movd %mm1, (%rcx)
# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movd %mm2, (%rdx)
# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movd %mm3, (%rbx)
# CHECK-NEXT: 1 2.5 1.0 0.0 <total>
# CHECK: [5] Code Region

View File

@ -519,12 +519,12 @@ movaps %xmm3, (%rbx)
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 553
# CHECK-NEXT: Total Cycles: 405
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.72
# CHECK-NEXT: IPC: 0.72
# CHECK-NEXT: uOps Per Cycle: 0.99
# CHECK-NEXT: IPC: 0.99
# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Instruction Info:
@ -544,25 +544,24 @@ movaps %xmm3, (%rbx)
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 57 (10.3%)
# CHECK-NEXT: SCHEDQ - Scheduler full: 347 (85.7%)
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 432 (78.1%)
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK-NEXT: USH - Uncategorised Structural Hazard: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 364 (65.8%)
# CHECK-NEXT: 1, 88 (15.9%)
# CHECK-NEXT: 2, 4 (0.7%)
# CHECK-NEXT: 3, 84 (15.2%)
# CHECK-NEXT: 4, 13 (2.4%)
# CHECK-NEXT: 0, 131 (32.3%)
# CHECK-NEXT: 1, 174 (43.0%)
# CHECK-NEXT: 2, 87 (21.5%)
# CHECK-NEXT: 4, 13 (3.2%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: 0, 253 (45.8%)
# CHECK-NEXT: 1, 200 (36.2%)
# CHECK-NEXT: 2, 100 (18.1%)
# CHECK-NEXT: 0, 105 (25.9%)
# CHECK-NEXT: 1, 200 (49.4%)
# CHECK-NEXT: 2, 100 (24.7%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: [1] Resource name.
@ -571,10 +570,10 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: [4] Total number of buffer entries.
# CHECK: [1] [2] [3] [4]
# CHECK-NEXT: PdEX 23 40 40
# CHECK-NEXT: PdFPU 23 40 64
# CHECK-NEXT: PdLoad 3 22 40
# CHECK-NEXT: PdStore 22 24 24
# CHECK-NEXT: PdEX 36 40 40
# CHECK-NEXT: PdFPU 36 40 64
# CHECK-NEXT: PdLoad 20 23 40
# CHECK-NEXT: PdStore 19 22 24
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@ -608,8 +607,8 @@ movaps %xmm3, (%rbx)
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - - - 3.00 - - - - 1.00 movd %mm0, (%rax)
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - movd (%rcx), %mm1
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - - movd (%rdx), %mm2
# CHECK-NEXT: 3.00 - - - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - movd (%rcx), %mm1
# CHECK-NEXT: - 3.00 - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - - movd (%rdx), %mm2
# CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - - 1.00 movd %mm3, (%rbx)
# CHECK: Timeline view:
@ -630,8 +629,8 @@ movaps %xmm3, (%rbx)
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movd %mm0, (%rax)
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movd (%rcx), %mm1
# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movd (%rdx), %mm2
# CHECK-NEXT: 3. 1 4.0 1.0 1.0 movd %mm3, (%rbx)
# CHECK-NEXT: 1 2.0 1.3 0.3 <total>
# CHECK-NEXT: 3. 1 4.0 2.0 1.0 movd %mm3, (%rbx)
# CHECK-NEXT: 1 2.0 1.5 0.3 <total>
# CHECK: [5] Code Region

View File

@ -6,12 +6,12 @@ stmxcsr (%rsp)
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 4
# CHECK-NEXT: Total Cycles: 205
# CHECK-NEXT: Total Cycles: 103
# CHECK-NEXT: Total uOps: 6
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.03
# CHECK-NEXT: IPC: 0.02
# CHECK-NEXT: uOps Per Cycle: 0.06
# CHECK-NEXT: IPC: 0.04
# CHECK-NEXT: Block RThroughput: 18.0
# CHECK: Instruction Info:
@ -28,10 +28,12 @@ stmxcsr (%rsp)
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 012
# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER. int3
# CHECK-NEXT: [0,1] D====================================================================================================eER stmxcsr (%rsp)
# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER int3
# CHECK-NEXT: [0,1] DeE---------------------------------------------------------------------------------------------------R stmxcsr (%rsp)
# CHECK-NEXT: [1,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER int3
# CHECK-NEXT: [1,1] .D=================eE---------------------------------------------------------------------------------R stmxcsr (%rsp)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -40,6 +42,6 @@ stmxcsr (%rsp)
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 51.5 0.5 0.0 int3
# CHECK-NEXT: 1. 2 151.0 0.0 0.0 stmxcsr (%rsp)
# CHECK-NEXT: 2 101.3 0.3 0.0 <total>
# CHECK-NEXT: 0. 2 1.0 0.5 0.0 int3
# CHECK-NEXT: 1. 2 9.5 9.0 90.0 stmxcsr (%rsp)
# CHECK-NEXT: 2 5.3 4.8 45.0 <total>

View File

@ -514,12 +514,12 @@ vmovaps %ymm3, (%rbx)
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 803
# CHECK-NEXT: Total Cycles: 603
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.50
# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: uOps Per Cycle: 0.66
# CHECK-NEXT: IPC: 0.66
# CHECK-NEXT: Block RThroughput: 6.0
# CHECK: Instruction Info:
@ -541,21 +541,21 @@ vmovaps %ymm3, (%rbx)
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 748 (93.2%)
# CHECK-NEXT: SQ - Store queue full: 560 (92.9%)
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK-NEXT: USH - Uncategorised Structural Hazard: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 422 (52.6%)
# CHECK-NEXT: 1, 374 (46.6%)
# CHECK-NEXT: 2, 1 (0.1%)
# CHECK-NEXT: 4, 6 (0.7%)
# CHECK-NEXT: 0, 222 (36.8%)
# CHECK-NEXT: 1, 374 (62.0%)
# CHECK-NEXT: 2, 1 (0.2%)
# CHECK-NEXT: 4, 6 (1.0%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: 0, 403 (50.2%)
# CHECK-NEXT: 1, 400 (49.8%)
# CHECK-NEXT: 0, 203 (33.7%)
# CHECK-NEXT: 1, 400 (66.3%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: [1] Resource name.
@ -564,8 +564,8 @@ vmovaps %ymm3, (%rbx)
# CHECK-NEXT: [4] Total number of buffer entries.
# CHECK: [1] [2] [3] [4]
# CHECK-NEXT: PdEX 21 23 40
# CHECK-NEXT: PdFPU 21 23 64
# CHECK-NEXT: PdEX 21 22 40
# CHECK-NEXT: PdFPU 21 22 64
# CHECK-NEXT: PdLoad 0 0 40
# CHECK-NEXT: PdStore 22 24 24
@ -606,13 +606,12 @@ vmovaps %ymm3, (%rbx)
# CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - - 1.00 movd %mm3, (%rbx)
# CHECK: Timeline view:
# CHECK-NEXT: 0
# CHECK-NEXT: Index 0123456789
# CHECK-NEXT: Index 012345678
# CHECK: [0,0] DeeER. . movd %mm0, (%rax)
# CHECK-NEXT: [0,1] D==eeER . movd %mm1, (%rcx)
# CHECK-NEXT: [0,2] D====eeER . movd %mm2, (%rdx)
# CHECK-NEXT: [0,3] D======eeER movd %mm3, (%rbx)
# CHECK: [0,0] DeeER. . movd %mm0, (%rax)
# CHECK-NEXT: [0,1] D=eeER . movd %mm1, (%rcx)
# CHECK-NEXT: [0,2] D===eeER. movd %mm2, (%rdx)
# CHECK-NEXT: [0,3] D====eeER movd %mm3, (%rbx)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -622,10 +621,10 @@ vmovaps %ymm3, (%rbx)
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movd %mm0, (%rax)
# CHECK-NEXT: 1. 1 3.0 0.0 0.0 movd %mm1, (%rcx)
# CHECK-NEXT: 2. 1 5.0 0.0 0.0 movd %mm2, (%rdx)
# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movd %mm3, (%rbx)
# CHECK-NEXT: 1 4.0 0.3 0.0 <total>
# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movd %mm1, (%rcx)
# CHECK-NEXT: 2. 1 4.0 2.0 0.0 movd %mm2, (%rdx)
# CHECK-NEXT: 3. 1 5.0 1.0 0.0 movd %mm3, (%rbx)
# CHECK-NEXT: 1 3.0 1.3 0.0 <total>
# CHECK: [5] Code Region

View File

@ -6,12 +6,12 @@ stmxcsr (%rsp)
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 4
# CHECK-NEXT: Total Cycles: 205
# CHECK-NEXT: Total Cycles: 104
# CHECK-NEXT: Total uOps: 4
# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 0.02
# CHECK-NEXT: IPC: 0.02
# CHECK-NEXT: uOps Per Cycle: 0.04
# CHECK-NEXT: IPC: 0.04
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
@ -31,7 +31,9 @@ stmxcsr (%rsp)
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123
# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER. int3
# CHECK-NEXT: [0,1] D====================================================================================================eER stmxcsr (%rsp)
# CHECK-NEXT: [0,1] DeE---------------------------------------------------------------------------------------------------R. stmxcsr (%rsp)
# CHECK-NEXT: [1,0] .DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER int3
# CHECK-NEXT: [1,1] .DeE---------------------------------------------------------------------------------------------------R stmxcsr (%rsp)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -40,6 +42,6 @@ stmxcsr (%rsp)
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 51.0 0.5 0.0 int3
# CHECK-NEXT: 1. 2 151.0 0.0 0.0 stmxcsr (%rsp)
# CHECK-NEXT: 2 101.0 0.3 0.0 <total>
# CHECK-NEXT: 0. 2 1.0 1.0 0.0 int3
# CHECK-NEXT: 1. 2 1.0 0.0 99.0 stmxcsr (%rsp)
# CHECK-NEXT: 2 1.0 0.5 49.5 <total>

View File

@ -12,12 +12,12 @@ retq
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 704
# CHECK-NEXT: Total Cycles: 308
# CHECK-NEXT: Total uOps: 600
# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 0.85
# CHECK-NEXT: IPC: 0.85
# CHECK-NEXT: uOps Per Cycle: 1.95
# CHECK-NEXT: IPC: 1.95
# CHECK-NEXT: Block RThroughput: 3.0
# CHECK: Instruction Info:
@ -66,27 +66,27 @@ retq
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - retq
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 01234
# CHECK-NEXT: 0123456
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeER . . . . . stmxcsr -4(%rsp)
# CHECK-NEXT: [0,1] DeER . . . . . movl $-24577, %eax
# CHECK-NEXT: [0,2] .DeeeeER . . . . andl -4(%rsp), %eax
# CHECK-NEXT: [0,3] .D====eER . . . . movl %eax, -8(%rsp)
# CHECK-NEXT: [0,4] . D===eeeER . . . ldmxcsr -8(%rsp)
# CHECK-NEXT: [0,5] . DeeeeE--R . . . retq
# CHECK-NEXT: [1,0] . D=====eER . . . stmxcsr -4(%rsp)
# CHECK-NEXT: [1,1] . DeE-----R . . . movl $-24577, %eax
# CHECK-NEXT: [1,2] . D====eeeeER. . . andl -4(%rsp), %eax
# CHECK-NEXT: [1,3] . D========eER . . movl %eax, -8(%rsp)
# CHECK-NEXT: [1,4] . D=======eeeER . . ldmxcsr -8(%rsp)
# CHECK-NEXT: [1,5] . D=eeeeE-----R . . retq
# CHECK-NEXT: [2,0] . .D=========eER . . stmxcsr -4(%rsp)
# CHECK-NEXT: [2,1] . .DeE---------R . . movl $-24577, %eax
# CHECK-NEXT: [2,2] . . D========eeeeER . andl -4(%rsp), %eax
# CHECK-NEXT: [2,3] . . D============eER . movl %eax, -8(%rsp)
# CHECK-NEXT: [2,4] . . D===========eeeER ldmxcsr -8(%rsp)
# CHECK-NEXT: [2,5] . . D=eeeeE---------R retq
# CHECK: [0,0] DeER . . .. stmxcsr -4(%rsp)
# CHECK-NEXT: [0,1] DeER . . .. movl $-24577, %eax
# CHECK-NEXT: [0,2] .DeeeeER . .. andl -4(%rsp), %eax
# CHECK-NEXT: [0,3] .D====eER . .. movl %eax, -8(%rsp)
# CHECK-NEXT: [0,4] . D===eeeER .. ldmxcsr -8(%rsp)
# CHECK-NEXT: [0,5] . DeeeeE--R .. retq
# CHECK-NEXT: [1,0] . D===eE--R .. stmxcsr -4(%rsp)
# CHECK-NEXT: [1,1] . DeE-----R .. movl $-24577, %eax
# CHECK-NEXT: [1,2] . DeeeeE--R .. andl -4(%rsp), %eax
# CHECK-NEXT: [1,3] . D====eE-R .. movl %eax, -8(%rsp)
# CHECK-NEXT: [1,4] . D===eeeER .. ldmxcsr -8(%rsp)
# CHECK-NEXT: [1,5] . D=eeeeE-R .. retq
# CHECK-NEXT: [2,0] . .D===eE--R.. stmxcsr -4(%rsp)
# CHECK-NEXT: [2,1] . .DeE-----R.. movl $-24577, %eax
# CHECK-NEXT: [2,2] . . DeeeeE--R. andl -4(%rsp), %eax
# CHECK-NEXT: [2,3] . . D====eE-R. movl %eax, -8(%rsp)
# CHECK-NEXT: [2,4] . . D===eeeER ldmxcsr -8(%rsp)
# CHECK-NEXT: [2,5] . . D=eeeeE-R retq
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -95,10 +95,10 @@ retq
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 5.7 0.3 0.0 stmxcsr -4(%rsp)
# CHECK-NEXT: 1. 3 1.0 1.0 4.7 movl $-24577, %eax
# CHECK-NEXT: 2. 3 5.0 0.3 0.0 andl -4(%rsp), %eax
# CHECK-NEXT: 3. 3 9.0 0.0 0.0 movl %eax, -8(%rsp)
# CHECK-NEXT: 4. 3 8.0 0.0 0.0 ldmxcsr -8(%rsp)
# CHECK-NEXT: 5. 3 1.7 1.7 5.3 retq
# CHECK-NEXT: 3 5.1 0.6 1.7 <total>
# CHECK-NEXT: 0. 3 3.0 1.0 1.3 stmxcsr -4(%rsp)
# CHECK-NEXT: 1. 3 1.0 1.0 3.3 movl $-24577, %eax
# CHECK-NEXT: 2. 3 1.0 1.0 1.3 andl -4(%rsp), %eax
# CHECK-NEXT: 3. 3 5.0 0.0 0.7 movl %eax, -8(%rsp)
# CHECK-NEXT: 4. 3 4.0 0.0 0.0 ldmxcsr -8(%rsp)
# CHECK-NEXT: 5. 3 1.7 1.7 1.3 retq
# CHECK-NEXT: 3 2.6 0.8 1.3 <total>

View File

@ -5,11 +5,11 @@ fxrstor (%rsp)
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 100
# CHECK-NEXT: Total Cycles: 6403
# CHECK-NEXT: Total Cycles: 4720
# CHECK-NEXT: Total uOps: 9000
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.41
# CHECK-NEXT: uOps Per Cycle: 1.91
# CHECK-NEXT: IPC: 0.02
# CHECK-NEXT: Block RThroughput: 22.5

View File

@ -12,12 +12,12 @@ retq
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 1304
# CHECK-NEXT: Total Cycles: 413
# CHECK-NEXT: Total uOps: 1300
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 0.46
# CHECK-NEXT: uOps Per Cycle: 3.15
# CHECK-NEXT: IPC: 1.45
# CHECK-NEXT: Block RThroughput: 3.3
# CHECK: Instruction Info:
@ -50,39 +50,39 @@ retq
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - - 1.74 1.74 1.67 1.68 2.00 1.74 1.78 1.65
# CHECK-NEXT: - - 1.99 1.50 1.66 1.67 2.00 1.52 1.99 1.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: - - - - 0.30 - 1.00 1.00 - 0.70 stmxcsr -4(%rsp)
# CHECK-NEXT: - - 0.03 0.53 - - - 0.23 0.21 - movl $-24577, %eax
# CHECK-NEXT: - - 0.22 0.58 0.35 0.65 - - 0.20 - andl -4(%rsp), %eax
# CHECK-NEXT: - - - - 0.05 - 1.00 - - 0.95 movl %eax, -8(%rsp)
# CHECK-NEXT: - - 1.00 0.21 0.34 0.66 - 0.42 0.37 - ldmxcsr -8(%rsp)
# CHECK-NEXT: - - 0.49 0.42 0.63 0.37 - 0.09 1.00 - retq
# CHECK-NEXT: - - - - 0.16 - 1.00 1.00 - 0.84 stmxcsr -4(%rsp)
# CHECK-NEXT: - - 0.49 0.49 - - - 0.01 0.01 - movl $-24577, %eax
# CHECK-NEXT: - - 0.49 0.02 0.49 0.51 - 0.01 0.48 - andl -4(%rsp), %eax
# CHECK-NEXT: - - - - 0.17 - 1.00 - - 0.83 movl %eax, -8(%rsp)
# CHECK-NEXT: - - 1.00 0.01 0.33 0.67 - 0.49 0.50 - ldmxcsr -8(%rsp)
# CHECK-NEXT: - - 0.01 0.98 0.51 0.49 - 0.01 1.00 - retq
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789
# CHECK-NEXT: Index 0123456789 0123456789 012
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 01234
# CHECK: [0,0] DeeER. . . . . . . . . stmxcsr -4(%rsp)
# CHECK-NEXT: [0,1] DeE-R. . . . . . . . . movl $-24577, %eax
# CHECK-NEXT: [0,2] .DeeeeeeER. . . . . . . . andl -4(%rsp), %eax
# CHECK-NEXT: [0,3] .D======eER . . . . . . . movl %eax, -8(%rsp)
# CHECK-NEXT: [0,4] . D=====eeeeeeeER . . . . . . ldmxcsr -8(%rsp)
# CHECK-NEXT: [0,5] . DeeeeeeeE----R . . . . . . retq
# CHECK-NEXT: [1,0] . D==========eeER . . . . . . stmxcsr -4(%rsp)
# CHECK-NEXT: [1,1] . DeE-----------R . . . . . . movl $-24577, %eax
# CHECK-NEXT: [1,2] . D=========eeeeeeER . . . . . andl -4(%rsp), %eax
# CHECK-NEXT: [1,3] . D===============eER . . . . . movl %eax, -8(%rsp)
# CHECK-NEXT: [1,4] . .D==============eeeeeeeER. . . . ldmxcsr -8(%rsp)
# CHECK-NEXT: [1,5] . . DeeeeeeeE-------------R. . . . retq
# CHECK-NEXT: [2,0] . . D===================eeER . . . stmxcsr -4(%rsp)
# CHECK-NEXT: [2,1] . . DeE--------------------R . . . movl $-24577, %eax
# CHECK-NEXT: [2,2] . . D==================eeeeeeER . . andl -4(%rsp), %eax
# CHECK-NEXT: [2,3] . . D========================eER . . movl %eax, -8(%rsp)
# CHECK-NEXT: [2,4] . . D=======================eeeeeeeER ldmxcsr -8(%rsp)
# CHECK-NEXT: [2,5] . . .DeeeeeeeE----------------------R retq
# CHECK: [0,0] DeeER. . . . . stmxcsr -4(%rsp)
# CHECK-NEXT: [0,1] DeE-R. . . . . movl $-24577, %eax
# CHECK-NEXT: [0,2] .DeeeeeeER. . . . andl -4(%rsp), %eax
# CHECK-NEXT: [0,3] .D======eER . . . movl %eax, -8(%rsp)
# CHECK-NEXT: [0,4] . D=====eeeeeeeER . . ldmxcsr -8(%rsp)
# CHECK-NEXT: [0,5] . DeeeeeeeE----R . . retq
# CHECK-NEXT: [1,0] . D====eeE----R . . stmxcsr -4(%rsp)
# CHECK-NEXT: [1,1] . DeE---------R . . movl $-24577, %eax
# CHECK-NEXT: [1,2] . DeeeeeeE---R . . andl -4(%rsp), %eax
# CHECK-NEXT: [1,3] . D======eE--R . . movl %eax, -8(%rsp)
# CHECK-NEXT: [1,4] . .D=====eeeeeeeER . ldmxcsr -8(%rsp)
# CHECK-NEXT: [1,5] . . D=eeeeeeeE---R . retq
# CHECK-NEXT: [2,0] . . D====eeE----R . stmxcsr -4(%rsp)
# CHECK-NEXT: [2,1] . . DeE---------R . movl $-24577, %eax
# CHECK-NEXT: [2,2] . . DeeeeeeE---R . andl -4(%rsp), %eax
# CHECK-NEXT: [2,3] . . D======eE--R . movl %eax, -8(%rsp)
# CHECK-NEXT: [2,4] . . D=====eeeeeeeER ldmxcsr -8(%rsp)
# CHECK-NEXT: [2,5] . . .DeeeeeeeE----R retq
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -91,10 +91,10 @@ retq
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 10.7 0.3 0.0 stmxcsr -4(%rsp)
# CHECK-NEXT: 1. 3 1.0 1.0 10.7 movl $-24577, %eax
# CHECK-NEXT: 2. 3 10.0 0.3 0.0 andl -4(%rsp), %eax
# CHECK-NEXT: 3. 3 16.0 0.0 0.0 movl %eax, -8(%rsp)
# CHECK-NEXT: 4. 3 15.0 0.0 0.0 ldmxcsr -8(%rsp)
# CHECK-NEXT: 5. 3 1.0 1.0 13.0 retq
# CHECK-NEXT: 3 8.9 0.4 3.9 <total>
# CHECK-NEXT: 0. 3 3.7 1.0 2.7 stmxcsr -4(%rsp)
# CHECK-NEXT: 1. 3 1.0 1.0 6.3 movl $-24577, %eax
# CHECK-NEXT: 2. 3 1.0 1.0 2.0 andl -4(%rsp), %eax
# CHECK-NEXT: 3. 3 7.0 0.0 1.3 movl %eax, -8(%rsp)
# CHECK-NEXT: 4. 3 6.0 0.0 0.0 ldmxcsr -8(%rsp)
# CHECK-NEXT: 5. 3 1.3 1.3 3.7 retq
# CHECK-NEXT: 3 3.3 0.7 2.7 <total>

View File

@ -0,0 +1,25 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=false -summary-view=false -show-barriers < %s | FileCheck %s
clflush (%rax)
lfence
mfence
sfence
maskmovdqu %xmm0, %xmm1
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK-NEXT: [7]: LoadBarrier
# CHECK-NEXT: [8]: StoreBarrier
# CHECK: [1] [2] [3] [4] [5] [6] [7] [8] Instructions:
# CHECK-NEXT: 4 5 1.00 * * U clflush (%rax)
# CHECK-NEXT: 1 1 1.00 * * U * lfence
# CHECK-NEXT: 1 1 1.00 * * U * * mfence
# CHECK-NEXT: 1 1 1.00 * * U * sfence
# CHECK-NEXT: 1 1 1.00 * * U maskmovdqu %xmm0, %xmm1

View File

@ -32,14 +32,30 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
TempStream << "\n\nInstruction Info:\n";
TempStream << "[1]: #uOps\n[2]: Latency\n[3]: RThroughput\n"
<< "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n";
if (PrintBarriers) {
TempStream << "[7]: LoadBarrier\n[8]: StoreBarrier\n";
}
if (PrintEncodings) {
TempStream << "[7]: Encoding Size\n";
TempStream << "\n[1] [2] [3] [4] [5] [6] [7] "
<< "Encodings: Instructions:\n";
if (PrintBarriers) {
TempStream << "[9]: Encoding Size\n";
TempStream << "\n[1] [2] [3] [4] [5] [6] [7] [8] "
<< "[9] Encodings: Instructions:\n";
} else {
TempStream << "[7]: Encoding Size\n";
TempStream << "\n[1] [2] [3] [4] [5] [6] [7] "
<< "Encodings: Instructions:\n";
}
} else {
TempStream << "\n[1] [2] [3] [4] [5] [6] Instructions:\n";
if (PrintBarriers) {
TempStream << "\n[1] [2] [3] [4] [5] [6] [7] [8] "
<< "Instructions:\n";
} else {
TempStream << "\n[1] [2] [3] [4] [5] [6] "
<< "Instructions:\n";
}
}
int Index = 0;
for (const auto &I : enumerate(zip(IIVD, Source))) {
const InstructionInfoViewData &IIVDEntry = std::get<0>(I.value());
@ -68,6 +84,13 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
TempStream << (IIVDEntry.mayStore ? " * " : " ");
TempStream << (IIVDEntry.hasUnmodeledSideEffects ? " U " : " ");
if (PrintBarriers) {
TempStream << (LoweredInsts[Index]->isALoadBarrier() ? " * "
: " ");
TempStream << (LoweredInsts[Index]->isAStoreBarrier() ? " * "
: " ");
}
if (PrintEncodings) {
StringRef Encoding(CE.getEncoding(I.index()));
unsigned EncodingSize = Encoding.size();
@ -83,6 +106,7 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
const MCInst &Inst = std::get<1>(I.value());
TempStream << printInstructionString(Inst) << '\n';
++Index;
}
TempStream.flush();

View File

@ -54,6 +54,9 @@ class InstructionInfoView : public InstructionView {
const llvm::MCInstrInfo &MCII;
CodeEmitter &CE;
bool PrintEncodings;
/// When true, printView() adds the "[7]: LoadBarrier" / "[8]: StoreBarrier"
/// legend entries and prints the two matching per-instruction columns.
/// Initialized from the constructor's ShouldPrintBarriers argument.
bool PrintBarriers;
/// Owning pointer to a lowered Instruction; this is the element type of the
/// sequence the view reads barrier flags from.
using UniqueInst = std::unique_ptr<Instruction>;
/// Lowered instructions queried by printView() through isALoadBarrier() and
/// isAStoreBarrier(). printView() indexes this with a counter that advances
/// once per printed source instruction.
/// NOTE(review): this therefore appears to require one entry per source
/// instruction, in the same order -- confirm against the caller. ArrayRef is
/// presumably a non-owning view, so the referenced sequence must outlive this
/// view -- confirm.
ArrayRef<UniqueInst> LoweredInsts;
struct InstructionInfoViewData {
unsigned NumMicroOpcodes = 0;
@ -72,9 +75,12 @@ public:
InstructionInfoView(const llvm::MCSubtargetInfo &ST,
const llvm::MCInstrInfo &II, CodeEmitter &C,
bool ShouldPrintEncodings, llvm::ArrayRef<llvm::MCInst> S,
llvm::MCInstPrinter &IP)
llvm::MCInstPrinter &IP,
ArrayRef<UniqueInst> LoweredInsts,
bool ShouldPrintBarriers)
: InstructionView(ST, IP, S), MCII(II), CE(C),
PrintEncodings(ShouldPrintEncodings) {}
PrintEncodings(ShouldPrintEncodings),
PrintBarriers(ShouldPrintBarriers), LoweredInsts(LoweredInsts) {}
void printView(llvm::raw_ostream &OS) const override;
StringRef getNameAsString() const override { return "InstructionInfoView"; }

View File

@ -219,6 +219,11 @@ static cl::opt<bool> ShowEncoding(
cl::desc("Print encoding information in the instruction info view"),
cl::cat(ViewOptions), cl::init(false));
// -show-barriers: opt-in flag (defaults to false), registered in the
// ViewOptions category. Its value is passed to InstructionInfoView as the
// ShouldPrintBarriers argument, which enables the LoadBarrier/StoreBarrier
// columns in the instruction info view.
static cl::opt<bool> ShowBarriers(
"show-barriers",
cl::desc("Print memory barrier information in the instruction info view"),
cl::cat(ViewOptions), cl::init(false));
static cl::opt<bool> DisableCustomBehaviour(
"disable-cb",
cl::desc(
@ -504,7 +509,7 @@ int main(int argc, char **argv) {
// (which does nothing).
IPP = std::make_unique<mca::InstrPostProcess>(*STI, *MCII);
std::vector<std::unique_ptr<mca::Instruction>> LoweredSequence;
SmallVector<std::unique_ptr<mca::Instruction>> LoweredSequence;
for (const MCInst &MCI : Insts) {
Expected<std::unique_ptr<mca::Instruction>> Inst =
IB.createInstruction(MCI);
@ -548,7 +553,8 @@ int main(int argc, char **argv) {
// Create the views for this pipeline, execute, and emit a report.
if (PrintInstructionInfoView) {
Printer.addView(std::make_unique<mca::InstructionInfoView>(
*STI, *MCII, CE, ShowEncoding, Insts, *IP));
*STI, *MCII, CE, ShowEncoding, Insts, *IP, LoweredSequence,
ShowBarriers));
}
Printer.addView(
std::make_unique<mca::ResourcePressureView>(*STI, *IP, Insts));
@ -624,7 +630,8 @@ int main(int argc, char **argv) {
if (PrintInstructionInfoView)
Printer.addView(std::make_unique<mca::InstructionInfoView>(
*STI, *MCII, CE, ShowEncoding, Insts, *IP));
*STI, *MCII, CE, ShowEncoding, Insts, *IP, LoweredSequence,
ShowBarriers));
// Fetch custom Views that are to be placed after the InstructionInfoView.
// Refer to the comment paired with the CB->getStartViews(*IP, Insts); line