forked from OSchip/llvm-project
[MCA] Switching from conservatively guessing which instructions are
memory-barrier instructions to providing targets and developers a convenient way to explicitly declare which instructions are memory-barriers. Differential Revision: https://reviews.llvm.org/D116779
This commit is contained in:
parent
e7cb716ef9
commit
85e6e748d4
|
@ -182,6 +182,11 @@ option specifies "``-``", then the output will also be sent to standard output.
|
|||
|
||||
Enable the printing of instruction encodings within the instruction info view.
|
||||
|
||||
.. option:: -show-barriers
|
||||
|
||||
Enable the printing of LoadBarrier and StoreBarrier flags within the
|
||||
instruction info view.
|
||||
|
||||
.. option:: -all-stats
|
||||
|
||||
Print all hardware statistics. This enables extra statistics related to the
|
||||
|
@ -949,15 +954,16 @@ cache. It only knows if an instruction "MayLoad" and/or "MayStore." For
|
|||
loads, the scheduling model provides an "optimistic" load-to-use latency (which
|
||||
usually matches the load-to-use latency for when there is a hit in the L1D).
|
||||
|
||||
:program:`llvm-mca` does not know about serializing operations or memory-barrier
|
||||
like instructions. The LSUnit conservatively assumes that an instruction which
|
||||
has both "MayLoad" and unmodeled side effects behaves like a "soft"
|
||||
load-barrier. That means, it serializes loads without forcing a flush of the
|
||||
load queue. Similarly, instructions that "MayStore" and have unmodeled side
|
||||
effects are treated like store barriers. A full memory barrier is a "MayLoad"
|
||||
and "MayStore" instruction with unmodeled side effects. This is inaccurate, but
|
||||
it is the best that we can do at the moment with the current information
|
||||
available in LLVM.
|
||||
:program:`llvm-mca` does not (on its own) know about serializing operations or
|
||||
memory-barrier like instructions. The LSUnit used to conservatively use an
|
||||
instruction's "MayLoad", "MayStore", and unmodeled side effects flags to
|
||||
determine whether an instruction should be treated as a memory-barrier. This was
|
||||
inaccurate in general and was changed so that now each instruction has an
|
||||
IsAStoreBarrier and IsALoadBarrier flag. These flags are mca-specific and
|
||||
default to false for every instruction. If any instruction should have either of
|
||||
these flags set, it should be done within the target's InstrPostProcess class.
|
||||
For an example, look at the `X86InstrPostProcess::postProcessInstruction` method
|
||||
within `llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp`.
|
||||
|
||||
A load/store barrier consumes one entry of the load/store queue. A load/store
|
||||
barrier enforces ordering of loads/stores. A younger load cannot pass a load
|
||||
|
|
|
@ -43,6 +43,10 @@ public:
|
|||
|
||||
virtual ~InstrPostProcess() {}
|
||||
|
||||
/// This method can be overridden by targets to modify the mca::Instruction
|
||||
/// object after it has been lowered from the MCInst.
|
||||
/// This is generally a less disruptive alternative to modifying the
|
||||
/// scheduling model.
|
||||
virtual void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
|
||||
const MCInst &MCI) {}
|
||||
};
|
||||
|
|
|
@ -517,9 +517,14 @@ class InstructionBase {
|
|||
// Instruction opcode which can be used by mca::CustomBehaviour
|
||||
unsigned Opcode;
|
||||
|
||||
// Flags used by the LSUnit.
|
||||
bool IsALoadBarrier;
|
||||
bool IsAStoreBarrier;
|
||||
|
||||
public:
|
||||
InstructionBase(const InstrDesc &D, const unsigned Opcode)
|
||||
: Desc(D), IsOptimizableMove(false), Operands(0), Opcode(Opcode) {}
|
||||
: Desc(D), IsOptimizableMove(false), Operands(0), Opcode(Opcode),
|
||||
IsALoadBarrier(false), IsAStoreBarrier(false) {}
|
||||
|
||||
SmallVectorImpl<WriteState> &getDefs() { return Defs; }
|
||||
ArrayRef<WriteState> getDefs() const { return Defs; }
|
||||
|
@ -530,6 +535,10 @@ public:
|
|||
unsigned getLatency() const { return Desc.MaxLatency; }
|
||||
unsigned getNumMicroOps() const { return Desc.NumMicroOps; }
|
||||
unsigned getOpcode() const { return Opcode; }
|
||||
bool isALoadBarrier() const { return IsALoadBarrier; }
|
||||
bool isAStoreBarrier() const { return IsAStoreBarrier; }
|
||||
void setLoadBarrier(bool IsBarrier) { IsALoadBarrier = IsBarrier; }
|
||||
void setStoreBarrier(bool IsBarrier) { IsAStoreBarrier = IsBarrier; }
|
||||
|
||||
/// Return the MCAOperand which corresponds to index Idx within the original
|
||||
/// MCInst.
|
||||
|
|
|
@ -68,7 +68,8 @@ void LSUnitBase::dump() const {
|
|||
|
||||
unsigned LSUnit::dispatch(const InstRef &IR) {
|
||||
const InstrDesc &Desc = IR.getInstruction()->getDesc();
|
||||
unsigned IsMemBarrier = Desc.HasSideEffects;
|
||||
bool IsStoreBarrier = IR.getInstruction()->isAStoreBarrier();
|
||||
bool IsLoadBarrier = IR.getInstruction()->isALoadBarrier();
|
||||
assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!");
|
||||
|
||||
if (Desc.MayLoad)
|
||||
|
@ -111,12 +112,12 @@ unsigned LSUnit::dispatch(const InstRef &IR) {
|
|||
|
||||
|
||||
CurrentStoreGroupID = NewGID;
|
||||
if (IsMemBarrier)
|
||||
if (IsStoreBarrier)
|
||||
CurrentStoreBarrierGroupID = NewGID;
|
||||
|
||||
if (Desc.MayLoad) {
|
||||
CurrentLoadGroupID = NewGID;
|
||||
if (IsMemBarrier)
|
||||
if (IsLoadBarrier)
|
||||
CurrentLoadBarrierGroupID = NewGID;
|
||||
}
|
||||
|
||||
|
@ -141,7 +142,7 @@ unsigned LSUnit::dispatch(const InstRef &IR) {
|
|||
// However that group has already started execution, so we cannot add
|
||||
// this load to it.
|
||||
bool ShouldCreateANewGroup =
|
||||
IsMemBarrier || !ImmediateLoadDominator ||
|
||||
IsLoadBarrier || !ImmediateLoadDominator ||
|
||||
CurrentLoadBarrierGroupID == ImmediateLoadDominator ||
|
||||
ImmediateLoadDominator <= CurrentStoreGroupID ||
|
||||
getGroup(ImmediateLoadDominator).isExecuting();
|
||||
|
@ -161,7 +162,7 @@ unsigned LSUnit::dispatch(const InstRef &IR) {
|
|||
}
|
||||
|
||||
// A load barrier may not pass a previous load or load barrier.
|
||||
if (IsMemBarrier) {
|
||||
if (IsLoadBarrier) {
|
||||
if (ImmediateLoadDominator) {
|
||||
MemoryGroup &LoadGroup = getGroup(ImmediateLoadDominator);
|
||||
LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: ("
|
||||
|
@ -181,7 +182,7 @@ unsigned LSUnit::dispatch(const InstRef &IR) {
|
|||
}
|
||||
|
||||
CurrentLoadGroupID = NewGID;
|
||||
if (IsMemBarrier)
|
||||
if (IsLoadBarrier)
|
||||
CurrentLoadBarrierGroupID = NewGID;
|
||||
return NewGID;
|
||||
}
|
||||
|
|
|
@ -109,5 +109,6 @@ add_llvm_target(X86CodeGen ${sources}
|
|||
|
||||
add_subdirectory(AsmParser)
|
||||
add_subdirectory(Disassembler)
|
||||
add_subdirectory(MCA)
|
||||
add_subdirectory(MCTargetDesc)
|
||||
add_subdirectory(TargetInfo)
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
# Declares the LLVMX86TargetMCA component library, built from
# X86CustomBehaviour.cpp, linked against the components listed under
# LINK_COMPONENTS and added to the X86 component group.
add_llvm_component_library(LLVMX86TargetMCA
|
||||
X86CustomBehaviour.cpp
|
||||
|
||||
LINK_COMPONENTS
|
||||
MC
|
||||
MCParser
|
||||
X86Desc
|
||||
X86Info
|
||||
Support
|
||||
MCA
|
||||
|
||||
ADD_TO_COMPONENT
|
||||
X86
|
||||
)
|
|
@ -0,0 +1,64 @@
|
|||
//===------------------- X86CustomBehaviour.cpp -----------------*-C++ -* -===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
/// \file
|
||||
///
|
||||
/// This file implements methods from the X86InstrPostProcess class.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "X86CustomBehaviour.h"
|
||||
#include "TargetInfo/X86TargetInfo.h"
|
||||
#include "X86InstrInfo.h"
|
||||
#include "llvm/MC/TargetRegistry.h"
|
||||
#include "llvm/Support/WithColor.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace mca {
|
||||
|
||||
void X86InstrPostProcess::setMemBarriers(std::unique_ptr<Instruction> &Inst,
|
||||
const MCInst &MCI) {
|
||||
switch (MCI.getOpcode()) {
|
||||
case X86::MFENCE:
|
||||
Inst->setLoadBarrier(true);
|
||||
Inst->setStoreBarrier(true);
|
||||
break;
|
||||
case X86::LFENCE:
|
||||
Inst->setLoadBarrier(true);
|
||||
break;
|
||||
case X86::SFENCE:
|
||||
Inst->setStoreBarrier(true);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void X86InstrPostProcess::postProcessInstruction(
|
||||
std::unique_ptr<Instruction> &Inst, const MCInst &MCI) {
|
||||
// Currently, we only modify certain instructions' IsALoadBarrier and
|
||||
// IsAStoreBarrier flags.
|
||||
setMemBarriers(Inst, MCI);
|
||||
}
|
||||
|
||||
} // namespace mca
|
||||
} // namespace llvm
|
||||
|
||||
using namespace llvm;
|
||||
using namespace mca;
|
||||
|
||||
/// Factory callback registered with the TargetRegistry for the X86 targets.
/// Heap-allocates an X86InstrPostProcess bound to \p STI and \p MCII and
/// returns it as a raw InstrPostProcess pointer.
/// NOTE(review): the caller presumably takes ownership of the returned
/// object -- confirm against the TargetRegistry consumer.
static InstrPostProcess *createX86InstrPostProcess(const MCSubtargetInfo &STI,
                                                   const MCInstrInfo &MCII) {
  return new X86InstrPostProcess(STI, MCII);
}
|
||||
|
||||
/// Extern function to initialize the targets for the X86 backend
|
||||
|
||||
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86TargetMCA() {
|
||||
TargetRegistry::RegisterInstrPostProcess(getTheX86_32Target(),
|
||||
createX86InstrPostProcess);
|
||||
TargetRegistry::RegisterInstrPostProcess(getTheX86_64Target(),
|
||||
createX86InstrPostProcess);
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
//===-------------------- X86CustomBehaviour.h ------------------*-C++ -* -===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
/// \file
|
||||
///
|
||||
/// This file defines the X86InstrPostProcess class which inherits from
|
||||
/// InstrPostProcess. This class is used by the tool llvm-mca to enforce
|
||||
/// target specific behaviour that is not expressed well enough in the
|
||||
/// scheduling model for mca to enforce it automatically.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIB_TARGET_X86_MCA_X86CUSTOMBEHAVIOUR_H
|
||||
#define LLVM_LIB_TARGET_X86_MCA_X86CUSTOMBEHAVIOUR_H
|
||||
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/MCA/CustomBehaviour.h"
|
||||
#include "llvm/Support/TargetParser.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace mca {
|
||||
|
||||
class X86InstrPostProcess : public InstrPostProcess {
|
||||
void processWaitCnt(std::unique_ptr<Instruction> &Inst, const MCInst &MCI);
|
||||
|
||||
/// Called within X86InstrPostProcess to specify certain instructions
|
||||
/// as load and store barriers.
|
||||
void setMemBarriers(std::unique_ptr<Instruction> &Inst, const MCInst &MCI);
|
||||
|
||||
public:
|
||||
X86InstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)
|
||||
: InstrPostProcess(STI, MCII) {}
|
||||
|
||||
~X86InstrPostProcess() {}
|
||||
|
||||
void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
|
||||
const MCInst &MCI) override;
|
||||
};
|
||||
|
||||
} // namespace mca
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
|
@ -10,12 +10,12 @@ ldr x3, [x10]
|
|||
|
||||
# CHECK: Iterations: 3
|
||||
# CHECK-NEXT: Instructions: 18
|
||||
# CHECK-NEXT: Total Cycles: 19
|
||||
# CHECK-NEXT: Total Cycles: 16
|
||||
# CHECK-NEXT: Total uOps: 18
|
||||
|
||||
# CHECK: Dispatch Width: 2
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.95
|
||||
# CHECK-NEXT: IPC: 0.95
|
||||
# CHECK-NEXT: uOps Per Cycle: 1.13
|
||||
# CHECK-NEXT: IPC: 1.13
|
||||
# CHECK-NEXT: Block RThroughput: 3.0
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
|
@ -62,27 +62,27 @@ ldr x3, [x10]
|
|||
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldr x3, [x10]
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 012345678
|
||||
# CHECK-NEXT: 012345
|
||||
# CHECK-NEXT: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DE . . . . str x1, [x10]
|
||||
# CHECK-NEXT: [0,1] .DE . . . . str x1, [x10]
|
||||
# CHECK-NEXT: [0,2] .DeeE. . . . ldr x2, [x10]
|
||||
# CHECK-NEXT: [0,3] . DE. . . . nop
|
||||
# CHECK-NEXT: [0,4] . DeeE . . . ldr x2, [x10]
|
||||
# CHECK-NEXT: [0,5] . DeeE . . . ldr x3, [x10]
|
||||
# CHECK-NEXT: [1,0] . DE . . . str x1, [x10]
|
||||
# CHECK-NEXT: [1,1] . .DE . . . str x1, [x10]
|
||||
# CHECK-NEXT: [1,2] . .DeeE. . . ldr x2, [x10]
|
||||
# CHECK-NEXT: [1,3] . . DE. . . nop
|
||||
# CHECK-NEXT: [1,4] . . DeeE . . ldr x2, [x10]
|
||||
# CHECK-NEXT: [1,5] . . DeeE . . ldr x3, [x10]
|
||||
# CHECK-NEXT: [2,0] . . DE . . str x1, [x10]
|
||||
# CHECK-NEXT: [2,1] . . .DE . . str x1, [x10]
|
||||
# CHECK-NEXT: [2,2] . . .DeeE. . ldr x2, [x10]
|
||||
# CHECK-NEXT: [2,3] . . . DE. . nop
|
||||
# CHECK-NEXT: [2,4] . . . DeeE. ldr x2, [x10]
|
||||
# CHECK-NEXT: [2,5] . . . DeeE ldr x3, [x10]
|
||||
# CHECK: [0,0] DE . . . str x1, [x10]
|
||||
# CHECK-NEXT: [0,1] .DE . . . str x1, [x10]
|
||||
# CHECK-NEXT: [0,2] .DeeE. . . ldr x2, [x10]
|
||||
# CHECK-NEXT: [0,3] . DE. . . nop
|
||||
# CHECK-NEXT: [0,4] . DeeE . . ldr x2, [x10]
|
||||
# CHECK-NEXT: [0,5] . DeeE . . ldr x3, [x10]
|
||||
# CHECK-NEXT: [1,0] . DE . . str x1, [x10]
|
||||
# CHECK-NEXT: [1,1] . DE . . str x1, [x10]
|
||||
# CHECK-NEXT: [1,2] . DeeE . . ldr x2, [x10]
|
||||
# CHECK-NEXT: [1,3] . . DE . . nop
|
||||
# CHECK-NEXT: [1,4] . . DeeE . ldr x2, [x10]
|
||||
# CHECK-NEXT: [1,5] . . DeeE . ldr x3, [x10]
|
||||
# CHECK-NEXT: [2,0] . . DE. . str x1, [x10]
|
||||
# CHECK-NEXT: [2,1] . . DE . str x1, [x10]
|
||||
# CHECK-NEXT: [2,2] . . DeeE . ldr x2, [x10]
|
||||
# CHECK-NEXT: [2,3] . . .DE . nop
|
||||
# CHECK-NEXT: [2,4] . . .DeeE. ldr x2, [x10]
|
||||
# CHECK-NEXT: [2,5] . . . DeeE ldr x3, [x10]
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
|
|
@ -40,12 +40,12 @@ s_waitcnt vmcnt(0) lgkmcnt(0)
|
|||
|
||||
# CHECK: Iterations: 1
|
||||
# CHECK-NEXT: Instructions: 36
|
||||
# CHECK-NEXT: Total Cycles: 331
|
||||
# CHECK-NEXT: Total Cycles: 94
|
||||
# CHECK-NEXT: Total uOps: 36
|
||||
|
||||
# CHECK: Dispatch Width: 1
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.11
|
||||
# CHECK-NEXT: IPC: 0.11
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.38
|
||||
# CHECK-NEXT: IPC: 0.38
|
||||
# CHECK-NEXT: Block RThroughput: 36.0
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
|
@ -147,45 +147,45 @@ s_waitcnt vmcnt(0) lgkmcnt(0)
|
|||
# CHECK-NEXT: - - - 1.00 - - - s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0
|
||||
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789
|
||||
# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123
|
||||
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . s_load_dwordx2 s[2:3], s[0:1], 0x24
|
||||
# CHECK-NEXT: [0,1] .DeeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . s_load_dwordx2 s[0:1], s[0:1], 0x2c
|
||||
# CHECK-NEXT: [0,2] . .DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . s_waitcnt lgkmcnt(0)
|
||||
# CHECK-NEXT: [0,3] . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_mov_b32_e32 v0, s2
|
||||
# CHECK-NEXT: [0,4] . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . v_mov_b32_e32 v1, s3
|
||||
# CHECK-NEXT: [0,5] . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . flat_load_dword v2, v[0:1]
|
||||
# CHECK-NEXT: [0,6] . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . flat_load_dword v3, v[0:1] offset:8
|
||||
# CHECK-NEXT: [0,7] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . . . . . . . . flat_load_dword v4, v[0:1] offset:16
|
||||
# CHECK-NEXT: [0,8] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. flat_load_dword v5, v[0:1] offset:24
|
||||
# CHECK-NEXT: [0,9] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . v_mov_b32_e32 v0, s0
|
||||
# CHECK-NEXT: [0,10] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . . v_mov_b32_e32 v1, s1
|
||||
# CHECK-NEXT: [0,11] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . v_mov_b32_e32 v6, s6
|
||||
# CHECK-NEXT: [0,12] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . v_mov_b32_e32 v7, s7
|
||||
# CHECK-NEXT: [0,13] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . v_mov_b32_e32 v8, s8
|
||||
# CHECK-NEXT: [0,14] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . v_mov_b32_e32 v9, s9
|
||||
# CHECK-NEXT: [0,15] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . v_mov_b32_e32 v10, s10
|
||||
# CHECK-NEXT: [0,16] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . v_mov_b32_e32 v11, s11
|
||||
# CHECK-NEXT: [0,17] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . v_mov_b32_e32 v12, s12
|
||||
# CHECK-NEXT: [0,18] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . v_mov_b32_e32 v13, s13
|
||||
# CHECK-NEXT: [0,19] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . v_mov_b32_e32 v14, s14
|
||||
# CHECK-NEXT: [0,20] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . v_mov_b32_e32 v15, s15
|
||||
# CHECK-NEXT: [0,21] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . v_mov_b32_e32 v16, s16
|
||||
# CHECK-NEXT: [0,22] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . v_mov_b32_e32 v17, s17
|
||||
# CHECK-NEXT: [0,23] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . v_mov_b32_e32 v18, s18
|
||||
# CHECK-NEXT: [0,24] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . v_mov_b32_e32 v19, s19
|
||||
# CHECK-NEXT: [0,25] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . v_mov_b32_e32 v20, s20
|
||||
# CHECK-NEXT: [0,26] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . v_mov_b32_e32 v21, s21
|
||||
# CHECK-NEXT: [0,27] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . v_mov_b32_e32 v22, s22
|
||||
# CHECK-NEXT: [0,28] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . v_mov_b32_e32 v23, s23
|
||||
# CHECK-NEXT: [0,29] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . v_mov_b32_e32 v24, s24
|
||||
# CHECK-NEXT: [0,30] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . v_mov_b32_e32 v25, s25
|
||||
# CHECK-NEXT: [0,31] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . v_mov_b32_e32 v26, s26
|
||||
# CHECK-NEXT: [0,32] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . v_mov_b32_e32 v27, s27
|
||||
# CHECK-NEXT: [0,33] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . v_mov_b32_e32 v28, s28
|
||||
# CHECK-NEXT: [0,34] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . v_mov_b32_e32 v29, s29
|
||||
# CHECK-NEXT: [0,35] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
# CHECK: [0,0] DeeeeE . . . . . . . . . . . . . . . . . . s_load_dwordx2 s[2:3], s[0:1], 0x24
|
||||
# CHECK-NEXT: [0,1] .DeeeeE . . . . . . . . . . . . . . . . . . s_load_dwordx2 s[0:1], s[0:1], 0x2c
|
||||
# CHECK-NEXT: [0,2] . .DE . . . . . . . . . . . . . . . . . . s_waitcnt lgkmcnt(0)
|
||||
# CHECK-NEXT: [0,3] . . DE . . . . . . . . . . . . . . . . . . v_mov_b32_e32 v0, s2
|
||||
# CHECK-NEXT: [0,4] . . DE. . . . . . . . . . . . . . . . . . v_mov_b32_e32 v1, s3
|
||||
# CHECK-NEXT: [0,5] . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. . flat_load_dword v2, v[0:1]
|
||||
# CHECK-NEXT: [0,6] . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . flat_load_dword v3, v[0:1] offset:8
|
||||
# CHECK-NEXT: [0,7] . . .DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . flat_load_dword v4, v[0:1] offset:16
|
||||
# CHECK-NEXT: [0,8] . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE. flat_load_dword v5, v[0:1] offset:24
|
||||
# CHECK-NEXT: [0,9] . . . DE. . . . . . . . . . . . . . . . . v_mov_b32_e32 v0, s0
|
||||
# CHECK-NEXT: [0,10] . . . DE . . . . . . . . . . . . . . . . v_mov_b32_e32 v1, s1
|
||||
# CHECK-NEXT: [0,11] . . . DE . . . . . . . . . . . . . . . . v_mov_b32_e32 v6, s6
|
||||
# CHECK-NEXT: [0,12] . . . .DE . . . . . . . . . . . . . . . . v_mov_b32_e32 v7, s7
|
||||
# CHECK-NEXT: [0,13] . . . . DE . . . . . . . . . . . . . . . . v_mov_b32_e32 v8, s8
|
||||
# CHECK-NEXT: [0,14] . . . . DE. . . . . . . . . . . . . . . . v_mov_b32_e32 v9, s9
|
||||
# CHECK-NEXT: [0,15] . . . . DE . . . . . . . . . . . . . . . v_mov_b32_e32 v10, s10
|
||||
# CHECK-NEXT: [0,16] . . . . DE . . . . . . . . . . . . . . . v_mov_b32_e32 v11, s11
|
||||
# CHECK-NEXT: [0,17] . . . . .DE . . . . . . . . . . . . . . . v_mov_b32_e32 v12, s12
|
||||
# CHECK-NEXT: [0,18] . . . . . DE . . . . . . . . . . . . . . . v_mov_b32_e32 v13, s13
|
||||
# CHECK-NEXT: [0,19] . . . . . DE. . . . . . . . . . . . . . . v_mov_b32_e32 v14, s14
|
||||
# CHECK-NEXT: [0,20] . . . . . DE . . . . . . . . . . . . . . v_mov_b32_e32 v15, s15
|
||||
# CHECK-NEXT: [0,21] . . . . . DE . . . . . . . . . . . . . . v_mov_b32_e32 v16, s16
|
||||
# CHECK-NEXT: [0,22] . . . . . .DE . . . . . . . . . . . . . . v_mov_b32_e32 v17, s17
|
||||
# CHECK-NEXT: [0,23] . . . . . . DE . . . . . . . . . . . . . . v_mov_b32_e32 v18, s18
|
||||
# CHECK-NEXT: [0,24] . . . . . . DE. . . . . . . . . . . . . . v_mov_b32_e32 v19, s19
|
||||
# CHECK-NEXT: [0,25] . . . . . . DE . . . . . . . . . . . . . v_mov_b32_e32 v20, s20
|
||||
# CHECK-NEXT: [0,26] . . . . . . DE . . . . . . . . . . . . . v_mov_b32_e32 v21, s21
|
||||
# CHECK-NEXT: [0,27] . . . . . . .DE . . . . . . . . . . . . . v_mov_b32_e32 v22, s22
|
||||
# CHECK-NEXT: [0,28] . . . . . . . DE . . . . . . . . . . . . . v_mov_b32_e32 v23, s23
|
||||
# CHECK-NEXT: [0,29] . . . . . . . DE. . . . . . . . . . . . . v_mov_b32_e32 v24, s24
|
||||
# CHECK-NEXT: [0,30] . . . . . . . DE . . . . . . . . . . . . v_mov_b32_e32 v25, s25
|
||||
# CHECK-NEXT: [0,31] . . . . . . . DE . . . . . . . . . . . . v_mov_b32_e32 v26, s26
|
||||
# CHECK-NEXT: [0,32] . . . . . . . .DE . . . . . . . . . . . . v_mov_b32_e32 v27, s27
|
||||
# CHECK-NEXT: [0,33] . . . . . . . . DE . . . . . . . . . . . . v_mov_b32_e32 v28, s28
|
||||
# CHECK-NEXT: [0,34] . . . . . . . . DE. . . . . . . . . . . . v_mov_b32_e32 v29, s29
|
||||
# CHECK-NEXT: [0,35] . . . . . . . . . . . . . . . . . . . DE s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
|
|
@ -528,10 +528,10 @@ movaps %xmm3, (%rbx)
|
|||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movd %mm0, (%rax)
|
||||
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movd %mm1, (%rcx)
|
||||
# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movd %mm2, (%rdx)
|
||||
# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movd %mm3, (%rbx)
|
||||
# CHECK-NEXT: 1 2.5 0.3 0.0 <total>
|
||||
# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movd %mm1, (%rcx)
|
||||
# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movd %mm2, (%rdx)
|
||||
# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movd %mm3, (%rbx)
|
||||
# CHECK-NEXT: 1 2.5 1.0 0.0 <total>
|
||||
|
||||
# CHECK: [5] Code Region
|
||||
|
||||
|
|
|
@ -519,12 +519,12 @@ movaps %xmm3, (%rbx)
|
|||
|
||||
# CHECK: Iterations: 100
|
||||
# CHECK-NEXT: Instructions: 400
|
||||
# CHECK-NEXT: Total Cycles: 553
|
||||
# CHECK-NEXT: Total Cycles: 405
|
||||
# CHECK-NEXT: Total uOps: 400
|
||||
|
||||
# CHECK: Dispatch Width: 4
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.72
|
||||
# CHECK-NEXT: IPC: 0.72
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.99
|
||||
# CHECK-NEXT: IPC: 0.99
|
||||
# CHECK-NEXT: Block RThroughput: 4.0
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
|
@ -544,25 +544,24 @@ movaps %xmm3, (%rbx)
|
|||
# CHECK: Dynamic Dispatch Stall Cycles:
|
||||
# CHECK-NEXT: RAT - Register unavailable: 0
|
||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 57 (10.3%)
|
||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 347 (85.7%)
|
||||
# CHECK-NEXT: LQ - Load queue full: 0
|
||||
# CHECK-NEXT: SQ - Store queue full: 432 (78.1%)
|
||||
# CHECK-NEXT: SQ - Store queue full: 0
|
||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||
# CHECK-NEXT: USH - Uncategorised Structural Hazard: 0
|
||||
|
||||
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
||||
# CHECK-NEXT: [# dispatched], [# cycles]
|
||||
# CHECK-NEXT: 0, 364 (65.8%)
|
||||
# CHECK-NEXT: 1, 88 (15.9%)
|
||||
# CHECK-NEXT: 2, 4 (0.7%)
|
||||
# CHECK-NEXT: 3, 84 (15.2%)
|
||||
# CHECK-NEXT: 4, 13 (2.4%)
|
||||
# CHECK-NEXT: 0, 131 (32.3%)
|
||||
# CHECK-NEXT: 1, 174 (43.0%)
|
||||
# CHECK-NEXT: 2, 87 (21.5%)
|
||||
# CHECK-NEXT: 4, 13 (3.2%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 253 (45.8%)
|
||||
# CHECK-NEXT: 1, 200 (36.2%)
|
||||
# CHECK-NEXT: 2, 100 (18.1%)
|
||||
# CHECK-NEXT: 0, 105 (25.9%)
|
||||
# CHECK-NEXT: 1, 200 (49.4%)
|
||||
# CHECK-NEXT: 2, 100 (24.7%)
|
||||
|
||||
# CHECK: Scheduler's queue usage:
|
||||
# CHECK-NEXT: [1] Resource name.
|
||||
|
@ -571,10 +570,10 @@ movaps %xmm3, (%rbx)
|
|||
# CHECK-NEXT: [4] Total number of buffer entries.
|
||||
|
||||
# CHECK: [1] [2] [3] [4]
|
||||
# CHECK-NEXT: PdEX 23 40 40
|
||||
# CHECK-NEXT: PdFPU 23 40 64
|
||||
# CHECK-NEXT: PdLoad 3 22 40
|
||||
# CHECK-NEXT: PdStore 22 24 24
|
||||
# CHECK-NEXT: PdEX 36 40 40
|
||||
# CHECK-NEXT: PdFPU 36 40 64
|
||||
# CHECK-NEXT: PdLoad 20 23 40
|
||||
# CHECK-NEXT: PdStore 19 22 24
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0.0] - PdAGLU01
|
||||
|
@ -608,8 +607,8 @@ movaps %xmm3, (%rbx)
|
|||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
|
||||
# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - - - 3.00 - - - - 1.00 movd %mm0, (%rax)
|
||||
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - movd (%rcx), %mm1
|
||||
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - - movd (%rdx), %mm2
|
||||
# CHECK-NEXT: 3.00 - - - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - movd (%rcx), %mm1
|
||||
# CHECK-NEXT: - 3.00 - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - - movd (%rdx), %mm2
|
||||
# CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - - 1.00 movd %mm3, (%rbx)
|
||||
|
||||
# CHECK: Timeline view:
|
||||
|
@ -630,8 +629,8 @@ movaps %xmm3, (%rbx)
|
|||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movd %mm0, (%rax)
|
||||
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movd (%rcx), %mm1
|
||||
# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movd (%rdx), %mm2
|
||||
# CHECK-NEXT: 3. 1 4.0 1.0 1.0 movd %mm3, (%rbx)
|
||||
# CHECK-NEXT: 1 2.0 1.3 0.3 <total>
|
||||
# CHECK-NEXT: 3. 1 4.0 2.0 1.0 movd %mm3, (%rbx)
|
||||
# CHECK-NEXT: 1 2.0 1.5 0.3 <total>
|
||||
|
||||
# CHECK: [5] Code Region
|
||||
|
||||
|
|
|
@ -6,12 +6,12 @@ stmxcsr (%rsp)
|
|||
|
||||
# CHECK: Iterations: 2
|
||||
# CHECK-NEXT: Instructions: 4
|
||||
# CHECK-NEXT: Total Cycles: 205
|
||||
# CHECK-NEXT: Total Cycles: 103
|
||||
# CHECK-NEXT: Total uOps: 6
|
||||
|
||||
# CHECK: Dispatch Width: 4
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.03
|
||||
# CHECK-NEXT: IPC: 0.02
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.06
|
||||
# CHECK-NEXT: IPC: 0.04
|
||||
# CHECK-NEXT: Block RThroughput: 18.0
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
|
@ -28,10 +28,12 @@ stmxcsr (%rsp)
|
|||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789
|
||||
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123
|
||||
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 012
|
||||
|
||||
# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER. int3
|
||||
# CHECK-NEXT: [0,1] D====================================================================================================eER stmxcsr (%rsp)
|
||||
# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER int3
|
||||
# CHECK-NEXT: [0,1] DeE---------------------------------------------------------------------------------------------------R stmxcsr (%rsp)
|
||||
# CHECK-NEXT: [1,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER int3
|
||||
# CHECK-NEXT: [1,1] .D=================eE---------------------------------------------------------------------------------R stmxcsr (%rsp)
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -40,6 +42,6 @@ stmxcsr (%rsp)
|
|||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 2 51.5 0.5 0.0 int3
|
||||
# CHECK-NEXT: 1. 2 151.0 0.0 0.0 stmxcsr (%rsp)
|
||||
# CHECK-NEXT: 2 101.3 0.3 0.0 <total>
|
||||
# CHECK-NEXT: 0. 2 1.0 0.5 0.0 int3
|
||||
# CHECK-NEXT: 1. 2 9.5 9.0 90.0 stmxcsr (%rsp)
|
||||
# CHECK-NEXT: 2 5.3 4.8 45.0 <total>
|
||||
|
|
|
@ -514,12 +514,12 @@ vmovaps %ymm3, (%rbx)
|
|||
|
||||
# CHECK: Iterations: 100
|
||||
# CHECK-NEXT: Instructions: 400
|
||||
# CHECK-NEXT: Total Cycles: 803
|
||||
# CHECK-NEXT: Total Cycles: 603
|
||||
# CHECK-NEXT: Total uOps: 400
|
||||
|
||||
# CHECK: Dispatch Width: 4
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.50
|
||||
# CHECK-NEXT: IPC: 0.50
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.66
|
||||
# CHECK-NEXT: IPC: 0.66
|
||||
# CHECK-NEXT: Block RThroughput: 6.0
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
|
@ -541,21 +541,21 @@ vmovaps %ymm3, (%rbx)
|
|||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||
# CHECK-NEXT: LQ - Load queue full: 0
|
||||
# CHECK-NEXT: SQ - Store queue full: 748 (93.2%)
|
||||
# CHECK-NEXT: SQ - Store queue full: 560 (92.9%)
|
||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||
# CHECK-NEXT: USH - Uncategorised Structural Hazard: 0
|
||||
|
||||
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
|
||||
# CHECK-NEXT: [# dispatched], [# cycles]
|
||||
# CHECK-NEXT: 0, 422 (52.6%)
|
||||
# CHECK-NEXT: 1, 374 (46.6%)
|
||||
# CHECK-NEXT: 2, 1 (0.1%)
|
||||
# CHECK-NEXT: 4, 6 (0.7%)
|
||||
# CHECK-NEXT: 0, 222 (36.8%)
|
||||
# CHECK-NEXT: 1, 374 (62.0%)
|
||||
# CHECK-NEXT: 2, 1 (0.2%)
|
||||
# CHECK-NEXT: 4, 6 (1.0%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 403 (50.2%)
|
||||
# CHECK-NEXT: 1, 400 (49.8%)
|
||||
# CHECK-NEXT: 0, 203 (33.7%)
|
||||
# CHECK-NEXT: 1, 400 (66.3%)
|
||||
|
||||
# CHECK: Scheduler's queue usage:
|
||||
# CHECK-NEXT: [1] Resource name.
|
||||
|
@ -564,8 +564,8 @@ vmovaps %ymm3, (%rbx)
|
|||
# CHECK-NEXT: [4] Total number of buffer entries.
|
||||
|
||||
# CHECK: [1] [2] [3] [4]
|
||||
# CHECK-NEXT: PdEX 21 23 40
|
||||
# CHECK-NEXT: PdFPU 21 23 64
|
||||
# CHECK-NEXT: PdEX 21 22 40
|
||||
# CHECK-NEXT: PdFPU 21 22 64
|
||||
# CHECK-NEXT: PdLoad 0 0 40
|
||||
# CHECK-NEXT: PdStore 22 24 24
|
||||
|
||||
|
@ -606,13 +606,12 @@ vmovaps %ymm3, (%rbx)
|
|||
# CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - - 1.00 movd %mm3, (%rbx)
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0
|
||||
# CHECK-NEXT: Index 0123456789
|
||||
# CHECK-NEXT: Index 012345678
|
||||
|
||||
# CHECK: [0,0] DeeER. . movd %mm0, (%rax)
|
||||
# CHECK-NEXT: [0,1] D==eeER . movd %mm1, (%rcx)
|
||||
# CHECK-NEXT: [0,2] D====eeER . movd %mm2, (%rdx)
|
||||
# CHECK-NEXT: [0,3] D======eeER movd %mm3, (%rbx)
|
||||
# CHECK: [0,0] DeeER. . movd %mm0, (%rax)
|
||||
# CHECK-NEXT: [0,1] D=eeER . movd %mm1, (%rcx)
|
||||
# CHECK-NEXT: [0,2] D===eeER. movd %mm2, (%rdx)
|
||||
# CHECK-NEXT: [0,3] D====eeER movd %mm3, (%rbx)
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -622,10 +621,10 @@ vmovaps %ymm3, (%rbx)
|
|||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movd %mm0, (%rax)
|
||||
# CHECK-NEXT: 1. 1 3.0 0.0 0.0 movd %mm1, (%rcx)
|
||||
# CHECK-NEXT: 2. 1 5.0 0.0 0.0 movd %mm2, (%rdx)
|
||||
# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movd %mm3, (%rbx)
|
||||
# CHECK-NEXT: 1 4.0 0.3 0.0 <total>
|
||||
# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movd %mm1, (%rcx)
|
||||
# CHECK-NEXT: 2. 1 4.0 2.0 0.0 movd %mm2, (%rdx)
|
||||
# CHECK-NEXT: 3. 1 5.0 1.0 0.0 movd %mm3, (%rbx)
|
||||
# CHECK-NEXT: 1 3.0 1.3 0.0 <total>
|
||||
|
||||
# CHECK: [5] Code Region
|
||||
|
||||
|
|
|
@ -6,12 +6,12 @@ stmxcsr (%rsp)
|
|||
|
||||
# CHECK: Iterations: 2
|
||||
# CHECK-NEXT: Instructions: 4
|
||||
# CHECK-NEXT: Total Cycles: 205
|
||||
# CHECK-NEXT: Total Cycles: 104
|
||||
# CHECK-NEXT: Total uOps: 4
|
||||
|
||||
# CHECK: Dispatch Width: 2
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.02
|
||||
# CHECK-NEXT: IPC: 0.02
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.04
|
||||
# CHECK-NEXT: IPC: 0.04
|
||||
# CHECK-NEXT: Block RThroughput: 1.0
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
|
@ -31,7 +31,9 @@ stmxcsr (%rsp)
|
|||
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123
|
||||
|
||||
# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER. int3
|
||||
# CHECK-NEXT: [0,1] D====================================================================================================eER stmxcsr (%rsp)
|
||||
# CHECK-NEXT: [0,1] DeE---------------------------------------------------------------------------------------------------R. stmxcsr (%rsp)
|
||||
# CHECK-NEXT: [1,0] .DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER int3
|
||||
# CHECK-NEXT: [1,1] .DeE---------------------------------------------------------------------------------------------------R stmxcsr (%rsp)
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -40,6 +42,6 @@ stmxcsr (%rsp)
|
|||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 2 51.0 0.5 0.0 int3
|
||||
# CHECK-NEXT: 1. 2 151.0 0.0 0.0 stmxcsr (%rsp)
|
||||
# CHECK-NEXT: 2 101.0 0.3 0.0 <total>
|
||||
# CHECK-NEXT: 0. 2 1.0 1.0 0.0 int3
|
||||
# CHECK-NEXT: 1. 2 1.0 0.0 99.0 stmxcsr (%rsp)
|
||||
# CHECK-NEXT: 2 1.0 0.5 49.5 <total>
|
||||
|
|
|
@ -12,12 +12,12 @@ retq
|
|||
|
||||
# CHECK: Iterations: 100
|
||||
# CHECK-NEXT: Instructions: 600
|
||||
# CHECK-NEXT: Total Cycles: 704
|
||||
# CHECK-NEXT: Total Cycles: 308
|
||||
# CHECK-NEXT: Total uOps: 600
|
||||
|
||||
# CHECK: Dispatch Width: 2
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.85
|
||||
# CHECK-NEXT: IPC: 0.85
|
||||
# CHECK-NEXT: uOps Per Cycle: 1.95
|
||||
# CHECK-NEXT: IPC: 1.95
|
||||
# CHECK-NEXT: Block RThroughput: 3.0
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
|
@ -66,27 +66,27 @@ retq
|
|||
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - retq
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0123456789
|
||||
# CHECK-NEXT: Index 0123456789 01234
|
||||
# CHECK-NEXT: 0123456
|
||||
# CHECK-NEXT: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DeER . . . . . stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: [0,1] DeER . . . . . movl $-24577, %eax
|
||||
# CHECK-NEXT: [0,2] .DeeeeER . . . . andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: [0,3] .D====eER . . . . movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: [0,4] . D===eeeER . . . ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: [0,5] . DeeeeE--R . . . retq
|
||||
# CHECK-NEXT: [1,0] . D=====eER . . . stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: [1,1] . DeE-----R . . . movl $-24577, %eax
|
||||
# CHECK-NEXT: [1,2] . D====eeeeER. . . andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: [1,3] . D========eER . . movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: [1,4] . D=======eeeER . . ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: [1,5] . D=eeeeE-----R . . retq
|
||||
# CHECK-NEXT: [2,0] . .D=========eER . . stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: [2,1] . .DeE---------R . . movl $-24577, %eax
|
||||
# CHECK-NEXT: [2,2] . . D========eeeeER . andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: [2,3] . . D============eER . movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: [2,4] . . D===========eeeER ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: [2,5] . . D=eeeeE---------R retq
|
||||
# CHECK: [0,0] DeER . . .. stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: [0,1] DeER . . .. movl $-24577, %eax
|
||||
# CHECK-NEXT: [0,2] .DeeeeER . .. andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: [0,3] .D====eER . .. movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: [0,4] . D===eeeER .. ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: [0,5] . DeeeeE--R .. retq
|
||||
# CHECK-NEXT: [1,0] . D===eE--R .. stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: [1,1] . DeE-----R .. movl $-24577, %eax
|
||||
# CHECK-NEXT: [1,2] . DeeeeE--R .. andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: [1,3] . D====eE-R .. movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: [1,4] . D===eeeER .. ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: [1,5] . D=eeeeE-R .. retq
|
||||
# CHECK-NEXT: [2,0] . .D===eE--R.. stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: [2,1] . .DeE-----R.. movl $-24577, %eax
|
||||
# CHECK-NEXT: [2,2] . . DeeeeE--R. andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: [2,3] . . D====eE-R. movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: [2,4] . . D===eeeER ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: [2,5] . . D=eeeeE-R retq
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -95,10 +95,10 @@ retq
|
|||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 3 5.7 0.3 0.0 stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: 1. 3 1.0 1.0 4.7 movl $-24577, %eax
|
||||
# CHECK-NEXT: 2. 3 5.0 0.3 0.0 andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: 3. 3 9.0 0.0 0.0 movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: 4. 3 8.0 0.0 0.0 ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: 5. 3 1.7 1.7 5.3 retq
|
||||
# CHECK-NEXT: 3 5.1 0.6 1.7 <total>
|
||||
# CHECK-NEXT: 0. 3 3.0 1.0 1.3 stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: 1. 3 1.0 1.0 3.3 movl $-24577, %eax
|
||||
# CHECK-NEXT: 2. 3 1.0 1.0 1.3 andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: 3. 3 5.0 0.0 0.7 movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: 4. 3 4.0 0.0 0.0 ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: 5. 3 1.7 1.7 1.3 retq
|
||||
# CHECK-NEXT: 3 2.6 0.8 1.3 <total>
|
||||
|
|
|
@ -5,11 +5,11 @@ fxrstor (%rsp)
|
|||
|
||||
# CHECK: Iterations: 100
|
||||
# CHECK-NEXT: Instructions: 100
|
||||
# CHECK-NEXT: Total Cycles: 6403
|
||||
# CHECK-NEXT: Total Cycles: 4720
|
||||
# CHECK-NEXT: Total uOps: 9000
|
||||
|
||||
# CHECK: Dispatch Width: 4
|
||||
# CHECK-NEXT: uOps Per Cycle: 1.41
|
||||
# CHECK-NEXT: uOps Per Cycle: 1.91
|
||||
# CHECK-NEXT: IPC: 0.02
|
||||
# CHECK-NEXT: Block RThroughput: 22.5
|
||||
|
||||
|
|
|
@ -12,12 +12,12 @@ retq
|
|||
|
||||
# CHECK: Iterations: 100
|
||||
# CHECK-NEXT: Instructions: 600
|
||||
# CHECK-NEXT: Total Cycles: 1304
|
||||
# CHECK-NEXT: Total Cycles: 413
|
||||
# CHECK-NEXT: Total uOps: 1300
|
||||
|
||||
# CHECK: Dispatch Width: 4
|
||||
# CHECK-NEXT: uOps Per Cycle: 1.00
|
||||
# CHECK-NEXT: IPC: 0.46
|
||||
# CHECK-NEXT: uOps Per Cycle: 3.15
|
||||
# CHECK-NEXT: IPC: 1.45
|
||||
# CHECK-NEXT: Block RThroughput: 3.3
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
|
@ -50,39 +50,39 @@ retq
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
|
||||
# CHECK-NEXT: - - 1.74 1.74 1.67 1.68 2.00 1.74 1.78 1.65
|
||||
# CHECK-NEXT: - - 1.99 1.50 1.66 1.67 2.00 1.52 1.99 1.67
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
|
||||
# CHECK-NEXT: - - - - 0.30 - 1.00 1.00 - 0.70 stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: - - 0.03 0.53 - - - 0.23 0.21 - movl $-24577, %eax
|
||||
# CHECK-NEXT: - - 0.22 0.58 0.35 0.65 - - 0.20 - andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: - - - - 0.05 - 1.00 - - 0.95 movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: - - 1.00 0.21 0.34 0.66 - 0.42 0.37 - ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: - - 0.49 0.42 0.63 0.37 - 0.09 1.00 - retq
|
||||
# CHECK-NEXT: - - - - 0.16 - 1.00 1.00 - 0.84 stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: - - 0.49 0.49 - - - 0.01 0.01 - movl $-24577, %eax
|
||||
# CHECK-NEXT: - - 0.49 0.02 0.49 0.51 - 0.01 0.48 - andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: - - - - 0.17 - 1.00 - - 0.83 movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: - - 1.00 0.01 0.33 0.67 - 0.49 0.50 - ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: - - 0.01 0.98 0.51 0.49 - 0.01 1.00 - retq
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0123456789 0123456789
|
||||
# CHECK-NEXT: Index 0123456789 0123456789 012
|
||||
# CHECK-NEXT: 0123456789
|
||||
# CHECK-NEXT: Index 0123456789 01234
|
||||
|
||||
# CHECK: [0,0] DeeER. . . . . . . . . stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: [0,1] DeE-R. . . . . . . . . movl $-24577, %eax
|
||||
# CHECK-NEXT: [0,2] .DeeeeeeER. . . . . . . . andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: [0,3] .D======eER . . . . . . . movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: [0,4] . D=====eeeeeeeER . . . . . . ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: [0,5] . DeeeeeeeE----R . . . . . . retq
|
||||
# CHECK-NEXT: [1,0] . D==========eeER . . . . . . stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: [1,1] . DeE-----------R . . . . . . movl $-24577, %eax
|
||||
# CHECK-NEXT: [1,2] . D=========eeeeeeER . . . . . andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: [1,3] . D===============eER . . . . . movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: [1,4] . .D==============eeeeeeeER. . . . ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: [1,5] . . DeeeeeeeE-------------R. . . . retq
|
||||
# CHECK-NEXT: [2,0] . . D===================eeER . . . stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: [2,1] . . DeE--------------------R . . . movl $-24577, %eax
|
||||
# CHECK-NEXT: [2,2] . . D==================eeeeeeER . . andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: [2,3] . . D========================eER . . movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: [2,4] . . D=======================eeeeeeeER ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: [2,5] . . .DeeeeeeeE----------------------R retq
|
||||
# CHECK: [0,0] DeeER. . . . . stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: [0,1] DeE-R. . . . . movl $-24577, %eax
|
||||
# CHECK-NEXT: [0,2] .DeeeeeeER. . . . andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: [0,3] .D======eER . . . movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: [0,4] . D=====eeeeeeeER . . ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: [0,5] . DeeeeeeeE----R . . retq
|
||||
# CHECK-NEXT: [1,0] . D====eeE----R . . stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: [1,1] . DeE---------R . . movl $-24577, %eax
|
||||
# CHECK-NEXT: [1,2] . DeeeeeeE---R . . andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: [1,3] . D======eE--R . . movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: [1,4] . .D=====eeeeeeeER . ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: [1,5] . . D=eeeeeeeE---R . retq
|
||||
# CHECK-NEXT: [2,0] . . D====eeE----R . stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: [2,1] . . DeE---------R . movl $-24577, %eax
|
||||
# CHECK-NEXT: [2,2] . . DeeeeeeE---R . andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: [2,3] . . D======eE--R . movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: [2,4] . . D=====eeeeeeeER ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: [2,5] . . .DeeeeeeeE----R retq
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
|
@ -91,10 +91,10 @@ retq
|
|||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 3 10.7 0.3 0.0 stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: 1. 3 1.0 1.0 10.7 movl $-24577, %eax
|
||||
# CHECK-NEXT: 2. 3 10.0 0.3 0.0 andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: 3. 3 16.0 0.0 0.0 movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: 4. 3 15.0 0.0 0.0 ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: 5. 3 1.0 1.0 13.0 retq
|
||||
# CHECK-NEXT: 3 8.9 0.4 3.9 <total>
|
||||
# CHECK-NEXT: 0. 3 3.7 1.0 2.7 stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: 1. 3 1.0 1.0 6.3 movl $-24577, %eax
|
||||
# CHECK-NEXT: 2. 3 1.0 1.0 2.0 andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: 3. 3 7.0 0.0 1.3 movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: 4. 3 6.0 0.0 0.0 ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: 5. 3 1.3 1.3 3.7 retq
|
||||
# CHECK-NEXT: 3 3.3 0.7 2.7 <total>
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=false -summary-view=false -show-barriers < %s | FileCheck %s
|
||||
|
||||
clflush (%rax)
|
||||
lfence
|
||||
mfence
|
||||
sfence
|
||||
maskmovdqu %xmm0, %xmm1
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
# CHECK-NEXT: [4]: MayLoad
|
||||
# CHECK-NEXT: [5]: MayStore
|
||||
# CHECK-NEXT: [6]: HasSideEffects (U)
|
||||
# CHECK-NEXT: [7]: LoadBarrier
|
||||
# CHECK-NEXT: [8]: StoreBarrier
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] [7] [8] Instructions:
|
||||
# CHECK-NEXT: 4 5 1.00 * * U clflush (%rax)
|
||||
# CHECK-NEXT: 1 1 1.00 * * U * lfence
|
||||
# CHECK-NEXT: 1 1 1.00 * * U * * mfence
|
||||
# CHECK-NEXT: 1 1 1.00 * * U * sfence
|
||||
# CHECK-NEXT: 1 1 1.00 * * U maskmovdqu %xmm0, %xmm1
|
|
@ -32,14 +32,30 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
|
|||
TempStream << "\n\nInstruction Info:\n";
|
||||
TempStream << "[1]: #uOps\n[2]: Latency\n[3]: RThroughput\n"
|
||||
<< "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n";
|
||||
if (PrintBarriers) {
|
||||
TempStream << "[7]: LoadBarrier\n[8]: StoreBarrier\n";
|
||||
}
|
||||
if (PrintEncodings) {
|
||||
TempStream << "[7]: Encoding Size\n";
|
||||
TempStream << "\n[1] [2] [3] [4] [5] [6] [7] "
|
||||
<< "Encodings: Instructions:\n";
|
||||
if (PrintBarriers) {
|
||||
TempStream << "[9]: Encoding Size\n";
|
||||
TempStream << "\n[1] [2] [3] [4] [5] [6] [7] [8] "
|
||||
<< "[9] Encodings: Instructions:\n";
|
||||
} else {
|
||||
TempStream << "[7]: Encoding Size\n";
|
||||
TempStream << "\n[1] [2] [3] [4] [5] [6] [7] "
|
||||
<< "Encodings: Instructions:\n";
|
||||
}
|
||||
} else {
|
||||
TempStream << "\n[1] [2] [3] [4] [5] [6] Instructions:\n";
|
||||
if (PrintBarriers) {
|
||||
TempStream << "\n[1] [2] [3] [4] [5] [6] [7] [8] "
|
||||
<< "Instructions:\n";
|
||||
} else {
|
||||
TempStream << "\n[1] [2] [3] [4] [5] [6] "
|
||||
<< "Instructions:\n";
|
||||
}
|
||||
}
|
||||
|
||||
int Index = 0;
|
||||
for (const auto &I : enumerate(zip(IIVD, Source))) {
|
||||
const InstructionInfoViewData &IIVDEntry = std::get<0>(I.value());
|
||||
|
||||
|
@ -68,6 +84,13 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
|
|||
TempStream << (IIVDEntry.mayStore ? " * " : " ");
|
||||
TempStream << (IIVDEntry.hasUnmodeledSideEffects ? " U " : " ");
|
||||
|
||||
if (PrintBarriers) {
|
||||
TempStream << (LoweredInsts[Index]->isALoadBarrier() ? " * "
|
||||
: " ");
|
||||
TempStream << (LoweredInsts[Index]->isAStoreBarrier() ? " * "
|
||||
: " ");
|
||||
}
|
||||
|
||||
if (PrintEncodings) {
|
||||
StringRef Encoding(CE.getEncoding(I.index()));
|
||||
unsigned EncodingSize = Encoding.size();
|
||||
|
@ -83,6 +106,7 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
|
|||
|
||||
const MCInst &Inst = std::get<1>(I.value());
|
||||
TempStream << printInstructionString(Inst) << '\n';
|
||||
++Index;
|
||||
}
|
||||
|
||||
TempStream.flush();
|
||||
|
|
|
@ -54,6 +54,9 @@ class InstructionInfoView : public InstructionView {
|
|||
const llvm::MCInstrInfo &MCII;
|
||||
CodeEmitter &CE;
|
||||
bool PrintEncodings;
|
||||
bool PrintBarriers;
|
||||
using UniqueInst = std::unique_ptr<Instruction>;
|
||||
ArrayRef<UniqueInst> LoweredInsts;
|
||||
|
||||
struct InstructionInfoViewData {
|
||||
unsigned NumMicroOpcodes = 0;
|
||||
|
@ -72,9 +75,12 @@ public:
|
|||
InstructionInfoView(const llvm::MCSubtargetInfo &ST,
|
||||
const llvm::MCInstrInfo &II, CodeEmitter &C,
|
||||
bool ShouldPrintEncodings, llvm::ArrayRef<llvm::MCInst> S,
|
||||
llvm::MCInstPrinter &IP)
|
||||
llvm::MCInstPrinter &IP,
|
||||
ArrayRef<UniqueInst> LoweredInsts,
|
||||
bool ShouldPrintBarriers)
|
||||
: InstructionView(ST, IP, S), MCII(II), CE(C),
|
||||
PrintEncodings(ShouldPrintEncodings) {}
|
||||
PrintEncodings(ShouldPrintEncodings),
|
||||
PrintBarriers(ShouldPrintBarriers), LoweredInsts(LoweredInsts) {}
|
||||
|
||||
void printView(llvm::raw_ostream &OS) const override;
|
||||
StringRef getNameAsString() const override { return "InstructionInfoView"; }
|
||||
|
|
|
@ -219,6 +219,11 @@ static cl::opt<bool> ShowEncoding(
|
|||
cl::desc("Print encoding information in the instruction info view"),
|
||||
cl::cat(ViewOptions), cl::init(false));
|
||||
|
||||
static cl::opt<bool> ShowBarriers(
|
||||
"show-barriers",
|
||||
cl::desc("Print memory barrier information in the instruction info view"),
|
||||
cl::cat(ViewOptions), cl::init(false));
|
||||
|
||||
static cl::opt<bool> DisableCustomBehaviour(
|
||||
"disable-cb",
|
||||
cl::desc(
|
||||
|
@ -504,7 +509,7 @@ int main(int argc, char **argv) {
|
|||
// (which does nothing).
|
||||
IPP = std::make_unique<mca::InstrPostProcess>(*STI, *MCII);
|
||||
|
||||
std::vector<std::unique_ptr<mca::Instruction>> LoweredSequence;
|
||||
SmallVector<std::unique_ptr<mca::Instruction>> LoweredSequence;
|
||||
for (const MCInst &MCI : Insts) {
|
||||
Expected<std::unique_ptr<mca::Instruction>> Inst =
|
||||
IB.createInstruction(MCI);
|
||||
|
@ -548,7 +553,8 @@ int main(int argc, char **argv) {
|
|||
// Create the views for this pipeline, execute, and emit a report.
|
||||
if (PrintInstructionInfoView) {
|
||||
Printer.addView(std::make_unique<mca::InstructionInfoView>(
|
||||
*STI, *MCII, CE, ShowEncoding, Insts, *IP));
|
||||
*STI, *MCII, CE, ShowEncoding, Insts, *IP, LoweredSequence,
|
||||
ShowBarriers));
|
||||
}
|
||||
Printer.addView(
|
||||
std::make_unique<mca::ResourcePressureView>(*STI, *IP, Insts));
|
||||
|
@ -624,7 +630,8 @@ int main(int argc, char **argv) {
|
|||
|
||||
if (PrintInstructionInfoView)
|
||||
Printer.addView(std::make_unique<mca::InstructionInfoView>(
|
||||
*STI, *MCII, CE, ShowEncoding, Insts, *IP));
|
||||
*STI, *MCII, CE, ShowEncoding, Insts, *IP, LoweredSequence,
|
||||
ShowBarriers));
|
||||
|
||||
// Fetch custom Views that are to be placed after the InstructionInfoView.
|
||||
// Refer to the comment paired with the CB->getStartViews(*IP, Insts); line
|
||||
|
|
Loading…
Reference in New Issue