From c74ad502cecf221e2cbfed86f79a93155247760a Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Thu, 5 Apr 2018 15:41:41 +0000 Subject: [PATCH] [MC][Tablegen] Allow models to describe the retire control unit for llvm-mca. This patch adds the ability to describe properties of the hardware retire control unit. Tablegen class RetireControlUnit has been added for this purpose (see TargetSchedule.td). A RetireControlUnit specifies the size of the reorder buffer, as well as the maximum number of opcodes that can be retired every cycle. A zero (or negative) value for the reorder buffer size means: "the size is unknown". If the size is unknown, then llvm-mca defaults it to the value of field SchedMachineModel::MicroOpBufferSize. A zero or negative number of opcodes retired per cycle means: "there is no restriction on the number of instructions that can be retired every cycle". Models can optionally specify an instance of RetireControlUnit. There can be at most one RetireControlUnit definition per scheduling model. Information related to the RCU (RetireControlUnit) is stored in (two new fields of) MCExtraProcessorInfo. llvm-mca loads that information when it initializes the DispatchUnit / RetireControlUnit (see Dispatch.h/Dispatch.cpp). This patch fixes PR36661. 
Differential Revision: https://reviews.llvm.org/D45259 llvm-svn: 329304 --- llvm/include/llvm/MC/MCSchedule.h | 7 +-- llvm/include/llvm/Target/TargetSchedule.td | 19 +++++++- llvm/lib/Target/X86/X86ScheduleBtVer2.td | 5 +++ .../tools/llvm-mca/X86/BtVer2/dot-product.s | 11 +++-- .../tools/llvm-mca/X86/BtVer2/pipes-fpu.s | 43 +++++++++---------- .../llvm-mca/X86/BtVer2/register-files-5.s | 42 +++++++++--------- llvm/tools/llvm-mca/Backend.h | 6 +-- llvm/tools/llvm-mca/Dispatch.cpp | 18 ++++++++ llvm/tools/llvm-mca/Dispatch.h | 20 +++------ llvm/utils/TableGen/CodeGenSchedule.cpp | 26 ++++++++++- llvm/utils/TableGen/CodeGenSchedule.h | 12 +++++- llvm/utils/TableGen/SubtargetEmitter.cpp | 17 ++++++++ 12 files changed, 153 insertions(+), 73 deletions(-) diff --git a/llvm/include/llvm/MC/MCSchedule.h b/llvm/include/llvm/MC/MCSchedule.h index 0075a7fabcaf..738daca0fb1a 100644 --- a/llvm/include/llvm/MC/MCSchedule.h +++ b/llvm/include/llvm/MC/MCSchedule.h @@ -163,10 +163,11 @@ struct MCRegisterFileDesc { /// normally used by the LLVM machine schedulers, but that can be consumed by /// external tools like llvm-mca to improve the quality of the peformance /// analysis. -/// In future, the plan is to extend this struct with extra information (for -/// example: maximum number of instructions retired per cycle; actual size of -/// the reorder buffer; etc.). struct MCExtraProcessorInfo { + // Actual size of the reorder buffer in hardware. + unsigned ReorderBufferSize; + // Number of instructions retired per cycle. 
+ unsigned MaxRetirePerCycle; const MCRegisterFileDesc *RegisterFiles; unsigned NumRegisterFiles; const MCRegisterCostEntry *RegisterCostTable; diff --git a/llvm/include/llvm/Target/TargetSchedule.td b/llvm/include/llvm/Target/TargetSchedule.td index 96545e8f0b23..e717cc26cae9 100644 --- a/llvm/include/llvm/Target/TargetSchedule.td +++ b/llvm/include/llvm/Target/TargetSchedule.td @@ -443,7 +443,7 @@ class SchedAlias { SchedMachineModel SchedModel = ?; } -// Alow the definition of processor register files. +// Allow the definition of processor register files. // Each processor register file declares the number of physical registers, as // well as a optional register cost information. The cost of a register R is the // number of physical registers used to rename R (at register renaming stage). @@ -459,3 +459,20 @@ class RegisterFile Classes = [], int NumPhysRegs = numPhysRegs; SchedMachineModel SchedModel = ?; } + +// Describe the retire control unit. +// A retire control unit specifies the size of the reorder buffer, as well as +// the maximum number of opcodes that can be retired every cycle. +// A value less-than-or-equal-to zero for field 'ReorderBufferSize' means: "the +// size is unknown". The idea is that external tools can fall-back to using +// field MicroOpBufferSize in SchedModel if the reorder buffer size is unknown. +// A zero or negative value for field 'MaxRetirePerCycle' means "no +// restrictions on the number of instructions retired per cycle". +// Models can optionally specify up to one instance of RetireControlUnit per +// scheduling model. 
+class RetireControlUnit { + int ReorderBufferSize = bufferSize; + int MaxRetirePerCycle = retirePerCycle; + SchedMachineModel SchedModel = ?; +} + diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index d80e0e96915b..fda8f7c7f5b5 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -48,6 +48,11 @@ def IntegerPRF : RegisterFile<64, [GR8, GR16, GR32, GR64, CCR]>; // Reference: www.realworldtech.com/jaguar/4/ def FpuPRF: RegisterFile<72, [VR64, VR128, VR256], [1, 1, 2]>; +// The retire control unit (RCU) can track up to 64 macro-ops in-flight. It can +// retire up to two macro-ops per cycle. +// Reference: "Software Optimization Guide for AMD Family 16h Processors" +def RCU : RetireControlUnit<64, 2>; + // Integer Pipe Scheduler def JALU01 : ProcResGroup<[JALU0, JALU1]> { let BufferSize=20; diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s index e848584e8d81..7028bb37aaea 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s @@ -59,15 +59,14 @@ vhaddps %xmm3, %xmm3, %xmm4 # CHECK: [0,0] DeeER. . . vmulps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: [0,1] D==eeeER . . vhaddps %xmm2, %xmm2, %xmm3 # CHECK-NEXT: [0,2] .D====eeeER . vhaddps %xmm3, %xmm3, %xmm4 - # CHECK: [1,0] .DeeE-----R . vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [1,1] . D=eeeE--R . vhaddps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [1,1] . D=eeeE---R . vhaddps %xmm2, %xmm2, %xmm3 # CHECK-NEXT: [1,2] . D====eeeER . vhaddps %xmm3, %xmm3, %xmm4 - -# CHECK: [2,0] . DeeE----R . vmulps %xmm0, %xmm1, %xmm2 +# CHECK: [2,0] . DeeE-----R . vmulps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: [2,1] . D====eeeER . vhaddps %xmm2, %xmm2, %xmm3 # CHECK-NEXT: [2,2] . 
D======eeeER vhaddps %xmm3, %xmm3, %xmm4 + # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue @@ -75,6 +74,6 @@ vhaddps %xmm3, %xmm3, %xmm4 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 3 1.0 1.0 3.0 vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1. 3 3.3 0.7 0.7 vhaddps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: 0. 3 1.0 1.0 3.3 vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1. 3 3.3 0.7 1.0 vhaddps %xmm2, %xmm2, %xmm3 # CHECK-NEXT: 2. 3 5.7 0.0 0.0 vhaddps %xmm3, %xmm3, %xmm4 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s index a7bce2788f2c..d2a53036de27 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s @@ -75,24 +75,23 @@ vsqrtps %ymm0, %ymm2 # CHECK-NEXT: - - - - 42.00 - 2.00 - - - - - - - vsqrtps %ymm0, %ymm2 + # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123456789 01234567 - -# CHECK: [0,0] DeeeeER . . . . . . . . . . . . . vpmulld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [0,1] .DeE--R . . . . . . . . . . . . . vpand %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [0,2] . DeeeER . . . . . . . . . . . . . vcvttps2dq %xmm0, %xmm2 -# CHECK-NEXT: [0,3] . DeeE-R . . . . . . . . . . . . . vpclmulqdq $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [0,4] . DeeeER . . . . . . . . . . . . . vaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [0,5] . DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . vsqrtps %xmm0, %xmm2 -# CHECK-NEXT: [0,6] . DeeeE-----------------R . . . . . . . . . vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: [0,7] . D===================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER vsqrtps %ymm0, %ymm2 - -# CHECK: [1,0] . .DeeeeE--------------------------------------------------------R vpmulld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [1,1] . . 
DeE----------------------------------------------------------R vpand %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [1,2] . . DeeeE-------------------------------------------------------R vcvttps2dq %xmm0, %xmm2 -# CHECK-NEXT: [1,3] . . DeeE--------------------------------------------------------R vpclmulqdq $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [1,4] . . DeeeE------------------------------------------------------R vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 0123456789 0123456789 0123456789 0 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 +# CHECK: [0,0] DeeeeER . . . . . . . . . . . . . vpmulld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [0,1] .DeE--R . . . . . . . . . . . . . vpand %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [0,2] . DeeeER . . . . . . . . . . . . . vcvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: [0,3] . DeeE-R . . . . . . . . . . . . . vpclmulqdq $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [0,4] . DeeeER . . . . . . . . . . . . . vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [0,5] . DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . vsqrtps %xmm0, %xmm2 +# CHECK-NEXT: [0,6] . DeeeE-----------------R . . . . . . . . . vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: [0,7] . D===================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER . vsqrtps %ymm0, %ymm2 +# CHECK: [1,0] . .DeeeeE--------------------------------------------------------R . vpmulld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [1,1] . . DeE-----------------------------------------------------------R. vpand %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [1,2] . . DeeeE--------------------------------------------------------R. vcvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: [1,3] . . DeeE----------------------------------------------------------R vpclmulqdq $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [1,4] . . 
DeeeE--------------------------------------------------------R vaddps %xmm0, %xmm1, %xmm2 # CHECK: Average Wait times (based on the timeline view): @@ -101,12 +100,12 @@ vsqrtps %ymm0, %ymm2 # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage -# CHECK: [0] [1] [2] [3] +# CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 1.0 1.0 28.0 vpmulld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1. 2 1.0 1.0 30.0 vpand %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 2. 2 1.0 1.0 27.5 vcvttps2dq %xmm0, %xmm2 -# CHECK-NEXT: 3. 2 1.0 1.0 28.5 vpclmulqdq $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 4. 2 1.0 1.0 27.0 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1. 2 1.0 1.0 30.5 vpand %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2. 2 1.0 1.0 28.0 vcvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: 3. 2 1.0 1.0 29.5 vpclmulqdq $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 4. 2 1.0 1.0 28.0 vaddps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 5. 1 1.0 1.0 0.0 vsqrtps %xmm0, %xmm2 # CHECK-NEXT: 6. 1 1.0 1.0 17.0 vaddps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 7. 1 20.0 20.0 0.0 vsqrtps %ymm0, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s index 704869fd4fc8..ec29206afa9d 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s @@ -72,27 +72,27 @@ # CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . vdivps %ymm0, %ymm0, %ymm1 # CHECK-NEXT: [0,1] .DeeeE----------------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm2 -# CHECK-NEXT: [0,2] . D=eeeE--------------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm3 -# CHECK-NEXT: [0,3] . D==eeeE------------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm4 -# CHECK-NEXT: [0,4] . D===eeeE----------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm5 -# CHECK-NEXT: [0,5] . D====eeeE--------------------------R . . . . . . 
vaddps %ymm0, %ymm0, %ymm6 -# CHECK-NEXT: [0,6] . .D=====eeeE------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm7 -# CHECK-NEXT: [0,7] . . D======eeeE----------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm8 -# CHECK-NEXT: [0,8] . . D=======eeeE--------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm9 -# CHECK-NEXT: [0,9] . . D========eeeE------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm10 -# CHECK-NEXT: [0,10] . . D=========eeeE----------------R . . . . . . vaddps %ymm0, %ymm0, %ymm11 -# CHECK-NEXT: [0,11] . . .D==========eeeE--------------R . . . . . . vaddps %ymm0, %ymm0, %ymm12 -# CHECK-NEXT: [0,12] . . . D===========eeeE------------R . . . . . . vaddps %ymm0, %ymm0, %ymm13 -# CHECK-NEXT: [0,13] . . . D============eeeE----------R . . . . . . vaddps %ymm0, %ymm0, %ymm14 -# CHECK-NEXT: [0,14] . . . D=============eeeE--------R . . . . . . vaddps %ymm0, %ymm0, %ymm15 -# CHECK-NEXT: [0,15] . . . D==============eeeE------R . . . . . . vaddps %ymm2, %ymm0, %ymm0 -# CHECK-NEXT: [0,16] . . . .D================eeeE---R . . . . . . vaddps %ymm2, %ymm0, %ymm3 -# CHECK-NEXT: [0,17] . . . . D=================eeeE-R . . . . . . vaddps %ymm2, %ymm0, %ymm4 -# CHECK-NEXT: [0,18] . . . . D==================eeeER . . . . . . vaddps %ymm2, %ymm0, %ymm5 -# CHECK-NEXT: [0,19] . . . . D===================eeeER . . . . . . vaddps %ymm2, %ymm0, %ymm6 -# CHECK-NEXT: [0,20] . . . . D====================eeeER . . . . . vaddps %ymm2, %ymm0, %ymm7 -# CHECK-NEXT: [0,21] . . . . .D=====================eeeER . . . . . vaddps %ymm2, %ymm0, %ymm8 -# CHECK-NEXT: [0,22] . . . . . D======================eeeER. . . . . vaddps %ymm2, %ymm0, %ymm9 +# CHECK-NEXT: [0,2] . D=eeeE---------------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm3 +# CHECK-NEXT: [0,3] . D==eeeE-------------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm4 +# CHECK-NEXT: [0,4] . D===eeeE------------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm5 +# CHECK-NEXT: [0,5] . 
D====eeeE----------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm6 +# CHECK-NEXT: [0,6] . .D=====eeeE---------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm7 +# CHECK-NEXT: [0,7] . . D======eeeE-------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm8 +# CHECK-NEXT: [0,8] . . D=======eeeE------------------------R. . . . . . vaddps %ymm0, %ymm0, %ymm9 +# CHECK-NEXT: [0,9] . . D========eeeE----------------------R. . . . . . vaddps %ymm0, %ymm0, %ymm10 +# CHECK-NEXT: [0,10] . . D=========eeeE---------------------R . . . . . vaddps %ymm0, %ymm0, %ymm11 +# CHECK-NEXT: [0,11] . . .D==========eeeE-------------------R . . . . . vaddps %ymm0, %ymm0, %ymm12 +# CHECK-NEXT: [0,12] . . . D===========eeeE------------------R . . . . . vaddps %ymm0, %ymm0, %ymm13 +# CHECK-NEXT: [0,13] . . . D============eeeE----------------R . . . . . vaddps %ymm0, %ymm0, %ymm14 +# CHECK-NEXT: [0,14] . . . D=============eeeE---------------R . . . . . vaddps %ymm0, %ymm0, %ymm15 +# CHECK-NEXT: [0,15] . . . D==============eeeE-------------R . . . . . vaddps %ymm2, %ymm0, %ymm0 +# CHECK-NEXT: [0,16] . . . .D================eeeE-----------R . . . . . vaddps %ymm2, %ymm0, %ymm3 +# CHECK-NEXT: [0,17] . . . . D=================eeeE---------R . . . . . vaddps %ymm2, %ymm0, %ymm4 +# CHECK-NEXT: [0,18] . . . . D==================eeeE--------R. . . . . vaddps %ymm2, %ymm0, %ymm5 +# CHECK-NEXT: [0,19] . . . . D===================eeeE------R. . . . . vaddps %ymm2, %ymm0, %ymm6 +# CHECK-NEXT: [0,20] . . . . D====================eeeE-----R . . . . vaddps %ymm2, %ymm0, %ymm7 +# CHECK-NEXT: [0,21] . . . . .D=====================eeeE---R . . . . vaddps %ymm2, %ymm0, %ymm8 +# CHECK-NEXT: [0,22] . . . . . D======================eeeE--R . . . . vaddps %ymm2, %ymm0, %ymm9 # CHECK-NEXT: [0,23] . . . . . D=======================eeeER . . . . vaddps %ymm2, %ymm0, %ymm10 # CHECK-NEXT: [0,24] . . . . . D========================eeeER . . . . vaddps %ymm2, %ymm0, %ymm11 # CHECK-NEXT: [0,25] . . . 
. . D=========================eeeER . . . vaddps %ymm2, %ymm0, %ymm12 diff --git a/llvm/tools/llvm-mca/Backend.h b/llvm/tools/llvm-mca/Backend.h index 12e80777c777..c21da1f23afe 100644 --- a/llvm/tools/llvm-mca/Backend.h +++ b/llvm/tools/llvm-mca/Backend.h @@ -68,9 +68,9 @@ public: HWS(llvm::make_unique(this, Subtarget.getSchedModel(), LoadQueueSize, StoreQueueSize, AssumeNoAlias)), - DU(llvm::make_unique( - this, STI, MRI, Subtarget.getSchedModel().MicroOpBufferSize, - RegisterFileSize, DispatchWidth, HWS.get())), + DU(llvm::make_unique(this, Subtarget.getSchedModel(), MRI, + RegisterFileSize, DispatchWidth, + HWS.get())), SM(Source), Cycles(0) { HWS->setDispatchUnit(DU.get()); } diff --git a/llvm/tools/llvm-mca/Dispatch.cpp b/llvm/tools/llvm-mca/Dispatch.cpp index 383737a41ca1..ce329b4a00d8 100644 --- a/llvm/tools/llvm-mca/Dispatch.cpp +++ b/llvm/tools/llvm-mca/Dispatch.cpp @@ -251,6 +251,24 @@ void RegisterFile::dump() const { } #endif +RetireControlUnit::RetireControlUnit(const llvm::MCSchedModel &SM, + DispatchUnit *DU) + : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), + AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0), Owner(DU) { + // Check if the scheduling model provides extra information about the machine + // processor. If so, then use that information to set the reorder buffer size + // and the maximum number of instructions retired per cycle. + if (SM.hasExtraProcessorInfo()) { + const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); + if (EPI.ReorderBufferSize) + AvailableSlots = EPI.ReorderBufferSize; + MaxRetirePerCycle = EPI.MaxRetirePerCycle; + } + + assert(AvailableSlots && "Invalid reorder buffer size!"); + Queue.resize(AvailableSlots); +} + // Reserves a number of slots, and returns a new token. 
unsigned RetireControlUnit::reserveSlot(unsigned Index, unsigned NumMicroOps) { assert(isAvailable(NumMicroOps)); diff --git a/llvm/tools/llvm-mca/Dispatch.h b/llvm/tools/llvm-mca/Dispatch.h index 979e2a370dd4..3f09ab78611e 100644 --- a/llvm/tools/llvm-mca/Dispatch.h +++ b/llvm/tools/llvm-mca/Dispatch.h @@ -192,12 +192,7 @@ private: DispatchUnit *Owner; public: - RetireControlUnit(unsigned NumSlots, DispatchUnit *DU) - : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), - AvailableSlots(NumSlots), MaxRetirePerCycle(0), Owner(DU) { - assert(NumSlots && "Expected at least one slot!"); - Queue.resize(NumSlots); - } + RetireControlUnit(const llvm::MCSchedModel &SM, DispatchUnit *DU); bool isFull() const { return !AvailableSlots; } bool isEmpty() const { return AvailableSlots == Queue.size(); } @@ -264,16 +259,13 @@ class DispatchUnit { llvm::ArrayRef UsedPhysRegs); public: - DispatchUnit(Backend *B, const llvm::MCSubtargetInfo &STI, - const llvm::MCRegisterInfo &MRI, unsigned MicroOpBufferSize, - unsigned RegisterFileSize, unsigned MaxDispatchWidth, - Scheduler *Sched) + DispatchUnit(Backend *B, const llvm::MCSchedModel &SM, + const llvm::MCRegisterInfo &MRI, unsigned RegisterFileSize, + unsigned MaxDispatchWidth, Scheduler *Sched) : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth), CarryOver(0U), SC(Sched), - RAT(llvm::make_unique(STI.getSchedModel(), MRI, - RegisterFileSize)), - RCU(llvm::make_unique(MicroOpBufferSize, this)), - Owner(B) {} + RAT(llvm::make_unique(SM, MRI, RegisterFileSize)), + RCU(llvm::make_unique(SM, this)), Owner(B) {} unsigned getDispatchWidth() const { return DispatchWidth; } diff --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp index cc28cdfe8f17..0c79da9f7795 100644 --- a/llvm/utils/TableGen/CodeGenSchedule.cpp +++ b/llvm/utils/TableGen/CodeGenSchedule.cpp @@ -211,10 +211,34 @@ CodeGenSchedModels::CodeGenSchedModels(RecordKeeper &RK, DEBUG(dbgs() << "\n+++ RESOURCE DEFINITIONS 
(collectProcResources) +++\n"); collectProcResources(); + // Collect optional processor description. + collectOptionalProcessorInfo(); + + checkCompleteness(); +} + +void CodeGenSchedModels::collectRetireControlUnits() { + RecVec Units = Records.getAllDerivedDefinitions("RetireControlUnit"); + + for (Record *RCU : Units) { + CodeGenProcModel &PM = getProcModel(RCU->getValueAsDef("SchedModel")); + if (PM.RetireControlUnit) { + PrintError(RCU->getLoc(), + "Expected a single RetireControlUnit definition"); + PrintNote(PM.RetireControlUnit->getLoc(), + "Previous definition of RetireControlUnit was here"); + } + PM.RetireControlUnit = RCU; + } +} + +/// Collect optional processor information. +void CodeGenSchedModels::collectOptionalProcessorInfo() { // Find register file definitions for each processor. collectRegisterFiles(); - checkCompleteness(); + // Collect processor RetireControlUnit descriptors if available. + collectRetireControlUnits(); } /// Gather all processor models. diff --git a/llvm/utils/TableGen/CodeGenSchedule.h b/llvm/utils/TableGen/CodeGenSchedule.h index 388249622b1e..8379a929ee7a 100644 --- a/llvm/utils/TableGen/CodeGenSchedule.h +++ b/llvm/utils/TableGen/CodeGenSchedule.h @@ -235,9 +235,13 @@ struct CodeGenProcModel { // List of Register Files. std::vector RegisterFiles; + // Optional Retire Control Unit definition. 
+ Record *RetireControlUnit; + CodeGenProcModel(unsigned Idx, std::string Name, Record *MDef, Record *IDef) : - Index(Idx), ModelName(std::move(Name)), ModelDef(MDef), ItinsDef(IDef) {} + Index(Idx), ModelName(std::move(Name)), ModelDef(MDef), ItinsDef(IDef), + RetireControlUnit(nullptr) {} bool hasItineraries() const { return !ItinsDef->getValueAsListOfDefs("IID").empty(); @@ -248,7 +252,7 @@ struct CodeGenProcModel { } bool hasExtraProcessorInfo() const { - return !RegisterFiles.empty(); + return RetireControlUnit || !RegisterFiles.empty(); } unsigned getProcResourceIdx(Record *PRDef) const; @@ -436,8 +440,12 @@ private: void collectSchedClasses(); + void collectRetireControlUnits(); + void collectRegisterFiles(); + void collectOptionalProcessorInfo(); + std::string createSchedClassName(Record *ItinClassDef, ArrayRef OperWrites, ArrayRef OperReads); diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp index 1af8181cbf37..3ded329c34eb 100644 --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -608,6 +608,20 @@ void SubtargetEmitter::EmitProcessorResourceSubUnits( OS << "};\n"; } +static void EmitRetireControlUnitInfo(const CodeGenProcModel &ProcModel, + raw_ostream &OS) { + int64_t ReorderBufferSize = 0, MaxRetirePerCycle = 0; + if (Record *RCU = ProcModel.RetireControlUnit) { + ReorderBufferSize = + std::max(ReorderBufferSize, RCU->getValueAsInt("ReorderBufferSize")); + MaxRetirePerCycle = + std::max(MaxRetirePerCycle, RCU->getValueAsInt("MaxRetirePerCycle")); + } + + OS << ReorderBufferSize << ", // ReorderBufferSize\n "; + OS << MaxRetirePerCycle << ", // MaxRetirePerCycle\n "; +} + static void EmitRegisterFileInfo(const CodeGenProcModel &ProcModel, unsigned NumRegisterFiles, unsigned NumCostEntries, raw_ostream &OS) { @@ -683,6 +697,9 @@ void SubtargetEmitter::EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel, OS << "\nstatic const llvm::MCExtraProcessorInfo " <<
ProcModel.ModelName << "ExtraInfo = {\n "; + // Add information related to the retire control unit. + EmitRetireControlUnitInfo(ProcModel, OS); + // Add information related to the register files (i.e. where to find register // file descriptors and register costs). EmitRegisterFileInfo(ProcModel, ProcModel.RegisterFiles.size(),