// llvm-project/llvm/lib/Target/AArch64/AArch64SchedKryo.td

//==- AArch64SchedKryo.td - Qualcomm Kryo Scheduling Defs ---*- tablegen -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Qualcomm Kryo to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// The issue width is set to five, matching the five issue queues for expanded
// uops. Now, the latency spreadsheet has information based on fragmented uops,
// but these do not actually take up an issue queue.

def KryoModel : SchedMachineModel {
  // Up to five expanded uops are issued per cycle.
  let IssueWidth = 5;

  // Out-of-order core with a temporary unified issue buffer.
  let MicroOpBufferSize = 128;

  // Optimistic load latency.
  let LoadLatency = 4;

  // Fetch + Decode/Rename/Dispatch + Branch.
  let MispredictPenalty = 14;

  // Turns on partial and runtime loop unrolling. The value 16 was picked
  // empirically from experiments and benchmarking data.
  let LoopMicroOpBufferSize = 16;

  // This model is not declared complete.
  let CompleteModel = 0;
}

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available on Kryo.

// Processor resources for KryoModel. Every definition inside this block gets
// SchedModel = KryoModel from the enclosing `let`.
//
// Six single-instance units are declared (XA, XB, YA, YB, LSA, LSB), plus
// groups that name sets of those units:
//   KryoUnitX  = {XA, XB}
//   KryoUnitY  = {YA, YB}
//   KryoUnitXY = {XA, XB, YA, YB}
//   KryoUnitLS = {LSA, LSB}
// NOTE(review): a write mapped to a group is presumably allowed to use any
// member of that group -- confirm against the ProcResGroup documentation.
let SchedModel = KryoModel in {
  // Individual X/Y units, one instance each.
  def KryoUnitXA : ProcResource<1>;                   // Type X(A) micro-ops
  def KryoUnitXB : ProcResource<1>;                   // Type X(B) micro-ops
  def KryoUnitYA : ProcResource<1>;                   // Type Y(A) micro-ops
  def KryoUnitYB : ProcResource<1>;                   // Type Y(B) micro-ops
  // Groups over the X/Y units.
  def KryoUnitX : ProcResGroup<[KryoUnitXA,          // Type X micro-ops
                                KryoUnitXB]>;
  def KryoUnitY : ProcResGroup<[KryoUnitYA,          // Type Y micro-ops
                                KryoUnitYB]>;
  def KryoUnitXY : ProcResGroup<[KryoUnitXA,         // Type XY micro-ops
                                 KryoUnitXB,
                                 KryoUnitYA,
                                 KryoUnitYB]>;
  // Individual LS units, one instance each, and the group covering both.
  def KryoUnitLSA : ProcResource<1>;                  // Type LS(A) micro-ops
  def KryoUnitLSB : ProcResource<1>;                  // Type LS(B) micro-ops
  def KryoUnitLS : ProcResGroup<[KryoUnitLSA,        // Type LS micro-ops
                                 KryoUnitLSB]>;
}

// Everything up to the matching closing brace (including the contents of the
// included details file) is defined with SchedModel = KryoModel.
let SchedModel = KryoModel in {

//===----------------------------------------------------------------------===//
// Map the target-defined scheduler read/write resources and latency for
// Kryo.
//
// Each WriteRes below names the Kryo resources a write type uses and gives
// its Latency. NumMicroOps is set explicitly only on the multi-resource
// entries.

def : WriteRes<WriteImm,   [KryoUnitXY]> { let Latency = 1; }
def : WriteRes<WriteI,     [KryoUnitXY]> { let Latency = 1; }
def : WriteRes<WriteISReg, [KryoUnitXY, KryoUnitXY]>
      { let Latency = 2; let NumMicroOps = 2; }
def : WriteRes<WriteIEReg, [KryoUnitXY, KryoUnitXY]>
      { let Latency = 2; let NumMicroOps = 2; }
def : WriteRes<WriteExtr,  [KryoUnitXY, KryoUnitX]>
      { let Latency = 2; let NumMicroOps = 2; }
def : WriteRes<WriteIS,    [KryoUnitXY]> { let Latency = 2; }
// NOTE(review): the two divide entries list two resources but only one
// micro-op; the trailing "Fragment -1" note presumably refers to the
// fragmented-uop accounting mentioned in the file header -- confirm against
// the latency spreadsheet.
def : WriteRes<WriteID32,  [KryoUnitXA, KryoUnitY]>
      { let Latency = 8; let NumMicroOps = 1; } // Fragment -1
def : WriteRes<WriteID64,  [KryoUnitXA, KryoUnitY]>
      { let Latency = 8; let NumMicroOps = 1; } // Fragment -1
def : WriteRes<WriteIM32,  [KryoUnitX]> { let Latency = 5; }
def : WriteRes<WriteIM64,  [KryoUnitX]> { let Latency = 5; }
def : WriteRes<WriteBr,    [KryoUnitXY]> { let Latency = 1; }
def : WriteRes<WriteBrReg, [KryoUnitXY]> { let Latency = 1; }
def : WriteRes<WriteLD,    [KryoUnitLS]> { let Latency = 4; }
def : WriteRes<WriteST,    [KryoUnitLS]> { let Latency = 4; }
def : WriteRes<WriteSTP,   [KryoUnitLS]> { let Latency = 4; }
def : WriteRes<WriteAdr,   [KryoUnitXY]> { let Latency = 6; }
def : WriteRes<WriteLDIdx, [KryoUnitLS]> { let Latency = 4; }
def : WriteRes<WriteSTIdx, [KryoUnitLS]> { let Latency = 4; }
def : WriteRes<WriteF,     [KryoUnitXY, KryoUnitXY]>
      { let Latency = 3; let NumMicroOps = 2; }
def : WriteRes<WriteFCmp,  [KryoUnitXY]> { let Latency = 2; }
def : WriteRes<WriteFCvt,  [KryoUnitX]> { let Latency = 4; }
def : WriteRes<WriteFCopy, [KryoUnitXY]> { let Latency = 6; }
def : WriteRes<WriteFImm,  [KryoUnitXY]> { let Latency = 6; }
def : WriteRes<WriteFMul,  [KryoUnitX, KryoUnitX]>
      { let Latency = 6; let NumMicroOps = 2; }
def : WriteRes<WriteFDiv,  [KryoUnitXA, KryoUnitY]>
      { let Latency = 12; let NumMicroOps = 2; } // Fragment -1 / NoRSV +1
def : WriteRes<WriteV,     [KryoUnitXY]> { let Latency = 6; }
def : WriteRes<WriteVLD,   [KryoUnitLS]> { let Latency = 4; }
def : WriteRes<WriteVST,   [KryoUnitLS]> { let Latency = 4; }

// These write types are given a latency but an empty resource list, so they
// occupy none of the Kryo units defined for this model.
def : WriteRes<WriteSys,     []> { let Latency = 1; }
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
def : WriteRes<WriteHint,    []> { let Latency = 1; }

// Likewise no resources; latency matches the WriteLD entry above.
def : WriteRes<WriteLDHi,    []> { let Latency = 4; }

// No forwarding logic is modelled yet.
// Every read type listed here therefore gets an advance of 0 cycles.
def : ReadAdvance<ReadI,       0>;
def : ReadAdvance<ReadISReg,   0>;
def : ReadAdvance<ReadIEReg,   0>;
def : ReadAdvance<ReadIM,      0>;
def : ReadAdvance<ReadIMA,     0>;
def : ReadAdvance<ReadID,      0>;
def : ReadAdvance<ReadExtrHi,  0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD,     0>;


//===----------------------------------------------------------------------===//
// Specialize the coarse model by associating instruction groups with the
// subtarget-defined types. As the model is refined, this will override most
// of the above SchedWriteRes and SchedAlias mappings.

// Miscellaneous
// -----------------------------------------------------------------------------

// COPY is scheduled with the WriteI mapping defined above.
def : InstRW<[WriteI], (instrs COPY)>;


// Detailed Refinements
// -----------------------------------------------------------------------------
// Included inside the `let SchedModel = KryoModel` scope on purpose, so the
// per-instruction definitions in that file belong to this model as well.
include "AArch64SchedKryoDetails.td"


} // SchedModel = KryoModel
[AArch64] Add support for Qualcomm Kryo CPU. Machine model description by Dave Estes <cestes@codeaurora.org>. llvm-svn: 260686 2016-02-12 23:51:51 +08:00			`//==- AArch64SchedKryo.td - Qualcomm Kryo Scheduling Defs ---- tablegen --==//`
			`//`
			`// The LLVM Compiler Infrastructure`
			`//`
			`// This file is distributed under the University of Illinois Open Source`
			`// License. See LICENSE.TXT for details.`
			`//`
			`//===----------------------------------------------------------------------===//`
			`//`
			`// This file defines the machine model for Qualcomm Kryo to support`
			`// instruction scheduling and other instruction cost heuristics.`
			`//`
			`//===----------------------------------------------------------------------===//`

			`//===----------------------------------------------------------------------===//`
			`// The issue width is set to five, matching the five issue queues for expanded`
			`// uops. Now, the latency spreadsheet has information based on fragmented uops,`
			`// but these do not actually take up an issue queue.`

			`def KryoModel : SchedMachineModel {`
			`let IssueWidth = 5; // 5-wide issue for expanded uops`
			`let MicroOpBufferSize = 128; // Out-of-order with temporary unified issue buffer`
			`let LoadLatency = 4; // Optimistic load latency`
			`let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch`

			`// Enable partial & runtime unrolling. The magic number is chosen based on`
			`// experiments and benchmarking data.`
			`let LoopMicroOpBufferSize = 16;`
TableGen: Check scheduling models for completeness TableGen checks at compiletime that for scheduling models with "CompleteModel = 1" one of the following holds: - Is marked with the hasNoSchedulingInfo flag - The instruction is a subclass of Sched - There are InstRW definitions in the scheduling model Typical steps necessary to complete a model: - Ensure all pseudo instructions that are expanded before machine scheduling (usually everything handled with EmitYYY() functions in XXXTargetLowering). - If a CPU does not support some instructions mark the corresponding resource unsupported: "WriteRes<WriteXXX, []> { let Unsupported = 1; }". - Add missing scheduling information. Differential Revision: http://reviews.llvm.org/D17747 llvm-svn: 262384 2016-03-02 04:03:21 +08:00			`let CompleteModel = 0;`
[AArch64] Add support for Qualcomm Kryo CPU. Machine model description by Dave Estes <cestes@codeaurora.org>. llvm-svn: 260686 2016-02-12 23:51:51 +08:00			`}`

			`//===----------------------------------------------------------------------===//`
			`// Define each kind of processor resource and number available on Kryo.`

			`let SchedModel = KryoModel in {`
			`def KryoUnitXA : ProcResource<1>; // Type X(A) micro-ops`
			`def KryoUnitXB : ProcResource<1>; // Type X(B) micro-ops`
			`def KryoUnitYA : ProcResource<1>; // Type Y(A) micro-ops`
			`def KryoUnitYB : ProcResource<1>; // Type Y(B) micro-ops`
			`def KryoUnitX : ProcResGroup<[KryoUnitXA, // Type X micro-ops`
			`KryoUnitXB]>;`
			`def KryoUnitY : ProcResGroup<[KryoUnitYA, // Type Y micro-ops`
			`KryoUnitYB]>;`
			`def KryoUnitXY : ProcResGroup<[KryoUnitXA, // Type XY micro-ops`
			`KryoUnitXB,`
			`KryoUnitYA,`
			`KryoUnitYB]>;`
			`def KryoUnitLSA : ProcResource<1>; // Type LS(A) micro-ops`
			`def KryoUnitLSB : ProcResource<1>; // Type LS(B) micro-ops`
			`def KryoUnitLS : ProcResGroup<[KryoUnitLSA, // Type LS micro-ops`
			`KryoUnitLSB]>;`
			`}`

			`let SchedModel = KryoModel in {`

			`//===----------------------------------------------------------------------===//`
			`// Map the target-defined scheduler read/write resources and latency for`
			`// Kryo.`

			`def : WriteRes<WriteImm, [KryoUnitXY]> { let Latency = 1; }`
			`def : WriteRes<WriteI, [KryoUnitXY]> { let Latency = 1; }`
			`def : WriteRes<WriteISReg, [KryoUnitXY, KryoUnitXY]>`
			`{ let Latency = 2; let NumMicroOps = 2; }`
			`def : WriteRes<WriteIEReg, [KryoUnitXY, KryoUnitXY]>`
			`{ let Latency = 2; let NumMicroOps = 2; }`
			`def : WriteRes<WriteExtr, [KryoUnitXY, KryoUnitX]>`
			`{ let Latency = 2; let NumMicroOps = 2; }`
			`def : WriteRes<WriteIS, [KryoUnitXY]> { let Latency = 2; }`
			`def : WriteRes<WriteID32, [KryoUnitXA, KryoUnitY]>`
			`{ let Latency = 8; let NumMicroOps = 1; } // Fragent -1`
			`def : WriteRes<WriteID64, [KryoUnitXA, KryoUnitY]>`
			`{ let Latency = 8; let NumMicroOps = 1; } // Fragent -1`
			`def : WriteRes<WriteIM32, [KryoUnitX]> { let Latency = 5; }`
			`def : WriteRes<WriteIM64, [KryoUnitX]> { let Latency = 5; }`
			`def : WriteRes<WriteBr, [KryoUnitXY]> { let Latency = 1; }`
			`def : WriteRes<WriteBrReg, [KryoUnitXY]> { let Latency = 1; }`
			`def : WriteRes<WriteLD, [KryoUnitLS]> { let Latency = 4; }`
			`def : WriteRes<WriteST, [KryoUnitLS]> { let Latency = 4; }`
			`def : WriteRes<WriteSTP, [KryoUnitLS]> { let Latency = 4; }`
			`def : WriteRes<WriteAdr, [KryoUnitXY]> { let Latency = 6; }`
			`def : WriteRes<WriteLDIdx, [KryoUnitLS]> { let Latency = 4; }`
			`def : WriteRes<WriteSTIdx, [KryoUnitLS]> { let Latency = 4; }`
			`def : WriteRes<WriteF, [KryoUnitXY, KryoUnitXY]>`
			`{ let Latency = 3; let NumMicroOps = 2; }`
			`def : WriteRes<WriteFCmp, [KryoUnitXY]> { let Latency = 2; }`
			`def : WriteRes<WriteFCvt, [KryoUnitX]> { let Latency = 4; }`
			`def : WriteRes<WriteFCopy, [KryoUnitXY]> { let Latency = 6; }`
			`def : WriteRes<WriteFImm, [KryoUnitXY]> { let Latency = 6; }`
			`def : WriteRes<WriteFMul, [KryoUnitX, KryoUnitX]>`
			`{ let Latency = 6; let NumMicroOps = 2; }`
			`def : WriteRes<WriteFDiv, [KryoUnitXA, KryoUnitY]>`
			`{ let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1`
			`def : WriteRes<WriteV, [KryoUnitXY]> { let Latency = 6; }`
			`def : WriteRes<WriteVLD, [KryoUnitLS]> { let Latency = 4; }`
			`def : WriteRes<WriteVST, [KryoUnitLS]> { let Latency = 4; }`

			`def : WriteRes<WriteSys, []> { let Latency = 1; }`
			`def : WriteRes<WriteBarrier, []> { let Latency = 1; }`
			`def : WriteRes<WriteHint, []> { let Latency = 1; }`

			`def : WriteRes<WriteLDHi, []> { let Latency = 4; }`

			`// No forwarding logic is modelled yet.`
			`def : ReadAdvance<ReadI, 0>;`
			`def : ReadAdvance<ReadISReg, 0>;`
			`def : ReadAdvance<ReadIEReg, 0>;`
			`def : ReadAdvance<ReadIM, 0>;`
			`def : ReadAdvance<ReadIMA, 0>;`
			`def : ReadAdvance<ReadID, 0>;`
			`def : ReadAdvance<ReadExtrHi, 0>;`
			`def : ReadAdvance<ReadAdrBase, 0>;`
			`def : ReadAdvance<ReadVLD, 0>;`


			`//===----------------------------------------------------------------------===//`
			`// Specialize the coarse model by associating instruction groups with the`
			`// subtarget-defined types. As the modeled is refined, this will override most`
			`// of the above SchedWriteRes and SchedAlias mappings.`

			`// Miscellaneous`
			`// -----------------------------------------------------------------------------`

			`def : InstRW<[WriteI], (instrs COPY)>;`


			`// Detailed Refinedments`
			`// -----------------------------------------------------------------------------`
			`include "AArch64SchedKryoDetails.td"`


			`} // SchedModel = KryoModel`