llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td

//===-- SISchedule.td - SI Scheduling definitions ------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// MachineModel definitions for Southern Islands (SI)
//
//===----------------------------------------------------------------------===//

// Write classes for the non-VALU instruction categories. SICommonWriteRes
// binds each of them to a hardware resource and a latency.
def WriteBranch  : SchedWrite;
def WriteExport  : SchedWrite;
def WriteLDS     : SchedWrite;
def WriteSALU    : SchedWrite;
def WriteSMEM    : SchedWrite;
def WriteVMEM    : SchedWrite;
def WriteBarrier : SchedWrite;

// Vector ALU instructions. Every class below that receives a WriteRes in this
// file is bound to the HWVALU resource.
def Write32Bit : SchedWrite;
def WriteQuarterRate32 : SchedWrite;
// NOTE(review): no WriteRes for WriteFullOrQuarterRate32 is visible in this
// file -- confirm whether one is expected.
def WriteFullOrQuarterRate32 : SchedWrite;

// Latency is model-specific (1 in SIFullSpeedModel, 16 in SIQuarterSpeedModel).
def WriteFloatFMA : SchedWrite;

// Slow quarter rate f64 instruction.
def WriteDouble : SchedWrite;

// Half rate f64 instruction (same as v_add_f64).
def WriteDoubleAdd : SchedWrite;

// Half rate 64-bit instructions.
def Write64Bit : SchedWrite;

// FIXME: Should there be a class for instructions which are VALU
// instructions and have VALU rates, but write to the SALU (i.e. VOPC
// instructions)?

// Base class holding the settings common to every SI scheduling model
// defined in this file.
class SISchedMachineModel : SchedMachineModel {
  // NOTE(review): presumably "one instruction issued per cycle"; see the
  // IssueWidth documentation in TargetSchedule.td.
  let IssueWidth = 1;

  // Turn on the post-RA scheduler for targets using this model.
  let PostRAScheduler = 1;

  // The model does not describe every instruction, so it is not marked as
  // complete.
  let CompleteModel = 0;
}

// The two concrete models. Within this file both instantiate
// SICommonWriteRes and differ in the latencies they assign to WriteFloatFMA,
// WriteDouble and WriteDoubleAdd.
def SIFullSpeedModel : SISchedMachineModel;
def SIQuarterSpeedModel : SISchedMachineModel;

// Hardware resources referenced by the WriteRes definitions in this file.
// Each one is declared as ProcResource<1>; only BufferSize varies.
// XXX: Are the resource counts correct?
let BufferSize = 1 in
def HWBranch : ProcResource<1>;

let BufferSize = 7 in   // Taken from S_WAITCNT
def HWExport : ProcResource<1>;

// Used by both WriteLDS and WriteSMEM.
let BufferSize = 31 in  // Taken from S_WAITCNT
def HWLGKM : ProcResource<1>;

let BufferSize = 1 in
def HWSALU : ProcResource<1>;

let BufferSize = 15 in  // Taken from S_WAITCNT
def HWVMEM : ProcResource<1>;

let BufferSize = 1 in
def HWVALU : ProcResource<1>;

// WriteRes wrapper that also takes the latency as a template argument.
//   write     - the SchedWrite being described.
//   resources - processor resources forwarded to WriteRes.
//   latency   - value stored in the Latency field.
class HWWriteRes<SchedWrite write,
                 list<ProcResourceKind> resources,
                 int latency>
    : WriteRes<write, resources> {
  let Latency = latency;
}

// Shorthand for an HWWriteRes whose only resource is HWVALU.
//   write   - the SchedWrite being described.
//   latency - value forwarded to HWWriteRes.
class HWVALUWriteRes<SchedWrite write, int latency>
    : HWWriteRes<write, [HWVALU], latency>;


// The latency numbers are taken from the AMD Accelerated Parallel Processing
// guide. They may not be accurate.

// The latency values are 1 / (operations / cycle) / 4.
//
// WriteRes definitions that are the same for every SI model. The multiclass
// is instantiated with "defm" inside a "let SchedModel = ... in" block, once
// per model.
multiclass SICommonWriteRes {

  def : HWWriteRes<WriteBranch,  [HWBranch], 8>;
  def : HWWriteRes<WriteExport,  [HWExport], 4>;
  def : HWWriteRes<WriteLDS,     [HWLGKM],   5>; // Can be between 2 and 64
  def : HWWriteRes<WriteSALU,    [HWSALU],   1>;
  def : HWWriteRes<WriteSMEM,    [HWLGKM],   5>; // Shares HWLGKM with WriteLDS
  def : HWWriteRes<WriteVMEM,    [HWVMEM],   80>;
  def : HWWriteRes<WriteBarrier, [HWBranch], 500>; // XXX: Guessed ???

  // VALU classes whose latency does not depend on the model.
  def : HWVALUWriteRes<Write32Bit,         1>;
  def : HWVALUWriteRes<Write64Bit,         2>;
  def : HWVALUWriteRes<WriteQuarterRate32, 4>;
}


// WriteRes definitions for SIFullSpeedModel: the common set plus the
// model-specific latencies for FMA and f64 operations.
let SchedModel = SIFullSpeedModel in {

defm : SICommonWriteRes;

// Latencies here are 1/4/2, versus 16/16/8 in SIQuarterSpeedModel.
def : HWVALUWriteRes<WriteFloatFMA,   1>;
def : HWVALUWriteRes<WriteDouble,     4>;
def : HWVALUWriteRes<WriteDoubleAdd,  2>;

} // End SchedModel = SIFullSpeedModel

// WriteRes definitions for SIQuarterSpeedModel: the common set plus the
// model-specific latencies for FMA and f64 operations.
let SchedModel = SIQuarterSpeedModel in {

defm : SICommonWriteRes;

// Latencies here are 16/16/8, versus 1/4/2 in SIFullSpeedModel.
def : HWVALUWriteRes<WriteFloatFMA, 16>;
def : HWVALUWriteRes<WriteDouble,   16>;
def : HWVALUWriteRes<WriteDoubleAdd, 8>;

}  // End SchedModel = SIQuarterSpeedModel
Add R600 backend A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX llvm-svn: 169915 2012-12-12 05:25:42 +08:00			`//===-- SISchedule.td - SI Scheduling definitons -------------------------===//`
			`//`
			`// The LLVM Compiler Infrastructure`
			`//`
			`// This file is distributed under the University of Illinois Open Source`
			`// License. See LICENSE.TXT for details.`
			`//`
			`//===----------------------------------------------------------------------===//`
			`//`
R600/SI: Define a schedule model The machine scheduler is still disabled by default. The schedule model is not complete yet, and could be improved. llvm-svn: 225913 2015-01-14 09:13:19 +08:00			`// MachineModel definitions for Southern Islands (SI)`
Add R600 backend A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX llvm-svn: 169915 2012-12-12 05:25:42 +08:00			`//`
			`//===----------------------------------------------------------------------===//`

R600/SI: Define a schedule model The machine scheduler is still disabled by default. The schedule model is not complete yet, and could be improved. llvm-svn: 225913 2015-01-14 09:13:19 +08:00			`def WriteBranch : SchedWrite;`
			`def WriteExport : SchedWrite;`
			`def WriteLDS : SchedWrite;`
			`def WriteSALU : SchedWrite;`
			`def WriteSMEM : SchedWrite;`
			`def WriteVMEM : SchedWrite;`
AMDGPU: Mark s_barrier as a high latency instruction These were marked as WriteSALU, which is low latency. I'm guessing at the value to use, but it should probably be considered the highest latency instruction. I'm not sure this has any actual effect since hasSideEffects probably is preventing any moving of these. llvm-svn: 247060 2015-09-09 03:54:32 +08:00			`def WriteBarrier : SchedWrite;`
Add R600 backend A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX llvm-svn: 169915 2012-12-12 05:25:42 +08:00
R600/SI: Define a schedule model The machine scheduler is still disabled by default. The schedule model is not complete yet, and could be improved. llvm-svn: 225913 2015-01-14 09:13:19 +08:00			`// Vector ALU instructions`
			`def Write32Bit : SchedWrite;`
			`def WriteQuarterRate32 : SchedWrite;`
AMDGPU: Improve accuracy of instruction rates for VOPC These were all using the default 32-bit VALU write class, but the i64/f64 compares are half rate. I'm not sure this is really correct, because they are still using the write to VALU write class, even though they really write to the SALU. llvm-svn: 248582 2015-09-26 00:58:25 +08:00			`def WriteFullOrQuarterRate32 : SchedWrite;`
R600/SI: Define a schedule model The machine scheduler is still disabled by default. The schedule model is not complete yet, and could be improved. llvm-svn: 225913 2015-01-14 09:13:19 +08:00
			`def WriteFloatFMA : SchedWrite;`

AMDGPU: Improve accuracy of instruction rates for VOPC These were all using the default 32-bit VALU write class, but the i64/f64 compares are half rate. I'm not sure this is really correct, because they are still using the write to VALU write class, even though they really write to the SALU. llvm-svn: 248582 2015-09-26 00:58:25 +08:00			`// Slow quarter rate f64 instruction.`
			`def WriteDouble : SchedWrite;`

			`// half rate f64 instruction (same as v_add_f64)`
R600/SI: Define a schedule model The machine scheduler is still disabled by default. The schedule model is not complete yet, and could be improved. llvm-svn: 225913 2015-01-14 09:13:19 +08:00			`def WriteDoubleAdd : SchedWrite;`

AMDGPU: Improve accuracy of instruction rates for VOPC These were all using the default 32-bit VALU write class, but the i64/f64 compares are half rate. I'm not sure this is really correct, because they are still using the write to VALU write class, even though they really write to the SALU. llvm-svn: 248582 2015-09-26 00:58:25 +08:00			`// Half rate 64-bit instructions.`
			`def Write64Bit : SchedWrite;`

			`// FIXME: Should there be a class for instructions which are VALU`
			`// instructions and have VALU rates, but write to the SALU (i.e. VOPC`
			`// instructions)`

AMDGPU/SI: Improve MachineSchedModel definition This patch contains a few improvements to the model, including: - Using a single resource with a defined buffers size for each memory unit. - Setting the IssueWidth correctly. - Fixing latency values for memory instructions. shader-db stats: 16429 shaders in 3231 tests Totals: SGPRS: 318232 -> 312328 (-1.86 %) VGPRS: 208996 -> 209346 (0.17 %) Code Size: 7147044 -> 7166440 (0.27 %) bytes LDS: 83 -> 83 (0.00 %) blocks Scratch: 1862656 -> 1459200 (-21.66 %) bytes per wave Max Waves: 49182 -> 49243 (0.12 %) Wait states: 0 -> 0 (0.00 %)A Differential Revision: http://reviews.llvm.org/D18453 llvm-svn: 264877 2016-03-31 00:35:13 +08:00			`class SISchedMachineModel : SchedMachineModel {`
TableGen: Check scheduling models for completeness TableGen checks at compiletime that for scheduling models with "CompleteModel = 1" one of the following holds: - Is marked with the hasNoSchedulingInfo flag - The instruction is a subclass of Sched - There are InstRW definitions in the scheduling model Typical steps necessary to complete a model: - Ensure all pseudo instructions that are expanded before machine scheduling (usually everything handled with EmitYYY() functions in XXXTargetLowering). - If a CPU does not support some instructions mark the corresponding resource unsupported: "WriteRes<WriteXXX, []> { let Unsupported = 1; }". - Add missing scheduling information. Differential Revision: http://reviews.llvm.org/D17747 llvm-svn: 262384 2016-03-02 04:03:21 +08:00			`let CompleteModel = 0;`
AMDGPU/SI: Improve MachineSchedModel definition This patch contains a few improvements to the model, including: - Using a single resource with a defined buffers size for each memory unit. - Setting the IssueWidth correctly. - Fixing latency values for memory instructions. shader-db stats: 16429 shaders in 3231 tests Totals: SGPRS: 318232 -> 312328 (-1.86 %) VGPRS: 208996 -> 209346 (0.17 %) Code Size: 7147044 -> 7166440 (0.27 %) bytes LDS: 83 -> 83 (0.00 %) blocks Scratch: 1862656 -> 1459200 (-21.66 %) bytes per wave Max Waves: 49182 -> 49243 (0.12 %) Wait states: 0 -> 0 (0.00 %)A Differential Revision: http://reviews.llvm.org/D18453 llvm-svn: 264877 2016-03-31 00:35:13 +08:00			`let IssueWidth = 1;`
AMDGPU/SI: Enable the post-ra scheduler Summary: This includes a hazard recognizer implementation to replace some of the hazard handling we had during frame index elimination. Reviewers: arsenm Subscribers: qcolombet, arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D18602 llvm-svn: 268143 2016-04-30 08:23:06 +08:00			`let PostRAScheduler = 1;`
TableGen: Check scheduling models for completeness TableGen checks at compiletime that for scheduling models with "CompleteModel = 1" one of the following holds: - Is marked with the hasNoSchedulingInfo flag - The instruction is a subclass of Sched - There are InstRW definitions in the scheduling model Typical steps necessary to complete a model: - Ensure all pseudo instructions that are expanded before machine scheduling (usually everything handled with EmitYYY() functions in XXXTargetLowering). - If a CPU does not support some instructions mark the corresponding resource unsupported: "WriteRes<WriteXXX, []> { let Unsupported = 1; }". - Add missing scheduling information. Differential Revision: http://reviews.llvm.org/D17747 llvm-svn: 262384 2016-03-02 04:03:21 +08:00			`}`
R600/SI: Define a schedule model The machine scheduler is still disabled by default. The schedule model is not complete yet, and could be improved. llvm-svn: 225913 2015-01-14 09:13:19 +08:00
AMDGPU/SI: Improve MachineSchedModel definition This patch contains a few improvements to the model, including: - Using a single resource with a defined buffers size for each memory unit. - Setting the IssueWidth correctly. - Fixing latency values for memory instructions. shader-db stats: 16429 shaders in 3231 tests Totals: SGPRS: 318232 -> 312328 (-1.86 %) VGPRS: 208996 -> 209346 (0.17 %) Code Size: 7147044 -> 7166440 (0.27 %) bytes LDS: 83 -> 83 (0.00 %) blocks Scratch: 1862656 -> 1459200 (-21.66 %) bytes per wave Max Waves: 49182 -> 49243 (0.12 %) Wait states: 0 -> 0 (0.00 %)A Differential Revision: http://reviews.llvm.org/D18453 llvm-svn: 264877 2016-03-31 00:35:13 +08:00			`def SIFullSpeedModel : SISchedMachineModel;`
			`def SIQuarterSpeedModel : SISchedMachineModel;`
R600/SI: Define a schedule model The machine scheduler is still disabled by default. The schedule model is not complete yet, and could be improved. llvm-svn: 225913 2015-01-14 09:13:19 +08:00
			`// XXX: Are the resource counts correct?`
AMDGPU/SI: Improve MachineSchedModel definition This patch contains a few improvements to the model, including: - Using a single resource with a defined buffers size for each memory unit. - Setting the IssueWidth correctly. - Fixing latency values for memory instructions. shader-db stats: 16429 shaders in 3231 tests Totals: SGPRS: 318232 -> 312328 (-1.86 %) VGPRS: 208996 -> 209346 (0.17 %) Code Size: 7147044 -> 7166440 (0.27 %) bytes LDS: 83 -> 83 (0.00 %) blocks Scratch: 1862656 -> 1459200 (-21.66 %) bytes per wave Max Waves: 49182 -> 49243 (0.12 %) Wait states: 0 -> 0 (0.00 %)A Differential Revision: http://reviews.llvm.org/D18453 llvm-svn: 264877 2016-03-31 00:35:13 +08:00			`def HWBranch : ProcResource<1> {`
			`let BufferSize = 1;`
			`}`
			`def HWExport : ProcResource<1> {`
			`let BufferSize = 7; // Taken from S_WAITCNT`
			`}`
			`def HWLGKM : ProcResource<1> {`
			`let BufferSize = 31; // Taken from S_WAITCNT`
			`}`
			`def HWSALU : ProcResource<1> {`
			`let BufferSize = 1;`
			`}`
			`def HWVMEM : ProcResource<1> {`
			`let BufferSize = 15; // Taken from S_WAITCNT`
			`}`
			`def HWVALU : ProcResource<1> {`
			`let BufferSize = 1;`
R600/SI: Define a schedule model The machine scheduler is still disabled by default. The schedule model is not complete yet, and could be improved. llvm-svn: 225913 2015-01-14 09:13:19 +08:00			`}`

			`class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,`
			`int latency> : WriteRes<write, resources> {`
			`let Latency = latency;`
			`}`

			`class HWVALUWriteRes<SchedWrite write, int latency> :`
			`HWWriteRes<write, [HWVALU], latency>;`


			`// The latency numbers are taken from AMD Accelerated Parallel Processing`
AMDGPU: Improve accuracy of instruction rates for VOPC These were all using the default 32-bit VALU write class, but the i64/f64 compares are half rate. I'm not sure this is really correct, because they are still using the write to VALU write class, even though they really write to the SALU. llvm-svn: 248582 2015-09-26 00:58:25 +08:00			`// guide. They may not be accurate.`
R600/SI: Define a schedule model The machine scheduler is still disabled by default. The schedule model is not complete yet, and could be improved. llvm-svn: 225913 2015-01-14 09:13:19 +08:00
			`// The latency values are 1 / (operations / cycle) / 4.`
			`multiclass SICommonWriteRes {`

AMDGPU/SI: Improve MachineSchedModel definition This patch contains a few improvements to the model, including: - Using a single resource with a defined buffers size for each memory unit. - Setting the IssueWidth correctly. - Fixing latency values for memory instructions. shader-db stats: 16429 shaders in 3231 tests Totals: SGPRS: 318232 -> 312328 (-1.86 %) VGPRS: 208996 -> 209346 (0.17 %) Code Size: 7147044 -> 7166440 (0.27 %) bytes LDS: 83 -> 83 (0.00 %) blocks Scratch: 1862656 -> 1459200 (-21.66 %) bytes per wave Max Waves: 49182 -> 49243 (0.12 %) Wait states: 0 -> 0 (0.00 %)A Differential Revision: http://reviews.llvm.org/D18453 llvm-svn: 264877 2016-03-31 00:35:13 +08:00			`def : HWWriteRes<WriteBranch, [HWBranch], 8>;`
			`def : HWWriteRes<WriteExport, [HWExport], 4>;`
			`def : HWWriteRes<WriteLDS, [HWLGKM], 5>; // Can be between 2 and 64`
			`def : HWWriteRes<WriteSALU, [HWSALU], 1>;`
			`def : HWWriteRes<WriteSMEM, [HWLGKM], 5>;`
			`def : HWWriteRes<WriteVMEM, [HWVMEM], 80>;`
AMDGPU: Mark s_barrier as a high latency instruction These were marked as WriteSALU, which is low latency. I'm guessing at the value to use, but it should probably be considered the highest latency instruction. I'm not sure this has any actual effect since hasSideEffects probably is preventing any moving of these. llvm-svn: 247060 2015-09-09 03:54:32 +08:00			`def : HWWriteRes<WriteBarrier, [HWBranch], 500>; // XXX: Guessed ???`
R600/SI: Define a schedule model The machine scheduler is still disabled by default. The schedule model is not complete yet, and could be improved. llvm-svn: 225913 2015-01-14 09:13:19 +08:00
			`def : HWVALUWriteRes<Write32Bit, 1>;`
AMDGPU: Improve accuracy of instruction rates for VOPC These were all using the default 32-bit VALU write class, but the i64/f64 compares are half rate. I'm not sure this is really correct, because they are still using the write to VALU write class, even though they really write to the SALU. llvm-svn: 248582 2015-09-26 00:58:25 +08:00			`def : HWVALUWriteRes<Write64Bit, 2>;`
R600/SI: Define a schedule model The machine scheduler is still disabled by default. The schedule model is not complete yet, and could be improved. llvm-svn: 225913 2015-01-14 09:13:19 +08:00			`def : HWVALUWriteRes<WriteQuarterRate32, 4>;`
			`}`


			`let SchedModel = SIFullSpeedModel in {`

			`defm : SICommonWriteRes;`

			`def : HWVALUWriteRes<WriteFloatFMA, 1>;`
			`def : HWVALUWriteRes<WriteDouble, 4>;`
			`def : HWVALUWriteRes<WriteDoubleAdd, 2>;`

			`} // End SchedModel = SIFullSpeedModel`

			`let SchedModel = SIQuarterSpeedModel in {`

			`defm : SICommonWriteRes;`

			`def : HWVALUWriteRes<WriteFloatFMA, 16>;`
			`def : HWVALUWriteRes<WriteDouble, 16>;`
			`def : HWVALUWriteRes<WriteDoubleAdd, 8>;`

			`} // End SchedModel = SIQuarterSpeedModel`