llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td

//===---- AMDGPUCallingConv.td - Calling Conventions for Radeon GPUs ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This describes the calling conventions for the AMD Radeon GPUs.
//
//===----------------------------------------------------------------------===//

// Predicate wrapper that is the inverse of CCIfInReg: action A is applied
// only when the argument does NOT carry the InReg flag.
class CCIfNotInReg<CCAction A>
  : CCIf<"!ArgFlags.isInReg()", A>;
// Predicate wrapper: action A is applied only when the argument carries a
// sign-extension or zero-extension flag.
class CCIfExtend<CCAction A> :
  CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A> {}

// Calling convention for SI
//
// The rules below are listed in priority order:
//   1. f32/i32/f16 arguments flagged InReg are assigned to SGPR0-SGPR39.
//   2. i64 arguments flagged InReg or ByVal are handed to the custom
//      allocateSGPRTuple handler.
//   3. f32/i32/f16 arguments NOT flagged InReg are assigned to
//      VGPR0-VGPR135.
// No other value type is matched by this list.
def CC_SI : CallingConv<[

  // Rule 1: InReg 32-bit-or-smaller scalars -> SGPR0..SGPR39.
  CCIfInReg<CCIfType<[f32, i32, f16] , CCAssignToReg<[
    SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
    SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
    SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
    SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
    SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
  ]>>>,

  // Rule 2: i64 flagged InReg or ByVal.
  // We have no way of referring to the generated register tuples
  // here, so use a custom function.
  CCIfInReg<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,
  CCIfByVal<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,

  // Rule 3: non-InReg 32-bit-or-smaller scalars -> VGPR0..VGPR135.
  // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
  // (The list below holds 136 registers, i.e. slightly more than that
  // minimum of 132.)
  CCIfNotInReg<CCIfType<[f32, i32, f16] , CCAssignToReg<[
    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,
    VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,
    VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,
    VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,
    VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,
    VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,
    VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,
    VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,
    VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,
    VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,
    VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,
    VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
    VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
    VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
  ]>>>
]>;

// Return-value convention for SI shaders.
//
// i32 results are assigned to SGPR0-SGPR39; f32 and f16 results are
// assigned to VGPR0-VGPR135. No other value type is matched by this list.
def RetCC_SI_Shader : CallingConv<[
  // i32 return values -> SGPR0..SGPR39.
  CCIfType<[i32] , CCAssignToReg<[
    SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
    SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
    SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
    SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
    SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
  ]>>,

  // f32/f16 return values -> VGPR0..VGPR135.
  // 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
  CCIfType<[f32, f16] , CCAssignToReg<[
    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,
    VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,
    VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,
    VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,
    VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,
    VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,
    VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,
    VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,
    VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,
    VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,
    VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,
    VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
    VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
    VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
  ]>>
]>;

// Calling convention for R600.
//
// Only InReg-flagged v4f32/v4i32 arguments are matched; they are assigned,
// in order, to the registers T0_XYZW through T32_XYZW.
def CC_R600 : CallingConv<[
  CCIfInReg<
    CCIfType<[v4f32, v4i32],
      CCAssignToReg<[
        T0_XYZW,  T1_XYZW,  T2_XYZW,  T3_XYZW,
        T4_XYZW,  T5_XYZW,  T6_XYZW,  T7_XYZW,
        T8_XYZW,  T9_XYZW,  T10_XYZW, T11_XYZW,
        T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW,
        T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW,
        T20_XYZW, T21_XYZW, T22_XYZW, T23_XYZW,
        T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW,
        T28_XYZW, T29_XYZW, T30_XYZW, T31_XYZW,
        T32_XYZW
      ]>
    >
  >
]>;

// Calling convention for compute kernels.
//
// Every argument, regardless of type or flags, is routed to the custom
// allocateKernArg handler; there are no register or stack rules here.
def CC_AMDGPU_Kernel : CallingConv<[CCCustom<"allocateKernArg">]>;

// Callee-saved register set: VGPR24 through VGPR255 inclusive.
def CSR_AMDGPU_VGPRs_24_255
  : CalleeSavedRegs<(sequence "VGPR%u", 24, 255)>;

// Callee-saved register set: VGPR32 through VGPR255 inclusive.
def CSR_AMDGPU_VGPRs_32_255
  : CalleeSavedRegs<(sequence "VGPR%u", 32, 255)>;

// Callee-saved register set: SGPR32 through SGPR103 inclusive.
def CSR_AMDGPU_SGPRs_32_103
  : CalleeSavedRegs<(sequence "SGPR%u", 32, 103)>;

// Callee-saved register set formed by combining CSR_AMDGPU_VGPRs_32_255
// (VGPR32-VGPR255) with CSR_AMDGPU_SGPRs_32_103 (SGPR32-SGPR103).
def CSR_AMDGPU_HighRegs
  : CalleeSavedRegs<(add CSR_AMDGPU_VGPRs_32_255, CSR_AMDGPU_SGPRs_32_103)>;

// Calling convention for leaf functions
//
// Argument-passing rules, listed in priority order:
//   1. ByVal arguments are passed with CCPassByVal<4, 4>.
//   2. i1 is always promoted to i32; i1/i8/i16 carrying a sext/zext flag
//      are promoted to i32 as well.
//   3. 32-bit-or-smaller types are assigned to VGPR0-VGPR31.
//   4. Wider scalar and vector types go through the custom
//      allocateVGPRTuple handler.
//   5. Anything not placed by the rules above falls back to a stack slot
//      whose size depends on the type (4/8/16/32/64) with alignment 4.
def CC_AMDGPU_Func : CallingConv<[
  CCIfByVal<CCPassByVal<4, 4>>,
  // NOTE(review): i1 is promoted to i32 unconditionally here, so the i1
  // entries in the type lists further down look redundant -- confirm before
  // removing them.
  CCIfType<[i1], CCPromoteToType<i32>>,
  // Small integers are only widened when the caller asked for an extension
  // (see CCIfExtend: isSExt() || isZExt()).
  CCIfType<[i1, i8, i16], CCIfExtend<CCPromoteToType<i32>>>,
  // Register assignment for types handled one VGPR at a time.
  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1], CCAssignToReg<[
    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
  // Multi-register types are placed by a custom handler.
  CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64], CCCustom<"allocateVGPRTuple">>,
  // Stack fallbacks: CCAssignToStack<size, align>, grouped by type width.
  CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>,
  CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>,
  CCIfType<[v4i32, v4f32, v2i64, v2f64], CCAssignToStack<16, 4>>,
  CCIfType<[v8i32, v8f32], CCAssignToStack<32, 4>>,
  CCIfType<[v16i32, v16f32], CCAssignToStack<64, 4>>
]>;

// Return-value convention for leaf functions
//
// Rules, listed in priority order:
//   1. i1 is always promoted to i32; i1/i16 carrying a sext/zext flag are
//      promoted to i32 as well.
//   2. 32-bit-or-smaller types are assigned to VGPR0-VGPR31.
//   3. Wider scalar and vector types go through the custom
//      allocateVGPRTuple handler.
// Unlike CC_AMDGPU_Func, this list contains no stack-assignment rules.
def RetCC_AMDGPU_Func : CallingConv<[
  CCIfType<[i1], CCPromoteToType<i32>>,
  // NOTE(review): the matching argument rule in CC_AMDGPU_Func also lists
  // i8 here; confirm whether the omission is intentional.
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
  // Register assignment for types handled one VGPR at a time.
  CCIfType<[i32, f32, i16, f16, v2i16, v2f16], CCAssignToReg<[
    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
  // Multi-register types are placed by a custom handler.
  CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64], CCCustom<"allocateVGPRTuple">>
]>;

// Top-level dispatcher: selects one of the conventions defined in this file
// based on the subtarget generation and the function's calling convention.
//
// The predicate strings are C++ fragments that read the subtarget from
// State.getMachineFunction() and the calling convention from State. Rules
// are listed in priority order:
//   1. generation >= SOUTHERN_ISLANDS and not a shader -> CC_AMDGPU_Kernel
//   2. generation <  SOUTHERN_ISLANDS and not a shader -> CC_AMDGPU_Kernel
//   3. generation >= SOUTHERN_ISLANDS                  -> CC_SI
//   4. generation >= SOUTHERN_ISLANDS and CallingConv::C -> CC_AMDGPU_Func
//   5. generation <  SOUTHERN_ISLANDS                  -> CC_R600
//
// NOTE(review): rule 4's predicate is a strict subset of rule 3's, and, if
// AMDGPU::isShader() is false for CallingConv::C, of rule 1's too. Rule 4
// can therefore only be reached when the earlier delegates decline the
// argument -- confirm whether it is meant to be reachable from here.
def CC_AMDGPU : CallingConv<[
  // Rule 1: non-shader functions on SOUTHERN_ISLANDS or newer.
  CCIf<"static_cast<const AMDGPUSubtarget&>"
        "(State.getMachineFunction().getSubtarget()).getGeneration() >="
          "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
        "!AMDGPU::isShader(State.getCallingConv())",
       CCDelegateTo<CC_AMDGPU_Kernel>>,
  // Rule 2: non-shader functions on generations before SOUTHERN_ISLANDS.
  CCIf<"static_cast<const AMDGPUSubtarget&>"
        "(State.getMachineFunction().getSubtarget()).getGeneration() < "
          "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
         "!AMDGPU::isShader(State.getCallingConv())",
        CCDelegateTo<CC_AMDGPU_Kernel>>,
   // Rule 3: everything else on SOUTHERN_ISLANDS or newer.
   CCIf<"static_cast<const AMDGPUSubtarget&>"
         "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
           "AMDGPUSubtarget::SOUTHERN_ISLANDS",
        CCDelegateTo<CC_SI>>,
   // Rule 4: CallingConv::C on SOUTHERN_ISLANDS or newer (see NOTE above).
   CCIf<"static_cast<const AMDGPUSubtarget&>"
         "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
           "AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C",
        CCDelegateTo<CC_AMDGPU_Func>>,
   // Rule 5: everything else on generations before SOUTHERN_ISLANDS.
   CCIf<"static_cast<const AMDGPUSubtarget&>"
          "(State.getMachineFunction().getSubtarget()).getGeneration() < "
            "AMDGPUSubtarget::SOUTHERN_ISLANDS",
        CCDelegateTo<CC_R600>>
]>;
R600/SI: add proper formal parameter handling for SI Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> llvm-svn: 176623 2013-03-07 17:03:52 +08:00			`//===---- AMDCallingConv.td - Calling Conventions for Radeon GPUs ---------===//`
			`//`
			`// The LLVM Compiler Infrastructure`
			`//`
			`// This file is distributed under the University of Illinois Open Source`
			`// License. See LICENSE.TXT for details.`
			`//`
			`//===----------------------------------------------------------------------===//`
			`//`
			`// This describes the calling conventions for the AMD Radeon GPUs.`
			`//`
			`//===----------------------------------------------------------------------===//`

			`// Inversion of CCIfInReg`
			`class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A> {}`
AMDGPU: Start defining a calling convention Partially implement callee-side for arguments and return values. byval doesn't work properly, and most likely sret or other on-stack return values most as well. llvm-svn: 303308 2017-05-18 05:56:25 +08:00			`class CCIfExtend<CCAction A>`
			`: CCIf<"ArgFlags.isSExt() \|\| ArgFlags.isZExt()", A>;`
R600/SI: add proper formal parameter handling for SI Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> llvm-svn: 176623 2013-03-07 17:03:52 +08:00
			`// Calling convention for SI`
			`def CC_SI : CallingConv<[`

AMDGPU: Add f16 to shader calling conventions Mostly useful for writing tests for f16 features. llvm-svn: 296370 2017-02-28 03:24:47 +08:00			`CCIfInReg<CCIfType<[f32, i32, f16] , CCAssignToReg<[`
R600/SI: add proper formal parameter handling for SI Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> llvm-svn: 176623 2013-03-07 17:03:52 +08:00			`SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,`
R600/SI: expose TBUFFER_STORE_FORMAT_* for OpenGL transform feedback For _XYZ, the type of VDATA is v4i32, because v3i32 doesn't exist. The ADDR64 bit is not exposed. A simpler intrinsic that doesn't take a resource descriptor might be nicer. The maximum number of input SGPRs is bumped to 17. Signed-off-by: Marek Olšák <marek.olsak@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> llvm-svn: 190575 2013-09-12 10:55:14 +08:00			`SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,`
AMDGPU/SI: Allow more shader inputs Reviewers: tstellarAMD, arsenm Subscribers: arsenm Differential Revision: http://reviews.llvm.org/D16032 llvm-svn: 257593 2016-01-13 19:46:48 +08:00			`SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,`
			`SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,`
			`SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39`
R600/SI: add proper formal parameter handling for SI Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> llvm-svn: 176623 2013-03-07 17:03:52 +08:00			`]>>>,`

AMDGPU: Stop using CCAssignToRegWithShadow This does not do what it is attempting to use it for and requires working around in LowerFormalArguments. llvm-svn: 299667 2017-04-07 01:37:27 +08:00			`// We have no way of referring to the generated register tuples`
			`// here, so use a custom function.`
			`CCIfInReg<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,`
			`CCIfByVal<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,`
R600/SI: add proper formal parameter handling for SI Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> llvm-svn: 176623 2013-03-07 17:03:52 +08:00
AMDGPU/SI: Allow more shader inputs Reviewers: tstellarAMD, arsenm Subscribers: arsenm Differential Revision: http://reviews.llvm.org/D16032 llvm-svn: 257593 2016-01-13 19:46:48 +08:00			`// 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.`
AMDGPU: Add f16 to shader calling conventions Mostly useful for writing tests for f16 features. llvm-svn: 296370 2017-02-28 03:24:47 +08:00			`CCIfNotInReg<CCIfType<[f32, i32, f16] , CCAssignToReg<[`
R600/SI: add proper formal parameter handling for SI Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> llvm-svn: 176623 2013-03-07 17:03:52 +08:00			`VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,`
			`VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,`
			`VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,`
AMDGPU/SI: Allow more shader inputs Reviewers: tstellarAMD, arsenm Subscribers: arsenm Differential Revision: http://reviews.llvm.org/D16032 llvm-svn: 257593 2016-01-13 19:46:48 +08:00			`VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,`
			`VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,`
			`VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,`
			`VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,`
			`VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,`
			`VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,`
			`VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,`
			`VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,`
			`VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,`
			`VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,`
			`VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,`
			`VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,`
			`VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,`
			`VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135`
AMDGPU: Stop using CCAssignToRegWithShadow This does not do what it is attempting to use it for and requires working around in LowerFormalArguments. llvm-svn: 299667 2017-04-07 01:37:27 +08:00			`]>>>`
R600/SI: Add a calling convention for compute shaders llvm-svn: 183137 2013-06-04 01:40:11 +08:00			`]>;`
R600/SI: add proper formal parameter handling for SI Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> llvm-svn: 176623 2013-03-07 17:03:52 +08:00
AMDGPU: Start defining a calling convention Partially implement callee-side for arguments and return values. byval doesn't work properly, and most likely sret or other on-stack return values most as well. llvm-svn: 303308 2017-05-18 05:56:25 +08:00			`def RetCC_SI_Shader : CallingConv<[`
AMDGPU/SI: Add support for non-void functions Summary: Return values can be stored in SGPRs (i32) and VGPRs (f32). This will be used by functions which expect some bytecode or other binary to be appended at the end. It allows defining in which registers the return values will be stored. v2: don't do this for compute shaders Reviewers: tstellarAMD, arsenm Subscribers: arsenm Differential Revision: http://reviews.llvm.org/D16033 llvm-svn: 257621 2016-01-14 01:23:04 +08:00			`CCIfType<[i32] , CCAssignToReg<[`
			`SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,`
			`SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,`
			`SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,`
			`SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,`
			`SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39`
			`]>>,`

			`// 32*4 + 4 is the minimum for a fetch shader with 32 outputs.`
AMDGPU: Add f16 to shader calling conventions Mostly useful for writing tests for f16 features. llvm-svn: 296370 2017-02-28 03:24:47 +08:00			`CCIfType<[f32, f16] , CCAssignToReg<[`
AMDGPU/SI: Add support for non-void functions Summary: Return values can be stored in SGPRs (i32) and VGPRs (f32). This will be used by functions which expect some bytecode or other binary to be appended at the end. It allows defining in which registers the return values will be stored. v2: don't do this for compute shaders Reviewers: tstellarAMD, arsenm Subscribers: arsenm Differential Revision: http://reviews.llvm.org/D16033 llvm-svn: 257621 2016-01-14 01:23:04 +08:00			`VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,`
			`VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,`
			`VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,`
			`VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,`
			`VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,`
			`VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,`
			`VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,`
			`VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,`
			`VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,`
			`VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,`
			`VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,`
			`VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,`
			`VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,`
			`VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,`
			`VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,`
			`VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,`
			`VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135`
			`]>>`
			`]>;`

R600: Use function inputs to represent data stored in gpr llvm-svn: 194425 2013-11-12 06:10:24 +08:00			`// Calling convention for R600`
			`def CC_R600 : CallingConv<[`
			`CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[`
			`T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW,`
			`T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW,`
			`T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW,`
			`T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW,`
			`T30_XYZW, T31_XYZW, T32_XYZW`
			`]>>>`
			`]>;`

R600: Use the same compute kernel calling convention for all GPUs A side-effect of this is that now the compiler expects kernel arguments to be 4-byte aligned. Reviewed-by: Vincent Lejeune <vljn at ovi.com> llvm-svn: 186916 2013-07-23 09:48:05 +08:00			`// Calling convention for compute kernels`
			`def CC_AMDGPU_Kernel : CallingConv<[`
AMDGPU: Fix kernel argument alignment impacting stack size Don't use AllocateStack because kernel arguments have nothing to do with the stack. The ensureMaxAlignment call was still changing the stack alignment. llvm-svn: 273080 2016-06-18 13:15:53 +08:00			`CCCustom<"allocateKernArg">`
R600/SI: add proper formal parameter handling for SI Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> llvm-svn: 176623 2013-03-07 17:03:52 +08:00			`]>;`

AMDGPU: Start defining a calling convention Partially implement callee-side for arguments and return values. byval doesn't work properly, and most likely sret or other on-stack return values most as well. llvm-svn: 303308 2017-05-18 05:56:25 +08:00			`def CSR_AMDGPU_VGPRs_24_255 : CalleeSavedRegs<`
			`(sequence "VGPR%u", 24, 255)`
			`>;`

			`def CSR_AMDGPU_VGPRs_32_255 : CalleeSavedRegs<`
			`(sequence "VGPR%u", 32, 255)`
			`>;`

			`def CSR_AMDGPU_SGPRs_32_103 : CalleeSavedRegs<`
			`(sequence "SGPR%u", 32, 103)`
			`>;`

			`def CSR_AMDGPU_HighRegs : CalleeSavedRegs<`
			`(add CSR_AMDGPU_VGPRs_32_255, CSR_AMDGPU_SGPRs_32_103)`
			`>;`

			`// Calling convention for leaf functions`
			`def CC_AMDGPU_Func : CallingConv<[`
			`CCIfByVal<CCPassByVal<4, 4>>,`
			`CCIfType<[i1], CCPromoteToType<i32>>,`
			`CCIfType<[i1, i8, i16], CCIfExtend<CCPromoteToType<i32>>>,`
			`CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1], CCAssignToReg<[`
			`VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,`
			`VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,`
			`VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,`
			`VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,`
			`CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64], CCCustom<"allocateVGPRTuple">>,`
			`CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>,`
			`CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>,`
			`CCIfType<[v4i32, v4f32, v2i64, v2f64], CCAssignToStack<16, 4>>,`
			`CCIfType<[v8i32, v8f32], CCAssignToStack<32, 4>>,`
			`CCIfType<[v16i32, v16f32], CCAssignToStack<64, 4>>`
			`]>;`

			`// Calling convention for leaf functions`
			`def RetCC_AMDGPU_Func : CallingConv<[`
			`CCIfType<[i1], CCPromoteToType<i32>>,`
			`CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,`
			`CCIfType<[i32, f32, i16, f16, v2i16, v2f16], CCAssignToReg<[`
			`VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,`
			`VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,`
			`VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,`
			`VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,`
			`CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64], CCCustom<"allocateVGPRTuple">>`
			`]>;`

R600/SI: add proper formal parameter handling for SI Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> llvm-svn: 176623 2013-03-07 17:03:52 +08:00			`def CC_AMDGPU : CallingConv<[`
Remove the target machine from CCState. Previously it was only used to get the subtarget and that's accessible from the MachineFunction now. This helps clear the way for smaller changes where we getting a subtarget will require passing in a MachineFunction/Function as well. llvm-svn: 214988 2014-08-07 02:45:26 +08:00			`CCIf<"static_cast<const AMDGPUSubtarget&>"`
			`"(State.getMachineFunction().getSubtarget()).getGeneration() >="`
			`"AMDGPUSubtarget::SOUTHERN_ISLANDS && "`
AMDGPU: Add a shader calling convention This makes it possible to distinguish between mesa shaders and other kernels even in the presence of compute shaders. Patch By: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Differential Revision: http://reviews.llvm.org/D18559 llvm-svn: 265589 2016-04-07 03:40:20 +08:00			`"!AMDGPU::isShader(State.getCallingConv())",`
Remove the target machine from CCState. Previously it was only used to get the subtarget and that's accessible from the MachineFunction now. This helps clear the way for smaller changes where we getting a subtarget will require passing in a MachineFunction/Function as well. llvm-svn: 214988 2014-08-07 02:45:26 +08:00			`CCDelegateTo<CC_AMDGPU_Kernel>>,`
			`CCIf<"static_cast<const AMDGPUSubtarget&>"`
			`"(State.getMachineFunction().getSubtarget()).getGeneration() < "`
			`"AMDGPUSubtarget::SOUTHERN_ISLANDS && "`
AMDGPU: Add a shader calling convention This makes it possible to distinguish between mesa shaders and other kernels even in the presence of compute shaders. Patch By: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Differential Revision: http://reviews.llvm.org/D18559 llvm-svn: 265589 2016-04-07 03:40:20 +08:00			`"!AMDGPU::isShader(State.getCallingConv())",`
Remove the target machine from CCState. Previously it was only used to get the subtarget and that's accessible from the MachineFunction now. This helps clear the way for smaller changes where we getting a subtarget will require passing in a MachineFunction/Function as well. llvm-svn: 214988 2014-08-07 02:45:26 +08:00			`CCDelegateTo<CC_AMDGPU_Kernel>>,`
			`CCIf<"static_cast<const AMDGPUSubtarget&>"`
			`"(State.getMachineFunction().getSubtarget()).getGeneration() >= "`
			`"AMDGPUSubtarget::SOUTHERN_ISLANDS",`
			`CCDelegateTo<CC_SI>>,`
AMDGPU: Initial implementation of calls Includes a hack to fix the type selected for the GlobalAddress of the function, which will be fixed by changing the default datalayout to use generic pointers for 0. llvm-svn: 309732 2017-08-02 03:54:18 +08:00			`CCIf<"static_cast<const AMDGPUSubtarget&>"`
			`"(State.getMachineFunction().getSubtarget()).getGeneration() >= "`
			`"AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C",`
			`CCDelegateTo<CC_AMDGPU_Func>>,`
Remove the target machine from CCState. Previously it was only used to get the subtarget and that's accessible from the MachineFunction now. This helps clear the way for smaller changes where we getting a subtarget will require passing in a MachineFunction/Function as well. llvm-svn: 214988 2014-08-07 02:45:26 +08:00			`CCIf<"static_cast<const AMDGPUSubtarget&>"`
			`"(State.getMachineFunction().getSubtarget()).getGeneration() < "`
			`"AMDGPUSubtarget::SOUTHERN_ISLANDS",`
			`CCDelegateTo<CC_R600>>`
R600/SI: add proper formal parameter handling for SI Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> llvm-svn: 176623 2013-03-07 17:03:52 +08:00			`]>;`